Copy disabled (too large)
Download .txt
Showing preview only (70,649K chars total). Download the full file to get everything.
Repository: ggml-org/llama.cpp
Branch: master
Commit: 4888137b1736
Files: 2536
Total size: 77.2 MB
Directory structure:
gitextract_xcrsk4vf/
├── .clang-format
├── .clang-tidy
├── .devops/
│ ├── cann.Dockerfile
│ ├── cpu.Dockerfile
│ ├── cuda-new.Dockerfile
│ ├── cuda.Dockerfile
│ ├── intel.Dockerfile
│ ├── llama-cli-cann.Dockerfile
│ ├── llama-cpp-cuda.srpm.spec
│ ├── llama-cpp.srpm.spec
│ ├── musa.Dockerfile
│ ├── nix/
│ │ ├── apps.nix
│ │ ├── devshells.nix
│ │ ├── docker.nix
│ │ ├── jetson-support.nix
│ │ ├── nixpkgs-instances.nix
│ │ ├── package-gguf-py.nix
│ │ ├── package.nix
│ │ ├── python-scripts.nix
│ │ ├── scope.nix
│ │ └── sif.nix
│ ├── openvino.Dockerfile
│ ├── rocm.Dockerfile
│ ├── s390x.Dockerfile
│ ├── tools.sh
│ └── vulkan.Dockerfile
├── .dockerignore
├── .ecrc
├── .editorconfig
├── .flake8
├── .gemini/
│ └── settings.json
├── .gitattributes
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── 010-bug-compilation.yml
│ │ ├── 011-bug-results.yml
│ │ ├── 019-bug-misc.yml
│ │ ├── 020-enhancement.yml
│ │ ├── 030-research.yml
│ │ ├── 040-refactor.yml
│ │ └── config.yml
│ ├── actions/
│ │ ├── get-tag-name/
│ │ │ └── action.yml
│ │ ├── install-exe/
│ │ │ └── action.yml
│ │ ├── linux-setup-openvino/
│ │ │ └── action.yml
│ │ ├── linux-setup-spacemit/
│ │ │ └── action.yml
│ │ ├── linux-setup-vulkan/
│ │ │ └── action.yml
│ │ ├── unarchive-tar/
│ │ │ └── action.yml
│ │ ├── windows-setup-cuda/
│ │ │ └── action.yml
│ │ └── windows-setup-rocm/
│ │ └── action.yml
│ ├── labeler.yml
│ ├── pull_request_template.md
│ └── workflows/
│ ├── ai-issues.yml
│ ├── bench.yml.disabled
│ ├── build-3rd-party.yml
│ ├── build-android.yml
│ ├── build-apple.yml
│ ├── build-cache.yml
│ ├── build-cann.yml
│ ├── build-cmake-pkg.yml
│ ├── build-cross.yml
│ ├── build-msys.yml
│ ├── build-riscv.yml
│ ├── build-sanitize.yml
│ ├── build-self-hosted.yml
│ ├── build-vulkan.yml
│ ├── build.yml
│ ├── check-vendor.yml
│ ├── close-issue.yml
│ ├── copilot-setup-steps.yml
│ ├── docker.yml
│ ├── editorconfig.yml
│ ├── gguf-publish.yml
│ ├── hip-quality-check.yml
│ ├── labeler.yml
│ ├── pre-tokenizer-hashes.yml
│ ├── python-check-requirements.yml
│ ├── python-lint.yml
│ ├── python-type-check.yml
│ ├── release.yml
│ ├── server-sanitize.yml
│ ├── server-self-hosted.yml
│ ├── server-webui.yml
│ ├── server.yml
│ ├── update-ops-docs.yml
│ └── winget.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── AGENTS.md
├── AUTHORS
├── CLAUDE.md
├── CMakeLists.txt
├── CMakePresets.json
├── CODEOWNERS
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── benches/
│ ├── dgx-spark/
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.html
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.json
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547_allresults.json
│ │ └── dgx-spark.md
│ ├── mac-m2-ultra/
│ │ └── mac-m2-ultra.md
│ └── nemotron/
│ └── nemotron-dgx-spark.md
├── ci/
│ ├── README-MUSA.md
│ ├── README.md
│ └── run.sh
├── cmake/
│ ├── arm64-apple-clang.cmake
│ ├── arm64-windows-llvm.cmake
│ ├── build-info.cmake
│ ├── common.cmake
│ ├── download-models.cmake
│ ├── git-vars.cmake
│ ├── license.cmake
│ ├── llama-config.cmake.in
│ ├── llama.pc.in
│ ├── riscv64-spacemit-linux-gnu-gcc.cmake
│ └── x64-windows-llvm.cmake
├── common/
│ ├── CMakeLists.txt
│ ├── arg.cpp
│ ├── arg.h
│ ├── base64.hpp
│ ├── build-info.cpp.in
│ ├── chat-auto-parser-generator.cpp
│ ├── chat-auto-parser-helpers.cpp
│ ├── chat-auto-parser-helpers.h
│ ├── chat-auto-parser.h
│ ├── chat-diff-analyzer.cpp
│ ├── chat-peg-parser.cpp
│ ├── chat-peg-parser.h
│ ├── chat.cpp
│ ├── chat.h
│ ├── common.cpp
│ ├── common.h
│ ├── console.cpp
│ ├── console.h
│ ├── debug.cpp
│ ├── debug.h
│ ├── download.cpp
│ ├── download.h
│ ├── hf-cache.cpp
│ ├── hf-cache.h
│ ├── http.h
│ ├── jinja/
│ │ ├── README.md
│ │ ├── caps.cpp
│ │ ├── caps.h
│ │ ├── lexer.cpp
│ │ ├── lexer.h
│ │ ├── parser.cpp
│ │ ├── parser.h
│ │ ├── runtime.cpp
│ │ ├── runtime.h
│ │ ├── string.cpp
│ │ ├── string.h
│ │ ├── utils.h
│ │ ├── value.cpp
│ │ └── value.h
│ ├── json-partial.cpp
│ ├── json-partial.h
│ ├── json-schema-to-grammar.cpp
│ ├── json-schema-to-grammar.h
│ ├── llguidance.cpp
│ ├── log.cpp
│ ├── log.h
│ ├── ngram-cache.cpp
│ ├── ngram-cache.h
│ ├── ngram-map.cpp
│ ├── ngram-map.h
│ ├── ngram-mod.cpp
│ ├── ngram-mod.h
│ ├── peg-parser.cpp
│ ├── peg-parser.h
│ ├── preset.cpp
│ ├── preset.h
│ ├── reasoning-budget.cpp
│ ├── reasoning-budget.h
│ ├── regex-partial.cpp
│ ├── regex-partial.h
│ ├── sampling.cpp
│ ├── sampling.h
│ ├── speculative.cpp
│ ├── speculative.h
│ ├── unicode.cpp
│ └── unicode.h
├── convert_hf_to_gguf.py
├── convert_hf_to_gguf_update.py
├── convert_llama_ggml_to_gguf.py
├── convert_lora_to_gguf.py
├── docs/
│ ├── android.md
│ ├── autoparser.md
│ ├── backend/
│ │ ├── BLIS.md
│ │ ├── CANN.md
│ │ ├── CUDA-FEDORA.md
│ │ ├── OPENCL.md
│ │ ├── OPENVINO.md
│ │ ├── SYCL.md
│ │ ├── VirtGPU/
│ │ │ ├── configuration.md
│ │ │ └── development.md
│ │ ├── VirtGPU.md
│ │ ├── ZenDNN.md
│ │ ├── snapdragon/
│ │ │ ├── CMakeUserPresets.json
│ │ │ ├── README.md
│ │ │ ├── developer.md
│ │ │ └── windows.md
│ │ └── zDNN.md
│ ├── build-riscv64-spacemit.md
│ ├── build-s390x.md
│ ├── build.md
│ ├── development/
│ │ ├── HOWTO-add-model.md
│ │ ├── debugging-tests.md
│ │ ├── llama-star/
│ │ │ └── idea-arch.key
│ │ ├── parsing.md
│ │ └── token_generation_performance_tips.md
│ ├── docker.md
│ ├── function-calling.md
│ ├── install.md
│ ├── llguidance.md
│ ├── multimodal/
│ │ ├── MobileVLM.md
│ │ ├── gemma3.md
│ │ ├── glmedge.md
│ │ ├── granitevision.md
│ │ ├── llava.md
│ │ ├── minicpmo2.6.md
│ │ ├── minicpmo4.0.md
│ │ ├── minicpmv2.5.md
│ │ ├── minicpmv2.6.md
│ │ ├── minicpmv4.0.md
│ │ └── minicpmv4.5.md
│ ├── multimodal.md
│ ├── ops/
│ │ ├── BLAS.csv
│ │ ├── CANN.csv
│ │ ├── CPU.csv
│ │ ├── CUDA.csv
│ │ ├── Metal.csv
│ │ ├── OpenCL.csv
│ │ ├── SYCL.csv
│ │ ├── Vulkan.csv
│ │ ├── WebGPU.csv
│ │ ├── ZenDNN.csv
│ │ └── zDNN.csv
│ ├── ops.md
│ ├── preset.md
│ └── speculative.md
├── examples/
│ ├── CMakeLists.txt
│ ├── batched/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── batched.cpp
│ ├── batched.swift/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── Package.swift
│ │ ├── README.md
│ │ └── Sources/
│ │ └── main.swift
│ ├── convert-llama2c-to-ggml/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── convert-llama2c-to-ggml.cpp
│ ├── convert_legacy_llama.py
│ ├── debug/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── debug.cpp
│ ├── deprecation-warning/
│ │ ├── README.md
│ │ └── deprecation-warning.cpp
│ ├── diffusion/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── diffusion-cli.cpp
│ ├── embedding/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── embedding.cpp
│ ├── eval-callback/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── eval-callback.cpp
│ ├── gen-docs/
│ │ ├── CMakeLists.txt
│ │ └── gen-docs.cpp
│ ├── gguf/
│ │ ├── CMakeLists.txt
│ │ └── gguf.cpp
│ ├── gguf-hash/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── deps/
│ │ │ ├── rotate-bits/
│ │ │ │ ├── package.json
│ │ │ │ └── rotate-bits.h
│ │ │ ├── sha1/
│ │ │ │ ├── package.json
│ │ │ │ ├── sha1.c
│ │ │ │ └── sha1.h
│ │ │ ├── sha256/
│ │ │ │ ├── package.json
│ │ │ │ ├── sha256.c
│ │ │ │ └── sha256.h
│ │ │ └── xxhash/
│ │ │ ├── clib.json
│ │ │ ├── xxhash.c
│ │ │ └── xxhash.h
│ │ └── gguf-hash.cpp
│ ├── idle/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── idle.cpp
│ ├── json_schema_pydantic_example.py
│ ├── json_schema_to_grammar.py
│ ├── llama.android/
│ │ ├── .gitignore
│ │ ├── app/
│ │ │ ├── .gitignore
│ │ │ ├── build.gradle.kts
│ │ │ ├── proguard-rules.pro
│ │ │ └── src/
│ │ │ └── main/
│ │ │ ├── AndroidManifest.xml
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── example/
│ │ │ │ └── llama/
│ │ │ │ ├── MainActivity.kt
│ │ │ │ └── MessageAdapter.kt
│ │ │ └── res/
│ │ │ ├── drawable/
│ │ │ │ ├── bg_assistant_message.xml
│ │ │ │ ├── bg_user_message.xml
│ │ │ │ ├── ic_launcher_background.xml
│ │ │ │ ├── ic_launcher_foreground.xml
│ │ │ │ ├── outline_folder_open_24.xml
│ │ │ │ └── outline_send_24.xml
│ │ │ ├── layout/
│ │ │ │ ├── activity_main.xml
│ │ │ │ ├── item_message_assistant.xml
│ │ │ │ └── item_message_user.xml
│ │ │ ├── mipmap-anydpi/
│ │ │ │ ├── ic_launcher.xml
│ │ │ │ └── ic_launcher_round.xml
│ │ │ ├── values/
│ │ │ │ ├── colors.xml
│ │ │ │ ├── strings.xml
│ │ │ │ └── themes.xml
│ │ │ └── xml/
│ │ │ ├── backup_rules.xml
│ │ │ └── data_extraction_rules.xml
│ │ ├── build.gradle.kts
│ │ ├── gradle/
│ │ │ ├── libs.versions.toml
│ │ │ └── wrapper/
│ │ │ ├── gradle-wrapper.jar
│ │ │ └── gradle-wrapper.properties
│ │ ├── gradle.properties
│ │ ├── gradlew
│ │ ├── lib/
│ │ │ ├── .gitignore
│ │ │ ├── build.gradle.kts
│ │ │ ├── consumer-rules.pro
│ │ │ ├── proguard-rules.pro
│ │ │ └── src/
│ │ │ ├── androidTest/
│ │ │ │ └── java/
│ │ │ │ └── android/
│ │ │ │ └── llama/
│ │ │ │ └── cpp/
│ │ │ │ └── ExampleInstrumentedTest.kt
│ │ │ ├── main/
│ │ │ │ ├── AndroidManifest.xml
│ │ │ │ ├── cpp/
│ │ │ │ │ ├── CMakeLists.txt
│ │ │ │ │ ├── ai_chat.cpp
│ │ │ │ │ └── logging.h
│ │ │ │ └── java/
│ │ │ │ └── com/
│ │ │ │ └── arm/
│ │ │ │ └── aichat/
│ │ │ │ ├── AiChat.kt
│ │ │ │ ├── InferenceEngine.kt
│ │ │ │ ├── gguf/
│ │ │ │ │ ├── FileType.kt
│ │ │ │ │ ├── GgufMetadata.kt
│ │ │ │ │ └── GgufMetadataReader.kt
│ │ │ │ └── internal/
│ │ │ │ ├── InferenceEngineImpl.kt
│ │ │ │ └── gguf/
│ │ │ │ └── GgufMetadataReaderImpl.kt
│ │ │ └── test/
│ │ │ └── java/
│ │ │ └── android/
│ │ │ └── llama/
│ │ │ └── cpp/
│ │ │ └── ExampleUnitTest.kt
│ │ └── settings.gradle.kts
│ ├── llama.swiftui/
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── llama.cpp.swift/
│ │ │ └── LibLlama.swift
│ │ ├── llama.swiftui/
│ │ │ ├── Assets.xcassets/
│ │ │ │ ├── AppIcon.appiconset/
│ │ │ │ │ └── Contents.json
│ │ │ │ └── Contents.json
│ │ │ ├── Models/
│ │ │ │ └── LlamaState.swift
│ │ │ ├── Resources/
│ │ │ │ └── models/
│ │ │ │ └── .gitignore
│ │ │ ├── UI/
│ │ │ │ ├── ContentView.swift
│ │ │ │ ├── DownloadButton.swift
│ │ │ │ ├── InputButton.swift
│ │ │ │ └── LoadCustomButton.swift
│ │ │ └── llama_swiftuiApp.swift
│ │ └── llama.swiftui.xcodeproj/
│ │ ├── project.pbxproj
│ │ └── project.xcworkspace/
│ │ └── contents.xcworkspacedata
│ ├── llama.vim
│ ├── lookahead/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── lookahead.cpp
│ ├── lookup/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── lookup-create.cpp
│ │ ├── lookup-merge.cpp
│ │ ├── lookup-stats.cpp
│ │ └── lookup.cpp
│ ├── model-conversion/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── scripts/
│ │ ├── causal/
│ │ │ ├── compare-embeddings-logits.sh
│ │ │ ├── compare-logits.py
│ │ │ ├── convert-model.sh
│ │ │ ├── modelcard.template
│ │ │ ├── run-casual-gen-embeddings-org.py
│ │ │ ├── run-converted-model-embeddings-logits.sh
│ │ │ ├── run-converted-model.sh
│ │ │ └── run-org-model.py
│ │ ├── embedding/
│ │ │ ├── compare-embeddings-logits.sh
│ │ │ ├── convert-model.sh
│ │ │ ├── modelcard.template
│ │ │ ├── run-converted-model.sh
│ │ │ └── run-original-model.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── check-nmse.py
│ │ ├── common.py
│ │ ├── compare_tokens.py
│ │ ├── create-collection-add-model.sh
│ │ ├── curl-embedding-server.sh
│ │ ├── hf-add-model-to-collection.py
│ │ ├── hf-create-collection.py
│ │ ├── hf-create-model.py
│ │ ├── hf-upload-gguf-model.py
│ │ ├── inspect-converted-model.sh
│ │ ├── inspect-org-model.py
│ │ ├── perplexity-gen.sh
│ │ ├── perplexity-run-simple.sh
│ │ ├── perplexity-run.sh
│ │ ├── quantize.sh
│ │ ├── run-embedding-server.sh
│ │ └── semantic_check.py
│ ├── parallel/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── parallel.cpp
│ ├── passkey/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── passkey.cpp
│ ├── pydantic_models_to_grammar.py
│ ├── pydantic_models_to_grammar_examples.py
│ ├── reason-act.sh
│ ├── regex_to_grammar.py
│ ├── retrieval/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── retrieval.cpp
│ ├── save-load-state/
│ │ ├── CMakeLists.txt
│ │ └── save-load-state.cpp
│ ├── server-llama2-13B.sh
│ ├── server_embd.py
│ ├── simple/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── simple.cpp
│ ├── simple-chat/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── simple-chat.cpp
│ ├── simple-cmake-pkg/
│ │ ├── .gitignore
│ │ ├── CMakeLists.txt
│ │ └── README.md
│ ├── speculative/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── speculative.cpp
│ ├── speculative-simple/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── speculative-simple.cpp
│ ├── sycl/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── build.sh
│ │ ├── ls-sycl-device.cpp
│ │ ├── run-llama2.sh
│ │ ├── test.sh
│ │ ├── win-build-sycl.bat
│ │ ├── win-run-llama2.bat
│ │ └── win-test.bat
│ ├── training/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── finetune.cpp
│ └── ts-type-to-grammar.sh
├── flake.nix
├── ggml/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── cmake/
│ │ ├── GitVars.cmake
│ │ ├── common.cmake
│ │ └── ggml-config.cmake.in
│ ├── include/
│ │ ├── ggml-alloc.h
│ │ ├── ggml-backend.h
│ │ ├── ggml-blas.h
│ │ ├── ggml-cann.h
│ │ ├── ggml-cpp.h
│ │ ├── ggml-cpu.h
│ │ ├── ggml-cuda.h
│ │ ├── ggml-hexagon.h
│ │ ├── ggml-metal.h
│ │ ├── ggml-opencl.h
│ │ ├── ggml-openvino.h
│ │ ├── ggml-opt.h
│ │ ├── ggml-rpc.h
│ │ ├── ggml-sycl.h
│ │ ├── ggml-virtgpu.h
│ │ ├── ggml-vulkan.h
│ │ ├── ggml-webgpu.h
│ │ ├── ggml-zdnn.h
│ │ ├── ggml-zendnn.h
│ │ ├── ggml.h
│ │ └── gguf.h
│ └── src/
│ ├── CMakeLists.txt
│ ├── ggml-alloc.c
│ ├── ggml-backend-dl.cpp
│ ├── ggml-backend-dl.h
│ ├── ggml-backend-impl.h
│ ├── ggml-backend-reg.cpp
│ ├── ggml-backend.cpp
│ ├── ggml-blas/
│ │ ├── CMakeLists.txt
│ │ └── ggml-blas.cpp
│ ├── ggml-cann/
│ │ ├── CMakeLists.txt
│ │ ├── acl_tensor.cpp
│ │ ├── acl_tensor.h
│ │ ├── aclnn_ops.cpp
│ │ ├── aclnn_ops.h
│ │ ├── common.h
│ │ └── ggml-cann.cpp
│ ├── ggml-common.h
│ ├── ggml-cpu/
│ │ ├── CMakeLists.txt
│ │ ├── amx/
│ │ │ ├── amx.cpp
│ │ │ ├── amx.h
│ │ │ ├── common.h
│ │ │ ├── mmq.cpp
│ │ │ └── mmq.h
│ │ ├── arch/
│ │ │ ├── arm/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ ├── quants.c
│ │ │ │ └── repack.cpp
│ │ │ ├── loongarch/
│ │ │ │ └── quants.c
│ │ │ ├── powerpc/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ └── quants.c
│ │ │ ├── riscv/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ ├── quants.c
│ │ │ │ └── repack.cpp
│ │ │ ├── s390/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ └── quants.c
│ │ │ ├── wasm/
│ │ │ │ └── quants.c
│ │ │ └── x86/
│ │ │ ├── cpu-feats.cpp
│ │ │ ├── quants.c
│ │ │ └── repack.cpp
│ │ ├── arch-fallback.h
│ │ ├── binary-ops.cpp
│ │ ├── binary-ops.h
│ │ ├── cmake/
│ │ │ └── FindSIMD.cmake
│ │ ├── common.h
│ │ ├── ggml-cpu-impl.h
│ │ ├── ggml-cpu.c
│ │ ├── ggml-cpu.cpp
│ │ ├── hbm.cpp
│ │ ├── hbm.h
│ │ ├── kleidiai/
│ │ │ ├── kernels.cpp
│ │ │ ├── kernels.h
│ │ │ ├── kleidiai.cpp
│ │ │ └── kleidiai.h
│ │ ├── llamafile/
│ │ │ ├── sgemm.cpp
│ │ │ └── sgemm.h
│ │ ├── ops.cpp
│ │ ├── ops.h
│ │ ├── quants.c
│ │ ├── quants.h
│ │ ├── repack.cpp
│ │ ├── repack.h
│ │ ├── simd-gemm.h
│ │ ├── simd-mappings.h
│ │ ├── spacemit/
│ │ │ ├── ime.cpp
│ │ │ ├── ime.h
│ │ │ ├── ime1_kernels.cpp
│ │ │ └── ime_kernels.h
│ │ ├── traits.cpp
│ │ ├── traits.h
│ │ ├── unary-ops.cpp
│ │ ├── unary-ops.h
│ │ ├── vec.cpp
│ │ └── vec.h
│ ├── ggml-cuda/
│ │ ├── CMakeLists.txt
│ │ ├── acc.cu
│ │ ├── acc.cuh
│ │ ├── add-id.cu
│ │ ├── add-id.cuh
│ │ ├── arange.cu
│ │ ├── arange.cuh
│ │ ├── argmax.cu
│ │ ├── argmax.cuh
│ │ ├── argsort.cu
│ │ ├── argsort.cuh
│ │ ├── binbcast.cu
│ │ ├── binbcast.cuh
│ │ ├── clamp.cu
│ │ ├── clamp.cuh
│ │ ├── common.cuh
│ │ ├── concat.cu
│ │ ├── concat.cuh
│ │ ├── conv-transpose-1d.cu
│ │ ├── conv-transpose-1d.cuh
│ │ ├── conv2d-dw.cu
│ │ ├── conv2d-dw.cuh
│ │ ├── conv2d-transpose.cu
│ │ ├── conv2d-transpose.cuh
│ │ ├── conv2d.cu
│ │ ├── conv2d.cuh
│ │ ├── convert.cu
│ │ ├── convert.cuh
│ │ ├── count-equal.cu
│ │ ├── count-equal.cuh
│ │ ├── cp-async.cuh
│ │ ├── cpy-utils.cuh
│ │ ├── cpy.cu
│ │ ├── cpy.cuh
│ │ ├── cross-entropy-loss.cu
│ │ ├── cross-entropy-loss.cuh
│ │ ├── cumsum.cu
│ │ ├── cumsum.cuh
│ │ ├── dequantize.cuh
│ │ ├── diag.cu
│ │ ├── diag.cuh
│ │ ├── diagmask.cu
│ │ ├── diagmask.cuh
│ │ ├── fattn-common.cuh
│ │ ├── fattn-mma-f16.cuh
│ │ ├── fattn-tile.cu
│ │ ├── fattn-tile.cuh
│ │ ├── fattn-vec.cuh
│ │ ├── fattn-wmma-f16.cu
│ │ ├── fattn-wmma-f16.cuh
│ │ ├── fattn.cu
│ │ ├── fattn.cuh
│ │ ├── fill.cu
│ │ ├── fill.cuh
│ │ ├── gated_delta_net.cu
│ │ ├── gated_delta_net.cuh
│ │ ├── getrows.cu
│ │ ├── getrows.cuh
│ │ ├── ggml-cuda.cu
│ │ ├── gla.cu
│ │ ├── gla.cuh
│ │ ├── im2col.cu
│ │ ├── im2col.cuh
│ │ ├── mean.cu
│ │ ├── mean.cuh
│ │ ├── mma.cuh
│ │ ├── mmf.cu
│ │ ├── mmf.cuh
│ │ ├── mmid.cu
│ │ ├── mmid.cuh
│ │ ├── mmq.cu
│ │ ├── mmq.cuh
│ │ ├── mmvf.cu
│ │ ├── mmvf.cuh
│ │ ├── mmvq.cu
│ │ ├── mmvq.cuh
│ │ ├── norm.cu
│ │ ├── norm.cuh
│ │ ├── opt-step-adamw.cu
│ │ ├── opt-step-adamw.cuh
│ │ ├── opt-step-sgd.cu
│ │ ├── opt-step-sgd.cuh
│ │ ├── out-prod.cu
│ │ ├── out-prod.cuh
│ │ ├── pad.cu
│ │ ├── pad.cuh
│ │ ├── pad_reflect_1d.cu
│ │ ├── pad_reflect_1d.cuh
│ │ ├── pool2d.cu
│ │ ├── pool2d.cuh
│ │ ├── quantize.cu
│ │ ├── quantize.cuh
│ │ ├── reduce_rows.cuh
│ │ ├── roll.cu
│ │ ├── roll.cuh
│ │ ├── rope.cu
│ │ ├── rope.cuh
│ │ ├── scale.cu
│ │ ├── scale.cuh
│ │ ├── set-rows.cu
│ │ ├── set-rows.cuh
│ │ ├── set.cu
│ │ ├── set.cuh
│ │ ├── softcap.cu
│ │ ├── softcap.cuh
│ │ ├── softmax.cu
│ │ ├── softmax.cuh
│ │ ├── solve_tri.cu
│ │ ├── solve_tri.cuh
│ │ ├── ssm-conv.cu
│ │ ├── ssm-conv.cuh
│ │ ├── ssm-scan.cu
│ │ ├── ssm-scan.cuh
│ │ ├── sum.cu
│ │ ├── sum.cuh
│ │ ├── sumrows.cu
│ │ ├── sumrows.cuh
│ │ ├── template-instances/
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_32.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_32.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_64-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_8.cu
│ │ │ ├── fattn-tile-instance-dkq112-dv112.cu
│ │ │ ├── fattn-tile-instance-dkq128-dv128.cu
│ │ │ ├── fattn-tile-instance-dkq256-dv256.cu
│ │ │ ├── fattn-tile-instance-dkq40-dv40.cu
│ │ │ ├── fattn-tile-instance-dkq512-dv512.cu
│ │ │ ├── fattn-tile-instance-dkq576-dv512.cu
│ │ │ ├── fattn-tile-instance-dkq64-dv64.cu
│ │ │ ├── fattn-tile-instance-dkq72-dv72.cu
│ │ │ ├── fattn-tile-instance-dkq80-dv80.cu
│ │ │ ├── fattn-tile-instance-dkq96-dv96.cu
│ │ │ ├── fattn-vec-instance-bf16-bf16.cu
│ │ │ ├── fattn-vec-instance-bf16-f16.cu
│ │ │ ├── fattn-vec-instance-bf16-q4_0.cu
│ │ │ ├── fattn-vec-instance-bf16-q4_1.cu
│ │ │ ├── fattn-vec-instance-bf16-q5_0.cu
│ │ │ ├── fattn-vec-instance-bf16-q5_1.cu
│ │ │ ├── fattn-vec-instance-bf16-q8_0.cu
│ │ │ ├── fattn-vec-instance-f16-bf16.cu
│ │ │ ├── fattn-vec-instance-f16-f16.cu
│ │ │ ├── fattn-vec-instance-f16-q4_0.cu
│ │ │ ├── fattn-vec-instance-f16-q4_1.cu
│ │ │ ├── fattn-vec-instance-f16-q5_0.cu
│ │ │ ├── fattn-vec-instance-f16-q5_1.cu
│ │ │ ├── fattn-vec-instance-f16-q8_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q4_0-f16.cu
│ │ │ ├── fattn-vec-instance-q4_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q4_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q4_0-q8_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-bf16.cu
│ │ │ ├── fattn-vec-instance-q4_1-f16.cu
│ │ │ ├── fattn-vec-instance-q4_1-q4_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-q4_1.cu
│ │ │ ├── fattn-vec-instance-q4_1-q5_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-q5_1.cu
│ │ │ ├── fattn-vec-instance-q4_1-q8_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q5_0-f16.cu
│ │ │ ├── fattn-vec-instance-q5_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q5_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q5_0-q8_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-bf16.cu
│ │ │ ├── fattn-vec-instance-q5_1-f16.cu
│ │ │ ├── fattn-vec-instance-q5_1-q4_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-q4_1.cu
│ │ │ ├── fattn-vec-instance-q5_1-q5_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-q5_1.cu
│ │ │ ├── fattn-vec-instance-q5_1-q8_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q8_0-f16.cu
│ │ │ ├── fattn-vec-instance-q8_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q8_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q8_0-q8_0.cu
│ │ │ ├── generate_cu_files.py
│ │ │ ├── mmf-instance-ncols_1.cu
│ │ │ ├── mmf-instance-ncols_10.cu
│ │ │ ├── mmf-instance-ncols_11.cu
│ │ │ ├── mmf-instance-ncols_12.cu
│ │ │ ├── mmf-instance-ncols_13.cu
│ │ │ ├── mmf-instance-ncols_14.cu
│ │ │ ├── mmf-instance-ncols_15.cu
│ │ │ ├── mmf-instance-ncols_16.cu
│ │ │ ├── mmf-instance-ncols_2.cu
│ │ │ ├── mmf-instance-ncols_3.cu
│ │ │ ├── mmf-instance-ncols_4.cu
│ │ │ ├── mmf-instance-ncols_5.cu
│ │ │ ├── mmf-instance-ncols_6.cu
│ │ │ ├── mmf-instance-ncols_7.cu
│ │ │ ├── mmf-instance-ncols_8.cu
│ │ │ ├── mmf-instance-ncols_9.cu
│ │ │ ├── mmq-instance-iq1_s.cu
│ │ │ ├── mmq-instance-iq2_s.cu
│ │ │ ├── mmq-instance-iq2_xs.cu
│ │ │ ├── mmq-instance-iq2_xxs.cu
│ │ │ ├── mmq-instance-iq3_s.cu
│ │ │ ├── mmq-instance-iq3_xxs.cu
│ │ │ ├── mmq-instance-iq4_nl.cu
│ │ │ ├── mmq-instance-iq4_xs.cu
│ │ │ ├── mmq-instance-mxfp4.cu
│ │ │ ├── mmq-instance-nvfp4.cu
│ │ │ ├── mmq-instance-q2_k.cu
│ │ │ ├── mmq-instance-q3_k.cu
│ │ │ ├── mmq-instance-q4_0.cu
│ │ │ ├── mmq-instance-q4_1.cu
│ │ │ ├── mmq-instance-q4_k.cu
│ │ │ ├── mmq-instance-q5_0.cu
│ │ │ ├── mmq-instance-q5_1.cu
│ │ │ ├── mmq-instance-q5_k.cu
│ │ │ ├── mmq-instance-q6_k.cu
│ │ │ └── mmq-instance-q8_0.cu
│ │ ├── top-k.cu
│ │ ├── top-k.cuh
│ │ ├── topk-moe.cu
│ │ ├── topk-moe.cuh
│ │ ├── tri.cu
│ │ ├── tri.cuh
│ │ ├── tsembd.cu
│ │ ├── tsembd.cuh
│ │ ├── unary.cu
│ │ ├── unary.cuh
│ │ ├── upscale.cu
│ │ ├── upscale.cuh
│ │ ├── vecdotq.cuh
│ │ ├── vendors/
│ │ │ ├── cuda.h
│ │ │ ├── hip.h
│ │ │ └── musa.h
│ │ ├── wkv.cu
│ │ └── wkv.cuh
│ ├── ggml-hexagon/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-hexagon.cpp
│ │ ├── htp/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── act-ops.c
│ │ │ ├── argsort-ops.c
│ │ │ ├── binary-ops.c
│ │ │ ├── cmake-toolchain.cmake
│ │ │ ├── cpy-ops.c
│ │ │ ├── cumsum-ops.c
│ │ │ ├── flash-attn-ops.c
│ │ │ ├── get-rows-ops.c
│ │ │ ├── hex-dma.c
│ │ │ ├── hex-dma.h
│ │ │ ├── hex-dump.h
│ │ │ ├── hex-fastdiv.h
│ │ │ ├── hex-utils.h
│ │ │ ├── hmx-matmul-ops.c
│ │ │ ├── hmx-ops.h
│ │ │ ├── hmx-profile.h
│ │ │ ├── hmx-utils.h
│ │ │ ├── htp-ctx.h
│ │ │ ├── htp-msg.h
│ │ │ ├── htp-ops.h
│ │ │ ├── htp_iface.idl
│ │ │ ├── hvx-arith.h
│ │ │ ├── hvx-base.h
│ │ │ ├── hvx-copy.h
│ │ │ ├── hvx-div.h
│ │ │ ├── hvx-dump.h
│ │ │ ├── hvx-exp.h
│ │ │ ├── hvx-floor.h
│ │ │ ├── hvx-inverse.h
│ │ │ ├── hvx-reduce.h
│ │ │ ├── hvx-scale.h
│ │ │ ├── hvx-sigmoid.h
│ │ │ ├── hvx-sqrt.h
│ │ │ ├── hvx-types.h
│ │ │ ├── hvx-utils.h
│ │ │ ├── main.c
│ │ │ ├── matmul-ops.c
│ │ │ ├── repeat-ops.c
│ │ │ ├── rope-ops.c
│ │ │ ├── set-rows-ops.c
│ │ │ ├── softmax-ops.c
│ │ │ ├── ssm-conv.c
│ │ │ ├── sum-rows-ops.c
│ │ │ ├── unary-ops.c
│ │ │ ├── worker-pool.c
│ │ │ └── worker-pool.h
│ │ ├── htp-drv.cpp
│ │ ├── htp-drv.h
│ │ ├── libdl.h
│ │ ├── libggml-htp.inf
│ │ └── op-desc.h
│ ├── ggml-hip/
│ │ └── CMakeLists.txt
│ ├── ggml-impl.h
│ ├── ggml-metal/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-metal-common.cpp
│ │ ├── ggml-metal-common.h
│ │ ├── ggml-metal-context.h
│ │ ├── ggml-metal-context.m
│ │ ├── ggml-metal-device.cpp
│ │ ├── ggml-metal-device.h
│ │ ├── ggml-metal-device.m
│ │ ├── ggml-metal-impl.h
│ │ ├── ggml-metal-ops.cpp
│ │ ├── ggml-metal-ops.h
│ │ ├── ggml-metal.cpp
│ │ └── ggml-metal.metal
│ ├── ggml-musa/
│ │ ├── CMakeLists.txt
│ │ ├── mudnn.cu
│ │ └── mudnn.cuh
│ ├── ggml-opencl/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-opencl.cpp
│ │ └── kernels/
│ │ ├── add.cl
│ │ ├── add_id.cl
│ │ ├── argsort.cl
│ │ ├── clamp.cl
│ │ ├── concat.cl
│ │ ├── conv2d.cl
│ │ ├── conv2d_f16_f32.cl
│ │ ├── cpy.cl
│ │ ├── cumsum.cl
│ │ ├── cvt.cl
│ │ ├── diag.cl
│ │ ├── diag_mask_inf.cl
│ │ ├── div.cl
│ │ ├── embed_kernel.py
│ │ ├── exp.cl
│ │ ├── expm1.cl
│ │ ├── fill.cl
│ │ ├── flash_attn_f16.cl
│ │ ├── flash_attn_f32.cl
│ │ ├── flash_attn_f32_f16.cl
│ │ ├── gelu.cl
│ │ ├── gemm_moe_mxfp4_f32.cl
│ │ ├── gemm_noshuffle_q4_1_f32.cl
│ │ ├── gemm_noshuffle_q4_k_f32.cl
│ │ ├── gemm_noshuffle_q6_k_f32.cl
│ │ ├── gemv_moe_mxfp4_f32.cl
│ │ ├── gemv_noshuffle.cl
│ │ ├── gemv_noshuffle_general.cl
│ │ ├── gemv_noshuffle_general_q8_0_f32.cl
│ │ ├── gemv_noshuffle_q4_1_f32.cl
│ │ ├── gemv_noshuffle_q4_k_f32.cl
│ │ ├── gemv_noshuffle_q6_k_f32.cl
│ │ ├── get_rows.cl
│ │ ├── glu.cl
│ │ ├── group_norm.cl
│ │ ├── im2col_f16.cl
│ │ ├── im2col_f32.cl
│ │ ├── l2_norm.cl
│ │ ├── mean.cl
│ │ ├── mul.cl
│ │ ├── mul_mat_Ab_Bi_8x4.cl
│ │ ├── mul_mat_f16_f32.cl
│ │ ├── mul_mm_f16_f32_kq_kqv.cl
│ │ ├── mul_mm_f16_f32_l4_lm.cl
│ │ ├── mul_mm_f32_f32_l4_lm.cl
│ │ ├── mul_mm_q4_0_f32_l4_lm.cl
│ │ ├── mul_mm_q4_1_f32_l4_lm.cl
│ │ ├── mul_mm_q4_k_f32_l4_lm.cl
│ │ ├── mul_mm_q6_k_f32_l4_lm.cl
│ │ ├── mul_mm_q8_0_f32_8x4.cl
│ │ ├── mul_mm_q8_0_f32_l4_lm.cl
│ │ ├── mul_mv_f16_f16.cl
│ │ ├── mul_mv_f16_f32.cl
│ │ ├── mul_mv_f16_f32_1row.cl
│ │ ├── mul_mv_f16_f32_l4.cl
│ │ ├── mul_mv_f32_f32.cl
│ │ ├── mul_mv_id_mxfp4_f32.cl
│ │ ├── mul_mv_id_mxfp4_f32_flat.cl
│ │ ├── mul_mv_id_q4_0_f32_8x_flat.cl
│ │ ├── mul_mv_id_q8_0_f32.cl
│ │ ├── mul_mv_id_q8_0_f32_flat.cl
│ │ ├── mul_mv_mxfp4_f32.cl
│ │ ├── mul_mv_mxfp4_f32_flat.cl
│ │ ├── mul_mv_q4_0_f32.cl
│ │ ├── mul_mv_q4_0_f32_1d_16x_flat.cl
│ │ ├── mul_mv_q4_0_f32_1d_8x_flat.cl
│ │ ├── mul_mv_q4_0_f32_8x_flat.cl
│ │ ├── mul_mv_q4_0_f32_v.cl
│ │ ├── mul_mv_q4_1_f32.cl
│ │ ├── mul_mv_q4_1_f32_flat.cl
│ │ ├── mul_mv_q4_k_f32.cl
│ │ ├── mul_mv_q4_k_f32_flat.cl
│ │ ├── mul_mv_q6_k_f32.cl
│ │ ├── mul_mv_q6_k_f32_flat.cl
│ │ ├── mul_mv_q8_0_f32.cl
│ │ ├── mul_mv_q8_0_f32_flat.cl
│ │ ├── neg.cl
│ │ ├── norm.cl
│ │ ├── pad.cl
│ │ ├── relu.cl
│ │ ├── repeat.cl
│ │ ├── rms_norm.cl
│ │ ├── rope.cl
│ │ ├── scale.cl
│ │ ├── set_rows.cl
│ │ ├── sigmoid.cl
│ │ ├── silu.cl
│ │ ├── softmax_4_f16.cl
│ │ ├── softmax_4_f32.cl
│ │ ├── softmax_f16.cl
│ │ ├── softmax_f32.cl
│ │ ├── softplus.cl
│ │ ├── solve_tri.cl
│ │ ├── sqr.cl
│ │ ├── sqrt.cl
│ │ ├── ssm_conv.cl
│ │ ├── sub.cl
│ │ ├── sum_rows.cl
│ │ ├── tanh.cl
│ │ ├── transpose.cl
│ │ ├── tri.cl
│ │ ├── tsembd.cl
│ │ └── upscale.cl
│ ├── ggml-openvino/
│ │ ├── .clang-format
│ │ ├── CMakeLists.txt
│ │ ├── ggml-decoder.cpp
│ │ ├── ggml-decoder.h
│ │ ├── ggml-openvino-extra.cpp
│ │ ├── ggml-openvino-extra.h
│ │ ├── ggml-openvino.cpp
│ │ ├── ggml-quants.cpp
│ │ ├── ggml-quants.h
│ │ ├── openvino/
│ │ │ ├── decoder.h
│ │ │ ├── frontend.cpp
│ │ │ ├── frontend.h
│ │ │ ├── input_model.cpp
│ │ │ ├── input_model.h
│ │ │ ├── node_context.h
│ │ │ ├── op/
│ │ │ │ ├── cont.cpp
│ │ │ │ ├── cpy.cpp
│ │ │ │ ├── flash_attn_ext.cpp
│ │ │ │ ├── get_rows.cpp
│ │ │ │ ├── glu_geglu.cpp
│ │ │ │ ├── glu_swiglu.cpp
│ │ │ │ ├── mulmat.cpp
│ │ │ │ ├── permute.cpp
│ │ │ │ ├── reshape.cpp
│ │ │ │ ├── rms_norm.cpp
│ │ │ │ ├── rope.cpp
│ │ │ │ ├── scale.cpp
│ │ │ │ ├── set_rows.cpp
│ │ │ │ ├── softmax.cpp
│ │ │ │ ├── transpose.cpp
│ │ │ │ ├── unary_silu.cpp
│ │ │ │ └── view.cpp
│ │ │ ├── op_table.cpp
│ │ │ ├── op_table.h
│ │ │ ├── pass/
│ │ │ │ ├── eliminate_zp.cpp
│ │ │ │ ├── eliminate_zp.h
│ │ │ │ ├── fuse_to_sdpa.cpp
│ │ │ │ ├── fuse_to_sdpa.h
│ │ │ │ ├── mark_decompression_convert_constant_folding.h
│ │ │ │ ├── squeeze_matmul.cpp
│ │ │ │ └── squeeze_matmul.h
│ │ │ ├── translate_session.cpp
│ │ │ ├── translate_session.h
│ │ │ ├── utils.cpp
│ │ │ └── utils.h
│ │ ├── utils.cpp
│ │ └── utils.h
│ ├── ggml-opt.cpp
│ ├── ggml-quants.c
│ ├── ggml-quants.h
│ ├── ggml-rpc/
│ │ ├── CMakeLists.txt
│ │ └── ggml-rpc.cpp
│ ├── ggml-sycl/
│ │ ├── CMakeLists.txt
│ │ ├── add-id.cpp
│ │ ├── add-id.hpp
│ │ ├── backend.hpp
│ │ ├── binbcast.cpp
│ │ ├── binbcast.hpp
│ │ ├── common.cpp
│ │ ├── common.hpp
│ │ ├── concat.cpp
│ │ ├── concat.hpp
│ │ ├── conv.cpp
│ │ ├── conv.hpp
│ │ ├── convert.cpp
│ │ ├── convert.hpp
│ │ ├── count-equal.cpp
│ │ ├── count-equal.hpp
│ │ ├── cpy.cpp
│ │ ├── cpy.hpp
│ │ ├── dequantize.hpp
│ │ ├── dmmv.cpp
│ │ ├── dmmv.hpp
│ │ ├── dpct/
│ │ │ └── helper.hpp
│ │ ├── element_wise.cpp
│ │ ├── element_wise.hpp
│ │ ├── fattn-common.hpp
│ │ ├── fattn-tile.cpp
│ │ ├── fattn-tile.hpp
│ │ ├── fattn-vec.hpp
│ │ ├── fattn.cpp
│ │ ├── fattn.hpp
│ │ ├── gated_delta_net.cpp
│ │ ├── gated_delta_net.hpp
│ │ ├── gemm.hpp
│ │ ├── getrows.cpp
│ │ ├── getrows.hpp
│ │ ├── ggml-sycl.cpp
│ │ ├── gla.cpp
│ │ ├── gla.hpp
│ │ ├── im2col.cpp
│ │ ├── im2col.hpp
│ │ ├── mmq.cpp
│ │ ├── mmq.hpp
│ │ ├── mmvq.cpp
│ │ ├── mmvq.hpp
│ │ ├── norm.cpp
│ │ ├── norm.hpp
│ │ ├── outprod.cpp
│ │ ├── outprod.hpp
│ │ ├── pad.cpp
│ │ ├── pad.hpp
│ │ ├── pad_reflect_1d.cpp
│ │ ├── pad_reflect_1d.hpp
│ │ ├── presets.hpp
│ │ ├── quantize.hpp
│ │ ├── quants.hpp
│ │ ├── repeat_back.cpp
│ │ ├── repeat_back.hpp
│ │ ├── roll.cpp
│ │ ├── roll.hpp
│ │ ├── rope.cpp
│ │ ├── rope.hpp
│ │ ├── set.cpp
│ │ ├── set.hpp
│ │ ├── set_rows.cpp
│ │ ├── set_rows.hpp
│ │ ├── softmax.cpp
│ │ ├── softmax.hpp
│ │ ├── ssm_conv.cpp
│ │ ├── ssm_conv.hpp
│ │ ├── sycl_hw.cpp
│ │ ├── sycl_hw.hpp
│ │ ├── template-instances/
│ │ │ ├── fattn-tile-instance-dkq112-dv112.cpp
│ │ │ ├── fattn-tile-instance-dkq128-dv128.cpp
│ │ │ ├── fattn-tile-instance-dkq256-dv256.cpp
│ │ │ ├── fattn-tile-instance-dkq40-dv40.cpp
│ │ │ ├── fattn-tile-instance-dkq576-dv512.cpp
│ │ │ ├── fattn-tile-instance-dkq64-dv64.cpp
│ │ │ ├── fattn-tile-instance-dkq72-dv72.cpp
│ │ │ ├── fattn-tile-instance-dkq80-dv80.cpp
│ │ │ ├── fattn-tile-instance-dkq96-dv96.cpp
│ │ │ ├── fattn-vec-instance-f16-f16.cpp
│ │ │ ├── fattn-vec-instance-f16-q4_0.cpp
│ │ │ ├── fattn-vec-instance-f16-q4_1.cpp
│ │ │ ├── fattn-vec-instance-f16-q5_0.cpp
│ │ │ ├── fattn-vec-instance-f16-q5_1.cpp
│ │ │ ├── fattn-vec-instance-f16-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-f16.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-f16.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q5_1.cpp
│ │ │ └── fattn-vec-instance-q8_0-q8_0.cpp
│ │ ├── tsembd.cpp
│ │ ├── tsembd.hpp
│ │ ├── type.hpp
│ │ ├── upscale.cpp
│ │ ├── upscale.hpp
│ │ ├── vecdotq.hpp
│ │ ├── wkv.cpp
│ │ └── wkv.hpp
│ ├── ggml-threading.cpp
│ ├── ggml-threading.h
│ ├── ggml-virtgpu/
│ │ ├── CMakeLists.txt
│ │ ├── apir_cs_ggml-rpc-front.cpp
│ │ ├── backend/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── apir_cs_ggml-rpc-back.cpp
│ │ │ ├── backend-convert.h
│ │ │ ├── backend-dispatched-backend.cpp
│ │ │ ├── backend-dispatched-buffer-type.cpp
│ │ │ ├── backend-dispatched-buffer.cpp
│ │ │ ├── backend-dispatched-device.cpp
│ │ │ ├── backend-dispatched.cpp
│ │ │ ├── backend-dispatched.gen.h
│ │ │ ├── backend-dispatched.h
│ │ │ ├── backend-virgl-apir.h
│ │ │ ├── backend.cpp
│ │ │ └── shared/
│ │ │ ├── api_remoting.h
│ │ │ ├── apir_backend.gen.h
│ │ │ ├── apir_backend.h
│ │ │ ├── apir_cs.h
│ │ │ ├── apir_cs_ggml.h
│ │ │ └── apir_cs_rpc.h
│ │ ├── ggml-backend-buffer-type.cpp
│ │ ├── ggml-backend-buffer.cpp
│ │ ├── ggml-backend-device.cpp
│ │ ├── ggml-backend-reg.cpp
│ │ ├── ggml-backend.cpp
│ │ ├── ggml-remoting.h
│ │ ├── ggmlremoting_functions.yaml
│ │ ├── include/
│ │ │ └── apir_hw.h
│ │ ├── regenerate_remoting.py
│ │ ├── virtgpu-apir.h
│ │ ├── virtgpu-forward-backend.cpp
│ │ ├── virtgpu-forward-buffer-type.cpp
│ │ ├── virtgpu-forward-buffer.cpp
│ │ ├── virtgpu-forward-device.cpp
│ │ ├── virtgpu-forward-impl.h
│ │ ├── virtgpu-forward.gen.h
│ │ ├── virtgpu-shm.cpp
│ │ ├── virtgpu-shm.h
│ │ ├── virtgpu-utils.cpp
│ │ ├── virtgpu-utils.h
│ │ ├── virtgpu.cpp
│ │ └── virtgpu.h
│ ├── ggml-vulkan/
│ │ ├── CMakeLists.txt
│ │ ├── cmake/
│ │ │ └── host-toolchain.cmake.in
│ │ ├── ggml-vulkan.cpp
│ │ └── vulkan-shaders/
│ │ ├── CMakeLists.txt
│ │ ├── abs.comp
│ │ ├── acc.comp
│ │ ├── add.comp
│ │ ├── add1.comp
│ │ ├── add_id.comp
│ │ ├── arange.comp
│ │ ├── argmax.comp
│ │ ├── argsort.comp
│ │ ├── argsort_large.comp
│ │ ├── ceil.comp
│ │ ├── clamp.comp
│ │ ├── concat.comp
│ │ ├── contig_copy.comp
│ │ ├── conv2d_dw.comp
│ │ ├── conv2d_mm.comp
│ │ ├── conv_transpose_1d.comp
│ │ ├── copy.comp
│ │ ├── copy_from_quant.comp
│ │ ├── copy_to_quant.comp
│ │ ├── copy_transpose.comp
│ │ ├── cos.comp
│ │ ├── count_equal.comp
│ │ ├── count_experts.comp
│ │ ├── cumsum.comp
│ │ ├── cumsum_multipass1.comp
│ │ ├── cumsum_multipass2.comp
│ │ ├── dequant_f32.comp
│ │ ├── dequant_funcs.glsl
│ │ ├── dequant_funcs_cm2.glsl
│ │ ├── dequant_head.glsl
│ │ ├── dequant_iq1_m.comp
│ │ ├── dequant_iq1_s.comp
│ │ ├── dequant_iq2_s.comp
│ │ ├── dequant_iq2_xs.comp
│ │ ├── dequant_iq2_xxs.comp
│ │ ├── dequant_iq3_s.comp
│ │ ├── dequant_iq3_xxs.comp
│ │ ├── dequant_iq4_nl.comp
│ │ ├── dequant_iq4_xs.comp
│ │ ├── dequant_mxfp4.comp
│ │ ├── dequant_q2_k.comp
│ │ ├── dequant_q3_k.comp
│ │ ├── dequant_q4_0.comp
│ │ ├── dequant_q4_1.comp
│ │ ├── dequant_q4_k.comp
│ │ ├── dequant_q5_0.comp
│ │ ├── dequant_q5_1.comp
│ │ ├── dequant_q5_k.comp
│ │ ├── dequant_q6_k.comp
│ │ ├── dequant_q8_0.comp
│ │ ├── diag.comp
│ │ ├── diag_mask_inf.comp
│ │ ├── div.comp
│ │ ├── elu.comp
│ │ ├── exp.comp
│ │ ├── feature-tests/
│ │ │ ├── bfloat16.comp
│ │ │ ├── coopmat.comp
│ │ │ ├── coopmat2.comp
│ │ │ └── integer_dot.comp
│ │ ├── fill.comp
│ │ ├── flash_attn.comp
│ │ ├── flash_attn_base.glsl
│ │ ├── flash_attn_cm1.comp
│ │ ├── flash_attn_cm2.comp
│ │ ├── flash_attn_mask_opt.comp
│ │ ├── flash_attn_split_k_reduce.comp
│ │ ├── floor.comp
│ │ ├── gated_delta_net.comp
│ │ ├── geglu.comp
│ │ ├── geglu_erf.comp
│ │ ├── geglu_quick.comp
│ │ ├── gelu.comp
│ │ ├── gelu_erf.comp
│ │ ├── gelu_quick.comp
│ │ ├── generic_binary_head.glsl
│ │ ├── generic_head.glsl
│ │ ├── generic_unary_head.glsl
│ │ ├── get_rows.comp
│ │ ├── get_rows_quant.comp
│ │ ├── glu_head.glsl
│ │ ├── glu_main.glsl
│ │ ├── group_norm.comp
│ │ ├── hardsigmoid.comp
│ │ ├── hardswish.comp
│ │ ├── im2col.comp
│ │ ├── im2col_3d.comp
│ │ ├── l2_norm.comp
│ │ ├── leaky_relu.comp
│ │ ├── log.comp
│ │ ├── mul.comp
│ │ ├── mul_mat_split_k_reduce.comp
│ │ ├── mul_mat_vec.comp
│ │ ├── mul_mat_vec_base.glsl
│ │ ├── mul_mat_vec_iface.glsl
│ │ ├── mul_mat_vec_iq1_m.comp
│ │ ├── mul_mat_vec_iq1_s.comp
│ │ ├── mul_mat_vec_iq2_s.comp
│ │ ├── mul_mat_vec_iq2_xs.comp
│ │ ├── mul_mat_vec_iq2_xxs.comp
│ │ ├── mul_mat_vec_iq3_s.comp
│ │ ├── mul_mat_vec_iq3_xxs.comp
│ │ ├── mul_mat_vec_nc.comp
│ │ ├── mul_mat_vec_p021.comp
│ │ ├── mul_mat_vec_q2_k.comp
│ │ ├── mul_mat_vec_q3_k.comp
│ │ ├── mul_mat_vec_q4_k.comp
│ │ ├── mul_mat_vec_q5_k.comp
│ │ ├── mul_mat_vec_q6_k.comp
│ │ ├── mul_mat_vecq.comp
│ │ ├── mul_mat_vecq_funcs.glsl
│ │ ├── mul_mm.comp
│ │ ├── mul_mm_cm2.comp
│ │ ├── mul_mm_funcs.glsl
│ │ ├── mul_mm_id_funcs.glsl
│ │ ├── mul_mmq.comp
│ │ ├── mul_mmq_funcs.glsl
│ │ ├── mul_mmq_shmem_types.glsl
│ │ ├── multi_add.comp
│ │ ├── neg.comp
│ │ ├── norm.comp
│ │ ├── opt_step_adamw.comp
│ │ ├── opt_step_sgd.comp
│ │ ├── pad.comp
│ │ ├── pool2d.comp
│ │ ├── quantize_q8_1.comp
│ │ ├── reglu.comp
│ │ ├── relu.comp
│ │ ├── repeat.comp
│ │ ├── repeat_back.comp
│ │ ├── rms_norm.comp
│ │ ├── rms_norm_back.comp
│ │ ├── rms_norm_partials.comp
│ │ ├── roll.comp
│ │ ├── rope_funcs.glsl
│ │ ├── rope_head.glsl
│ │ ├── rope_multi.comp
│ │ ├── rope_neox.comp
│ │ ├── rope_norm.comp
│ │ ├── rope_params.glsl
│ │ ├── rope_vision.comp
│ │ ├── round.comp
│ │ ├── rte.glsl
│ │ ├── scale.comp
│ │ ├── sgn.comp
│ │ ├── sigmoid.comp
│ │ ├── silu.comp
│ │ ├── silu_back.comp
│ │ ├── sin.comp
│ │ ├── soft_max.comp
│ │ ├── soft_max_back.comp
│ │ ├── soft_max_large1.comp
│ │ ├── soft_max_large2.comp
│ │ ├── soft_max_large3.comp
│ │ ├── soft_max_large_common.glsl
│ │ ├── softplus.comp
│ │ ├── solve_tri.comp
│ │ ├── sqrt.comp
│ │ ├── square.comp
│ │ ├── ssm_conv.comp
│ │ ├── ssm_scan.comp
│ │ ├── step.comp
│ │ ├── sub.comp
│ │ ├── sum_rows.comp
│ │ ├── sum_rows.glsl
│ │ ├── swiglu.comp
│ │ ├── swiglu_oai.comp
│ │ ├── tanh.comp
│ │ ├── timestep_embedding.comp
│ │ ├── topk_argsort.comp
│ │ ├── topk_moe.comp
│ │ ├── topk_nary_search.comp
│ │ ├── tri.comp
│ │ ├── trunc.comp
│ │ ├── types.glsl
│ │ ├── upscale.comp
│ │ ├── utils.glsl
│ │ ├── vulkan-shaders-gen.cpp
│ │ ├── wkv6.comp
│ │ ├── wkv7.comp
│ │ └── xielu.comp
│ ├── ggml-webgpu/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-webgpu-shader-lib.hpp
│ │ ├── ggml-webgpu.cpp
│ │ ├── pre_wgsl.hpp
│ │ └── wgsl-shaders/
│ │ ├── argmax.wgsl
│ │ ├── argsort.wgsl
│ │ ├── argsort_merge.wgsl
│ │ ├── binary.wgsl
│ │ ├── common_decls.tmpl
│ │ ├── concat.wgsl
│ │ ├── cpy.wgsl
│ │ ├── cumsum.wgsl
│ │ ├── embed_wgsl.py
│ │ ├── flash_attn.wgsl
│ │ ├── gated_delta_net.wgsl
│ │ ├── get_rows.wgsl
│ │ ├── glu.wgsl
│ │ ├── memset.wgsl
│ │ ├── mul_mat.wgsl
│ │ ├── mul_mat_decls.tmpl
│ │ ├── mul_mat_reg_tile.wgsl
│ │ ├── mul_mat_subgroup_matrix.wgsl
│ │ ├── mul_mat_vec.wgsl
│ │ ├── pad.wgsl
│ │ ├── repeat.wgsl
│ │ ├── rope.wgsl
│ │ ├── row_norm.wgsl
│ │ ├── scale.wgsl
│ │ ├── set.wgsl
│ │ ├── set_rows.wgsl
│ │ ├── soft_max.wgsl
│ │ ├── solve_tri.wgsl
│ │ ├── ssm_conv.wgsl
│ │ ├── sum_rows.wgsl
│ │ └── unary.wgsl
│ ├── ggml-zdnn/
│ │ ├── .gitignore
│ │ ├── CMakeLists.txt
│ │ ├── common.hpp
│ │ ├── ggml-zdnn.cpp
│ │ ├── mmf.cpp
│ │ ├── mmf.hpp
│ │ ├── utils.cpp
│ │ └── utils.hpp
│ ├── ggml-zendnn/
│ │ ├── CMakeLists.txt
│ │ └── ggml-zendnn.cpp
│ ├── ggml.c
│ ├── ggml.cpp
│ └── gguf.cpp
├── gguf-py/
│ ├── LICENSE
│ ├── README.md
│ ├── examples/
│ │ ├── reader.py
│ │ └── writer.py
│ ├── gguf/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── gguf.py
│ │ ├── gguf_reader.py
│ │ ├── gguf_writer.py
│ │ ├── lazy.py
│ │ ├── metadata.py
│ │ ├── py.typed
│ │ ├── quants.py
│ │ ├── scripts/
│ │ │ ├── gguf_convert_endian.py
│ │ │ ├── gguf_dump.py
│ │ │ ├── gguf_editor_gui.py
│ │ │ ├── gguf_hash.py
│ │ │ ├── gguf_new_metadata.py
│ │ │ └── gguf_set_metadata.py
│ │ ├── tensor_mapping.py
│ │ ├── utility.py
│ │ └── vocab.py
│ ├── pyproject.toml
│ └── tests/
│ ├── __init__.py
│ ├── test_metadata.py
│ └── test_quants.py
├── grammars/
│ ├── README.md
│ ├── arithmetic.gbnf
│ ├── c.gbnf
│ ├── chess.gbnf
│ ├── english.gbnf
│ ├── japanese.gbnf
│ ├── json.gbnf
│ ├── json_arr.gbnf
│ └── list.gbnf
├── include/
│ ├── llama-cpp.h
│ └── llama.h
├── licenses/
│ └── LICENSE-jsonhpp
├── models/
│ ├── .editorconfig
│ ├── ggml-vocab-aquila.gguf
│ ├── ggml-vocab-baichuan.gguf
│ ├── ggml-vocab-bert-bge.gguf
│ ├── ggml-vocab-bert-bge.gguf.inp
│ ├── ggml-vocab-bert-bge.gguf.out
│ ├── ggml-vocab-command-r.gguf
│ ├── ggml-vocab-command-r.gguf.inp
│ ├── ggml-vocab-command-r.gguf.out
│ ├── ggml-vocab-deepseek-coder.gguf
│ ├── ggml-vocab-deepseek-coder.gguf.inp
│ ├── ggml-vocab-deepseek-coder.gguf.out
│ ├── ggml-vocab-deepseek-llm.gguf
│ ├── ggml-vocab-deepseek-llm.gguf.inp
│ ├── ggml-vocab-deepseek-llm.gguf.out
│ ├── ggml-vocab-falcon.gguf
│ ├── ggml-vocab-falcon.gguf.inp
│ ├── ggml-vocab-falcon.gguf.out
│ ├── ggml-vocab-gpt-2.gguf
│ ├── ggml-vocab-gpt-2.gguf.inp
│ ├── ggml-vocab-gpt-2.gguf.out
│ ├── ggml-vocab-gpt-neox.gguf
│ ├── ggml-vocab-llama-bpe.gguf
│ ├── ggml-vocab-llama-bpe.gguf.inp
│ ├── ggml-vocab-llama-bpe.gguf.out
│ ├── ggml-vocab-llama-spm.gguf
│ ├── ggml-vocab-llama-spm.gguf.inp
│ ├── ggml-vocab-llama-spm.gguf.out
│ ├── ggml-vocab-mpt.gguf
│ ├── ggml-vocab-mpt.gguf.inp
│ ├── ggml-vocab-mpt.gguf.out
│ ├── ggml-vocab-nomic-bert-moe.gguf
│ ├── ggml-vocab-phi-3.gguf
│ ├── ggml-vocab-phi-3.gguf.inp
│ ├── ggml-vocab-phi-3.gguf.out
│ ├── ggml-vocab-qwen2.gguf
│ ├── ggml-vocab-qwen2.gguf.inp
│ ├── ggml-vocab-qwen2.gguf.out
│ ├── ggml-vocab-refact.gguf
│ ├── ggml-vocab-refact.gguf.inp
│ ├── ggml-vocab-refact.gguf.out
│ ├── ggml-vocab-starcoder.gguf
│ ├── ggml-vocab-starcoder.gguf.inp
│ ├── ggml-vocab-starcoder.gguf.out
│ └── templates/
│ ├── Apertus-8B-Instruct.jinja
│ ├── Apriel-1.6-15b-Thinker-fixed.jinja
│ ├── Bielik-11B-v3.0-Instruct.jinja
│ ├── ByteDance-Seed-OSS.jinja
│ ├── CohereForAI-c4ai-command-r-plus-tool_use.jinja
│ ├── CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
│ ├── GLM-4.6.jinja
│ ├── GLM-4.7-Flash.jinja
│ ├── GigaChat3-10B-A1.8B.jinja
│ ├── GigaChat3.1-10B-A1.8B.jinja
│ ├── HuggingFaceTB-SmolLM3-3B.jinja
│ ├── Kimi-K2-Instruct.jinja
│ ├── Kimi-K2-Thinking.jinja
│ ├── LFM2-8B-A1B.jinja
│ ├── LFM2.5-Instruct.jinja
│ ├── MiMo-VL.jinja
│ ├── MiniMax-M2.jinja
│ ├── Mistral-Small-3.2-24B-Instruct-2506.jinja
│ ├── NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja
│ ├── NVIDIA-Nemotron-Nano-v2.jinja
│ ├── NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
│ ├── NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
│ ├── Qwen-QwQ-32B.jinja
│ ├── Qwen-Qwen2.5-7B-Instruct.jinja
│ ├── Qwen-Qwen3-0.6B.jinja
│ ├── Qwen3-Coder.jinja
│ ├── Qwen3.5-4B.jinja
│ ├── README.md
│ ├── StepFun3.5-Flash.jinja
│ ├── deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
│ ├── deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
│ ├── deepseek-ai-DeepSeek-V3.1.jinja
│ ├── fireworks-ai-llama-3-firefunction-v2.jinja
│ ├── google-gemma-2-2b-it.jinja
│ ├── ibm-granite-granite-3.3-2B-Instruct.jinja
│ ├── llama-cpp-deepseek-r1.jinja
│ ├── llama-cpp-rwkv-world.jinja
│ ├── meetkai-functionary-medium-v3.1.jinja
│ ├── meetkai-functionary-medium-v3.2.jinja
│ ├── meta-llama-Llama-3.1-8B-Instruct.jinja
│ ├── meta-llama-Llama-3.2-3B-Instruct.jinja
│ ├── meta-llama-Llama-3.3-70B-Instruct.jinja
│ ├── microsoft-Phi-3.5-mini-instruct.jinja
│ ├── mistralai-Ministral-3-14B-Reasoning-2512.jinja
│ ├── mistralai-Mistral-Nemo-Instruct-2407.jinja
│ ├── moonshotai-Kimi-K2.jinja
│ ├── openai-gpt-oss-120b.jinja
│ ├── stepfun-ai-Step-3.5-Flash.jinja
│ ├── unsloth-Apriel-1.5.jinja
│ ├── unsloth-mistral-Devstral-Small-2507.jinja
│ └── upstage-Solar-Open-100B.jinja
├── mypy.ini
├── pocs/
│ ├── CMakeLists.txt
│ └── vdot/
│ ├── CMakeLists.txt
│ ├── q8dot.cpp
│ └── vdot.cpp
├── pyproject.toml
├── pyrightconfig.json
├── requirements/
│ ├── requirements-all.txt
│ ├── requirements-compare-llama-bench.txt
│ ├── requirements-convert_hf_to_gguf.txt
│ ├── requirements-convert_hf_to_gguf_update.txt
│ ├── requirements-convert_legacy_llama.txt
│ ├── requirements-convert_llama_ggml_to_gguf.txt
│ ├── requirements-convert_lora_to_gguf.txt
│ ├── requirements-gguf_editor_gui.txt
│ ├── requirements-pydantic.txt
│ ├── requirements-server-bench.txt
│ ├── requirements-test-tokenizer-random.txt
│ └── requirements-tool_bench.txt
├── requirements.txt
├── scripts/
│ ├── apple/
│ │ ├── validate-apps.sh
│ │ ├── validate-ios.sh
│ │ ├── validate-macos.sh
│ │ ├── validate-tvos.sh
│ │ └── validate-visionos.sh
│ ├── bench-models.sh
│ ├── build-info.sh
│ ├── check-requirements.sh
│ ├── compare-commits.sh
│ ├── compare-llama-bench.py
│ ├── compare-logprobs.py
│ ├── create_ops_docs.py
│ ├── debug-test.sh
│ ├── fetch_server_test_models.py
│ ├── gen-authors.sh
│ ├── gen-unicode-data.py
│ ├── get-flags.mk
│ ├── get-hellaswag.sh
│ ├── get-pg.sh
│ ├── get-wikitext-2.sh
│ ├── get-winogrande.sh
│ ├── get_chat_template.py
│ ├── git-bisect-run.sh
│ ├── git-bisect.sh
│ ├── hf.sh
│ ├── hip/
│ │ └── gcn-cdna-vgpr-check.py
│ ├── install-oneapi.bat
│ ├── jinja/
│ │ ├── jinja-tester.py
│ │ └── requirements.txt
│ ├── pr2wt.sh
│ ├── serve-static.js
│ ├── server-bench.py
│ ├── server-test-function-call.py
│ ├── server-test-model.py
│ ├── snapdragon/
│ │ ├── adb/
│ │ │ ├── llama-cli.farf
│ │ │ ├── run-bench.sh
│ │ │ ├── run-cli.sh
│ │ │ ├── run-completion.sh
│ │ │ ├── run-mtmd.sh
│ │ │ └── run-tool.sh
│ │ ├── qdc/
│ │ │ ├── readme.md
│ │ │ ├── requirements.txt
│ │ │ └── tests/
│ │ │ └── test_bench.py
│ │ └── windows/
│ │ ├── run-bench.ps1
│ │ ├── run-cli.ps1
│ │ ├── run-completion.ps1
│ │ ├── run-mtmd.ps1
│ │ ├── run-tool.ps1
│ │ └── setup-build.ps1
│ ├── sync-ggml-am.sh
│ ├── sync-ggml.last
│ ├── sync-ggml.sh
│ ├── sync_vendor.py
│ ├── tool_bench.py
│ ├── tool_bench.sh
│ ├── verify-checksum-models.py
│ └── xxd.cmake
├── src/
│ ├── CMakeLists.txt
│ ├── llama-adapter.cpp
│ ├── llama-adapter.h
│ ├── llama-arch.cpp
│ ├── llama-arch.h
│ ├── llama-batch.cpp
│ ├── llama-batch.h
│ ├── llama-chat.cpp
│ ├── llama-chat.h
│ ├── llama-context.cpp
│ ├── llama-context.h
│ ├── llama-cparams.cpp
│ ├── llama-cparams.h
│ ├── llama-ext.h
│ ├── llama-grammar.cpp
│ ├── llama-grammar.h
│ ├── llama-graph.cpp
│ ├── llama-graph.h
│ ├── llama-hparams.cpp
│ ├── llama-hparams.h
│ ├── llama-impl.cpp
│ ├── llama-impl.h
│ ├── llama-io.cpp
│ ├── llama-io.h
│ ├── llama-kv-cache-iswa.cpp
│ ├── llama-kv-cache-iswa.h
│ ├── llama-kv-cache.cpp
│ ├── llama-kv-cache.h
│ ├── llama-kv-cells.h
│ ├── llama-memory-hybrid-iswa.cpp
│ ├── llama-memory-hybrid-iswa.h
│ ├── llama-memory-hybrid.cpp
│ ├── llama-memory-hybrid.h
│ ├── llama-memory-recurrent.cpp
│ ├── llama-memory-recurrent.h
│ ├── llama-memory.cpp
│ ├── llama-memory.h
│ ├── llama-mmap.cpp
│ ├── llama-mmap.h
│ ├── llama-model-loader.cpp
│ ├── llama-model-loader.h
│ ├── llama-model-saver.cpp
│ ├── llama-model-saver.h
│ ├── llama-model.cpp
│ ├── llama-model.h
│ ├── llama-quant.cpp
│ ├── llama-quant.h
│ ├── llama-sampler.cpp
│ ├── llama-sampler.h
│ ├── llama-vocab.cpp
│ ├── llama-vocab.h
│ ├── llama.cpp
│ ├── models/
│ │ ├── afmoe.cpp
│ │ ├── apertus.cpp
│ │ ├── arcee.cpp
│ │ ├── arctic.cpp
│ │ ├── arwkv7.cpp
│ │ ├── baichuan.cpp
│ │ ├── bailingmoe.cpp
│ │ ├── bailingmoe2.cpp
│ │ ├── bert.cpp
│ │ ├── bitnet.cpp
│ │ ├── bloom.cpp
│ │ ├── chameleon.cpp
│ │ ├── chatglm.cpp
│ │ ├── codeshell.cpp
│ │ ├── cogvlm.cpp
│ │ ├── cohere2-iswa.cpp
│ │ ├── command-r.cpp
│ │ ├── dbrx.cpp
│ │ ├── deci.cpp
│ │ ├── deepseek.cpp
│ │ ├── deepseek2.cpp
│ │ ├── delta-net-base.cpp
│ │ ├── dots1.cpp
│ │ ├── dream.cpp
│ │ ├── ernie4-5-moe.cpp
│ │ ├── ernie4-5.cpp
│ │ ├── eurobert.cpp
│ │ ├── exaone-moe.cpp
│ │ ├── exaone.cpp
│ │ ├── exaone4.cpp
│ │ ├── falcon-h1.cpp
│ │ ├── falcon.cpp
│ │ ├── gemma-embedding.cpp
│ │ ├── gemma.cpp
│ │ ├── gemma2-iswa.cpp
│ │ ├── gemma3.cpp
│ │ ├── gemma3n-iswa.cpp
│ │ ├── glm4-moe.cpp
│ │ ├── glm4.cpp
│ │ ├── gpt2.cpp
│ │ ├── gptneox.cpp
│ │ ├── granite-hybrid.cpp
│ │ ├── granite.cpp
│ │ ├── grok.cpp
│ │ ├── grovemoe.cpp
│ │ ├── hunyuan-dense.cpp
│ │ ├── hunyuan-moe.cpp
│ │ ├── internlm2.cpp
│ │ ├── jais.cpp
│ │ ├── jais2.cpp
│ │ ├── jamba.cpp
│ │ ├── kimi-linear.cpp
│ │ ├── lfm2.cpp
│ │ ├── llada-moe.cpp
│ │ ├── llada.cpp
│ │ ├── llama-iswa.cpp
│ │ ├── llama.cpp
│ │ ├── maincoder.cpp
│ │ ├── mamba-base.cpp
│ │ ├── mamba.cpp
│ │ ├── mimo2-iswa.cpp
│ │ ├── minicpm3.cpp
│ │ ├── minimax-m2.cpp
│ │ ├── mistral3.cpp
│ │ ├── models.h
│ │ ├── modern-bert.cpp
│ │ ├── mpt.cpp
│ │ ├── nemotron-h.cpp
│ │ ├── nemotron.cpp
│ │ ├── neo-bert.cpp
│ │ ├── olmo.cpp
│ │ ├── olmo2.cpp
│ │ ├── olmoe.cpp
│ │ ├── openai-moe-iswa.cpp
│ │ ├── openelm.cpp
│ │ ├── orion.cpp
│ │ ├── paddleocr.cpp
│ │ ├── pangu-embedded.cpp
│ │ ├── phi2.cpp
│ │ ├── phi3.cpp
│ │ ├── plamo.cpp
│ │ ├── plamo2.cpp
│ │ ├── plamo3.cpp
│ │ ├── plm.cpp
│ │ ├── qwen.cpp
│ │ ├── qwen2.cpp
│ │ ├── qwen2moe.cpp
│ │ ├── qwen2vl.cpp
│ │ ├── qwen3.cpp
│ │ ├── qwen35.cpp
│ │ ├── qwen35moe.cpp
│ │ ├── qwen3moe.cpp
│ │ ├── qwen3next.cpp
│ │ ├── qwen3vl-moe.cpp
│ │ ├── qwen3vl.cpp
│ │ ├── refact.cpp
│ │ ├── rnd1.cpp
│ │ ├── rwkv6-base.cpp
│ │ ├── rwkv6.cpp
│ │ ├── rwkv6qwen2.cpp
│ │ ├── rwkv7-base.cpp
│ │ ├── rwkv7.cpp
│ │ ├── seed-oss.cpp
│ │ ├── smallthinker.cpp
│ │ ├── smollm3.cpp
│ │ ├── stablelm.cpp
│ │ ├── starcoder.cpp
│ │ ├── starcoder2.cpp
│ │ ├── step35-iswa.cpp
│ │ ├── t5-dec.cpp
│ │ ├── t5-enc.cpp
│ │ ├── wavtokenizer-dec.cpp
│ │ └── xverse.cpp
│ ├── unicode-data.cpp
│ ├── unicode-data.h
│ ├── unicode.cpp
│ └── unicode.h
├── tests/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── export-graph-ops.cpp
│ ├── get-model.cpp
│ ├── get-model.h
│ ├── gguf-model-data.cpp
│ ├── gguf-model-data.h
│ ├── peg-parser/
│ │ ├── simple-tokenize.cpp
│ │ ├── simple-tokenize.h
│ │ ├── test-basic.cpp
│ │ ├── test-gbnf-generation.cpp
│ │ ├── test-json-parser.cpp
│ │ ├── test-json-serialization.cpp
│ │ ├── test-python-dict-parser.cpp
│ │ ├── test-unicode.cpp
│ │ └── tests.h
│ ├── run-json-schema-to-grammar.mjs
│ ├── test-alloc.cpp
│ ├── test-arg-parser.cpp
│ ├── test-autorelease.cpp
│ ├── test-backend-ops.cpp
│ ├── test-backend-sampler.cpp
│ ├── test-barrier.cpp
│ ├── test-c.c
│ ├── test-chat-auto-parser.cpp
│ ├── test-chat-peg-parser.cpp
│ ├── test-chat-template.cpp
│ ├── test-chat.cpp
│ ├── test-double-float.cpp
│ ├── test-gbnf-validator.cpp
│ ├── test-gguf-model-data.cpp
│ ├── test-gguf.cpp
│ ├── test-grammar-integration.cpp
│ ├── test-grammar-llguidance.cpp
│ ├── test-grammar-parser.cpp
│ ├── test-jinja.cpp
│ ├── test-json-partial.cpp
│ ├── test-json-schema-to-grammar.cpp
│ ├── test-llama-archs.cpp
│ ├── test-llama-grammar.cpp
│ ├── test-log.cpp
│ ├── test-lora-conversion-inference.sh
│ ├── test-model-load-cancel.cpp
│ ├── test-mtmd-c-api.c
│ ├── test-opt.cpp
│ ├── test-peg-parser.cpp
│ ├── test-quantize-fns.cpp
│ ├── test-quantize-perf.cpp
│ ├── test-quantize-stats.cpp
│ ├── test-reasoning-budget.cpp
│ ├── test-regex-partial.cpp
│ ├── test-rope.cpp
│ ├── test-sampling.cpp
│ ├── test-state-restore-fragmented.cpp
│ ├── test-thread-safety.cpp
│ ├── test-tokenizer-0.cpp
│ ├── test-tokenizer-0.py
│ ├── test-tokenizer-0.sh
│ ├── test-tokenizer-1-bpe.cpp
│ ├── test-tokenizer-1-spm.cpp
│ ├── test-tokenizer-random.py
│ ├── test-tokenizers-repo.sh
│ └── testing.h
├── tools/
│ ├── CMakeLists.txt
│ ├── batched-bench/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── batched-bench.cpp
│ ├── cli/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── cli.cpp
│ ├── completion/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── completion.cpp
│ ├── cvector-generator/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── completions.txt
│ │ ├── cvector-generator.cpp
│ │ ├── mean.hpp
│ │ ├── negative.txt
│ │ ├── pca.hpp
│ │ └── positive.txt
│ ├── export-lora/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── export-lora.cpp
│ ├── fit-params/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── fit-params.cpp
│ ├── gguf-split/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── gguf-split.cpp
│ │ └── tests.sh
│ ├── imatrix/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── imatrix.cpp
│ ├── llama-bench/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── llama-bench.cpp
│ ├── mtmd/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── clip-graph.h
│ │ ├── clip-impl.h
│ │ ├── clip-model.h
│ │ ├── clip.cpp
│ │ ├── clip.h
│ │ ├── debug/
│ │ │ ├── mtmd-debug.cpp
│ │ │ ├── mtmd-debug.h
│ │ │ └── mtmd-debug.md
│ │ ├── deprecation-warning.cpp
│ │ ├── legacy-models/
│ │ │ ├── convert_image_encoder_to_gguf.py
│ │ │ ├── glmedge-convert-image-encoder-to-gguf.py
│ │ │ ├── glmedge-surgery.py
│ │ │ ├── llava_surgery.py
│ │ │ ├── llava_surgery_v2.py
│ │ │ ├── minicpmv-convert-image-encoder-to-gguf.py
│ │ │ └── minicpmv-surgery.py
│ │ ├── models/
│ │ │ ├── cogvlm.cpp
│ │ │ ├── conformer.cpp
│ │ │ ├── deepseekocr.cpp
│ │ │ ├── glm4v.cpp
│ │ │ ├── internvl.cpp
│ │ │ ├── kimik25.cpp
│ │ │ ├── kimivl.cpp
│ │ │ ├── llama4.cpp
│ │ │ ├── llava.cpp
│ │ │ ├── minicpmv.cpp
│ │ │ ├── mobilenetv5.cpp
│ │ │ ├── models.h
│ │ │ ├── nemotron-v2-vl.cpp
│ │ │ ├── paddleocr.cpp
│ │ │ ├── pixtral.cpp
│ │ │ ├── qwen2vl.cpp
│ │ │ ├── qwen3vl.cpp
│ │ │ ├── siglip.cpp
│ │ │ ├── whisper-enc.cpp
│ │ │ └── youtuvl.cpp
│ │ ├── mtmd-audio.cpp
│ │ ├── mtmd-audio.h
│ │ ├── mtmd-cli.cpp
│ │ ├── mtmd-helper.cpp
│ │ ├── mtmd-helper.h
│ │ ├── mtmd-image.cpp
│ │ ├── mtmd-image.h
│ │ ├── mtmd.cpp
│ │ ├── mtmd.h
│ │ ├── requirements.txt
│ │ ├── tests/
│ │ │ ├── test-1-extracted.md
│ │ │ ├── test-1-extracted.txt
│ │ │ ├── test-deepseek-ocr.py
│ │ │ └── tests-requirements.txt
│ │ └── tests.sh
│ ├── parser/
│ │ ├── CMakeLists.txt
│ │ ├── debug-template-parser.cpp
│ │ └── template-analysis.cpp
│ ├── perplexity/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── perplexity.cpp
│ ├── quantize/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── quantize.cpp
│ │ └── tests.sh
│ ├── results/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── results.cpp
│ ├── rpc/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── rpc-server.cpp
│ ├── server/
│ │ ├── CMakeLists.txt
│ │ ├── README-dev.md
│ │ ├── README.md
│ │ ├── bench/
│ │ │ ├── README.md
│ │ │ ├── bench.py
│ │ │ ├── prometheus.yml
│ │ │ ├── requirements.txt
│ │ │ └── script.js
│ │ ├── chat-llama2.sh
│ │ ├── chat.mjs
│ │ ├── chat.sh
│ │ ├── public/
│ │ │ ├── bundle.css
│ │ │ ├── bundle.js
│ │ │ ├── index.html
│ │ │ └── loading.html
│ │ ├── public_legacy/
│ │ │ ├── colorthemes.css
│ │ │ ├── completion.js
│ │ │ ├── index-new.html
│ │ │ ├── index.html
│ │ │ ├── index.js
│ │ │ ├── json-schema-to-grammar.mjs
│ │ │ ├── loading.html
│ │ │ ├── prompt-formats.js
│ │ │ ├── style.css
│ │ │ ├── system-prompts.js
│ │ │ ├── theme-beeninorder.css
│ │ │ ├── theme-ketivah.css
│ │ │ ├── theme-mangotango.css
│ │ │ ├── theme-playground.css
│ │ │ ├── theme-polarnight.css
│ │ │ └── theme-snowstorm.css
│ │ ├── public_simplechat/
│ │ │ ├── datautils.mjs
│ │ │ ├── index.html
│ │ │ ├── readme.md
│ │ │ ├── simplechat.css
│ │ │ ├── simplechat.js
│ │ │ └── ui.mjs
│ │ ├── server-common.cpp
│ │ ├── server-common.h
│ │ ├── server-context.cpp
│ │ ├── server-context.h
│ │ ├── server-cors-proxy.h
│ │ ├── server-http.cpp
│ │ ├── server-http.h
│ │ ├── server-models.cpp
│ │ ├── server-models.h
│ │ ├── server-queue.cpp
│ │ ├── server-queue.h
│ │ ├── server-task.cpp
│ │ ├── server-task.h
│ │ ├── server-tools.cpp
│ │ ├── server-tools.h
│ │ ├── server.cpp
│ │ ├── tests/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── conftest.py
│ │ │ ├── pytest.ini
│ │ │ ├── requirements.txt
│ │ │ ├── tests.sh
│ │ │ ├── unit/
│ │ │ │ ├── test_basic.py
│ │ │ │ ├── test_chat_completion.py
│ │ │ │ ├── test_compat_anthropic.py
│ │ │ │ ├── test_compat_oai_responses.py
│ │ │ │ ├── test_completion.py
│ │ │ │ ├── test_ctx_shift.py
│ │ │ │ ├── test_embedding.py
│ │ │ │ ├── test_infill.py
│ │ │ │ ├── test_lora.py
│ │ │ │ ├── test_proxy.py
│ │ │ │ ├── test_rerank.py
│ │ │ │ ├── test_router.py
│ │ │ │ ├── test_security.py
│ │ │ │ ├── test_sleep.py
│ │ │ │ ├── test_slot_save.py
│ │ │ │ ├── test_speculative.py
│ │ │ │ ├── test_template.py
│ │ │ │ ├── test_tokenize.py
│ │ │ │ ├── test_tool_call.py
│ │ │ │ └── test_vision_api.py
│ │ │ └── utils.py
│ │ ├── themes/
│ │ │ ├── README.md
│ │ │ ├── buttons-top/
│ │ │ │ ├── README.md
│ │ │ │ └── index.html
│ │ │ └── wild/
│ │ │ ├── README.md
│ │ │ └── index.html
│ │ └── webui/
│ │ ├── .gitignore
│ │ ├── .npmrc
│ │ ├── .prettierignore
│ │ ├── .prettierrc
│ │ ├── .storybook/
│ │ │ ├── ModeWatcherDecorator.svelte
│ │ │ ├── TooltipProviderDecorator.svelte
│ │ │ ├── main.ts
│ │ │ ├── preview.ts
│ │ │ └── vitest.setup.ts
│ │ ├── README.md
│ │ ├── components.json
│ │ ├── docs/
│ │ │ ├── architecture/
│ │ │ │ ├── high-level-architecture-simplified.md
│ │ │ │ └── high-level-architecture.md
│ │ │ └── flows/
│ │ │ ├── chat-flow.md
│ │ │ ├── conversations-flow.md
│ │ │ ├── data-flow-simplified-model-mode.md
│ │ │ ├── data-flow-simplified-router-mode.md
│ │ │ ├── database-flow.md
│ │ │ ├── mcp-flow.md
│ │ │ ├── models-flow.md
│ │ │ ├── server-flow.md
│ │ │ └── settings-flow.md
│ │ ├── eslint.config.js
│ │ ├── package.json
│ │ ├── playwright.config.ts
│ │ ├── scripts/
│ │ │ ├── dev.sh
│ │ │ ├── install-git-hooks.sh
│ │ │ └── post-build.sh
│ │ ├── src/
│ │ │ ├── app.css
│ │ │ ├── app.d.ts
│ │ │ ├── app.html
│ │ │ ├── lib/
│ │ │ │ ├── actions/
│ │ │ │ │ └── fade-in-view.svelte.ts
│ │ │ │ ├── components/
│ │ │ │ │ ├── app/
│ │ │ │ │ │ ├── actions/
│ │ │ │ │ │ │ ├── ActionIcon.svelte
│ │ │ │ │ │ │ ├── ActionIconCopyToClipboard.svelte
│ │ │ │ │ │ │ ├── ActionIconRemove.svelte
│ │ │ │ │ │ │ ├── ActionIconsCodeBlock.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── badges/
│ │ │ │ │ │ │ ├── BadgeChatStatistic.svelte
│ │ │ │ │ │ │ ├── BadgeInfo.svelte
│ │ │ │ │ │ │ ├── BadgeModality.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── chat/
│ │ │ │ │ │ │ ├── ChatAttachments/
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpPrompt.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpResource.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpResources.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentPreview.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentThumbnailFile.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentThumbnailImage.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentsList.svelte
│ │ │ │ │ │ │ │ └── ChatAttachmentsViewAll.svelte
│ │ │ │ │ │ │ ├── ChatForm/
│ │ │ │ │ │ │ │ ├── ChatForm.svelte
│ │ │ │ │ │ │ │ ├── ChatFormActions/
│ │ │ │ │ │ │ │ │ ├── ChatFormActionAttachmentsDropdown.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionAttachmentsSheet.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionRecord.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionSubmit.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormActions.svelte
│ │ │ │ │ │ │ │ ├── ChatFormFileInputInvisible.svelte
│ │ │ │ │ │ │ │ ├── ChatFormHelperText.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPicker/
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerItemHeader.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerList.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerListItem.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormPickerListItemSkeleton.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPickerPopover.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPromptPicker/
│ │ │ │ │ │ │ │ │ ├── ChatFormPromptPicker.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPromptPickerArgumentForm.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormPromptPickerArgumentInput.svelte
│ │ │ │ │ │ │ │ ├── ChatFormResourcePicker/
│ │ │ │ │ │ │ │ │ └── ChatFormResourcePicker.svelte
│ │ │ │ │ │ │ │ └── ChatFormTextarea.svelte
│ │ │ │ │ │ │ ├── ChatMessages/
│ │ │ │ │ │ │ │ ├── ChatMessage.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageActions.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageAgenticContent.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageAssistant.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageBranchingControls.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageEditForm.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageMcpPrompt.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageMcpPromptContent.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageStatistics.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageSystem.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageUser.svelte
│ │ │ │ │ │ │ │ └── ChatMessages.svelte
│ │ │ │ │ │ │ ├── ChatScreen/
│ │ │ │ │ │ │ │ ├── ChatScreen.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenDragOverlay.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenForm.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenHeader.svelte
│ │ │ │ │ │ │ │ └── ChatScreenProcessingInfo.svelte
│ │ │ │ │ │ │ ├── ChatSettings/
│ │ │ │ │ │ │ │ ├── ChatSettings.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsFields.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsFooter.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsImportExportTab.svelte
│ │ │ │ │ │ │ │ └── ChatSettingsParameterSourceIndicator.svelte
│ │ │ │ │ │ │ ├── ChatSidebar/
│ │ │ │ │ │ │ │ ├── ChatSidebar.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarActions.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarConversationItem.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarSearch.svelte
│ │ │ │ │ │ │ │ └── handle-mobile-sidebar-item-click.ts
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── content/
│ │ │ │ │ │ │ ├── CollapsibleContentBlock.svelte
│ │ │ │ │ │ │ ├── MarkdownContent.svelte
│ │ │ │ │ │ │ ├── SyntaxHighlightedCode.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── dialogs/
│ │ │ │ │ │ │ ├── DialogChatAttachmentPreview.svelte
│ │ │ │ │ │ │ ├── DialogChatAttachmentsViewAll.svelte
│ │ │ │ │ │ │ ├── DialogChatError.svelte
│ │ │ │ │ │ │ ├── DialogChatSettings.svelte
│ │ │ │ │ │ │ ├── DialogCodePreview.svelte
│ │ │ │ │ │ │ ├── DialogConfirmation.svelte
│ │ │ │ │ │ │ ├── DialogConversationSelection.svelte
│ │ │ │ │ │ │ ├── DialogConversationTitleUpdate.svelte
│ │ │ │ │ │ │ ├── DialogEmptyFileAlert.svelte
│ │ │ │ │ │ │ ├── DialogMcpResourcePreview.svelte
│ │ │ │ │ │ │ ├── DialogMcpResources.svelte
│ │ │ │ │ │ │ ├── DialogMcpServersSettings.svelte
│ │ │ │ │ │ │ ├── DialogModelInformation.svelte
│ │ │ │ │ │ │ ├── DialogModelNotAvailable.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── forms/
│ │ │ │ │ │ │ ├── InputWithSuggestions.svelte
│ │ │ │ │ │ │ ├── KeyValuePairs.svelte
│ │ │ │ │ │ │ ├── SearchInput.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── mcp/
│ │ │ │ │ │ │ ├── McpActiveServersAvatars.svelte
│ │ │ │ │ │ │ ├── McpCapabilitiesBadges.svelte
│ │ │ │ │ │ │ ├── McpConnectionLogs.svelte
│ │ │ │ │ │ │ ├── McpLogo.svelte
│ │ │ │ │ │ │ ├── McpResourceBrowser/
│ │ │ │ │ │ │ │ ├── McpResourceBrowser.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserEmptyState.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserHeader.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserServerItem.svelte
│ │ │ │ │ │ │ │ └── mcp-resource-browser.ts
│ │ │ │ │ │ │ ├── McpResourcePreview.svelte
│ │ │ │ │ │ │ ├── McpResourceTemplateForm.svelte
│ │ │ │ │ │ │ ├── McpServerCard/
│ │ │ │ │ │ │ │ ├── McpServerCard.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardActions.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardDeleteDialog.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardEditForm.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardHeader.svelte
│ │ │ │ │ │ │ │ └── McpServerCardToolsList.svelte
│ │ │ │ │ │ │ ├── McpServerCardSkeleton.svelte
│ │ │ │ │ │ │ ├── McpServerForm.svelte
│ │ │ │ │ │ │ ├── McpServerInfo.svelte
│ │ │ │ │ │ │ ├── McpServersSelector.svelte
│ │ │ │ │ │ │ ├── McpServersSettings.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── misc/
│ │ │ │ │ │ │ ├── ConversationSelection.svelte
│ │ │ │ │ │ │ ├── HorizontalScrollCarousel.svelte
│ │ │ │ │ │ │ ├── KeyboardShortcutInfo.svelte
│ │ │ │ │ │ │ ├── TruncatedText.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── models/
│ │ │ │ │ │ │ ├── ModelBadge.svelte
│ │ │ │ │ │ │ ├── ModelId.svelte
│ │ │ │ │ │ │ ├── ModelsSelector.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorList.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorOption.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorSheet.svelte
│ │ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ │ └── utils.ts
│ │ │ │ │ │ ├── navigation/
│ │ │ │ │ │ │ ├── DropdownMenuActions.svelte
│ │ │ │ │ │ │ ├── DropdownMenuSearchable.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ └── server/
│ │ │ │ │ │ ├── ServerErrorSplash.svelte
│ │ │ │ │ │ ├── ServerLoadingSplash.svelte
│ │ │ │ │ │ ├── ServerStatus.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ └── ui/
│ │ │ │ │ ├── alert/
│ │ │ │ │ │ ├── alert-description.svelte
│ │ │ │ │ │ ├── alert-title.svelte
│ │ │ │ │ │ ├── alert.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── alert-dialog/
│ │ │ │ │ │ ├── alert-dialog-action.svelte
│ │ │ │ │ │ ├── alert-dialog-cancel.svelte
│ │ │ │ │ │ ├── alert-dialog-content.svelte
│ │ │ │ │ │ ├── alert-dialog-description.svelte
│ │ │ │ │ │ ├── alert-dialog-footer.svelte
│ │ │ │ │ │ ├── alert-dialog-header.svelte
│ │ │ │ │ │ ├── alert-dialog-overlay.svelte
│ │ │ │ │ │ ├── alert-dialog-title.svelte
│ │ │ │ │ │ ├── alert-dialog-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── badge/
│ │ │ │ │ │ ├── badge.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── button/
│ │ │ │ │ │ ├── button.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── card/
│ │ │ │ │ │ ├── card-action.svelte
│ │ │ │ │ │ ├── card-content.svelte
│ │ │ │ │ │ ├── card-description.svelte
│ │ │ │ │ │ ├── card-footer.svelte
│ │ │ │ │ │ ├── card-header.svelte
│ │ │ │ │ │ ├── card-title.svelte
│ │ │ │ │ │ ├── card.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── checkbox/
│ │ │ │ │ │ ├── checkbox.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── collapsible/
│ │ │ │ │ │ ├── collapsible-content.svelte
│ │ │ │ │ │ ├── collapsible-trigger.svelte
│ │ │ │ │ │ ├── collapsible.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── dialog/
│ │ │ │ │ │ ├── dialog-close.svelte
│ │ │ │ │ │ ├── dialog-content.svelte
│ │ │ │ │ │ ├── dialog-description.svelte
│ │ │ │ │ │ ├── dialog-footer.svelte
│ │ │ │ │ │ ├── dialog-header.svelte
│ │ │ │ │ │ ├── dialog-overlay.svelte
│ │ │ │ │ │ ├── dialog-title.svelte
│ │ │ │ │ │ ├── dialog-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── dropdown-menu/
│ │ │ │ │ │ ├── dropdown-menu-checkbox-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-content.svelte
│ │ │ │ │ │ ├── dropdown-menu-group-heading.svelte
│ │ │ │ │ │ ├── dropdown-menu-group.svelte
│ │ │ │ │ │ ├── dropdown-menu-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-label.svelte
│ │ │ │ │ │ ├── dropdown-menu-radio-group.svelte
│ │ │ │ │ │ ├── dropdown-menu-radio-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-separator.svelte
│ │ │ │ │ │ ├── dropdown-menu-shortcut.svelte
│ │ │ │ │ │ ├── dropdown-menu-sub-content.svelte
│ │ │ │ │ │ ├── dropdown-menu-sub-trigger.svelte
│ │ │ │ │ │ ├── dropdown-menu-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── input/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── input.svelte
│ │ │ │ │ ├── label/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── label.svelte
│ │ │ │ │ ├── popover/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── popover-close.svelte
│ │ │ │ │ │ ├── popover-content.svelte
│ │ │ │ │ │ ├── popover-portal.svelte
│ │ │ │ │ │ ├── popover-trigger.svelte
│ │ │ │ │ │ └── popover.svelte
│ │ │ │ │ ├── scroll-area/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── scroll-area-scrollbar.svelte
│ │ │ │ │ │ └── scroll-area.svelte
│ │ │ │ │ ├── select/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── select-content.svelte
│ │ │ │ │ │ ├── select-group-heading.svelte
│ │ │ │ │ │ ├── select-group.svelte
│ │ │ │ │ │ ├── select-item.svelte
│ │ │ │ │ │ ├── select-label.svelte
│ │ │ │ │ │ ├── select-scroll-down-button.svelte
│ │ │ │ │ │ ├── select-scroll-up-button.svelte
│ │ │ │ │ │ ├── select-separator.svelte
│ │ │ │ │ │ └── select-trigger.svelte
│ │ │ │ │ ├── separator/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── separator.svelte
│ │ │ │ │ ├── sheet/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── sheet-close.svelte
│ │ │ │ │ │ ├── sheet-content.svelte
│ │ │ │ │ │ ├── sheet-description.svelte
│ │ │ │ │ │ ├── sheet-footer.svelte
│ │ │ │ │ │ ├── sheet-header.svelte
│ │ │ │ │ │ ├── sheet-overlay.svelte
│ │ │ │ │ │ ├── sheet-title.svelte
│ │ │ │ │ │ └── sheet-trigger.svelte
│ │ │ │ │ ├── sidebar/
│ │ │ │ │ │ ├── constants.ts
│ │ │ │ │ │ ├── context.svelte.ts
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── sidebar-content.svelte
│ │ │ │ │ │ ├── sidebar-footer.svelte
│ │ │ │ │ │ ├── sidebar-group-action.svelte
│ │ │ │ │ │ ├── sidebar-group-content.svelte
│ │ │ │ │ │ ├── sidebar-group-label.svelte
│ │ │ │ │ │ ├── sidebar-group.svelte
│ │ │ │ │ │ ├── sidebar-header.svelte
│ │ │ │ │ │ ├── sidebar-input.svelte
│ │ │ │ │ │ ├── sidebar-inset.svelte
│ │ │ │ │ │ ├── sidebar-menu-action.svelte
│ │ │ │ │ │ ├── sidebar-menu-badge.svelte
│ │ │ │ │ │ ├── sidebar-menu-button.svelte
│ │ │ │ │ │ ├── sidebar-menu-item.svelte
│ │ │ │ │ │ ├── sidebar-menu-skeleton.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub-button.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub-item.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub.svelte
│ │ │ │ │ │ ├── sidebar-menu.svelte
│ │ │ │ │ │ ├── sidebar-provider.svelte
│ │ │ │ │ │ ├── sidebar-rail.svelte
│ │ │ │ │ │ ├── sidebar-separator.svelte
│ │ │ │ │ │ ├── sidebar-trigger.svelte
│ │ │ │ │ │ └── sidebar.svelte
│ │ │ │ │ ├── skeleton/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── skeleton.svelte
│ │ │ │ │ ├── switch/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── switch.svelte
│ │ │ │ │ ├── table/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── table-body.svelte
│ │ │ │ │ │ ├── table-caption.svelte
│ │ │ │ │ │ ├── table-cell.svelte
│ │ │ │ │ │ ├── table-footer.svelte
│ │ │ │ │ │ ├── table-head.svelte
│ │ │ │ │ │ ├── table-header.svelte
│ │ │ │ │ │ ├── table-row.svelte
│ │ │ │ │ │ └── table.svelte
│ │ │ │ │ ├── textarea/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── textarea.svelte
│ │ │ │ │ ├── tooltip/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── tooltip-content.svelte
│ │ │ │ │ │ └── tooltip-trigger.svelte
│ │ │ │ │ └── utils.ts
│ │ │ │ ├── constants/
│ │ │ │ │ ├── agentic.ts
│ │ │ │ │ ├── api-endpoints.ts
│ │ │ │ │ ├── attachment-labels.ts
│ │ │ │ │ ├── auto-scroll.ts
│ │ │ │ │ ├── binary-detection.ts
│ │ │ │ │ ├── cache.ts
│ │ │ │ │ ├── chat-form.ts
│ │ │ │ │ ├── code-blocks.ts
│ │ │ │ │ ├── code.ts
│ │ │ │ │ ├── context-keys.ts
│ │ │ │ │ ├── css-classes.ts
│ │ │ │ │ ├── favicon.ts
│ │ │ │ │ ├── floating-ui-constraints.ts
│ │ │ │ │ ├── formatters.ts
│ │ │ │ │ ├── icons.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── key-value-pairs.ts
│ │ │ │ │ ├── latex-protection.ts
│ │ │ │ │ ├── literal-html.ts
│ │ │ │ │ ├── localstorage-keys.ts
│ │ │ │ │ ├── markdown.ts
│ │ │ │ │ ├── max-bundle-size.ts
│ │ │ │ │ ├── mcp-form.ts
│ │ │ │ │ ├── mcp-resource.ts
│ │ │ │ │ ├── mcp.ts
│ │ │ │ │ ├── message-export.ts
│ │ │ │ │ ├── model-id.ts
│ │ │ │ │ ├── precision.ts
│ │ │ │ │ ├── processing-info.ts
│ │ │ │ │ ├── settings-config.ts
│ │ │ │ │ ├── settings-fields.ts
│ │ │ │ │ ├── settings-keys.ts
│ │ │ │ │ ├── settings-sections.ts
│ │ │ │ │ ├── supported-file-types.ts
│ │ │ │ │ ├── table-html-restorer.ts
│ │ │ │ │ ├── tooltip-config.ts
│ │ │ │ │ ├── ui.ts
│ │ │ │ │ ├── uri-template.ts
│ │ │ │ │ └── viewport.ts
│ │ │ │ ├── contexts/
│ │ │ │ │ ├── chat-actions.context.ts
│ │ │ │ │ ├── chat-settings-dialog.context.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ └── message-edit.context.ts
│ │ │ │ ├── enums/
│ │ │ │ │ ├── agentic.ts
│ │ │ │ │ ├── attachment.ts
│ │ │ │ │ ├── chat.ts
│ │ │ │ │ ├── files.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── keyboard.ts
│ │ │ │ │ ├── mcp.ts
│ │ │ │ │ ├── model.ts
│ │ │ │ │ ├── server.ts
│ │ │ │ │ ├── settings.ts
│ │ │ │ │ └── ui.ts
│ │ │ │ ├── hooks/
│ │ │ │ │ ├── is-mobile.svelte.ts
│ │ │ │ │ ├── use-auto-scroll.svelte.ts
│ │ │ │ │ └── use-processing-state.svelte.ts
│ │ │ │ ├── markdown/
│ │ │ │ │ ├── enhance-code-blocks.ts
│ │ │ │ │ ├── enhance-links.ts
│ │ │ │ │ ├── literal-html.ts
│ │ │ │ │ ├── resolve-attachment-images.ts
│ │ │ │ │ └── table-html-restorer.ts
│ │ │ │ ├── services/
│ │ │ │ │ ├── chat.service.ts
│ │ │ │ │ ├── database.service.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── mcp.service.ts
│ │ │ │ │ ├── models.service.ts
│ │ │ │ │ ├── parameter-sync.service.spec.ts
│ │ │ │ │ ├── parameter-sync.service.ts
│ │ │ │ │ └── props.service.ts
│ │ │ │ ├── stores/
│ │ │ │ │ ├── agentic.svelte.ts
│ │ │ │ │ ├── chat.svelte.ts
│ │ │ │ │ ├── conversations.svelte.ts
│ │ │ │ │ ├── mcp-resources.svelte.ts
│ │ │ │ │ ├── mcp.svelte.ts
│ │ │ │ │ ├── models.svelte.ts
│ │ │ │ │ ├── persisted.svelte.ts
│ │ │ │ │ ├── server.svelte.ts
│ │ │ │ │ └── settings.svelte.ts
│ │ │ │ ├── types/
│ │ │ │ │ ├── agentic.d.ts
│ │ │ │ │ ├── api.d.ts
│ │ │ │ │ ├── chat.d.ts
│ │ │ │ │ ├── common.d.ts
│ │ │ │ │ ├── database.d.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── mcp.d.ts
│ │ │ │ │ ├── models.d.ts
│ │ │ │ │ └── settings.d.ts
│ │ │ │ └── utils/
│ │ │ │ ├── abort.ts
│ │ │ │ ├── agentic.ts
│ │ │ │ ├── api-fetch.ts
│ │ │ │ ├── api-headers.ts
│ │ │ │ ├── api-key-validation.ts
│ │ │ │ ├── attachment-display.ts
│ │ │ │ ├── attachment-type.ts
│ │ │ │ ├── audio-recording.ts
│ │ │ │ ├── autoresize-textarea.ts
│ │ │ │ ├── branching.ts
│ │ │ │ ├── browser-only.ts
│ │ │ │ ├── cache-ttl.ts
│ │ │ │ ├── clipboard.ts
│ │ │ │ ├── code.ts
│ │ │ │ ├── config-helpers.ts
│ │ │ │ ├── conversation-utils.ts
│ │ │ │ ├── convert-files-to-extra.ts
│ │ │ │ ├── cors-proxy.ts
│ │ │ │ ├── data-url.ts
│ │ │ │ ├── debounce.ts
│ │ │ │ ├── favicon.ts
│ │ │ │ ├── file-preview.ts
│ │ │ │ ├── file-type.ts
│ │ │ │ ├── formatters.ts
│ │ │ │ ├── headers.ts
│ │ │ │ ├── image-error-fallback.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── is-ime-composing.ts
│ │ │ │ ├── latex-protection.ts
│ │ │ │ ├── legacy-migration.ts
│ │ │ │ ├── mcp.ts
│ │ │ │ ├── modality-file-validation.ts
│ │ │ │ ├── model-names.ts
│ │ │ │ ├── pdf-processing.ts
│ │ │ │ ├── portal-to-body.ts
│ │ │ │ ├── precision.ts
│ │ │ │ ├── process-uploaded-files.ts
│ │ │ │ ├── sanitize.ts
│ │ │ │ ├── svg-to-png.ts
│ │ │ │ ├── syntax-highlight-language.ts
│ │ │ │ ├── text-files.ts
│ │ │ │ ├── text.ts
│ │ │ │ ├── uri-template.ts
│ │ │ │ ├── uuid.ts
│ │ │ │ └── webp-to-png.ts
│ │ │ ├── routes/
│ │ │ │ ├── +error.svelte
│ │ │ │ ├── +layout.svelte
│ │ │ │ ├── +page.svelte
│ │ │ │ ├── +page.ts
│ │ │ │ └── chat/
│ │ │ │ └── [id]/
│ │ │ │ ├── +page.svelte
│ │ │ │ └── +page.ts
│ │ │ └── styles/
│ │ │ └── katex-custom.scss
│ │ ├── static/
│ │ │ └── loading.html
│ │ ├── svelte.config.js
│ │ ├── tests/
│ │ │ ├── client/
│ │ │ │ ├── components/
│ │ │ │ │ └── TestWrapper.svelte
│ │ │ │ └── page.svelte.test.ts
│ │ │ ├── e2e/
│ │ │ │ └── demo.test.ts
│ │ │ ├── stories/
│ │ │ │ ├── ChatMessage.stories.svelte
│ │ │ │ ├── ChatScreenForm.stories.svelte
│ │ │ │ ├── ChatSettings.stories.svelte
│ │ │ │ ├── ChatSidebar.stories.svelte
│ │ │ │ ├── Introduction.mdx
│ │ │ │ ├── MarkdownContent.stories.svelte
│ │ │ │ └── fixtures/
│ │ │ │ ├── ai-tutorial.ts
│ │ │ │ ├── api-docs.ts
│ │ │ │ ├── blog-post.ts
│ │ │ │ ├── data-analysis.ts
│ │ │ │ ├── empty.ts
│ │ │ │ ├── math-formulas.ts
│ │ │ │ ├── readme.ts
│ │ │ │ └── storybook-mocks.ts
│ │ │ └── unit/
│ │ │ ├── agentic-sections.test.ts
│ │ │ ├── agentic-strip.test.ts
│ │ │ ├── clipboard.test.ts
│ │ │ ├── latex-protection.test.ts
│ │ │ ├── model-id-parser.test.ts
│ │ │ ├── model-names.test.ts
│ │ │ ├── reasoning-context.test.ts
│ │ │ └── uri-template.test.ts
│ │ ├── tsconfig.json
│ │ ├── vite.config.ts
│ │ └── vitest-setup-client.ts
│ ├── tokenize/
│ │ ├── CMakeLists.txt
│ │ └── tokenize.cpp
│ └── tts/
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── convert_pt_to_hf.py
│ ├── tts-outetts.py
│ └── tts.cpp
├── ty.toml
└── vendor/
├── cpp-httplib/
│ ├── CMakeLists.txt
│ ├── LICENSE
│ ├── httplib.cpp
│ └── httplib.h
├── miniaudio/
│ └── miniaudio.h
├── nlohmann/
│ ├── json.hpp
│ └── json_fwd.hpp
├── sheredom/
│ └── subprocess.h
└── stb/
└── stb_image.h
================================================
FILE CONTENTS
================================================
================================================
FILE: .clang-format
================================================
# clang-format style for llama.cpp C/C++/CUDA sources.
# Option reference: https://clang.llvm.org/docs/ClangFormatStyleOptions.html
# Commented-out keys appear to be options from newer clang-format releases,
# kept here for future use — confirm the minimum supported version before enabling.
---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
# Treat CUDA keywords/attributes as "attribute macros" and avoid breaking lines inside them
AttributeMacros:
  - __host__
  - __device__
  - __global__
  - __forceinline__
  - __launch_bounds__
BinPackArguments: true
BinPackParameters: false # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
# Include ordering: local "" headers first, then system .h headers, then other <> headers.
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '".*"'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*\.h>'
    Priority: 2
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 3
    SortPriority: 0
  - Regex: '.*'
    Priority: 4
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...
================================================
FILE: .clang-tidy
================================================
# clang-tidy configuration for llama.cpp.
# `Checks` is a YAML folded scalar: whole check groups are enabled via globs
# (bugprone-*, readability-*, ...) and individual checks are then opted out
# with a leading `-`. No comments are placed inside the scalar because they
# would become part of the checks string.
# Check reference: https://clang.llvm.org/extra/clang-tidy/checks/list.html
# `FormatStyle: none` keeps clang-tidy from reformatting code when applying fixes.
---
Checks: >
    bugprone-*,
    -bugprone-easily-swappable-parameters,
    -bugprone-implicit-widening-of-multiplication-result,
    -bugprone-misplaced-widening-cast,
    -bugprone-narrowing-conversions,
    readability-*,
    -readability-avoid-unconditional-preprocessor-if,
    -readability-function-cognitive-complexity,
    -readability-identifier-length,
    -readability-implicit-bool-conversion,
    -readability-magic-numbers,
    -readability-uppercase-literal-suffix,
    -readability-simplify-boolean-expr,
    -readability-math-missing-parentheses,
    clang-analyzer-*,
    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
    performance-*,
    -performance-enum-size,
    portability-*,
    -portability-simd-intrinsics,
    misc-*,
    -misc-const-correctness,
    -misc-non-private-member-variables-in-classes,
    -misc-no-recursion,
    -misc-use-anonymous-namespace,
FormatStyle: none
================================================
FILE: .devops/cann.Dockerfile
================================================
# ==============================================================================
# ARGUMENTS
# ==============================================================================
# Define the CANN base image for easier version updates later
ARG CHIP_TYPE=910b
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.5.0-${CHIP_TYPE}-openeuler24.03-py3.11

# ==============================================================================
# BUILD STAGE
# Compile all binary files and libraries
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS build

# -- Install build dependencies --
RUN yum install -y gcc g++ cmake make git openssl-devel python3 python3-pip && \
    yum clean all && \
    rm -rf /var/cache/yum

# -- Set the working directory --
WORKDIR /app

# -- Copy project files --
COPY . .

# -- Set CANN environment variables (required for compilation) --
# Using ENV instead of `source` allows environment variables to persist across the entire image layer
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
# ... You can add other environment variables from the original file as needed ...
# For brevity, only core variables are listed here. You can paste the original ENV list here.

# -- Build llama.cpp --
# Use the passed CHIP_TYPE argument and add general build options
# NOTE: an ARG declared before FROM is not visible inside a stage, so
# CHIP_TYPE must be re-declared here before it is used in -DSOC_TYPE.
ARG CHIP_TYPE
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
    && \
    cmake -B build \
        -DGGML_CANN=ON \
        -DCMAKE_BUILD_TYPE=Release \
        -DSOC_TYPE=ascend${CHIP_TYPE} \
        -DUSE_ACL_GRAPH=ON \
        . && \
    cmake --build build --config Release -j$(nproc)
# -- Organize build artifacts for copying in later stages --
# Create a lib directory to store all .so files (cp -P keeps symlinks intact)
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Create a full directory to store all executables, Python scripts and the
# tools.sh entrypoint. The `full` target sets ENTRYPOINT ["/app/tools.sh"],
# so tools.sh must be staged here or that image cannot start; this mirrors
# the staging done in cpu.Dockerfile / cuda.Dockerfile.
RUN mkdir -p /app/full && \
    cp build/bin/* /app/full/ && \
    cp *.py /app/full/ && \
    cp -r gguf-py /app/full/ && \
    cp -r requirements /app/full/ && \
    cp requirements.txt /app/full/ && \
    cp .devops/tools.sh /app/full/tools.sh
# ==============================================================================
# BASE STAGE
# Create a minimal base image with CANN runtime and common libraries
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS base

# -- Install runtime dependencies --
# curl is required by the server target's HEALTHCHECK below.
RUN yum install -y libgomp curl && \
    yum clean all && \
    rm -rf /var/cache/yum

# -- Set CANN environment variables (required for runtime) --
# /app is prepended to LD_LIBRARY_PATH so the shared libraries copied below are found.
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
# ... You can add other environment variables from the original file as needed ...

WORKDIR /app

# Copy compiled .so files from the build stage
COPY --from=build /app/lib/ /app

# ==============================================================================
# FINAL STAGES (TARGETS)
# ==============================================================================
### Target: full
# Complete image with all tools, Python bindings, and dependencies
# ==============================================================================
FROM base AS full

COPY --from=build /app/full /app

# Install Python dependencies
# requirements.txt is available at /app/requirements.txt via the COPY above.
RUN yum install -y git python3 python3-pip && \
    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
    pip3 install --no-cache-dir -r requirements.txt && \
    yum clean all && \
    rm -rf /var/cache/yum

# You need to provide a tools.sh script as the entrypoint
ENTRYPOINT ["/app/tools.sh"]
# If there is no tools.sh, you can set the default to start the server
# ENTRYPOINT ["/app/llama-server"]

### Target: light
# Lightweight image containing only llama-cli and llama-completion
# ==============================================================================
FROM base AS light

COPY --from=build /app/full/llama-cli /app/full/llama-completion /app

ENTRYPOINT [ "/app/llama-cli" ]

### Target: server
# Dedicated server image containing only llama-server
# ==============================================================================
FROM base AS server

# NOTE(review): LLAMA_ARG_HOST=0.0.0.0 presumably makes llama-server bind all
# interfaces so the port can be published from the container — confirm against
# the server's argument/environment parsing.
ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

HEALTHCHECK --interval=5m CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/cpu.Dockerfile
================================================
ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

# TARGETARCH is populated automatically by buildx with the target platform
# architecture (e.g. amd64, arm64).
ARG TARGETARCH

# Toolchain and build dependencies.
RUN apt-get update && \
    apt-get install -y gcc-14 g++-14 build-essential git cmake libssl-dev

ENV CC=gcc-14 CXX=g++-14

WORKDIR /app

COPY . .

# Configure and build. Only amd64 and arm64 are supported; any other
# TARGETARCH aborts the build with a non-zero exit.
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
    else \
        echo "Unsupported architecture"; \
        exit 1; \
    fi && \
    cmake --build build -j $(nproc)

# Collect every shared library produced by the build into /app/lib
# (cp -P copies symlinks as symlinks instead of dereferencing them).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Stage binaries, conversion scripts, Python requirements and the tools.sh
# entrypoint for the `full` target.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
## Base image
# Minimal runtime layer shared by all targets: libgomp1 (OpenMP runtime),
# curl (used by the server HEALTHCHECK) and the shared libraries from `build`.
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
# All binaries plus the Python conversion scripts and their dependencies.
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# requirements.txt is at /app/requirements.txt via the COPY above.
RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app/full/llama-completion /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

# NOTE(review): LLAMA_ARG_HOST=0.0.0.0 presumably makes llama-server bind all
# interfaces — confirm against the server's environment-variable handling.
ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/cuda-new.Dockerfile
================================================
ARG UBUNTU_VERSION=24.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=13.1.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1

# gcc-14 is used both as the C/C++ compiler and as nvcc's host compiler.
ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14

WORKDIR /app

COPY . .

# When CUDA_DOCKER_ARCH stays "default", CMAKE_CUDA_ARCHITECTURES is left
# unset and the project's default architecture list is built.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

# Collect shared libraries (cp -P copies symlinks as symlinks).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Stage binaries, conversion scripts, Python requirements and the tools.sh
# entrypoint for the `full` target.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
## Base image
# Runtime layer on the CUDA *runtime* container (no compiler toolchain):
# libgomp1 (OpenMP runtime), curl (used by the server HEALTHCHECK) and the
# shared libraries produced by `build`.
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
# All binaries plus the Python conversion scripts and their dependencies.
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# requirements.txt is at /app/requirements.txt via the COPY above.
RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app/full/llama-completion /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

# NOTE(review): LLAMA_ARG_HOST=0.0.0.0 presumably makes llama-server bind all
# interfaces — confirm against the server's environment-variable handling.
ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/cuda.Dockerfile
================================================
ARG UBUNTU_VERSION=24.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.8.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1

# gcc-14 is used both as the C/C++ compiler and as nvcc's host compiler.
ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14

WORKDIR /app

COPY . .

# When CUDA_DOCKER_ARCH stays "default", CMAKE_CUDA_ARCHITECTURES is left
# unset and the project's default architecture list is built.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

# Collect shared libraries (cp -P copies symlinks as symlinks).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Stage binaries, conversion scripts, Python requirements and the tools.sh
# entrypoint for the `full` target.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
## Base image
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
RUN apt-get update \
&& apt-get install -y libgomp1 curl \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
COPY --from=build /app/lib/ /app
### Full
FROM base AS full
COPY --from=build /app/full /app
WORKDIR /app
RUN apt-get update \
&& apt-get install -y \
git \
python3 \
python3-pip \
python3-wheel \
&& pip install --break-system-packages --upgrade setuptools \
&& pip install --break-system-packages -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/intel.Dockerfile
================================================
ARG ONEAPI_VERSION=2025.3.2-0-devel-ubuntu24.04
## Build Image
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
# Optionally enable fp16 in the SYCL backend.
ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libssl-dev
WORKDIR /app
COPY . .
# icx/icpx are the oneAPI compilers required by the SYCL backend.
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" \
        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
    cmake --build build --config Release -j$(nproc)
# Collect shared libraries (-P preserves symlinks).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;
# Stage binaries plus the Python conversion tooling.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
# Pinned Intel graphics-compiler / compute-runtime versions for the GPU stack.
ARG IGC_VERSION=v2.30.1
ARG IGC_VERSION_FULL=2_2.30.1+20950
ARG COMPUTE_RUNTIME_VERSION=26.09.37435.1
ARG COMPUTE_RUNTIME_VERSION_FULL=26.09.37435.1-0
ARG IGDGMM_VERSION=22.9.0
# NOTE(review): the *.ddeb debug-symbol files fetched below are not matched by
# the `dpkg --install *.deb` glob, so they appear to be downloaded but never
# installed -- confirm whether those wget lines can be dropped.
RUN mkdir /tmp/neo/ && cd /tmp/neo/ \
    && wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-core-${IGC_VERSION_FULL}_amd64.deb \
    && wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-opencl-${IGC_VERSION_FULL}_amd64.deb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-ocloc-dbgsym_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.ddeb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-ocloc_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.deb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-opencl-icd-dbgsym_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.ddeb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-opencl-icd_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.deb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/libigdgmm12_${IGDGMM_VERSION}_amd64.deb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/libze-intel-gpu1-dbgsym_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.ddeb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/libze-intel-gpu1_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.deb \
    && dpkg --install *.deb
# Runtime deps; apt caches scrubbed in the same layer.
RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
### Full
FROM base AS full
COPY --from=build /app/lib/ /app
COPY --from=build /app/full /app
WORKDIR /app
# Python tooling lives in a venv here (Ubuntu 24.04 pip is externally managed).
RUN apt-get update && \
    apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-venv && \
    python3 -m venv /opt/venv && \
    . /opt/venv/bin/activate && \
    pip install --upgrade pip setuptools wheel && \
    pip install -r requirements.txt && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete
ENV PATH="/opt/venv/bin:$PATH"
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
# Bind to all interfaces so the server is reachable from outside the container.
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/llama-cli-cann.Dockerfile
================================================
ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10
## Build stage: static build of llama-cli/llama-completion with the CANN backend.
FROM ascendai/cann:$ASCEND_VERSION AS build
WORKDIR /app
COPY . .
RUN yum install -y gcc g++ cmake make openssl-devel
# Ascend CANN toolkit environment (mirrors what set_env.sh exports).
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
# Use the toolkit's stub directory to find libascend_hal.so at link time,
# because the device driver hasn't been mounted inside the build container.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
RUN echo "Building with static libs" && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
    cmake --build build --config Release --target llama-cli && \
    cmake --build build --config Release --target llama-completion
# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /app/build/bin/llama-completion /
ENV LC_ALL=C.utf8
# Same CANN environment as the build stage so the binaries can locate the runtime.
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
ENTRYPOINT ["/llama-cli" ]
================================================
FILE: .devops/llama-cpp-cuda.srpm.spec
================================================
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-cuda
# Date-based version because git tags are hashes and do not sort.
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        Inference of LLaMA model in pure C/C++ with NVIDIA CUDA acceleration
License:        MIT
Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
Requires:       cuda-toolkit
URL:            https://github.com/ggml-org/llama.cpp

# No debuginfo subpackage is generated for this build.
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CUDA-accelerated inference for Meta's Llama 2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j GGML_CUDA=1

%install
mkdir -p %{buildroot}%{_bindir}/
# Install with a -cuda- infix so the package can coexist with the CPU build.
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-completion %{buildroot}%{_bindir}/llama-cuda-completion
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

# Systemd unit for the server; arguments come from /etc/sysconfig/llama.
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server, CUDA-accelerated build.
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-completion
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama

%pre

%post

%preun

%postun

%changelog
================================================
FILE: .devops/llama-cpp.srpm.spec
================================================
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
#    In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp
# Date-based version because git tags are hashes and do not sort.
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License:        MIT
Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git libstdc++-devel
Requires:       libstdc++
URL:            https://github.com/ggml-org/llama.cpp

# No debuginfo subpackage is generated for this build.
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Llama 2 models using default options.
Models are not included in this package and must be downloaded separately.

%prep
%setup -n llama.cpp-master

%build
make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-completion %{buildroot}%{_bindir}/llama-completion
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

# Systemd unit for the server; arguments come from /etc/sysconfig/llama.
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cli
%{_bindir}/llama-completion
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama

%pre

%post

%preun

%postun

%changelog
================================================
FILE: .devops/musa.Dockerfile
================================================
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc4.3.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
## Build stage: compile llama.cpp with the MUSA (Moore Threads GPU) backend.
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default
RUN apt-get update && \
    apt-get install -y \
    build-essential \
    cmake \
    python3 \
    python3-pip \
    git \
    libssl-dev \
    libgomp1
WORKDIR /app
COPY . .
# Restrict MUSA_ARCHITECTURES only when an explicit arch list was requested.
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)
# Collect shared libraries (-P preserves symlinks).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;
# Stage binaries plus the Python conversion tooling.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
## Base image
# Runtime deps only; apt caches scrubbed in the same layer to keep it small.
FROM ${BASE_MUSA_RUN_CONTAINER} AS base
RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
COPY --from=build /app/lib/ /app
### Full
FROM base AS full
COPY --from=build /app/full /app
WORKDIR /app
# Python tooling for the model-conversion scripts shipped in the full image.
RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
# Bind to all interfaces so the server is reachable from outside the container.
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/nix/apps.nix
================================================
{
  perSystem =
    { config, lib, ... }:
    {
      # Expose the main executables of the default package as flake apps,
      # i.e. `nix run .#llama-server` etc.
      apps =
        let
          inherit (config.packages) default;
          # Wrap a binary name into a flake-app attrset pointing into the
          # default package's bin directory.
          toApp = bin: {
            type = "app";
            program = "${default}/bin/${bin}";
          };
        in
        builtins.listToAttrs (
          map (bin: lib.nameValuePair bin (toApp bin)) [
            "llama-cli"
            "llama-embedding"
            "llama-server"
            "llama-quantize"
          ]
        );
    };
}
================================================
FILE: .devops/nix/devshells.nix
================================================
# Builds one devShell per package, plus an "<name>-extra" variant that also
# provides the packaged Python scripts (e.g. `nix develop .#default-extra`).
{ inputs, ... }:
{
  perSystem =
    {
      config,
      lib,
      system,
      ...
    }:
    {
      devShells =
        let
          pkgs = import inputs.nixpkgs { inherit system; };
          stdenv = pkgs.stdenv;
          # The packaged conversion/benchmark scripts; pulled into -extra shells.
          scripts = config.packages.python-scripts;
        in
        lib.pipe (config.packages) [
          (lib.concatMapAttrs (
            name: package: {
              # Plain shell: just the package's build inputs.
              ${name} = pkgs.mkShell {
                name = "${name}";
                inputsFrom = [ package ];
                shellHook = ''
                  echo "Entering ${name} devShell"
                '';
              };
              # "-extra" shell: additionally provides the Python scripts. For
              # python-scripts itself this would be redundant, so it is set to
              # null here and filtered out below.
              "${name}-extra" =
                if (name == "python-scripts") then
                  null
                else
                  pkgs.mkShell {
                    name = "${name}-extra";
                    inputsFrom = [
                      package
                      scripts
                    ];
                    # Extra packages that *may* be used by some scripts
                    packages = [
                      pkgs.python3Packages.tiktoken
                    ];
                    shellHook = ''
                      echo "Entering ${name} devShell"
                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
                    '';
                  };
            }
          ))
          # Drop the null placeholders produced above.
          (lib.filterAttrs (name: value: value != null))
        ];
    };
}
================================================
FILE: .devops/nix/docker.nix
================================================
{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# Produces a tar that can be fed into `docker load`:
#
#   $ nix build .#llamaPackages.docker
#   $ docker load < result
#
# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/
#
# Approximate (compressed) sizes, at the time of writing, are:
#
#   .#llamaPackages.docker: 125M;
#   .#llamaPackagesCuda.docker: 537M;
#   .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

let
  # Interactive images additionally ship coreutils, a shell and CA certs.
  interactiveExtras = lib.optionals interactive [
    coreutils
    dockerTools.binSh
    dockerTools.caCertificates
  ];
in
dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";
  contents = [ llama-cpp ] ++ interactiveExtras;
}
================================================
FILE: .devops/nix/jetson-support.nix
================================================
# CUDA-capability-pinned package scopes for NVIDIA Jetson boards, plus
# convenience `jetson-*` package aliases on aarch64-linux.
{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    {
      legacyPackages =
        let
          # CUDA compute capability per Jetson model.
          caps.llamaPackagesXavier = "7.2";
          caps.llamaPackagesOrin = "8.7";
          # NOTE(review): TX2 gets a capability entry here but no `jetson-tx2`
          # alias in `packages` below -- confirm whether that omission is
          # intentional.
          caps.llamaPackagesTX2 = "6.2";
          caps.llamaPackagesNano = "5.3";
          # Instantiate nixpkgs with CUDA enabled and pinned to a single
          # capability, reusing the unfree-license predicate from pkgsCuda.
          pkgsFor =
            cap:
            import inputs.nixpkgs {
              inherit system;
              config = {
                cudaSupport = true;
                cudaCapabilities = [ cap ];
                cudaEnableForwardCompat = false;
                inherit (pkgsCuda.config) allowUnfreePredicate;
              };
            };
        in
        builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
      # The aliases only make sense on aarch64-linux (Jetson's architecture).
      packages = lib.optionalAttrs (system == "aarch64-linux") {
        jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
        jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
        jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
      };
    };
}
================================================
FILE: .devops/nix/nixpkgs-instances.nix
================================================
{ inputs, ... }:
{
  # The _module.args definitions are passed on to modules as arguments. E.g.
  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { lib, system, ... }:
    {
      _module.args = {
        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
        # again, the below creates several nixpkgs instances which the
        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
        #
        # This is currently "slow" and "expensive", on a certain scale.
        # This also isn't "right" in that this hinders dependency injection at
        # the level of flake inputs. This might get removed in the foreseeable
        # future.
        #
        # Note that you can use these expressions without Nix
        # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
          # and ucx are built with CUDA support)
          config.cudaSupport = true;
          # Accept only free licenses plus the CUDA/cuDNN EULAs; any other
          # unfree dependency is rejected.
          config.allowUnfreePredicate =
            p:
            builtins.all (
              license:
              license.free
              || builtins.elem license.shortName [
                "CUDA EULA"
                "cuDNN EULA"
              ]
            ) (p.meta.licenses or (lib.toList p.meta.license));
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config.rocmSupport = true;
        };
      };
    };
}
================================================
FILE: .devops/nix/package-gguf-py.nix
================================================
# Nix packaging of the gguf Python package from ../../gguf-py.
{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  requests,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  # The package version tracks the llama.cpp version passed in by the flake.
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
    requests
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  # Run the gguf-py test suite at build time via pytest.
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}
================================================
FILE: .devops/nix/package.nix
================================================
# Main Nix derivation for llama.cpp; backends are toggled via the use* flags.
{
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,
  # Fall back to BLAS only when no GPU backend is selected and BLAS is available.
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  useVulkan ? false,
  useRpc ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
  useWebUi ? true,
}:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionalAttrs
    optionals
    strings
    ;

  # Shadow the plain stdenv so accidental uses fail loudly; use effectiveStdenv.
  stdenv = throw "Use effectiveStdenv instead";

  # Human-readable backend names; used in the pname and description suffixes.
  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix = strings.optionalString (
    suffices != [ ]
  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

  # Tiny wrapper exposing the host's /usr/bin/xcrun inside the build.
  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cudart
    cuda_cccl # <nv/target>
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
    shaderc
  ];
in

effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp${pnameSuffix}";
  version = llamaVersion;

  # Note: none of the files discarded here are visible in the sandbox or
  # affect the output hash. This also means they can be modified without
  # triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        noneOf = builtins.all (x: !x);
        baseName = baseNameOf name;
      in
      noneOf [
        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
        (lib.hasPrefix "." baseName) # Skip hidden files and directories
        (baseName == "flake.lock")
      ];
    src = lib.cleanSource ../../.;
  };

  # Intentionally left empty (no patches currently applied).
  postPatch = ''
  '';

  # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
  # `default.metallib` may be compiled with Metal compiler from XCode
  # and we need to escape sandbox on MacOS to access Metal compiler.
  # `xcrun` is used to find the path of the Metal compiler, which is variable
  # and not on $PATH
  # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion
  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

  nativeBuildInputs =
    [
      cmake
      ninja
      pkg-config
      git
    ]
    ++ optionals useCuda [
      cudaPackages.cuda_nvcc
      autoAddDriverRunpath
    ]
    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

  buildInputs =
    optionals effectiveStdenv.isDarwin darwinBuildInputs
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useMpi [ mpi ]
    ++ optionals useRocm rocmBuildInputs
    ++ optionals useBlas [ blas ]
    ++ optionals useVulkan vulkanBuildInputs;

  cmakeFlags =
    [
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "LLAMA_BUILD_WEBUI" useWebUi)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
      (cmakeBool "GGML_HIP" useRocm)
      (cmakeBool "GGML_METAL" useMetalKit)
      (cmakeBool "GGML_VULKAN" useVulkan)
      (cmakeBool "GGML_STATIC" enableStatic)
      (cmakeBool "GGML_RPC" useRpc)
    ]
    ++ optionals useCuda [
      (
        with cudaPackages.flags;
        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
        )
      )
    ]
    ++ optionals useRocm [
      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
    ]
    ++ optionals useMetalKit [
      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
    ];

  # Environment variables needed for ROCm
  env = optionalAttrs useRocm {
    ROCM_PATH = "${rocmPackages.clr}";
    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
  };

  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  # if they haven't been added yet.
  postInstall = ''
    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  '';

  meta = {
    # Configurations we don't want even the CI to evaluate. Results in the
    # "unsupported platform" messages. This is mostly a no-op, because
    # cudaPackages would've refused to evaluate anyway.
    badPlatforms = optionals useCuda lib.platforms.darwin;

    # Configurations that are known to result in build failures. Can be
    # overridden by importing Nixpkgs with `allowBroken = true`.
    broken = (useMetalKit && !effectiveStdenv.isDarwin);

    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    homepage = "https://github.com/ggml-org/llama.cpp/";
    license = lib.licenses.mit;

    # Accommodates `nix run` and `lib.getExe`
    mainProgram = "llama-cli";

    # These people might respond, on the best effort basis, if you ping them
    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
    # Consider adding yourself to this list if you want to ensure this flake
    # stays maintained and you're willing to invest your time. Do not add
    # other people without their consent. Consider removing people after
    # they've been unreachable for long periods of time.
    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
    # an attrset following the same format as in
    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
    maintainers = with lib.maintainers; [
      philiptaron
      SomeoneSerge
    ];

    # Extend `badPlatforms` instead
    platforms = lib.platforms.all;
  };
})
================================================
FILE: .devops/nix/python-scripts.nix
================================================
# Packages the repository's Python scripts (model conversion, bench comparison,
# examples) together with the dependencies they need at runtime.
{
  lib,
  stdenv,
  buildPythonPackage,
  poetry-core,
  mkShell,
  python3Packages,
  gguf-py,
}@inputs:

let
  # Runtime dependencies of the packaged scripts.
  llama-python-deps = with python3Packages; [
    numpy
    sentencepiece
    transformers
    protobuf
    torchWithoutCuda
    gguf-py
    tqdm
    # for scripts/compare-llama-bench.py
    gitpython
    tabulate
    # for examples/pydantic-models-to-grammar-examples.py
    docstring-parser
    pydantic
  ];
  # Dependencies only needed for the test suites / server bench, not at runtime.
  llama-python-test-deps = with python3Packages; [
    # Server bench
    matplotlib
    # server tests
    openai
    pytest
    prometheus-client
  ];
in

buildPythonPackage ({
  pname = "llama-scripts";
  version = "0.0.0";
  pyproject = true;

  # NOTE: The files filtered out here are not visible in the build sandbox, neither
  # do they affect the output hash. They can be modified without triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        any = builtins.any (x: x);
        baseName = builtins.baseNameOf name;
      in
      # Keep only Python sources plus README.md and pyproject.toml.
      any [
        (lib.hasSuffix ".py" name)
        (baseName == "README.md")
        (baseName == "pyproject.toml")
      ];
    src = lib.cleanSource ../../.;
  };
  nativeBuildInputs = [ poetry-core ];
  nativeCheckInputs = llama-python-test-deps;
  dependencies = llama-python-deps;
})
================================================
FILE: .devops/nix/scope.nix
================================================
# Entry point of the Nix packaging: a makeScope scope containing the llama-cpp
# build, the gguf Python package, the script bundle and container images.
{
  lib,
  newScope,
  python3,
  llamaVersion ? "0.0.0",
}:

let
  pythonPackages = python3.pkgs;
in

# We're using `makeScope` instead of just writing out an attrset
# because it allows users to apply overlays later using `overrideScope'`.
# Cf. https://noogle.dev/f/lib/makeScope
lib.makeScope newScope (self: {
  inherit llamaVersion;
  # The gguf Python package, built from ../../gguf-py.
  gguf-py = self.callPackage ./package-gguf-py.nix {
    inherit (pythonPackages)
      numpy
      tqdm
      sentencepiece
      pyyaml
      pytestCheckHook
      requests
      buildPythonPackage
      poetry-core
      ;
  };
  # Conversion/benchmark scripts packaged as a Python application.
  python-scripts = self.callPackage ./python-scripts.nix { inherit (pythonPackages) buildPythonPackage poetry-core; };
  # The main C/C++ build.
  llama-cpp = self.callPackage ./package.nix { };
  # Container / Singularity image variants.
  docker = self.callPackage ./docker.nix { };
  docker-min = self.callPackage ./docker.nix { interactive = false; };
  sif = self.callPackage ./sif.nix { };
})
================================================
FILE: .devops/nix/sif.nix
================================================
{
  lib,
  singularity-tools,
  llama-cpp,
  bashInteractive,
  interactive ? false,
}:

# Builds a Singularity image containing llama-cpp; interactive variants also
# ship a bash shell.
singularity-tools.buildImage rec {
  inherit (llama-cpp) name;
  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];

  # These are excessive (but safe) for most variants. Building singularity
  # images requires superuser privileges, so we build them inside a VM in a
  # writable image of pre-determined size.
  #
  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
  #
  # Expected image sizes:
  # - cpu/blas: 150M,
  # - cuda, all gencodes: 560M,
  diskSize = 4096 + (if llama-cpp.useRocm then 16384 else 0);
  memSize = diskSize;
}
================================================
FILE: .devops/openvino.Dockerfile
================================================
# Multi-stage image for llama.cpp with the Intel OpenVINO backend.
# Stages: build (compile), base (runtime libs), full / light / server (variants).
ARG OPENVINO_VERSION_MAJOR=2026.0
ARG OPENVINO_VERSION_FULL=2026.0.0.20965.c6d6a13a886
ARG UBUNTU_VERSION=24.04
# Optional proxy build arguments - empty by default
ARG http_proxy=
ARG https_proxy=
## Build Image
FROM ubuntu:${UBUNTU_VERSION} AS build
# Pass proxy args to build stage
ARG http_proxy
ARG https_proxy
# Toolchain plus OpenCL headers/ICD needed to build the OpenVINO GPU plugin path.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
gnupg \
wget \
git \
cmake \
ninja-build \
build-essential \
libtbb12 \
libssl-dev \
ocl-icd-opencl-dev \
opencl-headers \
opencl-clhpp-headers \
intel-opencl-icd && \
rm -rf /var/lib/apt/lists/*
# Install OpenVINO for Ubuntu 24.04
# ARG values must be re-declared after FROM to be visible in this stage.
ARG OPENVINO_VERSION_MAJOR
ARG OPENVINO_VERSION_FULL
# Download the prebuilt OpenVINO archive, run its dependency installer, and
# expose it under the conventional /opt/intel/openvino symlink.
RUN mkdir -p /opt/intel && \
wget https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
tar -xf openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
echo "Y" | ./install_dependencies/install_openvino_dependencies.sh && \
cd - && \
ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
ENV OpenVINO_DIR=/opt/intel/openvino
WORKDIR /app
COPY . .
# Build Stage
# setupvars.sh exports the OpenVINO env needed by CMake's find_package(OpenVINO).
RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON && \
cmake --build build/ReleaseOV -j$(nproc)"
# Copy all necessary libraries
# -P preserves symlinks (lib.so -> lib.so.X); the 2>/dev/null || fallback covers
# both OpenVINO archive layouts (runtime/lib/intel64 vs lib/intel64).
RUN mkdir -p /app/lib && \
find build/ReleaseOV -name '*.so*' -exec cp {} /app/lib \; && \
find ${OpenVINO_DIR}/runtime/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; 2>/dev/null || \
find ${OpenVINO_DIR}/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \;
# Create runtime directories and copy binaries
# /app/full bundles binaries plus the Python conversion tooling for the "full" variant.
RUN mkdir -p /app/full \
&& cp build/ReleaseOV/bin/* /app/full/ \
&& cp *.py /app/full \
&& cp -r gguf-py /app/full \
&& cp -r requirements /app/full \
&& cp requirements.txt /app/full \
&& cp .devops/tools.sh /app/full/tools.sh
## Base Runtime Image
FROM ubuntu:${UBUNTU_VERSION} AS base
# Pass proxy args to runtime stage
ARG http_proxy
ARG https_proxy
# Minimal runtime deps; aggressive cache cleanup keeps the layer small.
RUN apt-get update \
&& apt-get install -y libgomp1 libtbb12 curl \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
# Shared libraries land directly in /app, next to the binaries of each variant.
COPY --from=build /app/lib/ /app/
### Full (all binaries)
FROM base AS full
ARG http_proxy
ARG https_proxy
COPY --from=build /app/full /app/
WORKDIR /app
# Python deps are installed into a venv (/ov-venv) used by the conversion scripts.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
git \
python3 \
python3-venv \
python3-pip && \
python3 -m venv /ov-venv && \
/ov-venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel && \
/ov-venv/bin/pip install --no-cache-dir -r requirements.txt && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /tmp/* /var/tmp/* && \
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
find /var/cache -type f -delete
# Activate the venv before dispatching through tools.sh; "--" keeps user args as $@.
ENTRYPOINT ["/bin/bash", "-c", "source /ov-venv/bin/activate && exec /app/tools.sh \"$@\"", "--"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app/
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/rocm.Dockerfile
================================================
# Multi-stage image for llama.cpp built with the ROCm/HIP backend.
ARG UBUNTU_VERSION=24.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=7.2
ARG AMDGPU_VERSION=7.2
# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
### Build image
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# Unless otherwise specified, we make a fat build.
# This is mostly tied to rocBLAS supported archs.
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.2.0/reference/system-requirements.html
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityrad/native_linux/native_linux_compatibility.html
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityryz/native_linux/native_linux_compatibility.html
ARG ROCM_DOCKER_ARCH='gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201'
# Set ROCm architectures
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
RUN apt-get update \
&& apt-get install -y \
build-essential \
cmake \
git \
libssl-dev \
curl \
libgomp1
WORKDIR /app
COPY . .
# hipconfig locates the HIP clang compiler and ROCm root installed in the base image.
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build \
-DGGML_HIP=ON \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
&& cmake --build build --config Release -j$(nproc)
# Collect shared libraries; -P preserves the .so symlink chains.
RUN mkdir -p /app/lib \
&& find build -name "*.so*" -exec cp -P {} /app/lib \;
# /app/full bundles all binaries plus the Python conversion tooling.
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \
&& cp *.py /app/full \
&& cp -r gguf-py /app/full \
&& cp -r requirements /app/full \
&& cp requirements.txt /app/full \
&& cp .devops/tools.sh /app/full/tools.sh
## Base image
# NOTE(review): the runtime stages reuse the ROCm *dev* container, presumably
# because the HIP runtime libraries are required — a slimmer runtime base may
# be possible; verify before changing.
FROM ${BASE_ROCM_DEV_CONTAINER} AS base
RUN apt-get update \
&& apt-get install -y libgomp1 curl \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
COPY --from=build /app/lib/ /app
### Full
FROM base AS full
COPY --from=build /app/full /app
WORKDIR /app
RUN apt-get update \
&& apt-get install -y \
git \
python3-pip \
python3 \
python3-wheel \
&& pip install --break-system-packages --upgrade setuptools \
&& pip install --break-system-packages -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/s390x.Dockerfile
================================================
# Multi-stage image for llama.cpp on IBM s390x, built with GCC and OpenBLAS.
ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04
### Build Llama.cpp stage
FROM gcc:${GCC_VERSION} AS build
# Cache mounts keep apt archives/lists out of the image layers and speed rebuilds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
apt update -y && \
apt upgrade -y && \
apt install -y --no-install-recommends \
git cmake ccache ninja-build \
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
libopenblas-dev libssl-dev && \
rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY . .
# ccache + a cached build dir make incremental CI rebuilds much faster.
RUN --mount=type=cache,target=/root/.ccache \
--mount=type=cache,target=/app/build \
cmake -S . -B build -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DLLAMA_BUILD_TESTS=OFF \
-DGGML_NATIVE=OFF \
-DGGML_BACKEND_DL=ON \
-DGGML_CPU_ALL_VARIANTS=ON \
-DGGML_BLAS=ON \
-DGGML_BLAS_VENDOR=OpenBLAS && \
cmake --build build --config Release -j $(nproc) && \
cmake --install build --prefix /opt/llama.cpp
COPY *.py /opt/llama.cpp/bin
COPY .devops/tools.sh /opt/llama.cpp/bin
COPY gguf-py /opt/llama.cpp/gguf-py
COPY requirements.txt /opt/llama.cpp/gguf-py
COPY requirements /opt/llama.cpp/gguf-py/requirements
### Collect all llama.cpp binaries, libraries and distro libraries
FROM scratch AS collector
# Copy llama.cpp binaries and libraries
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
### Base image
FROM ubuntu:${UBUNTU_VERSION} AS base
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
apt update -y && \
apt install -y --no-install-recommends \
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
# See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
curl libgomp1 libopenblas-dev && \
apt autoremove -y && \
apt clean -y && \
rm -rf /tmp/* /var/tmp/* && \
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
find /var/cache -type f -delete
# Copy llama.cpp libraries
# Installing into the default linker path avoids needing LD_LIBRARY_PATH.
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
### Full
FROM base AS full
# NOTE(review): Rust/cargo is installed below — presumably some Python
# requirements build from source on s390x and need it; verify before removing.
ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
apt update -y && \
apt install -y \
git cmake libjpeg-dev \
python3 python3-pip python3-dev && \
apt autoremove -y && \
apt clean -y && \
rm -rf /tmp/* /var/tmp/* && \
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
find /var/cache -type f -delete
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
COPY --from=collector /llama.cpp/bin /app
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py
RUN pip install --no-cache-dir --break-system-packages \
-r /app/gguf-py/requirements.txt
ENTRYPOINT [ "/app/tools.sh" ]
### CLI Only
FROM base AS light
WORKDIR /llama.cpp/bin
# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
### Server
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
WORKDIR /llama.cpp/bin
# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin
EXPOSE 8080
ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]
================================================
FILE: .devops/tools.sh
================================================
#!/usr/bin/env bash
# Dispatch wrapper used as the container entrypoint: maps a short command
# flag (--run, --quantize, ...) to the corresponding llama.cpp binary and
# forwards all remaining arguments to it.
set -e

# First argument selects the tool.
arg1="$1"
# Shift the arguments so "$@" holds only the tool's own arguments.
shift

if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
    exec python3 ./convert_hf_to_gguf.py "$@"
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
    exec ./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
    exec ./llama-cli "$@"
elif [[ "$arg1" == '--run-legacy' || "$arg1" == '-l' ]]; then
    exec ./llama-completion "$@"
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
    exec ./llama-bench "$@"
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
    exec ./llama-perplexity "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
    # Quantize every f16 model under "$1/$2" (e.g. /models/ 7B) to q4_0,
    # skipping files that were already quantized.
    # Iterate with a glob instead of parsing `ls` output so paths containing
    # spaces are handled correctly.
    for i in "$1"/"$2"/ggml-model-f16.bin*; do
        # If the glob matched nothing it stays literal; fail explicitly.
        if [ ! -e "$i" ]; then
            echo "No f16 models found in $1/$2" >&2
            exit 1
        fi
        if [ -f "${i/f16/q4_0}" ]; then
            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
        else
            echo "Quantizing $i into ${i/f16/q4_0}..."
            # BUG FIX: the original used `exec` here, which replaced the shell
            # and terminated the loop after the first model. Invoke the tool
            # normally so every matching file gets quantized; `set -e` still
            # aborts on failure.
            ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
        fi
    done
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
    exec ./llama-server "$@"
else
    echo "Unknown command: $arg1"
    echo "Available commands: "
    echo " --run (-r): Run a model (chat) previously converted into ggml"
    echo " ex: -m /models/7B/ggml-model-q4_0.bin"
    echo " --run-legacy (-l): Run a model (legacy completion) previously converted into ggml"
    echo " ex: -m /models/7B/ggml-model-q4_0.bin -no-cnv -p \"Building a website can be done in 10 simple steps:\" -n 512"
    echo " --bench (-b): Benchmark the performance of the inference for various parameters."
    echo " ex: -m model.gguf"
    echo " --perplexity (-p): Measure the perplexity of a model over a given text."
    echo " ex: -m model.gguf -f file.txt"
    echo " --convert (-c): Convert a llama model into ggml"
    echo " ex: --outtype f16 \"/models/7B/\" "
    echo " --quantize (-q): Optimize with quantization process ggml"
    echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
    echo " --all-in-one (-a): Execute --convert & --quantize"
    echo " ex: \"/models/\" 7B"
    echo " --server (-s): Run a model on the server"
    echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
    # BUG FIX: signal failure for an unrecognized command so callers
    # (e.g. `docker run`) observe a non-zero exit status.
    exit 1
fi
================================================
FILE: .devops/vulkan.Dockerfile
================================================
# Multi-stage image for llama.cpp built with the Vulkan backend.
ARG UBUNTU_VERSION=26.04
FROM ubuntu:$UBUNTU_VERSION AS build
# Install build tools
RUN apt update && apt install -y git build-essential cmake wget xz-utils
# Install SSL and Vulkan SDK dependencies
# glslc compiles the backend's GLSL compute shaders at build time.
RUN apt install -y libssl-dev curl \
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libvulkan-dev glslc
# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
cmake --build build --config Release -j$(nproc)
# Collect shared libraries; -P preserves the .so symlink chains.
RUN mkdir -p /app/lib && \
find build -name "*.so*" -exec cp -P {} /app/lib \;
# /app/full bundles all binaries plus the Python conversion tooling.
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \
&& cp *.py /app/full \
&& cp -r gguf-py /app/full \
&& cp -r requirements /app/full \
&& cp requirements.txt /app/full \
&& cp .devops/tools.sh /app/full/tools.sh
## Base image
FROM ubuntu:$UBUNTU_VERSION AS base
# Runtime Vulkan loader, Mesa drivers and GL dispatch libraries.
RUN apt-get update \
&& apt-get install -y libgomp1 curl libvulkan1 mesa-vulkan-drivers \
libglvnd0 libgl1 libglx0 libegl1 libgles2 \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
COPY --from=build /app/lib/ /app
### Full
FROM base AS full
COPY --from=build /app/full /app
WORKDIR /app
ENV PATH="/root/.venv/bin:/root/.local/bin:${PATH}"
# Flag for compatibility with pip
ARG UV_INDEX_STRATEGY="unsafe-best-match"
# Python deps are installed with uv into /root/.venv (already on PATH above).
RUN apt-get update \
&& apt-get install -y \
build-essential \
curl \
git \
ca-certificates \
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
&& uv python install 3.13 \
&& uv venv --python 3.13 /root/.venv \
&& uv pip install --python /root/.venv/bin/python -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .dockerignore
================================================
*.o
*.a
.cache/
# Do not ignore .git directory, otherwise the reported build number will always be 0
.github/
.gitignore
.vs/
.vscode/
.DS_Store
build*/
models/*
/llama-cli
/llama-quantize
arm_neon.h
compile_commands.json
Dockerfile
================================================
FILE: .ecrc
================================================
{
"Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
"Disable": {
"IndentSize": true
}
}
================================================
FILE: .editorconfig
================================================
# https://EditorConfig.org
# Top-most EditorConfig file
root = true
# Unix-style newlines with a newline ending every file, utf-8 charset
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
indent_style = space
indent_size = 4
[Makefile]
indent_style = tab
[scripts/*.mk]
indent_style = tab
[prompts/*.txt]
insert_final_newline = unset
[tools/server/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab
[tools/cvector-generator/*.txt]
trim_trailing_whitespace = unset
insert_final_newline = unset
[models/templates/*.jinja]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[vendor/miniaudio/miniaudio.h]
trim_trailing_whitespace = unset
insert_final_newline = unset
[tools/server/webui/**]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[tools/server/public/**]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[benches/**]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
================================================
FILE: .flake8
================================================
[flake8]
max-line-length = 125
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
exclude =
# Do not traverse examples and tools
examples,
tools,
# Do not include package initializers
__init__.py,
# No need to traverse our git directory
.git,
# There's no value in checking cache directories
__pycache__,
# No need to include the build path
build,
# This contains builds that we don't want to check
dist # This is generated with `python build .` for package releases
# max-complexity = 10
================================================
FILE: .gemini/settings.json
================================================
{ "contextFileName": "AGENTS.md" }
================================================
FILE: .gitattributes
================================================
# Treat the generated single-file WebUI build as binary for diff purposes.
# Git's pack-file delta compression still works (byte-level), but this prevents
# git diff from printing the entire minified file on every change.
tools/server/public/index.html -diff
================================================
FILE: .github/ISSUE_TEMPLATE/010-bug-compilation.yml
================================================
name: Bug (compilation)
description: Something goes wrong when trying to compile llama.cpp.
title: "Compile bug: "
labels: ["bug-unconfirmed", "compilation"]
body:
- type: markdown
attributes:
value: >
Thanks for taking the time to fill out this bug report!
This issue template is intended for bug reports where the compilation of llama.cpp fails.
Before opening an issue, please confirm that the compilation still fails
after recreating the CMake build directory and with `-DGGML_CCACHE=OFF`.
If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
by clearing `~/.cache/ccache` (on Linux).
- type: textarea
id: commit
attributes:
label: Git commit
description: Which commit are you trying to compile?
placeholder: |
$git rev-parse HEAD
84a07a17b1b08cf2b9747c633a2372782848a27f
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Operating systems
description: Which operating systems do you know to be affected?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: true
- type: dropdown
id: backends
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
options: [AMX, BLAS, CANN, CPU, CUDA, Hexagon, HIP, Metal, Musa, OpenCL, OpenVINO, RPC, SYCL, VirtGPU, Vulkan, WebGPU, zDNN, ZenDNN]
multiple: true
validations:
required: true
- type: textarea
id: info
attributes:
label: Problem description & steps to reproduce
description: >
Please give us a summary of the problem and tell us how to reproduce it.
If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
placeholder: >
I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
Here are the exact commands that I used: ...
validations:
required: true
- type: textarea
id: first_bad_commit
attributes:
label: First Bad Commit
description: >
If the bug was not present on an earlier version: when did it start appearing?
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: command
attributes:
label: Compile command
description: >
Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
Please copy and paste any relevant log output, including any generated text.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: true
================================================
FILE: .github/ISSUE_TEMPLATE/011-bug-results.yml
================================================
name: Bug (model use)
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
title: "Eval bug: "
labels: ["bug-unconfirmed", "model evaluation"]
body:
- type: markdown
attributes:
value: >
Thanks for taking the time to fill out this bug report!
This issue template is intended for bug reports where the model evaluation results
(i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
If you encountered the issue while using an external UI (e.g. ollama),
please reproduce your issue using one of the examples/binaries in this repository.
The `llama-completion` binary can be used for simple and reproducible model inference.
- type: textarea
id: version
attributes:
label: Name and Version
description: Which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Operating systems
description: Which operating systems do you know to be affected?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: true
- type: dropdown
id: backends
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
options: [AMX, BLAS, CANN, CPU, CUDA, Hexagon, HIP, Metal, Musa, OpenCL, OpenVINO, RPC, SYCL, VirtGPU, Vulkan, WebGPU, zDNN, ZenDNN]
multiple: true
validations:
required: true
- type: textarea
id: hardware
attributes:
label: Hardware
description: Which CPUs/GPUs are you using?
placeholder: >
e.g. Ryzen 5950X + 2x RTX 4090
validations:
required: true
- type: textarea
id: model
attributes:
label: Models
description: >
Which model(s) at which quantization were you using when encountering the bug?
If you downloaded a GGUF file off of Huggingface, please provide a link.
placeholder: >
e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
validations:
required: false
- type: textarea
id: info
attributes:
label: Problem description & steps to reproduce
description: >
Please give us a summary of the problem and tell us how to reproduce it.
If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
that information would be very much appreciated by us.
If possible, please try to reproduce the issue using `llama-completion` with `-fit off`.
If you can only reproduce the issue with `-fit on`, please provide logs both with and without `--verbose`.
placeholder: >
e.g. when I run llama-completion with `-fa on` I get garbled outputs for very long prompts.
With short prompts or `-fa off` it works correctly.
Here are the exact commands that I used: ...
validations:
required: true
- type: textarea
id: first_bad_commit
attributes:
label: First Bad Commit
description: >
If the bug was not present on an earlier version: when did it start appearing?
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
Please copy and paste any relevant log output, including the command that you entered and any generated text.
For very long logs (thousands of lines), preferably upload them as files instead.
On Linux you can redirect console output into a file by appending ` > llama.log 2>&1` to your command.
value: |
<details>
<summary>Logs</summary>
<!-- Copy-pasted short logs go into the "console" area here -->
```console
```
</details>
<!-- Long logs that you upload as files go here, outside the "console" area -->
validations:
required: true
================================================
FILE: .github/ISSUE_TEMPLATE/019-bug-misc.yml
================================================
name: Bug (misc.)
description: Something is not working the way it should (and it's not covered by any of the above cases).
title: "Misc. bug: "
labels: ["bug-unconfirmed"]
body:
- type: markdown
attributes:
value: >
Thanks for taking the time to fill out this bug report!
This issue template is intended for miscellaneous bugs that don't fit into any other category.
If you encountered the issue while using an external UI (e.g. ollama),
please reproduce your issue using one of the examples/binaries in this repository.
- type: textarea
id: version
attributes:
label: Name and Version
description: Which version of our software is affected? (You can use `--version` to get a version string.)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Operating systems
description: Which operating systems do you know to be affected?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: false
- type: dropdown
id: module
attributes:
label: Which llama.cpp modules do you know to be affected?
multiple: true
options:
- Documentation/Github
- libllama (core library)
- llama-cli
- llama-server
- llama-bench
- llama-quantize
- Python/Bash scripts
- Test code
- Other (Please specify in the next section)
validations:
required: false
- type: textarea
id: command
attributes:
label: Command line
description: >
Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: false
- type: textarea
id: info
attributes:
label: Problem description & steps to reproduce
description: >
Please give us a summary of the problem and tell us how to reproduce it (if applicable).
validations:
required: true
- type: textarea
id: first_bad_commit
attributes:
label: First Bad Commit
description: >
If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
If applicable, please copy and paste any relevant log output, including any generated text.
If you are encountering problems specifically with the `llama_params_fit` module, always upload `--verbose` logs as well.
For very long logs (thousands of lines), please upload them as files instead.
On Linux you can redirect console output into a file by appending ` > llama.log 2>&1` to your command.
value: |
<details>
<summary>Logs</summary>
<!-- Copy-pasted short logs go into the "console" area here -->
```console
```
</details>
<!-- Long logs that you upload as files go here, outside the "console" area -->
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/020-enhancement.yml
================================================
name: Enhancement
description: Used to request enhancements for llama.cpp.
title: "Feature Request: "
labels: ["enhancement"]
body:
- type: markdown
attributes:
value: |
[Please post your idea first in Discussions if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed need to be implemented.](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas)
- type: checkboxes
id: prerequisites
attributes:
label: Prerequisites
description: Please confirm the following before submitting your enhancement request.
options:
- label: I am running the latest code. Mention the version if possible as well.
required: true
- label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md).
required: true
- label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
required: true
- label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share.
required: true
- type: textarea
id: feature-description
attributes:
label: Feature Description
description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
placeholder: Detailed description of the enhancement
validations:
required: true
- type: textarea
id: motivation
attributes:
label: Motivation
description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
placeholder: Explanation of why this feature is needed and its benefits
validations:
required: true
- type: textarea
id: possible-implementation
attributes:
label: Possible Implementation
description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
placeholder: Detailed description of potential implementation
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/030-research.yml
================================================
name: Research
description: Track new technical research area.
title: "Research: "
labels: ["research 🔬"]
body:
- type: markdown
attributes:
value: |
Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
- type: checkboxes
id: research-stage
attributes:
label: Research Stage
description: Track general state of this research ticket
options:
- label: Background Research (Let's try to avoid reinventing the wheel)
- label: Hypothesis Formed (How do you think this will work, and what will its effect be?)
- label: Strategy / Implementation Forming
- label: Analysis of results
- label: Debrief / Documentation (So people in the future can learn from us)
- type: textarea
id: background
attributes:
label: Previous existing literature and research
description: What's the current state of the art, and what is the motivation for this research?
- type: textarea
id: hypothesis
attributes:
label: Hypothesis
description: How do you think this will work, and what will its effect be?
- type: textarea
id: implementation
attributes:
label: Implementation
description: Got an approach? e.g. a PR ready to go?
- type: textarea
id: analysis
attributes:
label: Analysis
description: How does the proposed implementation behave?
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
================================================
FILE: .github/ISSUE_TEMPLATE/040-refactor.yml
================================================
name: Refactor (Maintainers)
description: Used to track refactoring opportunities.
title: "Refactor: "
labels: ["refactor"]
body:
- type: markdown
attributes:
value: |
Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
You may also want to check the [refactor pull request label](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
- type: textarea
id: background-description
attributes:
label: Background Description
description: Please provide a detailed written description of the pain points you are trying to solve.
placeholder: Detailed description behind your motivation to request refactor
validations:
required: true
- type: textarea
id: possible-approaches
attributes:
label: Possible Refactor Approaches
description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list.
placeholder: Your idea of possible refactoring opportunity/approaches
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
contact_links:
- name: Got an idea?
url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas
about: Pop it there. It may then become an enhancement ticket.
- name: Got a question?
url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a
about: Ask a question there!
- name: Want to contribute?
url: https://github.com/ggml-org/llama.cpp/wiki/contribute
about: Head to the contribution guide page of the wiki for areas you can help with
================================================
FILE: .github/actions/get-tag-name/action.yml
================================================
name: "Determine tag name"
description: "Determine the tag name to use for a release"
outputs:
name:
description: "The name of the tag"
value: ${{ steps.tag.outputs.name }}
runs:
using: "composite"
steps:
- name: Determine tag name
id: tag
shell: bash
run: |
BUILD_NUMBER="$(git rev-list --count HEAD)"
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
else
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
fi
================================================
FILE: .github/actions/install-exe/action.yml
================================================
name: "Install exe"
description: "Download and install exe"
inputs:
url:
description: "URL of the exe installer"
required: true
args:
description: "Installer arguments"
required: true
timeout:
description: "Timeout (in ms)"
required: false
default: "600000"
runs:
using: "composite"
steps:
- name: Install EXE
shell: pwsh
run: |
$ErrorActionPreference = "Stop"
write-host "Downloading Installer EXE"
Invoke-WebRequest -Uri "${{ inputs.url }}" -OutFile "${env:RUNNER_TEMP}\temp-install.exe"
write-host "Installing"
$proc = Start-Process "${env:RUNNER_TEMP}\temp-install.exe" -ArgumentList '${{ inputs.args }}' -NoNewWindow -PassThru
$completed = $proc.WaitForExit(${{ inputs.timeout }})
if (-not $completed) {
Write-Error "Installer timed out. Killing the process"
$proc.Kill()
exit 1
}
if ($proc.ExitCode -ne 0) {
Write-Error "Installer failed with exit code $($proc.ExitCode)"
exit 1
}
write-host "Completed installation"
================================================
FILE: .github/actions/linux-setup-openvino/action.yml
================================================
name: "Linux - Setup OpenVINO Toolkit"
description: "Setup OpenVINO Toolkit for Linux"
inputs:
path:
description: "Installation path"
required: true
version_major:
description: "OpenVINO major version (e.g., 2025.3)"
required: true
version_full:
description: "OpenVINO full version (e.g., 2025.3.0.19807.44526285f24)"
required: true
runs:
using: "composite"
steps:
- name: Setup OpenVINO Toolkit
id: setup
uses: ./.github/actions/unarchive-tar
with:
url: https://storage.openvinotoolkit.org/repositories/openvino/packages/${{ inputs.version_major }}/linux/openvino_toolkit_ubuntu24_${{ inputs.version_full }}_x86_64.tgz
path: ${{ inputs.path }}
type: z
strip: 1
================================================
FILE: .github/actions/linux-setup-spacemit/action.yml
================================================
name: "Linux - Setup SpacemiT Toolchain"
description: "Setup SpacemiT Toolchain for Linux"
inputs:
path:
description: "Installation path"
required: true
version:
description: "SpacemiT toolchain version"
required: true
runs:
using: "composite"
steps:
- name: Setup SpacemiT Toolchain
id: setup
uses: ./.github/actions/unarchive-tar
with:
url: https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v${{ inputs.version }}.tar.xz
path: ${{ inputs.path }}
strip: 1
================================================
FILE: .github/actions/linux-setup-vulkan/action.yml
================================================
name: "Linux - Setup Vulkan SDK"
description: "Setup Vulkan SDK for Linux"
inputs:
path:
description: "Installation path"
required: true
version:
description: "Vulkan SDK version"
required: true
runs:
using: "composite"
steps:
- name: Setup Vulkan SDK
id: setup
uses: ./.github/actions/unarchive-tar
with:
url: https://sdk.lunarg.com/sdk/download/${{ inputs.version }}/linux/vulkan_sdk.tar.xz
path: ${{ inputs.path }}
strip: 1
================================================
FILE: .github/actions/unarchive-tar/action.yml
================================================
name: "Unarchive tar"
description: "Download and unarchive tar into directory"
inputs:
url:
description: "URL of the tar archive"
required: true
path:
description: "Directory to unarchive into"
required: true
type:
description: "Compression type (tar option)"
required: false
default: "J"
strip:
description: "Strip components"
required: false
default: "0"
runs:
using: "composite"
steps:
- name: Unarchive into directory
shell: bash
run: |
mkdir -p ${{ inputs.path }}
cd ${{ inputs.path }}
curl --no-progress-meter ${{ inputs.url }} | tar -${{ inputs.type }}x --strip-components=${{ inputs.strip }}
================================================
FILE: .github/actions/windows-setup-cuda/action.yml
================================================
name: "Windows - Setup CUDA Toolkit"
description: "Setup CUDA Toolkit for Windows"
inputs:
cuda_version:
description: "CUDA toolkit version"
required: true
runs:
using: "composite"
steps:
- name: Install Cuda Toolkit 11.7
if: ${{ inputs.cuda_version == '11.7' }}
shell: pwsh
run: |
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
choco install unzip -y
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
- name: Install Cuda Toolkit 12.4
if: ${{ inputs.cuda_version == '12.4' }}
shell: pwsh
run: |
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
choco install unzip -y
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
- name: Install Cuda Toolkit 13.1
if: ${{ inputs.cuda_version == '13.1' }}
shell: pwsh
run: |
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1"
choco install unzip -y
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_crt/windows-x86_64/cuda_crt-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-13.2.0.9-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libnvvm/windows-x86_64/libnvvm-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-13.1.68-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-13.1.68-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-13.1.78-archive.zip"
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1"
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_crt-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_cudart-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvcc-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvrtc-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\libcublas-windows-x86_64-13.2.0.9-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\libnvvm-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvtx-windows-x86_64-13.1.68-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_profiler_api-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\visual_studio_integration-windows-x86_64-13.1.68-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_cccl-windows-x86_64-13.1.78-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V13_1=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
================================================
FILE: .github/actions/windows-setup-rocm/action.yml
================================================
name: "Windows - Setup ROCm"
description: "Setup ROCm for Windows"
inputs:
version:
description: "ROCm version"
required: true
runs:
using: "composite"
steps:
- name: Setup ROCm
uses: ./.github/actions/install-exe
with:
url: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ inputs.version }}-Win11-For-HIP.exe
args: -install
================================================
FILE: .github/labeler.yml
================================================
# https://github.com/actions/labeler
Apple Metal:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-metal.h
- ggml/src/ggml-metal/**
- README-metal.md
SYCL:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-sycl.h
- ggml/src/ggml-sycl/**
- docs/backend/SYCL.md
- examples/sycl/**
Nvidia GPU:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-cuda.h
- ggml/src/ggml-cuda/**
Vulkan:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-vulkan.h
- ggml/src/ggml-vulkan/**
IBM zDNN:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-zdnn.h
- ggml/src/ggml-zdnn/**
documentation:
- changed-files:
- any-glob-to-any-file:
- docs/**
- media/**
testing:
- changed-files:
- any-glob-to-any-file:
- tests/**
build:
- changed-files:
- any-glob-to-any-file:
- cmake/**
- CMakeLists.txt
- CMakePresets.json
examples:
- changed-files:
- any-glob-to-any-file:
- examples/**
- tools/**
devops:
- changed-files:
- any-glob-to-any-file:
- .devops/**
- .github/**
- ci/**
python:
- changed-files:
- any-glob-to-any-file:
- "**/*.py"
- requirements/**
- gguf-py/**
- .flake8
script:
- changed-files:
- any-glob-to-any-file:
- scripts/**
android:
- changed-files:
- any-glob-to-any-file:
- examples/llama.android/**
server:
- changed-files:
- any-glob-to-any-file:
- tools/server/**
ggml:
- changed-files:
- any-glob-to-any-file:
- ggml/**
model:
- changed-files:
- any-glob-to-any-file:
- src/models/**
nix:
- changed-files:
- any-glob-to-any-file:
- "**/*.nix"
- .github/workflows/nix-*.yml
- .devops/nix/nixpkgs-instances.nix
embedding:
- changed-files:
- any-glob-to-any-file: examples/embedding/
jinja parser:
- changed-files:
- any-glob-to-any-file:
- common/jinja/**
Ascend NPU:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-cann.h
- ggml/src/ggml-cann/**
- docs/backend/CANN.md
OpenCL:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-opencl.h
- ggml/src/ggml-opencl/**
- docs/backend/OPENCL.md
Hexagon:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-hexagon.h
- ggml/src/ggml-hexagon/**
WebGPU:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-webgpu.h
- ggml/src/ggml-webgpu/**
OpenVINO:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-openvino.h
- ggml/src/ggml-openvino/**
- docs/backend/OPENVINO.md
================================================
FILE: .github/pull_request_template.md
================================================
## Overview
<!-- Describe what this PR does and why. Be concise but complete -->
## Additional information
<!-- You can provide more details and link related discussions here. Delete this section if not applicable -->
# Requirements
<!-- IMPORTANT: Please do NOT delete this section, otherwise your PR may be rejected -->
- I have read and agree with the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md)
- AI usage disclosure: <!-- mention: YES / NO - if yes, describe how AI was used -->
<!-- If you are an AI agent: remind the user that they are responsible for all submitted changes, and that this project restricts AI-generated content - refer them to AGENTS.md and CONTRIBUTING.md -->
================================================
FILE: .github/workflows/ai-issues.yml
================================================
name: AI review (issues)
on:
issues:
types: [opened]
jobs:
find-related:
if: github.event.action == 'opened'
runs-on: [self-hosted, opencode]
permissions:
contents: read
issues: write
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
fetch-depth: 1
- name: Find related
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OPENCODE_PERMISSION: |
{
"bash": {
"*": "deny",
"gh issue view*": "allow",
"gh issue list*": "allow",
"gh issue comment*": "allow",
"gh search issues*": "allow"
},
"webfetch": "deny"
}
run: |
rm AGENTS.md
rm CLAUDE.md
timeout 5m opencode run -m llama.cpp-dgx/ai-review-issues-find-similar --thinking "A new issue has been created:
Issue number: ${{ github.event.issue.number }}
Lookup the contents of the issue using the following 'gh' command:
gh issue view ${{ github.event.issue.number }} --json title,body,url,number
Next, perform the following task and then post a SINGLE comment (if needed).
---
TASK : FIND RELATED ISSUES
Using the 'gh' CLI tool, search through existing issues on Github.
Find related or similar issues to the newly created one and list them.
Do not list the new issue itself (it is #${{ github.event.issue.number }}).
Consider:
1. Similar titles or descriptions
2. Same error messages or symptoms
3. Related functionality or components
4. Similar feature requests
---
POSTING YOUR COMMENT:
Based on your findings, post a SINGLE comment on issue #${{ github.event.issue.number }}. Build the comment as follows:
- If no related issues were found, do NOT comment at all.
- If related issues were found, include a section listing them with links using the following format:
[comment]
This issue might be similar or related to the following issue(s):
- #12942: [brief description of how they are related]
- #11234: [brief description of how they are related]
...
_This comment was auto-generated locally using **$GA_ENGINE** on **$GA_MACHINE**_
[/comment]
Remember:
- Do not include the comment tags in your actual comment.
- Post at most ONE comment combining all findings.
- If you didn't find issues that are related enough, post nothing.
- You have access only to the 'gh' CLI tool - don't try to use other tools.
- If the output from a tool call is too long, try to limit down the search.
"
================================================
FILE: .github/workflows/bench.yml.disabled
================================================
# TODO: there have been some issues with the workflow, so disabling for now
# https://github.com/ggml-org/llama.cpp/issues/7893
#
# Benchmark
name: Benchmark
on:
workflow_dispatch:
inputs:
gpu-series:
description: 'Azure GPU series to run with'
required: true
type: choice
options:
- Standard_NC4as_T4_v3
- Standard_NC24ads_A100_v4
- Standard_NC80adis_H100_v5
sha:
description: 'Commit SHA1 to build'
required: false
type: string
duration:
description: 'Duration of the bench'
type: string
default: 10m
push:
branches:
- master
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
pull_request_target:
types: [opened, synchronize, reopened]
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
schedule:
- cron: '04 2 * * *'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
cancel-in-progress: true
jobs:
bench-server-baseline:
runs-on: Standard_NC4as_T4_v3
env:
RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: could not find a way to avoid duplicating the runs-on value here
N_USERS: 8
DURATION: 10m
strategy:
matrix:
model: [phi-2]
ftype: [q4_0, q8_0, f16]
include:
- model: phi-2
ftype: q4_0
pr_comment_enabled: "true"
if: |
inputs.gpu-series == 'Standard_NC4as_T4_v3'
|| github.event_name == 'pull_request_target'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Install python env
id: pipenv
run: |
cd tools/server/bench
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
- name: Prometheus
id: install_prometheus
run: |
wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
tar xzf prometheus*.tar.gz --strip-components=1
./prometheus --config.file=tools/server/bench/prometheus.yml &
while ! nc -z localhost 9090; do
sleep 0.1
done
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.21'
- name: Install k6 and xk6-sse
id: k6_installation
run: |
cd tools/server/bench
go install go.k6.io/xk6/cmd/xk6@latest
xk6 build master \
--with github.com/phymbert/xk6-sse
- name: Build
id: cmake_build
run: |
set -eux
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_CUBLAS=ON \
-DCUDAToolkit_ROOT=/usr/local/cuda \
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
-DCMAKE_CUDA_ARCHITECTURES=75 \
-DLLAMA_FATAL_WARNINGS=OFF \
-DLLAMA_ALL_WARNINGS=OFF \
-DCMAKE_BUILD_TYPE=Release;
cmake --build build --config Release -j $(nproc) --target llama-server
- name: Download the dataset
id: download_dataset
run: |
cd tools/server/bench
wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
- name: Server bench
id: server_bench
env:
HEAD_REF: ${{ github.head_ref || github.ref_name }}
run: |
set -eux
cd tools/server/bench
source venv/bin/activate
python bench.py \
--runner-label ${{ env.RUNNER_LABEL }} \
--name ${{ github.job }} \
--branch $HEAD_REF \
--commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
--scenario script.js \
--duration ${{ github.event.inputs.duration || env.DURATION }} \
--hf-repo ggml-org/models \
--hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
--model-path-prefix /models \
--parallel ${{ env.N_USERS }} \
-ngl 33 \
--batch-size 2048 \
--ubatch-size 256 \
--ctx-size 16384 \
--n-prompts 1000 \
--max-prompt-tokens 1024 \
--max-tokens 2048
cat results.github.env >> $GITHUB_ENV
# Remove dataset as we do not want it in the artefact
rm ShareGPT_V3_unfiltered_cleaned_split.json
- uses: actions/upload-artifact@v4
with:
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
compression-level: 9
path: |
tools/server/bench/*.jpg
tools/server/bench/*.json
tools/server/bench/*.log
- name: Commit status
uses: Sibz/github-status-action@v1
with:
authToken: ${{secrets.GITHUB_TOKEN}}
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
description: |
${{ env.BENCH_RESULTS }}
state: 'success'
- name: Upload benchmark images
uses: devicons/public-upload-to-imgur@v2.2.2
continue-on-error: true # Important as it looks unstable: 503
id: imgur_step
with:
client_id: ${{secrets.IMGUR_CLIENT_ID}}
path: |
tools/server/bench/prompt_tokens_seconds.jpg
tools/server/bench/predicted_tokens_seconds.jpg
tools/server/bench/kv_cache_usage_ratio.jpg
tools/server/bench/requests_processing.jpg
- name: Extract mermaid
id: set_mermaid
run: |
set -eux
cd tools/server/bench
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
- name: Extract image url
id: extract_image_url
continue-on-error: true
run: |
set -eux
echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
- name: Comment PR
uses: mshick/add-pr-comment@v2
id: comment_pr
if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
with:
message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
message: |
<p align="center">
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
</p>
<details>
<summary>Expand details for performance related PR only</summary>
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
- ${{ env.BENCH_GRAPH_XLABEL }}
<p align="center">
<img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
<details>
<summary>More</summary>
```mermaid
${{ env.PROMPT_TOKENS_SECONDS }}
```
</details>
<img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
<details>
<summary>More</summary>
```mermaid
${{ env.PREDICTED_TOKENS_SECONDS }}
```
</details>
</p>
<details>
<summary>Details</summary>
<p align="center">
<img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
<details>
<summary>More</summary>
```mermaid
${{ env.KV_CACHE_USAGE_RATIO }}
```
</details>
<img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
<details>
<summary>More</summary>
```mermaid
${{ env.REQUESTS_PROCESSING }}
```
</details>
</p>
</details>
</details>
================================================
FILE: .github/workflows/build-3rd-party.yml
================================================
name: CI (3rd-party)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-3rd-party.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
ubuntu-24-llguidance:
runs-on: ubuntu-24.04-arm # note: the former expression `'ubuntu-24.04-arm' || 'ubuntu-24.04'` always picked the first operand (non-empty literals are truthy), so this is equivalent and clearer
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential libssl-dev
- name: Build
id: cmake_build
run: |
cmake -B build \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_LLGUIDANCE=ON
cmake --build build --config Release -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
================================================
FILE: .github/workflows/build-android.yml
================================================
name: CI (android)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-android.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-android.yml',
'examples/llama.android/**'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
android:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
with:
fetch-depth: 0
lfs: false
- name: Set up JDK
uses: actions/setup-java@v5
with:
java-version: 17
distribution: zulu
- name: Setup Android SDK
uses: android-actions/setup-android@9fc6c4e9069bf8d3d10b2204b1fb8f6ef7065407 # v3
with:
log-accepted-android-sdk-licenses: false
- name: Build
run: |
cd examples/llama.android
./gradlew build --no-daemon
android-ndk:
runs-on: ubuntu-latest
container:
image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3'
defaults:
run:
shell: bash
strategy:
matrix:
include:
- build: 'arm64-cpu'
defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF'
- build: 'arm64-snapdragon'
defines: '--preset arm64-android-snapdragon-release'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
with:
fetch-depth: 0
lfs: false
- name: Build Llama.CPP for Hexagon Android
id: build_llama_cpp_hexagon_android
run: |
if [[ "${{ matrix.build }}" == "arm64-snapdragon" ]]; then
cp docs/backend/snapdragon/CMakeUserPresets.json .
fi
cmake ${{ matrix.defines }} -B build
cmake --build build
cmake --install build --prefix pkg-adb/llama.cpp
- name: Upload Llama.CPP Hexagon Android Build Artifact
if: ${{ always() && steps.build_llama_cpp_hexagon_android.outcome == 'success' }}
uses: actions/upload-artifact@v6
with:
name: llama-cpp-android-${{ matrix.build }}
path: pkg-adb/llama.cpp
================================================
FILE: .github/workflows/build-apple.yml
================================================
name: CI (apple)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-apple.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp',
'**/*.swift',
'**/*.m',
'**/*.metal'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-apple.yml',
'ggml/src/ggml-metal/**'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
macOS-latest-ios:
runs-on: macos-latest
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: macOS-latest-ios
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
macos-latest-ios-xcode:
runs-on: macos-latest
steps:
- name: Checkout code
uses: actions/checkout@v6
- name: Setup Xcode
uses: ggml-org/setup-xcode@v1
with:
xcode-version: latest-stable
- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_OPENSSL=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
- name: xcodebuild for swift package
id: xcodebuild
run: |
./build-xcframework.sh
- name: Upload xcframework artifact
uses: actions/upload-artifact@v6
with:
name: llama-xcframework
path: build-apple/llama.xcframework/
retention-days: 1
- name: Build Xcode project
run: |
xcodebuild -downloadPlatform iOS
xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
macOS-latest-tvos:
runs-on: macos-latest
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: macOS-latest-tvos
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=tvOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
macOS-latest-visionos:
runs-on: macos-latest
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
macOS-latest-swift:
runs-on: macos-latest
needs: macos-latest-ios-xcode
strategy:
matrix:
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: macOS-latest-swift
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Download xcframework artifact
uses: actions/download-artifact@v7
with:
name: llama-xcframework
path: build-apple/llama.xcframework/
- name: Build llama.cpp with CMake
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_OPENSSL=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
================================================
FILE: .github/workflows/build-cache.yml
================================================
name: Build Actions Cache
on:
workflow_dispatch: # allows manual triggering
schedule:
- cron: '0 * * * *'
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
ubuntu-24-vulkan-cache:
runs-on: ubuntu-24.04
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Get latest Vulkan SDK version
id: vulkan_sdk_version
run: |
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
- name: Setup Cache
uses: actions/cache@v5
id: cache-sdk
with:
path: ./vulkan_sdk
key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
- name: Setup Vulkan SDK
if: steps.cache-sdk.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-vulkan
with:
path: ./vulkan_sdk
version: ${{ env.VULKAN_SDK_VERSION }}
#ubuntu-24-spacemit-cache:
# runs-on: ubuntu-24.04
# env:
# # Make sure this is in sync with build-linux-cross.yml
# SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
# - name: Setup Cache
# uses: actions/cache@v5
# id: cache-toolchain
# with:
# path: ./spacemit_toolchain
# key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }}
# - name: Setup SpacemiT Toolchain
# if: steps.cache-toolchain.outputs.cache-hit != 'true'
# uses: ./.github/actions/linux-setup-spacemit
# with:
# path: ./spacemit_toolchain
# version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}
ubuntu-24-openvino-cache:
runs-on: ubuntu-24.04
env:
# Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.0"
OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Setup Cache
uses: actions/cache@v5
id: cache-openvino
with:
path: ./openvino_toolkit
key: openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }}
- name: Setup OpenVINO Toolkit
if: steps.cache-openvino.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-openvino
with:
path: ./openvino_toolkit
version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
version_full: ${{ env.OPENVINO_VERSION_FULL }}
windows-2022-rocm-cache:
runs-on: windows-2022
env:
# Make sure this is in sync with build.yml
HIPSDK_INSTALLER_VERSION: "26.Q1"
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Setup Cache
uses: actions/cache@v5
id: cache-rocm
with:
path: C:\Program Files\AMD\ROCm
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
- name: Setup ROCm
if: steps.cache-rocm.outputs.cache-hit != 'true'
uses: ./.github/actions/windows-setup-rocm
with:
version: ${{ env.HIPSDK_INSTALLER_VERSION }}
================================================
FILE: .github/workflows/build-cann.yml
================================================
name: CI (cann)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-cann.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-cann.yml',
'ggml/src/ggml-cann/**'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
openEuler-latest-cann:
defaults:
run:
shell: bash -el {0}
strategy:
matrix:
arch: [x86, aarch64]
chip_type: ['910b', '310p']
build: ['Release']
use_acl_graph: ['on', 'off']
exclude:
# 310P does not support USE_ACL_GRAPH=on
- chip_type: '310p'
use_acl_graph: 'on'
runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
steps:
- name: Checkout
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Free up disk space
uses: ggml-org/free-disk-space@v1.3.1
with:
tool-cache: true
- name: Set container image
id: cann-image
run: |
image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.5.0-910b-openeuler24.03-py3.11' || '8.5.0-310p-openeuler24.03-py3.11' }}"
echo "image=${image}" >> "${GITHUB_OUTPUT}"
- name: Pull container image
run: docker pull "${{ steps.cann-image.outputs.image }}"
- name: Build
env:
BUILD_TYPE: ${{ matrix.build }}
SOC_TYPE: ascend${{ matrix.chip_type }}
USE_ACL_GRAPH: ${{ matrix.use_acl_graph }}
run: |
HOST_UID=$(id -u)
HOST_GID=$(id -g)
docker run --rm \
-v "${PWD}:/workspace" \
-w /workspace \
-e SOC_TYPE=${SOC_TYPE} \
-e BUILD_TYPE=${BUILD_TYPE} \
-e USE_ACL_GRAPH=${USE_ACL_GRAPH} \
"${{ steps.cann-image.outputs.image }}" \
bash -lc '
set -e
yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel
yum clean all && rm -rf /var/cache/yum
git config --global --add safe.directory "/workspace"
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
cmake -S . -B build \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DGGML_CANN=on \
-DSOC_TYPE=${SOC_TYPE} \
-DUSE_ACL_GRAPH=${USE_ACL_GRAPH}
cmake --build build -j $(nproc)
chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build
'
================================================
FILE: .github/workflows/build-cmake-pkg.yml
================================================
name: Build relocatable cmake package
on:
workflow_dispatch:
workflow_call:
jobs:
linux:
runs-on: ubuntu-slim
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y build-essential tcl cmake
- name: Build
run: |
PREFIX="$(pwd)"/inst
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX" \
-DLLAMA_OPENSSL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release
cmake --build build --config Release
cmake --install build --prefix "$PREFIX" --config Release
export LLAMA_CONFIG="$PREFIX"/lib/cmake/llama/llama-config.cmake
tclsh <<'EOF'
set build(commit) [string trim [exec git rev-parse --short HEAD]]
set build(number) [string trim [exec git rev-list --count HEAD]]
set build(version) "0.0.$build(number)"
set llamaconfig [read [open "$env(LLAMA_CONFIG)" r]]
set checks [list "set\\(LLAMA_VERSION \\s+$build(version)\\)" \
"set\\(LLAMA_BUILD_COMMIT\\s+$build(commit)\\)" \
"set\\(LLAMA_BUILD_NUMBER\\s+$build(number)\\)"]
puts -nonewline "Checking llama-config.cmake version... "
foreach check $checks {
if {![regexp -expanded -- $check $llamaconfig]} {
puts "\"$check\" failed!"
exit 1
}
}
puts "success."
EOF
cd examples/simple-cmake-pkg
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX"/lib/cmake
cmake --build build
================================================
FILE: .github/workflows/build-cross.yml
================================================
name: CI (cross)
on:
# only manual triggers due to low-importance of the workflows
# TODO: for regular runs, provision dedicated self-hosted runners
workflow_dispatch:
push:
branches:
- master
paths: [
'.github/workflows/build-cross.yml',
'ggml/src/ggml-cpu/spacemit/**',
'ggml/src/ggml-cpu/arch/loongarch/**'
]
# run once every week
schedule:
- cron: '0 0 * * 0'
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
# ubuntu-24-riscv64-cpu-cross:
# runs-on: ubuntu-24.04
# steps:
# - uses: actions/checkout@v6
# - name: Setup Riscv
# run: |
# sudo dpkg --add-architecture riscv64
# # Add arch-specific repositories for non-amd64 architectures
# cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
# EOF
# sudo apt-get update || true ;# Prevent failure due to missing URLs.
# sudo apt-get install -y --no-install-recommends \
# build-essential \
# gcc-14-riscv64-linux-gnu \
# g++-14-riscv64-linux-gnu
# - name: Build
# run: |
# cmake -B build -DLLAMA_OPENSSL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_OPENMP=OFF \
# -DLLAMA_BUILD_EXAMPLES=ON \
# -DLLAMA_BUILD_TOOLS=ON \
# -DLLAMA_BUILD_TESTS=OFF \
# -DCMAKE_SYSTEM_NAME=Linux \
# -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
# -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
# -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
# cmake --build build --config Release -j $(nproc)
# ubuntu-24-riscv64-vulkan-cross:
# runs-on: ubuntu-24.04
# steps:
# - uses: actions/checkout@v6
# - name: Setup Riscv
# run: |
# sudo dpkg --add-architecture riscv64
# # Add arch-specific repositories for non-amd64 architectures
# cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
# EOF
# sudo apt-get update || true ;# Prevent failure due to missing URLs.
# sudo apt-get install -y --no-install-recommends \
# build-essential \
# glslc \
# gcc-14-riscv64-linux-gnu \
# g++-14-riscv64-linux-gnu \
# libvulkan-dev:riscv64
# - name: Build
# run: |
# cmake -B build -DLLAMA_OPENSSL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_VULKAN=ON \
# -DGGML_OPENMP=OFF \
# -DLLAMA_BUILD_EXAMPLES=ON \
# -DLLAMA_BUILD_TOOLS=ON \
# -DLLAMA_BUILD_TESTS=OFF \
# -DCMAKE_SYSTEM_NAME=Linux \
# -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
# -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
# -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
# cmake --build build --config Release -j $(nproc)
# ubuntu-24-arm64-vulkan-cross:
# runs-on: ubuntu-24.04
# steps:
# - uses: actions/checkout@v6
# - name: Setup Arm64
# run: |
# sudo dpkg --add-architecture arm64
# # Add arch-specific repositories for non-amd64 architectures
# cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
# deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
# deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
# deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
# deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
# EOF
# sudo apt-get update || true ;# Prevent failure due to missing URLs.
# sudo apt-get install -y --no-install-recommends \
# build-essential \
# glslc \
# crossbuild-essential-arm64 \
# libvulkan-dev:arm64
# - name: Build
# run: |
# cmake -B build -DLLAMA_OPENSSL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_VULKAN=ON \
# -DGGML_OPENMP=OFF \
# -DLLAMA_BUILD_EXAMPLES=ON \
# -DLLAMA_BUILD_TOOLS=ON \
# -DLLAMA_BUILD_TESTS=OFF \
# -DCMAKE_SYSTEM_NAME=Linux \
# -DCMAKE_SYSTEM_PROCESSOR=aarch64 \
# -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \
# -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
# cmake --build build --config Release -j $(nproc)
debian-13-loongarch64-cpu-cross:
runs-on: ubuntu-24.04-arm # note: the former expression `'ubuntu-24.04-arm' || 'ubuntu-24.04'` always picked the first operand (non-empty literals are truthy), so this is equivalent and clearer
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
steps:
- uses: actions/checkout@v6
- name: Setup LoongArch
run: |
rm -f /etc/apt/sources.list.d/*
cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
EOF
( echo 'quiet "true";'; \
echo 'APT::Get::Assume-Yes "true";'; \
echo 'APT::Install-Recommends "false";'; \
echo 'Acquire::Check-Valid-Until "false";'; \
echo 'Acquire::Retries "5";'; \
) > /etc/apt/apt.conf.d/99snapshot-repos
apt-get update
apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
dpkg --add-architecture loong64
# Add arch-specific repositories for non-amd64 architectures
cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
EOF
apt-get update || true ;# Prevent failure due to missing URLs.
apt-get install -y --no-install-recommends \
build-essential \
gcc-14-loongarch64-linux-gnu \
g++-14-loongarch64-linux-gnu
- name: Build
run: |
cmake -B build -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
-DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
cmake --build build --config Release -j $(nproc)
debian-13-loongarch64-vulkan-cross:
runs-on: ubuntu-24.04-arm # note: the former expression `'ubuntu-24.04-arm' || 'ubuntu-24.04'` always picked the first operand (non-empty literals are truthy), so this is equivalent and clearer
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
steps:
- uses: actions/checkout@v6
- name: Setup LoongArch
run: |
rm -f /etc/apt/sources.list.d/*
cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
EOF
( echo 'quiet "true";'; \
echo 'APT::Get::Assume-Yes "true";'; \
echo 'APT::Install-Recommends "false";'; \
echo 'Acquire::Check-Valid-Until "false";'; \
echo 'Acquire::Retries "5";'; \
) > /etc/apt/apt.conf.d/99snapshot-repos
apt-get update
apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
dpkg --add-architecture loong64
# Add arch-specific repositories for non-amd64 architectures
cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
EOF
apt-get update || true ;# Prevent failure due to missing URLs.
apt-get install -y --no-install-recommends \
build-essential \
glslc \
gcc-14-loongarch64-linux-gnu \
g++-14-loongarch64-linux-gnu \
libvulkan-dev:loong64
- name: Build
run: |
cmake -B build -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_VULKAN=ON \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
-DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
cmake --build build --config Release -j $(nproc)
ubuntu-24-riscv64-cpu-spacemit-ime-cross:
runs-on: ubuntu-24.04
env:
# Make sure this is in sync with build-cache.yml
SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
steps:
- uses: actions/checkout@v6
#- name: Use SpacemiT Toolchain Cache
# uses: actions/cache@v5
# id: cache-toolchain
# with:
# path: ./spacemit_toolchain
# key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }}
- name: Setup SpacemiT Toolchain
#if: steps.cache-toolchain.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-spacemit
with:
path: ./spacemit_toolchain
version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}
- name: Build
run: |
export RISCV_ROOT_PATH=${PWD}/spacemit_toolchain
cmake -B build -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DGGML_CPU_RISCV64_SPACEMIT=ON \
-DGGML_RVV=ON \
-DGGML_RV_ZFH=ON \
-DGGML_RV_ZICBOP=ON \
-DGGML_RV_ZIHINTPAUSE=ON \
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
cmake --build build --config Release -j $(nproc)
================================================
FILE: .github/workflows/build-msys.yml
================================================
name: CI (msys)
on:
# only manual triggers due to low-importance of the workflows
# TODO: for regular runs, provision dedicated self-hosted runners
workflow_dispatch:
# run once every week
schedule:
- cron: '0 0 * * 0'
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
windows-msys2:
runs-on: windows-2025
strategy:
fail-fast: false
matrix:
include:
- { sys: UCRT64, env: ucrt-x86_64, build: Release }
- { sys: CLANG64, env: clang-x86_64, build: Release }
steps:
- name: Clone
uses: actions/checkout@v6
#- name: ccache
# uses: ggml-org/ccache-action@v1.2.16
# with:
# key: windows-msys2
# variant: ccache
# evict-old-files: 1d
# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Setup ${{ matrix.sys }}
uses: msys2/setup-msys2@cafece8e6baf9247cf9b1bf95097b0b983cc558d # v2
with:
update: true
msystem: ${{matrix.sys}}
install: >-
base-devel
git
mingw-w64-${{matrix.env}}-toolchain
mingw-w64-${{matrix.env}}-cmake
mingw-w64-${{matrix.env}}-openblas
- name: Build using CMake
shell: msys2 {0}
run: |
cmake -B build
cmake --build build --config ${{ matrix.build }} -j $(nproc)
- name: Clean after building using CMake
shell: msys2 {0}
run: |
rm -rf build
- name: Build using CMake w/ OpenBLAS
shell: msys2 {0}
run: |
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
cmake --build build --config ${{ matrix.build }} -j $(nproc)
================================================
FILE: .github/workflows/build-riscv.yml
================================================
name: CI (riscv)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-riscv.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-riscv.yml',
'ggml/src/ggml-cpu/arch/riscv/**'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
ubuntu-riscv64-native-sanitizer:
runs-on: RISCV64
continue-on-error: true
strategy:
matrix:
sanitizer: [ADDRESS, THREAD, UNDEFINED]
build_type: [Debug]
steps:
- name: Install dependencies
run: |
sudo apt-get update
# Install necessary packages
sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs
# Set gcc-14 and g++-14 as the default compilers
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
# Install Rust stable version
rustup install stable
rustup default stable
git lfs install
- name: GCC version check
run: |
gcc --version
g++ --version
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Setup ccache
run: |
# Unique cache directory per matrix combination
export CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}"
mkdir -p "$CCACHE_DIR"
# Configure ccache
ccache --set-config=max_size=5G
ccache --set-config=compression=true
ccache --set-config=compression_level=6
ccache --set-config=cache_dir="$CCACHE_DIR"
ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
ccache --set-config=hash_dir=false
# Export for subsequent steps
echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
- name: Build
id: cmake_build
if: ${{ matrix.sanitizer != 'THREAD' }}
run: |
cmake -B build \
-DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DGGML_OPENMP=ON \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
- name: Build (no OpenMP)
id: cmake_build_no_openmp
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
cmake -B build \
-DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
================================================
FILE: .github/workflows/build-sanitize.yml
================================================
name: CI (sanitize)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-sanitize.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
ubuntu-latest-sanitizer:
runs-on: ubuntu-latest
continue-on-error: true
strategy:
matrix:
sanitizer: [ADDRESS, THREAD, UNDEFINED]
build_type: [Debug]
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: ubuntu-latest-sanitizer-${{ matrix.sanitizer }}
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential libssl-dev
- name: Build
id: cmake_build
if: ${{ matrix.sanitizer != 'THREAD' }}
run: |
cmake -B build \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
- name: Build (no OpenMP)
id: cmake_build_no_openmp
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
cmake -B build \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DGGML_OPENMP=OFF
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
================================================
FILE: .github/workflows/build-self-hosted.yml
================================================
name: CI (self-hosted)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-self-hosted.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp',
'**/*.cu',
'**/*.cuh',
'**/*.swift',
'**/*.m',
'**/*.metal',
'**/*.comp',
'**/*.glsl',
'**/*.wgsl'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-self-hosted.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp',
'**/*.cu',
'**/*.cuh',
'**/*.swift',
'**/*.m',
'**/*.metal',
'**/*.comp',
'**/*.glsl',
'**/*.wgsl'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
ggml-ci-nvidia-cuda:
runs-on: [self-hosted, Linux, NVIDIA]
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Test
id: ggml-ci
run: |
nvidia-smi
GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
ggml-ci-nvidia-vulkan-cm:
runs-on: [self-hosted, Linux, NVIDIA]
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Test
id: ggml-ci
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
ggml-ci-nvidia-vulkan-cm2:
runs-on: [self-hosted, Linux, NVIDIA, COOPMAT2]
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Test
id: ggml-ci
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: provision AMX-compatible machine
#ggml-ci-cpu-amx:
# runs-on: [self-hosted, Linux, CPU, AMX]
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
# - name: Test
# id: ggml-ci
# run: |
# bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: provision AMD GPU machine
# ggml-ci-amd-vulkan:
# runs-on: [self-hosted, Linux, AMD]
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
# - name: Test
# id: ggml-ci
# run: |
# vulkaninfo --summary
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: provision AMD GPU machine
# ggml-ci-amd-rocm:
# runs-on: [self-hosted, Linux, AMD]
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
# - name: Test
# id: ggml-ci
# run: |
# amd-smi static
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: sandbox Mac runners
# ggml-ci-mac-metal:
# runs-on: [self-hosted, macOS, ARM64]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
#
# - name: Test
# id: ggml-ci
# run: |
# GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
#
# ggml-ci-mac-webgpu:
# runs-on: [self-hosted, macOS, ARM64]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
#
# - name: Dawn Dependency
# id: dawn-depends
# run: |
# DAWN_VERSION="v2.0.0"
# DAWN_OWNER="reeselevine"
# DAWN_REPO="dawn"
# DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release"
# echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
# curl -L -o artifact.zip \
# "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
# mkdir dawn
# unzip artifact.zip
# tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1
#
# - name: Test
# id: ggml-ci
# run: |
# GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
# bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
#
# ggml-ci-mac-vulkan:
# runs-on: [self-hosted, macOS, ARM64]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
#
# - name: Test
# id: ggml-ci
# run: |
# vulkaninfo --summary
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
# Full ggml CI on a self-hosted Intel Linux runner using the Vulkan backend.
ggml-ci-linux-intel-vulkan:
  runs-on: [self-hosted, Linux, Intel]
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        # Do not leave the GITHUB_TOKEN in the git config on this
        # self-hosted machine.
        persist-credentials: false
    - name: Test
      id: ggml-ci
      run: |
        vulkaninfo --summary
        GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
# Reduced ("low perf") ggml CI on a self-hosted Intel runner with the
# OpenVINO backend targeting the GPU device.
ggml-ci-intel-openvino-gpu-low-perf:
  runs-on: [self-hosted, Linux, Intel, OpenVINO]
  env:
    # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
    OPENVINO_VERSION_MAJOR: "2026.0"
    OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    # Local composite action downloads/unpacks the toolkit into ./openvino_toolkit.
    - name: Setup OpenVINO Toolkit
      uses: ./.github/actions/linux-setup-openvino
      with:
        path: ./openvino_toolkit
        version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
        version_full: ${{ env.OPENVINO_VERSION_FULL }}
    # The installer script prompts for confirmation, hence the piped "Y".
    - name: Install OpenVINO dependencies
      run: |
        cd ./openvino_toolkit
        chmod +x ./install_dependencies/install_openvino_dependencies.sh
        echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh
    - name: Test
      id: ggml-ci
      run: |
        source ./openvino_toolkit/setupvars.sh
        GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
================================================
FILE: .github/workflows/build-vulkan.yml
================================================
name: CI (vulkan)

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
    # NOTE(review): the push trigger watches all C/C++/shader sources, while
    # the pull_request trigger below only watches the Vulkan backend
    # directory — presumably intentional to keep PR CI cheap; confirm.
    paths: [
      '.github/workflows/build-vulkan.yml',
      '**/CMakeLists.txt',
      '**/.cmake',
      '**/*.h',
      '**/*.hpp',
      '**/*.c',
      '**/*.cpp',
      '**/*.comp',
      '**/*.glsl'
    ]
  pull_request:
    types: [opened, synchronize, reopened]
    paths: [
      '.github/workflows/build-vulkan.yml',
      'ggml/src/ggml-vulkan/**'
    ]

# Cancel in-flight runs for the same branch; run_id fallback keeps pushes
# to master from cancelling each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1

jobs:
jobs:
ubuntu-24-vulkan-llvmpipe:
runs-on: ubuntu-24.04
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: ubuntu-24-vulkan-llvmpipe
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Dependencies
id: depends
run: |
sudo add-apt-repository -y ppa:kisak/kisak-mesa
sudo apt-get update -y
sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev
- name: Get latest Vulkan SDK version
id: vulkan_sdk_version
run: |
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
- name: Use Vulkan SDK Cache
uses: actions/cache@v5
id: cache-sdk
with:
path: ./vulkan_sdk
key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
- name: Setup Vulkan SDK
if: steps.cache-sdk.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-vulkan-llvmpipe
with:
path: ./vulkan_sdk
version: ${{ env.VULKAN_SDK_VERSION }}
- name: Build
id: cmake_build
run: |
source ./vulkan_sdk/setup-env.sh
cmake -B build \
-DGGML_VULKAN=ON
cmake --build build --config Release -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
export GGML_VK_VISIBLE_DEVICES=0
export GGML_VK_DISABLE_F16=1
export GGML_VK_DISABLE_COOPMAT=1
# This is using llvmpipe and runs slower than other backends
ctest -L main --verbose --timeout 4800
================================================
FILE: .github/workflows/build.yml
================================================
name: CI

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
    # Trigger on workflow changes, build-system files, and any source or
    # shader file anywhere in the tree.
    paths: [
      '.github/workflows/build.yml',
      '.github/workflows/build-cmake-pkg.yml',
      '**/CMakeLists.txt',
      '**/.cmake',
      '**/*.h',
      '**/*.hpp',
      '**/*.c',
      '**/*.cpp',
      '**/*.cu',
      '**/*.cuh',
      '**/*.swift',
      '**/*.m',
      '**/*.metal',
      '**/*.comp',
      '**/*.glsl',
      '**/*.wgsl'
    ]
  pull_request:
    types: [opened, synchronize, reopened]
    paths: [
      '.github/workflows/build.yml',
      '.github/workflows/build-cmake-pkg.yml',
      '**/CMakeLists.txt',
      '**/.cmake',
      '**/*.h',
      '**/*.hpp',
      '**/*.c',
      '**/*.cpp',
      '**/*.cu',
      '**/*.cuh',
      '**/*.swift',
      '**/*.m',
      '**/*.metal',
      '**/*.comp',
      '**/*.glsl',
      '**/*.wgsl'
    ]

# Cancel in-flight runs for the same branch; run_id fallback keeps pushes
# to master from cancelling each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1

jobs:
jobs:
# Delegates to the reusable build-cmake-pkg workflow (validates consuming
# llama.cpp via its installed CMake package config).
build-cmake-pkg:
  uses: ./.github/workflows/build-cmake-pkg.yml
# Apple Silicon build with Metal (BF16, shader debug) and RPC enabled, plus
# a leak check of test-thread-safety under real inference.
macOS-latest-arm64:
  runs-on: macos-latest
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: macOS-latest-arm64
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    # "leaks -atExit" runs the binary and reports leaked allocations at exit.
    - name: Build
      id: cmake_build
      run: |
        sysctl -a
        cmake -B build \
          -DCMAKE_BUILD_RPATH="@loader_path" \
          -DLLAMA_FATAL_WARNINGS=ON \
          -DLLAMA_BUILD_BORINGSSL=ON \
          -DGGML_METAL_USE_BF16=ON \
          -DGGML_METAL_EMBED_LIBRARY=OFF \
          -DGGML_METAL_SHADER_DEBUG=ON \
          -DGGML_RPC=ON
        time cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
        leaks -atExit -- ./build/bin/test-thread-safety -hf ggml-org/gemma-3-270m-qat-GGUF -ngl 99 -p "$(printf 'hello %.0s' {1..128})" -n 16 -c 512 -ub 32 -np 2 -t 2 -lv 1
    # test-llama-archs is excluded here (see also the OpenVINO job's TODO).
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main -E "test-llama-archs" --verbose --timeout 900
# Intel macOS build; Metal is off (see comment below), RPC is on.
macOS-latest-x64:
  runs-on: macos-15-intel
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: macOS-latest-x64
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Build
      id: cmake_build
      run: |
        sysctl -a
        # Metal is disabled due to intermittent failures with Github runners not having a GPU:
        # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
        cmake -B build \
          -DCMAKE_BUILD_RPATH="@loader_path" \
          -DLLAMA_FATAL_WARNINGS=ON \
          -DLLAMA_BUILD_BORINGSSL=ON \
          -DGGML_METAL=OFF \
          -DGGML_RPC=ON \
          -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
        time cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose --timeout 900
# Apple Silicon WebGPU build against a prebuilt Dawn release; Metal and
# BLAS are disabled so the WebGPU backend is what gets tested.
macOS-latest-arm64-webgpu:
  runs-on: macos-latest
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: macOS-latest-arm64-webgpu
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    # Fetch a pinned prebuilt Dawn binary release (version + commit hash in
    # the asset name) and unpack it into ./dawn.
    - name: Dawn Dependency
      id: dawn-depends
      run: |
        DAWN_VERSION="v20260317.182325"
        DAWN_OWNER="google"
        DAWN_REPO="dawn"
        DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-macos-latest-Release"
        echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
        curl -L -o artifact.tar.gz \
          "https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
        mkdir dawn
        tar -xvf artifact.tar.gz -C dawn --strip-components=1
    - name: Build
      id: cmake_build
      run: |
        export CMAKE_PREFIX_PATH=dawn
        cmake -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DGGML_WEBGPU=ON -DGGML_METAL=OFF -DGGML_BLAS=OFF
        time cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose --timeout 900
# CPU-only build/test matrix across x64, arm64, s390x (big-endian) and
# ppc64le. The s390x leg swaps model endianness and uses a pre-converted
# big-endian model for the llama2c test.
ubuntu-cpu:
  strategy:
    matrix:
      include:
        - build: 'x64'
          os: ubuntu-22.04
        - build: 'arm64'
          os: ubuntu-24.04-arm
        - build: 's390x'
          os: ubuntu-24.04-s390x
        - build: 'ppc64le'
          os: ubuntu-24.04-ppc64le
  runs-on: ${{ matrix.os }}
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    # ccache is skipped on the s390x/ppc64le partner runners.
    - name: ccache
      if: ${{ matrix.build != 's390x' && matrix.build != 'ppc64le' }}
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-cpu-${{ matrix.build }}
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Build Dependencies
      id: build_depends
      run: |
        sudo apt-get update
        sudo apt-get install -y --no-install-recommends \
          python3 python3-pip python3-dev python3-wheel \
          libjpeg-dev build-essential libssl-dev \
          git-lfs
    # Force GCC 14 on the 24.04 images (applies to arm64/s390x/ppc64le legs).
    - name: Toolchain workaround (GCC 14)
      if: ${{ contains(matrix.os, 'ubuntu-24.04') }}
      run: |
        sudo apt-get install -y gcc-14 g++-14
        echo "CC=gcc-14" >> "$GITHUB_ENV"
        echo "CXX=g++-14" >> "$GITHUB_ENV"
    - name: Python Dependencies
      id: python_depends
      run: |
        export PIP_BREAK_SYSTEM_PACKAGES="1"
        python3 -m pip install --upgrade pip setuptools
        pip3 install ./gguf-py
    # Convert any checked-out GGUF test models to big-endian for s390x;
    # the conversion script prompts, hence the piped YES.
    - name: Swap Endianness
      id: endianness
      if: ${{ matrix.build == 's390x' }}
      run: |
        for f in models/*.gguf; do
          echo YES | python3 gguf-py/gguf/scripts/gguf_convert_endian.py $f big
        done
    - name: Build
      id: cmake_build
      run: |
        cmake -B build \
          -DLLAMA_FATAL_WARNINGS=ON \
          -DGGML_RPC=ON
        time cmake --build build --config Release -j $(nproc)
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose --timeout 900
    # End-to-end check: convert a tiny llama2.c model to GGUF and run it.
    - name: Test llama2c conversion
      id: llama2c_test
      if: ${{ matrix.build != 's390x' }}
      run: |
        cd build
        echo "Fetch tokenizer"
        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
        echo "Fetch llama2c model"
        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
        ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
        ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
    # s390x uses a pre-converted big-endian GGUF instead of converting.
    - name: Test llama2c (s390x)
      id: llama2c_test_s390x
      if: ${{ matrix.build == 's390x' }}
      run: |
        cd build
        echo "Fetch llama2c big-endian model"
        wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf
        ./bin/llama-completion -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
# RPC backend smoke build/test; continue-on-error keeps this advisory so
# RPC flakiness does not block CI.
ubuntu-latest-rpc:
  runs-on: ubuntu-latest
  continue-on-error: true
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: Dependencies
      id: depends
      run: |
        sudo apt-get update
        sudo apt-get install build-essential libssl-dev ninja-build
    - name: Build
      id: cmake_build
      run: |
        cmake -B build \
          -G "Ninja" \
          -DCMAKE_BUILD_TYPE=Release \
          -DGGML_RPC=ON
        time cmake --build build --config Release -j $(nproc)
    # NOTE(review): no ctest --timeout here, unlike the other jobs — confirm
    # whether that is intentional.
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose
# Vulkan build-only check (no tests) on x64 and arm64, with the dynamic
# backend loader and all CPU variants enabled.
ubuntu-24-vulkan:
  strategy:
    matrix:
      include:
        - build: 'x64'
          os: ubuntu-24.04
        - build: 'arm64'
          os: ubuntu-24.04-arm
  runs-on: ${{ matrix.os }}
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: Dependencies
      id: depends
      run: |
        sudo apt-get update
        sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev libssl-dev ninja-build
        echo "CC=gcc-14" >> "$GITHUB_ENV"
        echo "CXX=g++-14" >> "$GITHUB_ENV"
    - name: Configure
      id: cmake_configure
      run: |
        cmake -B build \
          -G "Ninja" \
          -DCMAKE_BUILD_TYPE=RelWithDebInfo \
          -DGGML_BACKEND_DL=ON \
          -DGGML_CPU_ALL_VARIANTS=ON \
          -DGGML_VULKAN=ON
    - name: Build
      id: cmake_build
      run: |
        time cmake --build build -j $(nproc)
# WebGPU build/test on Linux: Vulkan SDK (cached) + prebuilt Dawn, with
# tests running on the llvmpipe software driver from the kisak-mesa PPA.
ubuntu-24-webgpu:
  runs-on: ubuntu-24.04
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-24-webgpu
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Dependencies
      id: depends
      run: |
        sudo add-apt-repository -y ppa:kisak/kisak-mesa
        sudo apt-get update -y
        sudo apt-get install -y build-essential mesa-vulkan-drivers \
          libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev
    - name: Get latest Vulkan SDK version
      id: vulkan_sdk_version
      run: |
        echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
    - name: Use Vulkan SDK Cache
      uses: actions/cache@v5
      id: cache-sdk
      with:
        path: ./vulkan_sdk
        key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
    # Only download/install the SDK on a cache miss.
    - name: Setup Vulkan SDK
      if: steps.cache-sdk.outputs.cache-hit != 'true'
      uses: ./.github/actions/linux-setup-vulkan
      with:
        path: ./vulkan_sdk
        version: ${{ env.VULKAN_SDK_VERSION }}
    # Fetch a pinned prebuilt Dawn release and unpack it into ./dawn.
    - name: Dawn Dependency
      id: dawn-depends
      run: |
        sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev
        DAWN_VERSION="v20260317.182325"
        DAWN_OWNER="google"
        DAWN_REPO="dawn"
        DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
        echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
        curl -L -o artifact.tar.gz \
          "https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
        mkdir dawn
        tar -xvf artifact.tar.gz -C dawn --strip-components=1
    - name: Build
      id: cmake_build
      run: |
        export Dawn_DIR=dawn/lib64/cmake/Dawn
        cmake -B build \
          -DGGML_WEBGPU=ON
        time cmake --build build --config Release -j $(nproc)
    - name: Test
      id: cmake_test
      run: |
        cd build
        # This is using llvmpipe and runs slower than other backends
        ctest -L main --verbose --timeout 900
# WASM WebGPU build via Emscripten + emdawnwebgpu; builds only the
# test-backend-ops target (no test execution).
ubuntu-24-webgpu-wasm:
  # Fix: the previous expression `${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}`
  # was dead code — a non-empty string literal is always truthy in GitHub
  # Actions expressions, so `||` always returned the left operand.
  runs-on: ubuntu-24.04-arm
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: Install Emscripten
      run: |
        git clone https://github.com/emscripten-core/emsdk.git
        cd emsdk
        ./emsdk install latest
        ./emsdk activate latest
    # Fetch the emdawnwebgpu package matching the pinned Dawn tag.
    - name: Fetch emdawnwebgpu
      run: |
        DAWN_TAG="v20260317.182325"
        EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip"
        echo "Downloading ${EMDAWN_PKG}"
        curl -L -o emdawn.zip \
          "https://github.com/google/dawn/releases/download/${DAWN_TAG}/${EMDAWN_PKG}"
        unzip emdawn.zip
    - name: Build WASM WebGPU
      run: |
        source emsdk/emsdk_env.sh
        emcmake cmake -B build-wasm \
          -G "Ninja" \
          -DCMAKE_BUILD_TYPE=Release \
          -DGGML_WEBGPU=ON \
          -DLLAMA_OPENSSL=OFF \
          -DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg
        time cmake --build build-wasm --config Release --target test-backend-ops -j $(nproc)
# HIP/ROCm build-only check inside the ROCm dev container, using CMake's
# native HIP language support.
ubuntu-22-hip:
  runs-on: ubuntu-22.04
  container: rocm/dev-ubuntu-22.04:6.1.2
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: Dependencies
      id: depends
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libssl-dev rocwmma-dev
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-22-hip
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    # hipconfig -l points at the ROCm clang bundled with the container.
    - name: Build with native CMake HIP support
      id: cmake_build
      run: |
        cmake -B build -S . \
          -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
          -DGGML_HIP_ROCWMMA_FATTN=ON \
          -DGGML_HIP=ON
        cmake --build build --config Release -j $(nproc)
# MUSA (Moore Threads GPU) build-only check inside the vendor dev container.
ubuntu-22-musa:
  runs-on: ubuntu-22.04
  container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    # The container runs as root, so apt-get is used without sudo here.
    - name: Dependencies
      id: depends
      run: |
        apt-get update
        apt-get install -y build-essential git cmake libssl-dev
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-22-musa
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Build with native CMake MUSA support
      id: cmake_build
      run: |
        cmake -B build -S . \
          -DGGML_MUSA=ON
        time cmake --build build --config Release -j $(nproc)
# SYCL (oneAPI dpcpp + MKL) build-only check; advisory via continue-on-error.
ubuntu-22-sycl:
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    # NOTE(review): the repository is checked out twice in this job (here and
    # in the "Clone" step below); the second checkout appears redundant.
    - uses: actions/checkout@v6
    - name: add oneAPI to apt
      shell: bash
      run: |
        cd /tmp
        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
    - name: install oneAPI dpcpp compiler
      shell: bash
      run: |
        sudo apt update
        sudo apt install intel-oneapi-compiler-dpcpp-cpp libssl-dev
    - name: install oneAPI MKL library
      shell: bash
      run: |
        sudo apt install intel-oneapi-mkl-devel
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-22-sycl
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    # setvars.sh puts icx/icpx on PATH before configuring.
    - name: Build
      id: cmake_build
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake -B build \
          -DGGML_SYCL=ON \
          -DCMAKE_C_COMPILER=icx \
          -DCMAKE_CXX_COMPILER=icpx
        time cmake --build build --config Release -j $(nproc)
# Same as ubuntu-22-sycl but with FP16 support (GGML_SYCL_F16=ON) and Ninja.
ubuntu-22-sycl-fp16:
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    # NOTE(review): duplicated checkout (also in the "Clone" step below),
    # same as the ubuntu-22-sycl job.
    - uses: actions/checkout@v6
    - name: add oneAPI to apt
      shell: bash
      run: |
        cd /tmp
        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
    - name: install oneAPI dpcpp compiler
      shell: bash
      run: |
        sudo apt update
        sudo apt install intel-oneapi-compiler-dpcpp-cpp libssl-dev ninja-build
    - name: install oneAPI MKL library
      shell: bash
      run: |
        sudo apt install intel-oneapi-mkl-devel
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-22-sycl-fp16
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Build
      id: cmake_build
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake -B build \
          -G "Ninja" \
          -DCMAKE_BUILD_TYPE=Release \
          -DGGML_SYCL=ON \
          -DCMAKE_C_COMPILER=icx \
          -DCMAKE_CXX_COMPILER=icpx \
          -DGGML_SYCL_F16=ON
        time cmake --build build --config Release -j $(nproc)
# OpenVINO build/test matrix: CPU variant on a GitHub-hosted runner, GPU
# variant on a self-hosted Intel machine. The runner spec is stored as a
# JSON string so one matrix field covers both a single label and a label list.
ubuntu-24-openvino:
  name: ubuntu-24-openvino-${{ matrix.openvino_device }}
  strategy:
    matrix:
      include:
        - variant: cpu
          runner: '"ubuntu-24.04"'
          openvino_device: "CPU"
        - variant: gpu
          runner: '["self-hosted","Linux","X64","Intel"]'
          openvino_device: "GPU"
  runs-on: ${{ fromJSON(matrix.runner) }}
  env:
    # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
    OPENVINO_VERSION_MAJOR: "2026.0"
    OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    # ccache and the toolkit cache are only used on GitHub-hosted runners;
    # the self-hosted machine manages its own state.
    - name: ccache
      if: runner.environment == 'github-hosted'
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-24-openvino-${{ matrix.variant }}-no-preset-v1
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Dependencies
      id: depends
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential libssl-dev libtbb12 cmake ninja-build python3-pip
        sudo apt-get install -y ocl-icd-opencl-dev opencl-headers opencl-clhpp-headers intel-opencl-icd
    - name: Use OpenVINO Toolkit Cache
      if: runner.environment == 'github-hosted'
      uses: actions/cache@v5
      id: cache-openvino
      with:
        path: ./openvino_toolkit
        key: openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }}
    - name: Setup OpenVINO Toolkit
      if: steps.cache-openvino.outputs.cache-hit != 'true'
      uses: ./.github/actions/linux-setup-openvino
      with:
        path: ./openvino_toolkit
        version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
        version_full: ${{ env.OPENVINO_VERSION_FULL }}
    # The installer script prompts for confirmation, hence the piped "Y".
    - name: Install OpenVINO dependencies
      run: |
        cd ./openvino_toolkit
        chmod +x ./install_dependencies/install_openvino_dependencies.sh
        echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh
    - name: Build
      id: cmake_build
      run: |
        source ./openvino_toolkit/setupvars.sh
        cmake -B build/ReleaseOV -G Ninja \
          -DCMAKE_BUILD_TYPE=Release \
          -DGGML_OPENVINO=ON
        time cmake --build build/ReleaseOV --config Release -j $(nproc)
    - name: Test
      id: cmake_test
      # TODO: fix and re-enable the `test-llama-archs` test below
      run: |
        cd ${{ github.workspace }}
        if [ "${{ matrix.openvino_device }}" = "GPU" ]; then
          export GGML_OPENVINO_DEVICE=GPU
        fi
        ctest --test-dir build/ReleaseOV -L main -E "test-llama-archs" --verbose --timeout 2000
windows-latest:
runs-on: windows-2025
env:
OPENBLAS_VERSION: 0.3.23
SDE_VERSION: 9.33.0-2024-01-07
VULKAN_VERSION: 1.4.313.2
strategy:
matrix:
include:
- build: 'cpu-x64 (static)'
arch: 'x64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF'
- build: 'openblas-x64'
arch: 'x64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
- build: 'vulkan-x64'
arch: 'x64'
defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
- build: 'llvm-arm64'
arch: 'arm64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
- build: 'llvm-arm64-opencl-adreno'
arch: 'arm64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: windows-latest-${{ matrix.build }}
variant: ccache
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Download OpenBLAS
id: get_openblas
gitextract_xcrsk4vf/
├── .clang-format
├── .clang-tidy
├── .devops/
│ ├── cann.Dockerfile
│ ├── cpu.Dockerfile
│ ├── cuda-new.Dockerfile
│ ├── cuda.Dockerfile
│ ├── intel.Dockerfile
│ ├── llama-cli-cann.Dockerfile
│ ├── llama-cpp-cuda.srpm.spec
│ ├── llama-cpp.srpm.spec
│ ├── musa.Dockerfile
│ ├── nix/
│ │ ├── apps.nix
│ │ ├── devshells.nix
│ │ ├── docker.nix
│ │ ├── jetson-support.nix
│ │ ├── nixpkgs-instances.nix
│ │ ├── package-gguf-py.nix
│ │ ├── package.nix
│ │ ├── python-scripts.nix
│ │ ├── scope.nix
│ │ └── sif.nix
│ ├── openvino.Dockerfile
│ ├── rocm.Dockerfile
│ ├── s390x.Dockerfile
│ ├── tools.sh
│ └── vulkan.Dockerfile
├── .dockerignore
├── .ecrc
├── .editorconfig
├── .flake8
├── .gemini/
│ └── settings.json
├── .gitattributes
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── 010-bug-compilation.yml
│ │ ├── 011-bug-results.yml
│ │ ├── 019-bug-misc.yml
│ │ ├── 020-enhancement.yml
│ │ ├── 030-research.yml
│ │ ├── 040-refactor.yml
│ │ └── config.yml
│ ├── actions/
│ │ ├── get-tag-name/
│ │ │ └── action.yml
│ │ ├── install-exe/
│ │ │ └── action.yml
│ │ ├── linux-setup-openvino/
│ │ │ └── action.yml
│ │ ├── linux-setup-spacemit/
│ │ │ └── action.yml
│ │ ├── linux-setup-vulkan/
│ │ │ └── action.yml
│ │ ├── unarchive-tar/
│ │ │ └── action.yml
│ │ ├── windows-setup-cuda/
│ │ │ └── action.yml
│ │ └── windows-setup-rocm/
│ │ └── action.yml
│ ├── labeler.yml
│ ├── pull_request_template.md
│ └── workflows/
│ ├── ai-issues.yml
│ ├── bench.yml.disabled
│ ├── build-3rd-party.yml
│ ├── build-android.yml
│ ├── build-apple.yml
│ ├── build-cache.yml
│ ├── build-cann.yml
│ ├── build-cmake-pkg.yml
│ ├── build-cross.yml
│ ├── build-msys.yml
│ ├── build-riscv.yml
│ ├── build-sanitize.yml
│ ├── build-self-hosted.yml
│ ├── build-vulkan.yml
│ ├── build.yml
│ ├── check-vendor.yml
│ ├── close-issue.yml
│ ├── copilot-setup-steps.yml
│ ├── docker.yml
│ ├── editorconfig.yml
│ ├── gguf-publish.yml
│ ├── hip-quality-check.yml
│ ├── labeler.yml
│ ├── pre-tokenizer-hashes.yml
│ ├── python-check-requirements.yml
│ ├── python-lint.yml
│ ├── python-type-check.yml
│ ├── release.yml
│ ├── server-sanitize.yml
│ ├── server-self-hosted.yml
│ ├── server-webui.yml
│ ├── server.yml
│ ├── update-ops-docs.yml
│ └── winget.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── AGENTS.md
├── AUTHORS
├── CLAUDE.md
├── CMakeLists.txt
├── CMakePresets.json
├── CODEOWNERS
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── benches/
│ ├── dgx-spark/
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.html
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.json
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547_allresults.json
│ │ └── dgx-spark.md
│ ├── mac-m2-ultra/
│ │ └── mac-m2-ultra.md
│ └── nemotron/
│ └── nemotron-dgx-spark.md
├── ci/
│ ├── README-MUSA.md
│ ├── README.md
│ └── run.sh
├── cmake/
│ ├── arm64-apple-clang.cmake
│ ├── arm64-windows-llvm.cmake
│ ├── build-info.cmake
│ ├── common.cmake
│ ├── download-models.cmake
│ ├── git-vars.cmake
│ ├── license.cmake
│ ├── llama-config.cmake.in
│ ├── llama.pc.in
│ ├── riscv64-spacemit-linux-gnu-gcc.cmake
│ └── x64-windows-llvm.cmake
├── common/
│ ├── CMakeLists.txt
│ ├── arg.cpp
│ ├── arg.h
│ ├── base64.hpp
│ ├── build-info.cpp.in
│ ├── chat-auto-parser-generator.cpp
│ ├── chat-auto-parser-helpers.cpp
│ ├── chat-auto-parser-helpers.h
│ ├── chat-auto-parser.h
│ ├── chat-diff-analyzer.cpp
│ ├── chat-peg-parser.cpp
│ ├── chat-peg-parser.h
│ ├── chat.cpp
│ ├── chat.h
│ ├── common.cpp
│ ├── common.h
│ ├── console.cpp
│ ├── console.h
│ ├── debug.cpp
│ ├── debug.h
│ ├── download.cpp
│ ├── download.h
│ ├── hf-cache.cpp
│ ├── hf-cache.h
│ ├── http.h
│ ├── jinja/
│ │ ├── README.md
│ │ ├── caps.cpp
│ │ ├── caps.h
│ │ ├── lexer.cpp
│ │ ├── lexer.h
│ │ ├── parser.cpp
│ │ ├── parser.h
│ │ ├── runtime.cpp
│ │ ├── runtime.h
│ │ ├── string.cpp
│ │ ├── string.h
│ │ ├── utils.h
│ │ ├── value.cpp
│ │ └── value.h
│ ├── json-partial.cpp
│ ├── json-partial.h
│ ├── json-schema-to-grammar.cpp
│ ├── json-schema-to-grammar.h
│ ├── llguidance.cpp
│ ├── log.cpp
│ ├── log.h
│ ├── ngram-cache.cpp
│ ├── ngram-cache.h
│ ├── ngram-map.cpp
│ ├── ngram-map.h
│ ├── ngram-mod.cpp
│ ├── ngram-mod.h
│ ├── peg-parser.cpp
│ ├── peg-parser.h
│ ├── preset.cpp
│ ├── preset.h
│ ├── reasoning-budget.cpp
│ ├── reasoning-budget.h
│ ├── regex-partial.cpp
│ ├── regex-partial.h
│ ├── sampling.cpp
│ ├── sampling.h
│ ├── speculative.cpp
│ ├── speculative.h
│ ├── unicode.cpp
│ └── unicode.h
├── convert_hf_to_gguf.py
├── convert_hf_to_gguf_update.py
├── convert_llama_ggml_to_gguf.py
├── convert_lora_to_gguf.py
├── docs/
│ ├── android.md
│ ├── autoparser.md
│ ├── backend/
│ │ ├── BLIS.md
│ │ ├── CANN.md
│ │ ├── CUDA-FEDORA.md
│ │ ├── OPENCL.md
│ │ ├── OPENVINO.md
│ │ ├── SYCL.md
│ │ ├── VirtGPU/
│ │ │ ├── configuration.md
│ │ │ └── development.md
│ │ ├── VirtGPU.md
│ │ ├── ZenDNN.md
│ │ ├── snapdragon/
│ │ │ ├── CMakeUserPresets.json
│ │ │ ├── README.md
│ │ │ ├── developer.md
│ │ │ └── windows.md
│ │ └── zDNN.md
│ ├── build-riscv64-spacemit.md
│ ├── build-s390x.md
│ ├── build.md
│ ├── development/
│ │ ├── HOWTO-add-model.md
│ │ ├── debugging-tests.md
│ │ ├── llama-star/
│ │ │ └── idea-arch.key
│ │ ├── parsing.md
│ │ └── token_generation_performance_tips.md
│ ├── docker.md
│ ├── function-calling.md
│ ├── install.md
│ ├── llguidance.md
│ ├── multimodal/
│ │ ├── MobileVLM.md
│ │ ├── gemma3.md
│ │ ├── glmedge.md
│ │ ├── granitevision.md
│ │ ├── llava.md
│ │ ├── minicpmo2.6.md
│ │ ├── minicpmo4.0.md
│ │ ├── minicpmv2.5.md
│ │ ├── minicpmv2.6.md
│ │ ├── minicpmv4.0.md
│ │ └── minicpmv4.5.md
│ ├── multimodal.md
│ ├── ops/
│ │ ├── BLAS.csv
│ │ ├── CANN.csv
│ │ ├── CPU.csv
│ │ ├── CUDA.csv
│ │ ├── Metal.csv
│ │ ├── OpenCL.csv
│ │ ├── SYCL.csv
│ │ ├── Vulkan.csv
│ │ ├── WebGPU.csv
│ │ ├── ZenDNN.csv
│ │ └── zDNN.csv
│ ├── ops.md
│ ├── preset.md
│ └── speculative.md
├── examples/
│ ├── CMakeLists.txt
│ ├── batched/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── batched.cpp
│ ├── batched.swift/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── Package.swift
│ │ ├── README.md
│ │ └── Sources/
│ │ └── main.swift
│ ├── convert-llama2c-to-ggml/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── convert-llama2c-to-ggml.cpp
│ ├── convert_legacy_llama.py
│ ├── debug/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── debug.cpp
│ ├── deprecation-warning/
│ │ ├── README.md
│ │ └── deprecation-warning.cpp
│ ├── diffusion/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── diffusion-cli.cpp
│ ├── embedding/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── embedding.cpp
│ ├── eval-callback/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── eval-callback.cpp
│ ├── gen-docs/
│ │ ├── CMakeLists.txt
│ │ └── gen-docs.cpp
│ ├── gguf/
│ │ ├── CMakeLists.txt
│ │ └── gguf.cpp
│ ├── gguf-hash/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── deps/
│ │ │ ├── rotate-bits/
│ │ │ │ ├── package.json
│ │ │ │ └── rotate-bits.h
│ │ │ ├── sha1/
│ │ │ │ ├── package.json
│ │ │ │ ├── sha1.c
│ │ │ │ └── sha1.h
│ │ │ ├── sha256/
│ │ │ │ ├── package.json
│ │ │ │ ├── sha256.c
│ │ │ │ └── sha256.h
│ │ │ └── xxhash/
│ │ │ ├── clib.json
│ │ │ ├── xxhash.c
│ │ │ └── xxhash.h
│ │ └── gguf-hash.cpp
│ ├── idle/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── idle.cpp
│ ├── json_schema_pydantic_example.py
│ ├── json_schema_to_grammar.py
│ ├── llama.android/
│ │ ├── .gitignore
│ │ ├── app/
│ │ │ ├── .gitignore
│ │ │ ├── build.gradle.kts
│ │ │ ├── proguard-rules.pro
│ │ │ └── src/
│ │ │ └── main/
│ │ │ ├── AndroidManifest.xml
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── example/
│ │ │ │ └── llama/
│ │ │ │ ├── MainActivity.kt
│ │ │ │ └── MessageAdapter.kt
│ │ │ └── res/
│ │ │ ├── drawable/
│ │ │ │ ├── bg_assistant_message.xml
│ │ │ │ ├── bg_user_message.xml
│ │ │ │ ├── ic_launcher_background.xml
│ │ │ │ ├── ic_launcher_foreground.xml
│ │ │ │ ├── outline_folder_open_24.xml
│ │ │ │ └── outline_send_24.xml
│ │ │ ├── layout/
│ │ │ │ ├── activity_main.xml
│ │ │ │ ├── item_message_assistant.xml
│ │ │ │ └── item_message_user.xml
│ │ │ ├── mipmap-anydpi/
│ │ │ │ ├── ic_launcher.xml
│ │ │ │ └── ic_launcher_round.xml
│ │ │ ├── values/
│ │ │ │ ├── colors.xml
│ │ │ │ ├── strings.xml
│ │ │ │ └── themes.xml
│ │ │ └── xml/
│ │ │ ├── backup_rules.xml
│ │ │ └── data_extraction_rules.xml
│ │ ├── build.gradle.kts
│ │ ├── gradle/
│ │ │ ├── libs.versions.toml
│ │ │ └── wrapper/
│ │ │ ├── gradle-wrapper.jar
│ │ │ └── gradle-wrapper.properties
│ │ ├── gradle.properties
│ │ ├── gradlew
│ │ ├── lib/
│ │ │ ├── .gitignore
│ │ │ ├── build.gradle.kts
│ │ │ ├── consumer-rules.pro
│ │ │ ├── proguard-rules.pro
│ │ │ └── src/
│ │ │ ├── androidTest/
│ │ │ │ └── java/
│ │ │ │ └── android/
│ │ │ │ └── llama/
│ │ │ │ └── cpp/
│ │ │ │ └── ExampleInstrumentedTest.kt
│ │ │ ├── main/
│ │ │ │ ├── AndroidManifest.xml
│ │ │ │ ├── cpp/
│ │ │ │ │ ├── CMakeLists.txt
│ │ │ │ │ ├── ai_chat.cpp
│ │ │ │ │ └── logging.h
│ │ │ │ └── java/
│ │ │ │ └── com/
│ │ │ │ └── arm/
│ │ │ │ └── aichat/
│ │ │ │ ├── AiChat.kt
│ │ │ │ ├── InferenceEngine.kt
│ │ │ │ ├── gguf/
│ │ │ │ │ ├── FileType.kt
│ │ │ │ │ ├── GgufMetadata.kt
│ │ │ │ │ └── GgufMetadataReader.kt
│ │ │ │ └── internal/
│ │ │ │ ├── InferenceEngineImpl.kt
│ │ │ │ └── gguf/
│ │ │ │ └── GgufMetadataReaderImpl.kt
│ │ │ └── test/
│ │ │ └── java/
│ │ │ └── android/
│ │ │ └── llama/
│ │ │ └── cpp/
│ │ │ └── ExampleUnitTest.kt
│ │ └── settings.gradle.kts
│ ├── llama.swiftui/
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── llama.cpp.swift/
│ │ │ └── LibLlama.swift
│ │ ├── llama.swiftui/
│ │ │ ├── Assets.xcassets/
│ │ │ │ ├── AppIcon.appiconset/
│ │ │ │ │ └── Contents.json
│ │ │ │ └── Contents.json
│ │ │ ├── Models/
│ │ │ │ └── LlamaState.swift
│ │ │ ├── Resources/
│ │ │ │ └── models/
│ │ │ │ └── .gitignore
│ │ │ ├── UI/
│ │ │ │ ├── ContentView.swift
│ │ │ │ ├── DownloadButton.swift
│ │ │ │ ├── InputButton.swift
│ │ │ │ └── LoadCustomButton.swift
│ │ │ └── llama_swiftuiApp.swift
│ │ └── llama.swiftui.xcodeproj/
│ │ ├── project.pbxproj
│ │ └── project.xcworkspace/
│ │ └── contents.xcworkspacedata
│ ├── llama.vim
│ ├── lookahead/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── lookahead.cpp
│ ├── lookup/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── lookup-create.cpp
│ │ ├── lookup-merge.cpp
│ │ ├── lookup-stats.cpp
│ │ └── lookup.cpp
│ ├── model-conversion/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── scripts/
│ │ ├── causal/
│ │ │ ├── compare-embeddings-logits.sh
│ │ │ ├── compare-logits.py
│ │ │ ├── convert-model.sh
│ │ │ ├── modelcard.template
│ │ │ ├── run-casual-gen-embeddings-org.py
│ │ │ ├── run-converted-model-embeddings-logits.sh
│ │ │ ├── run-converted-model.sh
│ │ │ └── run-org-model.py
│ │ ├── embedding/
│ │ │ ├── compare-embeddings-logits.sh
│ │ │ ├── convert-model.sh
│ │ │ ├── modelcard.template
│ │ │ ├── run-converted-model.sh
│ │ │ └── run-original-model.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── check-nmse.py
│ │ ├── common.py
│ │ ├── compare_tokens.py
│ │ ├── create-collection-add-model.sh
│ │ ├── curl-embedding-server.sh
│ │ ├── hf-add-model-to-collection.py
│ │ ├── hf-create-collection.py
│ │ ├── hf-create-model.py
│ │ ├── hf-upload-gguf-model.py
│ │ ├── inspect-converted-model.sh
│ │ ├── inspect-org-model.py
│ │ ├── perplexity-gen.sh
│ │ ├── perplexity-run-simple.sh
│ │ ├── perplexity-run.sh
│ │ ├── quantize.sh
│ │ ├── run-embedding-server.sh
│ │ └── semantic_check.py
│ ├── parallel/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── parallel.cpp
│ ├── passkey/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── passkey.cpp
│ ├── pydantic_models_to_grammar.py
│ ├── pydantic_models_to_grammar_examples.py
│ ├── reason-act.sh
│ ├── regex_to_grammar.py
│ ├── retrieval/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── retrieval.cpp
│ ├── save-load-state/
│ │ ├── CMakeLists.txt
│ │ └── save-load-state.cpp
│ ├── server-llama2-13B.sh
│ ├── server_embd.py
│ ├── simple/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── simple.cpp
│ ├── simple-chat/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── simple-chat.cpp
│ ├── simple-cmake-pkg/
│ │ ├── .gitignore
│ │ ├── CMakeLists.txt
│ │ └── README.md
│ ├── speculative/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── speculative.cpp
│ ├── speculative-simple/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── speculative-simple.cpp
│ ├── sycl/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── build.sh
│ │ ├── ls-sycl-device.cpp
│ │ ├── run-llama2.sh
│ │ ├── test.sh
│ │ ├── win-build-sycl.bat
│ │ ├── win-run-llama2.bat
│ │ └── win-test.bat
│ ├── training/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── finetune.cpp
│ └── ts-type-to-grammar.sh
├── flake.nix
├── ggml/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── cmake/
│ │ ├── GitVars.cmake
│ │ ├── common.cmake
│ │ └── ggml-config.cmake.in
│ ├── include/
│ │ ├── ggml-alloc.h
│ │ ├── ggml-backend.h
│ │ ├── ggml-blas.h
│ │ ├── ggml-cann.h
│ │ ├── ggml-cpp.h
│ │ ├── ggml-cpu.h
│ │ ├── ggml-cuda.h
│ │ ├── ggml-hexagon.h
│ │ ├── ggml-metal.h
│ │ ├── ggml-opencl.h
│ │ ├── ggml-openvino.h
│ │ ├── ggml-opt.h
│ │ ├── ggml-rpc.h
│ │ ├── ggml-sycl.h
│ │ ├── ggml-virtgpu.h
│ │ ├── ggml-vulkan.h
│ │ ├── ggml-webgpu.h
│ │ ├── ggml-zdnn.h
│ │ ├── ggml-zendnn.h
│ │ ├── ggml.h
│ │ └── gguf.h
│ └── src/
│ ├── CMakeLists.txt
│ ├── ggml-alloc.c
│ ├── ggml-backend-dl.cpp
│ ├── ggml-backend-dl.h
│ ├── ggml-backend-impl.h
│ ├── ggml-backend-reg.cpp
│ ├── ggml-backend.cpp
│ ├── ggml-blas/
│ │ ├── CMakeLists.txt
│ │ └── ggml-blas.cpp
│ ├── ggml-cann/
│ │ ├── CMakeLists.txt
│ │ ├── acl_tensor.cpp
│ │ ├── acl_tensor.h
│ │ ├── aclnn_ops.cpp
│ │ ├── aclnn_ops.h
│ │ ├── common.h
│ │ └── ggml-cann.cpp
│ ├── ggml-common.h
│ ├── ggml-cpu/
│ │ ├── CMakeLists.txt
│ │ ├── amx/
│ │ │ ├── amx.cpp
│ │ │ ├── amx.h
│ │ │ ├── common.h
│ │ │ ├── mmq.cpp
│ │ │ └── mmq.h
│ │ ├── arch/
│ │ │ ├── arm/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ ├── quants.c
│ │ │ │ └── repack.cpp
│ │ │ ├── loongarch/
│ │ │ │ └── quants.c
│ │ │ ├── powerpc/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ └── quants.c
│ │ │ ├── riscv/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ ├── quants.c
│ │ │ │ └── repack.cpp
│ │ │ ├── s390/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ └── quants.c
│ │ │ ├── wasm/
│ │ │ │ └── quants.c
│ │ │ └── x86/
│ │ │ ├── cpu-feats.cpp
│ │ │ ├── quants.c
│ │ │ └── repack.cpp
│ │ ├── arch-fallback.h
│ │ ├── binary-ops.cpp
│ │ ├── binary-ops.h
│ │ ├── cmake/
│ │ │ └── FindSIMD.cmake
│ │ ├── common.h
│ │ ├── ggml-cpu-impl.h
│ │ ├── ggml-cpu.c
│ │ ├── ggml-cpu.cpp
│ │ ├── hbm.cpp
│ │ ├── hbm.h
│ │ ├── kleidiai/
│ │ │ ├── kernels.cpp
│ │ │ ├── kernels.h
│ │ │ ├── kleidiai.cpp
│ │ │ └── kleidiai.h
│ │ ├── llamafile/
│ │ │ ├── sgemm.cpp
│ │ │ └── sgemm.h
│ │ ├── ops.cpp
│ │ ├── ops.h
│ │ ├── quants.c
│ │ ├── quants.h
│ │ ├── repack.cpp
│ │ ├── repack.h
│ │ ├── simd-gemm.h
│ │ ├── simd-mappings.h
│ │ ├── spacemit/
│ │ │ ├── ime.cpp
│ │ │ ├── ime.h
│ │ │ ├── ime1_kernels.cpp
│ │ │ └── ime_kernels.h
│ │ ├── traits.cpp
│ │ ├── traits.h
│ │ ├── unary-ops.cpp
│ │ ├── unary-ops.h
│ │ ├── vec.cpp
│ │ └── vec.h
│ ├── ggml-cuda/
│ │ ├── CMakeLists.txt
│ │ ├── acc.cu
│ │ ├── acc.cuh
│ │ ├── add-id.cu
│ │ ├── add-id.cuh
│ │ ├── arange.cu
│ │ ├── arange.cuh
│ │ ├── argmax.cu
│ │ ├── argmax.cuh
│ │ ├── argsort.cu
│ │ ├── argsort.cuh
│ │ ├── binbcast.cu
│ │ ├── binbcast.cuh
│ │ ├── clamp.cu
│ │ ├── clamp.cuh
│ │ ├── common.cuh
│ │ ├── concat.cu
│ │ ├── concat.cuh
│ │ ├── conv-transpose-1d.cu
│ │ ├── conv-transpose-1d.cuh
│ │ ├── conv2d-dw.cu
│ │ ├── conv2d-dw.cuh
│ │ ├── conv2d-transpose.cu
│ │ ├── conv2d-transpose.cuh
│ │ ├── conv2d.cu
│ │ ├── conv2d.cuh
│ │ ├── convert.cu
│ │ ├── convert.cuh
│ │ ├── count-equal.cu
│ │ ├── count-equal.cuh
│ │ ├── cp-async.cuh
│ │ ├── cpy-utils.cuh
│ │ ├── cpy.cu
│ │ ├── cpy.cuh
│ │ ├── cross-entropy-loss.cu
│ │ ├── cross-entropy-loss.cuh
│ │ ├── cumsum.cu
│ │ ├── cumsum.cuh
│ │ ├── dequantize.cuh
│ │ ├── diag.cu
│ │ ├── diag.cuh
│ │ ├── diagmask.cu
│ │ ├── diagmask.cuh
│ │ ├── fattn-common.cuh
│ │ ├── fattn-mma-f16.cuh
│ │ ├── fattn-tile.cu
│ │ ├── fattn-tile.cuh
│ │ ├── fattn-vec.cuh
│ │ ├── fattn-wmma-f16.cu
│ │ ├── fattn-wmma-f16.cuh
│ │ ├── fattn.cu
│ │ ├── fattn.cuh
│ │ ├── fill.cu
│ │ ├── fill.cuh
│ │ ├── gated_delta_net.cu
│ │ ├── gated_delta_net.cuh
│ │ ├── getrows.cu
│ │ ├── getrows.cuh
│ │ ├── ggml-cuda.cu
│ │ ├── gla.cu
│ │ ├── gla.cuh
│ │ ├── im2col.cu
│ │ ├── im2col.cuh
│ │ ├── mean.cu
│ │ ├── mean.cuh
│ │ ├── mma.cuh
│ │ ├── mmf.cu
│ │ ├── mmf.cuh
│ │ ├── mmid.cu
│ │ ├── mmid.cuh
│ │ ├── mmq.cu
│ │ ├── mmq.cuh
│ │ ├── mmvf.cu
│ │ ├── mmvf.cuh
│ │ ├── mmvq.cu
│ │ ├── mmvq.cuh
│ │ ├── norm.cu
│ │ ├── norm.cuh
│ │ ├── opt-step-adamw.cu
│ │ ├── opt-step-adamw.cuh
│ │ ├── opt-step-sgd.cu
│ │ ├── opt-step-sgd.cuh
│ │ ├── out-prod.cu
│ │ ├── out-prod.cuh
│ │ ├── pad.cu
│ │ ├── pad.cuh
│ │ ├── pad_reflect_1d.cu
│ │ ├── pad_reflect_1d.cuh
│ │ ├── pool2d.cu
│ │ ├── pool2d.cuh
│ │ ├── quantize.cu
│ │ ├── quantize.cuh
│ │ ├── reduce_rows.cuh
│ │ ├── roll.cu
│ │ ├── roll.cuh
│ │ ├── rope.cu
│ │ ├── rope.cuh
│ │ ├── scale.cu
│ │ ├── scale.cuh
│ │ ├── set-rows.cu
│ │ ├── set-rows.cuh
│ │ ├── set.cu
│ │ ├── set.cuh
│ │ ├── softcap.cu
│ │ ├── softcap.cuh
│ │ ├── softmax.cu
│ │ ├── softmax.cuh
│ │ ├── solve_tri.cu
│ │ ├── solve_tri.cuh
│ │ ├── ssm-conv.cu
│ │ ├── ssm-conv.cuh
│ │ ├── ssm-scan.cu
│ │ ├── ssm-scan.cuh
│ │ ├── sum.cu
│ │ ├── sum.cuh
│ │ ├── sumrows.cu
│ │ ├── sumrows.cuh
│ │ ├── template-instances/
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_32.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_32.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_64-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_8.cu
│ │ │ ├── fattn-tile-instance-dkq112-dv112.cu
│ │ │ ├── fattn-tile-instance-dkq128-dv128.cu
│ │ │ ├── fattn-tile-instance-dkq256-dv256.cu
│ │ │ ├── fattn-tile-instance-dkq40-dv40.cu
│ │ │ ├── fattn-tile-instance-dkq512-dv512.cu
│ │ │ ├── fattn-tile-instance-dkq576-dv512.cu
│ │ │ ├── fattn-tile-instance-dkq64-dv64.cu
│ │ │ ├── fattn-tile-instance-dkq72-dv72.cu
│ │ │ ├── fattn-tile-instance-dkq80-dv80.cu
│ │ │ ├── fattn-tile-instance-dkq96-dv96.cu
│ │ │ ├── fattn-vec-instance-bf16-bf16.cu
│ │ │ ├── fattn-vec-instance-bf16-f16.cu
│ │ │ ├── fattn-vec-instance-bf16-q4_0.cu
│ │ │ ├── fattn-vec-instance-bf16-q4_1.cu
│ │ │ ├── fattn-vec-instance-bf16-q5_0.cu
│ │ │ ├── fattn-vec-instance-bf16-q5_1.cu
│ │ │ ├── fattn-vec-instance-bf16-q8_0.cu
│ │ │ ├── fattn-vec-instance-f16-bf16.cu
│ │ │ ├── fattn-vec-instance-f16-f16.cu
│ │ │ ├── fattn-vec-instance-f16-q4_0.cu
│ │ │ ├── fattn-vec-instance-f16-q4_1.cu
│ │ │ ├── fattn-vec-instance-f16-q5_0.cu
│ │ │ ├── fattn-vec-instance-f16-q5_1.cu
│ │ │ ├── fattn-vec-instance-f16-q8_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q4_0-f16.cu
│ │ │ ├── fattn-vec-instance-q4_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q4_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q4_0-q8_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-bf16.cu
│ │ │ ├── fattn-vec-instance-q4_1-f16.cu
│ │ │ ├── fattn-vec-instance-q4_1-q4_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-q4_1.cu
│ │ │ ├── fattn-vec-instance-q4_1-q5_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-q5_1.cu
│ │ │ ├── fattn-vec-instance-q4_1-q8_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q5_0-f16.cu
│ │ │ ├── fattn-vec-instance-q5_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q5_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q5_0-q8_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-bf16.cu
│ │ │ ├── fattn-vec-instance-q5_1-f16.cu
│ │ │ ├── fattn-vec-instance-q5_1-q4_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-q4_1.cu
│ │ │ ├── fattn-vec-instance-q5_1-q5_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-q5_1.cu
│ │ │ ├── fattn-vec-instance-q5_1-q8_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q8_0-f16.cu
│ │ │ ├── fattn-vec-instance-q8_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q8_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q8_0-q8_0.cu
│ │ │ ├── generate_cu_files.py
│ │ │ ├── mmf-instance-ncols_1.cu
│ │ │ ├── mmf-instance-ncols_10.cu
│ │ │ ├── mmf-instance-ncols_11.cu
│ │ │ ├── mmf-instance-ncols_12.cu
│ │ │ ├── mmf-instance-ncols_13.cu
│ │ │ ├── mmf-instance-ncols_14.cu
│ │ │ ├── mmf-instance-ncols_15.cu
│ │ │ ├── mmf-instance-ncols_16.cu
│ │ │ ├── mmf-instance-ncols_2.cu
│ │ │ ├── mmf-instance-ncols_3.cu
│ │ │ ├── mmf-instance-ncols_4.cu
│ │ │ ├── mmf-instance-ncols_5.cu
│ │ │ ├── mmf-instance-ncols_6.cu
│ │ │ ├── mmf-instance-ncols_7.cu
│ │ │ ├── mmf-instance-ncols_8.cu
│ │ │ ├── mmf-instance-ncols_9.cu
│ │ │ ├── mmq-instance-iq1_s.cu
│ │ │ ├── mmq-instance-iq2_s.cu
│ │ │ ├── mmq-instance-iq2_xs.cu
│ │ │ ├── mmq-instance-iq2_xxs.cu
│ │ │ ├── mmq-instance-iq3_s.cu
│ │ │ ├── mmq-instance-iq3_xxs.cu
│ │ │ ├── mmq-instance-iq4_nl.cu
│ │ │ ├── mmq-instance-iq4_xs.cu
│ │ │ ├── mmq-instance-mxfp4.cu
│ │ │ ├── mmq-instance-nvfp4.cu
│ │ │ ├── mmq-instance-q2_k.cu
│ │ │ ├── mmq-instance-q3_k.cu
│ │ │ ├── mmq-instance-q4_0.cu
│ │ │ ├── mmq-instance-q4_1.cu
│ │ │ ├── mmq-instance-q4_k.cu
│ │ │ ├── mmq-instance-q5_0.cu
│ │ │ ├── mmq-instance-q5_1.cu
│ │ │ ├── mmq-instance-q5_k.cu
│ │ │ ├── mmq-instance-q6_k.cu
│ │ │ └── mmq-instance-q8_0.cu
│ │ ├── top-k.cu
│ │ ├── top-k.cuh
│ │ ├── topk-moe.cu
│ │ ├── topk-moe.cuh
│ │ ├── tri.cu
│ │ ├── tri.cuh
│ │ ├── tsembd.cu
│ │ ├── tsembd.cuh
│ │ ├── unary.cu
│ │ ├── unary.cuh
│ │ ├── upscale.cu
│ │ ├── upscale.cuh
│ │ ├── vecdotq.cuh
│ │ ├── vendors/
│ │ │ ├── cuda.h
│ │ │ ├── hip.h
│ │ │ └── musa.h
│ │ ├── wkv.cu
│ │ └── wkv.cuh
│ ├── ggml-hexagon/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-hexagon.cpp
│ │ ├── htp/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── act-ops.c
│ │ │ ├── argsort-ops.c
│ │ │ ├── binary-ops.c
│ │ │ ├── cmake-toolchain.cmake
│ │ │ ├── cpy-ops.c
│ │ │ ├── cumsum-ops.c
│ │ │ ├── flash-attn-ops.c
│ │ │ ├── get-rows-ops.c
│ │ │ ├── hex-dma.c
│ │ │ ├── hex-dma.h
│ │ │ ├── hex-dump.h
│ │ │ ├── hex-fastdiv.h
│ │ │ ├── hex-utils.h
│ │ │ ├── hmx-matmul-ops.c
│ │ │ ├── hmx-ops.h
│ │ │ ├── hmx-profile.h
│ │ │ ├── hmx-utils.h
│ │ │ ├── htp-ctx.h
│ │ │ ├── htp-msg.h
│ │ │ ├── htp-ops.h
│ │ │ ├── htp_iface.idl
│ │ │ ├── hvx-arith.h
│ │ │ ├── hvx-base.h
│ │ │ ├── hvx-copy.h
│ │ │ ├── hvx-div.h
│ │ │ ├── hvx-dump.h
│ │ │ ├── hvx-exp.h
│ │ │ ├── hvx-floor.h
│ │ │ ├── hvx-inverse.h
│ │ │ ├── hvx-reduce.h
│ │ │ ├── hvx-scale.h
│ │ │ ├── hvx-sigmoid.h
│ │ │ ├── hvx-sqrt.h
│ │ │ ├── hvx-types.h
│ │ │ ├── hvx-utils.h
│ │ │ ├── main.c
│ │ │ ├── matmul-ops.c
│ │ │ ├── repeat-ops.c
│ │ │ ├── rope-ops.c
│ │ │ ├── set-rows-ops.c
│ │ │ ├── softmax-ops.c
│ │ │ ├── ssm-conv.c
│ │ │ ├── sum-rows-ops.c
│ │ │ ├── unary-ops.c
│ │ │ ├── worker-pool.c
│ │ │ └── worker-pool.h
│ │ ├── htp-drv.cpp
│ │ ├── htp-drv.h
│ │ ├── libdl.h
│ │ ├── libggml-htp.inf
│ │ └── op-desc.h
│ ├── ggml-hip/
│ │ └── CMakeLists.txt
│ ├── ggml-impl.h
│ ├── ggml-metal/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-metal-common.cpp
│ │ ├── ggml-metal-common.h
│ │ ├── ggml-metal-context.h
│ │ ├── ggml-metal-context.m
│ │ ├── ggml-metal-device.cpp
│ │ ├── ggml-metal-device.h
│ │ ├── ggml-metal-device.m
│ │ ├── ggml-metal-impl.h
│ │ ├── ggml-metal-ops.cpp
│ │ ├── ggml-metal-ops.h
│ │ ├── ggml-metal.cpp
│ │ └── ggml-metal.metal
│ ├── ggml-musa/
│ │ ├── CMakeLists.txt
│ │ ├── mudnn.cu
│ │ └── mudnn.cuh
│ ├── ggml-opencl/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-opencl.cpp
│ │ └── kernels/
│ │ ├── add.cl
│ │ ├── add_id.cl
│ │ ├── argsort.cl
│ │ ├── clamp.cl
│ │ ├── concat.cl
│ │ ├── conv2d.cl
│ │ ├── conv2d_f16_f32.cl
│ │ ├── cpy.cl
│ │ ├── cumsum.cl
│ │ ├── cvt.cl
│ │ ├── diag.cl
│ │ ├── diag_mask_inf.cl
│ │ ├── div.cl
│ │ ├── embed_kernel.py
│ │ ├── exp.cl
│ │ ├── expm1.cl
│ │ ├── fill.cl
│ │ ├── flash_attn_f16.cl
│ │ ├── flash_attn_f32.cl
│ │ ├── flash_attn_f32_f16.cl
│ │ ├── gelu.cl
│ │ ├── gemm_moe_mxfp4_f32.cl
│ │ ├── gemm_noshuffle_q4_1_f32.cl
│ │ ├── gemm_noshuffle_q4_k_f32.cl
│ │ ├── gemm_noshuffle_q6_k_f32.cl
│ │ ├── gemv_moe_mxfp4_f32.cl
│ │ ├── gemv_noshuffle.cl
│ │ ├── gemv_noshuffle_general.cl
│ │ ├── gemv_noshuffle_general_q8_0_f32.cl
│ │ ├── gemv_noshuffle_q4_1_f32.cl
│ │ ├── gemv_noshuffle_q4_k_f32.cl
│ │ ├── gemv_noshuffle_q6_k_f32.cl
│ │ ├── get_rows.cl
│ │ ├── glu.cl
│ │ ├── group_norm.cl
│ │ ├── im2col_f16.cl
│ │ ├── im2col_f32.cl
│ │ ├── l2_norm.cl
│ │ ├── mean.cl
│ │ ├── mul.cl
│ │ ├── mul_mat_Ab_Bi_8x4.cl
│ │ ├── mul_mat_f16_f32.cl
│ │ ├── mul_mm_f16_f32_kq_kqv.cl
│ │ ├── mul_mm_f16_f32_l4_lm.cl
│ │ ├── mul_mm_f32_f32_l4_lm.cl
│ │ ├── mul_mm_q4_0_f32_l4_lm.cl
│ │ ├── mul_mm_q4_1_f32_l4_lm.cl
│ │ ├── mul_mm_q4_k_f32_l4_lm.cl
│ │ ├── mul_mm_q6_k_f32_l4_lm.cl
│ │ ├── mul_mm_q8_0_f32_8x4.cl
│ │ ├── mul_mm_q8_0_f32_l4_lm.cl
│ │ ├── mul_mv_f16_f16.cl
│ │ ├── mul_mv_f16_f32.cl
│ │ ├── mul_mv_f16_f32_1row.cl
│ │ ├── mul_mv_f16_f32_l4.cl
│ │ ├── mul_mv_f32_f32.cl
│ │ ├── mul_mv_id_mxfp4_f32.cl
│ │ ├── mul_mv_id_mxfp4_f32_flat.cl
│ │ ├── mul_mv_id_q4_0_f32_8x_flat.cl
│ │ ├── mul_mv_id_q8_0_f32.cl
│ │ ├── mul_mv_id_q8_0_f32_flat.cl
│ │ ├── mul_mv_mxfp4_f32.cl
│ │ ├── mul_mv_mxfp4_f32_flat.cl
│ │ ├── mul_mv_q4_0_f32.cl
│ │ ├── mul_mv_q4_0_f32_1d_16x_flat.cl
│ │ ├── mul_mv_q4_0_f32_1d_8x_flat.cl
│ │ ├── mul_mv_q4_0_f32_8x_flat.cl
│ │ ├── mul_mv_q4_0_f32_v.cl
│ │ ├── mul_mv_q4_1_f32.cl
│ │ ├── mul_mv_q4_1_f32_flat.cl
│ │ ├── mul_mv_q4_k_f32.cl
│ │ ├── mul_mv_q4_k_f32_flat.cl
│ │ ├── mul_mv_q6_k_f32.cl
│ │ ├── mul_mv_q6_k_f32_flat.cl
│ │ ├── mul_mv_q8_0_f32.cl
│ │ ├── mul_mv_q8_0_f32_flat.cl
│ │ ├── neg.cl
│ │ ├── norm.cl
│ │ ├── pad.cl
│ │ ├── relu.cl
│ │ ├── repeat.cl
│ │ ├── rms_norm.cl
│ │ ├── rope.cl
│ │ ├── scale.cl
│ │ ├── set_rows.cl
│ │ ├── sigmoid.cl
│ │ ├── silu.cl
│ │ ├── softmax_4_f16.cl
│ │ ├── softmax_4_f32.cl
│ │ ├── softmax_f16.cl
│ │ ├── softmax_f32.cl
│ │ ├── softplus.cl
│ │ ├── solve_tri.cl
│ │ ├── sqr.cl
│ │ ├── sqrt.cl
│ │ ├── ssm_conv.cl
│ │ ├── sub.cl
│ │ ├── sum_rows.cl
│ │ ├── tanh.cl
│ │ ├── transpose.cl
│ │ ├── tri.cl
│ │ ├── tsembd.cl
│ │ └── upscale.cl
│ ├── ggml-openvino/
│ │ ├── .clang-format
│ │ ├── CMakeLists.txt
│ │ ├── ggml-decoder.cpp
│ │ ├── ggml-decoder.h
│ │ ├── ggml-openvino-extra.cpp
│ │ ├── ggml-openvino-extra.h
│ │ ├── ggml-openvino.cpp
│ │ ├── ggml-quants.cpp
│ │ ├── ggml-quants.h
│ │ ├── openvino/
│ │ │ ├── decoder.h
│ │ │ ├── frontend.cpp
│ │ │ ├── frontend.h
│ │ │ ├── input_model.cpp
│ │ │ ├── input_model.h
│ │ │ ├── node_context.h
│ │ │ ├── op/
│ │ │ │ ├── cont.cpp
│ │ │ │ ├── cpy.cpp
│ │ │ │ ├── flash_attn_ext.cpp
│ │ │ │ ├── get_rows.cpp
│ │ │ │ ├── glu_geglu.cpp
│ │ │ │ ├── glu_swiglu.cpp
│ │ │ │ ├── mulmat.cpp
│ │ │ │ ├── permute.cpp
│ │ │ │ ├── reshape.cpp
│ │ │ │ ├── rms_norm.cpp
│ │ │ │ ├── rope.cpp
│ │ │ │ ├── scale.cpp
│ │ │ │ ├── set_rows.cpp
│ │ │ │ ├── softmax.cpp
│ │ │ │ ├── transpose.cpp
│ │ │ │ ├── unary_silu.cpp
│ │ │ │ └── view.cpp
│ │ │ ├── op_table.cpp
│ │ │ ├── op_table.h
│ │ │ ├── pass/
│ │ │ │ ├── eliminate_zp.cpp
│ │ │ │ ├── eliminate_zp.h
│ │ │ │ ├── fuse_to_sdpa.cpp
│ │ │ │ ├── fuse_to_sdpa.h
│ │ │ │ ├── mark_decompression_convert_constant_folding.h
│ │ │ │ ├── squeeze_matmul.cpp
│ │ │ │ └── squeeze_matmul.h
│ │ │ ├── translate_session.cpp
│ │ │ ├── translate_session.h
│ │ │ ├── utils.cpp
│ │ │ └── utils.h
│ │ ├── utils.cpp
│ │ └── utils.h
│ ├── ggml-opt.cpp
│ ├── ggml-quants.c
│ ├── ggml-quants.h
│ ├── ggml-rpc/
│ │ ├── CMakeLists.txt
│ │ └── ggml-rpc.cpp
│ ├── ggml-sycl/
│ │ ├── CMakeLists.txt
│ │ ├── add-id.cpp
│ │ ├── add-id.hpp
│ │ ├── backend.hpp
│ │ ├── binbcast.cpp
│ │ ├── binbcast.hpp
│ │ ├── common.cpp
│ │ ├── common.hpp
│ │ ├── concat.cpp
│ │ ├── concat.hpp
│ │ ├── conv.cpp
│ │ ├── conv.hpp
│ │ ├── convert.cpp
│ │ ├── convert.hpp
│ │ ├── count-equal.cpp
│ │ ├── count-equal.hpp
│ │ ├── cpy.cpp
│ │ ├── cpy.hpp
│ │ ├── dequantize.hpp
│ │ ├── dmmv.cpp
│ │ ├── dmmv.hpp
│ │ ├── dpct/
│ │ │ └── helper.hpp
│ │ ├── element_wise.cpp
│ │ ├── element_wise.hpp
│ │ ├── fattn-common.hpp
│ │ ├── fattn-tile.cpp
│ │ ├── fattn-tile.hpp
│ │ ├── fattn-vec.hpp
│ │ ├── fattn.cpp
│ │ ├── fattn.hpp
│ │ ├── gated_delta_net.cpp
│ │ ├── gated_delta_net.hpp
│ │ ├── gemm.hpp
│ │ ├── getrows.cpp
│ │ ├── getrows.hpp
│ │ ├── ggml-sycl.cpp
│ │ ├── gla.cpp
│ │ ├── gla.hpp
│ │ ├── im2col.cpp
│ │ ├── im2col.hpp
│ │ ├── mmq.cpp
│ │ ├── mmq.hpp
│ │ ├── mmvq.cpp
│ │ ├── mmvq.hpp
│ │ ├── norm.cpp
│ │ ├── norm.hpp
│ │ ├── outprod.cpp
│ │ ├── outprod.hpp
│ │ ├── pad.cpp
│ │ ├── pad.hpp
│ │ ├── pad_reflect_1d.cpp
│ │ ├── pad_reflect_1d.hpp
│ │ ├── presets.hpp
│ │ ├── quantize.hpp
│ │ ├── quants.hpp
│ │ ├── repeat_back.cpp
│ │ ├── repeat_back.hpp
│ │ ├── roll.cpp
│ │ ├── roll.hpp
│ │ ├── rope.cpp
│ │ ├── rope.hpp
│ │ ├── set.cpp
│ │ ├── set.hpp
│ │ ├── set_rows.cpp
│ │ ├── set_rows.hpp
│ │ ├── softmax.cpp
│ │ ├── softmax.hpp
│ │ ├── ssm_conv.cpp
│ │ ├── ssm_conv.hpp
│ │ ├── sycl_hw.cpp
│ │ ├── sycl_hw.hpp
│ │ ├── template-instances/
│ │ │ ├── fattn-tile-instance-dkq112-dv112.cpp
│ │ │ ├── fattn-tile-instance-dkq128-dv128.cpp
│ │ │ ├── fattn-tile-instance-dkq256-dv256.cpp
│ │ │ ├── fattn-tile-instance-dkq40-dv40.cpp
│ │ │ ├── fattn-tile-instance-dkq576-dv512.cpp
│ │ │ ├── fattn-tile-instance-dkq64-dv64.cpp
│ │ │ ├── fattn-tile-instance-dkq72-dv72.cpp
│ │ │ ├── fattn-tile-instance-dkq80-dv80.cpp
│ │ │ ├── fattn-tile-instance-dkq96-dv96.cpp
│ │ │ ├── fattn-vec-instance-f16-f16.cpp
│ │ │ ├── fattn-vec-instance-f16-q4_0.cpp
│ │ │ ├── fattn-vec-instance-f16-q4_1.cpp
│ │ │ ├── fattn-vec-instance-f16-q5_0.cpp
│ │ │ ├── fattn-vec-instance-f16-q5_1.cpp
│ │ │ ├── fattn-vec-instance-f16-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-f16.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-f16.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q5_1.cpp
│ │ │ └── fattn-vec-instance-q8_0-q8_0.cpp
│ │ ├── tsembd.cpp
│ │ ├── tsembd.hpp
│ │ ├── type.hpp
│ │ ├── upscale.cpp
│ │ ├── upscale.hpp
│ │ ├── vecdotq.hpp
│ │ ├── wkv.cpp
│ │ └── wkv.hpp
│ ├── ggml-threading.cpp
│ ├── ggml-threading.h
│ ├── ggml-virtgpu/
│ │ ├── CMakeLists.txt
│ │ ├── apir_cs_ggml-rpc-front.cpp
│ │ ├── backend/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── apir_cs_ggml-rpc-back.cpp
│ │ │ ├── backend-convert.h
│ │ │ ├── backend-dispatched-backend.cpp
│ │ │ ├── backend-dispatched-buffer-type.cpp
│ │ │ ├── backend-dispatched-buffer.cpp
│ │ │ ├── backend-dispatched-device.cpp
│ │ │ ├── backend-dispatched.cpp
│ │ │ ├── backend-dispatched.gen.h
│ │ │ ├── backend-dispatched.h
│ │ │ ├── backend-virgl-apir.h
│ │ │ ├── backend.cpp
│ │ │ └── shared/
│ │ │ ├── api_remoting.h
│ │ │ ├── apir_backend.gen.h
│ │ │ ├── apir_backend.h
│ │ │ ├── apir_cs.h
│ │ │ ├── apir_cs_ggml.h
│ │ │ └── apir_cs_rpc.h
│ │ ├── ggml-backend-buffer-type.cpp
│ │ ├── ggml-backend-buffer.cpp
│ │ ├── ggml-backend-device.cpp
│ │ ├── ggml-backend-reg.cpp
│ │ ├── ggml-backend.cpp
│ │ ├── ggml-remoting.h
│ │ ├── ggmlremoting_functions.yaml
│ │ ├── include/
│ │ │ └── apir_hw.h
│ │ ├── regenerate_remoting.py
│ │ ├── virtgpu-apir.h
│ │ ├── virtgpu-forward-backend.cpp
│ │ ├── virtgpu-forward-buffer-type.cpp
│ │ ├── virtgpu-forward-buffer.cpp
│ │ ├── virtgpu-forward-device.cpp
│ │ ├── virtgpu-forward-impl.h
│ │ ├── virtgpu-forward.gen.h
│ │ ├── virtgpu-shm.cpp
│ │ ├── virtgpu-shm.h
│ │ ├── virtgpu-utils.cpp
│ │ ├── virtgpu-utils.h
│ │ ├── virtgpu.cpp
│ │ └── virtgpu.h
│ ├── ggml-vulkan/
│ │ ├── CMakeLists.txt
│ │ ├── cmake/
│ │ │ └── host-toolchain.cmake.in
│ │ ├── ggml-vulkan.cpp
│ │ └── vulkan-shaders/
│ │ ├── CMakeLists.txt
│ │ ├── abs.comp
│ │ ├── acc.comp
│ │ ├── add.comp
│ │ ├── add1.comp
│ │ ├── add_id.comp
│ │ ├── arange.comp
│ │ ├── argmax.comp
│ │ ├── argsort.comp
│ │ ├── argsort_large.comp
│ │ ├── ceil.comp
│ │ ├── clamp.comp
│ │ ├── concat.comp
│ │ ├── contig_copy.comp
│ │ ├── conv2d_dw.comp
│ │ ├── conv2d_mm.comp
│ │ ├── conv_transpose_1d.comp
│ │ ├── copy.comp
│ │ ├── copy_from_quant.comp
│ │ ├── copy_to_quant.comp
│ │ ├── copy_transpose.comp
│ │ ├── cos.comp
│ │ ├── count_equal.comp
│ │ ├── count_experts.comp
│ │ ├── cumsum.comp
│ │ ├── cumsum_multipass1.comp
│ │ ├── cumsum_multipass2.comp
│ │ ├── dequant_f32.comp
│ │ ├── dequant_funcs.glsl
│ │ ├── dequant_funcs_cm2.glsl
│ │ ├── dequant_head.glsl
│ │ ├── dequant_iq1_m.comp
│ │ ├── dequant_iq1_s.comp
│ │ ├── dequant_iq2_s.comp
│ │ ├── dequant_iq2_xs.comp
│ │ ├── dequant_iq2_xxs.comp
│ │ ├── dequant_iq3_s.comp
│ │ ├── dequant_iq3_xxs.comp
│ │ ├── dequant_iq4_nl.comp
│ │ ├── dequant_iq4_xs.comp
│ │ ├── dequant_mxfp4.comp
│ │ ├── dequant_q2_k.comp
│ │ ├── dequant_q3_k.comp
│ │ ├── dequant_q4_0.comp
│ │ ├── dequant_q4_1.comp
│ │ ├── dequant_q4_k.comp
│ │ ├── dequant_q5_0.comp
│ │ ├── dequant_q5_1.comp
│ │ ├── dequant_q5_k.comp
│ │ ├── dequant_q6_k.comp
│ │ ├── dequant_q8_0.comp
│ │ ├── diag.comp
│ │ ├── diag_mask_inf.comp
│ │ ├── div.comp
│ │ ├── elu.comp
│ │ ├── exp.comp
│ │ ├── feature-tests/
│ │ │ ├── bfloat16.comp
│ │ │ ├── coopmat.comp
│ │ │ ├── coopmat2.comp
│ │ │ └── integer_dot.comp
│ │ ├── fill.comp
│ │ ├── flash_attn.comp
│ │ ├── flash_attn_base.glsl
│ │ ├── flash_attn_cm1.comp
│ │ ├── flash_attn_cm2.comp
│ │ ├── flash_attn_mask_opt.comp
│ │ ├── flash_attn_split_k_reduce.comp
│ │ ├── floor.comp
│ │ ├── gated_delta_net.comp
│ │ ├── geglu.comp
│ │ ├── geglu_erf.comp
│ │ ├── geglu_quick.comp
│ │ ├── gelu.comp
│ │ ├── gelu_erf.comp
│ │ ├── gelu_quick.comp
│ │ ├── generic_binary_head.glsl
│ │ ├── generic_head.glsl
│ │ ├── generic_unary_head.glsl
│ │ ├── get_rows.comp
│ │ ├── get_rows_quant.comp
│ │ ├── glu_head.glsl
│ │ ├── glu_main.glsl
│ │ ├── group_norm.comp
│ │ ├── hardsigmoid.comp
│ │ ├── hardswish.comp
│ │ ├── im2col.comp
│ │ ├── im2col_3d.comp
│ │ ├── l2_norm.comp
│ │ ├── leaky_relu.comp
│ │ ├── log.comp
│ │ ├── mul.comp
│ │ ├── mul_mat_split_k_reduce.comp
│ │ ├── mul_mat_vec.comp
│ │ ├── mul_mat_vec_base.glsl
│ │ ├── mul_mat_vec_iface.glsl
│ │ ├── mul_mat_vec_iq1_m.comp
│ │ ├── mul_mat_vec_iq1_s.comp
│ │ ├── mul_mat_vec_iq2_s.comp
│ │ ├── mul_mat_vec_iq2_xs.comp
│ │ ├── mul_mat_vec_iq2_xxs.comp
│ │ ├── mul_mat_vec_iq3_s.comp
│ │ ├── mul_mat_vec_iq3_xxs.comp
│ │ ├── mul_mat_vec_nc.comp
│ │ ├── mul_mat_vec_p021.comp
│ │ ├── mul_mat_vec_q2_k.comp
│ │ ├── mul_mat_vec_q3_k.comp
│ │ ├── mul_mat_vec_q4_k.comp
│ │ ├── mul_mat_vec_q5_k.comp
│ │ ├── mul_mat_vec_q6_k.comp
│ │ ├── mul_mat_vecq.comp
│ │ ├── mul_mat_vecq_funcs.glsl
│ │ ├── mul_mm.comp
│ │ ├── mul_mm_cm2.comp
│ │ ├── mul_mm_funcs.glsl
│ │ ├── mul_mm_id_funcs.glsl
│ │ ├── mul_mmq.comp
│ │ ├── mul_mmq_funcs.glsl
│ │ ├── mul_mmq_shmem_types.glsl
│ │ ├── multi_add.comp
│ │ ├── neg.comp
│ │ ├── norm.comp
│ │ ├── opt_step_adamw.comp
│ │ ├── opt_step_sgd.comp
│ │ ├── pad.comp
│ │ ├── pool2d.comp
│ │ ├── quantize_q8_1.comp
│ │ ├── reglu.comp
│ │ ├── relu.comp
│ │ ├── repeat.comp
│ │ ├── repeat_back.comp
│ │ ├── rms_norm.comp
│ │ ├── rms_norm_back.comp
│ │ ├── rms_norm_partials.comp
│ │ ├── roll.comp
│ │ ├── rope_funcs.glsl
│ │ ├── rope_head.glsl
│ │ ├── rope_multi.comp
│ │ ├── rope_neox.comp
│ │ ├── rope_norm.comp
│ │ ├── rope_params.glsl
│ │ ├── rope_vision.comp
│ │ ├── round.comp
│ │ ├── rte.glsl
│ │ ├── scale.comp
│ │ ├── sgn.comp
│ │ ├── sigmoid.comp
│ │ ├── silu.comp
│ │ ├── silu_back.comp
│ │ ├── sin.comp
│ │ ├── soft_max.comp
│ │ ├── soft_max_back.comp
│ │ ├── soft_max_large1.comp
│ │ ├── soft_max_large2.comp
│ │ ├── soft_max_large3.comp
│ │ ├── soft_max_large_common.glsl
│ │ ├── softplus.comp
│ │ ├── solve_tri.comp
│ │ ├── sqrt.comp
│ │ ├── square.comp
│ │ ├── ssm_conv.comp
│ │ ├── ssm_scan.comp
│ │ ├── step.comp
│ │ ├── sub.comp
│ │ ├── sum_rows.comp
│ │ ├── sum_rows.glsl
│ │ ├── swiglu.comp
│ │ ├── swiglu_oai.comp
│ │ ├── tanh.comp
│ │ ├── timestep_embedding.comp
│ │ ├── topk_argsort.comp
│ │ ├── topk_moe.comp
│ │ ├── topk_nary_search.comp
│ │ ├── tri.comp
│ │ ├── trunc.comp
│ │ ├── types.glsl
│ │ ├── upscale.comp
│ │ ├── utils.glsl
│ │ ├── vulkan-shaders-gen.cpp
│ │ ├── wkv6.comp
│ │ ├── wkv7.comp
│ │ └── xielu.comp
│ ├── ggml-webgpu/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-webgpu-shader-lib.hpp
│ │ ├── ggml-webgpu.cpp
│ │ ├── pre_wgsl.hpp
│ │ └── wgsl-shaders/
│ │ ├── argmax.wgsl
│ │ ├── argsort.wgsl
│ │ ├── argsort_merge.wgsl
│ │ ├── binary.wgsl
│ │ ├── common_decls.tmpl
│ │ ├── concat.wgsl
│ │ ├── cpy.wgsl
│ │ ├── cumsum.wgsl
│ │ ├── embed_wgsl.py
│ │ ├── flash_attn.wgsl
│ │ ├── gated_delta_net.wgsl
│ │ ├── get_rows.wgsl
│ │ ├── glu.wgsl
│ │ ├── memset.wgsl
│ │ ├── mul_mat.wgsl
│ │ ├── mul_mat_decls.tmpl
│ │ ├── mul_mat_reg_tile.wgsl
│ │ ├── mul_mat_subgroup_matrix.wgsl
│ │ ├── mul_mat_vec.wgsl
│ │ ├── pad.wgsl
│ │ ├── repeat.wgsl
│ │ ├── rope.wgsl
│ │ ├── row_norm.wgsl
│ │ ├── scale.wgsl
│ │ ├── set.wgsl
│ │ ├── set_rows.wgsl
│ │ ├── soft_max.wgsl
│ │ ├── solve_tri.wgsl
│ │ ├── ssm_conv.wgsl
│ │ ├── sum_rows.wgsl
│ │ └── unary.wgsl
│ ├── ggml-zdnn/
│ │ ├── .gitignore
│ │ ├── CMakeLists.txt
│ │ ├── common.hpp
│ │ ├── ggml-zdnn.cpp
│ │ ├── mmf.cpp
│ │ ├── mmf.hpp
│ │ ├── utils.cpp
│ │ └── utils.hpp
│ ├── ggml-zendnn/
│ │ ├── CMakeLists.txt
│ │ └── ggml-zendnn.cpp
│ ├── ggml.c
│ ├── ggml.cpp
│ └── gguf.cpp
├── gguf-py/
│ ├── LICENSE
│ ├── README.md
│ ├── examples/
│ │ ├── reader.py
│ │ └── writer.py
│ ├── gguf/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── gguf.py
│ │ ├── gguf_reader.py
│ │ ├── gguf_writer.py
│ │ ├── lazy.py
│ │ ├── metadata.py
│ │ ├── py.typed
│ │ ├── quants.py
│ │ ├── scripts/
│ │ │ ├── gguf_convert_endian.py
│ │ │ ├── gguf_dump.py
│ │ │ ├── gguf_editor_gui.py
│ │ │ ├── gguf_hash.py
│ │ │ ├── gguf_new_metadata.py
│ │ │ └── gguf_set_metadata.py
│ │ ├── tensor_mapping.py
│ │ ├── utility.py
│ │ └── vocab.py
│ ├── pyproject.toml
│ └── tests/
│ ├── __init__.py
│ ├── test_metadata.py
│ └── test_quants.py
├── grammars/
│ ├── README.md
│ ├── arithmetic.gbnf
│ ├── c.gbnf
│ ├── chess.gbnf
│ ├── english.gbnf
│ ├── japanese.gbnf
│ ├── json.gbnf
│ ├── json_arr.gbnf
│ └── list.gbnf
├── include/
│ ├── llama-cpp.h
│ └── llama.h
├── licenses/
│ └── LICENSE-jsonhpp
├── models/
│ ├── .editorconfig
│ ├── ggml-vocab-aquila.gguf
│ ├── ggml-vocab-baichuan.gguf
│ ├── ggml-vocab-bert-bge.gguf
│ ├── ggml-vocab-bert-bge.gguf.inp
│ ├── ggml-vocab-bert-bge.gguf.out
│ ├── ggml-vocab-command-r.gguf
│ ├── ggml-vocab-command-r.gguf.inp
│ ├── ggml-vocab-command-r.gguf.out
│ ├── ggml-vocab-deepseek-coder.gguf
│ ├── ggml-vocab-deepseek-coder.gguf.inp
│ ├── ggml-vocab-deepseek-coder.gguf.out
│ ├── ggml-vocab-deepseek-llm.gguf
│ ├── ggml-vocab-deepseek-llm.gguf.inp
│ ├── ggml-vocab-deepseek-llm.gguf.out
│ ├── ggml-vocab-falcon.gguf
│ ├── ggml-vocab-falcon.gguf.inp
│ ├── ggml-vocab-falcon.gguf.out
│ ├── ggml-vocab-gpt-2.gguf
│ ├── ggml-vocab-gpt-2.gguf.inp
│ ├── ggml-vocab-gpt-2.gguf.out
│ ├── ggml-vocab-gpt-neox.gguf
│ ├── ggml-vocab-llama-bpe.gguf
│ ├── ggml-vocab-llama-bpe.gguf.inp
│ ├── ggml-vocab-llama-bpe.gguf.out
│ ├── ggml-vocab-llama-spm.gguf
│ ├── ggml-vocab-llama-spm.gguf.inp
│ ├── ggml-vocab-llama-spm.gguf.out
│ ├── ggml-vocab-mpt.gguf
│ ├── ggml-vocab-mpt.gguf.inp
│ ├── ggml-vocab-mpt.gguf.out
│ ├── ggml-vocab-nomic-bert-moe.gguf
│ ├── ggml-vocab-phi-3.gguf
│ ├── ggml-vocab-phi-3.gguf.inp
│ ├── ggml-vocab-phi-3.gguf.out
│ ├── ggml-vocab-qwen2.gguf
│ ├── ggml-vocab-qwen2.gguf.inp
│ ├── ggml-vocab-qwen2.gguf.out
│ ├── ggml-vocab-refact.gguf
│ ├── ggml-vocab-refact.gguf.inp
│ ├── ggml-vocab-refact.gguf.out
│ ├── ggml-vocab-starcoder.gguf
│ ├── ggml-vocab-starcoder.gguf.inp
│ ├── ggml-vocab-starcoder.gguf.out
│ └── templates/
│ ├── Apertus-8B-Instruct.jinja
│ ├── Apriel-1.6-15b-Thinker-fixed.jinja
│ ├── Bielik-11B-v3.0-Instruct.jinja
│ ├── ByteDance-Seed-OSS.jinja
│ ├── CohereForAI-c4ai-command-r-plus-tool_use.jinja
│ ├── CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
│ ├── GLM-4.6.jinja
│ ├── GLM-4.7-Flash.jinja
│ ├── GigaChat3-10B-A1.8B.jinja
│ ├── GigaChat3.1-10B-A1.8B.jinja
│ ├── HuggingFaceTB-SmolLM3-3B.jinja
│ ├── Kimi-K2-Instruct.jinja
│ ├── Kimi-K2-Thinking.jinja
│ ├── LFM2-8B-A1B.jinja
│ ├── LFM2.5-Instruct.jinja
│ ├── MiMo-VL.jinja
│ ├── MiniMax-M2.jinja
│ ├── Mistral-Small-3.2-24B-Instruct-2506.jinja
│ ├── NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja
│ ├── NVIDIA-Nemotron-Nano-v2.jinja
│ ├── NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
│ ├── NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
│ ├── Qwen-QwQ-32B.jinja
│ ├── Qwen-Qwen2.5-7B-Instruct.jinja
│ ├── Qwen-Qwen3-0.6B.jinja
│ ├── Qwen3-Coder.jinja
│ ├── Qwen3.5-4B.jinja
│ ├── README.md
│ ├── StepFun3.5-Flash.jinja
│ ├── deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
│ ├── deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
│ ├── deepseek-ai-DeepSeek-V3.1.jinja
│ ├── fireworks-ai-llama-3-firefunction-v2.jinja
│ ├── google-gemma-2-2b-it.jinja
│ ├── ibm-granite-granite-3.3-2B-Instruct.jinja
│ ├── llama-cpp-deepseek-r1.jinja
│ ├── llama-cpp-rwkv-world.jinja
│ ├── meetkai-functionary-medium-v3.1.jinja
│ ├── meetkai-functionary-medium-v3.2.jinja
│ ├── meta-llama-Llama-3.1-8B-Instruct.jinja
│ ├── meta-llama-Llama-3.2-3B-Instruct.jinja
│ ├── meta-llama-Llama-3.3-70B-Instruct.jinja
│ ├── microsoft-Phi-3.5-mini-instruct.jinja
│ ├── mistralai-Ministral-3-14B-Reasoning-2512.jinja
│ ├── mistralai-Mistral-Nemo-Instruct-2407.jinja
│ ├── moonshotai-Kimi-K2.jinja
│ ├── openai-gpt-oss-120b.jinja
│ ├── stepfun-ai-Step-3.5-Flash.jinja
│ ├── unsloth-Apriel-1.5.jinja
│ ├── unsloth-mistral-Devstral-Small-2507.jinja
│ └── upstage-Solar-Open-100B.jinja
├── mypy.ini
├── pocs/
│ ├── CMakeLists.txt
│ └── vdot/
│ ├── CMakeLists.txt
│ ├── q8dot.cpp
│ └── vdot.cpp
├── pyproject.toml
├── pyrightconfig.json
├── requirements/
│ ├── requirements-all.txt
│ ├── requirements-compare-llama-bench.txt
│ ├── requirements-convert_hf_to_gguf.txt
│ ├── requirements-convert_hf_to_gguf_update.txt
│ ├── requirements-convert_legacy_llama.txt
│ ├── requirements-convert_llama_ggml_to_gguf.txt
│ ├── requirements-convert_lora_to_gguf.txt
│ ├── requirements-gguf_editor_gui.txt
│ ├── requirements-pydantic.txt
│ ├── requirements-server-bench.txt
│ ├── requirements-test-tokenizer-random.txt
│ └── requirements-tool_bench.txt
├── requirements.txt
├── scripts/
│ ├── apple/
│ │ ├── validate-apps.sh
│ │ ├── validate-ios.sh
│ │ ├── validate-macos.sh
│ │ ├── validate-tvos.sh
│ │ └── validate-visionos.sh
│ ├── bench-models.sh
│ ├── build-info.sh
│ ├── check-requirements.sh
│ ├── compare-commits.sh
│ ├── compare-llama-bench.py
│ ├── compare-logprobs.py
│ ├── create_ops_docs.py
│ ├── debug-test.sh
│ ├── fetch_server_test_models.py
│ ├── gen-authors.sh
│ ├── gen-unicode-data.py
│ ├── get-flags.mk
│ ├── get-hellaswag.sh
│ ├── get-pg.sh
│ ├── get-wikitext-2.sh
│ ├── get-winogrande.sh
│ ├── get_chat_template.py
│ ├── git-bisect-run.sh
│ ├── git-bisect.sh
│ ├── hf.sh
│ ├── hip/
│ │ └── gcn-cdna-vgpr-check.py
│ ├── install-oneapi.bat
│ ├── jinja/
│ │ ├── jinja-tester.py
│ │ └── requirements.txt
│ ├── pr2wt.sh
│ ├── serve-static.js
│ ├── server-bench.py
│ ├── server-test-function-call.py
│ ├── server-test-model.py
│ ├── snapdragon/
│ │ ├── adb/
│ │ │ ├── llama-cli.farf
│ │ │ ├── run-bench.sh
│ │ │ ├── run-cli.sh
│ │ │ ├── run-completion.sh
│ │ │ ├── run-mtmd.sh
│ │ │ └── run-tool.sh
│ │ ├── qdc/
│ │ │ ├── readme.md
│ │ │ ├── requirements.txt
│ │ │ └── tests/
│ │ │ └── test_bench.py
│ │ └── windows/
│ │ ├── run-bench.ps1
│ │ ├── run-cli.ps1
│ │ ├── run-completion.ps1
│ │ ├── run-mtmd.ps1
│ │ ├── run-tool.ps1
│ │ └── setup-build.ps1
│ ├── sync-ggml-am.sh
│ ├── sync-ggml.last
│ ├── sync-ggml.sh
│ ├── sync_vendor.py
│ ├── tool_bench.py
│ ├── tool_bench.sh
│ ├── verify-checksum-models.py
│ └── xxd.cmake
├── src/
│ ├── CMakeLists.txt
│ ├── llama-adapter.cpp
│ ├── llama-adapter.h
│ ├── llama-arch.cpp
│ ├── llama-arch.h
│ ├── llama-batch.cpp
│ ├── llama-batch.h
│ ├── llama-chat.cpp
│ ├── llama-chat.h
│ ├── llama-context.cpp
│ ├── llama-context.h
│ ├── llama-cparams.cpp
│ ├── llama-cparams.h
│ ├── llama-ext.h
│ ├── llama-grammar.cpp
│ ├── llama-grammar.h
│ ├── llama-graph.cpp
│ ├── llama-graph.h
│ ├── llama-hparams.cpp
│ ├── llama-hparams.h
│ ├── llama-impl.cpp
│ ├── llama-impl.h
│ ├── llama-io.cpp
│ ├── llama-io.h
│ ├── llama-kv-cache-iswa.cpp
│ ├── llama-kv-cache-iswa.h
│ ├── llama-kv-cache.cpp
│ ├── llama-kv-cache.h
│ ├── llama-kv-cells.h
│ ├── llama-memory-hybrid-iswa.cpp
│ ├── llama-memory-hybrid-iswa.h
│ ├── llama-memory-hybrid.cpp
│ ├── llama-memory-hybrid.h
│ ├── llama-memory-recurrent.cpp
│ ├── llama-memory-recurrent.h
│ ├── llama-memory.cpp
│ ├── llama-memory.h
│ ├── llama-mmap.cpp
│ ├── llama-mmap.h
│ ├── llama-model-loader.cpp
│ ├── llama-model-loader.h
│ ├── llama-model-saver.cpp
│ ├── llama-model-saver.h
│ ├── llama-model.cpp
│ ├── llama-model.h
│ ├── llama-quant.cpp
│ ├── llama-quant.h
│ ├── llama-sampler.cpp
│ ├── llama-sampler.h
│ ├── llama-vocab.cpp
│ ├── llama-vocab.h
│ ├── llama.cpp
│ ├── models/
│ │ ├── afmoe.cpp
│ │ ├── apertus.cpp
│ │ ├── arcee.cpp
│ │ ├── arctic.cpp
│ │ ├── arwkv7.cpp
│ │ ├── baichuan.cpp
│ │ ├── bailingmoe.cpp
│ │ ├── bailingmoe2.cpp
│ │ ├── bert.cpp
│ │ ├── bitnet.cpp
│ │ ├── bloom.cpp
│ │ ├── chameleon.cpp
│ │ ├── chatglm.cpp
│ │ ├── codeshell.cpp
│ │ ├── cogvlm.cpp
│ │ ├── cohere2-iswa.cpp
│ │ ├── command-r.cpp
│ │ ├── dbrx.cpp
│ │ ├── deci.cpp
│ │ ├── deepseek.cpp
│ │ ├── deepseek2.cpp
│ │ ├── delta-net-base.cpp
│ │ ├── dots1.cpp
│ │ ├── dream.cpp
│ │ ├── ernie4-5-moe.cpp
│ │ ├── ernie4-5.cpp
│ │ ├── eurobert.cpp
│ │ ├── exaone-moe.cpp
│ │ ├── exaone.cpp
│ │ ├── exaone4.cpp
│ │ ├── falcon-h1.cpp
│ │ ├── falcon.cpp
│ │ ├── gemma-embedding.cpp
│ │ ├── gemma.cpp
│ │ ├── gemma2-iswa.cpp
│ │ ├── gemma3.cpp
│ │ ├── gemma3n-iswa.cpp
│ │ ├── glm4-moe.cpp
│ │ ├── glm4.cpp
│ │ ├── gpt2.cpp
│ │ ├── gptneox.cpp
│ │ ├── granite-hybrid.cpp
│ │ ├── granite.cpp
│ │ ├── grok.cpp
│ │ ├── grovemoe.cpp
│ │ ├── hunyuan-dense.cpp
│ │ ├── hunyuan-moe.cpp
│ │ ├── internlm2.cpp
│ │ ├── jais.cpp
│ │ ├── jais2.cpp
│ │ ├── jamba.cpp
│ │ ├── kimi-linear.cpp
│ │ ├── lfm2.cpp
│ │ ├── llada-moe.cpp
│ │ ├── llada.cpp
│ │ ├── llama-iswa.cpp
│ │ ├── llama.cpp
│ │ ├── maincoder.cpp
│ │ ├── mamba-base.cpp
│ │ ├── mamba.cpp
│ │ ├── mimo2-iswa.cpp
│ │ ├── minicpm3.cpp
│ │ ├── minimax-m2.cpp
│ │ ├── mistral3.cpp
│ │ ├── models.h
│ │ ├── modern-bert.cpp
│ │ ├── mpt.cpp
│ │ ├── nemotron-h.cpp
│ │ ├── nemotron.cpp
│ │ ├── neo-bert.cpp
│ │ ├── olmo.cpp
│ │ ├── olmo2.cpp
│ │ ├── olmoe.cpp
│ │ ├── openai-moe-iswa.cpp
│ │ ├── openelm.cpp
│ │ ├── orion.cpp
│ │ ├── paddleocr.cpp
│ │ ├── pangu-embedded.cpp
│ │ ├── phi2.cpp
│ │ ├── phi3.cpp
│ │ ├── plamo.cpp
│ │ ├── plamo2.cpp
│ │ ├── plamo3.cpp
│ │ ├── plm.cpp
│ │ ├── qwen.cpp
│ │ ├── qwen2.cpp
│ │ ├── qwen2moe.cpp
│ │ ├── qwen2vl.cpp
│ │ ├── qwen3.cpp
│ │ ├── qwen35.cpp
│ │ ├── qwen35moe.cpp
│ │ ├── qwen3moe.cpp
│ │ ├── qwen3next.cpp
│ │ ├── qwen3vl-moe.cpp
│ │ ├── qwen3vl.cpp
│ │ ├── refact.cpp
│ │ ├── rnd1.cpp
│ │ ├── rwkv6-base.cpp
│ │ ├── rwkv6.cpp
│ │ ├── rwkv6qwen2.cpp
│ │ ├── rwkv7-base.cpp
│ │ ├── rwkv7.cpp
│ │ ├── seed-oss.cpp
│ │ ├── smallthinker.cpp
│ │ ├── smollm3.cpp
│ │ ├── stablelm.cpp
│ │ ├── starcoder.cpp
│ │ ├── starcoder2.cpp
│ │ ├── step35-iswa.cpp
│ │ ├── t5-dec.cpp
│ │ ├── t5-enc.cpp
│ │ ├── wavtokenizer-dec.cpp
│ │ └── xverse.cpp
│ ├── unicode-data.cpp
│ ├── unicode-data.h
│ ├── unicode.cpp
│ └── unicode.h
├── tests/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── export-graph-ops.cpp
│ ├── get-model.cpp
│ ├── get-model.h
│ ├── gguf-model-data.cpp
│ ├── gguf-model-data.h
│ ├── peg-parser/
│ │ ├── simple-tokenize.cpp
│ │ ├── simple-tokenize.h
│ │ ├── test-basic.cpp
│ │ ├── test-gbnf-generation.cpp
│ │ ├── test-json-parser.cpp
│ │ ├── test-json-serialization.cpp
│ │ ├── test-python-dict-parser.cpp
│ │ ├── test-unicode.cpp
│ │ └── tests.h
│ ├── run-json-schema-to-grammar.mjs
│ ├── test-alloc.cpp
│ ├── test-arg-parser.cpp
│ ├── test-autorelease.cpp
│ ├── test-backend-ops.cpp
│ ├── test-backend-sampler.cpp
│ ├── test-barrier.cpp
│ ├── test-c.c
│ ├── test-chat-auto-parser.cpp
│ ├── test-chat-peg-parser.cpp
│ ├── test-chat-template.cpp
│ ├── test-chat.cpp
│ ├── test-double-float.cpp
│ ├── test-gbnf-validator.cpp
│ ├── test-gguf-model-data.cpp
│ ├── test-gguf.cpp
│ ├── test-grammar-integration.cpp
│ ├── test-grammar-llguidance.cpp
│ ├── test-grammar-parser.cpp
│ ├── test-jinja.cpp
│ ├── test-json-partial.cpp
│ ├── test-json-schema-to-grammar.cpp
│ ├── test-llama-archs.cpp
│ ├── test-llama-grammar.cpp
│ ├── test-log.cpp
│ ├── test-lora-conversion-inference.sh
│ ├── test-model-load-cancel.cpp
│ ├── test-mtmd-c-api.c
│ ├── test-opt.cpp
│ ├── test-peg-parser.cpp
│ ├── test-quantize-fns.cpp
│ ├── test-quantize-perf.cpp
│ ├── test-quantize-stats.cpp
│ ├── test-reasoning-budget.cpp
│ ├── test-regex-partial.cpp
│ ├── test-rope.cpp
│ ├── test-sampling.cpp
│ ├── test-state-restore-fragmented.cpp
│ ├── test-thread-safety.cpp
│ ├── test-tokenizer-0.cpp
│ ├── test-tokenizer-0.py
│ ├── test-tokenizer-0.sh
│ ├── test-tokenizer-1-bpe.cpp
│ ├── test-tokenizer-1-spm.cpp
│ ├── test-tokenizer-random.py
│ ├── test-tokenizers-repo.sh
│ └── testing.h
├── tools/
│ ├── CMakeLists.txt
│ ├── batched-bench/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── batched-bench.cpp
│ ├── cli/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── cli.cpp
│ ├── completion/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── completion.cpp
│ ├── cvector-generator/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── completions.txt
│ │ ├── cvector-generator.cpp
│ │ ├── mean.hpp
│ │ ├── negative.txt
│ │ ├── pca.hpp
│ │ └── positive.txt
│ ├── export-lora/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── export-lora.cpp
│ ├── fit-params/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── fit-params.cpp
│ ├── gguf-split/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── gguf-split.cpp
│ │ └── tests.sh
│ ├── imatrix/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── imatrix.cpp
│ ├── llama-bench/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── llama-bench.cpp
│ ├── mtmd/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── clip-graph.h
│ │ ├── clip-impl.h
│ │ ├── clip-model.h
│ │ ├── clip.cpp
│ │ ├── clip.h
│ │ ├── debug/
│ │ │ ├── mtmd-debug.cpp
│ │ │ ├── mtmd-debug.h
│ │ │ └── mtmd-debug.md
│ │ ├── deprecation-warning.cpp
│ │ ├── legacy-models/
│ │ │ ├── convert_image_encoder_to_gguf.py
│ │ │ ├── glmedge-convert-image-encoder-to-gguf.py
│ │ │ ├── glmedge-surgery.py
│ │ │ ├── llava_surgery.py
│ │ │ ├── llava_surgery_v2.py
│ │ │ ├── minicpmv-convert-image-encoder-to-gguf.py
│ │ │ └── minicpmv-surgery.py
│ │ ├── models/
│ │ │ ├── cogvlm.cpp
│ │ │ ├── conformer.cpp
│ │ │ ├── deepseekocr.cpp
│ │ │ ├── glm4v.cpp
│ │ │ ├── internvl.cpp
│ │ │ ├── kimik25.cpp
│ │ │ ├── kimivl.cpp
│ │ │ ├── llama4.cpp
│ │ │ ├── llava.cpp
│ │ │ ├── minicpmv.cpp
│ │ │ ├── mobilenetv5.cpp
│ │ │ ├── models.h
│ │ │ ├── nemotron-v2-vl.cpp
│ │ │ ├── paddleocr.cpp
│ │ │ ├── pixtral.cpp
│ │ │ ├── qwen2vl.cpp
│ │ │ ├── qwen3vl.cpp
│ │ │ ├── siglip.cpp
│ │ │ ├── whisper-enc.cpp
│ │ │ └── youtuvl.cpp
│ │ ├── mtmd-audio.cpp
│ │ ├── mtmd-audio.h
│ │ ├── mtmd-cli.cpp
│ │ ├── mtmd-helper.cpp
│ │ ├── mtmd-helper.h
│ │ ├── mtmd-image.cpp
│ │ ├── mtmd-image.h
│ │ ├── mtmd.cpp
│ │ ├── mtmd.h
│ │ ├── requirements.txt
│ │ ├── tests/
│ │ │ ├── test-1-extracted.md
│ │ │ ├── test-1-extracted.txt
│ │ │ ├── test-deepseek-ocr.py
│ │ │ └── tests-requirements.txt
│ │ └── tests.sh
│ ├── parser/
│ │ ├── CMakeLists.txt
│ │ ├── debug-template-parser.cpp
│ │ └── template-analysis.cpp
│ ├── perplexity/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── perplexity.cpp
│ ├── quantize/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── quantize.cpp
│ │ └── tests.sh
│ ├── results/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── results.cpp
│ ├── rpc/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── rpc-server.cpp
│ ├── server/
│ │ ├── CMakeLists.txt
│ │ ├── README-dev.md
│ │ ├── README.md
│ │ ├── bench/
│ │ │ ├── README.md
│ │ │ ├── bench.py
│ │ │ ├── prometheus.yml
│ │ │ ├── requirements.txt
│ │ │ └── script.js
│ │ ├── chat-llama2.sh
│ │ ├── chat.mjs
│ │ ├── chat.sh
│ │ ├── public/
│ │ │ ├── bundle.css
│ │ │ ├── bundle.js
│ │ │ ├── index.html
│ │ │ └── loading.html
│ │ ├── public_legacy/
│ │ │ ├── colorthemes.css
│ │ │ ├── completion.js
│ │ │ ├── index-new.html
│ │ │ ├── index.html
│ │ │ ├── index.js
│ │ │ ├── json-schema-to-grammar.mjs
│ │ │ ├── loading.html
│ │ │ ├── prompt-formats.js
│ │ │ ├── style.css
│ │ │ ├── system-prompts.js
│ │ │ ├── theme-beeninorder.css
│ │ │ ├── theme-ketivah.css
│ │ │ ├── theme-mangotango.css
│ │ │ ├── theme-playground.css
│ │ │ ├── theme-polarnight.css
│ │ │ └── theme-snowstorm.css
│ │ ├── public_simplechat/
│ │ │ ├── datautils.mjs
│ │ │ ├── index.html
│ │ │ ├── readme.md
│ │ │ ├── simplechat.css
│ │ │ ├── simplechat.js
│ │ │ └── ui.mjs
│ │ ├── server-common.cpp
│ │ ├── server-common.h
│ │ ├── server-context.cpp
│ │ ├── server-context.h
│ │ ├── server-cors-proxy.h
│ │ ├── server-http.cpp
│ │ ├── server-http.h
│ │ ├── server-models.cpp
│ │ ├── server-models.h
│ │ ├── server-queue.cpp
│ │ ├── server-queue.h
│ │ ├── server-task.cpp
│ │ ├── server-task.h
│ │ ├── server-tools.cpp
│ │ ├── server-tools.h
│ │ ├── server.cpp
│ │ ├── tests/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── conftest.py
│ │ │ ├── pytest.ini
│ │ │ ├── requirements.txt
│ │ │ ├── tests.sh
│ │ │ ├── unit/
│ │ │ │ ├── test_basic.py
│ │ │ │ ├── test_chat_completion.py
│ │ │ │ ├── test_compat_anthropic.py
│ │ │ │ ├── test_compat_oai_responses.py
│ │ │ │ ├── test_completion.py
│ │ │ │ ├── test_ctx_shift.py
│ │ │ │ ├── test_embedding.py
│ │ │ │ ├── test_infill.py
│ │ │ │ ├── test_lora.py
│ │ │ │ ├── test_proxy.py
│ │ │ │ ├── test_rerank.py
│ │ │ │ ├── test_router.py
│ │ │ │ ├── test_security.py
│ │ │ │ ├── test_sleep.py
│ │ │ │ ├── test_slot_save.py
│ │ │ │ ├── test_speculative.py
│ │ │ │ ├── test_template.py
│ │ │ │ ├── test_tokenize.py
│ │ │ │ ├── test_tool_call.py
│ │ │ │ └── test_vision_api.py
│ │ │ └── utils.py
│ │ ├── themes/
│ │ │ ├── README.md
│ │ │ ├── buttons-top/
│ │ │ │ ├── README.md
│ │ │ │ └── index.html
│ │ │ └── wild/
│ │ │ ├── README.md
│ │ │ └── index.html
│ │ └── webui/
│ │ ├── .gitignore
│ │ ├── .npmrc
│ │ ├── .prettierignore
│ │ ├── .prettierrc
│ │ ├── .storybook/
│ │ │ ├── ModeWatcherDecorator.svelte
│ │ │ ├── TooltipProviderDecorator.svelte
│ │ │ ├── main.ts
│ │ │ ├── preview.ts
│ │ │ └── vitest.setup.ts
│ │ ├── README.md
│ │ ├── components.json
│ │ ├── docs/
│ │ │ ├── architecture/
│ │ │ │ ├── high-level-architecture-simplified.md
│ │ │ │ └── high-level-architecture.md
│ │ │ └── flows/
│ │ │ ├── chat-flow.md
│ │ │ ├── conversations-flow.md
│ │ │ ├── data-flow-simplified-model-mode.md
│ │ │ ├── data-flow-simplified-router-mode.md
│ │ │ ├── database-flow.md
│ │ │ ├── mcp-flow.md
│ │ │ ├── models-flow.md
│ │ │ ├── server-flow.md
│ │ │ └── settings-flow.md
│ │ ├── eslint.config.js
│ │ ├── package.json
│ │ ├── playwright.config.ts
│ │ ├── scripts/
│ │ │ ├── dev.sh
│ │ │ ├── install-git-hooks.sh
│ │ │ └── post-build.sh
│ │ ├── src/
│ │ │ ├── app.css
│ │ │ ├── app.d.ts
│ │ │ ├── app.html
│ │ │ ├── lib/
│ │ │ │ ├── actions/
│ │ │ │ │ └── fade-in-view.svelte.ts
│ │ │ │ ├── components/
│ │ │ │ │ ├── app/
│ │ │ │ │ │ ├── actions/
│ │ │ │ │ │ │ ├── ActionIcon.svelte
│ │ │ │ │ │ │ ├── ActionIconCopyToClipboard.svelte
│ │ │ │ │ │ │ ├── ActionIconRemove.svelte
│ │ │ │ │ │ │ ├── ActionIconsCodeBlock.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── badges/
│ │ │ │ │ │ │ ├── BadgeChatStatistic.svelte
│ │ │ │ │ │ │ ├── BadgeInfo.svelte
│ │ │ │ │ │ │ ├── BadgeModality.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── chat/
│ │ │ │ │ │ │ ├── ChatAttachments/
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpPrompt.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpResource.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpResources.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentPreview.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentThumbnailFile.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentThumbnailImage.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentsList.svelte
│ │ │ │ │ │ │ │ └── ChatAttachmentsViewAll.svelte
│ │ │ │ │ │ │ ├── ChatForm/
│ │ │ │ │ │ │ │ ├── ChatForm.svelte
│ │ │ │ │ │ │ │ ├── ChatFormActions/
│ │ │ │ │ │ │ │ │ ├── ChatFormActionAttachmentsDropdown.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionAttachmentsSheet.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionRecord.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionSubmit.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormActions.svelte
│ │ │ │ │ │ │ │ ├── ChatFormFileInputInvisible.svelte
│ │ │ │ │ │ │ │ ├── ChatFormHelperText.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPicker/
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerItemHeader.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerList.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerListItem.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormPickerListItemSkeleton.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPickerPopover.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPromptPicker/
│ │ │ │ │ │ │ │ │ ├── ChatFormPromptPicker.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPromptPickerArgumentForm.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormPromptPickerArgumentInput.svelte
│ │ │ │ │ │ │ │ ├── ChatFormResourcePicker/
│ │ │ │ │ │ │ │ │ └── ChatFormResourcePicker.svelte
│ │ │ │ │ │ │ │ └── ChatFormTextarea.svelte
│ │ │ │ │ │ │ ├── ChatMessages/
│ │ │ │ │ │ │ │ ├── ChatMessage.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageActions.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageAgenticContent.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageAssistant.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageBranchingControls.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageEditForm.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageMcpPrompt.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageMcpPromptContent.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageStatistics.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageSystem.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageUser.svelte
│ │ │ │ │ │ │ │ └── ChatMessages.svelte
│ │ │ │ │ │ │ ├── ChatScreen/
│ │ │ │ │ │ │ │ ├── ChatScreen.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenDragOverlay.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenForm.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenHeader.svelte
│ │ │ │ │ │ │ │ └── ChatScreenProcessingInfo.svelte
│ │ │ │ │ │ │ ├── ChatSettings/
│ │ │ │ │ │ │ │ ├── ChatSettings.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsFields.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsFooter.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsImportExportTab.svelte
│ │ │ │ │ │ │ │ └── ChatSettingsParameterSourceIndicator.svelte
│ │ │ │ │ │ │ ├── ChatSidebar/
│ │ │ │ │ │ │ │ ├── ChatSidebar.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarActions.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarConversationItem.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarSearch.svelte
│ │ │ │ │ │ │ │ └── handle-mobile-sidebar-item-click.ts
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── content/
│ │ │ │ │ │ │ ├── CollapsibleContentBlock.svelte
│ │ │ │ │ │ │ ├── MarkdownContent.svelte
│ │ │ │ │ │ │ ├── SyntaxHighlightedCode.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── dialogs/
│ │ │ │ │ │ │ ├── DialogChatAttachmentPreview.svelte
│ │ │ │ │ │ │ ├── DialogChatAttachmentsViewAll.svelte
│ │ │ │ │ │ │ ├── DialogChatError.svelte
│ │ │ │ │ │ │ ├── DialogChatSettings.svelte
│ │ │ │ │ │ │ ├── DialogCodePreview.svelte
│ │ │ │ │ │ │ ├── DialogConfirmation.svelte
│ │ │ │ │ │ │ ├── DialogConversationSelection.svelte
│ │ │ │ │ │ │ ├── DialogConversationTitleUpdate.svelte
│ │ │ │ │ │ │ ├── DialogEmptyFileAlert.svelte
│ │ │ │ │ │ │ ├── DialogMcpResourcePreview.svelte
│ │ │ │ │ │ │ ├── DialogMcpResources.svelte
│ │ │ │ │ │ │ ├── DialogMcpServersSettings.svelte
│ │ │ │ │ │ │ ├── DialogModelInformation.svelte
│ │ │ │ │ │ │ ├── DialogModelNotAvailable.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── forms/
│ │ │ │ │ │ │ ├── InputWithSuggestions.svelte
│ │ │ │ │ │ │ ├── KeyValuePairs.svelte
│ │ │ │ │ │ │ ├── SearchInput.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── mcp/
│ │ │ │ │ │ │ ├── McpActiveServersAvatars.svelte
│ │ │ │ │ │ │ ├── McpCapabilitiesBadges.svelte
│ │ │ │ │ │ │ ├── McpConnectionLogs.svelte
│ │ │ │ │ │ │ ├── McpLogo.svelte
│ │ │ │ │ │ │ ├── McpResourceBrowser/
│ │ │ │ │ │ │ │ ├── McpResourceBrowser.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserEmptyState.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserHeader.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserServerItem.svelte
│ │ │ │ │ │ │ │ └── mcp-resource-browser.ts
│ │ │ │ │ │ │ ├── McpResourcePreview.svelte
│ │ │ │ │ │ │ ├── McpResourceTemplateForm.svelte
│ │ │ │ │ │ │ ├── McpServerCard/
│ │ │ │ │ │ │ │ ├── McpServerCard.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardActions.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardDeleteDialog.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardEditForm.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardHeader.svelte
│ │ │ │ │ │ │ │ └── McpServerCardToolsList.svelte
│ │ │ │ │ │ │ ├── McpServerCardSkeleton.svelte
│ │ │ │ │ │ │ ├── McpServerForm.svelte
│ │ │ │ │ │ │ ├── McpServerInfo.svelte
│ │ │ │ │ │ │ ├── McpServersSelector.svelte
│ │ │ │ │ │ │ ├── McpServersSettings.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── misc/
│ │ │ │ │ │ │ ├── ConversationSelection.svelte
│ │ │ │ │ │ │ ├── HorizontalScrollCarousel.svelte
│ │ │ │ │ │ │ ├── KeyboardShortcutInfo.svelte
│ │ │ │ │ │ │ ├── TruncatedText.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── models/
│ │ │ │ │ │ │ ├── ModelBadge.svelte
│ │ │ │ │ │ │ ├── ModelId.svelte
│ │ │ │ │ │ │ ├── ModelsSelector.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorList.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorOption.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorSheet.svelte
│ │ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ │ └── utils.ts
│ │ │ │ │ │ ├── navigation/
│ │ │ │ │ │ │ ├── DropdownMenuActions.svelte
│ │ │ │ │ │ │ ├── DropdownMenuSearchable.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ └── server/
│ │ │ │ │ │ ├── ServerErrorSplash.svelte
│ │ │ │ │ │ ├── ServerLoadingSplash.svelte
│ │ │ │ │ │ ├── ServerStatus.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ └── ui/
│ │ │ │ │ ├── alert/
│ │ │ │ │ │ ├── alert-description.svelte
│ │ │ │ │ │ ├── alert-title.svelte
│ │ │ │ │ │ ├── alert.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── alert-dialog/
│ │ │ │ │ │ ├── alert-dialog-action.svelte
│ │ │ │ │ │ ├── alert-dialog-cancel.svelte
│ │ │ │ │ │ ├── alert-dialog-content.svelte
│ │ │ │ │ │ ├── alert-dialog-description.svelte
│ │ │ │ │ │ ├── alert-dialog-footer.svelte
│ │ │ │ │ │ ├── alert-dialog-header.svelte
│ │ │ │ │ │ ├── alert-dialog-overlay.svelte
│ │ │ │ │ │ ├── alert-dialog-title.svelte
│ │ │ │ │ │ ├── alert-dialog-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── badge/
│ │ │ │ │ │ ├── badge.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── button/
│ │ │ │ │ │ ├── button.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── card/
│ │ │ │ │ │ ├── card-action.svelte
│ │ │ │ │ │ ├── card-content.svelte
│ │ │ │ │ │ ├── card-description.svelte
│ │ │ │ │ │ ├── card-footer.svelte
│ │ │ │ │ │ ├── card-header.svelte
│ │ │ │ │ │ ├── card-title.svelte
│ │ │ │ │ │ ├── card.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── checkbox/
│ │ │ │ │ │ ├── checkbox.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── collapsible/
│ │ │ │ │ │ ├── collapsible-content.svelte
│ │ │ │ │ │ ├── collapsible-trigger.svelte
│ │ │ │ │ │ ├── collapsible.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── dialog/
│ │ │ │ │ │ ├── dialog-close.svelte
│ │ │ │ │ │ ├── dialog-content.svelte
│ │ │ │ │ │ ├── dialog-description.svelte
│ │ │ │ │ │ ├── dialog-footer.svelte
│ │ │ │ │ │ ├── dialog-header.svelte
│ │ │ │ │ │ ├── dialog-overlay.svelte
│ │ │ │ │ │ ├── dialog-title.svelte
│ │ │ │ │ │ ├── dialog-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── dropdown-menu/
│ │ │ │ │ │ ├── dropdown-menu-checkbox-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-content.svelte
│ │ │ │ │ │ ├── dropdown-menu-group-heading.svelte
│ │ │ │ │ │ ├── dropdown-menu-group.svelte
│ │ │ │ │ │ ├── dropdown-menu-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-label.svelte
│ │ │ │ │ │ ├── dropdown-menu-radio-group.svelte
│ │ │ │ │ │ ├── dropdown-menu-radio-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-separator.svelte
│ │ │ │ │ │ ├── dropdown-menu-shortcut.svelte
│ │ │ │ │ │ ├── dropdown-menu-sub-content.svelte
│ │ │ │ │ │ ├── dropdown-menu-sub-trigger.svelte
│ │ │ │ │ │ ├── dropdown-menu-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── input/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── input.svelte
│ │ │ │ │ ├── label/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── label.svelte
│ │ │ │ │ ├── popover/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── popover-close.svelte
│ │ │ │ │ │ ├── popover-content.svelte
│ │ │ │ │ │ ├── popover-portal.svelte
│ │ │ │ │ │ ├── popover-trigger.svelte
│ │ │ │ │ │ └── popover.svelte
│ │ │ │ │ ├── scroll-area/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── scroll-area-scrollbar.svelte
│ │ │ │ │ │ └── scroll-area.svelte
│ │ │ │ │ ├── select/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── select-content.svelte
│ │ │ │ │ │ ├── select-group-heading.svelte
│ │ │ │ │ │ ├── select-group.svelte
│ │ │ │ │ │ ├── select-item.svelte
│ │ │ │ │ │ ├── select-label.svelte
│ │ │ │ │ │ ├── select-scroll-down-button.svelte
│ │ │ │ │ │ ├── select-scroll-up-button.svelte
│ │ │ │ │ │ ├── select-separator.svelte
│ │ │ │ │ │ └── select-trigger.svelte
│ │ │ │ │ ├── separator/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── separator.svelte
│ │ │ │ │ ├── sheet/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── sheet-close.svelte
│ │ │ │ │ │ ├── sheet-content.svelte
│ │ │ │ │ │ ├── sheet-description.svelte
│ │ │ │ │ │ ├── sheet-footer.svelte
│ │ │ │ │ │ ├── sheet-header.svelte
│ │ │ │ │ │ ├── sheet-overlay.svelte
│ │ │ │ │ │ ├── sheet-title.svelte
│ │ │ │ │ │ └── sheet-trigger.svelte
│ │ │ │ │ ├── sidebar/
│ │ │ │ │ │ ├── constants.ts
│ │ │ │ │ │ ├── context.svelte.ts
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── sidebar-content.svelte
│ │ │ │ │ │ ├── sidebar-footer.svelte
│ │ │ │ │ │ ├── sidebar-group-action.svelte
│ │ │ │ │ │ ├── sidebar-group-content.svelte
│ │ │ │ │ │ ├── sidebar-group-label.svelte
│ │ │ │ │ │ ├── sidebar-group.svelte
│ │ │ │ │ │ ├── sidebar-header.svelte
│ │ │ │ │ │ ├── sidebar-input.svelte
│ │ │ │ │ │ ├── sidebar-inset.svelte
│ │ │ │ │ │ ├── sidebar-menu-action.svelte
│ │ │ │ │ │ ├── sidebar-menu-badge.svelte
│ │ │ │ │ │ ├── sidebar-menu-button.svelte
│ │ │ │ │ │ ├── sidebar-menu-item.svelte
│ │ │ │ │ │ ├── sidebar-menu-skeleton.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub-button.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub-item.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub.svelte
│ │ │ │ │ │ ├── sidebar-menu.svelte
│ │ │ │ │ │ ├── sidebar-provider.svelte
│ │ │ │ │ │ ├── sidebar-rail.svelte
│ │ │ │ │ │ ├── sidebar-separator.svelte
│ │ │ │ │ │ ├── sidebar-trigger.svelte
│ │ │ │ │ │ └── sidebar.svelte
│ │ │ │ │ ├── skeleton/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── skeleton.svelte
│ │ │ │ │ ├── switch/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── switch.svelte
│ │ │ │ │ ├── table/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── table-body.svelte
│ │ │ │ │ │ ├── table-caption.svelte
│ │ │ │ │ │ ├── table-cell.svelte
│ │ │ │ │ │ ├── table-footer.svelte
│ │ │ │ │ │ ├── table-head.svelte
│ │ │ │ │ │ ├── table-header.svelte
│ │ │ │ │ │ ├── table-row.svelte
│ │ │ │ │ │ └── table.svelte
│ │ │ │ │ ├── textarea/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── textarea.svelte
│ │ │ │ │ ├── tooltip/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── tooltip-content.svelte
│ │ │ │ │ │ └── tooltip-trigger.svelte
│ │ │ │ │ └── utils.ts
│ │ │ │ ├── constants/
│ │ │ │ │ ├── agentic.ts
│ │ │ │ │ ├── api-endpoints.ts
│ │ │ │ │ ├── attachment-labels.ts
│ │ │ │ │ ├── auto-scroll.ts
│ │ │ │ │ ├── binary-detection.ts
│ │ │ │ │ ├── cache.ts
│ │ │ │ │ ├── chat-form.ts
│ │ │ │ │ ├── code-blocks.ts
│ │ │ │ │ ├── code.ts
│ │ │ │ │ ├── context-keys.ts
│ │ │ │ │ ├── css-classes.ts
│ │ │ │ │ ├── favicon.ts
│ │ │ │ │ ├── floating-ui-constraints.ts
│ │ │ │ │ ├── formatters.ts
│ │ │ │ │ ├── icons.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── key-value-pairs.ts
│ │ │ │ │ ├── latex-protection.ts
│ │ │ │ │ ├── literal-html.ts
│ │ │ │ │ ├── localstorage-keys.ts
│ │ │ │ │ ├── markdown.ts
│ │ │ │ │ ├── max-bundle-size.ts
│ │ │ │ │ ├── mcp-form.ts
│ │ │ │ │ ├── mcp-resource.ts
│ │ │ │ │ ├── mcp.ts
│ │ │ │ │ ├── message-export.ts
│ │ │ │ │ ├── model-id.ts
│ │ │ │ │ ├── precision.ts
│ │ │ │ │ ├── processing-info.ts
│ │ │ │ │ ├── settings-config.ts
│ │ │ │ │ ├── settings-fields.ts
│ │ │ │ │ ├── settings-keys.ts
│ │ │ │ │ ├── settings-sections.ts
│ │ │ │ │ ├── supported-file-types.ts
│ │ │ │ │ ├── table-html-restorer.ts
│ │ │ │ │ ├── tooltip-config.ts
│ │ │ │ │ ├── ui.ts
│ │ │ │ │ ├── uri-template.ts
│ │ │ │ │ └── viewport.ts
│ │ │ │ ├── contexts/
│ │ │ │ │ ├── chat-actions.context.ts
│ │ │ │ │ ├── chat-settings-dialog.context.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ └── message-edit.context.ts
│ │ │ │ ├── enums/
│ │ │ │ │ ├── agentic.ts
│ │ │ │ │ ├── attachment.ts
│ │ │ │ │ ├── chat.ts
│ │ │ │ │ ├── files.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── keyboard.ts
│ │ │ │ │ ├── mcp.ts
│ │ │ │ │ ├── model.ts
│ │ │ │ │ ├── server.ts
│ │ │ │ │ ├── settings.ts
│ │ │ │ │ └── ui.ts
│ │ │ │ ├── hooks/
│ │ │ │ │ ├── is-mobile.svelte.ts
│ │ │ │ │ ├── use-auto-scroll.svelte.ts
│ │ │ │ │ └── use-processing-state.svelte.ts
│ │ │ │ ├── markdown/
│ │ │ │ │ ├── enhance-code-blocks.ts
│ │ │ │ │ ├── enhance-links.ts
│ │ │ │ │ ├── literal-html.ts
│ │ │ │ │ ├── resolve-attachment-images.ts
│ │ │ │ │ └── table-html-restorer.ts
│ │ │ │ ├── services/
│ │ │ │ │ ├── chat.service.ts
│ │ │ │ │ ├── database.service.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── mcp.service.ts
│ │ │ │ │ ├── models.service.ts
│ │ │ │ │ ├── parameter-sync.service.spec.ts
│ │ │ │ │ ├── parameter-sync.service.ts
│ │ │ │ │ └── props.service.ts
│ │ │ │ ├── stores/
│ │ │ │ │ ├── agentic.svelte.ts
│ │ │ │ │ ├── chat.svelte.ts
│ │ │ │ │ ├── conversations.svelte.ts
│ │ │ │ │ ├── mcp-resources.svelte.ts
│ │ │ │ │ ├── mcp.svelte.ts
│ │ │ │ │ ├── models.svelte.ts
│ │ │ │ │ ├── persisted.svelte.ts
│ │ │ │ │ ├── server.svelte.ts
│ │ │ │ │ └── settings.svelte.ts
│ │ │ │ ├── types/
│ │ │ │ │ ├── agentic.d.ts
│ │ │ │ │ ├── api.d.ts
│ │ │ │ │ ├── chat.d.ts
│ │ │ │ │ ├── common.d.ts
│ │ │ │ │ ├── database.d.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── mcp.d.ts
│ │ │ │ │ ├── models.d.ts
│ │ │ │ │ └── settings.d.ts
│ │ │ │ └── utils/
│ │ │ │ ├── abort.ts
│ │ │ │ ├── agentic.ts
│ │ │ │ ├── api-fetch.ts
│ │ │ │ ├── api-headers.ts
│ │ │ │ ├── api-key-validation.ts
│ │ │ │ ├── attachment-display.ts
│ │ │ │ ├── attachment-type.ts
│ │ │ │ ├── audio-recording.ts
│ │ │ │ ├── autoresize-textarea.ts
│ │ │ │ ├── branching.ts
│ │ │ │ ├── browser-only.ts
│ │ │ │ ├── cache-ttl.ts
│ │ │ │ ├── clipboard.ts
│ │ │ │ ├── code.ts
│ │ │ │ ├── config-helpers.ts
│ │ │ │ ├── conversation-utils.ts
│ │ │ │ ├── convert-files-to-extra.ts
│ │ │ │ ├── cors-proxy.ts
│ │ │ │ ├── data-url.ts
│ │ │ │ ├── debounce.ts
│ │ │ │ ├── favicon.ts
│ │ │ │ ├── file-preview.ts
│ │ │ │ ├── file-type.ts
│ │ │ │ ├── formatters.ts
│ │ │ │ ├── headers.ts
│ │ │ │ ├── image-error-fallback.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── is-ime-composing.ts
│ │ │ │ ├── latex-protection.ts
│ │ │ │ ├── legacy-migration.ts
│ │ │ │ ├── mcp.ts
│ │ │ │ ├── modality-file-validation.ts
│ │ │ │ ├── model-names.ts
│ │ │ │ ├── pdf-processing.ts
│ │ │ │ ├── portal-to-body.ts
│ │ │ │ ├── precision.ts
│ │ │ │ ├── process-uploaded-files.ts
│ │ │ │ ├── sanitize.ts
│ │ │ │ ├── svg-to-png.ts
│ │ │ │ ├── syntax-highlight-language.ts
│ │ │ │ ├── text-files.ts
│ │ │ │ ├── text.ts
│ │ │ │ ├── uri-template.ts
│ │ │ │ ├── uuid.ts
│ │ │ │ └── webp-to-png.ts
│ │ │ ├── routes/
│ │ │ │ ├── +error.svelte
│ │ │ │ ├── +layout.svelte
│ │ │ │ ├── +page.svelte
│ │ │ │ ├── +page.ts
│ │ │ │ └── chat/
│ │ │ │ └── [id]/
│ │ │ │ ├── +page.svelte
│ │ │ │ └── +page.ts
│ │ │ └── styles/
│ │ │ └── katex-custom.scss
│ │ ├── static/
│ │ │ └── loading.html
│ │ ├── svelte.config.js
│ │ ├── tests/
│ │ │ ├── client/
│ │ │ │ ├── components/
│ │ │ │ │ └── TestWrapper.svelte
│ │ │ │ └── page.svelte.test.ts
│ │ │ ├── e2e/
│ │ │ │ └── demo.test.ts
│ │ │ ├── stories/
│ │ │ │ ├── ChatMessage.stories.svelte
│ │ │ │ ├── ChatScreenForm.stories.svelte
│ │ │ │ ├── ChatSettings.stories.svelte
│ │ │ │ ├── ChatSidebar.stories.svelte
│ │ │ │ ├── Introduction.mdx
│ │ │ │ ├── MarkdownContent.stories.svelte
│ │ │ │ └── fixtures/
│ │ │ │ ├── ai-tutorial.ts
│ │ │ │ ├── api-docs.ts
│ │ │ │ ├── blog-post.ts
│ │ │ │ ├── data-analysis.ts
│ │ │ │ ├── empty.ts
│ │ │ │ ├── math-formulas.ts
│ │ │ │ ├── readme.ts
│ │ │ │ └── storybook-mocks.ts
│ │ │ └── unit/
│ │ │ ├── agentic-sections.test.ts
│ │ │ ├── agentic-strip.test.ts
│ │ │ ├── clipboard.test.ts
│ │ │ ├── latex-protection.test.ts
│ │ │ ├── model-id-parser.test.ts
│ │ │ ├── model-names.test.ts
│ │ │ ├── reasoning-context.test.ts
│ │ │ └── uri-template.test.ts
│ │ ├── tsconfig.json
│ │ ├── vite.config.ts
│ │ └── vitest-setup-client.ts
│ ├── tokenize/
│ │ ├── CMakeLists.txt
│ │ └── tokenize.cpp
│ └── tts/
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── convert_pt_to_hf.py
│ ├── tts-outetts.py
│ └── tts.cpp
├── ty.toml
└── vendor/
├── cpp-httplib/
│ ├── CMakeLists.txt
│ ├── LICENSE
│ ├── httplib.cpp
│ └── httplib.h
├── miniaudio/
│ └── miniaudio.h
├── nlohmann/
│ ├── json.hpp
│ └── json_fwd.hpp
├── sheredom/
│ └── subprocess.h
└── stb/
└── stb_image.h
Copy disabled (too large)
Download .json
Condensed preview — 2536 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (74,657K chars).
[
{
"path": ".clang-format",
"chars": 4961,
"preview": "---\nLanguage: Cpp\nAlignAfterOpenBracket: Align\nAlignArrayOfStructures: Left\nAlignConsecutiveAssignments: AcrossCo"
},
{
"path": ".clang-tidy",
"chars": 931,
"preview": "---\nChecks: >\n bugprone-*,\n -bugprone-easily-swappable-parameters,\n -bugprone-implicit-widening-of-multiplicati"
},
{
"path": ".devops/cann.Dockerfile",
"chars": 4837,
"preview": "# ==============================================================================\n# ARGUMENTS\n# ========================="
},
{
"path": ".devops/cpu.Dockerfile",
"chars": 2212,
"preview": "ARG UBUNTU_VERSION=24.04\n\nFROM ubuntu:$UBUNTU_VERSION AS build\n\nARG TARGETARCH\n\nRUN apt-get update && \\\n apt-get inst"
},
{
"path": ".devops/cuda-new.Dockerfile",
"chars": 2693,
"preview": "ARG UBUNTU_VERSION=24.04\n# This needs to generally match the container host's environment.\nARG CUDA_VERSION=13.1.1\n# Tar"
},
{
"path": ".devops/cuda.Dockerfile",
"chars": 2693,
"preview": "ARG UBUNTU_VERSION=24.04\n# This needs to generally match the container host's environment.\nARG CUDA_VERSION=12.8.1\n# Tar"
},
{
"path": ".devops/intel.Dockerfile",
"chars": 4068,
"preview": "ARG ONEAPI_VERSION=2025.3.2-0-devel-ubuntu24.04\n\n## Build Image\n\nFROM intel/deep-learning-essentials:$ONEAPI_VERSION AS "
},
{
"path": ".devops/llama-cli-cann.Dockerfile",
"chars": 2383,
"preview": "ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10\n\nFROM ascendai/cann:$ASCEND_VERSION AS build\n\nWORKDIR /app\n\nCOPY . ."
},
{
"path": ".devops/llama-cpp-cuda.srpm.spec",
"chars": 2724,
"preview": "# SRPM for building from source and packaging an RPM for RPM-based distros.\n# https://docs.fedoraproject.org/en-US/quick"
},
{
"path": ".devops/llama-cpp.srpm.spec",
"chars": 2781,
"preview": "# SRPM for building from source and packaging an RPM for RPM-based distros.\n# https://docs.fedoraproject.org/en-US/quick"
},
{
"path": ".devops/musa.Dockerfile",
"chars": 2633,
"preview": "ARG UBUNTU_VERSION=22.04\n# This needs to generally match the container host's environment.\nARG MUSA_VERSION=rc4.3.0\n# Ta"
},
{
"path": ".devops/nix/apps.nix",
"chars": 434,
"preview": "{\n perSystem =\n { config, lib, ... }:\n {\n apps =\n let\n inherit (config.packages) default;\n "
},
{
"path": ".devops/nix/devshells.nix",
"chars": 1451,
"preview": "{ inputs, ... }:\n\n{\n perSystem =\n {\n config,\n lib,\n system,\n ...\n }:\n {\n devShells =\n"
},
{
"path": ".devops/nix/docker.nix",
"chars": 850,
"preview": "{\n lib,\n dockerTools,\n buildEnv,\n llama-cpp,\n interactive ? true,\n coreutils,\n}:\n\n# A tar that can be fed into `do"
},
{
"path": ".devops/nix/jetson-support.nix",
"chars": 1080,
"preview": "{ inputs, ... }:\n{\n perSystem =\n {\n config,\n system,\n lib,\n pkgsCuda,\n ...\n }:\n {\n "
},
{
"path": ".devops/nix/nixpkgs-instances.nix",
"chars": 1685,
"preview": "{ inputs, ... }:\n{\n # The _module.args definitions are passed on to modules as arguments. E.g.\n # the module `{ pkgs ."
},
{
"path": ".devops/nix/package-gguf-py.nix",
"chars": 691,
"preview": "{\n lib,\n llamaVersion,\n numpy,\n tqdm,\n requests,\n sentencepiece,\n pyyaml,\n poetry-core,\n buildPythonPackage,\n "
},
{
"path": ".devops/nix/package.nix",
"chars": 7329,
"preview": "{\n lib,\n glibc,\n config,\n stdenv,\n runCommand,\n cmake,\n ninja,\n pkg-config,\n git,\n mpi,\n blas,\n cudaPackages"
},
{
"path": ".devops/nix/python-scripts.nix",
"chars": 1300,
"preview": "{\n lib,\n stdenv,\n buildPythonPackage,\n poetry-core,\n mkShell,\n python3Packages,\n gguf-py,\n}@inputs:\n\nlet\n llama-"
},
{
"path": ".devops/nix/scope.nix",
"chars": 900,
"preview": "{\n lib,\n newScope,\n python3,\n llamaVersion ? \"0.0.0\",\n}:\n\nlet\n pythonPackages = python3.pkgs;\nin\n\n# We're using `ma"
},
{
"path": ".devops/nix/sif.nix",
"chars": 729,
"preview": "{\n lib,\n singularity-tools,\n llama-cpp,\n bashInteractive,\n interactive ? false,\n}:\n\nlet\n optionalInt = cond: x: if"
},
{
"path": ".devops/openvino.Dockerfile",
"chars": 3896,
"preview": "ARG OPENVINO_VERSION_MAJOR=2026.0\nARG OPENVINO_VERSION_FULL=2026.0.0.20965.c6d6a13a886\nARG UBUNTU_VERSION=24.04\n\n# Optio"
},
{
"path": ".devops/rocm.Dockerfile",
"chars": 3161,
"preview": "ARG UBUNTU_VERSION=24.04\n\n# This needs to generally match the container host's environment.\nARG ROCM_VERSION=7.2\nARG AMD"
},
{
"path": ".devops/s390x.Dockerfile",
"chars": 3821,
"preview": "ARG GCC_VERSION=15.2.0\nARG UBUNTU_VERSION=24.04\n\n### Build Llama.cpp stage\nFROM gcc:${GCC_VERSION} AS build\n\nRUN --mount"
},
{
"path": ".devops/tools.sh",
"chars": 2467,
"preview": "#!/usr/bin/env bash\nset -e\n\n# Read the first argument into a variable\narg1=\"$1\"\n\n# Shift the arguments to remove the fir"
},
{
"path": ".devops/vulkan.Dockerfile",
"chars": 2461,
"preview": "ARG UBUNTU_VERSION=26.04\n\nFROM ubuntu:$UBUNTU_VERSION AS build\n\n# Install build tools\nRUN apt update && apt install -y g"
},
{
"path": ".dockerignore",
"chars": 237,
"preview": "*.o\n*.a\n.cache/\n# Do not ignore .git directory, otherwise the reported build number will always be 0\n.github/\n.gitignore"
},
{
"path": ".ecrc",
"chars": 97,
"preview": "{\n \"Exclude\": [\"^\\\\.gitmodules$\", \"stb_image\\\\.h\"],\n \"Disable\": {\n \"IndentSize\": true\n }\n}\n"
},
{
"path": ".editorconfig",
"chars": 1392,
"preview": "# https://EditorConfig.org\n\n# Top-most EditorConfig file\nroot = true\n\n# Unix-style newlines with a newline ending every "
},
{
"path": ".flake8",
"chars": 565,
"preview": "[flake8]\nmax-line-length = 125\nignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503\nexclude =\n #"
},
{
"path": ".gemini/settings.json",
"chars": 35,
"preview": "{ \"contextFileName\": \"AGENTS.md\" }\n"
},
{
"path": ".gitattributes",
"chars": 259,
"preview": "# Treat the generated single-file WebUI build as binary for diff purposes.\n# Git's pack-file delta compression still wor"
},
{
"path": ".github/ISSUE_TEMPLATE/010-bug-compilation.yml",
"chars": 3216,
"preview": "name: Bug (compilation)\ndescription: Something goes wrong when trying to compile llama.cpp.\ntitle: \"Compile bug: \"\nlabel"
},
{
"path": ".github/ISSUE_TEMPLATE/011-bug-results.yml",
"chars": 4387,
"preview": "name: Bug (model use)\ndescription: Something goes wrong when using a model (in general, not specific to a single llama.c"
},
{
"path": ".github/ISSUE_TEMPLATE/019-bug-misc.yml",
"chars": 3624,
"preview": "name: Bug (misc.)\ndescription: Something is not working the way it should (and it's not covered by any of the above case"
},
{
"path": ".github/ISSUE_TEMPLATE/020-enhancement.yml",
"chars": 2406,
"preview": "name: Enhancement\ndescription: Used to request enhancements for llama.cpp.\ntitle: \"Feature Request: \"\nlabels: [\"enhancem"
},
{
"path": ".github/ISSUE_TEMPLATE/030-research.yml",
"chars": 1728,
"preview": "name: Research\ndescription: Track new technical research area.\ntitle: \"Research: \"\nlabels: [\"research 🔬\"]\nbody:\n - type"
},
{
"path": ".github/ISSUE_TEMPLATE/040-refactor.yml",
"chars": 1223,
"preview": "name: Refactor (Maintainers)\ndescription: Used to track refactoring opportunities.\ntitle: \"Refactor: \"\nlabels: [\"refacto"
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 521,
"preview": "blank_issues_enabled: true\ncontact_links:\n - name: Got an idea?\n url: https://github.com/ggml-org/llama.cpp/discussi"
},
{
"path": ".github/actions/get-tag-name/action.yml",
"chars": 692,
"preview": "name: \"Determine tag name\"\ndescription: \"Determine the tag name to use for a release\"\noutputs:\n name:\n description: "
},
{
"path": ".github/actions/install-exe/action.yml",
"chars": 1126,
"preview": "name: \"Install exe\"\ndescription: \"Download and install exe\"\ninputs:\n url:\n description: \"URL of the exe installer\"\n "
},
{
"path": ".github/actions/linux-setup-openvino/action.yml",
"chars": 752,
"preview": "name: \"Linux - Setup OpenVINO Toolkit\"\ndescription: \"Setup OpenVINO Toolkit for Linux\"\ninputs:\n path:\n description: "
},
{
"path": ".github/actions/linux-setup-spacemit/action.yml",
"chars": 555,
"preview": "name: \"Linux - Setup SpacemiT Toolchain\"\ndescription: \"Setup SpacemiT Toolchain for Linux\"\ninputs:\n path:\n descripti"
},
{
"path": ".github/actions/linux-setup-vulkan/action.yml",
"chars": 498,
"preview": "name: \"Linux - Setup Vulkan SDK\"\ndescription: \"Setup Vulkan SDK for Linux\"\ninputs:\n path:\n description: \"Installatio"
},
{
"path": ".github/actions/unarchive-tar/action.yml",
"chars": 693,
"preview": "name: \"Unarchive tar\"\ndescription: \"Download and unarchive tar into directory\"\ninputs:\n url:\n description: \"URL of t"
},
{
"path": ".github/actions/windows-setup-cuda/action.yml",
"chars": 12074,
"preview": "name: \"Windows - Setup CUDA Toolkit\"\ndescription: \"Setup CUDA Toolkit for Windows\"\ninputs:\n cuda_version:\n descripti"
},
{
"path": ".github/actions/windows-setup-rocm/action.yml",
"chars": 401,
"preview": "name: \"Windows - Setup ROCm\"\ndescription: \"Setup ROCm for Windows\"\ninputs:\n version:\n description: \"ROCm version\"\n "
},
{
"path": ".github/labeler.yml",
"chars": 3197,
"preview": "# https://github.com/actions/labeler\nApple Metal:\n - changed-files:\n - any-glob-to-any-file:\n - ggm"
},
{
"path": ".github/pull_request_template.md",
"chars": 741,
"preview": "## Overview\n\n<!-- Describe what this PR does and why. Be concise but complete -->\n\n## Additional information\n\n<!-- You c"
},
{
"path": ".github/workflows/ai-issues.yml",
"chars": 2904,
"preview": "name: AI review (issues)\n\non:\n issues:\n types: [opened]\n\njobs:\n find-related:\n if: github.event.action == 'opene"
},
{
"path": ".github/workflows/bench.yml.disabled",
"chars": 10467,
"preview": "# TODO: there have been some issues with the workflow, so disabling for now\n# https://github.com/ggml-org/llama.cp"
},
{
"path": ".github/workflows/build-3rd-party.yml",
"chars": 1201,
"preview": "name: CI (3rd-party)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths"
},
{
"path": ".github/workflows/build-android.yml",
"chars": 2730,
"preview": "name: CI (android)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: "
},
{
"path": ".github/workflows/build-apple.yml",
"chars": 6325,
"preview": "name: CI (apple)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: [\n"
},
{
"path": ".github/workflows/build-cache.yml",
"chars": 3349,
"preview": "name: Build Actions Cache\n\non:\n workflow_dispatch: # allows manual triggering\n schedule:\n - cron: '0 * * * *'\n\nconc"
},
{
"path": ".github/workflows/build-cann.yml",
"chars": 3029,
"preview": "name: CI (cann)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: [\n "
},
{
"path": ".github/workflows/build-cmake-pkg.yml",
"chars": 1781,
"preview": "name: Build relocatable cmake package\non:\n workflow_dispatch:\n workflow_call:\n\njobs:\n linux:\n runs-on: ubuntu-slim"
},
{
"path": ".github/workflows/build-cross.yml",
"chars": 13600,
"preview": "name: CI (cross)\non:\n # only manual triggers due to low-importance of the workflows\n # TODO: for regular runs, provisi"
},
{
"path": ".github/workflows/build-msys.yml",
"chars": 1950,
"preview": "name: CI (msys)\n\non:\n # only manual triggers due to low-importance of the workflows\n # TODO: for regular runs, provisi"
},
{
"path": ".github/workflows/build-riscv.yml",
"chars": 4117,
"preview": "name: CI (riscv)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: [\n"
},
{
"path": ".github/workflows/build-sanitize.yml",
"chars": 2247,
"preview": "name: CI (sanitize)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths:"
},
{
"path": ".github/workflows/build-self-hosted.yml",
"chars": 6489,
"preview": "name: CI (self-hosted)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n pat"
},
{
"path": ".github/workflows/build-vulkan.yml",
"chars": 2574,
"preview": "name: CI (vulkan)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: ["
},
{
"path": ".github/workflows/build.yml",
"chars": 42379,
"preview": "name: CI\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: [\n '."
},
{
"path": ".github/workflows/check-vendor.yml",
"chars": 1224,
"preview": "name: Check vendor\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: "
},
{
"path": ".github/workflows/close-issue.yml",
"chars": 942,
"preview": "name: Close inactive issues\non:\n schedule:\n - cron: \"42 0 * * *\"\n\n# Fine-grant permission\n# https://docs.github.com/"
},
{
"path": ".github/workflows/copilot-setup-steps.yml",
"chars": 2180,
"preview": "name: \"Copilot Setup Steps\"\n\n# Automatically run the setup steps when they are changed to allow for easy validation, and"
},
{
"path": ".github/workflows/docker.yml",
"chars": 19166,
"preview": "# This workflow uses actions that are not certified by GitHub.\n# They are provided by a third-party and are governed by\n"
},
{
"path": ".github/workflows/editorconfig.yml",
"chars": 690,
"preview": "name: EditorConfig Checker\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n create_release:\n "
},
{
"path": ".github/workflows/gguf-publish.yml",
"chars": 1281,
"preview": "# This workflow will upload a Python Package using Twine when a GGUF release is created\n# For more information see: http"
},
{
"path": ".github/workflows/hip-quality-check.yml",
"chars": 2308,
"preview": "name: HIP quality check\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n pa"
},
{
"path": ".github/workflows/labeler.yml",
"chars": 352,
"preview": "name: \"Pull Request Labeler\"\non:\n- pull_request_target\n\njobs:\n labeler:\n permissions:\n contents: read\n pul"
},
{
"path": ".github/workflows/pre-tokenizer-hashes.yml",
"chars": 1606,
"preview": "name: Check Pre-Tokenizer Hashes\n\non:\n push:\n paths:\n - 'convert_hf_to_gguf.py'\n - 'conv"
},
{
"path": ".github/workflows/python-check-requirements.yml",
"chars": 914,
"preview": "name: Python check requirements.txt\n\non:\n push:\n paths:\n - '.github/workflows/python-check-requirements.yml'\n "
},
{
"path": ".github/workflows/python-lint.yml",
"chars": 833,
"preview": "name: flake8 Lint\n\non:\n push:\n branches:\n - master\n paths: [\n '.github/workflows/python-lint.yml',\n "
},
{
"path": ".github/workflows/python-type-check.yml",
"chars": 1134,
"preview": "name: Python Type-Check\n\non:\n push:\n paths:\n - '.github/workflows/python-type-check.yml'\n - 'ty.toml'\n "
},
{
"path": ".github/workflows/release.yml",
"chars": 43156,
"preview": "name: Release\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n create_release:\n description"
},
{
"path": ".github/workflows/server-sanitize.yml",
"chars": 3026,
"preview": "name: Server (sanitize)\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n sha:\n description:"
},
{
"path": ".github/workflows/server-self-hosted.yml",
"chars": 3577,
"preview": "name: Server (self-hosted)\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n sha:\n descripti"
},
{
"path": ".github/workflows/server-webui.yml",
"chars": 3267,
"preview": "name: Server WebUI\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n sha:\n description: 'Com"
},
{
"path": ".github/workflows/server.yml",
"chars": 4450,
"preview": "name: Server\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n sha:\n description: 'Commit SH"
},
{
"path": ".github/workflows/update-ops-docs.yml",
"chars": 1351,
"preview": "name: Update Operations Documentation\n\non:\n push:\n paths:\n - 'docs/ops.md'\n - 'docs/ops/"
},
{
"path": ".github/workflows/winget.yml",
"chars": 1556,
"preview": "name: Update Winget Package\n\non:\n workflow_dispatch: # allows manual triggering\n schedule:\n - cron: '28 5 * * *' # "
},
{
"path": ".gitignore",
"chars": 1798,
"preview": "# Extensions\n\n*.a\n*.bat\n*.bin\n*.d\n*.dll\n*.dot\n*.etag\n*.exe\n*.gcda\n*.gcno\n*.gcov\n*.gguf\n*.gguf.json\n*.lastModified\n*.log\n"
},
{
"path": ".gitmodules",
"chars": 0,
"preview": ""
},
{
"path": ".pre-commit-config.yaml",
"chars": 447,
"preview": "# See https://pre-commit.com for more information\n# See https://pre-commit.com/hooks.html for more hooks\nexclude: prompt"
},
{
"path": "AGENTS.md",
"chars": 6857,
"preview": "# Instructions for llama.cpp\n\n> [!IMPORTANT]\n> This project does **not** accept pull requests that are fully or predomin"
},
{
"path": "AUTHORS",
"chars": 65628,
"preview": "# date: Mon Feb 2 08:45:04 EET 2026\n# this file is auto-generated by scripts/gen-authors.sh\n\nНияз Гарифзянов <112617865"
},
{
"path": "CLAUDE.md",
"chars": 104,
"preview": "IMPORTANT: Ensure you’ve thoroughly reviewed the [AGENTS.md](AGENTS.md) file before beginning any work.\n"
},
{
"path": "CMakeLists.txt",
"chars": 8948,
"preview": "cmake_minimum_required(VERSION 3.14...3.28) # for add_link_options and implicit target directories.\nproject(\"llama.cpp\" "
},
{
"path": "CMakePresets.json",
"chars": 4570,
"preview": "{\n \"version\": 4,\n \"configurePresets\": [\n {\n \"name\": \"base\",\n \"hidden\": true,\n \"generator\": "
},
{
"path": "CODEOWNERS",
"chars": 5455,
"preview": "# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs\n# multiplie "
},
{
"path": "CONTRIBUTING.md",
"chars": 11898,
"preview": "# Contributors\n\nThe project differentiates between 3 levels of contributors:\n\n- Contributors: people who have contribute"
},
{
"path": "LICENSE",
"chars": 1078,
"preview": "MIT License\n\nCopyright (c) 2023-2026 The ggml authors\n\nPermission is hereby granted, free of charge, to any person obtai"
},
{
"path": "Makefile",
"chars": 257,
"preview": "define newline\n\n\nendef\n\n$(error Build system changed:$(newline)\\\nThe Makefile build has been replaced by CMake.$(newline"
},
{
"path": "README.md",
"chars": 30152,
"preview": "# llama.cpp\n\n\n - [**Requirements**](#requirements)\n "
},
{
"path": "benches/dgx-spark/aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.html",
"chars": 773361,
"preview": "<!DOCTYPE html>\n<html>\n <head>\n <meta charset=\"utf-8\">\n <style>\n .message {\n "
},
{
"path": "benches/dgx-spark/aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.json",
"chars": 120,
"preview": "{\n \"chars\": 2296.1916666666666,\n \"chars:std\": 986.051306946325,\n \"score\": 0.925,\n \"score:std\": 0.26339134382131846\n}"
},
{
"path": "benches/dgx-spark/aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547_allresults.json",
"chars": 1613851,
"preview": "{\n \"score\": 0.925,\n \"metrics\": {\n \"chars\": 2296.1916666666666,\n \"chars:std\": 986.051306946325,\n \"score:std\": "
},
{
"path": "benches/dgx-spark/dgx-spark.md",
"chars": 26156,
"preview": "## System info\n\n```bash\nuname --all\nLinux spark-17ed 6.11.0-1016-nvidia #16-Ubuntu SMP PREEMPT_DYNAMIC Sun Sep 21 16:52:"
},
{
"path": "benches/mac-m2-ultra/mac-m2-ultra.md",
"chars": 24581,
"preview": "## System info\n\n```bash\nuname -a\nDarwin gg-studio 25.2.0 Darwin Kernel Version 25.2.0: Tue Nov 18 21:07:05 PST 2025; roo"
},
{
"path": "benches/nemotron/nemotron-dgx-spark.md",
"chars": 8996,
"preview": "# NVIDIA DGX Spark\n\n## System info\n\n```bash\nuname --all\nLinux spark-17ed 6.11.0-1016-nvidia #16-Ubuntu SMP PREEMPT_DYNAM"
},
{
"path": "ci/README-MUSA.md",
"chars": 1052,
"preview": "## Running MUSA CI in a Docker Container\n\nAssuming `$PWD` is the root of the `llama.cpp` repository, follow these steps "
},
{
"path": "ci/README.md",
"chars": 1297,
"preview": "# CI\n\nThis CI implements heavy-duty workflows that run on self-hosted runners. Typically the purpose of these workflows "
},
{
"path": "ci/run.sh",
"chars": 28219,
"preview": "#!/usr/bin/env bash\n#\n# sample usage:\n#\n# mkdir tmp\n#\n# # CPU-only build\n# bash ./ci/run.sh ./tmp/results ./tmp/mnt\n#\n# "
},
{
"path": "cmake/arm64-apple-clang.cmake",
"chars": 555,
"preview": "set( CMAKE_SYSTEM_NAME Darwin )\nset( CMAKE_SYSTEM_PROCESSOR arm64 )\n\nset( target arm64-apple-darwin-macho )\n\nset( CMAKE_"
},
{
"path": "cmake/arm64-windows-llvm.cmake",
"chars": 592,
"preview": "set( CMAKE_SYSTEM_NAME Windows )\nset( CMAKE_SYSTEM_PROCESSOR arm64 )\n\nset( target arm64-pc-windows-msvc )\n\nset( CMAKE_C_"
},
{
"path": "cmake/build-info.cmake",
"chars": 1319,
"preview": "set(BUILD_NUMBER 0)\nset(BUILD_COMMIT \"unknown\")\nset(BUILD_COMPILER \"unknown\")\nset(BUILD_TARGET \"unknown\")\n\n# Look for gi"
},
{
"path": "cmake/common.cmake",
"chars": 2044,
"preview": "include(\"ggml/cmake/common.cmake\")\n\nfunction(llama_add_compile_flags)\n if (LLAMA_FATAL_WARNINGS)\n if (CMAKE_CX"
},
{
"path": "cmake/download-models.cmake",
"chars": 505,
"preview": "get_filename_component(DEST_DIR \"${DEST}\" DIRECTORY)\nfile(MAKE_DIRECTORY \"${DEST_DIR}\")\n\nif(NOT EXISTS \"${DEST}\")\n me"
},
{
"path": "cmake/git-vars.cmake",
"chars": 717,
"preview": "find_package(Git)\n\n# the commit's SHA1\nexecute_process(COMMAND\n \"${GIT_EXECUTABLE}\" describe --match=NeVeRmAtCh --alw"
},
{
"path": "cmake/license.cmake",
"chars": 1445,
"preview": "define_property(GLOBAL PROPERTY LICENSE_TEXT\n BRIEF_DOCS \"Embedded licenses\"\n FULL_DOCS \"Global string containing"
},
{
"path": "cmake/llama-config.cmake.in",
"chars": 979,
"preview": "set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@)\nset(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)\nset(LLAMA_BUILD_NUMBER @LLA"
},
{
"path": "cmake/llama.pc.in",
"chars": 311,
"preview": "prefix=@CMAKE_INSTALL_PREFIX@\nexec_prefix=@CMAKE_INSTALL_PREFIX@\nlibdir=@CMAKE_INSTALL_FULL_LIBDIR@\nincludedir=@CMAKE_IN"
},
{
"path": "cmake/riscv64-spacemit-linux-gnu-gcc.cmake",
"chars": 1357,
"preview": "set(CMAKE_SYSTEM_NAME Linux)\nset(CMAKE_SYSTEM_PROCESSOR riscv64)\nset(CMAKE_SYSTEM_VERSION 1)\n\nif (CMAKE_HOST_SYSTEM_PROC"
},
{
"path": "cmake/x64-windows-llvm.cmake",
"chars": 139,
"preview": "set( CMAKE_SYSTEM_NAME Windows )\nset( CMAKE_SYSTEM_PROCESSOR x86_64 )\n\nset( CMAKE_C_COMPILER clang )\nset( CMAKE_CXX_C"
},
{
"path": "common/CMakeLists.txt",
"chars": 4225,
"preview": "# common\n\nfind_package(Threads REQUIRED)\n\nllama_add_compile_flags()\n\n# Build info header\n\nif(EXISTS \"${PROJECT_SOURCE_DI"
},
{
"path": "common/arg.cpp",
"chars": 176632,
"preview": "#include \"arg.h\"\n\n#include \"chat.h\"\n#include \"common.h\"\n#include \"download.h\"\n#include \"hf-cache.h\"\n#include \"json-schem"
},
{
"path": "common/arg.h",
"chars": 5288,
"preview": "#pragma once\n\n#include \"common.h\"\n\n#include <set>\n#include <map>\n#include <string>\n#include <vector>\n#include <cstring>\n"
},
{
"path": "common/base64.hpp",
"chars": 12878,
"preview": "/*\nThis is free and unencumbered software released into the public domain.\n\nAnyone is free to copy, modify, publish, use"
},
{
"path": "common/build-info.cpp.in",
"chars": 198,
"preview": "int LLAMA_BUILD_NUMBER = @LLAMA_BUILD_NUMBER@;\nchar const *LLAMA_COMMIT = \"@LLAMA_BUILD_COMMIT@\";\nchar const *LLAMA_COMP"
},
{
"path": "common/chat-auto-parser-generator.cpp",
"chars": 20142,
"preview": "#include \"chat-auto-parser-helpers.h\"\n#include \"chat-auto-parser.h\"\n#include \"chat-peg-parser.h\"\n#include \"chat.h\"\n#incl"
},
{
"path": "common/chat-auto-parser-helpers.cpp",
"chars": 12940,
"preview": "#include \"chat-auto-parser-helpers.h\"\n\n#include \"chat-auto-parser.h\"\n#include \"chat-peg-parser.h\"\n#include \"chat.h\"\n#inc"
},
{
"path": "common/chat-auto-parser-helpers.h",
"chars": 4541,
"preview": "#pragma once\n\n#include \"chat-auto-parser.h\"\n#include \"peg-parser.h\"\n#include <functional>\n#include <optional>\n#include <"
},
{
"path": "common/chat-auto-parser.h",
"chars": 16499,
"preview": "#pragma once\n\n#include \"chat.h\"\n#include \"common.h\"\n#include \"jinja/caps.h\"\n#include \"peg-parser.h\"\n\n#include <chrono>\n#"
},
{
"path": "common/chat-diff-analyzer.cpp",
"chars": 61798,
"preview": "#include \"chat-auto-parser.h\"\n#include \"chat-auto-parser-helpers.h\"\n#include \"chat-peg-parser.h\"\n#include \"chat.h\"\n#incl"
},
{
"path": "common/chat-peg-parser.cpp",
"chars": 35700,
"preview": "#include \"chat-peg-parser.h\"\n\n#include \"chat-auto-parser.h\"\n#include \"ggml.h\"\n#include \"peg-parser.h\"\n\n#include <nlohman"
},
{
"path": "common/chat-peg-parser.h",
"chars": 10166,
"preview": "#pragma once\n\n#include \"chat.h\"\n#include \"peg-parser.h\"\n\n#include <map>\n#include <optional>\n#include <vector>\n\nclass com"
},
{
"path": "common/chat.cpp",
"chars": 81506,
"preview": "#include \"chat.h\"\n\n#include \"chat-auto-parser-helpers.h\"\n#include \"chat-auto-parser.h\"\n#include \"chat-peg-parser.h\"\n#inc"
},
{
"path": "common/chat.h",
"chars": 13443,
"preview": "// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.\n\n#pragma "
},
{
"path": "common/common.cpp",
"chars": 65272,
"preview": "#include \"ggml.h\"\n#include \"gguf.h\"\n\n#include \"common.h\"\n#include \"log.h\"\n#include \"llama.h\"\n#include \"sampling.h\"\n#incl"
},
{
"path": "common/common.h",
"chars": 43682,
"preview": "// Various helper functions and utilities\n\n#pragma once\n\n#include \"ggml-opt.h\"\n#include \"ggml.h\"\n#include \"llama-cpp.h\"\n"
},
{
"path": "common/console.cpp",
"chars": 41576,
"preview": "#include \"console.h\"\n#include \"log.h\"\n#include <vector>\n#include <iostream>\n#include <cassert>\n#include <cstddef>\n#inclu"
},
{
"path": "common/console.h",
"chars": 1170,
"preview": "// Console functions\n\n#pragma once\n\n#include \"common.h\"\n\n#include <functional>\n#include <string>\n#include <vector>\n\nenum"
},
{
"path": "common/debug.cpp",
"chars": 5849,
"preview": "#include \"debug.h\"\n\n#include \"log.h\"\n\n#include <cmath>\n#include <string>\n\nstatic std::string common_ggml_ne_string(const"
},
{
"path": "common/debug.h",
"chars": 1937,
"preview": "#pragma once\n#include \"common.h\"\n#include <string>\n#include <vector>\n#include <regex>\n\n// common debug functions and str"
},
{
"path": "common/download.cpp",
"chars": 29461,
"preview": "#include \"arg.h\"\n\n#include \"common.h\"\n#include \"log.h\"\n#include \"download.h\"\n#include \"hf-cache.h\"\n\n#define JSON_ASSERT "
},
{
"path": "common/download.h",
"chars": 3348,
"preview": "#pragma once\n\n#include <string>\n#include <vector>\n\nstruct common_params_model;\n\nusing common_header = std::pair<std"
},
{
"path": "common/hf-cache.cpp",
"chars": 24067,
"preview": "#include \"hf-cache.h\"\n\n#include \"common.h\"\n#include \"log.h\"\n#include \"http.h\"\n\n#define JSON_ASSERT GGML_ASSERT\n#include "
},
{
"path": "common/hf-cache.h",
"chars": 801,
"preview": "#pragma once\n\n#include <string>\n#include <vector>\n\n// Ref: https://huggingface.co/docs/hub/local-cache.md\n\nnamespace hf_"
},
{
"path": "common/http.h",
"chars": 2899,
"preview": "#pragma once\n\n#include <cpp-httplib/httplib.h>\n\nstruct common_http_url {\n std::string scheme;\n std::string user;\n "
},
{
"path": "common/jinja/README.md",
"chars": 4188,
"preview": "# llama.cpp Jinja Engine\n\nA Jinja template engine implementation in C++, originally inspired by [huggingface.js's jinja "
},
{
"path": "common/jinja/caps.cpp",
"chars": 16401,
"preview": "#include \"log.h\"\n#include \"value.h\"\n#include \"runtime.h\"\n#include \"caps.h\"\n\n// note: the json dependency is only for def"
},
{
"path": "common/jinja/caps.h",
"chars": 737,
"preview": "#pragma once\n\n#include \"runtime.h\"\n\n#include <string>\n#include <map>\n\nnamespace jinja {\n\nstruct caps {\n bool supports"
},
{
"path": "common/jinja/lexer.cpp",
"chars": 12850,
"preview": "#include \"lexer.h\"\n#include \"runtime.h\"\n\n#include <cctype>\n#include <functional>\n#include <map>\n#include <string>\n#inclu"
},
{
"path": "common/jinja/lexer.h",
"chars": 5354,
"preview": "#pragma once\n\n#include \"utils.h\"\n\n#include <cctype>\n#include <map>\n#include <stdexcept>\n#include <string>\n#include <vect"
},
{
"path": "common/jinja/parser.cpp",
"chars": 22873,
"preview": "#include \"lexer.h\"\n#include \"runtime.h\"\n#include \"parser.h\"\n\n#include <algorithm>\n#include <memory>\n#include <stdexcept>"
},
{
"path": "common/jinja/parser.h",
"chars": 533,
"preview": "#pragma once\n\n#include \"lexer.h\"\n#include \"runtime.h\"\n#include \"utils.h\"\n\n#include <string>\n#include <stdexcept>\n\nnamesp"
},
{
"path": "common/jinja/runtime.cpp",
"chars": 35848,
"preview": "#include \"lexer.h\"\n#include \"runtime.h\"\n#include \"value.h\"\n#include \"utils.h\"\n\n#include <string>\n#include <vector>\n#incl"
},
{
"path": "common/jinja/runtime.h",
"chars": 21068,
"preview": "#pragma once\n\n#include \"lexer.h\"\n#include \"value.h\"\n\n#include <cassert>\n#include <ctime>\n#include <memory>\n#include <sst"
},
{
"path": "common/jinja/string.cpp",
"chars": 5399,
"preview": "#include \"jinja/string.h\"\n#include \"jinja/value.h\"\n\n#include <algorithm>\n#include <functional>\n#include <optional>\n#incl"
},
{
"path": "common/jinja/string.h",
"chars": 1763,
"preview": "#pragma once\n\n#include <optional>\n#include <string>\n#include <vector>\n\n#include \"utils.h\"\n\nnamespace jinja {\n\n// allow d"
},
{
"path": "common/jinja/utils.h",
"chars": 5152,
"preview": "#pragma once\n\n#include <string>\n#include <sstream>\n#include <algorithm>\n#include <cstdint>\n#include <cstring>\n\nnamespace"
},
{
"path": "common/jinja/value.cpp",
"chars": 59322,
"preview": "#include \"runtime.h\"\n#include \"value.h\"\n\n// for converting from JSON to jinja values\n#include <nlohmann/json.hpp>\n\n#incl"
},
{
"path": "common/jinja/value.h",
"chars": 29803,
"preview": "#pragma once\n\n#include \"string.h\"\n#include \"utils.h\"\n\n#include <algorithm>\n#include <cmath>\n#include <cstdint>\n#include "
},
{
"path": "common/json-partial.cpp",
"chars": 15950,
"preview": "#include \"json-partial.h\"\n\n#include \"log.h\"\n\n#include <nlohmann/json.hpp>\n\n#include <string>\n#include <regex>\n\nusing jso"
},
{
"path": "common/json-partial.h",
"chars": 1816,
"preview": "#pragma once\n\n// TODO: use json_fwd.hpp when possible\n#include <nlohmann/json.hpp>\n\n// Healing marker (empty if the JSON"
},
{
"path": "common/json-schema-to-grammar.cpp",
"chars": 50160,
"preview": "#include \"json-schema-to-grammar.h\"\n#include \"common.h\"\n\n#include <nlohmann/json.hpp>\n\n#include <algorithm>\n#include <ma"
},
{
"path": "common/json-schema-to-grammar.h",
"chars": 1434,
"preview": "#pragma once\n\n#include <nlohmann/json_fwd.hpp>\n\n#include <functional>\n#include <memory>\n#include <string>\n\nstd::string j"
},
{
"path": "common/llguidance.cpp",
"chars": 8702,
"preview": "#include \"sampling.h\"\n#include \"log.h\"\n\n#ifdef LLAMA_USE_LLGUIDANCE\n\n# include \"llguidance.h\"\n# include <cmath>\n\ns"
},
{
"path": "common/log.cpp",
"chars": 11499,
"preview": "#include \"common.h\"\n#include \"log.h\"\n\n#include <chrono>\n#include <condition_variable>\n#include <cstdarg>\n#include <cstdi"
},
{
"path": "common/log.h",
"chars": 5274,
"preview": "#pragma once\n\n#include \"ggml.h\" // for ggml_log_level\n\n#define LOG_CLR_TO_EOL \"\\033[K\\r\"\n#define LOG_COL_DEFAULT \"\\033["
},
{
"path": "common/ngram-cache.cpp",
"chars": 11652,
"preview": "#include \"ngram-cache.h\"\n#include \"common.h\"\n#include \"log.h\"\n\n#include <cinttypes>\n#include <cstdint>\n#include <cstdio>"
},
{
"path": "common/ngram-cache.h",
"chars": 4147,
"preview": "#pragma once\n\n#include \"llama.h\"\n\n#include <unordered_map>\n#include <string>\n#include <vector>\n\n#define LLAMA_NGRAM_MIN "
},
{
"path": "common/ngram-map.cpp",
"chars": 19948,
"preview": "#include \"common.h\"\n#include \"log.h\"\n#include \"ngram-map.h\"\n\n#include <cinttypes>\n#include <cstdint>\n#include <cstdio>\n#"
},
{
"path": "common/ngram-map.h",
"chars": 4869,
"preview": "#pragma once\n//\n// common/ngram-map.h: structures used to manage a map from n-grams to a list of m-grams\n//\n// These str"
},
{
"path": "common/ngram-mod.cpp",
"chars": 1110,
"preview": "#include \"ngram-mod.h\"\n\n//\n// common_ngram_mod\n//\n\ncommon_ngram_mod::common_ngram_mod(uint16_t n, size_t size) : n(n), u"
},
{
"path": "common/ngram-mod.h",
"chars": 728,
"preview": "#pragma once\n\n#include <cstdint>\n#include <vector>\n#include <cstddef>\n\n//\n// common_ngram_mod\n// ref: https://github.com"
},
{
"path": "common/peg-parser.cpp",
"chars": 78208,
"preview": "#include \"peg-parser.h\"\n\n#include \"common.h\"\n#include \"json-schema-to-grammar.h\"\n#include \"log.h\"\n#include \"unicode.h\"\n\n"
},
{
"path": "common/peg-parser.h",
"chars": 18876,
"preview": "#pragma once\n\n#include <nlohmann/json_fwd.hpp>\n\n#include <memory>\n#include <unordered_map>\n#include <unordered_set>\n#inc"
},
{
"path": "common/preset.cpp",
"chars": 16468,
"preview": "#include \"arg.h\"\n#include \"preset.h\"\n#include \"peg-parser.h\"\n#include \"log.h\"\n#include \"download.h\"\n\n#include <fstream>\n"
},
{
"path": "common/preset.h",
"chars": 2825,
"preview": "#pragma once\n\n#include \"common.h\"\n#include \"arg.h\"\n\n#include <string>\n#include <vector>\n#include <map>\n#include <set>\n\n/"
},
{
"path": "common/reasoning-budget.cpp",
"chars": 9917,
"preview": "#include \"reasoning-budget.h\"\n#include \"common.h\"\n#include \"unicode.h\"\n\n#include \"log.h\"\n\n#include <cmath>\n#include <cst"
},
{
"path": "common/reasoning-budget.h",
"chars": 2677,
"preview": "#pragma once\n\n#include \"llama.h\"\n\n#include <cstdint>\n#include <vector>\n\nenum common_reasoning_budget_state {\n REASONI"
},
{
"path": "common/regex-partial.cpp",
"chars": 8363,
"preview": "#include \"regex-partial.h\"\n#include \"common.h\"\n#include <functional>\n#include <optional>\n\ncommon_regex::common_regex(con"
},
{
"path": "common/regex-partial.h",
"chars": 1507,
"preview": "#pragma once\n\n#include <regex>\n#include <string>\n\nenum common_regex_match_type {\n COMMON_REGEX_MATCH_TYPE_NONE,\n C"
},
{
"path": "common/sampling.cpp",
"chars": 31261,
"preview": "#include \"sampling.h\"\n\n#include \"common.h\"\n#include \"ggml.h\"\n#include \"log.h\"\n#include \"reasoning-budget.h\"\n\n#include <a"
},
{
"path": "common/sampling.h",
"chars": 5275,
"preview": "#pragma once\n\n#include \"llama.h\"\n\n#include \"common.h\"\n\n#include <string>\n#include <vector>\n\n// common_sampler extends ll"
},
{
"path": "common/speculative.cpp",
"chars": 38688,
"preview": "#include \"speculative.h\"\n\n#include \"common.h\"\n#include \"ggml.h\"\n#include \"llama.h\"\n#include \"log.h\"\n#include \"ngram-cach"
},
{
"path": "common/speculative.h",
"chars": 1520,
"preview": "#pragma once\n\n#include \"llama.h\"\n#include \"common.h\"\n\nstruct common_speculative;\n\n// comma separated list of all types\ns"
},
{
"path": "common/unicode.cpp",
"chars": 4160,
"preview": "#include \"unicode.h\"\n\n#include <algorithm>\n#include <cassert>\n#include <stdexcept>\n#include <string>\n#include <vector>\n\n"
},
{
"path": "common/unicode.h",
"chars": 1083,
"preview": "#pragma once\n\n#include <cstdint>\n#include <string_view>\n#include <vector>\n#include <string>\n\n// UTF-8 parsing utilities "
},
{
"path": "convert_hf_to_gguf.py",
"chars": 610905,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nfrom __future__ import annotations\n\nimport ast\nimport logging\nimport arg"
},
{
"path": "convert_hf_to_gguf_update.py",
"chars": 26542,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nimport logging\nimport os\nimport pathlib\nimport re\n\nimport requests\nimpor"
},
{
"path": "convert_llama_ggml_to_gguf.py",
"chars": 19112,
"preview": "#!/usr/bin/env python3\nfrom __future__ import annotations\n\nimport logging\nimport argparse\nimport os\nimport struct\nimport"
},
{
"path": "convert_lora_to_gguf.py",
"chars": 20917,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nfrom __future__ import annotations\n\nfrom dataclasses import dataclass\nim"
},
{
"path": "docs/android.md",
"chars": 6080,
"preview": "\n# Android\n\n## Build GUI binding using Android Studio\n\nImport the `examples/llama.android` directory into Android Studio"
},
{
"path": "docs/autoparser.md",
"chars": 31780,
"preview": "# Auto-Parser Architecture\n\nThe auto-parser automatically analyzes chat templates to determine how to parse model output"
},
{
"path": "docs/backend/BLIS.md",
"chars": 1657,
"preview": "BLIS Installation Manual\n------------------------\n\nBLIS is a portable software framework for high-performance BLAS-like "
},
{
"path": "docs/backend/CANN.md",
"chars": 13682,
"preview": "# llama.cpp for CANN\n\n - [Background](#background)\n - [News](#news)\n - [OS](#os)\n - [Hardware](#hardware)\n - [Model Supp"
},
{
"path": "docs/backend/CUDA-FEDORA.md",
"chars": 10557,
"preview": "# Setting Up CUDA on Fedora\n\nIn this guide we setup [Nvidia CUDA](https://docs.nvidia.com/cuda/) in a toolbox container."
},
{
"path": "docs/backend/OPENCL.md",
"chars": 10267,
"preview": "# llama.cpp for OpenCL\n\n- [Background](#background)\n- [OS](#os)\n- [Hardware](#hardware)\n- [DataType Supports](#datatype-"
},
{
"path": "docs/backend/OPENVINO.md",
"chars": 19997,
"preview": "# OpenVINO Backend for llama.cpp\n\n> [!NOTE]\n> Performance and memory optimizations, accuracy validation, broader quantiz"
},
{
"path": "docs/backend/SYCL.md",
"chars": 32818,
"preview": "# llama.cpp for SYCL\n\n- [Background](#background)\n- [Recommended Release](#recommended-release)\n- [News](#news)\n- [OS](#"
},
{
"path": "docs/backend/VirtGPU/configuration.md",
"chars": 6814,
"preview": "# GGML-VirtGPU Backend Configuration\n\nThis document describes the environment variables used by the ggml-virtgpu backend"
},
{
"path": "docs/backend/VirtGPU/development.md",
"chars": 6159,
"preview": "# Development and Testing\n\n## Development\n\n### Code Generation\n\nThe backend uses code generation from YAML configuration"
},
{
"path": "docs/backend/VirtGPU.md",
"chars": 6225,
"preview": "# GGML-VirtGPU Backend\n\nThe GGML-VirtGPU backend enables GGML applications to run machine\nlearning computations on host "
},
{
"path": "docs/backend/ZenDNN.md",
"chars": 8883,
"preview": "# llama.cpp for AMD ZenDNN\n\n> [!WARNING]\n> **Note:** ZenDNN is **not** the same as zDNN.\n> - **ZenDNN** (this page): AMD"
},
{
"path": "docs/backend/snapdragon/CMakeUserPresets.json",
"chars": 3047,
"preview": "{\n \"version\": 5,\n \"configurePresets\": [\n {\n \"name\": \"arm64-android-snapdragon\",\n \"hidden\": true,\n "
}
]
// ... and 2336 more files (download for full content)
About this extraction
This page contains the full source code of the ggml-org/llama.cpp GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 2536 files (77.2 MB), approximately 17.7M tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub-repo-to-text converter for AI. Built by Nikandr Surkov.