Copy disabled (too large)
Download .txt
Showing preview only (70,649K chars total). Download the full file to get everything.
Repository: ggml-org/llama.cpp
Branch: master
Commit: 4888137b1736
Files: 2536
Total size: 77.2 MB
Directory structure:
gitextract_xcrsk4vf/
├── .clang-format
├── .clang-tidy
├── .devops/
│ ├── cann.Dockerfile
│ ├── cpu.Dockerfile
│ ├── cuda-new.Dockerfile
│ ├── cuda.Dockerfile
│ ├── intel.Dockerfile
│ ├── llama-cli-cann.Dockerfile
│ ├── llama-cpp-cuda.srpm.spec
│ ├── llama-cpp.srpm.spec
│ ├── musa.Dockerfile
│ ├── nix/
│ │ ├── apps.nix
│ │ ├── devshells.nix
│ │ ├── docker.nix
│ │ ├── jetson-support.nix
│ │ ├── nixpkgs-instances.nix
│ │ ├── package-gguf-py.nix
│ │ ├── package.nix
│ │ ├── python-scripts.nix
│ │ ├── scope.nix
│ │ └── sif.nix
│ ├── openvino.Dockerfile
│ ├── rocm.Dockerfile
│ ├── s390x.Dockerfile
│ ├── tools.sh
│ └── vulkan.Dockerfile
├── .dockerignore
├── .ecrc
├── .editorconfig
├── .flake8
├── .gemini/
│ └── settings.json
├── .gitattributes
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── 010-bug-compilation.yml
│ │ ├── 011-bug-results.yml
│ │ ├── 019-bug-misc.yml
│ │ ├── 020-enhancement.yml
│ │ ├── 030-research.yml
│ │ ├── 040-refactor.yml
│ │ └── config.yml
│ ├── actions/
│ │ ├── get-tag-name/
│ │ │ └── action.yml
│ │ ├── install-exe/
│ │ │ └── action.yml
│ │ ├── linux-setup-openvino/
│ │ │ └── action.yml
│ │ ├── linux-setup-spacemit/
│ │ │ └── action.yml
│ │ ├── linux-setup-vulkan/
│ │ │ └── action.yml
│ │ ├── unarchive-tar/
│ │ │ └── action.yml
│ │ ├── windows-setup-cuda/
│ │ │ └── action.yml
│ │ └── windows-setup-rocm/
│ │ └── action.yml
│ ├── labeler.yml
│ ├── pull_request_template.md
│ └── workflows/
│ ├── ai-issues.yml
│ ├── bench.yml.disabled
│ ├── build-3rd-party.yml
│ ├── build-android.yml
│ ├── build-apple.yml
│ ├── build-cache.yml
│ ├── build-cann.yml
│ ├── build-cmake-pkg.yml
│ ├── build-cross.yml
│ ├── build-msys.yml
│ ├── build-riscv.yml
│ ├── build-sanitize.yml
│ ├── build-self-hosted.yml
│ ├── build-vulkan.yml
│ ├── build.yml
│ ├── check-vendor.yml
│ ├── close-issue.yml
│ ├── copilot-setup-steps.yml
│ ├── docker.yml
│ ├── editorconfig.yml
│ ├── gguf-publish.yml
│ ├── hip-quality-check.yml
│ ├── labeler.yml
│ ├── pre-tokenizer-hashes.yml
│ ├── python-check-requirements.yml
│ ├── python-lint.yml
│ ├── python-type-check.yml
│ ├── release.yml
│ ├── server-sanitize.yml
│ ├── server-self-hosted.yml
│ ├── server-webui.yml
│ ├── server.yml
│ ├── update-ops-docs.yml
│ └── winget.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── AGENTS.md
├── AUTHORS
├── CLAUDE.md
├── CMakeLists.txt
├── CMakePresets.json
├── CODEOWNERS
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── benches/
│ ├── dgx-spark/
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.html
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.json
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547_allresults.json
│ │ └── dgx-spark.md
│ ├── mac-m2-ultra/
│ │ └── mac-m2-ultra.md
│ └── nemotron/
│ └── nemotron-dgx-spark.md
├── ci/
│ ├── README-MUSA.md
│ ├── README.md
│ └── run.sh
├── cmake/
│ ├── arm64-apple-clang.cmake
│ ├── arm64-windows-llvm.cmake
│ ├── build-info.cmake
│ ├── common.cmake
│ ├── download-models.cmake
│ ├── git-vars.cmake
│ ├── license.cmake
│ ├── llama-config.cmake.in
│ ├── llama.pc.in
│ ├── riscv64-spacemit-linux-gnu-gcc.cmake
│ └── x64-windows-llvm.cmake
├── common/
│ ├── CMakeLists.txt
│ ├── arg.cpp
│ ├── arg.h
│ ├── base64.hpp
│ ├── build-info.cpp.in
│ ├── chat-auto-parser-generator.cpp
│ ├── chat-auto-parser-helpers.cpp
│ ├── chat-auto-parser-helpers.h
│ ├── chat-auto-parser.h
│ ├── chat-diff-analyzer.cpp
│ ├── chat-peg-parser.cpp
│ ├── chat-peg-parser.h
│ ├── chat.cpp
│ ├── chat.h
│ ├── common.cpp
│ ├── common.h
│ ├── console.cpp
│ ├── console.h
│ ├── debug.cpp
│ ├── debug.h
│ ├── download.cpp
│ ├── download.h
│ ├── hf-cache.cpp
│ ├── hf-cache.h
│ ├── http.h
│ ├── jinja/
│ │ ├── README.md
│ │ ├── caps.cpp
│ │ ├── caps.h
│ │ ├── lexer.cpp
│ │ ├── lexer.h
│ │ ├── parser.cpp
│ │ ├── parser.h
│ │ ├── runtime.cpp
│ │ ├── runtime.h
│ │ ├── string.cpp
│ │ ├── string.h
│ │ ├── utils.h
│ │ ├── value.cpp
│ │ └── value.h
│ ├── json-partial.cpp
│ ├── json-partial.h
│ ├── json-schema-to-grammar.cpp
│ ├── json-schema-to-grammar.h
│ ├── llguidance.cpp
│ ├── log.cpp
│ ├── log.h
│ ├── ngram-cache.cpp
│ ├── ngram-cache.h
│ ├── ngram-map.cpp
│ ├── ngram-map.h
│ ├── ngram-mod.cpp
│ ├── ngram-mod.h
│ ├── peg-parser.cpp
│ ├── peg-parser.h
│ ├── preset.cpp
│ ├── preset.h
│ ├── reasoning-budget.cpp
│ ├── reasoning-budget.h
│ ├── regex-partial.cpp
│ ├── regex-partial.h
│ ├── sampling.cpp
│ ├── sampling.h
│ ├── speculative.cpp
│ ├── speculative.h
│ ├── unicode.cpp
│ └── unicode.h
├── convert_hf_to_gguf.py
├── convert_hf_to_gguf_update.py
├── convert_llama_ggml_to_gguf.py
├── convert_lora_to_gguf.py
├── docs/
│ ├── android.md
│ ├── autoparser.md
│ ├── backend/
│ │ ├── BLIS.md
│ │ ├── CANN.md
│ │ ├── CUDA-FEDORA.md
│ │ ├── OPENCL.md
│ │ ├── OPENVINO.md
│ │ ├── SYCL.md
│ │ ├── VirtGPU/
│ │ │ ├── configuration.md
│ │ │ └── development.md
│ │ ├── VirtGPU.md
│ │ ├── ZenDNN.md
│ │ ├── snapdragon/
│ │ │ ├── CMakeUserPresets.json
│ │ │ ├── README.md
│ │ │ ├── developer.md
│ │ │ └── windows.md
│ │ └── zDNN.md
│ ├── build-riscv64-spacemit.md
│ ├── build-s390x.md
│ ├── build.md
│ ├── development/
│ │ ├── HOWTO-add-model.md
│ │ ├── debugging-tests.md
│ │ ├── llama-star/
│ │ │ └── idea-arch.key
│ │ ├── parsing.md
│ │ └── token_generation_performance_tips.md
│ ├── docker.md
│ ├── function-calling.md
│ ├── install.md
│ ├── llguidance.md
│ ├── multimodal/
│ │ ├── MobileVLM.md
│ │ ├── gemma3.md
│ │ ├── glmedge.md
│ │ ├── granitevision.md
│ │ ├── llava.md
│ │ ├── minicpmo2.6.md
│ │ ├── minicpmo4.0.md
│ │ ├── minicpmv2.5.md
│ │ ├── minicpmv2.6.md
│ │ ├── minicpmv4.0.md
│ │ └── minicpmv4.5.md
│ ├── multimodal.md
│ ├── ops/
│ │ ├── BLAS.csv
│ │ ├── CANN.csv
│ │ ├── CPU.csv
│ │ ├── CUDA.csv
│ │ ├── Metal.csv
│ │ ├── OpenCL.csv
│ │ ├── SYCL.csv
│ │ ├── Vulkan.csv
│ │ ├── WebGPU.csv
│ │ ├── ZenDNN.csv
│ │ └── zDNN.csv
│ ├── ops.md
│ ├── preset.md
│ └── speculative.md
├── examples/
│ ├── CMakeLists.txt
│ ├── batched/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── batched.cpp
│ ├── batched.swift/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── Package.swift
│ │ ├── README.md
│ │ └── Sources/
│ │ └── main.swift
│ ├── convert-llama2c-to-ggml/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── convert-llama2c-to-ggml.cpp
│ ├── convert_legacy_llama.py
│ ├── debug/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── debug.cpp
│ ├── deprecation-warning/
│ │ ├── README.md
│ │ └── deprecation-warning.cpp
│ ├── diffusion/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── diffusion-cli.cpp
│ ├── embedding/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── embedding.cpp
│ ├── eval-callback/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── eval-callback.cpp
│ ├── gen-docs/
│ │ ├── CMakeLists.txt
│ │ └── gen-docs.cpp
│ ├── gguf/
│ │ ├── CMakeLists.txt
│ │ └── gguf.cpp
│ ├── gguf-hash/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── deps/
│ │ │ ├── rotate-bits/
│ │ │ │ ├── package.json
│ │ │ │ └── rotate-bits.h
│ │ │ ├── sha1/
│ │ │ │ ├── package.json
│ │ │ │ ├── sha1.c
│ │ │ │ └── sha1.h
│ │ │ ├── sha256/
│ │ │ │ ├── package.json
│ │ │ │ ├── sha256.c
│ │ │ │ └── sha256.h
│ │ │ └── xxhash/
│ │ │ ├── clib.json
│ │ │ ├── xxhash.c
│ │ │ └── xxhash.h
│ │ └── gguf-hash.cpp
│ ├── idle/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── idle.cpp
│ ├── json_schema_pydantic_example.py
│ ├── json_schema_to_grammar.py
│ ├── llama.android/
│ │ ├── .gitignore
│ │ ├── app/
│ │ │ ├── .gitignore
│ │ │ ├── build.gradle.kts
│ │ │ ├── proguard-rules.pro
│ │ │ └── src/
│ │ │ └── main/
│ │ │ ├── AndroidManifest.xml
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── example/
│ │ │ │ └── llama/
│ │ │ │ ├── MainActivity.kt
│ │ │ │ └── MessageAdapter.kt
│ │ │ └── res/
│ │ │ ├── drawable/
│ │ │ │ ├── bg_assistant_message.xml
│ │ │ │ ├── bg_user_message.xml
│ │ │ │ ├── ic_launcher_background.xml
│ │ │ │ ├── ic_launcher_foreground.xml
│ │ │ │ ├── outline_folder_open_24.xml
│ │ │ │ └── outline_send_24.xml
│ │ │ ├── layout/
│ │ │ │ ├── activity_main.xml
│ │ │ │ ├── item_message_assistant.xml
│ │ │ │ └── item_message_user.xml
│ │ │ ├── mipmap-anydpi/
│ │ │ │ ├── ic_launcher.xml
│ │ │ │ └── ic_launcher_round.xml
│ │ │ ├── values/
│ │ │ │ ├── colors.xml
│ │ │ │ ├── strings.xml
│ │ │ │ └── themes.xml
│ │ │ └── xml/
│ │ │ ├── backup_rules.xml
│ │ │ └── data_extraction_rules.xml
│ │ ├── build.gradle.kts
│ │ ├── gradle/
│ │ │ ├── libs.versions.toml
│ │ │ └── wrapper/
│ │ │ ├── gradle-wrapper.jar
│ │ │ └── gradle-wrapper.properties
│ │ ├── gradle.properties
│ │ ├── gradlew
│ │ ├── lib/
│ │ │ ├── .gitignore
│ │ │ ├── build.gradle.kts
│ │ │ ├── consumer-rules.pro
│ │ │ ├── proguard-rules.pro
│ │ │ └── src/
│ │ │ ├── androidTest/
│ │ │ │ └── java/
│ │ │ │ └── android/
│ │ │ │ └── llama/
│ │ │ │ └── cpp/
│ │ │ │ └── ExampleInstrumentedTest.kt
│ │ │ ├── main/
│ │ │ │ ├── AndroidManifest.xml
│ │ │ │ ├── cpp/
│ │ │ │ │ ├── CMakeLists.txt
│ │ │ │ │ ├── ai_chat.cpp
│ │ │ │ │ └── logging.h
│ │ │ │ └── java/
│ │ │ │ └── com/
│ │ │ │ └── arm/
│ │ │ │ └── aichat/
│ │ │ │ ├── AiChat.kt
│ │ │ │ ├── InferenceEngine.kt
│ │ │ │ ├── gguf/
│ │ │ │ │ ├── FileType.kt
│ │ │ │ │ ├── GgufMetadata.kt
│ │ │ │ │ └── GgufMetadataReader.kt
│ │ │ │ └── internal/
│ │ │ │ ├── InferenceEngineImpl.kt
│ │ │ │ └── gguf/
│ │ │ │ └── GgufMetadataReaderImpl.kt
│ │ │ └── test/
│ │ │ └── java/
│ │ │ └── android/
│ │ │ └── llama/
│ │ │ └── cpp/
│ │ │ └── ExampleUnitTest.kt
│ │ └── settings.gradle.kts
│ ├── llama.swiftui/
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── llama.cpp.swift/
│ │ │ └── LibLlama.swift
│ │ ├── llama.swiftui/
│ │ │ ├── Assets.xcassets/
│ │ │ │ ├── AppIcon.appiconset/
│ │ │ │ │ └── Contents.json
│ │ │ │ └── Contents.json
│ │ │ ├── Models/
│ │ │ │ └── LlamaState.swift
│ │ │ ├── Resources/
│ │ │ │ └── models/
│ │ │ │ └── .gitignore
│ │ │ ├── UI/
│ │ │ │ ├── ContentView.swift
│ │ │ │ ├── DownloadButton.swift
│ │ │ │ ├── InputButton.swift
│ │ │ │ └── LoadCustomButton.swift
│ │ │ └── llama_swiftuiApp.swift
│ │ └── llama.swiftui.xcodeproj/
│ │ ├── project.pbxproj
│ │ └── project.xcworkspace/
│ │ └── contents.xcworkspacedata
│ ├── llama.vim
│ ├── lookahead/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── lookahead.cpp
│ ├── lookup/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── lookup-create.cpp
│ │ ├── lookup-merge.cpp
│ │ ├── lookup-stats.cpp
│ │ └── lookup.cpp
│ ├── model-conversion/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── scripts/
│ │ ├── causal/
│ │ │ ├── compare-embeddings-logits.sh
│ │ │ ├── compare-logits.py
│ │ │ ├── convert-model.sh
│ │ │ ├── modelcard.template
│ │ │ ├── run-casual-gen-embeddings-org.py
│ │ │ ├── run-converted-model-embeddings-logits.sh
│ │ │ ├── run-converted-model.sh
│ │ │ └── run-org-model.py
│ │ ├── embedding/
│ │ │ ├── compare-embeddings-logits.sh
│ │ │ ├── convert-model.sh
│ │ │ ├── modelcard.template
│ │ │ ├── run-converted-model.sh
│ │ │ └── run-original-model.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── check-nmse.py
│ │ ├── common.py
│ │ ├── compare_tokens.py
│ │ ├── create-collection-add-model.sh
│ │ ├── curl-embedding-server.sh
│ │ ├── hf-add-model-to-collection.py
│ │ ├── hf-create-collection.py
│ │ ├── hf-create-model.py
│ │ ├── hf-upload-gguf-model.py
│ │ ├── inspect-converted-model.sh
│ │ ├── inspect-org-model.py
│ │ ├── perplexity-gen.sh
│ │ ├── perplexity-run-simple.sh
│ │ ├── perplexity-run.sh
│ │ ├── quantize.sh
│ │ ├── run-embedding-server.sh
│ │ └── semantic_check.py
│ ├── parallel/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── parallel.cpp
│ ├── passkey/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── passkey.cpp
│ ├── pydantic_models_to_grammar.py
│ ├── pydantic_models_to_grammar_examples.py
│ ├── reason-act.sh
│ ├── regex_to_grammar.py
│ ├── retrieval/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── retrieval.cpp
│ ├── save-load-state/
│ │ ├── CMakeLists.txt
│ │ └── save-load-state.cpp
│ ├── server-llama2-13B.sh
│ ├── server_embd.py
│ ├── simple/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── simple.cpp
│ ├── simple-chat/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── simple-chat.cpp
│ ├── simple-cmake-pkg/
│ │ ├── .gitignore
│ │ ├── CMakeLists.txt
│ │ └── README.md
│ ├── speculative/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── speculative.cpp
│ ├── speculative-simple/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── speculative-simple.cpp
│ ├── sycl/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── build.sh
│ │ ├── ls-sycl-device.cpp
│ │ ├── run-llama2.sh
│ │ ├── test.sh
│ │ ├── win-build-sycl.bat
│ │ ├── win-run-llama2.bat
│ │ └── win-test.bat
│ ├── training/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── finetune.cpp
│ └── ts-type-to-grammar.sh
├── flake.nix
├── ggml/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── cmake/
│ │ ├── GitVars.cmake
│ │ ├── common.cmake
│ │ └── ggml-config.cmake.in
│ ├── include/
│ │ ├── ggml-alloc.h
│ │ ├── ggml-backend.h
│ │ ├── ggml-blas.h
│ │ ├── ggml-cann.h
│ │ ├── ggml-cpp.h
│ │ ├── ggml-cpu.h
│ │ ├── ggml-cuda.h
│ │ ├── ggml-hexagon.h
│ │ ├── ggml-metal.h
│ │ ├── ggml-opencl.h
│ │ ├── ggml-openvino.h
│ │ ├── ggml-opt.h
│ │ ├── ggml-rpc.h
│ │ ├── ggml-sycl.h
│ │ ├── ggml-virtgpu.h
│ │ ├── ggml-vulkan.h
│ │ ├── ggml-webgpu.h
│ │ ├── ggml-zdnn.h
│ │ ├── ggml-zendnn.h
│ │ ├── ggml.h
│ │ └── gguf.h
│ └── src/
│ ├── CMakeLists.txt
│ ├── ggml-alloc.c
│ ├── ggml-backend-dl.cpp
│ ├── ggml-backend-dl.h
│ ├── ggml-backend-impl.h
│ ├── ggml-backend-reg.cpp
│ ├── ggml-backend.cpp
│ ├── ggml-blas/
│ │ ├── CMakeLists.txt
│ │ └── ggml-blas.cpp
│ ├── ggml-cann/
│ │ ├── CMakeLists.txt
│ │ ├── acl_tensor.cpp
│ │ ├── acl_tensor.h
│ │ ├── aclnn_ops.cpp
│ │ ├── aclnn_ops.h
│ │ ├── common.h
│ │ └── ggml-cann.cpp
│ ├── ggml-common.h
│ ├── ggml-cpu/
│ │ ├── CMakeLists.txt
│ │ ├── amx/
│ │ │ ├── amx.cpp
│ │ │ ├── amx.h
│ │ │ ├── common.h
│ │ │ ├── mmq.cpp
│ │ │ └── mmq.h
│ │ ├── arch/
│ │ │ ├── arm/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ ├── quants.c
│ │ │ │ └── repack.cpp
│ │ │ ├── loongarch/
│ │ │ │ └── quants.c
│ │ │ ├── powerpc/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ └── quants.c
│ │ │ ├── riscv/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ ├── quants.c
│ │ │ │ └── repack.cpp
│ │ │ ├── s390/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ └── quants.c
│ │ │ ├── wasm/
│ │ │ │ └── quants.c
│ │ │ └── x86/
│ │ │ ├── cpu-feats.cpp
│ │ │ ├── quants.c
│ │ │ └── repack.cpp
│ │ ├── arch-fallback.h
│ │ ├── binary-ops.cpp
│ │ ├── binary-ops.h
│ │ ├── cmake/
│ │ │ └── FindSIMD.cmake
│ │ ├── common.h
│ │ ├── ggml-cpu-impl.h
│ │ ├── ggml-cpu.c
│ │ ├── ggml-cpu.cpp
│ │ ├── hbm.cpp
│ │ ├── hbm.h
│ │ ├── kleidiai/
│ │ │ ├── kernels.cpp
│ │ │ ├── kernels.h
│ │ │ ├── kleidiai.cpp
│ │ │ └── kleidiai.h
│ │ ├── llamafile/
│ │ │ ├── sgemm.cpp
│ │ │ └── sgemm.h
│ │ ├── ops.cpp
│ │ ├── ops.h
│ │ ├── quants.c
│ │ ├── quants.h
│ │ ├── repack.cpp
│ │ ├── repack.h
│ │ ├── simd-gemm.h
│ │ ├── simd-mappings.h
│ │ ├── spacemit/
│ │ │ ├── ime.cpp
│ │ │ ├── ime.h
│ │ │ ├── ime1_kernels.cpp
│ │ │ └── ime_kernels.h
│ │ ├── traits.cpp
│ │ ├── traits.h
│ │ ├── unary-ops.cpp
│ │ ├── unary-ops.h
│ │ ├── vec.cpp
│ │ └── vec.h
│ ├── ggml-cuda/
│ │ ├── CMakeLists.txt
│ │ ├── acc.cu
│ │ ├── acc.cuh
│ │ ├── add-id.cu
│ │ ├── add-id.cuh
│ │ ├── arange.cu
│ │ ├── arange.cuh
│ │ ├── argmax.cu
│ │ ├── argmax.cuh
│ │ ├── argsort.cu
│ │ ├── argsort.cuh
│ │ ├── binbcast.cu
│ │ ├── binbcast.cuh
│ │ ├── clamp.cu
│ │ ├── clamp.cuh
│ │ ├── common.cuh
│ │ ├── concat.cu
│ │ ├── concat.cuh
│ │ ├── conv-transpose-1d.cu
│ │ ├── conv-transpose-1d.cuh
│ │ ├── conv2d-dw.cu
│ │ ├── conv2d-dw.cuh
│ │ ├── conv2d-transpose.cu
│ │ ├── conv2d-transpose.cuh
│ │ ├── conv2d.cu
│ │ ├── conv2d.cuh
│ │ ├── convert.cu
│ │ ├── convert.cuh
│ │ ├── count-equal.cu
│ │ ├── count-equal.cuh
│ │ ├── cp-async.cuh
│ │ ├── cpy-utils.cuh
│ │ ├── cpy.cu
│ │ ├── cpy.cuh
│ │ ├── cross-entropy-loss.cu
│ │ ├── cross-entropy-loss.cuh
│ │ ├── cumsum.cu
│ │ ├── cumsum.cuh
│ │ ├── dequantize.cuh
│ │ ├── diag.cu
│ │ ├── diag.cuh
│ │ ├── diagmask.cu
│ │ ├── diagmask.cuh
│ │ ├── fattn-common.cuh
│ │ ├── fattn-mma-f16.cuh
│ │ ├── fattn-tile.cu
│ │ ├── fattn-tile.cuh
│ │ ├── fattn-vec.cuh
│ │ ├── fattn-wmma-f16.cu
│ │ ├── fattn-wmma-f16.cuh
│ │ ├── fattn.cu
│ │ ├── fattn.cuh
│ │ ├── fill.cu
│ │ ├── fill.cuh
│ │ ├── gated_delta_net.cu
│ │ ├── gated_delta_net.cuh
│ │ ├── getrows.cu
│ │ ├── getrows.cuh
│ │ ├── ggml-cuda.cu
│ │ ├── gla.cu
│ │ ├── gla.cuh
│ │ ├── im2col.cu
│ │ ├── im2col.cuh
│ │ ├── mean.cu
│ │ ├── mean.cuh
│ │ ├── mma.cuh
│ │ ├── mmf.cu
│ │ ├── mmf.cuh
│ │ ├── mmid.cu
│ │ ├── mmid.cuh
│ │ ├── mmq.cu
│ │ ├── mmq.cuh
│ │ ├── mmvf.cu
│ │ ├── mmvf.cuh
│ │ ├── mmvq.cu
│ │ ├── mmvq.cuh
│ │ ├── norm.cu
│ │ ├── norm.cuh
│ │ ├── opt-step-adamw.cu
│ │ ├── opt-step-adamw.cuh
│ │ ├── opt-step-sgd.cu
│ │ ├── opt-step-sgd.cuh
│ │ ├── out-prod.cu
│ │ ├── out-prod.cuh
│ │ ├── pad.cu
│ │ ├── pad.cuh
│ │ ├── pad_reflect_1d.cu
│ │ ├── pad_reflect_1d.cuh
│ │ ├── pool2d.cu
│ │ ├── pool2d.cuh
│ │ ├── quantize.cu
│ │ ├── quantize.cuh
│ │ ├── reduce_rows.cuh
│ │ ├── roll.cu
│ │ ├── roll.cuh
│ │ ├── rope.cu
│ │ ├── rope.cuh
│ │ ├── scale.cu
│ │ ├── scale.cuh
│ │ ├── set-rows.cu
│ │ ├── set-rows.cuh
│ │ ├── set.cu
│ │ ├── set.cuh
│ │ ├── softcap.cu
│ │ ├── softcap.cuh
│ │ ├── softmax.cu
│ │ ├── softmax.cuh
│ │ ├── solve_tri.cu
│ │ ├── solve_tri.cuh
│ │ ├── ssm-conv.cu
│ │ ├── ssm-conv.cuh
│ │ ├── ssm-scan.cu
│ │ ├── ssm-scan.cuh
│ │ ├── sum.cu
│ │ ├── sum.cuh
│ │ ├── sumrows.cu
│ │ ├── sumrows.cuh
│ │ ├── template-instances/
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_32.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_32.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_64-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_8.cu
│ │ │ ├── fattn-tile-instance-dkq112-dv112.cu
│ │ │ ├── fattn-tile-instance-dkq128-dv128.cu
│ │ │ ├── fattn-tile-instance-dkq256-dv256.cu
│ │ │ ├── fattn-tile-instance-dkq40-dv40.cu
│ │ │ ├── fattn-tile-instance-dkq512-dv512.cu
│ │ │ ├── fattn-tile-instance-dkq576-dv512.cu
│ │ │ ├── fattn-tile-instance-dkq64-dv64.cu
│ │ │ ├── fattn-tile-instance-dkq72-dv72.cu
│ │ │ ├── fattn-tile-instance-dkq80-dv80.cu
│ │ │ ├── fattn-tile-instance-dkq96-dv96.cu
│ │ │ ├── fattn-vec-instance-bf16-bf16.cu
│ │ │ ├── fattn-vec-instance-bf16-f16.cu
│ │ │ ├── fattn-vec-instance-bf16-q4_0.cu
│ │ │ ├── fattn-vec-instance-bf16-q4_1.cu
│ │ │ ├── fattn-vec-instance-bf16-q5_0.cu
│ │ │ ├── fattn-vec-instance-bf16-q5_1.cu
│ │ │ ├── fattn-vec-instance-bf16-q8_0.cu
│ │ │ ├── fattn-vec-instance-f16-bf16.cu
│ │ │ ├── fattn-vec-instance-f16-f16.cu
│ │ │ ├── fattn-vec-instance-f16-q4_0.cu
│ │ │ ├── fattn-vec-instance-f16-q4_1.cu
│ │ │ ├── fattn-vec-instance-f16-q5_0.cu
│ │ │ ├── fattn-vec-instance-f16-q5_1.cu
│ │ │ ├── fattn-vec-instance-f16-q8_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q4_0-f16.cu
│ │ │ ├── fattn-vec-instance-q4_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q4_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q4_0-q8_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-bf16.cu
│ │ │ ├── fattn-vec-instance-q4_1-f16.cu
│ │ │ ├── fattn-vec-instance-q4_1-q4_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-q4_1.cu
│ │ │ ├── fattn-vec-instance-q4_1-q5_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-q5_1.cu
│ │ │ ├── fattn-vec-instance-q4_1-q8_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q5_0-f16.cu
│ │ │ ├── fattn-vec-instance-q5_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q5_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q5_0-q8_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-bf16.cu
│ │ │ ├── fattn-vec-instance-q5_1-f16.cu
│ │ │ ├── fattn-vec-instance-q5_1-q4_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-q4_1.cu
│ │ │ ├── fattn-vec-instance-q5_1-q5_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-q5_1.cu
│ │ │ ├── fattn-vec-instance-q5_1-q8_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q8_0-f16.cu
│ │ │ ├── fattn-vec-instance-q8_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q8_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q8_0-q8_0.cu
│ │ │ ├── generate_cu_files.py
│ │ │ ├── mmf-instance-ncols_1.cu
│ │ │ ├── mmf-instance-ncols_10.cu
│ │ │ ├── mmf-instance-ncols_11.cu
│ │ │ ├── mmf-instance-ncols_12.cu
│ │ │ ├── mmf-instance-ncols_13.cu
│ │ │ ├── mmf-instance-ncols_14.cu
│ │ │ ├── mmf-instance-ncols_15.cu
│ │ │ ├── mmf-instance-ncols_16.cu
│ │ │ ├── mmf-instance-ncols_2.cu
│ │ │ ├── mmf-instance-ncols_3.cu
│ │ │ ├── mmf-instance-ncols_4.cu
│ │ │ ├── mmf-instance-ncols_5.cu
│ │ │ ├── mmf-instance-ncols_6.cu
│ │ │ ├── mmf-instance-ncols_7.cu
│ │ │ ├── mmf-instance-ncols_8.cu
│ │ │ ├── mmf-instance-ncols_9.cu
│ │ │ ├── mmq-instance-iq1_s.cu
│ │ │ ├── mmq-instance-iq2_s.cu
│ │ │ ├── mmq-instance-iq2_xs.cu
│ │ │ ├── mmq-instance-iq2_xxs.cu
│ │ │ ├── mmq-instance-iq3_s.cu
│ │ │ ├── mmq-instance-iq3_xxs.cu
│ │ │ ├── mmq-instance-iq4_nl.cu
│ │ │ ├── mmq-instance-iq4_xs.cu
│ │ │ ├── mmq-instance-mxfp4.cu
│ │ │ ├── mmq-instance-nvfp4.cu
│ │ │ ├── mmq-instance-q2_k.cu
│ │ │ ├── mmq-instance-q3_k.cu
│ │ │ ├── mmq-instance-q4_0.cu
│ │ │ ├── mmq-instance-q4_1.cu
│ │ │ ├── mmq-instance-q4_k.cu
│ │ │ ├── mmq-instance-q5_0.cu
│ │ │ ├── mmq-instance-q5_1.cu
│ │ │ ├── mmq-instance-q5_k.cu
│ │ │ ├── mmq-instance-q6_k.cu
│ │ │ └── mmq-instance-q8_0.cu
│ │ ├── top-k.cu
│ │ ├── top-k.cuh
│ │ ├── topk-moe.cu
│ │ ├── topk-moe.cuh
│ │ ├── tri.cu
│ │ ├── tri.cuh
│ │ ├── tsembd.cu
│ │ ├── tsembd.cuh
│ │ ├── unary.cu
│ │ ├── unary.cuh
│ │ ├── upscale.cu
│ │ ├── upscale.cuh
│ │ ├── vecdotq.cuh
│ │ ├── vendors/
│ │ │ ├── cuda.h
│ │ │ ├── hip.h
│ │ │ └── musa.h
│ │ ├── wkv.cu
│ │ └── wkv.cuh
│ ├── ggml-hexagon/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-hexagon.cpp
│ │ ├── htp/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── act-ops.c
│ │ │ ├── argsort-ops.c
│ │ │ ├── binary-ops.c
│ │ │ ├── cmake-toolchain.cmake
│ │ │ ├── cpy-ops.c
│ │ │ ├── cumsum-ops.c
│ │ │ ├── flash-attn-ops.c
│ │ │ ├── get-rows-ops.c
│ │ │ ├── hex-dma.c
│ │ │ ├── hex-dma.h
│ │ │ ├── hex-dump.h
│ │ │ ├── hex-fastdiv.h
│ │ │ ├── hex-utils.h
│ │ │ ├── hmx-matmul-ops.c
│ │ │ ├── hmx-ops.h
│ │ │ ├── hmx-profile.h
│ │ │ ├── hmx-utils.h
│ │ │ ├── htp-ctx.h
│ │ │ ├── htp-msg.h
│ │ │ ├── htp-ops.h
│ │ │ ├── htp_iface.idl
│ │ │ ├── hvx-arith.h
│ │ │ ├── hvx-base.h
│ │ │ ├── hvx-copy.h
│ │ │ ├── hvx-div.h
│ │ │ ├── hvx-dump.h
│ │ │ ├── hvx-exp.h
│ │ │ ├── hvx-floor.h
│ │ │ ├── hvx-inverse.h
│ │ │ ├── hvx-reduce.h
│ │ │ ├── hvx-scale.h
│ │ │ ├── hvx-sigmoid.h
│ │ │ ├── hvx-sqrt.h
│ │ │ ├── hvx-types.h
│ │ │ ├── hvx-utils.h
│ │ │ ├── main.c
│ │ │ ├── matmul-ops.c
│ │ │ ├── repeat-ops.c
│ │ │ ├── rope-ops.c
│ │ │ ├── set-rows-ops.c
│ │ │ ├── softmax-ops.c
│ │ │ ├── ssm-conv.c
│ │ │ ├── sum-rows-ops.c
│ │ │ ├── unary-ops.c
│ │ │ ├── worker-pool.c
│ │ │ └── worker-pool.h
│ │ ├── htp-drv.cpp
│ │ ├── htp-drv.h
│ │ ├── libdl.h
│ │ ├── libggml-htp.inf
│ │ └── op-desc.h
│ ├── ggml-hip/
│ │ └── CMakeLists.txt
│ ├── ggml-impl.h
│ ├── ggml-metal/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-metal-common.cpp
│ │ ├── ggml-metal-common.h
│ │ ├── ggml-metal-context.h
│ │ ├── ggml-metal-context.m
│ │ ├── ggml-metal-device.cpp
│ │ ├── ggml-metal-device.h
│ │ ├── ggml-metal-device.m
│ │ ├── ggml-metal-impl.h
│ │ ├── ggml-metal-ops.cpp
│ │ ├── ggml-metal-ops.h
│ │ ├── ggml-metal.cpp
│ │ └── ggml-metal.metal
│ ├── ggml-musa/
│ │ ├── CMakeLists.txt
│ │ ├── mudnn.cu
│ │ └── mudnn.cuh
│ ├── ggml-opencl/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-opencl.cpp
│ │ └── kernels/
│ │ ├── add.cl
│ │ ├── add_id.cl
│ │ ├── argsort.cl
│ │ ├── clamp.cl
│ │ ├── concat.cl
│ │ ├── conv2d.cl
│ │ ├── conv2d_f16_f32.cl
│ │ ├── cpy.cl
│ │ ├── cumsum.cl
│ │ ├── cvt.cl
│ │ ├── diag.cl
│ │ ├── diag_mask_inf.cl
│ │ ├── div.cl
│ │ ├── embed_kernel.py
│ │ ├── exp.cl
│ │ ├── expm1.cl
│ │ ├── fill.cl
│ │ ├── flash_attn_f16.cl
│ │ ├── flash_attn_f32.cl
│ │ ├── flash_attn_f32_f16.cl
│ │ ├── gelu.cl
│ │ ├── gemm_moe_mxfp4_f32.cl
│ │ ├── gemm_noshuffle_q4_1_f32.cl
│ │ ├── gemm_noshuffle_q4_k_f32.cl
│ │ ├── gemm_noshuffle_q6_k_f32.cl
│ │ ├── gemv_moe_mxfp4_f32.cl
│ │ ├── gemv_noshuffle.cl
│ │ ├── gemv_noshuffle_general.cl
│ │ ├── gemv_noshuffle_general_q8_0_f32.cl
│ │ ├── gemv_noshuffle_q4_1_f32.cl
│ │ ├── gemv_noshuffle_q4_k_f32.cl
│ │ ├── gemv_noshuffle_q6_k_f32.cl
│ │ ├── get_rows.cl
│ │ ├── glu.cl
│ │ ├── group_norm.cl
│ │ ├── im2col_f16.cl
│ │ ├── im2col_f32.cl
│ │ ├── l2_norm.cl
│ │ ├── mean.cl
│ │ ├── mul.cl
│ │ ├── mul_mat_Ab_Bi_8x4.cl
│ │ ├── mul_mat_f16_f32.cl
│ │ ├── mul_mm_f16_f32_kq_kqv.cl
│ │ ├── mul_mm_f16_f32_l4_lm.cl
│ │ ├── mul_mm_f32_f32_l4_lm.cl
│ │ ├── mul_mm_q4_0_f32_l4_lm.cl
│ │ ├── mul_mm_q4_1_f32_l4_lm.cl
│ │ ├── mul_mm_q4_k_f32_l4_lm.cl
│ │ ├── mul_mm_q6_k_f32_l4_lm.cl
│ │ ├── mul_mm_q8_0_f32_8x4.cl
│ │ ├── mul_mm_q8_0_f32_l4_lm.cl
│ │ ├── mul_mv_f16_f16.cl
│ │ ├── mul_mv_f16_f32.cl
│ │ ├── mul_mv_f16_f32_1row.cl
│ │ ├── mul_mv_f16_f32_l4.cl
│ │ ├── mul_mv_f32_f32.cl
│ │ ├── mul_mv_id_mxfp4_f32.cl
│ │ ├── mul_mv_id_mxfp4_f32_flat.cl
│ │ ├── mul_mv_id_q4_0_f32_8x_flat.cl
│ │ ├── mul_mv_id_q8_0_f32.cl
│ │ ├── mul_mv_id_q8_0_f32_flat.cl
│ │ ├── mul_mv_mxfp4_f32.cl
│ │ ├── mul_mv_mxfp4_f32_flat.cl
│ │ ├── mul_mv_q4_0_f32.cl
│ │ ├── mul_mv_q4_0_f32_1d_16x_flat.cl
│ │ ├── mul_mv_q4_0_f32_1d_8x_flat.cl
│ │ ├── mul_mv_q4_0_f32_8x_flat.cl
│ │ ├── mul_mv_q4_0_f32_v.cl
│ │ ├── mul_mv_q4_1_f32.cl
│ │ ├── mul_mv_q4_1_f32_flat.cl
│ │ ├── mul_mv_q4_k_f32.cl
│ │ ├── mul_mv_q4_k_f32_flat.cl
│ │ ├── mul_mv_q6_k_f32.cl
│ │ ├── mul_mv_q6_k_f32_flat.cl
│ │ ├── mul_mv_q8_0_f32.cl
│ │ ├── mul_mv_q8_0_f32_flat.cl
│ │ ├── neg.cl
│ │ ├── norm.cl
│ │ ├── pad.cl
│ │ ├── relu.cl
│ │ ├── repeat.cl
│ │ ├── rms_norm.cl
│ │ ├── rope.cl
│ │ ├── scale.cl
│ │ ├── set_rows.cl
│ │ ├── sigmoid.cl
│ │ ├── silu.cl
│ │ ├── softmax_4_f16.cl
│ │ ├── softmax_4_f32.cl
│ │ ├── softmax_f16.cl
│ │ ├── softmax_f32.cl
│ │ ├── softplus.cl
│ │ ├── solve_tri.cl
│ │ ├── sqr.cl
│ │ ├── sqrt.cl
│ │ ├── ssm_conv.cl
│ │ ├── sub.cl
│ │ ├── sum_rows.cl
│ │ ├── tanh.cl
│ │ ├── transpose.cl
│ │ ├── tri.cl
│ │ ├── tsembd.cl
│ │ └── upscale.cl
│ ├── ggml-openvino/
│ │ ├── .clang-format
│ │ ├── CMakeLists.txt
│ │ ├── ggml-decoder.cpp
│ │ ├── ggml-decoder.h
│ │ ├── ggml-openvino-extra.cpp
│ │ ├── ggml-openvino-extra.h
│ │ ├── ggml-openvino.cpp
│ │ ├── ggml-quants.cpp
│ │ ├── ggml-quants.h
│ │ ├── openvino/
│ │ │ ├── decoder.h
│ │ │ ├── frontend.cpp
│ │ │ ├── frontend.h
│ │ │ ├── input_model.cpp
│ │ │ ├── input_model.h
│ │ │ ├── node_context.h
│ │ │ ├── op/
│ │ │ │ ├── cont.cpp
│ │ │ │ ├── cpy.cpp
│ │ │ │ ├── flash_attn_ext.cpp
│ │ │ │ ├── get_rows.cpp
│ │ │ │ ├── glu_geglu.cpp
│ │ │ │ ├── glu_swiglu.cpp
│ │ │ │ ├── mulmat.cpp
│ │ │ │ ├── permute.cpp
│ │ │ │ ├── reshape.cpp
│ │ │ │ ├── rms_norm.cpp
│ │ │ │ ├── rope.cpp
│ │ │ │ ├── scale.cpp
│ │ │ │ ├── set_rows.cpp
│ │ │ │ ├── softmax.cpp
│ │ │ │ ├── transpose.cpp
│ │ │ │ ├── unary_silu.cpp
│ │ │ │ └── view.cpp
│ │ │ ├── op_table.cpp
│ │ │ ├── op_table.h
│ │ │ ├── pass/
│ │ │ │ ├── eliminate_zp.cpp
│ │ │ │ ├── eliminate_zp.h
│ │ │ │ ├── fuse_to_sdpa.cpp
│ │ │ │ ├── fuse_to_sdpa.h
│ │ │ │ ├── mark_decompression_convert_constant_folding.h
│ │ │ │ ├── squeeze_matmul.cpp
│ │ │ │ └── squeeze_matmul.h
│ │ │ ├── translate_session.cpp
│ │ │ ├── translate_session.h
│ │ │ ├── utils.cpp
│ │ │ └── utils.h
│ │ ├── utils.cpp
│ │ └── utils.h
│ ├── ggml-opt.cpp
│ ├── ggml-quants.c
│ ├── ggml-quants.h
│ ├── ggml-rpc/
│ │ ├── CMakeLists.txt
│ │ └── ggml-rpc.cpp
│ ├── ggml-sycl/
│ │ ├── CMakeLists.txt
│ │ ├── add-id.cpp
│ │ ├── add-id.hpp
│ │ ├── backend.hpp
│ │ ├── binbcast.cpp
│ │ ├── binbcast.hpp
│ │ ├── common.cpp
│ │ ├── common.hpp
│ │ ├── concat.cpp
│ │ ├── concat.hpp
│ │ ├── conv.cpp
│ │ ├── conv.hpp
│ │ ├── convert.cpp
│ │ ├── convert.hpp
│ │ ├── count-equal.cpp
│ │ ├── count-equal.hpp
│ │ ├── cpy.cpp
│ │ ├── cpy.hpp
│ │ ├── dequantize.hpp
│ │ ├── dmmv.cpp
│ │ ├── dmmv.hpp
│ │ ├── dpct/
│ │ │ └── helper.hpp
│ │ ├── element_wise.cpp
│ │ ├── element_wise.hpp
│ │ ├── fattn-common.hpp
│ │ ├── fattn-tile.cpp
│ │ ├── fattn-tile.hpp
│ │ ├── fattn-vec.hpp
│ │ ├── fattn.cpp
│ │ ├── fattn.hpp
│ │ ├── gated_delta_net.cpp
│ │ ├── gated_delta_net.hpp
│ │ ├── gemm.hpp
│ │ ├── getrows.cpp
│ │ ├── getrows.hpp
│ │ ├── ggml-sycl.cpp
│ │ ├── gla.cpp
│ │ ├── gla.hpp
│ │ ├── im2col.cpp
│ │ ├── im2col.hpp
│ │ ├── mmq.cpp
│ │ ├── mmq.hpp
│ │ ├── mmvq.cpp
│ │ ├── mmvq.hpp
│ │ ├── norm.cpp
│ │ ├── norm.hpp
│ │ ├── outprod.cpp
│ │ ├── outprod.hpp
│ │ ├── pad.cpp
│ │ ├── pad.hpp
│ │ ├── pad_reflect_1d.cpp
│ │ ├── pad_reflect_1d.hpp
│ │ ├── presets.hpp
│ │ ├── quantize.hpp
│ │ ├── quants.hpp
│ │ ├── repeat_back.cpp
│ │ ├── repeat_back.hpp
│ │ ├── roll.cpp
│ │ ├── roll.hpp
│ │ ├── rope.cpp
│ │ ├── rope.hpp
│ │ ├── set.cpp
│ │ ├── set.hpp
│ │ ├── set_rows.cpp
│ │ ├── set_rows.hpp
│ │ ├── softmax.cpp
│ │ ├── softmax.hpp
│ │ ├── ssm_conv.cpp
│ │ ├── ssm_conv.hpp
│ │ ├── sycl_hw.cpp
│ │ ├── sycl_hw.hpp
│ │ ├── template-instances/
│ │ │ ├── fattn-tile-instance-dkq112-dv112.cpp
│ │ │ ├── fattn-tile-instance-dkq128-dv128.cpp
│ │ │ ├── fattn-tile-instance-dkq256-dv256.cpp
│ │ │ ├── fattn-tile-instance-dkq40-dv40.cpp
│ │ │ ├── fattn-tile-instance-dkq576-dv512.cpp
│ │ │ ├── fattn-tile-instance-dkq64-dv64.cpp
│ │ │ ├── fattn-tile-instance-dkq72-dv72.cpp
│ │ │ ├── fattn-tile-instance-dkq80-dv80.cpp
│ │ │ ├── fattn-tile-instance-dkq96-dv96.cpp
│ │ │ ├── fattn-vec-instance-f16-f16.cpp
│ │ │ ├── fattn-vec-instance-f16-q4_0.cpp
│ │ │ ├── fattn-vec-instance-f16-q4_1.cpp
│ │ │ ├── fattn-vec-instance-f16-q5_0.cpp
│ │ │ ├── fattn-vec-instance-f16-q5_1.cpp
│ │ │ ├── fattn-vec-instance-f16-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-f16.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-f16.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q5_1.cpp
│ │ │ └── fattn-vec-instance-q8_0-q8_0.cpp
│ │ ├── tsembd.cpp
│ │ ├── tsembd.hpp
│ │ ├── type.hpp
│ │ ├── upscale.cpp
│ │ ├── upscale.hpp
│ │ ├── vecdotq.hpp
│ │ ├── wkv.cpp
│ │ └── wkv.hpp
│ ├── ggml-threading.cpp
│ ├── ggml-threading.h
│ ├── ggml-virtgpu/
│ │ ├── CMakeLists.txt
│ │ ├── apir_cs_ggml-rpc-front.cpp
│ │ ├── backend/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── apir_cs_ggml-rpc-back.cpp
│ │ │ ├── backend-convert.h
│ │ │ ├── backend-dispatched-backend.cpp
│ │ │ ├── backend-dispatched-buffer-type.cpp
│ │ │ ├── backend-dispatched-buffer.cpp
│ │ │ ├── backend-dispatched-device.cpp
│ │ │ ├── backend-dispatched.cpp
│ │ │ ├── backend-dispatched.gen.h
│ │ │ ├── backend-dispatched.h
│ │ │ ├── backend-virgl-apir.h
│ │ │ ├── backend.cpp
│ │ │ └── shared/
│ │ │ ├── api_remoting.h
│ │ │ ├── apir_backend.gen.h
│ │ │ ├── apir_backend.h
│ │ │ ├── apir_cs.h
│ │ │ ├── apir_cs_ggml.h
│ │ │ └── apir_cs_rpc.h
│ │ ├── ggml-backend-buffer-type.cpp
│ │ ├── ggml-backend-buffer.cpp
│ │ ├── ggml-backend-device.cpp
│ │ ├── ggml-backend-reg.cpp
│ │ ├── ggml-backend.cpp
│ │ ├── ggml-remoting.h
│ │ ├── ggmlremoting_functions.yaml
│ │ ├── include/
│ │ │ └── apir_hw.h
│ │ ├── regenerate_remoting.py
│ │ ├── virtgpu-apir.h
│ │ ├── virtgpu-forward-backend.cpp
│ │ ├── virtgpu-forward-buffer-type.cpp
│ │ ├── virtgpu-forward-buffer.cpp
│ │ ├── virtgpu-forward-device.cpp
│ │ ├── virtgpu-forward-impl.h
│ │ ├── virtgpu-forward.gen.h
│ │ ├── virtgpu-shm.cpp
│ │ ├── virtgpu-shm.h
│ │ ├── virtgpu-utils.cpp
│ │ ├── virtgpu-utils.h
│ │ ├── virtgpu.cpp
│ │ └── virtgpu.h
│ ├── ggml-vulkan/
│ │ ├── CMakeLists.txt
│ │ ├── cmake/
│ │ │ └── host-toolchain.cmake.in
│ │ ├── ggml-vulkan.cpp
│ │ └── vulkan-shaders/
│ │ ├── CMakeLists.txt
│ │ ├── abs.comp
│ │ ├── acc.comp
│ │ ├── add.comp
│ │ ├── add1.comp
│ │ ├── add_id.comp
│ │ ├── arange.comp
│ │ ├── argmax.comp
│ │ ├── argsort.comp
│ │ ├── argsort_large.comp
│ │ ├── ceil.comp
│ │ ├── clamp.comp
│ │ ├── concat.comp
│ │ ├── contig_copy.comp
│ │ ├── conv2d_dw.comp
│ │ ├── conv2d_mm.comp
│ │ ├── conv_transpose_1d.comp
│ │ ├── copy.comp
│ │ ├── copy_from_quant.comp
│ │ ├── copy_to_quant.comp
│ │ ├── copy_transpose.comp
│ │ ├── cos.comp
│ │ ├── count_equal.comp
│ │ ├── count_experts.comp
│ │ ├── cumsum.comp
│ │ ├── cumsum_multipass1.comp
│ │ ├── cumsum_multipass2.comp
│ │ ├── dequant_f32.comp
│ │ ├── dequant_funcs.glsl
│ │ ├── dequant_funcs_cm2.glsl
│ │ ├── dequant_head.glsl
│ │ ├── dequant_iq1_m.comp
│ │ ├── dequant_iq1_s.comp
│ │ ├── dequant_iq2_s.comp
│ │ ├── dequant_iq2_xs.comp
│ │ ├── dequant_iq2_xxs.comp
│ │ ├── dequant_iq3_s.comp
│ │ ├── dequant_iq3_xxs.comp
│ │ ├── dequant_iq4_nl.comp
│ │ ├── dequant_iq4_xs.comp
│ │ ├── dequant_mxfp4.comp
│ │ ├── dequant_q2_k.comp
│ │ ├── dequant_q3_k.comp
│ │ ├── dequant_q4_0.comp
│ │ ├── dequant_q4_1.comp
│ │ ├── dequant_q4_k.comp
│ │ ├── dequant_q5_0.comp
│ │ ├── dequant_q5_1.comp
│ │ ├── dequant_q5_k.comp
│ │ ├── dequant_q6_k.comp
│ │ ├── dequant_q8_0.comp
│ │ ├── diag.comp
│ │ ├── diag_mask_inf.comp
│ │ ├── div.comp
│ │ ├── elu.comp
│ │ ├── exp.comp
│ │ ├── feature-tests/
│ │ │ ├── bfloat16.comp
│ │ │ ├── coopmat.comp
│ │ │ ├── coopmat2.comp
│ │ │ └── integer_dot.comp
│ │ ├── fill.comp
│ │ ├── flash_attn.comp
│ │ ├── flash_attn_base.glsl
│ │ ├── flash_attn_cm1.comp
│ │ ├── flash_attn_cm2.comp
│ │ ├── flash_attn_mask_opt.comp
│ │ ├── flash_attn_split_k_reduce.comp
│ │ ├── floor.comp
│ │ ├── gated_delta_net.comp
│ │ ├── geglu.comp
│ │ ├── geglu_erf.comp
│ │ ├── geglu_quick.comp
│ │ ├── gelu.comp
│ │ ├── gelu_erf.comp
│ │ ├── gelu_quick.comp
│ │ ├── generic_binary_head.glsl
│ │ ├── generic_head.glsl
│ │ ├── generic_unary_head.glsl
│ │ ├── get_rows.comp
│ │ ├── get_rows_quant.comp
│ │ ├── glu_head.glsl
│ │ ├── glu_main.glsl
│ │ ├── group_norm.comp
│ │ ├── hardsigmoid.comp
│ │ ├── hardswish.comp
│ │ ├── im2col.comp
│ │ ├── im2col_3d.comp
│ │ ├── l2_norm.comp
│ │ ├── leaky_relu.comp
│ │ ├── log.comp
│ │ ├── mul.comp
│ │ ├── mul_mat_split_k_reduce.comp
│ │ ├── mul_mat_vec.comp
│ │ ├── mul_mat_vec_base.glsl
│ │ ├── mul_mat_vec_iface.glsl
│ │ ├── mul_mat_vec_iq1_m.comp
│ │ ├── mul_mat_vec_iq1_s.comp
│ │ ├── mul_mat_vec_iq2_s.comp
│ │ ├── mul_mat_vec_iq2_xs.comp
│ │ ├── mul_mat_vec_iq2_xxs.comp
│ │ ├── mul_mat_vec_iq3_s.comp
│ │ ├── mul_mat_vec_iq3_xxs.comp
│ │ ├── mul_mat_vec_nc.comp
│ │ ├── mul_mat_vec_p021.comp
│ │ ├── mul_mat_vec_q2_k.comp
│ │ ├── mul_mat_vec_q3_k.comp
│ │ ├── mul_mat_vec_q4_k.comp
│ │ ├── mul_mat_vec_q5_k.comp
│ │ ├── mul_mat_vec_q6_k.comp
│ │ ├── mul_mat_vecq.comp
│ │ ├── mul_mat_vecq_funcs.glsl
│ │ ├── mul_mm.comp
│ │ ├── mul_mm_cm2.comp
│ │ ├── mul_mm_funcs.glsl
│ │ ├── mul_mm_id_funcs.glsl
│ │ ├── mul_mmq.comp
│ │ ├── mul_mmq_funcs.glsl
│ │ ├── mul_mmq_shmem_types.glsl
│ │ ├── multi_add.comp
│ │ ├── neg.comp
│ │ ├── norm.comp
│ │ ├── opt_step_adamw.comp
│ │ ├── opt_step_sgd.comp
│ │ ├── pad.comp
│ │ ├── pool2d.comp
│ │ ├── quantize_q8_1.comp
│ │ ├── reglu.comp
│ │ ├── relu.comp
│ │ ├── repeat.comp
│ │ ├── repeat_back.comp
│ │ ├── rms_norm.comp
│ │ ├── rms_norm_back.comp
│ │ ├── rms_norm_partials.comp
│ │ ├── roll.comp
│ │ ├── rope_funcs.glsl
│ │ ├── rope_head.glsl
│ │ ├── rope_multi.comp
│ │ ├── rope_neox.comp
│ │ ├── rope_norm.comp
│ │ ├── rope_params.glsl
│ │ ├── rope_vision.comp
│ │ ├── round.comp
│ │ ├── rte.glsl
│ │ ├── scale.comp
│ │ ├── sgn.comp
│ │ ├── sigmoid.comp
│ │ ├── silu.comp
│ │ ├── silu_back.comp
│ │ ├── sin.comp
│ │ ├── soft_max.comp
│ │ ├── soft_max_back.comp
│ │ ├── soft_max_large1.comp
│ │ ├── soft_max_large2.comp
│ │ ├── soft_max_large3.comp
│ │ ├── soft_max_large_common.glsl
│ │ ├── softplus.comp
│ │ ├── solve_tri.comp
│ │ ├── sqrt.comp
│ │ ├── square.comp
│ │ ├── ssm_conv.comp
│ │ ├── ssm_scan.comp
│ │ ├── step.comp
│ │ ├── sub.comp
│ │ ├── sum_rows.comp
│ │ ├── sum_rows.glsl
│ │ ├── swiglu.comp
│ │ ├── swiglu_oai.comp
│ │ ├── tanh.comp
│ │ ├── timestep_embedding.comp
│ │ ├── topk_argsort.comp
│ │ ├── topk_moe.comp
│ │ ├── topk_nary_search.comp
│ │ ├── tri.comp
│ │ ├── trunc.comp
│ │ ├── types.glsl
│ │ ├── upscale.comp
│ │ ├── utils.glsl
│ │ ├── vulkan-shaders-gen.cpp
│ │ ├── wkv6.comp
│ │ ├── wkv7.comp
│ │ └── xielu.comp
│ ├── ggml-webgpu/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-webgpu-shader-lib.hpp
│ │ ├── ggml-webgpu.cpp
│ │ ├── pre_wgsl.hpp
│ │ └── wgsl-shaders/
│ │ ├── argmax.wgsl
│ │ ├── argsort.wgsl
│ │ ├── argsort_merge.wgsl
│ │ ├── binary.wgsl
│ │ ├── common_decls.tmpl
│ │ ├── concat.wgsl
│ │ ├── cpy.wgsl
│ │ ├── cumsum.wgsl
│ │ ├── embed_wgsl.py
│ │ ├── flash_attn.wgsl
│ │ ├── gated_delta_net.wgsl
│ │ ├── get_rows.wgsl
│ │ ├── glu.wgsl
│ │ ├── memset.wgsl
│ │ ├── mul_mat.wgsl
│ │ ├── mul_mat_decls.tmpl
│ │ ├── mul_mat_reg_tile.wgsl
│ │ ├── mul_mat_subgroup_matrix.wgsl
│ │ ├── mul_mat_vec.wgsl
│ │ ├── pad.wgsl
│ │ ├── repeat.wgsl
│ │ ├── rope.wgsl
│ │ ├── row_norm.wgsl
│ │ ├── scale.wgsl
│ │ ├── set.wgsl
│ │ ├── set_rows.wgsl
│ │ ├── soft_max.wgsl
│ │ ├── solve_tri.wgsl
│ │ ├── ssm_conv.wgsl
│ │ ├── sum_rows.wgsl
│ │ └── unary.wgsl
│ ├── ggml-zdnn/
│ │ ├── .gitignore
│ │ ├── CMakeLists.txt
│ │ ├── common.hpp
│ │ ├── ggml-zdnn.cpp
│ │ ├── mmf.cpp
│ │ ├── mmf.hpp
│ │ ├── utils.cpp
│ │ └── utils.hpp
│ ├── ggml-zendnn/
│ │ ├── CMakeLists.txt
│ │ └── ggml-zendnn.cpp
│ ├── ggml.c
│ ├── ggml.cpp
│ └── gguf.cpp
├── gguf-py/
│ ├── LICENSE
│ ├── README.md
│ ├── examples/
│ │ ├── reader.py
│ │ └── writer.py
│ ├── gguf/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── gguf.py
│ │ ├── gguf_reader.py
│ │ ├── gguf_writer.py
│ │ ├── lazy.py
│ │ ├── metadata.py
│ │ ├── py.typed
│ │ ├── quants.py
│ │ ├── scripts/
│ │ │ ├── gguf_convert_endian.py
│ │ │ ├── gguf_dump.py
│ │ │ ├── gguf_editor_gui.py
│ │ │ ├── gguf_hash.py
│ │ │ ├── gguf_new_metadata.py
│ │ │ └── gguf_set_metadata.py
│ │ ├── tensor_mapping.py
│ │ ├── utility.py
│ │ └── vocab.py
│ ├── pyproject.toml
│ └── tests/
│ ├── __init__.py
│ ├── test_metadata.py
│ └── test_quants.py
├── grammars/
│ ├── README.md
│ ├── arithmetic.gbnf
│ ├── c.gbnf
│ ├── chess.gbnf
│ ├── english.gbnf
│ ├── japanese.gbnf
│ ├── json.gbnf
│ ├── json_arr.gbnf
│ └── list.gbnf
├── include/
│ ├── llama-cpp.h
│ └── llama.h
├── licenses/
│ └── LICENSE-jsonhpp
├── models/
│ ├── .editorconfig
│ ├── ggml-vocab-aquila.gguf
│ ├── ggml-vocab-baichuan.gguf
│ ├── ggml-vocab-bert-bge.gguf
│ ├── ggml-vocab-bert-bge.gguf.inp
│ ├── ggml-vocab-bert-bge.gguf.out
│ ├── ggml-vocab-command-r.gguf
│ ├── ggml-vocab-command-r.gguf.inp
│ ├── ggml-vocab-command-r.gguf.out
│ ├── ggml-vocab-deepseek-coder.gguf
│ ├── ggml-vocab-deepseek-coder.gguf.inp
│ ├── ggml-vocab-deepseek-coder.gguf.out
│ ├── ggml-vocab-deepseek-llm.gguf
│ ├── ggml-vocab-deepseek-llm.gguf.inp
│ ├── ggml-vocab-deepseek-llm.gguf.out
│ ├── ggml-vocab-falcon.gguf
│ ├── ggml-vocab-falcon.gguf.inp
│ ├── ggml-vocab-falcon.gguf.out
│ ├── ggml-vocab-gpt-2.gguf
│ ├── ggml-vocab-gpt-2.gguf.inp
│ ├── ggml-vocab-gpt-2.gguf.out
│ ├── ggml-vocab-gpt-neox.gguf
│ ├── ggml-vocab-llama-bpe.gguf
│ ├── ggml-vocab-llama-bpe.gguf.inp
│ ├── ggml-vocab-llama-bpe.gguf.out
│ ├── ggml-vocab-llama-spm.gguf
│ ├── ggml-vocab-llama-spm.gguf.inp
│ ├── ggml-vocab-llama-spm.gguf.out
│ ├── ggml-vocab-mpt.gguf
│ ├── ggml-vocab-mpt.gguf.inp
│ ├── ggml-vocab-mpt.gguf.out
│ ├── ggml-vocab-nomic-bert-moe.gguf
│ ├── ggml-vocab-phi-3.gguf
│ ├── ggml-vocab-phi-3.gguf.inp
│ ├── ggml-vocab-phi-3.gguf.out
│ ├── ggml-vocab-qwen2.gguf
│ ├── ggml-vocab-qwen2.gguf.inp
│ ├── ggml-vocab-qwen2.gguf.out
│ ├── ggml-vocab-refact.gguf
│ ├── ggml-vocab-refact.gguf.inp
│ ├── ggml-vocab-refact.gguf.out
│ ├── ggml-vocab-starcoder.gguf
│ ├── ggml-vocab-starcoder.gguf.inp
│ ├── ggml-vocab-starcoder.gguf.out
│ └── templates/
│ ├── Apertus-8B-Instruct.jinja
│ ├── Apriel-1.6-15b-Thinker-fixed.jinja
│ ├── Bielik-11B-v3.0-Instruct.jinja
│ ├── ByteDance-Seed-OSS.jinja
│ ├── CohereForAI-c4ai-command-r-plus-tool_use.jinja
│ ├── CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
│ ├── GLM-4.6.jinja
│ ├── GLM-4.7-Flash.jinja
│ ├── GigaChat3-10B-A1.8B.jinja
│ ├── GigaChat3.1-10B-A1.8B.jinja
│ ├── HuggingFaceTB-SmolLM3-3B.jinja
│ ├── Kimi-K2-Instruct.jinja
│ ├── Kimi-K2-Thinking.jinja
│ ├── LFM2-8B-A1B.jinja
│ ├── LFM2.5-Instruct.jinja
│ ├── MiMo-VL.jinja
│ ├── MiniMax-M2.jinja
│ ├── Mistral-Small-3.2-24B-Instruct-2506.jinja
│ ├── NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja
│ ├── NVIDIA-Nemotron-Nano-v2.jinja
│ ├── NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
│ ├── NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
│ ├── Qwen-QwQ-32B.jinja
│ ├── Qwen-Qwen2.5-7B-Instruct.jinja
│ ├── Qwen-Qwen3-0.6B.jinja
│ ├── Qwen3-Coder.jinja
│ ├── Qwen3.5-4B.jinja
│ ├── README.md
│ ├── StepFun3.5-Flash.jinja
│ ├── deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
│ ├── deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
│ ├── deepseek-ai-DeepSeek-V3.1.jinja
│ ├── fireworks-ai-llama-3-firefunction-v2.jinja
│ ├── google-gemma-2-2b-it.jinja
│ ├── ibm-granite-granite-3.3-2B-Instruct.jinja
│ ├── llama-cpp-deepseek-r1.jinja
│ ├── llama-cpp-rwkv-world.jinja
│ ├── meetkai-functionary-medium-v3.1.jinja
│ ├── meetkai-functionary-medium-v3.2.jinja
│ ├── meta-llama-Llama-3.1-8B-Instruct.jinja
│ ├── meta-llama-Llama-3.2-3B-Instruct.jinja
│ ├── meta-llama-Llama-3.3-70B-Instruct.jinja
│ ├── microsoft-Phi-3.5-mini-instruct.jinja
│ ├── mistralai-Ministral-3-14B-Reasoning-2512.jinja
│ ├── mistralai-Mistral-Nemo-Instruct-2407.jinja
│ ├── moonshotai-Kimi-K2.jinja
│ ├── openai-gpt-oss-120b.jinja
│ ├── stepfun-ai-Step-3.5-Flash.jinja
│ ├── unsloth-Apriel-1.5.jinja
│ ├── unsloth-mistral-Devstral-Small-2507.jinja
│ └── upstage-Solar-Open-100B.jinja
├── mypy.ini
├── pocs/
│ ├── CMakeLists.txt
│ └── vdot/
│ ├── CMakeLists.txt
│ ├── q8dot.cpp
│ └── vdot.cpp
├── pyproject.toml
├── pyrightconfig.json
├── requirements/
│ ├── requirements-all.txt
│ ├── requirements-compare-llama-bench.txt
│ ├── requirements-convert_hf_to_gguf.txt
│ ├── requirements-convert_hf_to_gguf_update.txt
│ ├── requirements-convert_legacy_llama.txt
│ ├── requirements-convert_llama_ggml_to_gguf.txt
│ ├── requirements-convert_lora_to_gguf.txt
│ ├── requirements-gguf_editor_gui.txt
│ ├── requirements-pydantic.txt
│ ├── requirements-server-bench.txt
│ ├── requirements-test-tokenizer-random.txt
│ └── requirements-tool_bench.txt
├── requirements.txt
├── scripts/
│ ├── apple/
│ │ ├── validate-apps.sh
│ │ ├── validate-ios.sh
│ │ ├── validate-macos.sh
│ │ ├── validate-tvos.sh
│ │ └── validate-visionos.sh
│ ├── bench-models.sh
│ ├── build-info.sh
│ ├── check-requirements.sh
│ ├── compare-commits.sh
│ ├── compare-llama-bench.py
│ ├── compare-logprobs.py
│ ├── create_ops_docs.py
│ ├── debug-test.sh
│ ├── fetch_server_test_models.py
│ ├── gen-authors.sh
│ ├── gen-unicode-data.py
│ ├── get-flags.mk
│ ├── get-hellaswag.sh
│ ├── get-pg.sh
│ ├── get-wikitext-2.sh
│ ├── get-winogrande.sh
│ ├── get_chat_template.py
│ ├── git-bisect-run.sh
│ ├── git-bisect.sh
│ ├── hf.sh
│ ├── hip/
│ │ └── gcn-cdna-vgpr-check.py
│ ├── install-oneapi.bat
│ ├── jinja/
│ │ ├── jinja-tester.py
│ │ └── requirements.txt
│ ├── pr2wt.sh
│ ├── serve-static.js
│ ├── server-bench.py
│ ├── server-test-function-call.py
│ ├── server-test-model.py
│ ├── snapdragon/
│ │ ├── adb/
│ │ │ ├── llama-cli.farf
│ │ │ ├── run-bench.sh
│ │ │ ├── run-cli.sh
│ │ │ ├── run-completion.sh
│ │ │ ├── run-mtmd.sh
│ │ │ └── run-tool.sh
│ │ ├── qdc/
│ │ │ ├── readme.md
│ │ │ ├── requirements.txt
│ │ │ └── tests/
│ │ │ └── test_bench.py
│ │ └── windows/
│ │ ├── run-bench.ps1
│ │ ├── run-cli.ps1
│ │ ├── run-completion.ps1
│ │ ├── run-mtmd.ps1
│ │ ├── run-tool.ps1
│ │ └── setup-build.ps1
│ ├── sync-ggml-am.sh
│ ├── sync-ggml.last
│ ├── sync-ggml.sh
│ ├── sync_vendor.py
│ ├── tool_bench.py
│ ├── tool_bench.sh
│ ├── verify-checksum-models.py
│ └── xxd.cmake
├── src/
│ ├── CMakeLists.txt
│ ├── llama-adapter.cpp
│ ├── llama-adapter.h
│ ├── llama-arch.cpp
│ ├── llama-arch.h
│ ├── llama-batch.cpp
│ ├── llama-batch.h
│ ├── llama-chat.cpp
│ ├── llama-chat.h
│ ├── llama-context.cpp
│ ├── llama-context.h
│ ├── llama-cparams.cpp
│ ├── llama-cparams.h
│ ├── llama-ext.h
│ ├── llama-grammar.cpp
│ ├── llama-grammar.h
│ ├── llama-graph.cpp
│ ├── llama-graph.h
│ ├── llama-hparams.cpp
│ ├── llama-hparams.h
│ ├── llama-impl.cpp
│ ├── llama-impl.h
│ ├── llama-io.cpp
│ ├── llama-io.h
│ ├── llama-kv-cache-iswa.cpp
│ ├── llama-kv-cache-iswa.h
│ ├── llama-kv-cache.cpp
│ ├── llama-kv-cache.h
│ ├── llama-kv-cells.h
│ ├── llama-memory-hybrid-iswa.cpp
│ ├── llama-memory-hybrid-iswa.h
│ ├── llama-memory-hybrid.cpp
│ ├── llama-memory-hybrid.h
│ ├── llama-memory-recurrent.cpp
│ ├── llama-memory-recurrent.h
│ ├── llama-memory.cpp
│ ├── llama-memory.h
│ ├── llama-mmap.cpp
│ ├── llama-mmap.h
│ ├── llama-model-loader.cpp
│ ├── llama-model-loader.h
│ ├── llama-model-saver.cpp
│ ├── llama-model-saver.h
│ ├── llama-model.cpp
│ ├── llama-model.h
│ ├── llama-quant.cpp
│ ├── llama-quant.h
│ ├── llama-sampler.cpp
│ ├── llama-sampler.h
│ ├── llama-vocab.cpp
│ ├── llama-vocab.h
│ ├── llama.cpp
│ ├── models/
│ │ ├── afmoe.cpp
│ │ ├── apertus.cpp
│ │ ├── arcee.cpp
│ │ ├── arctic.cpp
│ │ ├── arwkv7.cpp
│ │ ├── baichuan.cpp
│ │ ├── bailingmoe.cpp
│ │ ├── bailingmoe2.cpp
│ │ ├── bert.cpp
│ │ ├── bitnet.cpp
│ │ ├── bloom.cpp
│ │ ├── chameleon.cpp
│ │ ├── chatglm.cpp
│ │ ├── codeshell.cpp
│ │ ├── cogvlm.cpp
│ │ ├── cohere2-iswa.cpp
│ │ ├── command-r.cpp
│ │ ├── dbrx.cpp
│ │ ├── deci.cpp
│ │ ├── deepseek.cpp
│ │ ├── deepseek2.cpp
│ │ ├── delta-net-base.cpp
│ │ ├── dots1.cpp
│ │ ├── dream.cpp
│ │ ├── ernie4-5-moe.cpp
│ │ ├── ernie4-5.cpp
│ │ ├── eurobert.cpp
│ │ ├── exaone-moe.cpp
│ │ ├── exaone.cpp
│ │ ├── exaone4.cpp
│ │ ├── falcon-h1.cpp
│ │ ├── falcon.cpp
│ │ ├── gemma-embedding.cpp
│ │ ├── gemma.cpp
│ │ ├── gemma2-iswa.cpp
│ │ ├── gemma3.cpp
│ │ ├── gemma3n-iswa.cpp
│ │ ├── glm4-moe.cpp
│ │ ├── glm4.cpp
│ │ ├── gpt2.cpp
│ │ ├── gptneox.cpp
│ │ ├── granite-hybrid.cpp
│ │ ├── granite.cpp
│ │ ├── grok.cpp
│ │ ├── grovemoe.cpp
│ │ ├── hunyuan-dense.cpp
│ │ ├── hunyuan-moe.cpp
│ │ ├── internlm2.cpp
│ │ ├── jais.cpp
│ │ ├── jais2.cpp
│ │ ├── jamba.cpp
│ │ ├── kimi-linear.cpp
│ │ ├── lfm2.cpp
│ │ ├── llada-moe.cpp
│ │ ├── llada.cpp
│ │ ├── llama-iswa.cpp
│ │ ├── llama.cpp
│ │ ├── maincoder.cpp
│ │ ├── mamba-base.cpp
│ │ ├── mamba.cpp
│ │ ├── mimo2-iswa.cpp
│ │ ├── minicpm3.cpp
│ │ ├── minimax-m2.cpp
│ │ ├── mistral3.cpp
│ │ ├── models.h
│ │ ├── modern-bert.cpp
│ │ ├── mpt.cpp
│ │ ├── nemotron-h.cpp
│ │ ├── nemotron.cpp
│ │ ├── neo-bert.cpp
│ │ ├── olmo.cpp
│ │ ├── olmo2.cpp
│ │ ├── olmoe.cpp
│ │ ├── openai-moe-iswa.cpp
│ │ ├── openelm.cpp
│ │ ├── orion.cpp
│ │ ├── paddleocr.cpp
│ │ ├── pangu-embedded.cpp
│ │ ├── phi2.cpp
│ │ ├── phi3.cpp
│ │ ├── plamo.cpp
│ │ ├── plamo2.cpp
│ │ ├── plamo3.cpp
│ │ ├── plm.cpp
│ │ ├── qwen.cpp
│ │ ├── qwen2.cpp
│ │ ├── qwen2moe.cpp
│ │ ├── qwen2vl.cpp
│ │ ├── qwen3.cpp
│ │ ├── qwen35.cpp
│ │ ├── qwen35moe.cpp
│ │ ├── qwen3moe.cpp
│ │ ├── qwen3next.cpp
│ │ ├── qwen3vl-moe.cpp
│ │ ├── qwen3vl.cpp
│ │ ├── refact.cpp
│ │ ├── rnd1.cpp
│ │ ├── rwkv6-base.cpp
│ │ ├── rwkv6.cpp
│ │ ├── rwkv6qwen2.cpp
│ │ ├── rwkv7-base.cpp
│ │ ├── rwkv7.cpp
│ │ ├── seed-oss.cpp
│ │ ├── smallthinker.cpp
│ │ ├── smollm3.cpp
│ │ ├── stablelm.cpp
│ │ ├── starcoder.cpp
│ │ ├── starcoder2.cpp
│ │ ├── step35-iswa.cpp
│ │ ├── t5-dec.cpp
│ │ ├── t5-enc.cpp
│ │ ├── wavtokenizer-dec.cpp
│ │ └── xverse.cpp
│ ├── unicode-data.cpp
│ ├── unicode-data.h
│ ├── unicode.cpp
│ └── unicode.h
├── tests/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── export-graph-ops.cpp
│ ├── get-model.cpp
│ ├── get-model.h
│ ├── gguf-model-data.cpp
│ ├── gguf-model-data.h
│ ├── peg-parser/
│ │ ├── simple-tokenize.cpp
│ │ ├── simple-tokenize.h
│ │ ├── test-basic.cpp
│ │ ├── test-gbnf-generation.cpp
│ │ ├── test-json-parser.cpp
│ │ ├── test-json-serialization.cpp
│ │ ├── test-python-dict-parser.cpp
│ │ ├── test-unicode.cpp
│ │ └── tests.h
│ ├── run-json-schema-to-grammar.mjs
│ ├── test-alloc.cpp
│ ├── test-arg-parser.cpp
│ ├── test-autorelease.cpp
│ ├── test-backend-ops.cpp
│ ├── test-backend-sampler.cpp
│ ├── test-barrier.cpp
│ ├── test-c.c
│ ├── test-chat-auto-parser.cpp
│ ├── test-chat-peg-parser.cpp
│ ├── test-chat-template.cpp
│ ├── test-chat.cpp
│ ├── test-double-float.cpp
│ ├── test-gbnf-validator.cpp
│ ├── test-gguf-model-data.cpp
│ ├── test-gguf.cpp
│ ├── test-grammar-integration.cpp
│ ├── test-grammar-llguidance.cpp
│ ├── test-grammar-parser.cpp
│ ├── test-jinja.cpp
│ ├── test-json-partial.cpp
│ ├── test-json-schema-to-grammar.cpp
│ ├── test-llama-archs.cpp
│ ├── test-llama-grammar.cpp
│ ├── test-log.cpp
│ ├── test-lora-conversion-inference.sh
│ ├── test-model-load-cancel.cpp
│ ├── test-mtmd-c-api.c
│ ├── test-opt.cpp
│ ├── test-peg-parser.cpp
│ ├── test-quantize-fns.cpp
│ ├── test-quantize-perf.cpp
│ ├── test-quantize-stats.cpp
│ ├── test-reasoning-budget.cpp
│ ├── test-regex-partial.cpp
│ ├── test-rope.cpp
│ ├── test-sampling.cpp
│ ├── test-state-restore-fragmented.cpp
│ ├── test-thread-safety.cpp
│ ├── test-tokenizer-0.cpp
│ ├── test-tokenizer-0.py
│ ├── test-tokenizer-0.sh
│ ├── test-tokenizer-1-bpe.cpp
│ ├── test-tokenizer-1-spm.cpp
│ ├── test-tokenizer-random.py
│ ├── test-tokenizers-repo.sh
│ └── testing.h
├── tools/
│ ├── CMakeLists.txt
│ ├── batched-bench/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── batched-bench.cpp
│ ├── cli/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── cli.cpp
│ ├── completion/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── completion.cpp
│ ├── cvector-generator/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── completions.txt
│ │ ├── cvector-generator.cpp
│ │ ├── mean.hpp
│ │ ├── negative.txt
│ │ ├── pca.hpp
│ │ └── positive.txt
│ ├── export-lora/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── export-lora.cpp
│ ├── fit-params/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── fit-params.cpp
│ ├── gguf-split/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── gguf-split.cpp
│ │ └── tests.sh
│ ├── imatrix/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── imatrix.cpp
│ ├── llama-bench/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── llama-bench.cpp
│ ├── mtmd/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── clip-graph.h
│ │ ├── clip-impl.h
│ │ ├── clip-model.h
│ │ ├── clip.cpp
│ │ ├── clip.h
│ │ ├── debug/
│ │ │ ├── mtmd-debug.cpp
│ │ │ ├── mtmd-debug.h
│ │ │ └── mtmd-debug.md
│ │ ├── deprecation-warning.cpp
│ │ ├── legacy-models/
│ │ │ ├── convert_image_encoder_to_gguf.py
│ │ │ ├── glmedge-convert-image-encoder-to-gguf.py
│ │ │ ├── glmedge-surgery.py
│ │ │ ├── llava_surgery.py
│ │ │ ├── llava_surgery_v2.py
│ │ │ ├── minicpmv-convert-image-encoder-to-gguf.py
│ │ │ └── minicpmv-surgery.py
│ │ ├── models/
│ │ │ ├── cogvlm.cpp
│ │ │ ├── conformer.cpp
│ │ │ ├── deepseekocr.cpp
│ │ │ ├── glm4v.cpp
│ │ │ ├── internvl.cpp
│ │ │ ├── kimik25.cpp
│ │ │ ├── kimivl.cpp
│ │ │ ├── llama4.cpp
│ │ │ ├── llava.cpp
│ │ │ ├── minicpmv.cpp
│ │ │ ├── mobilenetv5.cpp
│ │ │ ├── models.h
│ │ │ ├── nemotron-v2-vl.cpp
│ │ │ ├── paddleocr.cpp
│ │ │ ├── pixtral.cpp
│ │ │ ├── qwen2vl.cpp
│ │ │ ├── qwen3vl.cpp
│ │ │ ├── siglip.cpp
│ │ │ ├── whisper-enc.cpp
│ │ │ └── youtuvl.cpp
│ │ ├── mtmd-audio.cpp
│ │ ├── mtmd-audio.h
│ │ ├── mtmd-cli.cpp
│ │ ├── mtmd-helper.cpp
│ │ ├── mtmd-helper.h
│ │ ├── mtmd-image.cpp
│ │ ├── mtmd-image.h
│ │ ├── mtmd.cpp
│ │ ├── mtmd.h
│ │ ├── requirements.txt
│ │ ├── tests/
│ │ │ ├── test-1-extracted.md
│ │ │ ├── test-1-extracted.txt
│ │ │ ├── test-deepseek-ocr.py
│ │ │ └── tests-requirements.txt
│ │ └── tests.sh
│ ├── parser/
│ │ ├── CMakeLists.txt
│ │ ├── debug-template-parser.cpp
│ │ └── template-analysis.cpp
│ ├── perplexity/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── perplexity.cpp
│ ├── quantize/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── quantize.cpp
│ │ └── tests.sh
│ ├── results/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── results.cpp
│ ├── rpc/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── rpc-server.cpp
│ ├── server/
│ │ ├── CMakeLists.txt
│ │ ├── README-dev.md
│ │ ├── README.md
│ │ ├── bench/
│ │ │ ├── README.md
│ │ │ ├── bench.py
│ │ │ ├── prometheus.yml
│ │ │ ├── requirements.txt
│ │ │ └── script.js
│ │ ├── chat-llama2.sh
│ │ ├── chat.mjs
│ │ ├── chat.sh
│ │ ├── public/
│ │ │ ├── bundle.css
│ │ │ ├── bundle.js
│ │ │ ├── index.html
│ │ │ └── loading.html
│ │ ├── public_legacy/
│ │ │ ├── colorthemes.css
│ │ │ ├── completion.js
│ │ │ ├── index-new.html
│ │ │ ├── index.html
│ │ │ ├── index.js
│ │ │ ├── json-schema-to-grammar.mjs
│ │ │ ├── loading.html
│ │ │ ├── prompt-formats.js
│ │ │ ├── style.css
│ │ │ ├── system-prompts.js
│ │ │ ├── theme-beeninorder.css
│ │ │ ├── theme-ketivah.css
│ │ │ ├── theme-mangotango.css
│ │ │ ├── theme-playground.css
│ │ │ ├── theme-polarnight.css
│ │ │ └── theme-snowstorm.css
│ │ ├── public_simplechat/
│ │ │ ├── datautils.mjs
│ │ │ ├── index.html
│ │ │ ├── readme.md
│ │ │ ├── simplechat.css
│ │ │ ├── simplechat.js
│ │ │ └── ui.mjs
│ │ ├── server-common.cpp
│ │ ├── server-common.h
│ │ ├── server-context.cpp
│ │ ├── server-context.h
│ │ ├── server-cors-proxy.h
│ │ ├── server-http.cpp
│ │ ├── server-http.h
│ │ ├── server-models.cpp
│ │ ├── server-models.h
│ │ ├── server-queue.cpp
│ │ ├── server-queue.h
│ │ ├── server-task.cpp
│ │ ├── server-task.h
│ │ ├── server-tools.cpp
│ │ ├── server-tools.h
│ │ ├── server.cpp
│ │ ├── tests/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── conftest.py
│ │ │ ├── pytest.ini
│ │ │ ├── requirements.txt
│ │ │ ├── tests.sh
│ │ │ ├── unit/
│ │ │ │ ├── test_basic.py
│ │ │ │ ├── test_chat_completion.py
│ │ │ │ ├── test_compat_anthropic.py
│ │ │ │ ├── test_compat_oai_responses.py
│ │ │ │ ├── test_completion.py
│ │ │ │ ├── test_ctx_shift.py
│ │ │ │ ├── test_embedding.py
│ │ │ │ ├── test_infill.py
│ │ │ │ ├── test_lora.py
│ │ │ │ ├── test_proxy.py
│ │ │ │ ├── test_rerank.py
│ │ │ │ ├── test_router.py
│ │ │ │ ├── test_security.py
│ │ │ │ ├── test_sleep.py
│ │ │ │ ├── test_slot_save.py
│ │ │ │ ├── test_speculative.py
│ │ │ │ ├── test_template.py
│ │ │ │ ├── test_tokenize.py
│ │ │ │ ├── test_tool_call.py
│ │ │ │ └── test_vision_api.py
│ │ │ └── utils.py
│ │ ├── themes/
│ │ │ ├── README.md
│ │ │ ├── buttons-top/
│ │ │ │ ├── README.md
│ │ │ │ └── index.html
│ │ │ └── wild/
│ │ │ ├── README.md
│ │ │ └── index.html
│ │ └── webui/
│ │ ├── .gitignore
│ │ ├── .npmrc
│ │ ├── .prettierignore
│ │ ├── .prettierrc
│ │ ├── .storybook/
│ │ │ ├── ModeWatcherDecorator.svelte
│ │ │ ├── TooltipProviderDecorator.svelte
│ │ │ ├── main.ts
│ │ │ ├── preview.ts
│ │ │ └── vitest.setup.ts
│ │ ├── README.md
│ │ ├── components.json
│ │ ├── docs/
│ │ │ ├── architecture/
│ │ │ │ ├── high-level-architecture-simplified.md
│ │ │ │ └── high-level-architecture.md
│ │ │ └── flows/
│ │ │ ├── chat-flow.md
│ │ │ ├── conversations-flow.md
│ │ │ ├── data-flow-simplified-model-mode.md
│ │ │ ├── data-flow-simplified-router-mode.md
│ │ │ ├── database-flow.md
│ │ │ ├── mcp-flow.md
│ │ │ ├── models-flow.md
│ │ │ ├── server-flow.md
│ │ │ └── settings-flow.md
│ │ ├── eslint.config.js
│ │ ├── package.json
│ │ ├── playwright.config.ts
│ │ ├── scripts/
│ │ │ ├── dev.sh
│ │ │ ├── install-git-hooks.sh
│ │ │ └── post-build.sh
│ │ ├── src/
│ │ │ ├── app.css
│ │ │ ├── app.d.ts
│ │ │ ├── app.html
│ │ │ ├── lib/
│ │ │ │ ├── actions/
│ │ │ │ │ └── fade-in-view.svelte.ts
│ │ │ │ ├── components/
│ │ │ │ │ ├── app/
│ │ │ │ │ │ ├── actions/
│ │ │ │ │ │ │ ├── ActionIcon.svelte
│ │ │ │ │ │ │ ├── ActionIconCopyToClipboard.svelte
│ │ │ │ │ │ │ ├── ActionIconRemove.svelte
│ │ │ │ │ │ │ ├── ActionIconsCodeBlock.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── badges/
│ │ │ │ │ │ │ ├── BadgeChatStatistic.svelte
│ │ │ │ │ │ │ ├── BadgeInfo.svelte
│ │ │ │ │ │ │ ├── BadgeModality.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── chat/
│ │ │ │ │ │ │ ├── ChatAttachments/
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpPrompt.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpResource.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpResources.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentPreview.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentThumbnailFile.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentThumbnailImage.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentsList.svelte
│ │ │ │ │ │ │ │ └── ChatAttachmentsViewAll.svelte
│ │ │ │ │ │ │ ├── ChatForm/
│ │ │ │ │ │ │ │ ├── ChatForm.svelte
│ │ │ │ │ │ │ │ ├── ChatFormActions/
│ │ │ │ │ │ │ │ │ ├── ChatFormActionAttachmentsDropdown.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionAttachmentsSheet.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionRecord.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionSubmit.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormActions.svelte
│ │ │ │ │ │ │ │ ├── ChatFormFileInputInvisible.svelte
│ │ │ │ │ │ │ │ ├── ChatFormHelperText.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPicker/
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerItemHeader.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerList.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerListItem.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormPickerListItemSkeleton.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPickerPopover.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPromptPicker/
│ │ │ │ │ │ │ │ │ ├── ChatFormPromptPicker.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPromptPickerArgumentForm.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormPromptPickerArgumentInput.svelte
│ │ │ │ │ │ │ │ ├── ChatFormResourcePicker/
│ │ │ │ │ │ │ │ │ └── ChatFormResourcePicker.svelte
│ │ │ │ │ │ │ │ └── ChatFormTextarea.svelte
│ │ │ │ │ │ │ ├── ChatMessages/
│ │ │ │ │ │ │ │ ├── ChatMessage.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageActions.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageAgenticContent.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageAssistant.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageBranchingControls.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageEditForm.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageMcpPrompt.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageMcpPromptContent.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageStatistics.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageSystem.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageUser.svelte
│ │ │ │ │ │ │ │ └── ChatMessages.svelte
│ │ │ │ │ │ │ ├── ChatScreen/
│ │ │ │ │ │ │ │ ├── ChatScreen.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenDragOverlay.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenForm.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenHeader.svelte
│ │ │ │ │ │ │ │ └── ChatScreenProcessingInfo.svelte
│ │ │ │ │ │ │ ├── ChatSettings/
│ │ │ │ │ │ │ │ ├── ChatSettings.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsFields.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsFooter.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsImportExportTab.svelte
│ │ │ │ │ │ │ │ └── ChatSettingsParameterSourceIndicator.svelte
│ │ │ │ │ │ │ ├── ChatSidebar/
│ │ │ │ │ │ │ │ ├── ChatSidebar.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarActions.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarConversationItem.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarSearch.svelte
│ │ │ │ │ │ │ │ └── handle-mobile-sidebar-item-click.ts
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── content/
│ │ │ │ │ │ │ ├── CollapsibleContentBlock.svelte
│ │ │ │ │ │ │ ├── MarkdownContent.svelte
│ │ │ │ │ │ │ ├── SyntaxHighlightedCode.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── dialogs/
│ │ │ │ │ │ │ ├── DialogChatAttachmentPreview.svelte
│ │ │ │ │ │ │ ├── DialogChatAttachmentsViewAll.svelte
│ │ │ │ │ │ │ ├── DialogChatError.svelte
│ │ │ │ │ │ │ ├── DialogChatSettings.svelte
│ │ │ │ │ │ │ ├── DialogCodePreview.svelte
│ │ │ │ │ │ │ ├── DialogConfirmation.svelte
│ │ │ │ │ │ │ ├── DialogConversationSelection.svelte
│ │ │ │ │ │ │ ├── DialogConversationTitleUpdate.svelte
│ │ │ │ │ │ │ ├── DialogEmptyFileAlert.svelte
│ │ │ │ │ │ │ ├── DialogMcpResourcePreview.svelte
│ │ │ │ │ │ │ ├── DialogMcpResources.svelte
│ │ │ │ │ │ │ ├── DialogMcpServersSettings.svelte
│ │ │ │ │ │ │ ├── DialogModelInformation.svelte
│ │ │ │ │ │ │ ├── DialogModelNotAvailable.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── forms/
│ │ │ │ │ │ │ ├── InputWithSuggestions.svelte
│ │ │ │ │ │ │ ├── KeyValuePairs.svelte
│ │ │ │ │ │ │ ├── SearchInput.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── mcp/
│ │ │ │ │ │ │ ├── McpActiveServersAvatars.svelte
│ │ │ │ │ │ │ ├── McpCapabilitiesBadges.svelte
│ │ │ │ │ │ │ ├── McpConnectionLogs.svelte
│ │ │ │ │ │ │ ├── McpLogo.svelte
│ │ │ │ │ │ │ ├── McpResourceBrowser/
│ │ │ │ │ │ │ │ ├── McpResourceBrowser.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserEmptyState.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserHeader.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserServerItem.svelte
│ │ │ │ │ │ │ │ └── mcp-resource-browser.ts
│ │ │ │ │ │ │ ├── McpResourcePreview.svelte
│ │ │ │ │ │ │ ├── McpResourceTemplateForm.svelte
│ │ │ │ │ │ │ ├── McpServerCard/
│ │ │ │ │ │ │ │ ├── McpServerCard.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardActions.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardDeleteDialog.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardEditForm.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardHeader.svelte
│ │ │ │ │ │ │ │ └── McpServerCardToolsList.svelte
│ │ │ │ │ │ │ ├── McpServerCardSkeleton.svelte
│ │ │ │ │ │ │ ├── McpServerForm.svelte
│ │ │ │ │ │ │ ├── McpServerInfo.svelte
│ │ │ │ │ │ │ ├── McpServersSelector.svelte
│ │ │ │ │ │ │ ├── McpServersSettings.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── misc/
│ │ │ │ │ │ │ ├── ConversationSelection.svelte
│ │ │ │ │ │ │ ├── HorizontalScrollCarousel.svelte
│ │ │ │ │ │ │ ├── KeyboardShortcutInfo.svelte
│ │ │ │ │ │ │ ├── TruncatedText.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── models/
│ │ │ │ │ │ │ ├── ModelBadge.svelte
│ │ │ │ │ │ │ ├── ModelId.svelte
│ │ │ │ │ │ │ ├── ModelsSelector.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorList.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorOption.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorSheet.svelte
│ │ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ │ └── utils.ts
│ │ │ │ │ │ ├── navigation/
│ │ │ │ │ │ │ ├── DropdownMenuActions.svelte
│ │ │ │ │ │ │ ├── DropdownMenuSearchable.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ └── server/
│ │ │ │ │ │ ├── ServerErrorSplash.svelte
│ │ │ │ │ │ ├── ServerLoadingSplash.svelte
│ │ │ │ │ │ ├── ServerStatus.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ └── ui/
│ │ │ │ │ ├── alert/
│ │ │ │ │ │ ├── alert-description.svelte
│ │ │ │ │ │ ├── alert-title.svelte
│ │ │ │ │ │ ├── alert.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── alert-dialog/
│ │ │ │ │ │ ├── alert-dialog-action.svelte
│ │ │ │ │ │ ├── alert-dialog-cancel.svelte
│ │ │ │ │ │ ├── alert-dialog-content.svelte
│ │ │ │ │ │ ├── alert-dialog-description.svelte
│ │ │ │ │ │ ├── alert-dialog-footer.svelte
│ │ │ │ │ │ ├── alert-dialog-header.svelte
│ │ │ │ │ │ ├── alert-dialog-overlay.svelte
│ │ │ │ │ │ ├── alert-dialog-title.svelte
│ │ │ │ │ │ ├── alert-dialog-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── badge/
│ │ │ │ │ │ ├── badge.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── button/
│ │ │ │ │ │ ├── button.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── card/
│ │ │ │ │ │ ├── card-action.svelte
│ │ │ │ │ │ ├── card-content.svelte
│ │ │ │ │ │ ├── card-description.svelte
│ │ │ │ │ │ ├── card-footer.svelte
│ │ │ │ │ │ ├── card-header.svelte
│ │ │ │ │ │ ├── card-title.svelte
│ │ │ │ │ │ ├── card.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── checkbox/
│ │ │ │ │ │ ├── checkbox.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── collapsible/
│ │ │ │ │ │ ├── collapsible-content.svelte
│ │ │ │ │ │ ├── collapsible-trigger.svelte
│ │ │ │ │ │ ├── collapsible.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── dialog/
│ │ │ │ │ │ ├── dialog-close.svelte
│ │ │ │ │ │ ├── dialog-content.svelte
│ │ │ │ │ │ ├── dialog-description.svelte
│ │ │ │ │ │ ├── dialog-footer.svelte
│ │ │ │ │ │ ├── dialog-header.svelte
│ │ │ │ │ │ ├── dialog-overlay.svelte
│ │ │ │ │ │ ├── dialog-title.svelte
│ │ │ │ │ │ ├── dialog-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── dropdown-menu/
│ │ │ │ │ │ ├── dropdown-menu-checkbox-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-content.svelte
│ │ │ │ │ │ ├── dropdown-menu-group-heading.svelte
│ │ │ │ │ │ ├── dropdown-menu-group.svelte
│ │ │ │ │ │ ├── dropdown-menu-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-label.svelte
│ │ │ │ │ │ ├── dropdown-menu-radio-group.svelte
│ │ │ │ │ │ ├── dropdown-menu-radio-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-separator.svelte
│ │ │ │ │ │ ├── dropdown-menu-shortcut.svelte
│ │ │ │ │ │ ├── dropdown-menu-sub-content.svelte
│ │ │ │ │ │ ├── dropdown-menu-sub-trigger.svelte
│ │ │ │ │ │ ├── dropdown-menu-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── input/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── input.svelte
│ │ │ │ │ ├── label/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── label.svelte
│ │ │ │ │ ├── popover/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── popover-close.svelte
│ │ │ │ │ │ ├── popover-content.svelte
│ │ │ │ │ │ ├── popover-portal.svelte
│ │ │ │ │ │ ├── popover-trigger.svelte
│ │ │ │ │ │ └── popover.svelte
│ │ │ │ │ ├── scroll-area/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── scroll-area-scrollbar.svelte
│ │ │ │ │ │ └── scroll-area.svelte
│ │ │ │ │ ├── select/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── select-content.svelte
│ │ │ │ │ │ ├── select-group-heading.svelte
│ │ │ │ │ │ ├── select-group.svelte
│ │ │ │ │ │ ├── select-item.svelte
│ │ │ │ │ │ ├── select-label.svelte
│ │ │ │ │ │ ├── select-scroll-down-button.svelte
│ │ │ │ │ │ ├── select-scroll-up-button.svelte
│ │ │ │ │ │ ├── select-separator.svelte
│ │ │ │ │ │ └── select-trigger.svelte
│ │ │ │ │ ├── separator/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── separator.svelte
│ │ │ │ │ ├── sheet/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── sheet-close.svelte
│ │ │ │ │ │ ├── sheet-content.svelte
│ │ │ │ │ │ ├── sheet-description.svelte
│ │ │ │ │ │ ├── sheet-footer.svelte
│ │ │ │ │ │ ├── sheet-header.svelte
│ │ │ │ │ │ ├── sheet-overlay.svelte
│ │ │ │ │ │ ├── sheet-title.svelte
│ │ │ │ │ │ └── sheet-trigger.svelte
│ │ │ │ │ ├── sidebar/
│ │ │ │ │ │ ├── constants.ts
│ │ │ │ │ │ ├── context.svelte.ts
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── sidebar-content.svelte
│ │ │ │ │ │ ├── sidebar-footer.svelte
│ │ │ │ │ │ ├── sidebar-group-action.svelte
│ │ │ │ │ │ ├── sidebar-group-content.svelte
│ │ │ │ │ │ ├── sidebar-group-label.svelte
│ │ │ │ │ │ ├── sidebar-group.svelte
│ │ │ │ │ │ ├── sidebar-header.svelte
│ │ │ │ │ │ ├── sidebar-input.svelte
│ │ │ │ │ │ ├── sidebar-inset.svelte
│ │ │ │ │ │ ├── sidebar-menu-action.svelte
│ │ │ │ │ │ ├── sidebar-menu-badge.svelte
│ │ │ │ │ │ ├── sidebar-menu-button.svelte
│ │ │ │ │ │ ├── sidebar-menu-item.svelte
│ │ │ │ │ │ ├── sidebar-menu-skeleton.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub-button.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub-item.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub.svelte
│ │ │ │ │ │ ├── sidebar-menu.svelte
│ │ │ │ │ │ ├── sidebar-provider.svelte
│ │ │ │ │ │ ├── sidebar-rail.svelte
│ │ │ │ │ │ ├── sidebar-separator.svelte
│ │ │ │ │ │ ├── sidebar-trigger.svelte
│ │ │ │ │ │ └── sidebar.svelte
│ │ │ │ │ ├── skeleton/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── skeleton.svelte
│ │ │ │ │ ├── switch/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── switch.svelte
│ │ │ │ │ ├── table/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── table-body.svelte
│ │ │ │ │ │ ├── table-caption.svelte
│ │ │ │ │ │ ├── table-cell.svelte
│ │ │ │ │ │ ├── table-footer.svelte
│ │ │ │ │ │ ├── table-head.svelte
│ │ │ │ │ │ ├── table-header.svelte
│ │ │ │ │ │ ├── table-row.svelte
│ │ │ │ │ │ └── table.svelte
│ │ │ │ │ ├── textarea/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── textarea.svelte
│ │ │ │ │ ├── tooltip/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── tooltip-content.svelte
│ │ │ │ │ │ └── tooltip-trigger.svelte
│ │ │ │ │ └── utils.ts
│ │ │ │ ├── constants/
│ │ │ │ │ ├── agentic.ts
│ │ │ │ │ ├── api-endpoints.ts
│ │ │ │ │ ├── attachment-labels.ts
│ │ │ │ │ ├── auto-scroll.ts
│ │ │ │ │ ├── binary-detection.ts
│ │ │ │ │ ├── cache.ts
│ │ │ │ │ ├── chat-form.ts
│ │ │ │ │ ├── code-blocks.ts
│ │ │ │ │ ├── code.ts
│ │ │ │ │ ├── context-keys.ts
│ │ │ │ │ ├── css-classes.ts
│ │ │ │ │ ├── favicon.ts
│ │ │ │ │ ├── floating-ui-constraints.ts
│ │ │ │ │ ├── formatters.ts
│ │ │ │ │ ├── icons.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── key-value-pairs.ts
│ │ │ │ │ ├── latex-protection.ts
│ │ │ │ │ ├── literal-html.ts
│ │ │ │ │ ├── localstorage-keys.ts
│ │ │ │ │ ├── markdown.ts
│ │ │ │ │ ├── max-bundle-size.ts
│ │ │ │ │ ├── mcp-form.ts
│ │ │ │ │ ├── mcp-resource.ts
│ │ │ │ │ ├── mcp.ts
│ │ │ │ │ ├── message-export.ts
│ │ │ │ │ ├── model-id.ts
│ │ │ │ │ ├── precision.ts
│ │ │ │ │ ├── processing-info.ts
│ │ │ │ │ ├── settings-config.ts
│ │ │ │ │ ├── settings-fields.ts
│ │ │ │ │ ├── settings-keys.ts
│ │ │ │ │ ├── settings-sections.ts
│ │ │ │ │ ├── supported-file-types.ts
│ │ │ │ │ ├── table-html-restorer.ts
│ │ │ │ │ ├── tooltip-config.ts
│ │ │ │ │ ├── ui.ts
│ │ │ │ │ ├── uri-template.ts
│ │ │ │ │ └── viewport.ts
│ │ │ │ ├── contexts/
│ │ │ │ │ ├── chat-actions.context.ts
│ │ │ │ │ ├── chat-settings-dialog.context.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ └── message-edit.context.ts
│ │ │ │ ├── enums/
│ │ │ │ │ ├── agentic.ts
│ │ │ │ │ ├── attachment.ts
│ │ │ │ │ ├── chat.ts
│ │ │ │ │ ├── files.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── keyboard.ts
│ │ │ │ │ ├── mcp.ts
│ │ │ │ │ ├── model.ts
│ │ │ │ │ ├── server.ts
│ │ │ │ │ ├── settings.ts
│ │ │ │ │ └── ui.ts
│ │ │ │ ├── hooks/
│ │ │ │ │ ├── is-mobile.svelte.ts
│ │ │ │ │ ├── use-auto-scroll.svelte.ts
│ │ │ │ │ └── use-processing-state.svelte.ts
│ │ │ │ ├── markdown/
│ │ │ │ │ ├── enhance-code-blocks.ts
│ │ │ │ │ ├── enhance-links.ts
│ │ │ │ │ ├── literal-html.ts
│ │ │ │ │ ├── resolve-attachment-images.ts
│ │ │ │ │ └── table-html-restorer.ts
│ │ │ │ ├── services/
│ │ │ │ │ ├── chat.service.ts
│ │ │ │ │ ├── database.service.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── mcp.service.ts
│ │ │ │ │ ├── models.service.ts
│ │ │ │ │ ├── parameter-sync.service.spec.ts
│ │ │ │ │ ├── parameter-sync.service.ts
│ │ │ │ │ └── props.service.ts
│ │ │ │ ├── stores/
│ │ │ │ │ ├── agentic.svelte.ts
│ │ │ │ │ ├── chat.svelte.ts
│ │ │ │ │ ├── conversations.svelte.ts
│ │ │ │ │ ├── mcp-resources.svelte.ts
│ │ │ │ │ ├── mcp.svelte.ts
│ │ │ │ │ ├── models.svelte.ts
│ │ │ │ │ ├── persisted.svelte.ts
│ │ │ │ │ ├── server.svelte.ts
│ │ │ │ │ └── settings.svelte.ts
│ │ │ │ ├── types/
│ │ │ │ │ ├── agentic.d.ts
│ │ │ │ │ ├── api.d.ts
│ │ │ │ │ ├── chat.d.ts
│ │ │ │ │ ├── common.d.ts
│ │ │ │ │ ├── database.d.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── mcp.d.ts
│ │ │ │ │ ├── models.d.ts
│ │ │ │ │ └── settings.d.ts
│ │ │ │ └── utils/
│ │ │ │ ├── abort.ts
│ │ │ │ ├── agentic.ts
│ │ │ │ ├── api-fetch.ts
│ │ │ │ ├── api-headers.ts
│ │ │ │ ├── api-key-validation.ts
│ │ │ │ ├── attachment-display.ts
│ │ │ │ ├── attachment-type.ts
│ │ │ │ ├── audio-recording.ts
│ │ │ │ ├── autoresize-textarea.ts
│ │ │ │ ├── branching.ts
│ │ │ │ ├── browser-only.ts
│ │ │ │ ├── cache-ttl.ts
│ │ │ │ ├── clipboard.ts
│ │ │ │ ├── code.ts
│ │ │ │ ├── config-helpers.ts
│ │ │ │ ├── conversation-utils.ts
│ │ │ │ ├── convert-files-to-extra.ts
│ │ │ │ ├── cors-proxy.ts
│ │ │ │ ├── data-url.ts
│ │ │ │ ├── debounce.ts
│ │ │ │ ├── favicon.ts
│ │ │ │ ├── file-preview.ts
│ │ │ │ ├── file-type.ts
│ │ │ │ ├── formatters.ts
│ │ │ │ ├── headers.ts
│ │ │ │ ├── image-error-fallback.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── is-ime-composing.ts
│ │ │ │ ├── latex-protection.ts
│ │ │ │ ├── legacy-migration.ts
│ │ │ │ ├── mcp.ts
│ │ │ │ ├── modality-file-validation.ts
│ │ │ │ ├── model-names.ts
│ │ │ │ ├── pdf-processing.ts
│ │ │ │ ├── portal-to-body.ts
│ │ │ │ ├── precision.ts
│ │ │ │ ├── process-uploaded-files.ts
│ │ │ │ ├── sanitize.ts
│ │ │ │ ├── svg-to-png.ts
│ │ │ │ ├── syntax-highlight-language.ts
│ │ │ │ ├── text-files.ts
│ │ │ │ ├── text.ts
│ │ │ │ ├── uri-template.ts
│ │ │ │ ├── uuid.ts
│ │ │ │ └── webp-to-png.ts
│ │ │ ├── routes/
│ │ │ │ ├── +error.svelte
│ │ │ │ ├── +layout.svelte
│ │ │ │ ├── +page.svelte
│ │ │ │ ├── +page.ts
│ │ │ │ └── chat/
│ │ │ │ └── [id]/
│ │ │ │ ├── +page.svelte
│ │ │ │ └── +page.ts
│ │ │ └── styles/
│ │ │ └── katex-custom.scss
│ │ ├── static/
│ │ │ └── loading.html
│ │ ├── svelte.config.js
│ │ ├── tests/
│ │ │ ├── client/
│ │ │ │ ├── components/
│ │ │ │ │ └── TestWrapper.svelte
│ │ │ │ └── page.svelte.test.ts
│ │ │ ├── e2e/
│ │ │ │ └── demo.test.ts
│ │ │ ├── stories/
│ │ │ │ ├── ChatMessage.stories.svelte
│ │ │ │ ├── ChatScreenForm.stories.svelte
│ │ │ │ ├── ChatSettings.stories.svelte
│ │ │ │ ├── ChatSidebar.stories.svelte
│ │ │ │ ├── Introduction.mdx
│ │ │ │ ├── MarkdownContent.stories.svelte
│ │ │ │ └── fixtures/
│ │ │ │ ├── ai-tutorial.ts
│ │ │ │ ├── api-docs.ts
│ │ │ │ ├── blog-post.ts
│ │ │ │ ├── data-analysis.ts
│ │ │ │ ├── empty.ts
│ │ │ │ ├── math-formulas.ts
│ │ │ │ ├── readme.ts
│ │ │ │ └── storybook-mocks.ts
│ │ │ └── unit/
│ │ │ ├── agentic-sections.test.ts
│ │ │ ├── agentic-strip.test.ts
│ │ │ ├── clipboard.test.ts
│ │ │ ├── latex-protection.test.ts
│ │ │ ├── model-id-parser.test.ts
│ │ │ ├── model-names.test.ts
│ │ │ ├── reasoning-context.test.ts
│ │ │ └── uri-template.test.ts
│ │ ├── tsconfig.json
│ │ ├── vite.config.ts
│ │ └── vitest-setup-client.ts
│ ├── tokenize/
│ │ ├── CMakeLists.txt
│ │ └── tokenize.cpp
│ └── tts/
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── convert_pt_to_hf.py
│ ├── tts-outetts.py
│ └── tts.cpp
├── ty.toml
└── vendor/
├── cpp-httplib/
│ ├── CMakeLists.txt
│ ├── LICENSE
│ ├── httplib.cpp
│ └── httplib.h
├── miniaudio/
│ └── miniaudio.h
├── nlohmann/
│ ├── json.hpp
│ └── json_fwd.hpp
├── sheredom/
│ └── subprocess.h
└── stb/
└── stb_image.h
================================================
FILE CONTENTS
================================================
================================================
FILE: .clang-format
================================================
# clang-format style for llama.cpp C/C++/CUDA sources.
# Option reference: https://clang.llvm.org/docs/ClangFormatStyleOptions.html
# Commented-out keys appear to be options from newer clang-format releases,
# kept here for future use — confirm the minimum supported version before enabling.
---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
# Treat CUDA keywords/attributes as "attribute macros" and avoid breaking lines inside them
AttributeMacros:
  - __host__
  - __device__
  - __global__
  - __forceinline__
  - __launch_bounds__
BinPackArguments: true
BinPackParameters: false # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
# Include ordering: local "" headers first, then system .h headers, then other <> headers.
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '".*"'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*\.h>'
    Priority: 2
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 3
    SortPriority: 0
  - Regex: '.*'
    Priority: 4
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...
================================================
FILE: .clang-tidy
================================================
# clang-tidy configuration for llama.cpp.
# `Checks` is a YAML folded scalar: whole check groups are enabled via globs
# (bugprone-*, readability-*, ...) and individual checks are then opted out
# with a leading `-`. No comments are placed inside the scalar because they
# would become part of the checks string.
# Check reference: https://clang.llvm.org/extra/clang-tidy/checks/list.html
# `FormatStyle: none` keeps clang-tidy from reformatting code when applying fixes.
---
Checks: >
    bugprone-*,
    -bugprone-easily-swappable-parameters,
    -bugprone-implicit-widening-of-multiplication-result,
    -bugprone-misplaced-widening-cast,
    -bugprone-narrowing-conversions,
    readability-*,
    -readability-avoid-unconditional-preprocessor-if,
    -readability-function-cognitive-complexity,
    -readability-identifier-length,
    -readability-implicit-bool-conversion,
    -readability-magic-numbers,
    -readability-uppercase-literal-suffix,
    -readability-simplify-boolean-expr,
    -readability-math-missing-parentheses,
    clang-analyzer-*,
    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
    performance-*,
    -performance-enum-size,
    portability-*,
    -portability-simd-intrinsics,
    misc-*,
    -misc-const-correctness,
    -misc-non-private-member-variables-in-classes,
    -misc-no-recursion,
    -misc-use-anonymous-namespace,
FormatStyle: none
================================================
FILE: .devops/cann.Dockerfile
================================================
# ==============================================================================
# ARGUMENTS
# ==============================================================================
# Define the CANN base image for easier version updates later
ARG CHIP_TYPE=910b
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.5.0-${CHIP_TYPE}-openeuler24.03-py3.11

# ==============================================================================
# BUILD STAGE
# Compile all binary files and libraries
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS build

# -- Install build dependencies --
RUN yum install -y gcc g++ cmake make git openssl-devel python3 python3-pip && \
    yum clean all && \
    rm -rf /var/cache/yum

# -- Set the working directory --
WORKDIR /app

# -- Copy project files --
COPY . .

# -- Set CANN environment variables (required for compilation) --
# Using ENV instead of `source` allows environment variables to persist across the entire image layer
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
# ... You can add other environment variables from the original file as needed ...
# For brevity, only core variables are listed here. You can paste the original ENV list here.

# -- Build llama.cpp --
# Use the passed CHIP_TYPE argument and add general build options
# NOTE: an ARG declared before FROM is not visible inside a stage, so
# CHIP_TYPE must be re-declared here before it is used in -DSOC_TYPE.
ARG CHIP_TYPE
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
    && \
    cmake -B build \
        -DGGML_CANN=ON \
        -DCMAKE_BUILD_TYPE=Release \
        -DSOC_TYPE=ascend${CHIP_TYPE} \
        -DUSE_ACL_GRAPH=ON \
        . && \
    cmake --build build --config Release -j$(nproc)
# -- Organize build artifacts for copying in later stages --
# Create a lib directory to store all .so files (cp -P keeps symlinks intact)
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Create a full directory to store all executables, Python scripts and the
# tools.sh entrypoint. The `full` target sets ENTRYPOINT ["/app/tools.sh"],
# so tools.sh must be staged here or that image cannot start; this mirrors
# the staging done in cpu.Dockerfile / cuda.Dockerfile.
RUN mkdir -p /app/full && \
    cp build/bin/* /app/full/ && \
    cp *.py /app/full/ && \
    cp -r gguf-py /app/full/ && \
    cp -r requirements /app/full/ && \
    cp requirements.txt /app/full/ && \
    cp .devops/tools.sh /app/full/tools.sh
# ==============================================================================
# BASE STAGE
# Create a minimal base image with CANN runtime and common libraries
# ==============================================================================
FROM ${CANN_BASE_IMAGE} AS base

# -- Install runtime dependencies --
# curl is required by the server target's HEALTHCHECK below.
RUN yum install -y libgomp curl && \
    yum clean all && \
    rm -rf /var/cache/yum

# -- Set CANN environment variables (required for runtime) --
# /app is prepended to LD_LIBRARY_PATH so the shared libraries copied below are found.
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
# ... You can add other environment variables from the original file as needed ...

WORKDIR /app

# Copy compiled .so files from the build stage
COPY --from=build /app/lib/ /app

# ==============================================================================
# FINAL STAGES (TARGETS)
# ==============================================================================
### Target: full
# Complete image with all tools, Python bindings, and dependencies
# ==============================================================================
FROM base AS full

COPY --from=build /app/full /app

# Install Python dependencies
# requirements.txt is available at /app/requirements.txt via the COPY above.
RUN yum install -y git python3 python3-pip && \
    pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
    pip3 install --no-cache-dir -r requirements.txt && \
    yum clean all && \
    rm -rf /var/cache/yum

# You need to provide a tools.sh script as the entrypoint
ENTRYPOINT ["/app/tools.sh"]
# If there is no tools.sh, you can set the default to start the server
# ENTRYPOINT ["/app/llama-server"]

### Target: light
# Lightweight image containing only llama-cli and llama-completion
# ==============================================================================
FROM base AS light

COPY --from=build /app/full/llama-cli /app/full/llama-completion /app

ENTRYPOINT [ "/app/llama-cli" ]

### Target: server
# Dedicated server image containing only llama-server
# ==============================================================================
FROM base AS server

# NOTE(review): LLAMA_ARG_HOST=0.0.0.0 presumably makes llama-server bind all
# interfaces so the port can be published from the container — confirm against
# the server's argument/environment parsing.
ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

HEALTHCHECK --interval=5m CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/cpu.Dockerfile
================================================
ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

# TARGETARCH is populated automatically by buildx with the target platform
# architecture (e.g. amd64, arm64).
ARG TARGETARCH

# Toolchain and build dependencies.
RUN apt-get update && \
    apt-get install -y gcc-14 g++-14 build-essential git cmake libssl-dev

ENV CC=gcc-14 CXX=g++-14

WORKDIR /app

COPY . .

# Configure and build. Only amd64 and arm64 are supported; any other
# TARGETARCH aborts the build with a non-zero exit.
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
    else \
        echo "Unsupported architecture"; \
        exit 1; \
    fi && \
    cmake --build build -j $(nproc)

# Collect every shared library produced by the build into /app/lib
# (cp -P copies symlinks as symlinks instead of dereferencing them).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Stage binaries, conversion scripts, Python requirements and the tools.sh
# entrypoint for the `full` target.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
## Base image
# Minimal runtime layer shared by all targets: libgomp1 (OpenMP runtime),
# curl (used by the server HEALTHCHECK) and the shared libraries from `build`.
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
# All binaries plus the Python conversion scripts and their dependencies.
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# requirements.txt is at /app/requirements.txt via the COPY above.
RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app/full/llama-completion /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

# NOTE(review): LLAMA_ARG_HOST=0.0.0.0 presumably makes llama-server bind all
# interfaces — confirm against the server's environment-variable handling.
ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/cuda-new.Dockerfile
================================================
ARG UBUNTU_VERSION=24.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=13.1.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1

# gcc-14 is used both as the C/C++ compiler and as nvcc's host compiler.
ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14

WORKDIR /app

COPY . .

# When CUDA_DOCKER_ARCH stays "default", CMAKE_CUDA_ARCHITECTURES is left
# unset and the project's default architecture list is built.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

# Collect shared libraries (cp -P copies symlinks as symlinks).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Stage binaries, conversion scripts, Python requirements and the tools.sh
# entrypoint for the `full` target.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
## Base image
# Runtime layer on the CUDA *runtime* container (no compiler toolchain):
# libgomp1 (OpenMP runtime), curl (used by the server HEALTHCHECK) and the
# shared libraries produced by `build`.
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
# All binaries plus the Python conversion scripts and their dependencies.
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

# requirements.txt is at /app/requirements.txt via the COPY above.
RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app/full/llama-completion /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

# NOTE(review): LLAMA_ARG_HOST=0.0.0.0 presumably makes llama-server bind all
# interfaces — confirm against the server's environment-variable handling.
ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/cuda.Dockerfile
================================================
ARG UBUNTU_VERSION=24.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.8.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y gcc-14 g++-14 build-essential cmake python3 python3-pip git libssl-dev libgomp1

# gcc-14 is used both as the C/C++ compiler and as nvcc's host compiler.
ENV CC=gcc-14 CXX=g++-14 CUDAHOSTCXX=g++-14

WORKDIR /app

COPY . .

# When CUDA_DOCKER_ARCH stays "default", CMAKE_CUDA_ARCHITECTURES is left
# unset and the project's default architecture list is built.
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

# Collect shared libraries (cp -P copies symlinks as symlinks).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;

# Stage binaries, conversion scripts, Python requirements and the tools.sh
# entrypoint for the `full` target.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
## Base image
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
RUN apt-get update \
&& apt-get install -y libgomp1 curl \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
COPY --from=build /app/lib/ /app
### Full
FROM base AS full
COPY --from=build /app/full /app
WORKDIR /app
RUN apt-get update \
&& apt-get install -y \
git \
python3 \
python3-pip \
python3-wheel \
&& pip install --break-system-packages --upgrade setuptools \
&& pip install --break-system-packages -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/intel.Dockerfile
================================================
ARG ONEAPI_VERSION=2025.3.2-0-devel-ubuntu24.04
## Build Image
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
# Optionally enable fp16 in the SYCL backend.
ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libssl-dev
WORKDIR /app
COPY . .
# icx/icpx are the oneAPI compilers required by the SYCL backend.
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" \
        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
    cmake --build build --config Release -j$(nproc)
# Collect shared libraries (-P preserves symlinks).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;
# Stage binaries plus the Python conversion tooling.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
# Pinned Intel graphics-compiler / compute-runtime versions for the GPU stack.
ARG IGC_VERSION=v2.30.1
ARG IGC_VERSION_FULL=2_2.30.1+20950
ARG COMPUTE_RUNTIME_VERSION=26.09.37435.1
ARG COMPUTE_RUNTIME_VERSION_FULL=26.09.37435.1-0
ARG IGDGMM_VERSION=22.9.0
# NOTE(review): the *.ddeb debug-symbol files fetched below are not matched by
# the `dpkg --install *.deb` glob, so they appear to be downloaded but never
# installed -- confirm whether those wget lines can be dropped.
RUN mkdir /tmp/neo/ && cd /tmp/neo/ \
    && wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-core-${IGC_VERSION_FULL}_amd64.deb \
    && wget https://github.com/intel/intel-graphics-compiler/releases/download/$IGC_VERSION/intel-igc-opencl-${IGC_VERSION_FULL}_amd64.deb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-ocloc-dbgsym_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.ddeb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-ocloc_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.deb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-opencl-icd-dbgsym_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.ddeb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/intel-opencl-icd_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.deb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/libigdgmm12_${IGDGMM_VERSION}_amd64.deb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/libze-intel-gpu1-dbgsym_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.ddeb \
    && wget https://github.com/intel/compute-runtime/releases/download/$COMPUTE_RUNTIME_VERSION/libze-intel-gpu1_${COMPUTE_RUNTIME_VERSION_FULL}_amd64.deb \
    && dpkg --install *.deb
# Runtime deps; apt caches scrubbed in the same layer.
RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
### Full
FROM base AS full
COPY --from=build /app/lib/ /app
COPY --from=build /app/full /app
WORKDIR /app
# Python tooling lives in a venv here (Ubuntu 24.04 pip is externally managed).
RUN apt-get update && \
    apt-get install -y \
    git \
    python3 \
    python3-pip \
    python3-venv && \
    python3 -m venv /opt/venv && \
    . /opt/venv/bin/activate && \
    pip install --upgrade pip setuptools wheel && \
    pip install -r requirements.txt && \
    apt autoremove -y && \
    apt clean -y && \
    rm -rf /tmp/* /var/tmp/* && \
    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
    find /var/cache -type f -delete
ENV PATH="/opt/venv/bin:$PATH"
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
# Bind to all interfaces so the server is reachable from outside the container.
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/llama-cli-cann.Dockerfile
================================================
ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10
## Build stage: static build of llama-cli/llama-completion with the CANN backend.
FROM ascendai/cann:$ASCEND_VERSION AS build
WORKDIR /app
COPY . .
RUN yum install -y gcc g++ cmake make openssl-devel
# Ascend CANN toolkit environment (mirrors what set_env.sh exports).
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
# Use the toolkit's stub directory to find libascend_hal.so at link time,
# because the device driver hasn't been mounted inside the build container.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
RUN echo "Building with static libs" && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
    cmake --build build --config Release --target llama-cli && \
    cmake --build build --config Release --target llama-completion
# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /app/build/bin/llama-completion /
ENV LC_ALL=C.utf8
# Same CANN environment as the build stage so the binaries can locate the runtime.
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
ENTRYPOINT ["/llama-cli" ]
================================================
FILE: .devops/llama-cpp-cuda.srpm.spec
================================================
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-cuda
# Date-based version because git tags are hashes and do not sort.
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        Inference of LLaMA model in pure C/C++ with NVIDIA CUDA acceleration
License:        MIT
Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
Requires:       cuda-toolkit
URL:            https://github.com/ggml-org/llama.cpp

# No debuginfo subpackage is generated for this build.
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CUDA-accelerated inference for Meta's Llama 2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j GGML_CUDA=1

%install
mkdir -p %{buildroot}%{_bindir}/
# Install with a -cuda- infix so the package can coexist with the CPU build.
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-completion %{buildroot}%{_bindir}/llama-cuda-completion
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

# Systemd unit for the server; arguments come from /etc/sysconfig/llama.
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server, CUDA-accelerated build.
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-completion
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama

%pre

%post

%preun

%postun

%changelog
================================================
FILE: .devops/llama-cpp.srpm.spec
================================================
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
#    In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp
# Date-based version because git tags are hashes and do not sort.
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License:        MIT
Source0:        https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git libstdc++-devel
Requires:       libstdc++
URL:            https://github.com/ggml-org/llama.cpp

# No debuginfo subpackage is generated for this build.
%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Llama 2 models using default options.
Models are not included in this package and must be downloaded separately.

%prep
%setup -n llama.cpp-master

%build
make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-completion %{buildroot}%{_bindir}/llama-completion
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

# Systemd unit for the server; arguments come from /etc/sysconfig/llama.
mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cli
%{_bindir}/llama-completion
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama

%pre

%post

%preun

%postun

%changelog
================================================
FILE: .devops/musa.Dockerfile
================================================
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc4.3.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
## Build stage: compile llama.cpp with the MUSA (Moore Threads GPU) backend.
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default
RUN apt-get update && \
    apt-get install -y \
    build-essential \
    cmake \
    python3 \
    python3-pip \
    git \
    libssl-dev \
    libgomp1
WORKDIR /app
COPY . .
# Restrict MUSA_ARCHITECTURES only when an explicit arch list was requested.
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)
# Collect shared libraries (-P preserves symlinks).
RUN mkdir -p /app/lib && \
    find build -name "*.so*" -exec cp -P {} /app/lib \;
# Stage binaries plus the Python conversion tooling.
RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh
## Base image
# Runtime deps only; apt caches scrubbed in the same layer to keep it small.
FROM ${BASE_MUSA_RUN_CONTAINER} AS base
RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
COPY --from=build /app/lib/ /app
### Full
FROM base AS full
COPY --from=build /app/full /app
WORKDIR /app
# Python tooling for the model-conversion scripts shipped in the full image.
RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
# Bind to all interfaces so the server is reachable from outside the container.
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/nix/apps.nix
================================================
{
  perSystem =
    { config, lib, ... }:
    {
      # Expose the main executables of the default package as flake apps,
      # i.e. `nix run .#llama-server` etc.
      apps =
        let
          inherit (config.packages) default;
          # Wrap a binary name into a flake-app attrset pointing into the
          # default package's bin directory.
          toApp = bin: {
            type = "app";
            program = "${default}/bin/${bin}";
          };
        in
        builtins.listToAttrs (
          map (bin: lib.nameValuePair bin (toApp bin)) [
            "llama-cli"
            "llama-embedding"
            "llama-server"
            "llama-quantize"
          ]
        );
    };
}
================================================
FILE: .devops/nix/devshells.nix
================================================
# Builds one devShell per package, plus an "<name>-extra" variant that also
# provides the packaged Python scripts (e.g. `nix develop .#default-extra`).
{ inputs, ... }:
{
  perSystem =
    {
      config,
      lib,
      system,
      ...
    }:
    {
      devShells =
        let
          pkgs = import inputs.nixpkgs { inherit system; };
          stdenv = pkgs.stdenv;
          # The packaged conversion/benchmark scripts; pulled into -extra shells.
          scripts = config.packages.python-scripts;
        in
        lib.pipe (config.packages) [
          (lib.concatMapAttrs (
            name: package: {
              # Plain shell: just the package's build inputs.
              ${name} = pkgs.mkShell {
                name = "${name}";
                inputsFrom = [ package ];
                shellHook = ''
                  echo "Entering ${name} devShell"
                '';
              };
              # "-extra" shell: additionally provides the Python scripts. For
              # python-scripts itself this would be redundant, so it is set to
              # null here and filtered out below.
              "${name}-extra" =
                if (name == "python-scripts") then
                  null
                else
                  pkgs.mkShell {
                    name = "${name}-extra";
                    inputsFrom = [
                      package
                      scripts
                    ];
                    # Extra packages that *may* be used by some scripts
                    packages = [
                      pkgs.python3Packages.tiktoken
                    ];
                    shellHook = ''
                      echo "Entering ${name} devShell"
                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
                    '';
                  };
            }
          ))
          # Drop the null placeholders produced above.
          (lib.filterAttrs (name: value: value != null))
        ];
    };
}
================================================
FILE: .devops/nix/docker.nix
================================================
{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# Produces a tar that can be fed into `docker load`:
#
#   $ nix build .#llamaPackages.docker
#   $ docker load < result
#
# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/
#
# Approximate (compressed) sizes, at the time of writing, are:
#
#   .#llamaPackages.docker: 125M;
#   .#llamaPackagesCuda.docker: 537M;
#   .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

let
  # Interactive images additionally ship coreutils, a shell and CA certs.
  interactiveExtras = lib.optionals interactive [
    coreutils
    dockerTools.binSh
    dockerTools.caCertificates
  ];
in
dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";
  contents = [ llama-cpp ] ++ interactiveExtras;
}
================================================
FILE: .devops/nix/jetson-support.nix
================================================
# CUDA-capability-pinned package scopes for NVIDIA Jetson boards, plus
# convenience `jetson-*` package aliases on aarch64-linux.
{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    {
      legacyPackages =
        let
          # CUDA compute capability per Jetson model.
          caps.llamaPackagesXavier = "7.2";
          caps.llamaPackagesOrin = "8.7";
          # NOTE(review): TX2 gets a capability entry here but no `jetson-tx2`
          # alias in `packages` below -- confirm whether that omission is
          # intentional.
          caps.llamaPackagesTX2 = "6.2";
          caps.llamaPackagesNano = "5.3";
          # Instantiate nixpkgs with CUDA enabled and pinned to a single
          # capability, reusing the unfree-license predicate from pkgsCuda.
          pkgsFor =
            cap:
            import inputs.nixpkgs {
              inherit system;
              config = {
                cudaSupport = true;
                cudaCapabilities = [ cap ];
                cudaEnableForwardCompat = false;
                inherit (pkgsCuda.config) allowUnfreePredicate;
              };
            };
        in
        builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
      # The aliases only make sense on aarch64-linux (Jetson's architecture).
      packages = lib.optionalAttrs (system == "aarch64-linux") {
        jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
        jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
        jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
      };
    };
}
================================================
FILE: .devops/nix/nixpkgs-instances.nix
================================================
{ inputs, ... }:
{
  # The _module.args definitions are passed on to modules as arguments. E.g.
  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { lib, system, ... }:
    {
      _module.args = {
        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
        # again, the below creates several nixpkgs instances which the
        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
        #
        # This is currently "slow" and "expensive", on a certain scale.
        # This also isn't "right" in that this hinders dependency injection at
        # the level of flake inputs. This might get removed in the foreseeable
        # future.
        #
        # Note that you can use these expressions without Nix
        # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
          # and ucx are built with CUDA support)
          config.cudaSupport = true;
          # Accept only free licenses plus the CUDA/cuDNN EULAs; any other
          # unfree dependency is rejected.
          config.allowUnfreePredicate =
            p:
            builtins.all (
              license:
              license.free
              || builtins.elem license.shortName [
                "CUDA EULA"
                "cuDNN EULA"
              ]
            ) (p.meta.licenses or (lib.toList p.meta.license));
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config.rocmSupport = true;
        };
      };
    };
}
================================================
FILE: .devops/nix/package-gguf-py.nix
================================================
# Nix packaging of the gguf Python package from ../../gguf-py.
{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  requests,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  # The package version tracks the llama.cpp version passed in by the flake.
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
    requests
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  # Run the gguf-py test suite at build time via pytest.
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}
================================================
FILE: .devops/nix/package.nix
================================================
# Main Nix derivation for llama.cpp; backends are toggled via the use* flags.
{
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,
  # Fall back to BLAS only when no GPU backend is selected and BLAS is available.
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  useVulkan ? false,
  useRpc ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
  useWebUi ? true,
}:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionalAttrs
    optionals
    strings
    ;

  # Shadow the plain stdenv so accidental uses fail loudly; use effectiveStdenv.
  stdenv = throw "Use effectiveStdenv instead";

  # Human-readable backend names; used in the pname and description suffixes.
  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix = strings.optionalString (
    suffices != [ ]
  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

  # Tiny wrapper exposing the host's /usr/bin/xcrun inside the build.
  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cudart
    cuda_cccl # <nv/target>
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
    shaderc
  ];
in

effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp${pnameSuffix}";
  version = llamaVersion;

  # Note: none of the files discarded here are visible in the sandbox or
  # affect the output hash. This also means they can be modified without
  # triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        noneOf = builtins.all (x: !x);
        baseName = baseNameOf name;
      in
      noneOf [
        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
        (lib.hasPrefix "." baseName) # Skip hidden files and directories
        (baseName == "flake.lock")
      ];
    src = lib.cleanSource ../../.;
  };

  # Intentionally left empty (no patches currently applied).
  postPatch = ''
  '';

  # With PR#6015 https://github.com/ggml-org/llama.cpp/pull/6015,
  # `default.metallib` may be compiled with Metal compiler from XCode
  # and we need to escape sandbox on MacOS to access Metal compiler.
  # `xcrun` is used to find the path of the Metal compiler, which is variable
  # and not on $PATH
  # see https://github.com/ggml-org/llama.cpp/pull/6118 for discussion
  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

  nativeBuildInputs =
    [
      cmake
      ninja
      pkg-config
      git
    ]
    ++ optionals useCuda [
      cudaPackages.cuda_nvcc
      autoAddDriverRunpath
    ]
    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

  buildInputs =
    optionals effectiveStdenv.isDarwin darwinBuildInputs
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useMpi [ mpi ]
    ++ optionals useRocm rocmBuildInputs
    ++ optionals useBlas [ blas ]
    ++ optionals useVulkan vulkanBuildInputs;

  cmakeFlags =
    [
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "LLAMA_BUILD_WEBUI" useWebUi)
      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
      (cmakeBool "GGML_NATIVE" false)
      (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
      (cmakeBool "GGML_HIP" useRocm)
      (cmakeBool "GGML_METAL" useMetalKit)
      (cmakeBool "GGML_VULKAN" useVulkan)
      (cmakeBool "GGML_STATIC" enableStatic)
      (cmakeBool "GGML_RPC" useRpc)
    ]
    ++ optionals useCuda [
      (
        with cudaPackages.flags;
        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
        )
      )
    ]
    ++ optionals useRocm [
      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
    ]
    ++ optionals useMetalKit [
      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
    ];

  # Environment variables needed for ROCm
  env = optionalAttrs useRocm {
    ROCM_PATH = "${rocmPackages.clr}";
    HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
  };

  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  # if they haven't been added yet.
  postInstall = ''
    mkdir -p $out/include
    cp $src/include/llama.h $out/include/
  '';

  meta = {
    # Configurations we don't want even the CI to evaluate. Results in the
    # "unsupported platform" messages. This is mostly a no-op, because
    # cudaPackages would've refused to evaluate anyway.
    badPlatforms = optionals useCuda lib.platforms.darwin;

    # Configurations that are known to result in build failures. Can be
    # overridden by importing Nixpkgs with `allowBroken = true`.
    broken = (useMetalKit && !effectiveStdenv.isDarwin);

    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    homepage = "https://github.com/ggml-org/llama.cpp/";
    license = lib.licenses.mit;

    # Accommodates `nix run` and `lib.getExe`
    mainProgram = "llama-cli";

    # These people might respond, on the best effort basis, if you ping them
    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
    # Consider adding yourself to this list if you want to ensure this flake
    # stays maintained and you're willing to invest your time. Do not add
    # other people without their consent. Consider removing people after
    # they've been unreachable for long periods of time.
    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
    # an attrset following the same format as in
    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
    maintainers = with lib.maintainers; [
      philiptaron
      SomeoneSerge
    ];

    # Extend `badPlatforms` instead
    platforms = lib.platforms.all;
  };
})
================================================
FILE: .devops/nix/python-scripts.nix
================================================
# Packages the repository's Python scripts (model conversion, bench comparison,
# examples) together with the dependencies they need at runtime.
{
  lib,
  stdenv,
  buildPythonPackage,
  poetry-core,
  mkShell,
  python3Packages,
  gguf-py,
}@inputs:

let
  # Runtime dependencies of the packaged scripts.
  llama-python-deps = with python3Packages; [
    numpy
    sentencepiece
    transformers
    protobuf
    torchWithoutCuda
    gguf-py
    tqdm
    # for scripts/compare-llama-bench.py
    gitpython
    tabulate
    # for examples/pydantic-models-to-grammar-examples.py
    docstring-parser
    pydantic
  ];
  # Dependencies only needed for the test suites / server bench, not at runtime.
  llama-python-test-deps = with python3Packages; [
    # Server bench
    matplotlib
    # server tests
    openai
    pytest
    prometheus-client
  ];
in

buildPythonPackage ({
  pname = "llama-scripts";
  version = "0.0.0";
  pyproject = true;

  # NOTE: The files filtered out here are not visible in the build sandbox, neither
  # do they affect the output hash. They can be modified without triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        any = builtins.any (x: x);
        baseName = builtins.baseNameOf name;
      in
      # Keep only Python sources plus README.md and pyproject.toml.
      any [
        (lib.hasSuffix ".py" name)
        (baseName == "README.md")
        (baseName == "pyproject.toml")
      ];
    src = lib.cleanSource ../../.;
  };
  nativeBuildInputs = [ poetry-core ];
  nativeCheckInputs = llama-python-test-deps;
  dependencies = llama-python-deps;
})
================================================
FILE: .devops/nix/scope.nix
================================================
# Entry point of the Nix packaging: a makeScope scope containing the llama-cpp
# build, the gguf Python package, the script bundle and container images.
{
  lib,
  newScope,
  python3,
  llamaVersion ? "0.0.0",
}:

let
  pythonPackages = python3.pkgs;
in

# We're using `makeScope` instead of just writing out an attrset
# because it allows users to apply overlays later using `overrideScope'`.
# Cf. https://noogle.dev/f/lib/makeScope
lib.makeScope newScope (self: {
  inherit llamaVersion;
  # The gguf Python package, built from ../../gguf-py.
  gguf-py = self.callPackage ./package-gguf-py.nix {
    inherit (pythonPackages)
      numpy
      tqdm
      sentencepiece
      pyyaml
      pytestCheckHook
      requests
      buildPythonPackage
      poetry-core
      ;
  };
  # Conversion/benchmark scripts packaged as a Python application.
  python-scripts = self.callPackage ./python-scripts.nix { inherit (pythonPackages) buildPythonPackage poetry-core; };
  # The main C/C++ build.
  llama-cpp = self.callPackage ./package.nix { };
  # Container / Singularity image variants.
  docker = self.callPackage ./docker.nix { };
  docker-min = self.callPackage ./docker.nix { interactive = false; };
  sif = self.callPackage ./sif.nix { };
})
================================================
FILE: .devops/nix/sif.nix
================================================
{
  lib,
  singularity-tools,
  llama-cpp,
  bashInteractive,
  interactive ? false,
}:

# Builds a Singularity image containing llama-cpp; interactive variants also
# ship a bash shell.
singularity-tools.buildImage rec {
  inherit (llama-cpp) name;
  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];

  # These are excessive (but safe) for most variants. Building singularity
  # images requires superuser privileges, so we build them inside a VM in a
  # writable image of pre-determined size.
  #
  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
  #
  # Expected image sizes:
  # - cpu/blas: 150M,
  # - cuda, all gencodes: 560M,
  diskSize = 4096 + (if llama-cpp.useRocm then 16384 else 0);
  memSize = diskSize;
}
================================================
FILE: .devops/openvino.Dockerfile
================================================
# Multi-stage image for llama.cpp with the Intel OpenVINO backend.
# Stages: build (compile), base (runtime libs), full / light / server (variants).
ARG OPENVINO_VERSION_MAJOR=2026.0
ARG OPENVINO_VERSION_FULL=2026.0.0.20965.c6d6a13a886
ARG UBUNTU_VERSION=24.04
# Optional proxy build arguments - empty by default
ARG http_proxy=
ARG https_proxy=
## Build Image
FROM ubuntu:${UBUNTU_VERSION} AS build
# Pass proxy args to build stage
ARG http_proxy
ARG https_proxy
# Toolchain plus OpenCL headers/ICD needed to build the OpenVINO GPU plugin path.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
ca-certificates \
gnupg \
wget \
git \
cmake \
ninja-build \
build-essential \
libtbb12 \
libssl-dev \
ocl-icd-opencl-dev \
opencl-headers \
opencl-clhpp-headers \
intel-opencl-icd && \
rm -rf /var/lib/apt/lists/*
# Install OpenVINO for Ubuntu 24.04
# ARG values must be re-declared after FROM to be visible in this stage.
ARG OPENVINO_VERSION_MAJOR
ARG OPENVINO_VERSION_FULL
# Download the prebuilt OpenVINO archive, run its dependency installer, and
# expose it under the conventional /opt/intel/openvino symlink.
RUN mkdir -p /opt/intel && \
wget https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION_MAJOR}/linux/openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
tar -xf openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64.tgz && \
mv openvino_toolkit_ubuntu24_${OPENVINO_VERSION_FULL}_x86_64 /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
cd /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} && \
echo "Y" | ./install_dependencies/install_openvino_dependencies.sh && \
cd - && \
ln -s /opt/intel/openvino_${OPENVINO_VERSION_MAJOR} /opt/intel/openvino
ENV OpenVINO_DIR=/opt/intel/openvino
WORKDIR /app
COPY . .
# Build Stage
# setupvars.sh exports the OpenVINO env needed by CMake's find_package(OpenVINO).
RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON && \
cmake --build build/ReleaseOV -j$(nproc)"
# Copy all necessary libraries
# -P preserves symlinks (lib.so -> lib.so.X); the 2>/dev/null || fallback covers
# both OpenVINO archive layouts (runtime/lib/intel64 vs lib/intel64).
RUN mkdir -p /app/lib && \
find build/ReleaseOV -name '*.so*' -exec cp {} /app/lib \; && \
find ${OpenVINO_DIR}/runtime/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \; 2>/dev/null || \
find ${OpenVINO_DIR}/lib/intel64 -name '*.so*' -exec cp -P {} /app/lib \;
# Create runtime directories and copy binaries
# /app/full bundles binaries plus the Python conversion tooling for the "full" variant.
RUN mkdir -p /app/full \
&& cp build/ReleaseOV/bin/* /app/full/ \
&& cp *.py /app/full \
&& cp -r gguf-py /app/full \
&& cp -r requirements /app/full \
&& cp requirements.txt /app/full \
&& cp .devops/tools.sh /app/full/tools.sh
## Base Runtime Image
FROM ubuntu:${UBUNTU_VERSION} AS base
# Pass proxy args to runtime stage
ARG http_proxy
ARG https_proxy
# Minimal runtime deps; aggressive cache cleanup keeps the layer small.
RUN apt-get update \
&& apt-get install -y libgomp1 libtbb12 curl \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
# Shared libraries land directly in /app, next to the binaries of each variant.
COPY --from=build /app/lib/ /app/
### Full (all binaries)
FROM base AS full
ARG http_proxy
ARG https_proxy
COPY --from=build /app/full /app/
WORKDIR /app
# Python deps are installed into a venv (/ov-venv) used by the conversion scripts.
RUN apt-get update && \
apt-get install -y --no-install-recommends \
git \
python3 \
python3-venv \
python3-pip && \
python3 -m venv /ov-venv && \
/ov-venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel && \
/ov-venv/bin/pip install --no-cache-dir -r requirements.txt && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /tmp/* /var/tmp/* && \
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
find /var/cache -type f -delete
# Activate the venv before dispatching through tools.sh; "--" keeps user args as $@.
ENTRYPOINT ["/bin/bash", "-c", "source /ov-venv/bin/activate && exec /app/tools.sh \"$@\"", "--"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app/
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/rocm.Dockerfile
================================================
# Multi-stage image for llama.cpp built with the ROCm/HIP backend.
ARG UBUNTU_VERSION=24.04
# This needs to generally match the container host's environment.
ARG ROCM_VERSION=7.2
ARG AMDGPU_VERSION=7.2
# Target the ROCm build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
### Build image
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
# Unless otherwise specified, we make a fat build.
# This is mostly tied to rocBLAS supported archs.
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.2.0/reference/system-requirements.html
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityrad/native_linux/native_linux_compatibility.html
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityryz/native_linux/native_linux_compatibility.html
ARG ROCM_DOCKER_ARCH='gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201'
# Set ROCm architectures
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
RUN apt-get update \
&& apt-get install -y \
build-essential \
cmake \
git \
libssl-dev \
curl \
libgomp1
WORKDIR /app
COPY . .
# hipconfig locates the HIP clang compiler and ROCm root installed in the base image.
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
cmake -S . -B build \
-DGGML_HIP=ON \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
&& cmake --build build --config Release -j$(nproc)
# Collect shared libraries; -P preserves the .so symlink chains.
RUN mkdir -p /app/lib \
&& find build -name "*.so*" -exec cp -P {} /app/lib \;
# /app/full bundles all binaries plus the Python conversion tooling.
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \
&& cp *.py /app/full \
&& cp -r gguf-py /app/full \
&& cp -r requirements /app/full \
&& cp requirements.txt /app/full \
&& cp .devops/tools.sh /app/full/tools.sh
## Base image
# NOTE(review): the runtime stages reuse the ROCm *dev* container, presumably
# because the HIP runtime libraries are required — a slimmer runtime base may
# be possible; verify before changing.
FROM ${BASE_ROCM_DEV_CONTAINER} AS base
RUN apt-get update \
&& apt-get install -y libgomp1 curl \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
COPY --from=build /app/lib/ /app
### Full
FROM base AS full
COPY --from=build /app/full /app
WORKDIR /app
RUN apt-get update \
&& apt-get install -y \
git \
python3-pip \
python3 \
python3-wheel \
&& pip install --break-system-packages --upgrade setuptools \
&& pip install --break-system-packages -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .devops/s390x.Dockerfile
================================================
# Multi-stage image for llama.cpp on IBM s390x, built with GCC and OpenBLAS.
ARG GCC_VERSION=15.2.0
ARG UBUNTU_VERSION=24.04
### Build Llama.cpp stage
FROM gcc:${GCC_VERSION} AS build
# Cache mounts keep apt archives/lists out of the image layers and speed rebuilds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
apt update -y && \
apt upgrade -y && \
apt install -y --no-install-recommends \
git cmake ccache ninja-build \
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
libopenblas-dev libssl-dev && \
rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY . .
# ccache + a cached build dir make incremental CI rebuilds much faster.
RUN --mount=type=cache,target=/root/.ccache \
--mount=type=cache,target=/app/build \
cmake -S . -B build -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DLLAMA_BUILD_TESTS=OFF \
-DGGML_NATIVE=OFF \
-DGGML_BACKEND_DL=ON \
-DGGML_CPU_ALL_VARIANTS=ON \
-DGGML_BLAS=ON \
-DGGML_BLAS_VENDOR=OpenBLAS && \
cmake --build build --config Release -j $(nproc) && \
cmake --install build --prefix /opt/llama.cpp
COPY *.py /opt/llama.cpp/bin
COPY .devops/tools.sh /opt/llama.cpp/bin
COPY gguf-py /opt/llama.cpp/gguf-py
COPY requirements.txt /opt/llama.cpp/gguf-py
COPY requirements /opt/llama.cpp/gguf-py/requirements
### Collect all llama.cpp binaries, libraries and distro libraries
FROM scratch AS collector
# Copy llama.cpp binaries and libraries
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
### Base image
FROM ubuntu:${UBUNTU_VERSION} AS base
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
apt update -y && \
apt install -y --no-install-recommends \
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
# See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
curl libgomp1 libopenblas-dev && \
apt autoremove -y && \
apt clean -y && \
rm -rf /tmp/* /var/tmp/* && \
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
find /var/cache -type f -delete
# Copy llama.cpp libraries
# Installing into the default linker path avoids needing LD_LIBRARY_PATH.
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
### Full
FROM base AS full
# NOTE(review): Rust/cargo is installed below — presumably some Python
# requirements build from source on s390x and need it; verify before removing.
ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
apt update -y && \
apt install -y \
git cmake libjpeg-dev \
python3 python3-pip python3-dev && \
apt autoremove -y && \
apt clean -y && \
rm -rf /tmp/* /var/tmp/* && \
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
find /var/cache -type f -delete
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
COPY --from=collector /llama.cpp/bin /app
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py
RUN pip install --no-cache-dir --break-system-packages \
-r /app/gguf-py/requirements.txt
ENTRYPOINT [ "/app/tools.sh" ]
### CLI Only
FROM base AS light
WORKDIR /llama.cpp/bin
# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
### Server
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
WORKDIR /llama.cpp/bin
# Copy llama.cpp binaries and libraries
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin
EXPOSE 8080
ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]
================================================
FILE: .devops/tools.sh
================================================
#!/usr/bin/env bash
# Dispatch wrapper used as the container entrypoint: maps a short command
# flag (--run, --quantize, ...) to the corresponding llama.cpp binary and
# forwards all remaining arguments to it.
set -e

# First argument selects the tool.
arg1="$1"
# Shift the arguments so "$@" holds only the tool's own arguments.
shift

if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
    exec python3 ./convert_hf_to_gguf.py "$@"
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
    exec ./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
    exec ./llama-cli "$@"
elif [[ "$arg1" == '--run-legacy' || "$arg1" == '-l' ]]; then
    exec ./llama-completion "$@"
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
    exec ./llama-bench "$@"
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
    exec ./llama-perplexity "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
    # Quantize every f16 model under "$1/$2" (e.g. /models/ 7B) to q4_0,
    # skipping files that were already quantized.
    # Iterate with a glob instead of parsing `ls` output so paths containing
    # spaces are handled correctly.
    for i in "$1"/"$2"/ggml-model-f16.bin*; do
        # If the glob matched nothing it stays literal; fail explicitly.
        if [ ! -e "$i" ]; then
            echo "No f16 models found in $1/$2" >&2
            exit 1
        fi
        if [ -f "${i/f16/q4_0}" ]; then
            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
        else
            echo "Quantizing $i into ${i/f16/q4_0}..."
            # BUG FIX: the original used `exec` here, which replaced the shell
            # and terminated the loop after the first model. Invoke the tool
            # normally so every matching file gets quantized; `set -e` still
            # aborts on failure.
            ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
        fi
    done
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
    exec ./llama-server "$@"
else
    echo "Unknown command: $arg1"
    echo "Available commands: "
    echo " --run (-r): Run a model (chat) previously converted into ggml"
    echo " ex: -m /models/7B/ggml-model-q4_0.bin"
    echo " --run-legacy (-l): Run a model (legacy completion) previously converted into ggml"
    echo " ex: -m /models/7B/ggml-model-q4_0.bin -no-cnv -p \"Building a website can be done in 10 simple steps:\" -n 512"
    echo " --bench (-b): Benchmark the performance of the inference for various parameters."
    echo " ex: -m model.gguf"
    echo " --perplexity (-p): Measure the perplexity of a model over a given text."
    echo " ex: -m model.gguf -f file.txt"
    echo " --convert (-c): Convert a llama model into ggml"
    echo " ex: --outtype f16 \"/models/7B/\" "
    echo " --quantize (-q): Optimize with quantization process ggml"
    echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
    echo " --all-in-one (-a): Execute --convert & --quantize"
    echo " ex: \"/models/\" 7B"
    echo " --server (-s): Run a model on the server"
    echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
    # BUG FIX: signal failure for an unrecognized command so callers
    # (e.g. `docker run`) observe a non-zero exit status.
    exit 1
fi
================================================
FILE: .devops/vulkan.Dockerfile
================================================
# Multi-stage image for llama.cpp built with the Vulkan backend.
ARG UBUNTU_VERSION=26.04
FROM ubuntu:$UBUNTU_VERSION AS build
# Install build tools
RUN apt update && apt install -y git build-essential cmake wget xz-utils
# Install SSL and Vulkan SDK dependencies
# glslc compiles the backend's GLSL compute shaders at build time.
RUN apt install -y libssl-dev curl \
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libvulkan-dev glslc
# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
cmake --build build --config Release -j$(nproc)
# Collect shared libraries; -P preserves the .so symlink chains.
RUN mkdir -p /app/lib && \
find build -name "*.so*" -exec cp -P {} /app/lib \;
# /app/full bundles all binaries plus the Python conversion tooling.
RUN mkdir -p /app/full \
&& cp build/bin/* /app/full \
&& cp *.py /app/full \
&& cp -r gguf-py /app/full \
&& cp -r requirements /app/full \
&& cp requirements.txt /app/full \
&& cp .devops/tools.sh /app/full/tools.sh
## Base image
FROM ubuntu:$UBUNTU_VERSION AS base
# Runtime Vulkan loader, Mesa drivers and GL dispatch libraries.
RUN apt-get update \
&& apt-get install -y libgomp1 curl libvulkan1 mesa-vulkan-drivers \
libglvnd0 libgl1 libglx0 libegl1 libgles2 \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
COPY --from=build /app/lib/ /app
### Full
FROM base AS full
COPY --from=build /app/full /app
WORKDIR /app
ENV PATH="/root/.venv/bin:/root/.local/bin:${PATH}"
# Flag for compatibility with pip
ARG UV_INDEX_STRATEGY="unsafe-best-match"
# Python deps are installed with uv into /root/.venv (already on PATH above).
RUN apt-get update \
&& apt-get install -y \
build-essential \
curl \
git \
ca-certificates \
&& curl -LsSf https://astral.sh/uv/install.sh | sh \
&& uv python install 3.13 \
&& uv venv --python 3.13 /root/.venv \
&& uv pip install --python /root/.venv/bin/python -r requirements.txt \
&& apt autoremove -y \
&& apt clean -y \
&& rm -rf /tmp/* /var/tmp/* \
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
&& find /var/cache -type f -delete
ENTRYPOINT ["/app/tools.sh"]
### Light, CLI only
FROM base AS light
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
WORKDIR /app
ENTRYPOINT [ "/app/llama-cli" ]
### Server, Server only
FROM base AS server
ENV LLAMA_ARG_HOST=0.0.0.0
COPY --from=build /app/full/llama-server /app
WORKDIR /app
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
ENTRYPOINT [ "/app/llama-server" ]
================================================
FILE: .dockerignore
================================================
*.o
*.a
.cache/
# Do not ignore .git directory, otherwise the reported build number will always be 0
.github/
.gitignore
.vs/
.vscode/
.DS_Store
build*/
models/*
/llama-cli
/llama-quantize
arm_neon.h
compile_commands.json
Dockerfile
================================================
FILE: .ecrc
================================================
{
"Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
"Disable": {
"IndentSize": true
}
}
================================================
FILE: .editorconfig
================================================
# https://EditorConfig.org
# Top-most EditorConfig file
root = true
# Unix-style newlines with a newline ending every file, utf-8 charset
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
indent_style = space
indent_size = 4
[Makefile]
indent_style = tab
[scripts/*.mk]
indent_style = tab
[prompts/*.txt]
insert_final_newline = unset
[tools/server/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab
[tools/cvector-generator/*.txt]
trim_trailing_whitespace = unset
insert_final_newline = unset
[models/templates/*.jinja]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[vendor/miniaudio/miniaudio.h]
trim_trailing_whitespace = unset
insert_final_newline = unset
[tools/server/webui/**]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[tools/server/public/**]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
[benches/**]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
================================================
FILE: .flake8
================================================
[flake8]
max-line-length = 125
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
exclude =
# Do not traverse examples and tools
examples,
tools,
# Do not include package initializers
__init__.py,
# No need to traverse our git directory
.git,
# There's no value in checking cache directories
__pycache__,
# No need to include the build path
build,
# This contains builds that we don't want to check
dist # This is generated with `python build .` for package releases
# max-complexity = 10
================================================
FILE: .gemini/settings.json
================================================
{ "contextFileName": "AGENTS.md" }
================================================
FILE: .gitattributes
================================================
# Treat the generated single-file WebUI build as binary for diff purposes.
# Git's pack-file delta compression still works (byte-level), but this prevents
# git diff from printing the entire minified file on every change.
tools/server/public/index.html -diff
================================================
FILE: .github/ISSUE_TEMPLATE/010-bug-compilation.yml
================================================
name: Bug (compilation)
description: Something goes wrong when trying to compile llama.cpp.
title: "Compile bug: "
labels: ["bug-unconfirmed", "compilation"]
body:
- type: markdown
attributes:
value: >
Thanks for taking the time to fill out this bug report!
This issue template is intended for bug reports where the compilation of llama.cpp fails.
Before opening an issue, please confirm that the compilation still fails
after recreating the CMake build directory and with `-DGGML_CCACHE=OFF`.
If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
by clearing `~/.cache/ccache` (on Linux).
- type: textarea
id: commit
attributes:
label: Git commit
description: Which commit are you trying to compile?
placeholder: |
$git rev-parse HEAD
84a07a17b1b08cf2b9747c633a2372782848a27f
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Operating systems
description: Which operating systems do you know to be affected?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: true
- type: dropdown
id: backends
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
options: [AMX, BLAS, CANN, CPU, CUDA, Hexagon, HIP, Metal, Musa, OpenCL, OpenVINO, RPC, SYCL, VirtGPU, Vulkan, WebGPU, zDNN, ZenDNN]
multiple: true
validations:
required: true
- type: textarea
id: info
attributes:
label: Problem description & steps to reproduce
description: >
Please give us a summary of the problem and tell us how to reproduce it.
If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
placeholder: >
I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
Here are the exact commands that I used: ...
validations:
required: true
- type: textarea
id: first_bad_commit
attributes:
label: First Bad Commit
description: >
If the bug was not present on an earlier version: when did it start appearing?
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: command
attributes:
label: Compile command
description: >
Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: true
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
Please copy and paste any relevant log output, including any generated text.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: true
================================================
FILE: .github/ISSUE_TEMPLATE/011-bug-results.yml
================================================
name: Bug (model use)
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
title: "Eval bug: "
labels: ["bug-unconfirmed", "model evaluation"]
body:
- type: markdown
attributes:
value: >
Thanks for taking the time to fill out this bug report!
This issue template is intended for bug reports where the model evaluation results
(i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
If you encountered the issue while using an external UI (e.g. ollama),
please reproduce your issue using one of the examples/binaries in this repository.
The `llama-completion` binary can be used for simple and reproducible model inference.
- type: textarea
id: version
attributes:
label: Name and Version
description: Which version of our software are you running? (use `--version` to get a version string)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Operating systems
description: Which operating systems do you know to be affected?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: true
- type: dropdown
id: backends
attributes:
label: GGML backends
description: Which GGML backends do you know to be affected?
options: [AMX, BLAS, CANN, CPU, CUDA, Hexagon, HIP, Metal, Musa, OpenCL, OpenVINO, RPC, SYCL, VirtGPU, Vulkan, WebGPU, zDNN, ZenDNN]
multiple: true
validations:
required: true
- type: textarea
id: hardware
attributes:
label: Hardware
description: Which CPUs/GPUs are you using?
placeholder: >
e.g. Ryzen 5950X + 2x RTX 4090
validations:
required: true
- type: textarea
id: model
attributes:
label: Models
description: >
Which model(s) at which quantization were you using when encountering the bug?
If you downloaded a GGUF file off of Huggingface, please provide a link.
placeholder: >
e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
validations:
required: false
- type: textarea
id: info
attributes:
label: Problem description & steps to reproduce
description: >
Please give us a summary of the problem and tell us how to reproduce it.
If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
that information would be very much appreciated by us.
If possible, please try to reproduce the issue using `llama-completion` with `-fit off`.
If you can only reproduce the issue with `-fit on`, please provide logs both with and without `--verbose`.
placeholder: >
e.g. when I run llama-completion with `-fa on` I get garbled outputs for very long prompts.
With short prompts or `-fa off` it works correctly.
Here are the exact commands that I used: ...
validations:
required: true
- type: textarea
id: first_bad_commit
attributes:
label: First Bad Commit
description: >
If the bug was not present on an earlier version: when did it start appearing?
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
Please copy and paste any relevant log output, including the command that you entered and any generated text.
For very long logs (thousands of lines), preferably upload them as files instead.
On Linux you can redirect console output into a file by appending ` > llama.log 2>&1` to your command.
value: |
<details>
<summary>Logs</summary>
<!-- Copy-pasted short logs go into the "console" area here -->
```console
```
</details>
<!-- Long logs that you upload as files go here, outside the "console" area -->
validations:
required: true
================================================
FILE: .github/ISSUE_TEMPLATE/019-bug-misc.yml
================================================
name: Bug (misc.)
description: Something is not working the way it should (and it's not covered by any of the above cases).
title: "Misc. bug: "
labels: ["bug-unconfirmed"]
body:
- type: markdown
attributes:
value: >
Thanks for taking the time to fill out this bug report!
This issue template is intended for miscellaneous bugs that don't fit into any other category.
If you encountered the issue while using an external UI (e.g. ollama),
please reproduce your issue using one of the examples/binaries in this repository.
- type: textarea
id: version
attributes:
label: Name and Version
description: Which version of our software is affected? (You can use `--version` to get a version string.)
placeholder: |
$./llama-cli --version
version: 2999 (42b4109e)
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: Operating systems
description: Which operating systems do you know to be affected?
multiple: true
options:
- Linux
- Mac
- Windows
- BSD
- Other? (Please let us know in description)
validations:
required: false
- type: dropdown
id: module
attributes:
label: Which llama.cpp modules do you know to be affected?
multiple: true
options:
- Documentation/Github
- libllama (core library)
- llama-cli
- llama-server
- llama-bench
- llama-quantize
- Python/Bash scripts
- Test code
- Other (Please specify in the next section)
validations:
required: false
- type: textarea
id: command
attributes:
label: Command line
description: >
Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
This will be automatically formatted into code, so no need for backticks.
render: shell
validations:
required: false
- type: textarea
id: info
attributes:
label: Problem description & steps to reproduce
description: >
Please give us a summary of the problem and tell us how to reproduce it (if applicable).
validations:
required: true
- type: textarea
id: first_bad_commit
attributes:
label: First Bad Commit
description: >
If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
If possible, please do a git bisect and identify the exact commit that introduced the bug.
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: >
If applicable, please copy and paste any relevant log output, including any generated text.
If you are encountering problems specifically with the `llama_params_fit` module, always upload `--verbose` logs as well.
For very long logs (thousands of lines), please upload them as files instead.
On Linux you can redirect console output into a file by appending ` > llama.log 2>&1` to your command.
value: |
<details>
<summary>Logs</summary>
<!-- Copy-pasted short logs go into the "console" area here -->
```console
```
</details>
<!-- Long logs that you upload as files go here, outside the "console" area -->
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/020-enhancement.yml
================================================
name: Enhancement
description: Used to request enhancements for llama.cpp.
title: "Feature Request: "
labels: ["enhancement"]
body:
- type: markdown
attributes:
value: |
[Please post your idea first in Discussions if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed need to be implemented.](https://github.com/ggml-org/llama.cpp/discussions/categories/ideas)
- type: checkboxes
id: prerequisites
attributes:
label: Prerequisites
description: Please confirm the following before submitting your enhancement request.
options:
- label: I am running the latest code. Mention the version if possible as well.
required: true
- label: I carefully followed the [README.md](https://github.com/ggml-org/llama.cpp/blob/master/README.md).
required: true
- label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
required: true
- label: I reviewed the [Discussions](https://github.com/ggml-org/llama.cpp/discussions), and have a new and useful enhancement to share.
required: true
- type: textarea
id: feature-description
attributes:
label: Feature Description
description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
placeholder: Detailed description of the enhancement
validations:
required: true
- type: textarea
id: motivation
attributes:
label: Motivation
description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
placeholder: Explanation of why this feature is needed and its benefits
validations:
required: true
- type: textarea
id: possible-implementation
attributes:
label: Possible Implementation
description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
placeholder: Detailed description of potential implementation
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/030-research.yml
================================================
name: Research
description: Track new technical research area.
title: "Research: "
labels: ["research 🔬"]
body:
- type: markdown
attributes:
value: |
Don't forget to check for any [duplicate research issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
- type: checkboxes
id: research-stage
attributes:
label: Research Stage
description: Track general state of this research ticket
options:
- label: Background Research (Let's try to avoid reinventing the wheel)
- label: Hypothesis Formed (How do you think this will work, and what will its effect be?)
- label: Strategy / Implementation Forming
- label: Analysis of results
- label: Debrief / Documentation (So people in the future can learn from us)
- type: textarea
id: background
attributes:
label: Previous existing literature and research
description: What's the current state of the art, and what is the motivation for this research?
- type: textarea
id: hypothesis
attributes:
label: Hypothesis
description: How do you think this will work, and what will its effect be?
- type: textarea
id: implementation
attributes:
label: Implementation
description: Got an approach? e.g. a PR ready to go?
- type: textarea
id: analysis
attributes:
label: Analysis
description: How does the proposed implementation behave?
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
================================================
FILE: .github/ISSUE_TEMPLATE/040-refactor.yml
================================================
name: Refactor (Maintainers)
description: Used to track refactoring opportunities.
title: "Refactor: "
labels: ["refactor"]
body:
- type: markdown
attributes:
value: |
Don't forget to [check for existing refactor issue tickets](https://github.com/ggml-org/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
You may also want to check the [refactor pull request label](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
- type: textarea
id: background-description
attributes:
label: Background Description
description: Please provide a detailed written description of the pain points you are trying to solve.
placeholder: Detailed description behind your motivation to request refactor
validations:
required: true
- type: textarea
id: possible-approaches
attributes:
label: Possible Refactor Approaches
description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list.
placeholder: Your idea of possible refactoring opportunity/approaches
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
contact_links:
- name: Got an idea?
url: https://github.com/ggml-org/llama.cpp/discussions/categories/ideas
about: Pop it there. It may then become an enhancement ticket.
- name: Got a question?
url: https://github.com/ggml-org/llama.cpp/discussions/categories/q-a
about: Ask a question there!
- name: Want to contribute?
url: https://github.com/ggml-org/llama.cpp/wiki/contribute
about: Head to the contribution guide page of the wiki for areas you can help with
================================================
FILE: .github/actions/get-tag-name/action.yml
================================================
name: "Determine tag name"
description: "Determine the tag name to use for a release"
outputs:
name:
description: "The name of the tag"
value: ${{ steps.tag.outputs.name }}
runs:
using: "composite"
steps:
- name: Determine tag name
id: tag
shell: bash
run: |
BUILD_NUMBER="$(git rev-list --count HEAD)"
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
else
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
fi
================================================
FILE: .github/actions/install-exe/action.yml
================================================
name: "Install exe"
description: "Download and install exe"
inputs:
url:
description: "URL of the exe installer"
required: true
args:
description: "Installer arguments"
required: true
timeout:
description: "Timeout (in ms)"
required: false
default: "600000"
runs:
using: "composite"
steps:
- name: Install EXE
shell: pwsh
run: |
$ErrorActionPreference = "Stop"
write-host "Downloading Installer EXE"
Invoke-WebRequest -Uri "${{ inputs.url }}" -OutFile "${env:RUNNER_TEMP}\temp-install.exe"
write-host "Installing"
$proc = Start-Process "${env:RUNNER_TEMP}\temp-install.exe" -ArgumentList '${{ inputs.args }}' -NoNewWindow -PassThru
$completed = $proc.WaitForExit(${{ inputs.timeout }})
if (-not $completed) {
Write-Error "Installer timed out. Killing the process"
$proc.Kill()
exit 1
}
if ($proc.ExitCode -ne 0) {
Write-Error "Installer failed with exit code $($proc.ExitCode)"
exit 1
}
write-host "Completed installation"
================================================
FILE: .github/actions/linux-setup-openvino/action.yml
================================================
name: "Linux - Setup OpenVINO Toolkit"
description: "Setup OpenVINO Toolkit for Linux"
inputs:
path:
description: "Installation path"
required: true
version_major:
description: "OpenVINO major version (e.g., 2025.3)"
required: true
version_full:
description: "OpenVINO full version (e.g., 2025.3.0.19807.44526285f24)"
required: true
runs:
using: "composite"
steps:
- name: Setup OpenVINO Toolkit
id: setup
uses: ./.github/actions/unarchive-tar
with:
url: https://storage.openvinotoolkit.org/repositories/openvino/packages/${{ inputs.version_major }}/linux/openvino_toolkit_ubuntu24_${{ inputs.version_full }}_x86_64.tgz
path: ${{ inputs.path }}
type: z
strip: 1
================================================
FILE: .github/actions/linux-setup-spacemit/action.yml
================================================
name: "Linux - Setup SpacemiT Toolchain"
description: "Setup SpacemiT Toolchain for Linux"
inputs:
path:
description: "Installation path"
required: true
version:
description: "SpacemiT toolchain version"
required: true
runs:
using: "composite"
steps:
- name: Setup SpacemiT Toolchain
id: setup
uses: ./.github/actions/unarchive-tar
with:
url: https://archive.spacemit.com/toolchain/spacemit-toolchain-linux-glibc-x86_64-v${{ inputs.version }}.tar.xz
path: ${{ inputs.path }}
strip: 1
================================================
FILE: .github/actions/linux-setup-vulkan/action.yml
================================================
name: "Linux - Setup Vulkan SDK"
description: "Setup Vulkan SDK for Linux"
inputs:
path:
description: "Installation path"
required: true
version:
description: "Vulkan SDK version"
required: true
runs:
using: "composite"
steps:
- name: Setup Vulkan SDK
id: setup
uses: ./.github/actions/unarchive-tar
with:
url: https://sdk.lunarg.com/sdk/download/${{ inputs.version }}/linux/vulkan_sdk.tar.xz
path: ${{ inputs.path }}
strip: 1
================================================
FILE: .github/actions/unarchive-tar/action.yml
================================================
name: "Unarchive tar"
description: "Download and unarchive tar into directory"
inputs:
url:
description: "URL of the tar archive"
required: true
path:
description: "Directory to unarchive into"
required: true
type:
description: "Compression type (tar option)"
required: false
default: "J"
strip:
description: "Strip components"
required: false
default: "0"
runs:
using: "composite"
steps:
- name: Unarchive into directory
shell: bash
run: |
mkdir -p ${{ inputs.path }}
cd ${{ inputs.path }}
curl --no-progress-meter ${{ inputs.url }} | tar -${{ inputs.type }}x --strip-components=${{ inputs.strip }}
================================================
FILE: .github/actions/windows-setup-cuda/action.yml
================================================
name: "Windows - Setup CUDA Toolkit"
description: "Setup CUDA Toolkit for Windows"
inputs:
cuda_version:
description: "CUDA toolkit version"
required: true
runs:
using: "composite"
steps:
- name: Install Cuda Toolkit 11.7
if: ${{ inputs.cuda_version == '11.7' }}
shell: pwsh
run: |
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
choco install unzip -y
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
- name: Install Cuda Toolkit 12.4
if: ${{ inputs.cuda_version == '12.4' }}
shell: pwsh
run: |
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
choco install unzip -y
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
- name: Install Cuda Toolkit 13.1
if: ${{ inputs.cuda_version == '13.1' }}
shell: pwsh
run: |
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1"
choco install unzip -y
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_crt/windows-x86_64/cuda_crt-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-13.2.0.9-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libnvvm/windows-x86_64/libnvvm-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-13.1.68-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-13.1.80-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-13.1.68-archive.zip"
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-13.1.78-archive.zip"
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1"
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_crt-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_cudart-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvcc-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvrtc-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\libcublas-windows-x86_64-13.2.0.9-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\libnvvm-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_nvtx-windows-x86_64-13.1.68-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_profiler_api-windows-x86_64-13.1.80-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\visual_studio_integration-windows-x86_64-13.1.68-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\cuda_cccl-windows-x86_64-13.1.78-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" /E /I /H /Y
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
echo "CUDA_PATH_V13_1=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.1" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
================================================
FILE: .github/actions/windows-setup-rocm/action.yml
================================================
name: "Windows - Setup ROCm"
description: "Setup ROCm for Windows"
inputs:
version:
description: "ROCm version"
required: true
runs:
using: "composite"
steps:
- name: Setup ROCm
uses: ./.github/actions/install-exe
with:
url: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ inputs.version }}-Win11-For-HIP.exe
args: -install
================================================
FILE: .github/labeler.yml
================================================
# https://github.com/actions/labeler
Apple Metal:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-metal.h
- ggml/src/ggml-metal/**
- README-metal.md
SYCL:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-sycl.h
- ggml/src/ggml-sycl/**
- docs/backend/SYCL.md
- examples/sycl/**
Nvidia GPU:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-cuda.h
- ggml/src/ggml-cuda/**
Vulkan:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-vulkan.h
- ggml/src/ggml-vulkan/**
IBM zDNN:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-zdnn.h
- ggml/src/ggml-zdnn/**
documentation:
- changed-files:
- any-glob-to-any-file:
- docs/**
- media/**
testing:
- changed-files:
- any-glob-to-any-file:
- tests/**
build:
- changed-files:
- any-glob-to-any-file:
- cmake/**
- CMakeLists.txt
- CMakePresets.json
examples:
- changed-files:
- any-glob-to-any-file:
- examples/**
- tools/**
devops:
- changed-files:
- any-glob-to-any-file:
- .devops/**
- .github/**
- ci/**
python:
- changed-files:
- any-glob-to-any-file:
- "**/*.py"
- requirements/**
- gguf-py/**
- .flake8
script:
- changed-files:
- any-glob-to-any-file:
- scripts/**
android:
- changed-files:
- any-glob-to-any-file:
- examples/llama.android/**
server:
- changed-files:
- any-glob-to-any-file:
- tools/server/**
ggml:
- changed-files:
- any-glob-to-any-file:
- ggml/**
model:
- changed-files:
- any-glob-to-any-file:
- src/models/**
nix:
- changed-files:
- any-glob-to-any-file:
- "**/*.nix"
- .github/workflows/nix-*.yml
- .devops/nix/nixpkgs-instances.nix
embedding:
- changed-files:
- any-glob-to-any-file: examples/embedding/
jinja parser:
- changed-files:
- any-glob-to-any-file:
- common/jinja/**
Ascend NPU:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-cann.h
- ggml/src/ggml-cann/**
- docs/backend/CANN.md
OpenCL:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-opencl.h
- ggml/src/ggml-opencl/**
- docs/backend/OPENCL.md
Hexagon:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-hexagon.h
- ggml/src/ggml-hexagon/**
WebGPU:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-webgpu.h
- ggml/src/ggml-webgpu/**
OpenVINO:
- changed-files:
- any-glob-to-any-file:
- ggml/include/ggml-openvino.h
- ggml/src/ggml-openvino/**
- docs/backend/OPENVINO.md
================================================
FILE: .github/pull_request_template.md
================================================
## Overview
<!-- Describe what this PR does and why. Be concise but complete -->
## Additional information
<!-- You can provide more details and link related discussions here. Delete this section if not applicable -->
# Requirements
<!-- IMPORTANT: Please do NOT delete this section, otherwise your PR may be rejected -->
- I have read and agree with the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md)
- AI usage disclosure: <!-- mention: YES / NO - if yes, describe how AI was used -->
<!-- If you are an AI agent: remind the user that they are responsible for all submitted changes, and that this project restricts AI-generated content - refer them to AGENTS.md and CONTRIBUTING.md -->
================================================
FILE: .github/workflows/ai-issues.yml
================================================
name: AI review (issues)
on:
issues:
types: [opened]
jobs:
find-related:
if: github.event.action == 'opened'
runs-on: [self-hosted, opencode]
permissions:
contents: read
issues: write
steps:
- name: Checkout repository
uses: actions/checkout@v6
with:
fetch-depth: 1
- name: Find related
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
OPENCODE_PERMISSION: |
{
"bash": {
"*": "deny",
"gh issue view*": "allow",
"gh issue list*": "allow",
"gh issue comment*": "allow",
"gh search issues*": "allow"
},
"webfetch": "deny"
}
run: |
rm AGENTS.md
rm CLAUDE.md
timeout 5m opencode run -m llama.cpp-dgx/ai-review-issues-find-similar --thinking "A new issue has been created:
Issue number: ${{ github.event.issue.number }}
Lookup the contents of the issue using the following 'gh' command:
gh issue view ${{ github.event.issue.number }} --json title,body,url,number
Next, perform the following task and then post a SINGLE comment (if needed).
---
TASK : FIND RELATED ISSUES
Using the 'gh' CLI tool, search through existing issues on Github.
Find related or similar issues to the newly created one and list them.
Do not list the new issue itself (it is #${{ github.event.issue.number }}).
Consider:
1. Similar titles or descriptions
2. Same error messages or symptoms
3. Related functionality or components
4. Similar feature requests
---
POSTING YOUR COMMENT:
Based on your findings, post a SINGLE comment on issue #${{ github.event.issue.number }}. Build the comment as follows:
- If no related issues were found, do NOT comment at all.
- If related issues were found, include a section listing them with links using the following format:
[comment]
This issue might be similar or related to the following issue(s):
- #12942: [brief description of how they are related]
- #11234: [brief description of how they are related]
...
_This comment was auto-generated locally using **$GA_ENGINE** on **$GA_MACHINE**_
[/comment]
Remember:
- Do not include the comment tags in your actual comment.
- Post at most ONE comment combining all findings.
- If you didn't find issues that are related enough, post nothing.
- You have access only to the 'gh' CLI tool - don't try to use other tools.
- If the output from a tool call is too long, try to limit down the search.
"
================================================
FILE: .github/workflows/bench.yml.disabled
================================================
# TODO: there have been some issues with the workflow, so disabling for now
# https://github.com/ggml-org/llama.cpp/issues/7893
#
# Benchmark
name: Benchmark
on:
workflow_dispatch:
inputs:
gpu-series:
description: 'Azure GPU series to run with'
required: true
type: choice
options:
- Standard_NC4as_T4_v3
- Standard_NC24ads_A100_v4
- Standard_NC80adis_H100_v5
sha:
description: 'Commit SHA1 to build'
required: false
type: string
duration:
description: 'Duration of the bench'
type: string
default: 10m
push:
branches:
- master
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
pull_request_target:
types: [opened, synchronize, reopened]
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
schedule:
- cron: '04 2 * * *'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
cancel-in-progress: true
jobs:
bench-server-baseline:
runs-on: Standard_NC4as_T4_v3
env:
RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: could not find a way to avoid duplicating the runs-on value here
N_USERS: 8
DURATION: 10m
strategy:
matrix:
model: [phi-2]
ftype: [q4_0, q8_0, f16]
include:
- model: phi-2
ftype: q4_0
pr_comment_enabled: "true"
if: |
inputs.gpu-series == 'Standard_NC4as_T4_v3'
|| github.event_name == 'pull_request_target'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Install python env
id: pipenv
run: |
cd tools/server/bench
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
- name: Prometheus
id: install_prometheus
run: |
wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
tar xzf prometheus*.tar.gz --strip-components=1
./prometheus --config.file=tools/server/bench/prometheus.yml &
while ! nc -z localhost 9090; do
sleep 0.1
done
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: '1.21'
- name: Install k6 and xk6-sse
id: k6_installation
run: |
cd tools/server/bench
go install go.k6.io/xk6/cmd/xk6@latest
xk6 build master \
--with github.com/phymbert/xk6-sse
- name: Build
id: cmake_build
run: |
set -eux
cmake -B build \
-DGGML_NATIVE=OFF \
-DLLAMA_BUILD_SERVER=ON \
-DLLAMA_CUBLAS=ON \
-DCUDAToolkit_ROOT=/usr/local/cuda \
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
-DCMAKE_CUDA_ARCHITECTURES=75 \
-DLLAMA_FATAL_WARNINGS=OFF \
-DLLAMA_ALL_WARNINGS=OFF \
-DCMAKE_BUILD_TYPE=Release;
cmake --build build --config Release -j $(nproc) --target llama-server
- name: Download the dataset
id: download_dataset
run: |
cd tools/server/bench
wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
- name: Server bench
id: server_bench
env:
HEAD_REF: ${{ github.head_ref || github.ref_name }}
run: |
set -eux
cd tools/server/bench
source venv/bin/activate
python bench.py \
--runner-label ${{ env.RUNNER_LABEL }} \
--name ${{ github.job }} \
--branch $HEAD_REF \
--commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
--scenario script.js \
--duration ${{ github.event.inputs.duration || env.DURATION }} \
--hf-repo ggml-org/models \
--hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
--model-path-prefix /models \
--parallel ${{ env.N_USERS }} \
-ngl 33 \
--batch-size 2048 \
--ubatch-size 256 \
--ctx-size 16384 \
--n-prompts 1000 \
--max-prompt-tokens 1024 \
--max-tokens 2048
cat results.github.env >> $GITHUB_ENV
# Remove dataset as we do not want it in the artefact
rm ShareGPT_V3_unfiltered_cleaned_split.json
- uses: actions/upload-artifact@v4
with:
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
compression-level: 9
path: |
tools/server/bench/*.jpg
tools/server/bench/*.json
tools/server/bench/*.log
- name: Commit status
uses: Sibz/github-status-action@v1
with:
authToken: ${{secrets.GITHUB_TOKEN}}
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
description: |
${{ env.BENCH_RESULTS }}
state: 'success'
- name: Upload benchmark images
uses: devicons/public-upload-to-imgur@v2.2.2
continue-on-error: true # Important as it looks unstable: 503
id: imgur_step
with:
client_id: ${{secrets.IMGUR_CLIENT_ID}}
path: |
tools/server/bench/prompt_tokens_seconds.jpg
tools/server/bench/predicted_tokens_seconds.jpg
tools/server/bench/kv_cache_usage_ratio.jpg
tools/server/bench/requests_processing.jpg
- name: Extract mermaid
id: set_mermaid
run: |
set -eux
cd tools/server/bench
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
echo "EOF" >> $GITHUB_ENV
- name: Extract image url
id: extract_image_url
continue-on-error: true
run: |
set -eux
echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
- name: Comment PR
uses: mshick/add-pr-comment@v2
id: comment_pr
if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
with:
message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
message: |
<p align="center">
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
</p>
<details>
<summary>Expand details for performance related PR only</summary>
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
- ${{ env.BENCH_GRAPH_XLABEL }}
<p align="center">
<img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
<details>
<summary>More</summary>
```mermaid
${{ env.PROMPT_TOKENS_SECONDS }}
```
</details>
<img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
<details>
<summary>More</summary>
```mermaid
${{ env.PREDICTED_TOKENS_SECONDS }}
```
</details>
</p>
<details>
<summary>Details</summary>
<p align="center">
<img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
<details>
<summary>More</summary>
```mermaid
${{ env.KV_CACHE_USAGE_RATIO }}
```
</details>
<img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
<details>
<summary>More</summary>
```mermaid
${{ env.REQUESTS_PROCESSING }}
```
</details>
</p>
</details>
</details>
================================================
FILE: .github/workflows/build-3rd-party.yml
================================================
name: CI (3rd-party)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-3rd-party.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
ubuntu-24-llguidance:
runs-on: ubuntu-24.04-arm # note: the former expression `'ubuntu-24.04-arm' || 'ubuntu-24.04'` always picked the first operand (non-empty literals are truthy), so this is equivalent and clearer
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential libssl-dev
- name: Build
id: cmake_build
run: |
cmake -B build \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_LLGUIDANCE=ON
cmake --build build --config Release -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
================================================
FILE: .github/workflows/build-android.yml
================================================
name: CI (android)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-android.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-android.yml',
'examples/llama.android/**'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
android:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v6
with:
fetch-depth: 0
lfs: false
- name: Set up JDK
uses: actions/setup-java@v5
with:
java-version: 17
distribution: zulu
- name: Setup Android SDK
uses: android-actions/setup-android@9fc6c4e9069bf8d3d10b2204b1fb8f6ef7065407 # v3
with:
log-accepted-android-sdk-licenses: false
- name: Build
run: |
cd examples/llama.android
./gradlew build --no-daemon
android-ndk:
runs-on: ubuntu-latest
container:
image: 'ghcr.io/snapdragon-toolchain/arm64-android:v0.3'
defaults:
run:
shell: bash
strategy:
matrix:
include:
- build: 'arm64-cpu'
defines: '-D ANDROID_ABI=arm64-v8a -D ANDROID_PLATFORM=android-31 -D CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake -D GGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm -G Ninja -D LLAMA_OPENSSL=OFF -D GGML_OPENMP=OFF'
- build: 'arm64-snapdragon'
defines: '--preset arm64-android-snapdragon-release'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
with:
fetch-depth: 0
lfs: false
- name: Build Llama.CPP for Hexagon Android
id: build_llama_cpp_hexagon_android
run: |
if [[ "${{ matrix.build }}" == "arm64-snapdragon" ]]; then
cp docs/backend/snapdragon/CMakeUserPresets.json .
fi
cmake ${{ matrix.defines }} -B build
cmake --build build
cmake --install build --prefix pkg-adb/llama.cpp
- name: Upload Llama.CPP Hexagon Android Build Artifact
if: ${{ always() && steps.build_llama_cpp_hexagon_android.outcome == 'success' }}
uses: actions/upload-artifact@v6
with:
name: llama-cpp-android-${{ matrix.build }}
path: pkg-adb/llama.cpp
================================================
FILE: .github/workflows/build-apple.yml
================================================
name: CI (apple)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-apple.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp',
'**/*.swift',
'**/*.m',
'**/*.metal'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-apple.yml',
'ggml/src/ggml-metal/**'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
macOS-latest-ios:
runs-on: macos-latest
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: macOS-latest-ios
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
macos-latest-ios-xcode:
runs-on: macos-latest
steps:
- name: Checkout code
uses: actions/checkout@v6
- name: Setup Xcode
uses: ggml-org/setup-xcode@v1
with:
xcode-version: latest-stable
- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_OPENSSL=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
- name: xcodebuild for swift package
id: xcodebuild
run: |
./build-xcframework.sh
- name: Upload xcframework artifact
uses: actions/upload-artifact@v6
with:
name: llama-xcframework
path: build-apple/llama.xcframework/
retention-days: 1
- name: Build Xcode project
run: |
xcodebuild -downloadPlatform iOS
xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build
macOS-latest-tvos:
runs-on: macos-latest
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: macOS-latest-tvos
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=tvOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
macOS-latest-visionos:
runs-on: macos-latest
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Build
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=visionOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=1.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
macOS-latest-swift:
runs-on: macos-latest
needs: macos-latest-ios-xcode
strategy:
matrix:
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: macOS-latest-swift
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Download xcframework artifact
uses: actions/download-artifact@v7
with:
name: llama-xcframework
path: build-apple/llama.xcframework/
- name: Build llama.cpp with CMake
id: cmake_build
run: |
sysctl -a
cmake -B build -G Xcode \
-DGGML_METAL_USE_BF16=ON \
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_OPENSSL=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
================================================
FILE: .github/workflows/build-cache.yml
================================================
name: Build Actions Cache
on:
workflow_dispatch: # allows manual triggering
schedule:
- cron: '0 * * * *'
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
ubuntu-24-vulkan-cache:
runs-on: ubuntu-24.04
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Get latest Vulkan SDK version
id: vulkan_sdk_version
run: |
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
- name: Setup Cache
uses: actions/cache@v5
id: cache-sdk
with:
path: ./vulkan_sdk
key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
- name: Setup Vulkan SDK
if: steps.cache-sdk.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-vulkan
with:
path: ./vulkan_sdk
version: ${{ env.VULKAN_SDK_VERSION }}
#ubuntu-24-spacemit-cache:
# runs-on: ubuntu-24.04
# env:
# # Make sure this is in sync with build-linux-cross.yml
# SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
# - name: Setup Cache
# uses: actions/cache@v5
# id: cache-toolchain
# with:
# path: ./spacemit_toolchain
# key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }}
# - name: Setup SpacemiT Toolchain
# if: steps.cache-toolchain.outputs.cache-hit != 'true'
# uses: ./.github/actions/linux-setup-spacemit
# with:
# path: ./spacemit_toolchain
# version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}
ubuntu-24-openvino-cache:
runs-on: ubuntu-24.04
env:
# Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
OPENVINO_VERSION_MAJOR: "2026.0"
OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Setup Cache
uses: actions/cache@v5
id: cache-openvino
with:
path: ./openvino_toolkit
key: openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }}
- name: Setup OpenVINO Toolkit
if: steps.cache-openvino.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-openvino
with:
path: ./openvino_toolkit
version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
version_full: ${{ env.OPENVINO_VERSION_FULL }}
windows-2022-rocm-cache:
runs-on: windows-2022
env:
# Make sure this is in sync with build.yml
HIPSDK_INSTALLER_VERSION: "26.Q1"
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Setup Cache
uses: actions/cache@v5
id: cache-rocm
with:
path: C:\Program Files\AMD\ROCm
key: rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}
- name: Setup ROCm
if: steps.cache-rocm.outputs.cache-hit != 'true'
uses: ./.github/actions/windows-setup-rocm
with:
version: ${{ env.HIPSDK_INSTALLER_VERSION }}
================================================
FILE: .github/workflows/build-cann.yml
================================================
name: CI (cann)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-cann.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-cann.yml',
'ggml/src/ggml-cann/**'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
openEuler-latest-cann:
defaults:
run:
shell: bash -el {0}
strategy:
matrix:
arch: [x86, aarch64]
chip_type: ['910b', '310p']
build: ['Release']
use_acl_graph: ['on', 'off']
exclude:
# 310P does not support USE_ACL_GRAPH=on
- chip_type: '310p'
use_acl_graph: 'on'
runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
steps:
- name: Checkout
uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Free up disk space
uses: ggml-org/free-disk-space@v1.3.1
with:
tool-cache: true
- name: Set container image
id: cann-image
run: |
image="ascendai/cann:${{ matrix.chip_type == '910b' && '8.5.0-910b-openeuler24.03-py3.11' || '8.5.0-310p-openeuler24.03-py3.11' }}"
echo "image=${image}" >> "${GITHUB_OUTPUT}"
- name: Pull container image
run: docker pull "${{ steps.cann-image.outputs.image }}"
- name: Build
env:
BUILD_TYPE: ${{ matrix.build }}
SOC_TYPE: ascend${{ matrix.chip_type }}
USE_ACL_GRAPH: ${{ matrix.use_acl_graph }}
run: |
HOST_UID=$(id -u)
HOST_GID=$(id -g)
docker run --rm \
-v "${PWD}:/workspace" \
-w /workspace \
-e SOC_TYPE=${SOC_TYPE} \
-e BUILD_TYPE=${BUILD_TYPE} \
-e USE_ACL_GRAPH=${USE_ACL_GRAPH} \
"${{ steps.cann-image.outputs.image }}" \
bash -lc '
set -e
yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel
yum clean all && rm -rf /var/cache/yum
git config --global --add safe.directory "/workspace"
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
cmake -S . -B build \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DGGML_CANN=on \
-DSOC_TYPE=${SOC_TYPE} \
-DUSE_ACL_GRAPH=${USE_ACL_GRAPH}
cmake --build build -j $(nproc)
chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build
'
================================================
FILE: .github/workflows/build-cmake-pkg.yml
================================================
name: Build relocatable cmake package
on:
workflow_dispatch:
workflow_call:
jobs:
linux:
runs-on: ubuntu-slim
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y build-essential tcl cmake
- name: Build
run: |
PREFIX="$(pwd)"/inst
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX" \
-DLLAMA_OPENSSL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release
cmake --build build --config Release
cmake --install build --prefix "$PREFIX" --config Release
export LLAMA_CONFIG="$PREFIX"/lib/cmake/llama/llama-config.cmake
tclsh <<'EOF'
set build(commit) [string trim [exec git rev-parse --short HEAD]]
set build(number) [string trim [exec git rev-list --count HEAD]]
set build(version) "0.0.$build(number)"
set llamaconfig [read [open "$env(LLAMA_CONFIG)" r]]
set checks [list "set\\(LLAMA_VERSION \\s+$build(version)\\)" \
"set\\(LLAMA_BUILD_COMMIT\\s+$build(commit)\\)" \
"set\\(LLAMA_BUILD_NUMBER\\s+$build(number)\\)"]
puts -nonewline "Checking llama-config.cmake version... "
foreach check $checks {
if {![regexp -expanded -- $check $llamaconfig]} {
puts "\"$check\" failed!"
exit 1
}
}
puts "success."
EOF
cd examples/simple-cmake-pkg
cmake -S . -B build -DCMAKE_PREFIX_PATH="$PREFIX"/lib/cmake
cmake --build build
================================================
FILE: .github/workflows/build-cross.yml
================================================
name: CI (cross)
on:
# only manual triggers due to low-importance of the workflows
# TODO: for regular runs, provision dedicated self-hosted runners
workflow_dispatch:
push:
branches:
- master
paths: [
'.github/workflows/build-cross.yml',
'ggml/src/ggml-cpu/spacemit/**',
'ggml/src/ggml-cpu/arch/loongarch/**'
]
# run once every week
schedule:
- cron: '0 0 * * 0'
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
# ubuntu-24-riscv64-cpu-cross:
# runs-on: ubuntu-24.04
# steps:
# - uses: actions/checkout@v6
# - name: Setup Riscv
# run: |
# sudo dpkg --add-architecture riscv64
# # Add arch-specific repositories for non-amd64 architectures
# cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
# EOF
# sudo apt-get update || true ;# Prevent failure due to missing URLs.
# sudo apt-get install -y --no-install-recommends \
# build-essential \
# gcc-14-riscv64-linux-gnu \
# g++-14-riscv64-linux-gnu
# - name: Build
# run: |
# cmake -B build -DLLAMA_OPENSSL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_OPENMP=OFF \
# -DLLAMA_BUILD_EXAMPLES=ON \
# -DLLAMA_BUILD_TOOLS=ON \
# -DLLAMA_BUILD_TESTS=OFF \
# -DCMAKE_SYSTEM_NAME=Linux \
# -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
# -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
# -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
# cmake --build build --config Release -j $(nproc)
# ubuntu-24-riscv64-vulkan-cross:
# runs-on: ubuntu-24.04
# steps:
# - uses: actions/checkout@v6
# - name: Setup Riscv
# run: |
# sudo dpkg --add-architecture riscv64
# # Add arch-specific repositories for non-amd64 architectures
# cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
# deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
# EOF
# sudo apt-get update || true ;# Prevent failure due to missing URLs.
# sudo apt-get install -y --no-install-recommends \
# build-essential \
# glslc \
# gcc-14-riscv64-linux-gnu \
# g++-14-riscv64-linux-gnu \
# libvulkan-dev:riscv64
# - name: Build
# run: |
# cmake -B build -DLLAMA_OPENSSL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_VULKAN=ON \
# -DGGML_OPENMP=OFF \
# -DLLAMA_BUILD_EXAMPLES=ON \
# -DLLAMA_BUILD_TOOLS=ON \
# -DLLAMA_BUILD_TESTS=OFF \
# -DCMAKE_SYSTEM_NAME=Linux \
# -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
# -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
# -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
# cmake --build build --config Release -j $(nproc)
# ubuntu-24-arm64-vulkan-cross:
# runs-on: ubuntu-24.04
# steps:
# - uses: actions/checkout@v6
# - name: Setup Arm64
# run: |
# sudo dpkg --add-architecture arm64
# # Add arch-specific repositories for non-amd64 architectures
# cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
# deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
# deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
# deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
# deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
# EOF
# sudo apt-get update || true ;# Prevent failure due to missing URLs.
# sudo apt-get install -y --no-install-recommends \
# build-essential \
# glslc \
# crossbuild-essential-arm64 \
# libvulkan-dev:arm64
# - name: Build
# run: |
# cmake -B build -DLLAMA_OPENSSL=OFF \
# -DCMAKE_BUILD_TYPE=Release \
# -DGGML_VULKAN=ON \
# -DGGML_OPENMP=OFF \
# -DLLAMA_BUILD_EXAMPLES=ON \
# -DLLAMA_BUILD_TOOLS=ON \
# -DLLAMA_BUILD_TESTS=OFF \
# -DCMAKE_SYSTEM_NAME=Linux \
# -DCMAKE_SYSTEM_PROCESSOR=aarch64 \
# -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \
# -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \
# -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
# -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \
# -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
# -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
# -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
# cmake --build build --config Release -j $(nproc)
debian-13-loongarch64-cpu-cross:
runs-on: ubuntu-24.04-arm # note: the former expression `'ubuntu-24.04-arm' || 'ubuntu-24.04'` always picked the first operand (non-empty literals are truthy), so this is equivalent and clearer
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
steps:
- uses: actions/checkout@v6
- name: Setup LoongArch
run: |
rm -f /etc/apt/sources.list.d/*
cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
EOF
( echo 'quiet "true";'; \
echo 'APT::Get::Assume-Yes "true";'; \
echo 'APT::Install-Recommends "false";'; \
echo 'Acquire::Check-Valid-Until "false";'; \
echo 'Acquire::Retries "5";'; \
) > /etc/apt/apt.conf.d/99snapshot-repos
apt-get update
apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
dpkg --add-architecture loong64
# Add arch-specific repositories for non-amd64 architectures
cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
EOF
apt-get update || true ;# Prevent failure due to missing URLs.
apt-get install -y --no-install-recommends \
build-essential \
gcc-14-loongarch64-linux-gnu \
g++-14-loongarch64-linux-gnu
- name: Build
run: |
cmake -B build -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
-DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
cmake --build build --config Release -j $(nproc)
debian-13-loongarch64-vulkan-cross:
runs-on: ubuntu-24.04-arm # note: the former expression `'ubuntu-24.04-arm' || 'ubuntu-24.04'` always picked the first operand (non-empty literals are truthy), so this is equivalent and clearer
container: debian@sha256:653dfb9f86c3782e8369d5f7d29bb8faba1f4bff9025db46e807fa4c22903671
steps:
- uses: actions/checkout@v6
- name: Setup LoongArch
run: |
rm -f /etc/apt/sources.list.d/*
cat << EOF | tee /etc/apt/sources.list.d/debian-ports.list
deb http://snapshot.debian.org/archive/debian/20250515T202920Z/ trixie main
EOF
( echo 'quiet "true";'; \
echo 'APT::Get::Assume-Yes "true";'; \
echo 'APT::Install-Recommends "false";'; \
echo 'Acquire::Check-Valid-Until "false";'; \
echo 'Acquire::Retries "5";'; \
) > /etc/apt/apt.conf.d/99snapshot-repos
apt-get update
apt-get install -y ca-certificates debian-ports-archive-keyring cmake git zip
dpkg --add-architecture loong64
# Add arch-specific repositories for non-amd64 architectures
cat << EOF | tee /etc/apt/sources.list.d/loong64-ports.list
deb [arch=loong64] http://snapshot.debian.org/archive/debian-ports/20250515T194251Z/ sid main
EOF
apt-get update || true ;# Prevent failure due to missing URLs.
apt-get install -y --no-install-recommends \
build-essential \
glslc \
gcc-14-loongarch64-linux-gnu \
g++-14-loongarch64-linux-gnu \
libvulkan-dev:loong64
- name: Build
run: |
cmake -B build -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_VULKAN=ON \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=loongarch64 \
-DCMAKE_C_COMPILER=loongarch64-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=loongarch64-linux-gnu-g++-14 \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-DCMAKE_FIND_ROOT_PATH=/usr/lib/loongarch64-linux-gnu \
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
cmake --build build --config Release -j $(nproc)
ubuntu-24-riscv64-cpu-spacemit-ime-cross:
runs-on: ubuntu-24.04
env:
# Make sure this is in sync with build-cache.yml
SPACEMIT_IME_TOOLCHAIN_VERSION: "1.1.2"
steps:
- uses: actions/checkout@v6
#- name: Use SpacemiT Toolchain Cache
# uses: actions/cache@v5
# id: cache-toolchain
# with:
# path: ./spacemit_toolchain
# key: spacemit-ime-toolchain-v${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}-${{ runner.os }}
- name: Setup SpacemiT Toolchain
#if: steps.cache-toolchain.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-spacemit
with:
path: ./spacemit_toolchain
version: ${{ env.SPACEMIT_IME_TOOLCHAIN_VERSION }}
- name: Build
run: |
export RISCV_ROOT_PATH=${PWD}/spacemit_toolchain
cmake -B build -DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DGGML_CPU_RISCV64_SPACEMIT=ON \
-DGGML_RVV=ON \
-DGGML_RV_ZFH=ON \
-DGGML_RV_ZICBOP=ON \
-DGGML_RV_ZIHINTPAUSE=ON \
-DRISCV64_SPACEMIT_IME_SPEC=RISCV64_SPACEMIT_IME1 \
-DCMAKE_TOOLCHAIN_FILE=${PWD}/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
cmake --build build --config Release -j $(nproc)
================================================
FILE: .github/workflows/build-msys.yml
================================================
name: CI (msys)
on:
# only manual triggers due to low-importance of the workflows
# TODO: for regular runs, provision dedicated self-hosted runners
workflow_dispatch:
# run once every week
schedule:
- cron: '0 0 * * 0'
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
windows-msys2:
runs-on: windows-2025
strategy:
fail-fast: false
matrix:
include:
- { sys: UCRT64, env: ucrt-x86_64, build: Release }
- { sys: CLANG64, env: clang-x86_64, build: Release }
steps:
- name: Clone
uses: actions/checkout@v6
#- name: ccache
# uses: ggml-org/ccache-action@v1.2.16
# with:
# key: windows-msys2
# variant: ccache
# evict-old-files: 1d
# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Setup ${{ matrix.sys }}
uses: msys2/setup-msys2@cafece8e6baf9247cf9b1bf95097b0b983cc558d # v2
with:
update: true
msystem: ${{matrix.sys}}
install: >-
base-devel
git
mingw-w64-${{matrix.env}}-toolchain
mingw-w64-${{matrix.env}}-cmake
mingw-w64-${{matrix.env}}-openblas
- name: Build using CMake
shell: msys2 {0}
run: |
cmake -B build
cmake --build build --config ${{ matrix.build }} -j $(nproc)
- name: Clean after building using CMake
shell: msys2 {0}
run: |
rm -rf build
- name: Build using CMake w/ OpenBLAS
shell: msys2 {0}
run: |
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
cmake --build build --config ${{ matrix.build }} -j $(nproc)
================================================
FILE: .github/workflows/build-riscv.yml
================================================
name: CI (riscv)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-riscv.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-riscv.yml',
'ggml/src/ggml-cpu/arch/riscv/**'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
ubuntu-riscv64-native-sanitizer:
runs-on: RISCV64
continue-on-error: true
strategy:
matrix:
sanitizer: [ADDRESS, THREAD, UNDEFINED]
build_type: [Debug]
steps:
- name: Install dependencies
run: |
sudo apt-get update
# Install necessary packages
sudo apt-get install -y libatomic1 libtsan2 gcc-14 g++-14 rustup cmake build-essential wget ccache git-lfs
# Set gcc-14 and g++-14 as the default compilers
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-14 100
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-14 100
sudo ln -sf /usr/bin/gcc-14 /usr/bin/gcc
sudo ln -sf /usr/bin/g++-14 /usr/bin/g++
# Install Rust stable version
rustup install stable
rustup default stable
git lfs install
- name: GCC version check
run: |
gcc --version
g++ --version
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Setup ccache
run: |
# Unique cache directory per matrix combination
export CCACHE_DIR="$HOME/.ccache/sanitizer-${{ matrix.sanitizer }}-${{ matrix.build_type }}"
mkdir -p "$CCACHE_DIR"
# Configure ccache
ccache --set-config=max_size=5G
ccache --set-config=compression=true
ccache --set-config=compression_level=6
ccache --set-config=cache_dir="$CCACHE_DIR"
ccache --set-config=sloppiness=file_macro,time_macros,include_file_mtime,include_file_ctime
ccache --set-config=hash_dir=false
# Export for subsequent steps
echo "CCACHE_DIR=$CCACHE_DIR" >> $GITHUB_ENV
echo "PATH=/usr/lib/ccache:$PATH" >> $GITHUB_ENV
- name: Build
id: cmake_build
if: ${{ matrix.sanitizer != 'THREAD' }}
run: |
cmake -B build \
-DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DGGML_OPENMP=ON \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
- name: Build (no OpenMP)
id: cmake_build_no_openmp
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
cmake -B build \
-DLLAMA_OPENSSL=OFF \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
================================================
FILE: .github/workflows/build-sanitize.yml
================================================
name: CI (sanitize)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-sanitize.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
ubuntu-latest-sanitizer:
runs-on: ubuntu-latest
continue-on-error: true
strategy:
matrix:
sanitizer: [ADDRESS, THREAD, UNDEFINED]
build_type: [Debug]
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: ubuntu-latest-sanitizer-${{ matrix.sanitizer }}
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install build-essential libssl-dev
- name: Build
id: cmake_build
if: ${{ matrix.sanitizer != 'THREAD' }}
run: |
cmake -B build \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
- name: Build (no OpenMP)
id: cmake_build_no_openmp
if: ${{ matrix.sanitizer == 'THREAD' }}
run: |
cmake -B build \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-DGGML_SANITIZE_${{ matrix.sanitizer }}=ON \
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
-DGGML_OPENMP=OFF
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
ctest -L main --verbose --timeout 900
================================================
FILE: .github/workflows/build-self-hosted.yml
================================================
name: CI (self-hosted)
on:
workflow_dispatch: # allows manual triggering
push:
branches:
- master
paths: [
'.github/workflows/build-self-hosted.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp',
'**/*.cu',
'**/*.cuh',
'**/*.swift',
'**/*.m',
'**/*.metal',
'**/*.comp',
'**/*.glsl',
'**/*.wgsl'
]
pull_request:
types: [opened, synchronize, reopened]
paths: [
'.github/workflows/build-self-hosted.yml',
'**/CMakeLists.txt',
'**/*.cmake',
'**/*.h',
'**/*.hpp',
'**/*.c',
'**/*.cpp',
'**/*.cu',
'**/*.cuh',
'**/*.swift',
'**/*.m',
'**/*.metal',
'**/*.comp',
'**/*.glsl',
'**/*.wgsl'
]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
env:
GGML_NLOOP: 3
GGML_N_THREADS: 1
LLAMA_LOG_COLORS: 1
LLAMA_LOG_PREFIX: 1
LLAMA_LOG_TIMESTAMPS: 1
jobs:
ggml-ci-nvidia-cuda:
runs-on: [self-hosted, Linux, NVIDIA]
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Test
id: ggml-ci
run: |
nvidia-smi
GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
ggml-ci-nvidia-vulkan-cm:
runs-on: [self-hosted, Linux, NVIDIA]
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Test
id: ggml-ci
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
ggml-ci-nvidia-vulkan-cm2:
runs-on: [self-hosted, Linux, NVIDIA, COOPMAT2]
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: Test
id: ggml-ci
run: |
vulkaninfo --summary
GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: provision AMX-compatible machine
#ggml-ci-cpu-amx:
# runs-on: [self-hosted, Linux, CPU, AMX]
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
# - name: Test
# id: ggml-ci
# run: |
# bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: provision AMD GPU machine
# ggml-ci-amd-vulkan:
# runs-on: [self-hosted, Linux, AMD]
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
# - name: Test
# id: ggml-ci
# run: |
# vulkaninfo --summary
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: provision AMD GPU machine
# ggml-ci-amd-rocm:
# runs-on: [self-hosted, Linux, AMD]
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
# - name: Test
# id: ggml-ci
# run: |
# amd-smi static
# GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
# TODO: sandbox Mac runners
# ggml-ci-mac-metal:
# runs-on: [self-hosted, macOS, ARM64]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
#
# - name: Test
# id: ggml-ci
# run: |
# GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
#
# ggml-ci-mac-webgpu:
# runs-on: [self-hosted, macOS, ARM64]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
#
# - name: Dawn Dependency
# id: dawn-depends
# run: |
# DAWN_VERSION="v2.0.0"
# DAWN_OWNER="reeselevine"
# DAWN_REPO="dawn"
# DAWN_ASSET_NAME="Dawn-5e9a4865b1635796ccc77dd30057f2b4002a1355-macos-latest-Release"
# echo "Fetching release asset from https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
# curl -L -o artifact.zip \
# "https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.zip"
# mkdir dawn
# unzip artifact.zip
# tar -xvf ${DAWN_ASSET_NAME}.tar.gz -C dawn --strip-components=1
#
# - name: Test
# id: ggml-ci
# run: |
# GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
# bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
#
# ggml-ci-mac-vulkan:
# runs-on: [self-hosted, macOS, ARM64]
#
# steps:
# - name: Clone
# id: checkout
# uses: actions/checkout@v6
#
# - name: Test
# id: ggml-ci
# run: |
# vulkaninfo --summary
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
# Full ggml CI on a self-hosted Intel Linux runner using the Vulkan backend.
ggml-ci-linux-intel-vulkan:
  runs-on: [self-hosted, Linux, Intel]
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
      with:
        # Do not leave the GITHUB_TOKEN in the git config on this
        # self-hosted machine.
        persist-credentials: false
    - name: Test
      id: ggml-ci
      run: |
        vulkaninfo --summary
        GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
# Reduced ("low perf") ggml CI on a self-hosted Intel runner with the
# OpenVINO backend targeting the GPU device.
ggml-ci-intel-openvino-gpu-low-perf:
  runs-on: [self-hosted, Linux, Intel, OpenVINO]
  env:
    # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
    OPENVINO_VERSION_MAJOR: "2026.0"
    OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    # Local composite action downloads/unpacks the toolkit into ./openvino_toolkit.
    - name: Setup OpenVINO Toolkit
      uses: ./.github/actions/linux-setup-openvino
      with:
        path: ./openvino_toolkit
        version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
        version_full: ${{ env.OPENVINO_VERSION_FULL }}
    # The installer script prompts for confirmation, hence the piped "Y".
    - name: Install OpenVINO dependencies
      run: |
        cd ./openvino_toolkit
        chmod +x ./install_dependencies/install_openvino_dependencies.sh
        echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh
    - name: Test
      id: ggml-ci
      run: |
        source ./openvino_toolkit/setupvars.sh
        GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
================================================
FILE: .github/workflows/build-vulkan.yml
================================================
name: CI (vulkan)

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
    # NOTE(review): the push trigger watches all C/C++/shader sources, while
    # the pull_request trigger below only watches the Vulkan backend
    # directory — presumably intentional to keep PR CI cheap; confirm.
    paths: [
      '.github/workflows/build-vulkan.yml',
      '**/CMakeLists.txt',
      '**/.cmake',
      '**/*.h',
      '**/*.hpp',
      '**/*.c',
      '**/*.cpp',
      '**/*.comp',
      '**/*.glsl'
    ]
  pull_request:
    types: [opened, synchronize, reopened]
    paths: [
      '.github/workflows/build-vulkan.yml',
      'ggml/src/ggml-vulkan/**'
    ]

# Cancel in-flight runs for the same branch; run_id fallback keeps pushes
# to master from cancelling each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1

jobs:
jobs:
ubuntu-24-vulkan-llvmpipe:
runs-on: ubuntu-24.04
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: ubuntu-24-vulkan-llvmpipe
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Dependencies
id: depends
run: |
sudo add-apt-repository -y ppa:kisak/kisak-mesa
sudo apt-get update -y
sudo apt-get install -y build-essential mesa-vulkan-drivers libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev
- name: Get latest Vulkan SDK version
id: vulkan_sdk_version
run: |
echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
- name: Use Vulkan SDK Cache
uses: actions/cache@v5
id: cache-sdk
with:
path: ./vulkan_sdk
key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
- name: Setup Vulkan SDK
if: steps.cache-sdk.outputs.cache-hit != 'true'
uses: ./.github/actions/linux-setup-vulkan-llvmpipe
with:
path: ./vulkan_sdk
version: ${{ env.VULKAN_SDK_VERSION }}
- name: Build
id: cmake_build
run: |
source ./vulkan_sdk/setup-env.sh
cmake -B build \
-DGGML_VULKAN=ON
cmake --build build --config Release -j $(nproc)
- name: Test
id: cmake_test
run: |
cd build
export GGML_VK_VISIBLE_DEVICES=0
export GGML_VK_DISABLE_F16=1
export GGML_VK_DISABLE_COOPMAT=1
# This is using llvmpipe and runs slower than other backends
ctest -L main --verbose --timeout 4800
================================================
FILE: .github/workflows/build.yml
================================================
name: CI

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
    # Trigger on workflow changes, build-system files, and any source or
    # shader file anywhere in the tree.
    paths: [
      '.github/workflows/build.yml',
      '.github/workflows/build-cmake-pkg.yml',
      '**/CMakeLists.txt',
      '**/.cmake',
      '**/*.h',
      '**/*.hpp',
      '**/*.c',
      '**/*.cpp',
      '**/*.cu',
      '**/*.cuh',
      '**/*.swift',
      '**/*.m',
      '**/*.metal',
      '**/*.comp',
      '**/*.glsl',
      '**/*.wgsl'
    ]
  pull_request:
    types: [opened, synchronize, reopened]
    paths: [
      '.github/workflows/build.yml',
      '.github/workflows/build-cmake-pkg.yml',
      '**/CMakeLists.txt',
      '**/.cmake',
      '**/*.h',
      '**/*.hpp',
      '**/*.c',
      '**/*.cpp',
      '**/*.cu',
      '**/*.cuh',
      '**/*.swift',
      '**/*.m',
      '**/*.metal',
      '**/*.comp',
      '**/*.glsl',
      '**/*.wgsl'
    ]

# Cancel in-flight runs for the same branch; run_id fallback keeps pushes
# to master from cancelling each other.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

env:
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1

jobs:
jobs:
# Delegates to the reusable build-cmake-pkg workflow (validates consuming
# llama.cpp via its installed CMake package config).
build-cmake-pkg:
  uses: ./.github/workflows/build-cmake-pkg.yml
# Apple Silicon build with Metal (BF16, shader debug) and RPC enabled, plus
# a leak check of test-thread-safety under real inference.
macOS-latest-arm64:
  runs-on: macos-latest
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: macOS-latest-arm64
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    # "leaks -atExit" runs the binary and reports leaked allocations at exit.
    - name: Build
      id: cmake_build
      run: |
        sysctl -a
        cmake -B build \
          -DCMAKE_BUILD_RPATH="@loader_path" \
          -DLLAMA_FATAL_WARNINGS=ON \
          -DLLAMA_BUILD_BORINGSSL=ON \
          -DGGML_METAL_USE_BF16=ON \
          -DGGML_METAL_EMBED_LIBRARY=OFF \
          -DGGML_METAL_SHADER_DEBUG=ON \
          -DGGML_RPC=ON
        time cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
        leaks -atExit -- ./build/bin/test-thread-safety -hf ggml-org/gemma-3-270m-qat-GGUF -ngl 99 -p "$(printf 'hello %.0s' {1..128})" -n 16 -c 512 -ub 32 -np 2 -t 2 -lv 1
    # test-llama-archs is excluded here (see also the OpenVINO job's TODO).
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main -E "test-llama-archs" --verbose --timeout 900
# Intel macOS build; Metal is off (see comment below), RPC is on.
macOS-latest-x64:
  runs-on: macos-15-intel
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: macOS-latest-x64
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Build
      id: cmake_build
      run: |
        sysctl -a
        # Metal is disabled due to intermittent failures with Github runners not having a GPU:
        # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
        cmake -B build \
          -DCMAKE_BUILD_RPATH="@loader_path" \
          -DLLAMA_FATAL_WARNINGS=ON \
          -DLLAMA_BUILD_BORINGSSL=ON \
          -DGGML_METAL=OFF \
          -DGGML_RPC=ON \
          -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3
        time cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose --timeout 900
# Apple Silicon WebGPU build against a prebuilt Dawn release; Metal and
# BLAS are disabled so the WebGPU backend is what gets tested.
macOS-latest-arm64-webgpu:
  runs-on: macos-latest
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: macOS-latest-arm64-webgpu
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    # Fetch a pinned prebuilt Dawn binary release (version + commit hash in
    # the asset name) and unpack it into ./dawn.
    - name: Dawn Dependency
      id: dawn-depends
      run: |
        DAWN_VERSION="v20260317.182325"
        DAWN_OWNER="google"
        DAWN_REPO="dawn"
        DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-macos-latest-Release"
        echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
        curl -L -o artifact.tar.gz \
          "https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
        mkdir dawn
        tar -xvf artifact.tar.gz -C dawn --strip-components=1
    - name: Build
      id: cmake_build
      run: |
        export CMAKE_PREFIX_PATH=dawn
        cmake -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DGGML_WEBGPU=ON -DGGML_METAL=OFF -DGGML_BLAS=OFF
        time cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose --timeout 900
# CPU-only build/test matrix across x64, arm64, s390x (big-endian) and
# ppc64le. The s390x leg swaps model endianness and uses a pre-converted
# big-endian model for the llama2c test.
ubuntu-cpu:
  strategy:
    matrix:
      include:
        - build: 'x64'
          os: ubuntu-22.04
        - build: 'arm64'
          os: ubuntu-24.04-arm
        - build: 's390x'
          os: ubuntu-24.04-s390x
        - build: 'ppc64le'
          os: ubuntu-24.04-ppc64le
  runs-on: ${{ matrix.os }}
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    # ccache is skipped on the s390x/ppc64le partner runners.
    - name: ccache
      if: ${{ matrix.build != 's390x' && matrix.build != 'ppc64le' }}
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-cpu-${{ matrix.build }}
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Build Dependencies
      id: build_depends
      run: |
        sudo apt-get update
        sudo apt-get install -y --no-install-recommends \
          python3 python3-pip python3-dev python3-wheel \
          libjpeg-dev build-essential libssl-dev \
          git-lfs
    # Force GCC 14 on the 24.04 images (applies to arm64/s390x/ppc64le legs).
    - name: Toolchain workaround (GCC 14)
      if: ${{ contains(matrix.os, 'ubuntu-24.04') }}
      run: |
        sudo apt-get install -y gcc-14 g++-14
        echo "CC=gcc-14" >> "$GITHUB_ENV"
        echo "CXX=g++-14" >> "$GITHUB_ENV"
    - name: Python Dependencies
      id: python_depends
      run: |
        export PIP_BREAK_SYSTEM_PACKAGES="1"
        python3 -m pip install --upgrade pip setuptools
        pip3 install ./gguf-py
    # Convert any checked-out GGUF test models to big-endian for s390x;
    # the conversion script prompts, hence the piped YES.
    - name: Swap Endianness
      id: endianness
      if: ${{ matrix.build == 's390x' }}
      run: |
        for f in models/*.gguf; do
          echo YES | python3 gguf-py/gguf/scripts/gguf_convert_endian.py $f big
        done
    - name: Build
      id: cmake_build
      run: |
        cmake -B build \
          -DLLAMA_FATAL_WARNINGS=ON \
          -DGGML_RPC=ON
        time cmake --build build --config Release -j $(nproc)
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose --timeout 900
    # End-to-end check: convert a tiny llama2.c model to GGUF and run it.
    - name: Test llama2c conversion
      id: llama2c_test
      if: ${{ matrix.build != 's390x' }}
      run: |
        cd build
        echo "Fetch tokenizer"
        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
        echo "Fetch llama2c model"
        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
        ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
        ./bin/llama-completion -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
    # s390x uses a pre-converted big-endian GGUF instead of converting.
    - name: Test llama2c (s390x)
      id: llama2c_test_s390x
      if: ${{ matrix.build == 's390x' }}
      run: |
        cd build
        echo "Fetch llama2c big-endian model"
        wget https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K-be.gguf
        ./bin/llama-completion -m stories260K-be.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
# RPC backend smoke build/test; continue-on-error keeps this advisory so
# RPC flakiness does not block CI.
ubuntu-latest-rpc:
  runs-on: ubuntu-latest
  continue-on-error: true
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: Dependencies
      id: depends
      run: |
        sudo apt-get update
        sudo apt-get install build-essential libssl-dev ninja-build
    - name: Build
      id: cmake_build
      run: |
        cmake -B build \
          -G "Ninja" \
          -DCMAKE_BUILD_TYPE=Release \
          -DGGML_RPC=ON
        time cmake --build build --config Release -j $(nproc)
    # NOTE(review): no ctest --timeout here, unlike the other jobs — confirm
    # whether that is intentional.
    - name: Test
      id: cmake_test
      run: |
        cd build
        ctest -L main --verbose
# Vulkan build-only check (no tests) on x64 and arm64, with the dynamic
# backend loader and all CPU variants enabled.
ubuntu-24-vulkan:
  strategy:
    matrix:
      include:
        - build: 'x64'
          os: ubuntu-24.04
        - build: 'arm64'
          os: ubuntu-24.04-arm
  runs-on: ${{ matrix.os }}
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: Dependencies
      id: depends
      run: |
        sudo apt-get update
        sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev libssl-dev ninja-build
        echo "CC=gcc-14" >> "$GITHUB_ENV"
        echo "CXX=g++-14" >> "$GITHUB_ENV"
    - name: Configure
      id: cmake_configure
      run: |
        cmake -B build \
          -G "Ninja" \
          -DCMAKE_BUILD_TYPE=RelWithDebInfo \
          -DGGML_BACKEND_DL=ON \
          -DGGML_CPU_ALL_VARIANTS=ON \
          -DGGML_VULKAN=ON
    - name: Build
      id: cmake_build
      run: |
        time cmake --build build -j $(nproc)
# WebGPU build/test on Linux: Vulkan SDK (cached) + prebuilt Dawn, with
# tests running on the llvmpipe software driver from the kisak-mesa PPA.
ubuntu-24-webgpu:
  runs-on: ubuntu-24.04
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-24-webgpu
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Dependencies
      id: depends
      run: |
        sudo add-apt-repository -y ppa:kisak/kisak-mesa
        sudo apt-get update -y
        sudo apt-get install -y build-essential mesa-vulkan-drivers \
          libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libssl-dev
    - name: Get latest Vulkan SDK version
      id: vulkan_sdk_version
      run: |
        echo "VULKAN_SDK_VERSION=$(curl https://vulkan.lunarg.com/sdk/latest/linux.txt)" >> "$GITHUB_ENV"
    - name: Use Vulkan SDK Cache
      uses: actions/cache@v5
      id: cache-sdk
      with:
        path: ./vulkan_sdk
        key: vulkan-sdk-${{ env.VULKAN_SDK_VERSION }}-${{ runner.os }}
    # Only download/install the SDK on a cache miss.
    - name: Setup Vulkan SDK
      if: steps.cache-sdk.outputs.cache-hit != 'true'
      uses: ./.github/actions/linux-setup-vulkan
      with:
        path: ./vulkan_sdk
        version: ${{ env.VULKAN_SDK_VERSION }}
    # Fetch a pinned prebuilt Dawn release and unpack it into ./dawn.
    - name: Dawn Dependency
      id: dawn-depends
      run: |
        sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev
        DAWN_VERSION="v20260317.182325"
        DAWN_OWNER="google"
        DAWN_REPO="dawn"
        DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
        echo "Fetching release asset from https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
        curl -L -o artifact.tar.gz \
          "https://github.com/google/dawn/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
        mkdir dawn
        tar -xvf artifact.tar.gz -C dawn --strip-components=1
    - name: Build
      id: cmake_build
      run: |
        export Dawn_DIR=dawn/lib64/cmake/Dawn
        cmake -B build \
          -DGGML_WEBGPU=ON
        time cmake --build build --config Release -j $(nproc)
    - name: Test
      id: cmake_test
      run: |
        cd build
        # This is using llvmpipe and runs slower than other backends
        ctest -L main --verbose --timeout 900
# WASM WebGPU build via Emscripten + emdawnwebgpu; builds only the
# test-backend-ops target (no test execution).
ubuntu-24-webgpu-wasm:
  # Fix: the previous expression `${{ 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}`
  # was dead code — a non-empty string literal is always truthy in GitHub
  # Actions expressions, so `||` always returned the left operand.
  runs-on: ubuntu-24.04-arm
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: Install Emscripten
      run: |
        git clone https://github.com/emscripten-core/emsdk.git
        cd emsdk
        ./emsdk install latest
        ./emsdk activate latest
    # Fetch the emdawnwebgpu package matching the pinned Dawn tag.
    - name: Fetch emdawnwebgpu
      run: |
        DAWN_TAG="v20260317.182325"
        EMDAWN_PKG="emdawnwebgpu_pkg-${DAWN_TAG}.zip"
        echo "Downloading ${EMDAWN_PKG}"
        curl -L -o emdawn.zip \
          "https://github.com/google/dawn/releases/download/${DAWN_TAG}/${EMDAWN_PKG}"
        unzip emdawn.zip
    - name: Build WASM WebGPU
      run: |
        source emsdk/emsdk_env.sh
        emcmake cmake -B build-wasm \
          -G "Ninja" \
          -DCMAKE_BUILD_TYPE=Release \
          -DGGML_WEBGPU=ON \
          -DLLAMA_OPENSSL=OFF \
          -DEMDAWNWEBGPU_DIR=emdawnwebgpu_pkg
        time cmake --build build-wasm --config Release --target test-backend-ops -j $(nproc)
# HIP/ROCm build-only check inside the ROCm dev container, using CMake's
# native HIP language support.
ubuntu-22-hip:
  runs-on: ubuntu-22.04
  container: rocm/dev-ubuntu-22.04:6.1.2
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: Dependencies
      id: depends
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev libssl-dev rocwmma-dev
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-22-hip
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    # hipconfig -l points at the ROCm clang bundled with the container.
    - name: Build with native CMake HIP support
      id: cmake_build
      run: |
        cmake -B build -S . \
          -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
          -DGGML_HIP_ROCWMMA_FATTN=ON \
          -DGGML_HIP=ON
        cmake --build build --config Release -j $(nproc)
# MUSA (Moore Threads GPU) build-only check inside the vendor dev container.
ubuntu-22-musa:
  runs-on: ubuntu-22.04
  container: mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    # The container runs as root, so apt-get is used without sudo here.
    - name: Dependencies
      id: depends
      run: |
        apt-get update
        apt-get install -y build-essential git cmake libssl-dev
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-22-musa
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Build with native CMake MUSA support
      id: cmake_build
      run: |
        cmake -B build -S . \
          -DGGML_MUSA=ON
        time cmake --build build --config Release -j $(nproc)
# SYCL (oneAPI dpcpp + MKL) build-only check; advisory via continue-on-error.
ubuntu-22-sycl:
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    # NOTE(review): the repository is checked out twice in this job (here and
    # in the "Clone" step below); the second checkout appears redundant.
    - uses: actions/checkout@v6
    - name: add oneAPI to apt
      shell: bash
      run: |
        cd /tmp
        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
    - name: install oneAPI dpcpp compiler
      shell: bash
      run: |
        sudo apt update
        sudo apt install intel-oneapi-compiler-dpcpp-cpp libssl-dev
    - name: install oneAPI MKL library
      shell: bash
      run: |
        sudo apt install intel-oneapi-mkl-devel
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-22-sycl
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    # setvars.sh puts icx/icpx on PATH before configuring.
    - name: Build
      id: cmake_build
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake -B build \
          -DGGML_SYCL=ON \
          -DCMAKE_C_COMPILER=icx \
          -DCMAKE_CXX_COMPILER=icpx
        time cmake --build build --config Release -j $(nproc)
# Same as ubuntu-22-sycl but with FP16 support (GGML_SYCL_F16=ON) and Ninja.
ubuntu-22-sycl-fp16:
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    # NOTE(review): duplicated checkout (also in the "Clone" step below),
    # same as the ubuntu-22-sycl job.
    - uses: actions/checkout@v6
    - name: add oneAPI to apt
      shell: bash
      run: |
        cd /tmp
        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
    - name: install oneAPI dpcpp compiler
      shell: bash
      run: |
        sudo apt update
        sudo apt install intel-oneapi-compiler-dpcpp-cpp libssl-dev ninja-build
    - name: install oneAPI MKL library
      shell: bash
      run: |
        sudo apt install intel-oneapi-mkl-devel
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    - name: ccache
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-22-sycl-fp16
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Build
      id: cmake_build
      run: |
        source /opt/intel/oneapi/setvars.sh
        cmake -B build \
          -G "Ninja" \
          -DCMAKE_BUILD_TYPE=Release \
          -DGGML_SYCL=ON \
          -DCMAKE_C_COMPILER=icx \
          -DCMAKE_CXX_COMPILER=icpx \
          -DGGML_SYCL_F16=ON
        time cmake --build build --config Release -j $(nproc)
# OpenVINO build/test matrix: CPU variant on a GitHub-hosted runner, GPU
# variant on a self-hosted Intel machine. The runner spec is stored as a
# JSON string so one matrix field covers both a single label and a label list.
ubuntu-24-openvino:
  name: ubuntu-24-openvino-${{ matrix.openvino_device }}
  strategy:
    matrix:
      include:
        - variant: cpu
          runner: '"ubuntu-24.04"'
          openvino_device: "CPU"
        - variant: gpu
          runner: '["self-hosted","Linux","X64","Intel"]'
          openvino_device: "GPU"
  runs-on: ${{ fromJSON(matrix.runner) }}
  env:
    # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
    OPENVINO_VERSION_MAJOR: "2026.0"
    OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"
  steps:
    - name: Clone
      id: checkout
      uses: actions/checkout@v6
    # ccache and the toolkit cache are only used on GitHub-hosted runners;
    # the self-hosted machine manages its own state.
    - name: ccache
      if: runner.environment == 'github-hosted'
      uses: ggml-org/ccache-action@v1.2.21
      with:
        key: ubuntu-24-openvino-${{ matrix.variant }}-no-preset-v1
        evict-old-files: 1d
        save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
    - name: Dependencies
      id: depends
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential libssl-dev libtbb12 cmake ninja-build python3-pip
        sudo apt-get install -y ocl-icd-opencl-dev opencl-headers opencl-clhpp-headers intel-opencl-icd
    - name: Use OpenVINO Toolkit Cache
      if: runner.environment == 'github-hosted'
      uses: actions/cache@v5
      id: cache-openvino
      with:
        path: ./openvino_toolkit
        key: openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }}
    - name: Setup OpenVINO Toolkit
      if: steps.cache-openvino.outputs.cache-hit != 'true'
      uses: ./.github/actions/linux-setup-openvino
      with:
        path: ./openvino_toolkit
        version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
        version_full: ${{ env.OPENVINO_VERSION_FULL }}
    # The installer script prompts for confirmation, hence the piped "Y".
    - name: Install OpenVINO dependencies
      run: |
        cd ./openvino_toolkit
        chmod +x ./install_dependencies/install_openvino_dependencies.sh
        echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh
    - name: Build
      id: cmake_build
      run: |
        source ./openvino_toolkit/setupvars.sh
        cmake -B build/ReleaseOV -G Ninja \
          -DCMAKE_BUILD_TYPE=Release \
          -DGGML_OPENVINO=ON
        time cmake --build build/ReleaseOV --config Release -j $(nproc)
    - name: Test
      id: cmake_test
      # TODO: fix and re-enable the `test-llama-archs` test below
      run: |
        cd ${{ github.workspace }}
        if [ "${{ matrix.openvino_device }}" = "GPU" ]; then
          export GGML_OPENVINO_DEVICE=GPU
        fi
        ctest --test-dir build/ReleaseOV -L main -E "test-llama-archs" --verbose --timeout 2000
windows-latest:
runs-on: windows-2025
env:
OPENBLAS_VERSION: 0.3.23
SDE_VERSION: 9.33.0-2024-01-07
VULKAN_VERSION: 1.4.313.2
strategy:
matrix:
include:
- build: 'cpu-x64 (static)'
arch: 'x64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF'
- build: 'openblas-x64'
arch: 'x64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
- build: 'vulkan-x64'
arch: 'x64'
defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
- build: 'llvm-arm64'
arch: 'arm64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
- build: 'llvm-arm64-opencl-adreno'
arch: 'arm64'
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
steps:
- name: Clone
id: checkout
uses: actions/checkout@v6
- name: ccache
uses: ggml-org/ccache-action@v1.2.21
with:
key: windows-latest-${{ matrix.build }}
variant: ccache
evict-old-files: 1d
save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
- name: Download OpenBLAS
id: get_openblas
gitextract_xcrsk4vf/
├── .clang-format
├── .clang-tidy
├── .devops/
│ ├── cann.Dockerfile
│ ├── cpu.Dockerfile
│ ├── cuda-new.Dockerfile
│ ├── cuda.Dockerfile
│ ├── intel.Dockerfile
│ ├── llama-cli-cann.Dockerfile
│ ├── llama-cpp-cuda.srpm.spec
│ ├── llama-cpp.srpm.spec
│ ├── musa.Dockerfile
│ ├── nix/
│ │ ├── apps.nix
│ │ ├── devshells.nix
│ │ ├── docker.nix
│ │ ├── jetson-support.nix
│ │ ├── nixpkgs-instances.nix
│ │ ├── package-gguf-py.nix
│ │ ├── package.nix
│ │ ├── python-scripts.nix
│ │ ├── scope.nix
│ │ └── sif.nix
│ ├── openvino.Dockerfile
│ ├── rocm.Dockerfile
│ ├── s390x.Dockerfile
│ ├── tools.sh
│ └── vulkan.Dockerfile
├── .dockerignore
├── .ecrc
├── .editorconfig
├── .flake8
├── .gemini/
│ └── settings.json
├── .gitattributes
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── 010-bug-compilation.yml
│ │ ├── 011-bug-results.yml
│ │ ├── 019-bug-misc.yml
│ │ ├── 020-enhancement.yml
│ │ ├── 030-research.yml
│ │ ├── 040-refactor.yml
│ │ └── config.yml
│ ├── actions/
│ │ ├── get-tag-name/
│ │ │ └── action.yml
│ │ ├── install-exe/
│ │ │ └── action.yml
│ │ ├── linux-setup-openvino/
│ │ │ └── action.yml
│ │ ├── linux-setup-spacemit/
│ │ │ └── action.yml
│ │ ├── linux-setup-vulkan/
│ │ │ └── action.yml
│ │ ├── unarchive-tar/
│ │ │ └── action.yml
│ │ ├── windows-setup-cuda/
│ │ │ └── action.yml
│ │ └── windows-setup-rocm/
│ │ └── action.yml
│ ├── labeler.yml
│ ├── pull_request_template.md
│ └── workflows/
│ ├── ai-issues.yml
│ ├── bench.yml.disabled
│ ├── build-3rd-party.yml
│ ├── build-android.yml
│ ├── build-apple.yml
│ ├── build-cache.yml
│ ├── build-cann.yml
│ ├── build-cmake-pkg.yml
│ ├── build-cross.yml
│ ├── build-msys.yml
│ ├── build-riscv.yml
│ ├── build-sanitize.yml
│ ├── build-self-hosted.yml
│ ├── build-vulkan.yml
│ ├── build.yml
│ ├── check-vendor.yml
│ ├── close-issue.yml
│ ├── copilot-setup-steps.yml
│ ├── docker.yml
│ ├── editorconfig.yml
│ ├── gguf-publish.yml
│ ├── hip-quality-check.yml
│ ├── labeler.yml
│ ├── pre-tokenizer-hashes.yml
│ ├── python-check-requirements.yml
│ ├── python-lint.yml
│ ├── python-type-check.yml
│ ├── release.yml
│ ├── server-sanitize.yml
│ ├── server-self-hosted.yml
│ ├── server-webui.yml
│ ├── server.yml
│ ├── update-ops-docs.yml
│ └── winget.yml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── AGENTS.md
├── AUTHORS
├── CLAUDE.md
├── CMakeLists.txt
├── CMakePresets.json
├── CODEOWNERS
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── benches/
│ ├── dgx-spark/
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.html
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.json
│ │ ├── aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547_allresults.json
│ │ └── dgx-spark.md
│ ├── mac-m2-ultra/
│ │ └── mac-m2-ultra.md
│ └── nemotron/
│ └── nemotron-dgx-spark.md
├── ci/
│ ├── README-MUSA.md
│ ├── README.md
│ └── run.sh
├── cmake/
│ ├── arm64-apple-clang.cmake
│ ├── arm64-windows-llvm.cmake
│ ├── build-info.cmake
│ ├── common.cmake
│ ├── download-models.cmake
│ ├── git-vars.cmake
│ ├── license.cmake
│ ├── llama-config.cmake.in
│ ├── llama.pc.in
│ ├── riscv64-spacemit-linux-gnu-gcc.cmake
│ └── x64-windows-llvm.cmake
├── common/
│ ├── CMakeLists.txt
│ ├── arg.cpp
│ ├── arg.h
│ ├── base64.hpp
│ ├── build-info.cpp.in
│ ├── chat-auto-parser-generator.cpp
│ ├── chat-auto-parser-helpers.cpp
│ ├── chat-auto-parser-helpers.h
│ ├── chat-auto-parser.h
│ ├── chat-diff-analyzer.cpp
│ ├── chat-peg-parser.cpp
│ ├── chat-peg-parser.h
│ ├── chat.cpp
│ ├── chat.h
│ ├── common.cpp
│ ├── common.h
│ ├── console.cpp
│ ├── console.h
│ ├── debug.cpp
│ ├── debug.h
│ ├── download.cpp
│ ├── download.h
│ ├── hf-cache.cpp
│ ├── hf-cache.h
│ ├── http.h
│ ├── jinja/
│ │ ├── README.md
│ │ ├── caps.cpp
│ │ ├── caps.h
│ │ ├── lexer.cpp
│ │ ├── lexer.h
│ │ ├── parser.cpp
│ │ ├── parser.h
│ │ ├── runtime.cpp
│ │ ├── runtime.h
│ │ ├── string.cpp
│ │ ├── string.h
│ │ ├── utils.h
│ │ ├── value.cpp
│ │ └── value.h
│ ├── json-partial.cpp
│ ├── json-partial.h
│ ├── json-schema-to-grammar.cpp
│ ├── json-schema-to-grammar.h
│ ├── llguidance.cpp
│ ├── log.cpp
│ ├── log.h
│ ├── ngram-cache.cpp
│ ├── ngram-cache.h
│ ├── ngram-map.cpp
│ ├── ngram-map.h
│ ├── ngram-mod.cpp
│ ├── ngram-mod.h
│ ├── peg-parser.cpp
│ ├── peg-parser.h
│ ├── preset.cpp
│ ├── preset.h
│ ├── reasoning-budget.cpp
│ ├── reasoning-budget.h
│ ├── regex-partial.cpp
│ ├── regex-partial.h
│ ├── sampling.cpp
│ ├── sampling.h
│ ├── speculative.cpp
│ ├── speculative.h
│ ├── unicode.cpp
│ └── unicode.h
├── convert_hf_to_gguf.py
├── convert_hf_to_gguf_update.py
├── convert_llama_ggml_to_gguf.py
├── convert_lora_to_gguf.py
├── docs/
│ ├── android.md
│ ├── autoparser.md
│ ├── backend/
│ │ ├── BLIS.md
│ │ ├── CANN.md
│ │ ├── CUDA-FEDORA.md
│ │ ├── OPENCL.md
│ │ ├── OPENVINO.md
│ │ ├── SYCL.md
│ │ ├── VirtGPU/
│ │ │ ├── configuration.md
│ │ │ └── development.md
│ │ ├── VirtGPU.md
│ │ ├── ZenDNN.md
│ │ ├── snapdragon/
│ │ │ ├── CMakeUserPresets.json
│ │ │ ├── README.md
│ │ │ ├── developer.md
│ │ │ └── windows.md
│ │ └── zDNN.md
│ ├── build-riscv64-spacemit.md
│ ├── build-s390x.md
│ ├── build.md
│ ├── development/
│ │ ├── HOWTO-add-model.md
│ │ ├── debugging-tests.md
│ │ ├── llama-star/
│ │ │ └── idea-arch.key
│ │ ├── parsing.md
│ │ └── token_generation_performance_tips.md
│ ├── docker.md
│ ├── function-calling.md
│ ├── install.md
│ ├── llguidance.md
│ ├── multimodal/
│ │ ├── MobileVLM.md
│ │ ├── gemma3.md
│ │ ├── glmedge.md
│ │ ├── granitevision.md
│ │ ├── llava.md
│ │ ├── minicpmo2.6.md
│ │ ├── minicpmo4.0.md
│ │ ├── minicpmv2.5.md
│ │ ├── minicpmv2.6.md
│ │ ├── minicpmv4.0.md
│ │ └── minicpmv4.5.md
│ ├── multimodal.md
│ ├── ops/
│ │ ├── BLAS.csv
│ │ ├── CANN.csv
│ │ ├── CPU.csv
│ │ ├── CUDA.csv
│ │ ├── Metal.csv
│ │ ├── OpenCL.csv
│ │ ├── SYCL.csv
│ │ ├── Vulkan.csv
│ │ ├── WebGPU.csv
│ │ ├── ZenDNN.csv
│ │ └── zDNN.csv
│ ├── ops.md
│ ├── preset.md
│ └── speculative.md
├── examples/
│ ├── CMakeLists.txt
│ ├── batched/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── batched.cpp
│ ├── batched.swift/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── Package.swift
│ │ ├── README.md
│ │ └── Sources/
│ │ └── main.swift
│ ├── convert-llama2c-to-ggml/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── convert-llama2c-to-ggml.cpp
│ ├── convert_legacy_llama.py
│ ├── debug/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── debug.cpp
│ ├── deprecation-warning/
│ │ ├── README.md
│ │ └── deprecation-warning.cpp
│ ├── diffusion/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── diffusion-cli.cpp
│ ├── embedding/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── embedding.cpp
│ ├── eval-callback/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── eval-callback.cpp
│ ├── gen-docs/
│ │ ├── CMakeLists.txt
│ │ └── gen-docs.cpp
│ ├── gguf/
│ │ ├── CMakeLists.txt
│ │ └── gguf.cpp
│ ├── gguf-hash/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── deps/
│ │ │ ├── rotate-bits/
│ │ │ │ ├── package.json
│ │ │ │ └── rotate-bits.h
│ │ │ ├── sha1/
│ │ │ │ ├── package.json
│ │ │ │ ├── sha1.c
│ │ │ │ └── sha1.h
│ │ │ ├── sha256/
│ │ │ │ ├── package.json
│ │ │ │ ├── sha256.c
│ │ │ │ └── sha256.h
│ │ │ └── xxhash/
│ │ │ ├── clib.json
│ │ │ ├── xxhash.c
│ │ │ └── xxhash.h
│ │ └── gguf-hash.cpp
│ ├── idle/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── idle.cpp
│ ├── json_schema_pydantic_example.py
│ ├── json_schema_to_grammar.py
│ ├── llama.android/
│ │ ├── .gitignore
│ │ ├── app/
│ │ │ ├── .gitignore
│ │ │ ├── build.gradle.kts
│ │ │ ├── proguard-rules.pro
│ │ │ └── src/
│ │ │ └── main/
│ │ │ ├── AndroidManifest.xml
│ │ │ ├── java/
│ │ │ │ └── com/
│ │ │ │ └── example/
│ │ │ │ └── llama/
│ │ │ │ ├── MainActivity.kt
│ │ │ │ └── MessageAdapter.kt
│ │ │ └── res/
│ │ │ ├── drawable/
│ │ │ │ ├── bg_assistant_message.xml
│ │ │ │ ├── bg_user_message.xml
│ │ │ │ ├── ic_launcher_background.xml
│ │ │ │ ├── ic_launcher_foreground.xml
│ │ │ │ ├── outline_folder_open_24.xml
│ │ │ │ └── outline_send_24.xml
│ │ │ ├── layout/
│ │ │ │ ├── activity_main.xml
│ │ │ │ ├── item_message_assistant.xml
│ │ │ │ └── item_message_user.xml
│ │ │ ├── mipmap-anydpi/
│ │ │ │ ├── ic_launcher.xml
│ │ │ │ └── ic_launcher_round.xml
│ │ │ ├── values/
│ │ │ │ ├── colors.xml
│ │ │ │ ├── strings.xml
│ │ │ │ └── themes.xml
│ │ │ └── xml/
│ │ │ ├── backup_rules.xml
│ │ │ └── data_extraction_rules.xml
│ │ ├── build.gradle.kts
│ │ ├── gradle/
│ │ │ ├── libs.versions.toml
│ │ │ └── wrapper/
│ │ │ ├── gradle-wrapper.jar
│ │ │ └── gradle-wrapper.properties
│ │ ├── gradle.properties
│ │ ├── gradlew
│ │ ├── lib/
│ │ │ ├── .gitignore
│ │ │ ├── build.gradle.kts
│ │ │ ├── consumer-rules.pro
│ │ │ ├── proguard-rules.pro
│ │ │ └── src/
│ │ │ ├── androidTest/
│ │ │ │ └── java/
│ │ │ │ └── android/
│ │ │ │ └── llama/
│ │ │ │ └── cpp/
│ │ │ │ └── ExampleInstrumentedTest.kt
│ │ │ ├── main/
│ │ │ │ ├── AndroidManifest.xml
│ │ │ │ ├── cpp/
│ │ │ │ │ ├── CMakeLists.txt
│ │ │ │ │ ├── ai_chat.cpp
│ │ │ │ │ └── logging.h
│ │ │ │ └── java/
│ │ │ │ └── com/
│ │ │ │ └── arm/
│ │ │ │ └── aichat/
│ │ │ │ ├── AiChat.kt
│ │ │ │ ├── InferenceEngine.kt
│ │ │ │ ├── gguf/
│ │ │ │ │ ├── FileType.kt
│ │ │ │ │ ├── GgufMetadata.kt
│ │ │ │ │ └── GgufMetadataReader.kt
│ │ │ │ └── internal/
│ │ │ │ ├── InferenceEngineImpl.kt
│ │ │ │ └── gguf/
│ │ │ │ └── GgufMetadataReaderImpl.kt
│ │ │ └── test/
│ │ │ └── java/
│ │ │ └── android/
│ │ │ └── llama/
│ │ │ └── cpp/
│ │ │ └── ExampleUnitTest.kt
│ │ └── settings.gradle.kts
│ ├── llama.swiftui/
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── llama.cpp.swift/
│ │ │ └── LibLlama.swift
│ │ ├── llama.swiftui/
│ │ │ ├── Assets.xcassets/
│ │ │ │ ├── AppIcon.appiconset/
│ │ │ │ │ └── Contents.json
│ │ │ │ └── Contents.json
│ │ │ ├── Models/
│ │ │ │ └── LlamaState.swift
│ │ │ ├── Resources/
│ │ │ │ └── models/
│ │ │ │ └── .gitignore
│ │ │ ├── UI/
│ │ │ │ ├── ContentView.swift
│ │ │ │ ├── DownloadButton.swift
│ │ │ │ ├── InputButton.swift
│ │ │ │ └── LoadCustomButton.swift
│ │ │ └── llama_swiftuiApp.swift
│ │ └── llama.swiftui.xcodeproj/
│ │ ├── project.pbxproj
│ │ └── project.xcworkspace/
│ │ └── contents.xcworkspacedata
│ ├── llama.vim
│ ├── lookahead/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── lookahead.cpp
│ ├── lookup/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── lookup-create.cpp
│ │ ├── lookup-merge.cpp
│ │ ├── lookup-stats.cpp
│ │ └── lookup.cpp
│ ├── model-conversion/
│ │ ├── .gitignore
│ │ ├── Makefile
│ │ ├── README.md
│ │ ├── requirements.txt
│ │ └── scripts/
│ │ ├── causal/
│ │ │ ├── compare-embeddings-logits.sh
│ │ │ ├── compare-logits.py
│ │ │ ├── convert-model.sh
│ │ │ ├── modelcard.template
│ │ │ ├── run-casual-gen-embeddings-org.py
│ │ │ ├── run-converted-model-embeddings-logits.sh
│ │ │ ├── run-converted-model.sh
│ │ │ └── run-org-model.py
│ │ ├── embedding/
│ │ │ ├── compare-embeddings-logits.sh
│ │ │ ├── convert-model.sh
│ │ │ ├── modelcard.template
│ │ │ ├── run-converted-model.sh
│ │ │ └── run-original-model.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── check-nmse.py
│ │ ├── common.py
│ │ ├── compare_tokens.py
│ │ ├── create-collection-add-model.sh
│ │ ├── curl-embedding-server.sh
│ │ ├── hf-add-model-to-collection.py
│ │ ├── hf-create-collection.py
│ │ ├── hf-create-model.py
│ │ ├── hf-upload-gguf-model.py
│ │ ├── inspect-converted-model.sh
│ │ ├── inspect-org-model.py
│ │ ├── perplexity-gen.sh
│ │ ├── perplexity-run-simple.sh
│ │ ├── perplexity-run.sh
│ │ ├── quantize.sh
│ │ ├── run-embedding-server.sh
│ │ └── semantic_check.py
│ ├── parallel/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── parallel.cpp
│ ├── passkey/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── passkey.cpp
│ ├── pydantic_models_to_grammar.py
│ ├── pydantic_models_to_grammar_examples.py
│ ├── reason-act.sh
│ ├── regex_to_grammar.py
│ ├── retrieval/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── retrieval.cpp
│ ├── save-load-state/
│ │ ├── CMakeLists.txt
│ │ └── save-load-state.cpp
│ ├── server-llama2-13B.sh
│ ├── server_embd.py
│ ├── simple/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── simple.cpp
│ ├── simple-chat/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── simple-chat.cpp
│ ├── simple-cmake-pkg/
│ │ ├── .gitignore
│ │ ├── CMakeLists.txt
│ │ └── README.md
│ ├── speculative/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── speculative.cpp
│ ├── speculative-simple/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── speculative-simple.cpp
│ ├── sycl/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── build.sh
│ │ ├── ls-sycl-device.cpp
│ │ ├── run-llama2.sh
│ │ ├── test.sh
│ │ ├── win-build-sycl.bat
│ │ ├── win-run-llama2.bat
│ │ └── win-test.bat
│ ├── training/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── finetune.cpp
│ └── ts-type-to-grammar.sh
├── flake.nix
├── ggml/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── cmake/
│ │ ├── GitVars.cmake
│ │ ├── common.cmake
│ │ └── ggml-config.cmake.in
│ ├── include/
│ │ ├── ggml-alloc.h
│ │ ├── ggml-backend.h
│ │ ├── ggml-blas.h
│ │ ├── ggml-cann.h
│ │ ├── ggml-cpp.h
│ │ ├── ggml-cpu.h
│ │ ├── ggml-cuda.h
│ │ ├── ggml-hexagon.h
│ │ ├── ggml-metal.h
│ │ ├── ggml-opencl.h
│ │ ├── ggml-openvino.h
│ │ ├── ggml-opt.h
│ │ ├── ggml-rpc.h
│ │ ├── ggml-sycl.h
│ │ ├── ggml-virtgpu.h
│ │ ├── ggml-vulkan.h
│ │ ├── ggml-webgpu.h
│ │ ├── ggml-zdnn.h
│ │ ├── ggml-zendnn.h
│ │ ├── ggml.h
│ │ └── gguf.h
│ └── src/
│ ├── CMakeLists.txt
│ ├── ggml-alloc.c
│ ├── ggml-backend-dl.cpp
│ ├── ggml-backend-dl.h
│ ├── ggml-backend-impl.h
│ ├── ggml-backend-reg.cpp
│ ├── ggml-backend.cpp
│ ├── ggml-blas/
│ │ ├── CMakeLists.txt
│ │ └── ggml-blas.cpp
│ ├── ggml-cann/
│ │ ├── CMakeLists.txt
│ │ ├── acl_tensor.cpp
│ │ ├── acl_tensor.h
│ │ ├── aclnn_ops.cpp
│ │ ├── aclnn_ops.h
│ │ ├── common.h
│ │ └── ggml-cann.cpp
│ ├── ggml-common.h
│ ├── ggml-cpu/
│ │ ├── CMakeLists.txt
│ │ ├── amx/
│ │ │ ├── amx.cpp
│ │ │ ├── amx.h
│ │ │ ├── common.h
│ │ │ ├── mmq.cpp
│ │ │ └── mmq.h
│ │ ├── arch/
│ │ │ ├── arm/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ ├── quants.c
│ │ │ │ └── repack.cpp
│ │ │ ├── loongarch/
│ │ │ │ └── quants.c
│ │ │ ├── powerpc/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ └── quants.c
│ │ │ ├── riscv/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ ├── quants.c
│ │ │ │ └── repack.cpp
│ │ │ ├── s390/
│ │ │ │ ├── cpu-feats.cpp
│ │ │ │ └── quants.c
│ │ │ ├── wasm/
│ │ │ │ └── quants.c
│ │ │ └── x86/
│ │ │ ├── cpu-feats.cpp
│ │ │ ├── quants.c
│ │ │ └── repack.cpp
│ │ ├── arch-fallback.h
│ │ ├── binary-ops.cpp
│ │ ├── binary-ops.h
│ │ ├── cmake/
│ │ │ └── FindSIMD.cmake
│ │ ├── common.h
│ │ ├── ggml-cpu-impl.h
│ │ ├── ggml-cpu.c
│ │ ├── ggml-cpu.cpp
│ │ ├── hbm.cpp
│ │ ├── hbm.h
│ │ ├── kleidiai/
│ │ │ ├── kernels.cpp
│ │ │ ├── kernels.h
│ │ │ ├── kleidiai.cpp
│ │ │ └── kleidiai.h
│ │ ├── llamafile/
│ │ │ ├── sgemm.cpp
│ │ │ └── sgemm.h
│ │ ├── ops.cpp
│ │ ├── ops.h
│ │ ├── quants.c
│ │ ├── quants.h
│ │ ├── repack.cpp
│ │ ├── repack.h
│ │ ├── simd-gemm.h
│ │ ├── simd-mappings.h
│ │ ├── spacemit/
│ │ │ ├── ime.cpp
│ │ │ ├── ime.h
│ │ │ ├── ime1_kernels.cpp
│ │ │ └── ime_kernels.h
│ │ ├── traits.cpp
│ │ ├── traits.h
│ │ ├── unary-ops.cpp
│ │ ├── unary-ops.h
│ │ ├── vec.cpp
│ │ └── vec.h
│ ├── ggml-cuda/
│ │ ├── CMakeLists.txt
│ │ ├── acc.cu
│ │ ├── acc.cuh
│ │ ├── add-id.cu
│ │ ├── add-id.cuh
│ │ ├── arange.cu
│ │ ├── arange.cuh
│ │ ├── argmax.cu
│ │ ├── argmax.cuh
│ │ ├── argsort.cu
│ │ ├── argsort.cuh
│ │ ├── binbcast.cu
│ │ ├── binbcast.cuh
│ │ ├── clamp.cu
│ │ ├── clamp.cuh
│ │ ├── common.cuh
│ │ ├── concat.cu
│ │ ├── concat.cuh
│ │ ├── conv-transpose-1d.cu
│ │ ├── conv-transpose-1d.cuh
│ │ ├── conv2d-dw.cu
│ │ ├── conv2d-dw.cuh
│ │ ├── conv2d-transpose.cu
│ │ ├── conv2d-transpose.cuh
│ │ ├── conv2d.cu
│ │ ├── conv2d.cuh
│ │ ├── convert.cu
│ │ ├── convert.cuh
│ │ ├── count-equal.cu
│ │ ├── count-equal.cuh
│ │ ├── cp-async.cuh
│ │ ├── cpy-utils.cuh
│ │ ├── cpy.cu
│ │ ├── cpy.cuh
│ │ ├── cross-entropy-loss.cu
│ │ ├── cross-entropy-loss.cuh
│ │ ├── cumsum.cu
│ │ ├── cumsum.cuh
│ │ ├── dequantize.cuh
│ │ ├── diag.cu
│ │ ├── diag.cuh
│ │ ├── diagmask.cu
│ │ ├── diagmask.cuh
│ │ ├── fattn-common.cuh
│ │ ├── fattn-mma-f16.cuh
│ │ ├── fattn-tile.cu
│ │ ├── fattn-tile.cuh
│ │ ├── fattn-vec.cuh
│ │ ├── fattn-wmma-f16.cu
│ │ ├── fattn-wmma-f16.cuh
│ │ ├── fattn.cu
│ │ ├── fattn.cuh
│ │ ├── fill.cu
│ │ ├── fill.cuh
│ │ ├── gated_delta_net.cu
│ │ ├── gated_delta_net.cuh
│ │ ├── getrows.cu
│ │ ├── getrows.cuh
│ │ ├── ggml-cuda.cu
│ │ ├── gla.cu
│ │ ├── gla.cuh
│ │ ├── im2col.cu
│ │ ├── im2col.cuh
│ │ ├── mean.cu
│ │ ├── mean.cuh
│ │ ├── mma.cuh
│ │ ├── mmf.cu
│ │ ├── mmf.cuh
│ │ ├── mmid.cu
│ │ ├── mmid.cuh
│ │ ├── mmq.cu
│ │ ├── mmq.cuh
│ │ ├── mmvf.cu
│ │ ├── mmvf.cuh
│ │ ├── mmvq.cu
│ │ ├── mmvq.cuh
│ │ ├── norm.cu
│ │ ├── norm.cuh
│ │ ├── opt-step-adamw.cu
│ │ ├── opt-step-adamw.cuh
│ │ ├── opt-step-sgd.cu
│ │ ├── opt-step-sgd.cuh
│ │ ├── out-prod.cu
│ │ ├── out-prod.cuh
│ │ ├── pad.cu
│ │ ├── pad.cuh
│ │ ├── pad_reflect_1d.cu
│ │ ├── pad_reflect_1d.cuh
│ │ ├── pool2d.cu
│ │ ├── pool2d.cuh
│ │ ├── quantize.cu
│ │ ├── quantize.cuh
│ │ ├── reduce_rows.cuh
│ │ ├── roll.cu
│ │ ├── roll.cuh
│ │ ├── rope.cu
│ │ ├── rope.cuh
│ │ ├── scale.cu
│ │ ├── scale.cuh
│ │ ├── set-rows.cu
│ │ ├── set-rows.cuh
│ │ ├── set.cu
│ │ ├── set.cuh
│ │ ├── softcap.cu
│ │ ├── softcap.cuh
│ │ ├── softmax.cu
│ │ ├── softmax.cuh
│ │ ├── solve_tri.cu
│ │ ├── solve_tri.cuh
│ │ ├── ssm-conv.cu
│ │ ├── ssm-conv.cuh
│ │ ├── ssm-scan.cu
│ │ ├── ssm-scan.cuh
│ │ ├── sum.cu
│ │ ├── sum.cuh
│ │ ├── sumrows.cu
│ │ ├── sumrows.cuh
│ │ ├── template-instances/
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_32.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_1-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_16-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_32.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_2-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_32-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_16.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_4-ncols2_8.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_64-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_1.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_2.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_4.cu
│ │ │ ├── fattn-mma-f16-instance-ncols1_8-ncols2_8.cu
│ │ │ ├── fattn-tile-instance-dkq112-dv112.cu
│ │ │ ├── fattn-tile-instance-dkq128-dv128.cu
│ │ │ ├── fattn-tile-instance-dkq256-dv256.cu
│ │ │ ├── fattn-tile-instance-dkq40-dv40.cu
│ │ │ ├── fattn-tile-instance-dkq512-dv512.cu
│ │ │ ├── fattn-tile-instance-dkq576-dv512.cu
│ │ │ ├── fattn-tile-instance-dkq64-dv64.cu
│ │ │ ├── fattn-tile-instance-dkq72-dv72.cu
│ │ │ ├── fattn-tile-instance-dkq80-dv80.cu
│ │ │ ├── fattn-tile-instance-dkq96-dv96.cu
│ │ │ ├── fattn-vec-instance-bf16-bf16.cu
│ │ │ ├── fattn-vec-instance-bf16-f16.cu
│ │ │ ├── fattn-vec-instance-bf16-q4_0.cu
│ │ │ ├── fattn-vec-instance-bf16-q4_1.cu
│ │ │ ├── fattn-vec-instance-bf16-q5_0.cu
│ │ │ ├── fattn-vec-instance-bf16-q5_1.cu
│ │ │ ├── fattn-vec-instance-bf16-q8_0.cu
│ │ │ ├── fattn-vec-instance-f16-bf16.cu
│ │ │ ├── fattn-vec-instance-f16-f16.cu
│ │ │ ├── fattn-vec-instance-f16-q4_0.cu
│ │ │ ├── fattn-vec-instance-f16-q4_1.cu
│ │ │ ├── fattn-vec-instance-f16-q5_0.cu
│ │ │ ├── fattn-vec-instance-f16-q5_1.cu
│ │ │ ├── fattn-vec-instance-f16-q8_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q4_0-f16.cu
│ │ │ ├── fattn-vec-instance-q4_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q4_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q4_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q4_0-q8_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-bf16.cu
│ │ │ ├── fattn-vec-instance-q4_1-f16.cu
│ │ │ ├── fattn-vec-instance-q4_1-q4_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-q4_1.cu
│ │ │ ├── fattn-vec-instance-q4_1-q5_0.cu
│ │ │ ├── fattn-vec-instance-q4_1-q5_1.cu
│ │ │ ├── fattn-vec-instance-q4_1-q8_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q5_0-f16.cu
│ │ │ ├── fattn-vec-instance-q5_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q5_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q5_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q5_0-q8_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-bf16.cu
│ │ │ ├── fattn-vec-instance-q5_1-f16.cu
│ │ │ ├── fattn-vec-instance-q5_1-q4_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-q4_1.cu
│ │ │ ├── fattn-vec-instance-q5_1-q5_0.cu
│ │ │ ├── fattn-vec-instance-q5_1-q5_1.cu
│ │ │ ├── fattn-vec-instance-q5_1-q8_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-bf16.cu
│ │ │ ├── fattn-vec-instance-q8_0-f16.cu
│ │ │ ├── fattn-vec-instance-q8_0-q4_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-q4_1.cu
│ │ │ ├── fattn-vec-instance-q8_0-q5_0.cu
│ │ │ ├── fattn-vec-instance-q8_0-q5_1.cu
│ │ │ ├── fattn-vec-instance-q8_0-q8_0.cu
│ │ │ ├── generate_cu_files.py
│ │ │ ├── mmf-instance-ncols_1.cu
│ │ │ ├── mmf-instance-ncols_10.cu
│ │ │ ├── mmf-instance-ncols_11.cu
│ │ │ ├── mmf-instance-ncols_12.cu
│ │ │ ├── mmf-instance-ncols_13.cu
│ │ │ ├── mmf-instance-ncols_14.cu
│ │ │ ├── mmf-instance-ncols_15.cu
│ │ │ ├── mmf-instance-ncols_16.cu
│ │ │ ├── mmf-instance-ncols_2.cu
│ │ │ ├── mmf-instance-ncols_3.cu
│ │ │ ├── mmf-instance-ncols_4.cu
│ │ │ ├── mmf-instance-ncols_5.cu
│ │ │ ├── mmf-instance-ncols_6.cu
│ │ │ ├── mmf-instance-ncols_7.cu
│ │ │ ├── mmf-instance-ncols_8.cu
│ │ │ ├── mmf-instance-ncols_9.cu
│ │ │ ├── mmq-instance-iq1_s.cu
│ │ │ ├── mmq-instance-iq2_s.cu
│ │ │ ├── mmq-instance-iq2_xs.cu
│ │ │ ├── mmq-instance-iq2_xxs.cu
│ │ │ ├── mmq-instance-iq3_s.cu
│ │ │ ├── mmq-instance-iq3_xxs.cu
│ │ │ ├── mmq-instance-iq4_nl.cu
│ │ │ ├── mmq-instance-iq4_xs.cu
│ │ │ ├── mmq-instance-mxfp4.cu
│ │ │ ├── mmq-instance-nvfp4.cu
│ │ │ ├── mmq-instance-q2_k.cu
│ │ │ ├── mmq-instance-q3_k.cu
│ │ │ ├── mmq-instance-q4_0.cu
│ │ │ ├── mmq-instance-q4_1.cu
│ │ │ ├── mmq-instance-q4_k.cu
│ │ │ ├── mmq-instance-q5_0.cu
│ │ │ ├── mmq-instance-q5_1.cu
│ │ │ ├── mmq-instance-q5_k.cu
│ │ │ ├── mmq-instance-q6_k.cu
│ │ │ └── mmq-instance-q8_0.cu
│ │ ├── top-k.cu
│ │ ├── top-k.cuh
│ │ ├── topk-moe.cu
│ │ ├── topk-moe.cuh
│ │ ├── tri.cu
│ │ ├── tri.cuh
│ │ ├── tsembd.cu
│ │ ├── tsembd.cuh
│ │ ├── unary.cu
│ │ ├── unary.cuh
│ │ ├── upscale.cu
│ │ ├── upscale.cuh
│ │ ├── vecdotq.cuh
│ │ ├── vendors/
│ │ │ ├── cuda.h
│ │ │ ├── hip.h
│ │ │ └── musa.h
│ │ ├── wkv.cu
│ │ └── wkv.cuh
│ ├── ggml-hexagon/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-hexagon.cpp
│ │ ├── htp/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── act-ops.c
│ │ │ ├── argsort-ops.c
│ │ │ ├── binary-ops.c
│ │ │ ├── cmake-toolchain.cmake
│ │ │ ├── cpy-ops.c
│ │ │ ├── cumsum-ops.c
│ │ │ ├── flash-attn-ops.c
│ │ │ ├── get-rows-ops.c
│ │ │ ├── hex-dma.c
│ │ │ ├── hex-dma.h
│ │ │ ├── hex-dump.h
│ │ │ ├── hex-fastdiv.h
│ │ │ ├── hex-utils.h
│ │ │ ├── hmx-matmul-ops.c
│ │ │ ├── hmx-ops.h
│ │ │ ├── hmx-profile.h
│ │ │ ├── hmx-utils.h
│ │ │ ├── htp-ctx.h
│ │ │ ├── htp-msg.h
│ │ │ ├── htp-ops.h
│ │ │ ├── htp_iface.idl
│ │ │ ├── hvx-arith.h
│ │ │ ├── hvx-base.h
│ │ │ ├── hvx-copy.h
│ │ │ ├── hvx-div.h
│ │ │ ├── hvx-dump.h
│ │ │ ├── hvx-exp.h
│ │ │ ├── hvx-floor.h
│ │ │ ├── hvx-inverse.h
│ │ │ ├── hvx-reduce.h
│ │ │ ├── hvx-scale.h
│ │ │ ├── hvx-sigmoid.h
│ │ │ ├── hvx-sqrt.h
│ │ │ ├── hvx-types.h
│ │ │ ├── hvx-utils.h
│ │ │ ├── main.c
│ │ │ ├── matmul-ops.c
│ │ │ ├── repeat-ops.c
│ │ │ ├── rope-ops.c
│ │ │ ├── set-rows-ops.c
│ │ │ ├── softmax-ops.c
│ │ │ ├── ssm-conv.c
│ │ │ ├── sum-rows-ops.c
│ │ │ ├── unary-ops.c
│ │ │ ├── worker-pool.c
│ │ │ └── worker-pool.h
│ │ ├── htp-drv.cpp
│ │ ├── htp-drv.h
│ │ ├── libdl.h
│ │ ├── libggml-htp.inf
│ │ └── op-desc.h
│ ├── ggml-hip/
│ │ └── CMakeLists.txt
│ ├── ggml-impl.h
│ ├── ggml-metal/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-metal-common.cpp
│ │ ├── ggml-metal-common.h
│ │ ├── ggml-metal-context.h
│ │ ├── ggml-metal-context.m
│ │ ├── ggml-metal-device.cpp
│ │ ├── ggml-metal-device.h
│ │ ├── ggml-metal-device.m
│ │ ├── ggml-metal-impl.h
│ │ ├── ggml-metal-ops.cpp
│ │ ├── ggml-metal-ops.h
│ │ ├── ggml-metal.cpp
│ │ └── ggml-metal.metal
│ ├── ggml-musa/
│ │ ├── CMakeLists.txt
│ │ ├── mudnn.cu
│ │ └── mudnn.cuh
│ ├── ggml-opencl/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-opencl.cpp
│ │ └── kernels/
│ │ ├── add.cl
│ │ ├── add_id.cl
│ │ ├── argsort.cl
│ │ ├── clamp.cl
│ │ ├── concat.cl
│ │ ├── conv2d.cl
│ │ ├── conv2d_f16_f32.cl
│ │ ├── cpy.cl
│ │ ├── cumsum.cl
│ │ ├── cvt.cl
│ │ ├── diag.cl
│ │ ├── diag_mask_inf.cl
│ │ ├── div.cl
│ │ ├── embed_kernel.py
│ │ ├── exp.cl
│ │ ├── expm1.cl
│ │ ├── fill.cl
│ │ ├── flash_attn_f16.cl
│ │ ├── flash_attn_f32.cl
│ │ ├── flash_attn_f32_f16.cl
│ │ ├── gelu.cl
│ │ ├── gemm_moe_mxfp4_f32.cl
│ │ ├── gemm_noshuffle_q4_1_f32.cl
│ │ ├── gemm_noshuffle_q4_k_f32.cl
│ │ ├── gemm_noshuffle_q6_k_f32.cl
│ │ ├── gemv_moe_mxfp4_f32.cl
│ │ ├── gemv_noshuffle.cl
│ │ ├── gemv_noshuffle_general.cl
│ │ ├── gemv_noshuffle_general_q8_0_f32.cl
│ │ ├── gemv_noshuffle_q4_1_f32.cl
│ │ ├── gemv_noshuffle_q4_k_f32.cl
│ │ ├── gemv_noshuffle_q6_k_f32.cl
│ │ ├── get_rows.cl
│ │ ├── glu.cl
│ │ ├── group_norm.cl
│ │ ├── im2col_f16.cl
│ │ ├── im2col_f32.cl
│ │ ├── l2_norm.cl
│ │ ├── mean.cl
│ │ ├── mul.cl
│ │ ├── mul_mat_Ab_Bi_8x4.cl
│ │ ├── mul_mat_f16_f32.cl
│ │ ├── mul_mm_f16_f32_kq_kqv.cl
│ │ ├── mul_mm_f16_f32_l4_lm.cl
│ │ ├── mul_mm_f32_f32_l4_lm.cl
│ │ ├── mul_mm_q4_0_f32_l4_lm.cl
│ │ ├── mul_mm_q4_1_f32_l4_lm.cl
│ │ ├── mul_mm_q4_k_f32_l4_lm.cl
│ │ ├── mul_mm_q6_k_f32_l4_lm.cl
│ │ ├── mul_mm_q8_0_f32_8x4.cl
│ │ ├── mul_mm_q8_0_f32_l4_lm.cl
│ │ ├── mul_mv_f16_f16.cl
│ │ ├── mul_mv_f16_f32.cl
│ │ ├── mul_mv_f16_f32_1row.cl
│ │ ├── mul_mv_f16_f32_l4.cl
│ │ ├── mul_mv_f32_f32.cl
│ │ ├── mul_mv_id_mxfp4_f32.cl
│ │ ├── mul_mv_id_mxfp4_f32_flat.cl
│ │ ├── mul_mv_id_q4_0_f32_8x_flat.cl
│ │ ├── mul_mv_id_q8_0_f32.cl
│ │ ├── mul_mv_id_q8_0_f32_flat.cl
│ │ ├── mul_mv_mxfp4_f32.cl
│ │ ├── mul_mv_mxfp4_f32_flat.cl
│ │ ├── mul_mv_q4_0_f32.cl
│ │ ├── mul_mv_q4_0_f32_1d_16x_flat.cl
│ │ ├── mul_mv_q4_0_f32_1d_8x_flat.cl
│ │ ├── mul_mv_q4_0_f32_8x_flat.cl
│ │ ├── mul_mv_q4_0_f32_v.cl
│ │ ├── mul_mv_q4_1_f32.cl
│ │ ├── mul_mv_q4_1_f32_flat.cl
│ │ ├── mul_mv_q4_k_f32.cl
│ │ ├── mul_mv_q4_k_f32_flat.cl
│ │ ├── mul_mv_q6_k_f32.cl
│ │ ├── mul_mv_q6_k_f32_flat.cl
│ │ ├── mul_mv_q8_0_f32.cl
│ │ ├── mul_mv_q8_0_f32_flat.cl
│ │ ├── neg.cl
│ │ ├── norm.cl
│ │ ├── pad.cl
│ │ ├── relu.cl
│ │ ├── repeat.cl
│ │ ├── rms_norm.cl
│ │ ├── rope.cl
│ │ ├── scale.cl
│ │ ├── set_rows.cl
│ │ ├── sigmoid.cl
│ │ ├── silu.cl
│ │ ├── softmax_4_f16.cl
│ │ ├── softmax_4_f32.cl
│ │ ├── softmax_f16.cl
│ │ ├── softmax_f32.cl
│ │ ├── softplus.cl
│ │ ├── solve_tri.cl
│ │ ├── sqr.cl
│ │ ├── sqrt.cl
│ │ ├── ssm_conv.cl
│ │ ├── sub.cl
│ │ ├── sum_rows.cl
│ │ ├── tanh.cl
│ │ ├── transpose.cl
│ │ ├── tri.cl
│ │ ├── tsembd.cl
│ │ └── upscale.cl
│ ├── ggml-openvino/
│ │ ├── .clang-format
│ │ ├── CMakeLists.txt
│ │ ├── ggml-decoder.cpp
│ │ ├── ggml-decoder.h
│ │ ├── ggml-openvino-extra.cpp
│ │ ├── ggml-openvino-extra.h
│ │ ├── ggml-openvino.cpp
│ │ ├── ggml-quants.cpp
│ │ ├── ggml-quants.h
│ │ ├── openvino/
│ │ │ ├── decoder.h
│ │ │ ├── frontend.cpp
│ │ │ ├── frontend.h
│ │ │ ├── input_model.cpp
│ │ │ ├── input_model.h
│ │ │ ├── node_context.h
│ │ │ ├── op/
│ │ │ │ ├── cont.cpp
│ │ │ │ ├── cpy.cpp
│ │ │ │ ├── flash_attn_ext.cpp
│ │ │ │ ├── get_rows.cpp
│ │ │ │ ├── glu_geglu.cpp
│ │ │ │ ├── glu_swiglu.cpp
│ │ │ │ ├── mulmat.cpp
│ │ │ │ ├── permute.cpp
│ │ │ │ ├── reshape.cpp
│ │ │ │ ├── rms_norm.cpp
│ │ │ │ ├── rope.cpp
│ │ │ │ ├── scale.cpp
│ │ │ │ ├── set_rows.cpp
│ │ │ │ ├── softmax.cpp
│ │ │ │ ├── transpose.cpp
│ │ │ │ ├── unary_silu.cpp
│ │ │ │ └── view.cpp
│ │ │ ├── op_table.cpp
│ │ │ ├── op_table.h
│ │ │ ├── pass/
│ │ │ │ ├── eliminate_zp.cpp
│ │ │ │ ├── eliminate_zp.h
│ │ │ │ ├── fuse_to_sdpa.cpp
│ │ │ │ ├── fuse_to_sdpa.h
│ │ │ │ ├── mark_decompression_convert_constant_folding.h
│ │ │ │ ├── squeeze_matmul.cpp
│ │ │ │ └── squeeze_matmul.h
│ │ │ ├── translate_session.cpp
│ │ │ ├── translate_session.h
│ │ │ ├── utils.cpp
│ │ │ └── utils.h
│ │ ├── utils.cpp
│ │ └── utils.h
│ ├── ggml-opt.cpp
│ ├── ggml-quants.c
│ ├── ggml-quants.h
│ ├── ggml-rpc/
│ │ ├── CMakeLists.txt
│ │ └── ggml-rpc.cpp
│ ├── ggml-sycl/
│ │ ├── CMakeLists.txt
│ │ ├── add-id.cpp
│ │ ├── add-id.hpp
│ │ ├── backend.hpp
│ │ ├── binbcast.cpp
│ │ ├── binbcast.hpp
│ │ ├── common.cpp
│ │ ├── common.hpp
│ │ ├── concat.cpp
│ │ ├── concat.hpp
│ │ ├── conv.cpp
│ │ ├── conv.hpp
│ │ ├── convert.cpp
│ │ ├── convert.hpp
│ │ ├── count-equal.cpp
│ │ ├── count-equal.hpp
│ │ ├── cpy.cpp
│ │ ├── cpy.hpp
│ │ ├── dequantize.hpp
│ │ ├── dmmv.cpp
│ │ ├── dmmv.hpp
│ │ ├── dpct/
│ │ │ └── helper.hpp
│ │ ├── element_wise.cpp
│ │ ├── element_wise.hpp
│ │ ├── fattn-common.hpp
│ │ ├── fattn-tile.cpp
│ │ ├── fattn-tile.hpp
│ │ ├── fattn-vec.hpp
│ │ ├── fattn.cpp
│ │ ├── fattn.hpp
│ │ ├── gated_delta_net.cpp
│ │ ├── gated_delta_net.hpp
│ │ ├── gemm.hpp
│ │ ├── getrows.cpp
│ │ ├── getrows.hpp
│ │ ├── ggml-sycl.cpp
│ │ ├── gla.cpp
│ │ ├── gla.hpp
│ │ ├── im2col.cpp
│ │ ├── im2col.hpp
│ │ ├── mmq.cpp
│ │ ├── mmq.hpp
│ │ ├── mmvq.cpp
│ │ ├── mmvq.hpp
│ │ ├── norm.cpp
│ │ ├── norm.hpp
│ │ ├── outprod.cpp
│ │ ├── outprod.hpp
│ │ ├── pad.cpp
│ │ ├── pad.hpp
│ │ ├── pad_reflect_1d.cpp
│ │ ├── pad_reflect_1d.hpp
│ │ ├── presets.hpp
│ │ ├── quantize.hpp
│ │ ├── quants.hpp
│ │ ├── repeat_back.cpp
│ │ ├── repeat_back.hpp
│ │ ├── roll.cpp
│ │ ├── roll.hpp
│ │ ├── rope.cpp
│ │ ├── rope.hpp
│ │ ├── set.cpp
│ │ ├── set.hpp
│ │ ├── set_rows.cpp
│ │ ├── set_rows.hpp
│ │ ├── softmax.cpp
│ │ ├── softmax.hpp
│ │ ├── ssm_conv.cpp
│ │ ├── ssm_conv.hpp
│ │ ├── sycl_hw.cpp
│ │ ├── sycl_hw.hpp
│ │ ├── template-instances/
│ │ │ ├── fattn-tile-instance-dkq112-dv112.cpp
│ │ │ ├── fattn-tile-instance-dkq128-dv128.cpp
│ │ │ ├── fattn-tile-instance-dkq256-dv256.cpp
│ │ │ ├── fattn-tile-instance-dkq40-dv40.cpp
│ │ │ ├── fattn-tile-instance-dkq576-dv512.cpp
│ │ │ ├── fattn-tile-instance-dkq64-dv64.cpp
│ │ │ ├── fattn-tile-instance-dkq72-dv72.cpp
│ │ │ ├── fattn-tile-instance-dkq80-dv80.cpp
│ │ │ ├── fattn-tile-instance-dkq96-dv96.cpp
│ │ │ ├── fattn-vec-instance-f16-f16.cpp
│ │ │ ├── fattn-vec-instance-f16-q4_0.cpp
│ │ │ ├── fattn-vec-instance-f16-q4_1.cpp
│ │ │ ├── fattn-vec-instance-f16-q5_0.cpp
│ │ │ ├── fattn-vec-instance-f16-q5_1.cpp
│ │ │ ├── fattn-vec-instance-f16-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q4_0-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-f16.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q4_1-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q5_0-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-f16.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q5_1.cpp
│ │ │ ├── fattn-vec-instance-q5_1-q8_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-f16.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q4_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q4_1.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q5_0.cpp
│ │ │ ├── fattn-vec-instance-q8_0-q5_1.cpp
│ │ │ └── fattn-vec-instance-q8_0-q8_0.cpp
│ │ ├── tsembd.cpp
│ │ ├── tsembd.hpp
│ │ ├── type.hpp
│ │ ├── upscale.cpp
│ │ ├── upscale.hpp
│ │ ├── vecdotq.hpp
│ │ ├── wkv.cpp
│ │ └── wkv.hpp
│ ├── ggml-threading.cpp
│ ├── ggml-threading.h
│ ├── ggml-virtgpu/
│ │ ├── CMakeLists.txt
│ │ ├── apir_cs_ggml-rpc-front.cpp
│ │ ├── backend/
│ │ │ ├── CMakeLists.txt
│ │ │ ├── apir_cs_ggml-rpc-back.cpp
│ │ │ ├── backend-convert.h
│ │ │ ├── backend-dispatched-backend.cpp
│ │ │ ├── backend-dispatched-buffer-type.cpp
│ │ │ ├── backend-dispatched-buffer.cpp
│ │ │ ├── backend-dispatched-device.cpp
│ │ │ ├── backend-dispatched.cpp
│ │ │ ├── backend-dispatched.gen.h
│ │ │ ├── backend-dispatched.h
│ │ │ ├── backend-virgl-apir.h
│ │ │ ├── backend.cpp
│ │ │ └── shared/
│ │ │ ├── api_remoting.h
│ │ │ ├── apir_backend.gen.h
│ │ │ ├── apir_backend.h
│ │ │ ├── apir_cs.h
│ │ │ ├── apir_cs_ggml.h
│ │ │ └── apir_cs_rpc.h
│ │ ├── ggml-backend-buffer-type.cpp
│ │ ├── ggml-backend-buffer.cpp
│ │ ├── ggml-backend-device.cpp
│ │ ├── ggml-backend-reg.cpp
│ │ ├── ggml-backend.cpp
│ │ ├── ggml-remoting.h
│ │ ├── ggmlremoting_functions.yaml
│ │ ├── include/
│ │ │ └── apir_hw.h
│ │ ├── regenerate_remoting.py
│ │ ├── virtgpu-apir.h
│ │ ├── virtgpu-forward-backend.cpp
│ │ ├── virtgpu-forward-buffer-type.cpp
│ │ ├── virtgpu-forward-buffer.cpp
│ │ ├── virtgpu-forward-device.cpp
│ │ ├── virtgpu-forward-impl.h
│ │ ├── virtgpu-forward.gen.h
│ │ ├── virtgpu-shm.cpp
│ │ ├── virtgpu-shm.h
│ │ ├── virtgpu-utils.cpp
│ │ ├── virtgpu-utils.h
│ │ ├── virtgpu.cpp
│ │ └── virtgpu.h
│ ├── ggml-vulkan/
│ │ ├── CMakeLists.txt
│ │ ├── cmake/
│ │ │ └── host-toolchain.cmake.in
│ │ ├── ggml-vulkan.cpp
│ │ └── vulkan-shaders/
│ │ ├── CMakeLists.txt
│ │ ├── abs.comp
│ │ ├── acc.comp
│ │ ├── add.comp
│ │ ├── add1.comp
│ │ ├── add_id.comp
│ │ ├── arange.comp
│ │ ├── argmax.comp
│ │ ├── argsort.comp
│ │ ├── argsort_large.comp
│ │ ├── ceil.comp
│ │ ├── clamp.comp
│ │ ├── concat.comp
│ │ ├── contig_copy.comp
│ │ ├── conv2d_dw.comp
│ │ ├── conv2d_mm.comp
│ │ ├── conv_transpose_1d.comp
│ │ ├── copy.comp
│ │ ├── copy_from_quant.comp
│ │ ├── copy_to_quant.comp
│ │ ├── copy_transpose.comp
│ │ ├── cos.comp
│ │ ├── count_equal.comp
│ │ ├── count_experts.comp
│ │ ├── cumsum.comp
│ │ ├── cumsum_multipass1.comp
│ │ ├── cumsum_multipass2.comp
│ │ ├── dequant_f32.comp
│ │ ├── dequant_funcs.glsl
│ │ ├── dequant_funcs_cm2.glsl
│ │ ├── dequant_head.glsl
│ │ ├── dequant_iq1_m.comp
│ │ ├── dequant_iq1_s.comp
│ │ ├── dequant_iq2_s.comp
│ │ ├── dequant_iq2_xs.comp
│ │ ├── dequant_iq2_xxs.comp
│ │ ├── dequant_iq3_s.comp
│ │ ├── dequant_iq3_xxs.comp
│ │ ├── dequant_iq4_nl.comp
│ │ ├── dequant_iq4_xs.comp
│ │ ├── dequant_mxfp4.comp
│ │ ├── dequant_q2_k.comp
│ │ ├── dequant_q3_k.comp
│ │ ├── dequant_q4_0.comp
│ │ ├── dequant_q4_1.comp
│ │ ├── dequant_q4_k.comp
│ │ ├── dequant_q5_0.comp
│ │ ├── dequant_q5_1.comp
│ │ ├── dequant_q5_k.comp
│ │ ├── dequant_q6_k.comp
│ │ ├── dequant_q8_0.comp
│ │ ├── diag.comp
│ │ ├── diag_mask_inf.comp
│ │ ├── div.comp
│ │ ├── elu.comp
│ │ ├── exp.comp
│ │ ├── feature-tests/
│ │ │ ├── bfloat16.comp
│ │ │ ├── coopmat.comp
│ │ │ ├── coopmat2.comp
│ │ │ └── integer_dot.comp
│ │ ├── fill.comp
│ │ ├── flash_attn.comp
│ │ ├── flash_attn_base.glsl
│ │ ├── flash_attn_cm1.comp
│ │ ├── flash_attn_cm2.comp
│ │ ├── flash_attn_mask_opt.comp
│ │ ├── flash_attn_split_k_reduce.comp
│ │ ├── floor.comp
│ │ ├── gated_delta_net.comp
│ │ ├── geglu.comp
│ │ ├── geglu_erf.comp
│ │ ├── geglu_quick.comp
│ │ ├── gelu.comp
│ │ ├── gelu_erf.comp
│ │ ├── gelu_quick.comp
│ │ ├── generic_binary_head.glsl
│ │ ├── generic_head.glsl
│ │ ├── generic_unary_head.glsl
│ │ ├── get_rows.comp
│ │ ├── get_rows_quant.comp
│ │ ├── glu_head.glsl
│ │ ├── glu_main.glsl
│ │ ├── group_norm.comp
│ │ ├── hardsigmoid.comp
│ │ ├── hardswish.comp
│ │ ├── im2col.comp
│ │ ├── im2col_3d.comp
│ │ ├── l2_norm.comp
│ │ ├── leaky_relu.comp
│ │ ├── log.comp
│ │ ├── mul.comp
│ │ ├── mul_mat_split_k_reduce.comp
│ │ ├── mul_mat_vec.comp
│ │ ├── mul_mat_vec_base.glsl
│ │ ├── mul_mat_vec_iface.glsl
│ │ ├── mul_mat_vec_iq1_m.comp
│ │ ├── mul_mat_vec_iq1_s.comp
│ │ ├── mul_mat_vec_iq2_s.comp
│ │ ├── mul_mat_vec_iq2_xs.comp
│ │ ├── mul_mat_vec_iq2_xxs.comp
│ │ ├── mul_mat_vec_iq3_s.comp
│ │ ├── mul_mat_vec_iq3_xxs.comp
│ │ ├── mul_mat_vec_nc.comp
│ │ ├── mul_mat_vec_p021.comp
│ │ ├── mul_mat_vec_q2_k.comp
│ │ ├── mul_mat_vec_q3_k.comp
│ │ ├── mul_mat_vec_q4_k.comp
│ │ ├── mul_mat_vec_q5_k.comp
│ │ ├── mul_mat_vec_q6_k.comp
│ │ ├── mul_mat_vecq.comp
│ │ ├── mul_mat_vecq_funcs.glsl
│ │ ├── mul_mm.comp
│ │ ├── mul_mm_cm2.comp
│ │ ├── mul_mm_funcs.glsl
│ │ ├── mul_mm_id_funcs.glsl
│ │ ├── mul_mmq.comp
│ │ ├── mul_mmq_funcs.glsl
│ │ ├── mul_mmq_shmem_types.glsl
│ │ ├── multi_add.comp
│ │ ├── neg.comp
│ │ ├── norm.comp
│ │ ├── opt_step_adamw.comp
│ │ ├── opt_step_sgd.comp
│ │ ├── pad.comp
│ │ ├── pool2d.comp
│ │ ├── quantize_q8_1.comp
│ │ ├── reglu.comp
│ │ ├── relu.comp
│ │ ├── repeat.comp
│ │ ├── repeat_back.comp
│ │ ├── rms_norm.comp
│ │ ├── rms_norm_back.comp
│ │ ├── rms_norm_partials.comp
│ │ ├── roll.comp
│ │ ├── rope_funcs.glsl
│ │ ├── rope_head.glsl
│ │ ├── rope_multi.comp
│ │ ├── rope_neox.comp
│ │ ├── rope_norm.comp
│ │ ├── rope_params.glsl
│ │ ├── rope_vision.comp
│ │ ├── round.comp
│ │ ├── rte.glsl
│ │ ├── scale.comp
│ │ ├── sgn.comp
│ │ ├── sigmoid.comp
│ │ ├── silu.comp
│ │ ├── silu_back.comp
│ │ ├── sin.comp
│ │ ├── soft_max.comp
│ │ ├── soft_max_back.comp
│ │ ├── soft_max_large1.comp
│ │ ├── soft_max_large2.comp
│ │ ├── soft_max_large3.comp
│ │ ├── soft_max_large_common.glsl
│ │ ├── softplus.comp
│ │ ├── solve_tri.comp
│ │ ├── sqrt.comp
│ │ ├── square.comp
│ │ ├── ssm_conv.comp
│ │ ├── ssm_scan.comp
│ │ ├── step.comp
│ │ ├── sub.comp
│ │ ├── sum_rows.comp
│ │ ├── sum_rows.glsl
│ │ ├── swiglu.comp
│ │ ├── swiglu_oai.comp
│ │ ├── tanh.comp
│ │ ├── timestep_embedding.comp
│ │ ├── topk_argsort.comp
│ │ ├── topk_moe.comp
│ │ ├── topk_nary_search.comp
│ │ ├── tri.comp
│ │ ├── trunc.comp
│ │ ├── types.glsl
│ │ ├── upscale.comp
│ │ ├── utils.glsl
│ │ ├── vulkan-shaders-gen.cpp
│ │ ├── wkv6.comp
│ │ ├── wkv7.comp
│ │ └── xielu.comp
│ ├── ggml-webgpu/
│ │ ├── CMakeLists.txt
│ │ ├── ggml-webgpu-shader-lib.hpp
│ │ ├── ggml-webgpu.cpp
│ │ ├── pre_wgsl.hpp
│ │ └── wgsl-shaders/
│ │ ├── argmax.wgsl
│ │ ├── argsort.wgsl
│ │ ├── argsort_merge.wgsl
│ │ ├── binary.wgsl
│ │ ├── common_decls.tmpl
│ │ ├── concat.wgsl
│ │ ├── cpy.wgsl
│ │ ├── cumsum.wgsl
│ │ ├── embed_wgsl.py
│ │ ├── flash_attn.wgsl
│ │ ├── gated_delta_net.wgsl
│ │ ├── get_rows.wgsl
│ │ ├── glu.wgsl
│ │ ├── memset.wgsl
│ │ ├── mul_mat.wgsl
│ │ ├── mul_mat_decls.tmpl
│ │ ├── mul_mat_reg_tile.wgsl
│ │ ├── mul_mat_subgroup_matrix.wgsl
│ │ ├── mul_mat_vec.wgsl
│ │ ├── pad.wgsl
│ │ ├── repeat.wgsl
│ │ ├── rope.wgsl
│ │ ├── row_norm.wgsl
│ │ ├── scale.wgsl
│ │ ├── set.wgsl
│ │ ├── set_rows.wgsl
│ │ ├── soft_max.wgsl
│ │ ├── solve_tri.wgsl
│ │ ├── ssm_conv.wgsl
│ │ ├── sum_rows.wgsl
│ │ └── unary.wgsl
│ ├── ggml-zdnn/
│ │ ├── .gitignore
│ │ ├── CMakeLists.txt
│ │ ├── common.hpp
│ │ ├── ggml-zdnn.cpp
│ │ ├── mmf.cpp
│ │ ├── mmf.hpp
│ │ ├── utils.cpp
│ │ └── utils.hpp
│ ├── ggml-zendnn/
│ │ ├── CMakeLists.txt
│ │ └── ggml-zendnn.cpp
│ ├── ggml.c
│ ├── ggml.cpp
│ └── gguf.cpp
├── gguf-py/
│ ├── LICENSE
│ ├── README.md
│ ├── examples/
│ │ ├── reader.py
│ │ └── writer.py
│ ├── gguf/
│ │ ├── __init__.py
│ │ ├── constants.py
│ │ ├── gguf.py
│ │ ├── gguf_reader.py
│ │ ├── gguf_writer.py
│ │ ├── lazy.py
│ │ ├── metadata.py
│ │ ├── py.typed
│ │ ├── quants.py
│ │ ├── scripts/
│ │ │ ├── gguf_convert_endian.py
│ │ │ ├── gguf_dump.py
│ │ │ ├── gguf_editor_gui.py
│ │ │ ├── gguf_hash.py
│ │ │ ├── gguf_new_metadata.py
│ │ │ └── gguf_set_metadata.py
│ │ ├── tensor_mapping.py
│ │ ├── utility.py
│ │ └── vocab.py
│ ├── pyproject.toml
│ └── tests/
│ ├── __init__.py
│ ├── test_metadata.py
│ └── test_quants.py
├── grammars/
│ ├── README.md
│ ├── arithmetic.gbnf
│ ├── c.gbnf
│ ├── chess.gbnf
│ ├── english.gbnf
│ ├── japanese.gbnf
│ ├── json.gbnf
│ ├── json_arr.gbnf
│ └── list.gbnf
├── include/
│ ├── llama-cpp.h
│ └── llama.h
├── licenses/
│ └── LICENSE-jsonhpp
├── models/
│ ├── .editorconfig
│ ├── ggml-vocab-aquila.gguf
│ ├── ggml-vocab-baichuan.gguf
│ ├── ggml-vocab-bert-bge.gguf
│ ├── ggml-vocab-bert-bge.gguf.inp
│ ├── ggml-vocab-bert-bge.gguf.out
│ ├── ggml-vocab-command-r.gguf
│ ├── ggml-vocab-command-r.gguf.inp
│ ├── ggml-vocab-command-r.gguf.out
│ ├── ggml-vocab-deepseek-coder.gguf
│ ├── ggml-vocab-deepseek-coder.gguf.inp
│ ├── ggml-vocab-deepseek-coder.gguf.out
│ ├── ggml-vocab-deepseek-llm.gguf
│ ├── ggml-vocab-deepseek-llm.gguf.inp
│ ├── ggml-vocab-deepseek-llm.gguf.out
│ ├── ggml-vocab-falcon.gguf
│ ├── ggml-vocab-falcon.gguf.inp
│ ├── ggml-vocab-falcon.gguf.out
│ ├── ggml-vocab-gpt-2.gguf
│ ├── ggml-vocab-gpt-2.gguf.inp
│ ├── ggml-vocab-gpt-2.gguf.out
│ ├── ggml-vocab-gpt-neox.gguf
│ ├── ggml-vocab-llama-bpe.gguf
│ ├── ggml-vocab-llama-bpe.gguf.inp
│ ├── ggml-vocab-llama-bpe.gguf.out
│ ├── ggml-vocab-llama-spm.gguf
│ ├── ggml-vocab-llama-spm.gguf.inp
│ ├── ggml-vocab-llama-spm.gguf.out
│ ├── ggml-vocab-mpt.gguf
│ ├── ggml-vocab-mpt.gguf.inp
│ ├── ggml-vocab-mpt.gguf.out
│ ├── ggml-vocab-nomic-bert-moe.gguf
│ ├── ggml-vocab-phi-3.gguf
│ ├── ggml-vocab-phi-3.gguf.inp
│ ├── ggml-vocab-phi-3.gguf.out
│ ├── ggml-vocab-qwen2.gguf
│ ├── ggml-vocab-qwen2.gguf.inp
│ ├── ggml-vocab-qwen2.gguf.out
│ ├── ggml-vocab-refact.gguf
│ ├── ggml-vocab-refact.gguf.inp
│ ├── ggml-vocab-refact.gguf.out
│ ├── ggml-vocab-starcoder.gguf
│ ├── ggml-vocab-starcoder.gguf.inp
│ ├── ggml-vocab-starcoder.gguf.out
│ └── templates/
│ ├── Apertus-8B-Instruct.jinja
│ ├── Apriel-1.6-15b-Thinker-fixed.jinja
│ ├── Bielik-11B-v3.0-Instruct.jinja
│ ├── ByteDance-Seed-OSS.jinja
│ ├── CohereForAI-c4ai-command-r-plus-tool_use.jinja
│ ├── CohereForAI-c4ai-command-r7b-12-2024-tool_use.jinja
│ ├── GLM-4.6.jinja
│ ├── GLM-4.7-Flash.jinja
│ ├── GigaChat3-10B-A1.8B.jinja
│ ├── GigaChat3.1-10B-A1.8B.jinja
│ ├── HuggingFaceTB-SmolLM3-3B.jinja
│ ├── Kimi-K2-Instruct.jinja
│ ├── Kimi-K2-Thinking.jinja
│ ├── LFM2-8B-A1B.jinja
│ ├── LFM2.5-Instruct.jinja
│ ├── MiMo-VL.jinja
│ ├── MiniMax-M2.jinja
│ ├── Mistral-Small-3.2-24B-Instruct-2506.jinja
│ ├── NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja
│ ├── NVIDIA-Nemotron-Nano-v2.jinja
│ ├── NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja
│ ├── NousResearch-Hermes-3-Llama-3.1-8B-tool_use.jinja
│ ├── Qwen-QwQ-32B.jinja
│ ├── Qwen-Qwen2.5-7B-Instruct.jinja
│ ├── Qwen-Qwen3-0.6B.jinja
│ ├── Qwen3-Coder.jinja
│ ├── Qwen3.5-4B.jinja
│ ├── README.md
│ ├── StepFun3.5-Flash.jinja
│ ├── deepseek-ai-DeepSeek-R1-Distill-Llama-8B.jinja
│ ├── deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
│ ├── deepseek-ai-DeepSeek-V3.1.jinja
│ ├── fireworks-ai-llama-3-firefunction-v2.jinja
│ ├── google-gemma-2-2b-it.jinja
│ ├── ibm-granite-granite-3.3-2B-Instruct.jinja
│ ├── llama-cpp-deepseek-r1.jinja
│ ├── llama-cpp-rwkv-world.jinja
│ ├── meetkai-functionary-medium-v3.1.jinja
│ ├── meetkai-functionary-medium-v3.2.jinja
│ ├── meta-llama-Llama-3.1-8B-Instruct.jinja
│ ├── meta-llama-Llama-3.2-3B-Instruct.jinja
│ ├── meta-llama-Llama-3.3-70B-Instruct.jinja
│ ├── microsoft-Phi-3.5-mini-instruct.jinja
│ ├── mistralai-Ministral-3-14B-Reasoning-2512.jinja
│ ├── mistralai-Mistral-Nemo-Instruct-2407.jinja
│ ├── moonshotai-Kimi-K2.jinja
│ ├── openai-gpt-oss-120b.jinja
│ ├── stepfun-ai-Step-3.5-Flash.jinja
│ ├── unsloth-Apriel-1.5.jinja
│ ├── unsloth-mistral-Devstral-Small-2507.jinja
│ └── upstage-Solar-Open-100B.jinja
├── mypy.ini
├── pocs/
│ ├── CMakeLists.txt
│ └── vdot/
│ ├── CMakeLists.txt
│ ├── q8dot.cpp
│ └── vdot.cpp
├── pyproject.toml
├── pyrightconfig.json
├── requirements/
│ ├── requirements-all.txt
│ ├── requirements-compare-llama-bench.txt
│ ├── requirements-convert_hf_to_gguf.txt
│ ├── requirements-convert_hf_to_gguf_update.txt
│ ├── requirements-convert_legacy_llama.txt
│ ├── requirements-convert_llama_ggml_to_gguf.txt
│ ├── requirements-convert_lora_to_gguf.txt
│ ├── requirements-gguf_editor_gui.txt
│ ├── requirements-pydantic.txt
│ ├── requirements-server-bench.txt
│ ├── requirements-test-tokenizer-random.txt
│ └── requirements-tool_bench.txt
├── requirements.txt
├── scripts/
│ ├── apple/
│ │ ├── validate-apps.sh
│ │ ├── validate-ios.sh
│ │ ├── validate-macos.sh
│ │ ├── validate-tvos.sh
│ │ └── validate-visionos.sh
│ ├── bench-models.sh
│ ├── build-info.sh
│ ├── check-requirements.sh
│ ├── compare-commits.sh
│ ├── compare-llama-bench.py
│ ├── compare-logprobs.py
│ ├── create_ops_docs.py
│ ├── debug-test.sh
│ ├── fetch_server_test_models.py
│ ├── gen-authors.sh
│ ├── gen-unicode-data.py
│ ├── get-flags.mk
│ ├── get-hellaswag.sh
│ ├── get-pg.sh
│ ├── get-wikitext-2.sh
│ ├── get-winogrande.sh
│ ├── get_chat_template.py
│ ├── git-bisect-run.sh
│ ├── git-bisect.sh
│ ├── hf.sh
│ ├── hip/
│ │ └── gcn-cdna-vgpr-check.py
│ ├── install-oneapi.bat
│ ├── jinja/
│ │ ├── jinja-tester.py
│ │ └── requirements.txt
│ ├── pr2wt.sh
│ ├── serve-static.js
│ ├── server-bench.py
│ ├── server-test-function-call.py
│ ├── server-test-model.py
│ ├── snapdragon/
│ │ ├── adb/
│ │ │ ├── llama-cli.farf
│ │ │ ├── run-bench.sh
│ │ │ ├── run-cli.sh
│ │ │ ├── run-completion.sh
│ │ │ ├── run-mtmd.sh
│ │ │ └── run-tool.sh
│ │ ├── qdc/
│ │ │ ├── readme.md
│ │ │ ├── requirements.txt
│ │ │ └── tests/
│ │ │ └── test_bench.py
│ │ └── windows/
│ │ ├── run-bench.ps1
│ │ ├── run-cli.ps1
│ │ ├── run-completion.ps1
│ │ ├── run-mtmd.ps1
│ │ ├── run-tool.ps1
│ │ └── setup-build.ps1
│ ├── sync-ggml-am.sh
│ ├── sync-ggml.last
│ ├── sync-ggml.sh
│ ├── sync_vendor.py
│ ├── tool_bench.py
│ ├── tool_bench.sh
│ ├── verify-checksum-models.py
│ └── xxd.cmake
├── src/
│ ├── CMakeLists.txt
│ ├── llama-adapter.cpp
│ ├── llama-adapter.h
│ ├── llama-arch.cpp
│ ├── llama-arch.h
│ ├── llama-batch.cpp
│ ├── llama-batch.h
│ ├── llama-chat.cpp
│ ├── llama-chat.h
│ ├── llama-context.cpp
│ ├── llama-context.h
│ ├── llama-cparams.cpp
│ ├── llama-cparams.h
│ ├── llama-ext.h
│ ├── llama-grammar.cpp
│ ├── llama-grammar.h
│ ├── llama-graph.cpp
│ ├── llama-graph.h
│ ├── llama-hparams.cpp
│ ├── llama-hparams.h
│ ├── llama-impl.cpp
│ ├── llama-impl.h
│ ├── llama-io.cpp
│ ├── llama-io.h
│ ├── llama-kv-cache-iswa.cpp
│ ├── llama-kv-cache-iswa.h
│ ├── llama-kv-cache.cpp
│ ├── llama-kv-cache.h
│ ├── llama-kv-cells.h
│ ├── llama-memory-hybrid-iswa.cpp
│ ├── llama-memory-hybrid-iswa.h
│ ├── llama-memory-hybrid.cpp
│ ├── llama-memory-hybrid.h
│ ├── llama-memory-recurrent.cpp
│ ├── llama-memory-recurrent.h
│ ├── llama-memory.cpp
│ ├── llama-memory.h
│ ├── llama-mmap.cpp
│ ├── llama-mmap.h
│ ├── llama-model-loader.cpp
│ ├── llama-model-loader.h
│ ├── llama-model-saver.cpp
│ ├── llama-model-saver.h
│ ├── llama-model.cpp
│ ├── llama-model.h
│ ├── llama-quant.cpp
│ ├── llama-quant.h
│ ├── llama-sampler.cpp
│ ├── llama-sampler.h
│ ├── llama-vocab.cpp
│ ├── llama-vocab.h
│ ├── llama.cpp
│ ├── models/
│ │ ├── afmoe.cpp
│ │ ├── apertus.cpp
│ │ ├── arcee.cpp
│ │ ├── arctic.cpp
│ │ ├── arwkv7.cpp
│ │ ├── baichuan.cpp
│ │ ├── bailingmoe.cpp
│ │ ├── bailingmoe2.cpp
│ │ ├── bert.cpp
│ │ ├── bitnet.cpp
│ │ ├── bloom.cpp
│ │ ├── chameleon.cpp
│ │ ├── chatglm.cpp
│ │ ├── codeshell.cpp
│ │ ├── cogvlm.cpp
│ │ ├── cohere2-iswa.cpp
│ │ ├── command-r.cpp
│ │ ├── dbrx.cpp
│ │ ├── deci.cpp
│ │ ├── deepseek.cpp
│ │ ├── deepseek2.cpp
│ │ ├── delta-net-base.cpp
│ │ ├── dots1.cpp
│ │ ├── dream.cpp
│ │ ├── ernie4-5-moe.cpp
│ │ ├── ernie4-5.cpp
│ │ ├── eurobert.cpp
│ │ ├── exaone-moe.cpp
│ │ ├── exaone.cpp
│ │ ├── exaone4.cpp
│ │ ├── falcon-h1.cpp
│ │ ├── falcon.cpp
│ │ ├── gemma-embedding.cpp
│ │ ├── gemma.cpp
│ │ ├── gemma2-iswa.cpp
│ │ ├── gemma3.cpp
│ │ ├── gemma3n-iswa.cpp
│ │ ├── glm4-moe.cpp
│ │ ├── glm4.cpp
│ │ ├── gpt2.cpp
│ │ ├── gptneox.cpp
│ │ ├── granite-hybrid.cpp
│ │ ├── granite.cpp
│ │ ├── grok.cpp
│ │ ├── grovemoe.cpp
│ │ ├── hunyuan-dense.cpp
│ │ ├── hunyuan-moe.cpp
│ │ ├── internlm2.cpp
│ │ ├── jais.cpp
│ │ ├── jais2.cpp
│ │ ├── jamba.cpp
│ │ ├── kimi-linear.cpp
│ │ ├── lfm2.cpp
│ │ ├── llada-moe.cpp
│ │ ├── llada.cpp
│ │ ├── llama-iswa.cpp
│ │ ├── llama.cpp
│ │ ├── maincoder.cpp
│ │ ├── mamba-base.cpp
│ │ ├── mamba.cpp
│ │ ├── mimo2-iswa.cpp
│ │ ├── minicpm3.cpp
│ │ ├── minimax-m2.cpp
│ │ ├── mistral3.cpp
│ │ ├── models.h
│ │ ├── modern-bert.cpp
│ │ ├── mpt.cpp
│ │ ├── nemotron-h.cpp
│ │ ├── nemotron.cpp
│ │ ├── neo-bert.cpp
│ │ ├── olmo.cpp
│ │ ├── olmo2.cpp
│ │ ├── olmoe.cpp
│ │ ├── openai-moe-iswa.cpp
│ │ ├── openelm.cpp
│ │ ├── orion.cpp
│ │ ├── paddleocr.cpp
│ │ ├── pangu-embedded.cpp
│ │ ├── phi2.cpp
│ │ ├── phi3.cpp
│ │ ├── plamo.cpp
│ │ ├── plamo2.cpp
│ │ ├── plamo3.cpp
│ │ ├── plm.cpp
│ │ ├── qwen.cpp
│ │ ├── qwen2.cpp
│ │ ├── qwen2moe.cpp
│ │ ├── qwen2vl.cpp
│ │ ├── qwen3.cpp
│ │ ├── qwen35.cpp
│ │ ├── qwen35moe.cpp
│ │ ├── qwen3moe.cpp
│ │ ├── qwen3next.cpp
│ │ ├── qwen3vl-moe.cpp
│ │ ├── qwen3vl.cpp
│ │ ├── refact.cpp
│ │ ├── rnd1.cpp
│ │ ├── rwkv6-base.cpp
│ │ ├── rwkv6.cpp
│ │ ├── rwkv6qwen2.cpp
│ │ ├── rwkv7-base.cpp
│ │ ├── rwkv7.cpp
│ │ ├── seed-oss.cpp
│ │ ├── smallthinker.cpp
│ │ ├── smollm3.cpp
│ │ ├── stablelm.cpp
│ │ ├── starcoder.cpp
│ │ ├── starcoder2.cpp
│ │ ├── step35-iswa.cpp
│ │ ├── t5-dec.cpp
│ │ ├── t5-enc.cpp
│ │ ├── wavtokenizer-dec.cpp
│ │ └── xverse.cpp
│ ├── unicode-data.cpp
│ ├── unicode-data.h
│ ├── unicode.cpp
│ └── unicode.h
├── tests/
│ ├── .gitignore
│ ├── CMakeLists.txt
│ ├── export-graph-ops.cpp
│ ├── get-model.cpp
│ ├── get-model.h
│ ├── gguf-model-data.cpp
│ ├── gguf-model-data.h
│ ├── peg-parser/
│ │ ├── simple-tokenize.cpp
│ │ ├── simple-tokenize.h
│ │ ├── test-basic.cpp
│ │ ├── test-gbnf-generation.cpp
│ │ ├── test-json-parser.cpp
│ │ ├── test-json-serialization.cpp
│ │ ├── test-python-dict-parser.cpp
│ │ ├── test-unicode.cpp
│ │ └── tests.h
│ ├── run-json-schema-to-grammar.mjs
│ ├── test-alloc.cpp
│ ├── test-arg-parser.cpp
│ ├── test-autorelease.cpp
│ ├── test-backend-ops.cpp
│ ├── test-backend-sampler.cpp
│ ├── test-barrier.cpp
│ ├── test-c.c
│ ├── test-chat-auto-parser.cpp
│ ├── test-chat-peg-parser.cpp
│ ├── test-chat-template.cpp
│ ├── test-chat.cpp
│ ├── test-double-float.cpp
│ ├── test-gbnf-validator.cpp
│ ├── test-gguf-model-data.cpp
│ ├── test-gguf.cpp
│ ├── test-grammar-integration.cpp
│ ├── test-grammar-llguidance.cpp
│ ├── test-grammar-parser.cpp
│ ├── test-jinja.cpp
│ ├── test-json-partial.cpp
│ ├── test-json-schema-to-grammar.cpp
│ ├── test-llama-archs.cpp
│ ├── test-llama-grammar.cpp
│ ├── test-log.cpp
│ ├── test-lora-conversion-inference.sh
│ ├── test-model-load-cancel.cpp
│ ├── test-mtmd-c-api.c
│ ├── test-opt.cpp
│ ├── test-peg-parser.cpp
│ ├── test-quantize-fns.cpp
│ ├── test-quantize-perf.cpp
│ ├── test-quantize-stats.cpp
│ ├── test-reasoning-budget.cpp
│ ├── test-regex-partial.cpp
│ ├── test-rope.cpp
│ ├── test-sampling.cpp
│ ├── test-state-restore-fragmented.cpp
│ ├── test-thread-safety.cpp
│ ├── test-tokenizer-0.cpp
│ ├── test-tokenizer-0.py
│ ├── test-tokenizer-0.sh
│ ├── test-tokenizer-1-bpe.cpp
│ ├── test-tokenizer-1-spm.cpp
│ ├── test-tokenizer-random.py
│ ├── test-tokenizers-repo.sh
│ └── testing.h
├── tools/
│ ├── CMakeLists.txt
│ ├── batched-bench/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── batched-bench.cpp
│ ├── cli/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── cli.cpp
│ ├── completion/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── completion.cpp
│ ├── cvector-generator/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── completions.txt
│ │ ├── cvector-generator.cpp
│ │ ├── mean.hpp
│ │ ├── negative.txt
│ │ ├── pca.hpp
│ │ └── positive.txt
│ ├── export-lora/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── export-lora.cpp
│ ├── fit-params/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── fit-params.cpp
│ ├── gguf-split/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── gguf-split.cpp
│ │ └── tests.sh
│ ├── imatrix/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── imatrix.cpp
│ ├── llama-bench/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── llama-bench.cpp
│ ├── mtmd/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── clip-graph.h
│ │ ├── clip-impl.h
│ │ ├── clip-model.h
│ │ ├── clip.cpp
│ │ ├── clip.h
│ │ ├── debug/
│ │ │ ├── mtmd-debug.cpp
│ │ │ ├── mtmd-debug.h
│ │ │ └── mtmd-debug.md
│ │ ├── deprecation-warning.cpp
│ │ ├── legacy-models/
│ │ │ ├── convert_image_encoder_to_gguf.py
│ │ │ ├── glmedge-convert-image-encoder-to-gguf.py
│ │ │ ├── glmedge-surgery.py
│ │ │ ├── llava_surgery.py
│ │ │ ├── llava_surgery_v2.py
│ │ │ ├── minicpmv-convert-image-encoder-to-gguf.py
│ │ │ └── minicpmv-surgery.py
│ │ ├── models/
│ │ │ ├── cogvlm.cpp
│ │ │ ├── conformer.cpp
│ │ │ ├── deepseekocr.cpp
│ │ │ ├── glm4v.cpp
│ │ │ ├── internvl.cpp
│ │ │ ├── kimik25.cpp
│ │ │ ├── kimivl.cpp
│ │ │ ├── llama4.cpp
│ │ │ ├── llava.cpp
│ │ │ ├── minicpmv.cpp
│ │ │ ├── mobilenetv5.cpp
│ │ │ ├── models.h
│ │ │ ├── nemotron-v2-vl.cpp
│ │ │ ├── paddleocr.cpp
│ │ │ ├── pixtral.cpp
│ │ │ ├── qwen2vl.cpp
│ │ │ ├── qwen3vl.cpp
│ │ │ ├── siglip.cpp
│ │ │ ├── whisper-enc.cpp
│ │ │ └── youtuvl.cpp
│ │ ├── mtmd-audio.cpp
│ │ ├── mtmd-audio.h
│ │ ├── mtmd-cli.cpp
│ │ ├── mtmd-helper.cpp
│ │ ├── mtmd-helper.h
│ │ ├── mtmd-image.cpp
│ │ ├── mtmd-image.h
│ │ ├── mtmd.cpp
│ │ ├── mtmd.h
│ │ ├── requirements.txt
│ │ ├── tests/
│ │ │ ├── test-1-extracted.md
│ │ │ ├── test-1-extracted.txt
│ │ │ ├── test-deepseek-ocr.py
│ │ │ └── tests-requirements.txt
│ │ └── tests.sh
│ ├── parser/
│ │ ├── CMakeLists.txt
│ │ ├── debug-template-parser.cpp
│ │ └── template-analysis.cpp
│ ├── perplexity/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── perplexity.cpp
│ ├── quantize/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ ├── quantize.cpp
│ │ └── tests.sh
│ ├── results/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── results.cpp
│ ├── rpc/
│ │ ├── CMakeLists.txt
│ │ ├── README.md
│ │ └── rpc-server.cpp
│ ├── server/
│ │ ├── CMakeLists.txt
│ │ ├── README-dev.md
│ │ ├── README.md
│ │ ├── bench/
│ │ │ ├── README.md
│ │ │ ├── bench.py
│ │ │ ├── prometheus.yml
│ │ │ ├── requirements.txt
│ │ │ └── script.js
│ │ ├── chat-llama2.sh
│ │ ├── chat.mjs
│ │ ├── chat.sh
│ │ ├── public/
│ │ │ ├── bundle.css
│ │ │ ├── bundle.js
│ │ │ ├── index.html
│ │ │ └── loading.html
│ │ ├── public_legacy/
│ │ │ ├── colorthemes.css
│ │ │ ├── completion.js
│ │ │ ├── index-new.html
│ │ │ ├── index.html
│ │ │ ├── index.js
│ │ │ ├── json-schema-to-grammar.mjs
│ │ │ ├── loading.html
│ │ │ ├── prompt-formats.js
│ │ │ ├── style.css
│ │ │ ├── system-prompts.js
│ │ │ ├── theme-beeninorder.css
│ │ │ ├── theme-ketivah.css
│ │ │ ├── theme-mangotango.css
│ │ │ ├── theme-playground.css
│ │ │ ├── theme-polarnight.css
│ │ │ └── theme-snowstorm.css
│ │ ├── public_simplechat/
│ │ │ ├── datautils.mjs
│ │ │ ├── index.html
│ │ │ ├── readme.md
│ │ │ ├── simplechat.css
│ │ │ ├── simplechat.js
│ │ │ └── ui.mjs
│ │ ├── server-common.cpp
│ │ ├── server-common.h
│ │ ├── server-context.cpp
│ │ ├── server-context.h
│ │ ├── server-cors-proxy.h
│ │ ├── server-http.cpp
│ │ ├── server-http.h
│ │ ├── server-models.cpp
│ │ ├── server-models.h
│ │ ├── server-queue.cpp
│ │ ├── server-queue.h
│ │ ├── server-task.cpp
│ │ ├── server-task.h
│ │ ├── server-tools.cpp
│ │ ├── server-tools.h
│ │ ├── server.cpp
│ │ ├── tests/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── conftest.py
│ │ │ ├── pytest.ini
│ │ │ ├── requirements.txt
│ │ │ ├── tests.sh
│ │ │ ├── unit/
│ │ │ │ ├── test_basic.py
│ │ │ │ ├── test_chat_completion.py
│ │ │ │ ├── test_compat_anthropic.py
│ │ │ │ ├── test_compat_oai_responses.py
│ │ │ │ ├── test_completion.py
│ │ │ │ ├── test_ctx_shift.py
│ │ │ │ ├── test_embedding.py
│ │ │ │ ├── test_infill.py
│ │ │ │ ├── test_lora.py
│ │ │ │ ├── test_proxy.py
│ │ │ │ ├── test_rerank.py
│ │ │ │ ├── test_router.py
│ │ │ │ ├── test_security.py
│ │ │ │ ├── test_sleep.py
│ │ │ │ ├── test_slot_save.py
│ │ │ │ ├── test_speculative.py
│ │ │ │ ├── test_template.py
│ │ │ │ ├── test_tokenize.py
│ │ │ │ ├── test_tool_call.py
│ │ │ │ └── test_vision_api.py
│ │ │ └── utils.py
│ │ ├── themes/
│ │ │ ├── README.md
│ │ │ ├── buttons-top/
│ │ │ │ ├── README.md
│ │ │ │ └── index.html
│ │ │ └── wild/
│ │ │ ├── README.md
│ │ │ └── index.html
│ │ └── webui/
│ │ ├── .gitignore
│ │ ├── .npmrc
│ │ ├── .prettierignore
│ │ ├── .prettierrc
│ │ ├── .storybook/
│ │ │ ├── ModeWatcherDecorator.svelte
│ │ │ ├── TooltipProviderDecorator.svelte
│ │ │ ├── main.ts
│ │ │ ├── preview.ts
│ │ │ └── vitest.setup.ts
│ │ ├── README.md
│ │ ├── components.json
│ │ ├── docs/
│ │ │ ├── architecture/
│ │ │ │ ├── high-level-architecture-simplified.md
│ │ │ │ └── high-level-architecture.md
│ │ │ └── flows/
│ │ │ ├── chat-flow.md
│ │ │ ├── conversations-flow.md
│ │ │ ├── data-flow-simplified-model-mode.md
│ │ │ ├── data-flow-simplified-router-mode.md
│ │ │ ├── database-flow.md
│ │ │ ├── mcp-flow.md
│ │ │ ├── models-flow.md
│ │ │ ├── server-flow.md
│ │ │ └── settings-flow.md
│ │ ├── eslint.config.js
│ │ ├── package.json
│ │ ├── playwright.config.ts
│ │ ├── scripts/
│ │ │ ├── dev.sh
│ │ │ ├── install-git-hooks.sh
│ │ │ └── post-build.sh
│ │ ├── src/
│ │ │ ├── app.css
│ │ │ ├── app.d.ts
│ │ │ ├── app.html
│ │ │ ├── lib/
│ │ │ │ ├── actions/
│ │ │ │ │ └── fade-in-view.svelte.ts
│ │ │ │ ├── components/
│ │ │ │ │ ├── app/
│ │ │ │ │ │ ├── actions/
│ │ │ │ │ │ │ ├── ActionIcon.svelte
│ │ │ │ │ │ │ ├── ActionIconCopyToClipboard.svelte
│ │ │ │ │ │ │ ├── ActionIconRemove.svelte
│ │ │ │ │ │ │ ├── ActionIconsCodeBlock.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── badges/
│ │ │ │ │ │ │ ├── BadgeChatStatistic.svelte
│ │ │ │ │ │ │ ├── BadgeInfo.svelte
│ │ │ │ │ │ │ ├── BadgeModality.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── chat/
│ │ │ │ │ │ │ ├── ChatAttachments/
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpPrompt.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpResource.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentMcpResources.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentPreview.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentThumbnailFile.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentThumbnailImage.svelte
│ │ │ │ │ │ │ │ ├── ChatAttachmentsList.svelte
│ │ │ │ │ │ │ │ └── ChatAttachmentsViewAll.svelte
│ │ │ │ │ │ │ ├── ChatForm/
│ │ │ │ │ │ │ │ ├── ChatForm.svelte
│ │ │ │ │ │ │ │ ├── ChatFormActions/
│ │ │ │ │ │ │ │ │ ├── ChatFormActionAttachmentsDropdown.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionAttachmentsSheet.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionRecord.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormActionSubmit.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormActions.svelte
│ │ │ │ │ │ │ │ ├── ChatFormFileInputInvisible.svelte
│ │ │ │ │ │ │ │ ├── ChatFormHelperText.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPicker/
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerItemHeader.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerList.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPickerListItem.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormPickerListItemSkeleton.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPickerPopover.svelte
│ │ │ │ │ │ │ │ ├── ChatFormPromptPicker/
│ │ │ │ │ │ │ │ │ ├── ChatFormPromptPicker.svelte
│ │ │ │ │ │ │ │ │ ├── ChatFormPromptPickerArgumentForm.svelte
│ │ │ │ │ │ │ │ │ └── ChatFormPromptPickerArgumentInput.svelte
│ │ │ │ │ │ │ │ ├── ChatFormResourcePicker/
│ │ │ │ │ │ │ │ │ └── ChatFormResourcePicker.svelte
│ │ │ │ │ │ │ │ └── ChatFormTextarea.svelte
│ │ │ │ │ │ │ ├── ChatMessages/
│ │ │ │ │ │ │ │ ├── ChatMessage.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageActions.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageAgenticContent.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageAssistant.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageBranchingControls.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageEditForm.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageMcpPrompt.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageMcpPromptContent.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageStatistics.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageSystem.svelte
│ │ │ │ │ │ │ │ ├── ChatMessageUser.svelte
│ │ │ │ │ │ │ │ └── ChatMessages.svelte
│ │ │ │ │ │ │ ├── ChatScreen/
│ │ │ │ │ │ │ │ ├── ChatScreen.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenDragOverlay.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenForm.svelte
│ │ │ │ │ │ │ │ ├── ChatScreenHeader.svelte
│ │ │ │ │ │ │ │ └── ChatScreenProcessingInfo.svelte
│ │ │ │ │ │ │ ├── ChatSettings/
│ │ │ │ │ │ │ │ ├── ChatSettings.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsFields.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsFooter.svelte
│ │ │ │ │ │ │ │ ├── ChatSettingsImportExportTab.svelte
│ │ │ │ │ │ │ │ └── ChatSettingsParameterSourceIndicator.svelte
│ │ │ │ │ │ │ ├── ChatSidebar/
│ │ │ │ │ │ │ │ ├── ChatSidebar.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarActions.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarConversationItem.svelte
│ │ │ │ │ │ │ │ ├── ChatSidebarSearch.svelte
│ │ │ │ │ │ │ │ └── handle-mobile-sidebar-item-click.ts
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── content/
│ │ │ │ │ │ │ ├── CollapsibleContentBlock.svelte
│ │ │ │ │ │ │ ├── MarkdownContent.svelte
│ │ │ │ │ │ │ ├── SyntaxHighlightedCode.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── dialogs/
│ │ │ │ │ │ │ ├── DialogChatAttachmentPreview.svelte
│ │ │ │ │ │ │ ├── DialogChatAttachmentsViewAll.svelte
│ │ │ │ │ │ │ ├── DialogChatError.svelte
│ │ │ │ │ │ │ ├── DialogChatSettings.svelte
│ │ │ │ │ │ │ ├── DialogCodePreview.svelte
│ │ │ │ │ │ │ ├── DialogConfirmation.svelte
│ │ │ │ │ │ │ ├── DialogConversationSelection.svelte
│ │ │ │ │ │ │ ├── DialogConversationTitleUpdate.svelte
│ │ │ │ │ │ │ ├── DialogEmptyFileAlert.svelte
│ │ │ │ │ │ │ ├── DialogMcpResourcePreview.svelte
│ │ │ │ │ │ │ ├── DialogMcpResources.svelte
│ │ │ │ │ │ │ ├── DialogMcpServersSettings.svelte
│ │ │ │ │ │ │ ├── DialogModelInformation.svelte
│ │ │ │ │ │ │ ├── DialogModelNotAvailable.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── forms/
│ │ │ │ │ │ │ ├── InputWithSuggestions.svelte
│ │ │ │ │ │ │ ├── KeyValuePairs.svelte
│ │ │ │ │ │ │ ├── SearchInput.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── mcp/
│ │ │ │ │ │ │ ├── McpActiveServersAvatars.svelte
│ │ │ │ │ │ │ ├── McpCapabilitiesBadges.svelte
│ │ │ │ │ │ │ ├── McpConnectionLogs.svelte
│ │ │ │ │ │ │ ├── McpLogo.svelte
│ │ │ │ │ │ │ ├── McpResourceBrowser/
│ │ │ │ │ │ │ │ ├── McpResourceBrowser.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserEmptyState.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserHeader.svelte
│ │ │ │ │ │ │ │ ├── McpResourceBrowserServerItem.svelte
│ │ │ │ │ │ │ │ └── mcp-resource-browser.ts
│ │ │ │ │ │ │ ├── McpResourcePreview.svelte
│ │ │ │ │ │ │ ├── McpResourceTemplateForm.svelte
│ │ │ │ │ │ │ ├── McpServerCard/
│ │ │ │ │ │ │ │ ├── McpServerCard.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardActions.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardDeleteDialog.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardEditForm.svelte
│ │ │ │ │ │ │ │ ├── McpServerCardHeader.svelte
│ │ │ │ │ │ │ │ └── McpServerCardToolsList.svelte
│ │ │ │ │ │ │ ├── McpServerCardSkeleton.svelte
│ │ │ │ │ │ │ ├── McpServerForm.svelte
│ │ │ │ │ │ │ ├── McpServerInfo.svelte
│ │ │ │ │ │ │ ├── McpServersSelector.svelte
│ │ │ │ │ │ │ ├── McpServersSettings.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── misc/
│ │ │ │ │ │ │ ├── ConversationSelection.svelte
│ │ │ │ │ │ │ ├── HorizontalScrollCarousel.svelte
│ │ │ │ │ │ │ ├── KeyboardShortcutInfo.svelte
│ │ │ │ │ │ │ ├── TruncatedText.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ ├── models/
│ │ │ │ │ │ │ ├── ModelBadge.svelte
│ │ │ │ │ │ │ ├── ModelId.svelte
│ │ │ │ │ │ │ ├── ModelsSelector.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorList.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorOption.svelte
│ │ │ │ │ │ │ ├── ModelsSelectorSheet.svelte
│ │ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ │ └── utils.ts
│ │ │ │ │ │ ├── navigation/
│ │ │ │ │ │ │ ├── DropdownMenuActions.svelte
│ │ │ │ │ │ │ ├── DropdownMenuSearchable.svelte
│ │ │ │ │ │ │ └── index.ts
│ │ │ │ │ │ └── server/
│ │ │ │ │ │ ├── ServerErrorSplash.svelte
│ │ │ │ │ │ ├── ServerLoadingSplash.svelte
│ │ │ │ │ │ ├── ServerStatus.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ └── ui/
│ │ │ │ │ ├── alert/
│ │ │ │ │ │ ├── alert-description.svelte
│ │ │ │ │ │ ├── alert-title.svelte
│ │ │ │ │ │ ├── alert.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── alert-dialog/
│ │ │ │ │ │ ├── alert-dialog-action.svelte
│ │ │ │ │ │ ├── alert-dialog-cancel.svelte
│ │ │ │ │ │ ├── alert-dialog-content.svelte
│ │ │ │ │ │ ├── alert-dialog-description.svelte
│ │ │ │ │ │ ├── alert-dialog-footer.svelte
│ │ │ │ │ │ ├── alert-dialog-header.svelte
│ │ │ │ │ │ ├── alert-dialog-overlay.svelte
│ │ │ │ │ │ ├── alert-dialog-title.svelte
│ │ │ │ │ │ ├── alert-dialog-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── badge/
│ │ │ │ │ │ ├── badge.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── button/
│ │ │ │ │ │ ├── button.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── card/
│ │ │ │ │ │ ├── card-action.svelte
│ │ │ │ │ │ ├── card-content.svelte
│ │ │ │ │ │ ├── card-description.svelte
│ │ │ │ │ │ ├── card-footer.svelte
│ │ │ │ │ │ ├── card-header.svelte
│ │ │ │ │ │ ├── card-title.svelte
│ │ │ │ │ │ ├── card.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── checkbox/
│ │ │ │ │ │ ├── checkbox.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── collapsible/
│ │ │ │ │ │ ├── collapsible-content.svelte
│ │ │ │ │ │ ├── collapsible-trigger.svelte
│ │ │ │ │ │ ├── collapsible.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── dialog/
│ │ │ │ │ │ ├── dialog-close.svelte
│ │ │ │ │ │ ├── dialog-content.svelte
│ │ │ │ │ │ ├── dialog-description.svelte
│ │ │ │ │ │ ├── dialog-footer.svelte
│ │ │ │ │ │ ├── dialog-header.svelte
│ │ │ │ │ │ ├── dialog-overlay.svelte
│ │ │ │ │ │ ├── dialog-title.svelte
│ │ │ │ │ │ ├── dialog-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── dropdown-menu/
│ │ │ │ │ │ ├── dropdown-menu-checkbox-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-content.svelte
│ │ │ │ │ │ ├── dropdown-menu-group-heading.svelte
│ │ │ │ │ │ ├── dropdown-menu-group.svelte
│ │ │ │ │ │ ├── dropdown-menu-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-label.svelte
│ │ │ │ │ │ ├── dropdown-menu-radio-group.svelte
│ │ │ │ │ │ ├── dropdown-menu-radio-item.svelte
│ │ │ │ │ │ ├── dropdown-menu-separator.svelte
│ │ │ │ │ │ ├── dropdown-menu-shortcut.svelte
│ │ │ │ │ │ ├── dropdown-menu-sub-content.svelte
│ │ │ │ │ │ ├── dropdown-menu-sub-trigger.svelte
│ │ │ │ │ │ ├── dropdown-menu-trigger.svelte
│ │ │ │ │ │ └── index.ts
│ │ │ │ │ ├── input/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── input.svelte
│ │ │ │ │ ├── label/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── label.svelte
│ │ │ │ │ ├── popover/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── popover-close.svelte
│ │ │ │ │ │ ├── popover-content.svelte
│ │ │ │ │ │ ├── popover-portal.svelte
│ │ │ │ │ │ ├── popover-trigger.svelte
│ │ │ │ │ │ └── popover.svelte
│ │ │ │ │ ├── scroll-area/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── scroll-area-scrollbar.svelte
│ │ │ │ │ │ └── scroll-area.svelte
│ │ │ │ │ ├── select/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── select-content.svelte
│ │ │ │ │ │ ├── select-group-heading.svelte
│ │ │ │ │ │ ├── select-group.svelte
│ │ │ │ │ │ ├── select-item.svelte
│ │ │ │ │ │ ├── select-label.svelte
│ │ │ │ │ │ ├── select-scroll-down-button.svelte
│ │ │ │ │ │ ├── select-scroll-up-button.svelte
│ │ │ │ │ │ ├── select-separator.svelte
│ │ │ │ │ │ └── select-trigger.svelte
│ │ │ │ │ ├── separator/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── separator.svelte
│ │ │ │ │ ├── sheet/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── sheet-close.svelte
│ │ │ │ │ │ ├── sheet-content.svelte
│ │ │ │ │ │ ├── sheet-description.svelte
│ │ │ │ │ │ ├── sheet-footer.svelte
│ │ │ │ │ │ ├── sheet-header.svelte
│ │ │ │ │ │ ├── sheet-overlay.svelte
│ │ │ │ │ │ ├── sheet-title.svelte
│ │ │ │ │ │ └── sheet-trigger.svelte
│ │ │ │ │ ├── sidebar/
│ │ │ │ │ │ ├── constants.ts
│ │ │ │ │ │ ├── context.svelte.ts
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── sidebar-content.svelte
│ │ │ │ │ │ ├── sidebar-footer.svelte
│ │ │ │ │ │ ├── sidebar-group-action.svelte
│ │ │ │ │ │ ├── sidebar-group-content.svelte
│ │ │ │ │ │ ├── sidebar-group-label.svelte
│ │ │ │ │ │ ├── sidebar-group.svelte
│ │ │ │ │ │ ├── sidebar-header.svelte
│ │ │ │ │ │ ├── sidebar-input.svelte
│ │ │ │ │ │ ├── sidebar-inset.svelte
│ │ │ │ │ │ ├── sidebar-menu-action.svelte
│ │ │ │ │ │ ├── sidebar-menu-badge.svelte
│ │ │ │ │ │ ├── sidebar-menu-button.svelte
│ │ │ │ │ │ ├── sidebar-menu-item.svelte
│ │ │ │ │ │ ├── sidebar-menu-skeleton.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub-button.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub-item.svelte
│ │ │ │ │ │ ├── sidebar-menu-sub.svelte
│ │ │ │ │ │ ├── sidebar-menu.svelte
│ │ │ │ │ │ ├── sidebar-provider.svelte
│ │ │ │ │ │ ├── sidebar-rail.svelte
│ │ │ │ │ │ ├── sidebar-separator.svelte
│ │ │ │ │ │ ├── sidebar-trigger.svelte
│ │ │ │ │ │ └── sidebar.svelte
│ │ │ │ │ ├── skeleton/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── skeleton.svelte
│ │ │ │ │ ├── switch/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── switch.svelte
│ │ │ │ │ ├── table/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── table-body.svelte
│ │ │ │ │ │ ├── table-caption.svelte
│ │ │ │ │ │ ├── table-cell.svelte
│ │ │ │ │ │ ├── table-footer.svelte
│ │ │ │ │ │ ├── table-head.svelte
│ │ │ │ │ │ ├── table-header.svelte
│ │ │ │ │ │ ├── table-row.svelte
│ │ │ │ │ │ └── table.svelte
│ │ │ │ │ ├── textarea/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ └── textarea.svelte
│ │ │ │ │ ├── tooltip/
│ │ │ │ │ │ ├── index.ts
│ │ │ │ │ │ ├── tooltip-content.svelte
│ │ │ │ │ │ └── tooltip-trigger.svelte
│ │ │ │ │ └── utils.ts
│ │ │ │ ├── constants/
│ │ │ │ │ ├── agentic.ts
│ │ │ │ │ ├── api-endpoints.ts
│ │ │ │ │ ├── attachment-labels.ts
│ │ │ │ │ ├── auto-scroll.ts
│ │ │ │ │ ├── binary-detection.ts
│ │ │ │ │ ├── cache.ts
│ │ │ │ │ ├── chat-form.ts
│ │ │ │ │ ├── code-blocks.ts
│ │ │ │ │ ├── code.ts
│ │ │ │ │ ├── context-keys.ts
│ │ │ │ │ ├── css-classes.ts
│ │ │ │ │ ├── favicon.ts
│ │ │ │ │ ├── floating-ui-constraints.ts
│ │ │ │ │ ├── formatters.ts
│ │ │ │ │ ├── icons.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── key-value-pairs.ts
│ │ │ │ │ ├── latex-protection.ts
│ │ │ │ │ ├── literal-html.ts
│ │ │ │ │ ├── localstorage-keys.ts
│ │ │ │ │ ├── markdown.ts
│ │ │ │ │ ├── max-bundle-size.ts
│ │ │ │ │ ├── mcp-form.ts
│ │ │ │ │ ├── mcp-resource.ts
│ │ │ │ │ ├── mcp.ts
│ │ │ │ │ ├── message-export.ts
│ │ │ │ │ ├── model-id.ts
│ │ │ │ │ ├── precision.ts
│ │ │ │ │ ├── processing-info.ts
│ │ │ │ │ ├── settings-config.ts
│ │ │ │ │ ├── settings-fields.ts
│ │ │ │ │ ├── settings-keys.ts
│ │ │ │ │ ├── settings-sections.ts
│ │ │ │ │ ├── supported-file-types.ts
│ │ │ │ │ ├── table-html-restorer.ts
│ │ │ │ │ ├── tooltip-config.ts
│ │ │ │ │ ├── ui.ts
│ │ │ │ │ ├── uri-template.ts
│ │ │ │ │ └── viewport.ts
│ │ │ │ ├── contexts/
│ │ │ │ │ ├── chat-actions.context.ts
│ │ │ │ │ ├── chat-settings-dialog.context.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ └── message-edit.context.ts
│ │ │ │ ├── enums/
│ │ │ │ │ ├── agentic.ts
│ │ │ │ │ ├── attachment.ts
│ │ │ │ │ ├── chat.ts
│ │ │ │ │ ├── files.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── keyboard.ts
│ │ │ │ │ ├── mcp.ts
│ │ │ │ │ ├── model.ts
│ │ │ │ │ ├── server.ts
│ │ │ │ │ ├── settings.ts
│ │ │ │ │ └── ui.ts
│ │ │ │ ├── hooks/
│ │ │ │ │ ├── is-mobile.svelte.ts
│ │ │ │ │ ├── use-auto-scroll.svelte.ts
│ │ │ │ │ └── use-processing-state.svelte.ts
│ │ │ │ ├── markdown/
│ │ │ │ │ ├── enhance-code-blocks.ts
│ │ │ │ │ ├── enhance-links.ts
│ │ │ │ │ ├── literal-html.ts
│ │ │ │ │ ├── resolve-attachment-images.ts
│ │ │ │ │ └── table-html-restorer.ts
│ │ │ │ ├── services/
│ │ │ │ │ ├── chat.service.ts
│ │ │ │ │ ├── database.service.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── mcp.service.ts
│ │ │ │ │ ├── models.service.ts
│ │ │ │ │ ├── parameter-sync.service.spec.ts
│ │ │ │ │ ├── parameter-sync.service.ts
│ │ │ │ │ └── props.service.ts
│ │ │ │ ├── stores/
│ │ │ │ │ ├── agentic.svelte.ts
│ │ │ │ │ ├── chat.svelte.ts
│ │ │ │ │ ├── conversations.svelte.ts
│ │ │ │ │ ├── mcp-resources.svelte.ts
│ │ │ │ │ ├── mcp.svelte.ts
│ │ │ │ │ ├── models.svelte.ts
│ │ │ │ │ ├── persisted.svelte.ts
│ │ │ │ │ ├── server.svelte.ts
│ │ │ │ │ └── settings.svelte.ts
│ │ │ │ ├── types/
│ │ │ │ │ ├── agentic.d.ts
│ │ │ │ │ ├── api.d.ts
│ │ │ │ │ ├── chat.d.ts
│ │ │ │ │ ├── common.d.ts
│ │ │ │ │ ├── database.d.ts
│ │ │ │ │ ├── index.ts
│ │ │ │ │ ├── mcp.d.ts
│ │ │ │ │ ├── models.d.ts
│ │ │ │ │ └── settings.d.ts
│ │ │ │ └── utils/
│ │ │ │ ├── abort.ts
│ │ │ │ ├── agentic.ts
│ │ │ │ ├── api-fetch.ts
│ │ │ │ ├── api-headers.ts
│ │ │ │ ├── api-key-validation.ts
│ │ │ │ ├── attachment-display.ts
│ │ │ │ ├── attachment-type.ts
│ │ │ │ ├── audio-recording.ts
│ │ │ │ ├── autoresize-textarea.ts
│ │ │ │ ├── branching.ts
│ │ │ │ ├── browser-only.ts
│ │ │ │ ├── cache-ttl.ts
│ │ │ │ ├── clipboard.ts
│ │ │ │ ├── code.ts
│ │ │ │ ├── config-helpers.ts
│ │ │ │ ├── conversation-utils.ts
│ │ │ │ ├── convert-files-to-extra.ts
│ │ │ │ ├── cors-proxy.ts
│ │ │ │ ├── data-url.ts
│ │ │ │ ├── debounce.ts
│ │ │ │ ├── favicon.ts
│ │ │ │ ├── file-preview.ts
│ │ │ │ ├── file-type.ts
│ │ │ │ ├── formatters.ts
│ │ │ │ ├── headers.ts
│ │ │ │ ├── image-error-fallback.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── is-ime-composing.ts
│ │ │ │ ├── latex-protection.ts
│ │ │ │ ├── legacy-migration.ts
│ │ │ │ ├── mcp.ts
│ │ │ │ ├── modality-file-validation.ts
│ │ │ │ ├── model-names.ts
│ │ │ │ ├── pdf-processing.ts
│ │ │ │ ├── portal-to-body.ts
│ │ │ │ ├── precision.ts
│ │ │ │ ├── process-uploaded-files.ts
│ │ │ │ ├── sanitize.ts
│ │ │ │ ├── svg-to-png.ts
│ │ │ │ ├── syntax-highlight-language.ts
│ │ │ │ ├── text-files.ts
│ │ │ │ ├── text.ts
│ │ │ │ ├── uri-template.ts
│ │ │ │ ├── uuid.ts
│ │ │ │ └── webp-to-png.ts
│ │ │ ├── routes/
│ │ │ │ ├── +error.svelte
│ │ │ │ ├── +layout.svelte
│ │ │ │ ├── +page.svelte
│ │ │ │ ├── +page.ts
│ │ │ │ └── chat/
│ │ │ │ └── [id]/
│ │ │ │ ├── +page.svelte
│ │ │ │ └── +page.ts
│ │ │ └── styles/
│ │ │ └── katex-custom.scss
│ │ ├── static/
│ │ │ └── loading.html
│ │ ├── svelte.config.js
│ │ ├── tests/
│ │ │ ├── client/
│ │ │ │ ├── components/
│ │ │ │ │ └── TestWrapper.svelte
│ │ │ │ └── page.svelte.test.ts
│ │ │ ├── e2e/
│ │ │ │ └── demo.test.ts
│ │ │ ├── stories/
│ │ │ │ ├── ChatMessage.stories.svelte
│ │ │ │ ├── ChatScreenForm.stories.svelte
│ │ │ │ ├── ChatSettings.stories.svelte
│ │ │ │ ├── ChatSidebar.stories.svelte
│ │ │ │ ├── Introduction.mdx
│ │ │ │ ├── MarkdownContent.stories.svelte
│ │ │ │ └── fixtures/
│ │ │ │ ├── ai-tutorial.ts
│ │ │ │ ├── api-docs.ts
│ │ │ │ ├── blog-post.ts
│ │ │ │ ├── data-analysis.ts
│ │ │ │ ├── empty.ts
│ │ │ │ ├── math-formulas.ts
│ │ │ │ ├── readme.ts
│ │ │ │ └── storybook-mocks.ts
│ │ │ └── unit/
│ │ │ ├── agentic-sections.test.ts
│ │ │ ├── agentic-strip.test.ts
│ │ │ ├── clipboard.test.ts
│ │ │ ├── latex-protection.test.ts
│ │ │ ├── model-id-parser.test.ts
│ │ │ ├── model-names.test.ts
│ │ │ ├── reasoning-context.test.ts
│ │ │ └── uri-template.test.ts
│ │ ├── tsconfig.json
│ │ ├── vite.config.ts
│ │ └── vitest-setup-client.ts
│ ├── tokenize/
│ │ ├── CMakeLists.txt
│ │ └── tokenize.cpp
│ └── tts/
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── convert_pt_to_hf.py
│ ├── tts-outetts.py
│ └── tts.cpp
├── ty.toml
└── vendor/
├── cpp-httplib/
│ ├── CMakeLists.txt
│ ├── LICENSE
│ ├── httplib.cpp
│ └── httplib.h
├── miniaudio/
│ └── miniaudio.h
├── nlohmann/
│ ├── json.hpp
│ └── json_fwd.hpp
├── sheredom/
│ └── subprocess.h
└── stb/
└── stb_image.h
Copy disabled (too large)
Download .json
Condensed preview — 2536 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (74,657K chars).
[
{
"path": ".clang-format",
"chars": 4961,
"preview": "---\nLanguage: Cpp\nAlignAfterOpenBracket: Align\nAlignArrayOfStructures: Left\nAlignConsecutiveAssignments: AcrossCo"
},
{
"path": ".clang-tidy",
"chars": 931,
"preview": "---\nChecks: >\n bugprone-*,\n -bugprone-easily-swappable-parameters,\n -bugprone-implicit-widening-of-multiplicati"
},
{
"path": ".devops/cann.Dockerfile",
"chars": 4837,
"preview": "# ==============================================================================\n# ARGUMENTS\n# ========================="
},
{
"path": ".devops/cpu.Dockerfile",
"chars": 2212,
"preview": "ARG UBUNTU_VERSION=24.04\n\nFROM ubuntu:$UBUNTU_VERSION AS build\n\nARG TARGETARCH\n\nRUN apt-get update && \\\n apt-get inst"
},
{
"path": ".devops/cuda-new.Dockerfile",
"chars": 2693,
"preview": "ARG UBUNTU_VERSION=24.04\n# This needs to generally match the container host's environment.\nARG CUDA_VERSION=13.1.1\n# Tar"
},
{
"path": ".devops/cuda.Dockerfile",
"chars": 2693,
"preview": "ARG UBUNTU_VERSION=24.04\n# This needs to generally match the container host's environment.\nARG CUDA_VERSION=12.8.1\n# Tar"
},
{
"path": ".devops/intel.Dockerfile",
"chars": 4068,
"preview": "ARG ONEAPI_VERSION=2025.3.2-0-devel-ubuntu24.04\n\n## Build Image\n\nFROM intel/deep-learning-essentials:$ONEAPI_VERSION AS "
},
{
"path": ".devops/llama-cli-cann.Dockerfile",
"chars": 2383,
"preview": "ARG ASCEND_VERSION=8.5.0-910b-openeuler22.03-py3.10\n\nFROM ascendai/cann:$ASCEND_VERSION AS build\n\nWORKDIR /app\n\nCOPY . ."
},
{
"path": ".devops/llama-cpp-cuda.srpm.spec",
"chars": 2724,
"preview": "# SRPM for building from source and packaging an RPM for RPM-based distros.\n# https://docs.fedoraproject.org/en-US/quick"
},
{
"path": ".devops/llama-cpp.srpm.spec",
"chars": 2781,
"preview": "# SRPM for building from source and packaging an RPM for RPM-based distros.\n# https://docs.fedoraproject.org/en-US/quick"
},
{
"path": ".devops/musa.Dockerfile",
"chars": 2633,
"preview": "ARG UBUNTU_VERSION=22.04\n# This needs to generally match the container host's environment.\nARG MUSA_VERSION=rc4.3.0\n# Ta"
},
{
"path": ".devops/nix/apps.nix",
"chars": 434,
"preview": "{\n perSystem =\n { config, lib, ... }:\n {\n apps =\n let\n inherit (config.packages) default;\n "
},
{
"path": ".devops/nix/devshells.nix",
"chars": 1451,
"preview": "{ inputs, ... }:\n\n{\n perSystem =\n {\n config,\n lib,\n system,\n ...\n }:\n {\n devShells =\n"
},
{
"path": ".devops/nix/docker.nix",
"chars": 850,
"preview": "{\n lib,\n dockerTools,\n buildEnv,\n llama-cpp,\n interactive ? true,\n coreutils,\n}:\n\n# A tar that can be fed into `do"
},
{
"path": ".devops/nix/jetson-support.nix",
"chars": 1080,
"preview": "{ inputs, ... }:\n{\n perSystem =\n {\n config,\n system,\n lib,\n pkgsCuda,\n ...\n }:\n {\n "
},
{
"path": ".devops/nix/nixpkgs-instances.nix",
"chars": 1685,
"preview": "{ inputs, ... }:\n{\n # The _module.args definitions are passed on to modules as arguments. E.g.\n # the module `{ pkgs ."
},
{
"path": ".devops/nix/package-gguf-py.nix",
"chars": 691,
"preview": "{\n lib,\n llamaVersion,\n numpy,\n tqdm,\n requests,\n sentencepiece,\n pyyaml,\n poetry-core,\n buildPythonPackage,\n "
},
{
"path": ".devops/nix/package.nix",
"chars": 7329,
"preview": "{\n lib,\n glibc,\n config,\n stdenv,\n runCommand,\n cmake,\n ninja,\n pkg-config,\n git,\n mpi,\n blas,\n cudaPackages"
},
{
"path": ".devops/nix/python-scripts.nix",
"chars": 1300,
"preview": "{\n lib,\n stdenv,\n buildPythonPackage,\n poetry-core,\n mkShell,\n python3Packages,\n gguf-py,\n}@inputs:\n\nlet\n llama-"
},
{
"path": ".devops/nix/scope.nix",
"chars": 900,
"preview": "{\n lib,\n newScope,\n python3,\n llamaVersion ? \"0.0.0\",\n}:\n\nlet\n pythonPackages = python3.pkgs;\nin\n\n# We're using `ma"
},
{
"path": ".devops/nix/sif.nix",
"chars": 729,
"preview": "{\n lib,\n singularity-tools,\n llama-cpp,\n bashInteractive,\n interactive ? false,\n}:\n\nlet\n optionalInt = cond: x: if"
},
{
"path": ".devops/openvino.Dockerfile",
"chars": 3896,
"preview": "ARG OPENVINO_VERSION_MAJOR=2026.0\nARG OPENVINO_VERSION_FULL=2026.0.0.20965.c6d6a13a886\nARG UBUNTU_VERSION=24.04\n\n# Optio"
},
{
"path": ".devops/rocm.Dockerfile",
"chars": 3161,
"preview": "ARG UBUNTU_VERSION=24.04\n\n# This needs to generally match the container host's environment.\nARG ROCM_VERSION=7.2\nARG AMD"
},
{
"path": ".devops/s390x.Dockerfile",
"chars": 3821,
"preview": "ARG GCC_VERSION=15.2.0\nARG UBUNTU_VERSION=24.04\n\n### Build Llama.cpp stage\nFROM gcc:${GCC_VERSION} AS build\n\nRUN --mount"
},
{
"path": ".devops/tools.sh",
"chars": 2467,
"preview": "#!/usr/bin/env bash\nset -e\n\n# Read the first argument into a variable\narg1=\"$1\"\n\n# Shift the arguments to remove the fir"
},
{
"path": ".devops/vulkan.Dockerfile",
"chars": 2461,
"preview": "ARG UBUNTU_VERSION=26.04\n\nFROM ubuntu:$UBUNTU_VERSION AS build\n\n# Install build tools\nRUN apt update && apt install -y g"
},
{
"path": ".dockerignore",
"chars": 237,
"preview": "*.o\n*.a\n.cache/\n# Do not ignore .git directory, otherwise the reported build number will always be 0\n.github/\n.gitignore"
},
{
"path": ".ecrc",
"chars": 97,
"preview": "{\n \"Exclude\": [\"^\\\\.gitmodules$\", \"stb_image\\\\.h\"],\n \"Disable\": {\n \"IndentSize\": true\n }\n}\n"
},
{
"path": ".editorconfig",
"chars": 1392,
"preview": "# https://EditorConfig.org\n\n# Top-most EditorConfig file\nroot = true\n\n# Unix-style newlines with a newline ending every "
},
{
"path": ".flake8",
"chars": 565,
"preview": "[flake8]\nmax-line-length = 125\nignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503\nexclude =\n #"
},
{
"path": ".gemini/settings.json",
"chars": 35,
"preview": "{ \"contextFileName\": \"AGENTS.md\" }\n"
},
{
"path": ".gitattributes",
"chars": 259,
"preview": "# Treat the generated single-file WebUI build as binary for diff purposes.\n# Git's pack-file delta compression still wor"
},
{
"path": ".github/ISSUE_TEMPLATE/010-bug-compilation.yml",
"chars": 3216,
"preview": "name: Bug (compilation)\ndescription: Something goes wrong when trying to compile llama.cpp.\ntitle: \"Compile bug: \"\nlabel"
},
{
"path": ".github/ISSUE_TEMPLATE/011-bug-results.yml",
"chars": 4387,
"preview": "name: Bug (model use)\ndescription: Something goes wrong when using a model (in general, not specific to a single llama.c"
},
{
"path": ".github/ISSUE_TEMPLATE/019-bug-misc.yml",
"chars": 3624,
"preview": "name: Bug (misc.)\ndescription: Something is not working the way it should (and it's not covered by any of the above case"
},
{
"path": ".github/ISSUE_TEMPLATE/020-enhancement.yml",
"chars": 2406,
"preview": "name: Enhancement\ndescription: Used to request enhancements for llama.cpp.\ntitle: \"Feature Request: \"\nlabels: [\"enhancem"
},
{
"path": ".github/ISSUE_TEMPLATE/030-research.yml",
"chars": 1728,
"preview": "name: Research\ndescription: Track new technical research area.\ntitle: \"Research: \"\nlabels: [\"research 🔬\"]\nbody:\n - type"
},
{
"path": ".github/ISSUE_TEMPLATE/040-refactor.yml",
"chars": 1223,
"preview": "name: Refactor (Maintainers)\ndescription: Used to track refactoring opportunities.\ntitle: \"Refactor: \"\nlabels: [\"refacto"
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 521,
"preview": "blank_issues_enabled: true\ncontact_links:\n - name: Got an idea?\n url: https://github.com/ggml-org/llama.cpp/discussi"
},
{
"path": ".github/actions/get-tag-name/action.yml",
"chars": 692,
"preview": "name: \"Determine tag name\"\ndescription: \"Determine the tag name to use for a release\"\noutputs:\n name:\n description: "
},
{
"path": ".github/actions/install-exe/action.yml",
"chars": 1126,
"preview": "name: \"Install exe\"\ndescription: \"Download and install exe\"\ninputs:\n url:\n description: \"URL of the exe installer\"\n "
},
{
"path": ".github/actions/linux-setup-openvino/action.yml",
"chars": 752,
"preview": "name: \"Linux - Setup OpenVINO Toolkit\"\ndescription: \"Setup OpenVINO Toolkit for Linux\"\ninputs:\n path:\n description: "
},
{
"path": ".github/actions/linux-setup-spacemit/action.yml",
"chars": 555,
"preview": "name: \"Linux - Setup SpacemiT Toolchain\"\ndescription: \"Setup SpacemiT Toolchain for Linux\"\ninputs:\n path:\n descripti"
},
{
"path": ".github/actions/linux-setup-vulkan/action.yml",
"chars": 498,
"preview": "name: \"Linux - Setup Vulkan SDK\"\ndescription: \"Setup Vulkan SDK for Linux\"\ninputs:\n path:\n description: \"Installatio"
},
{
"path": ".github/actions/unarchive-tar/action.yml",
"chars": 693,
"preview": "name: \"Unarchive tar\"\ndescription: \"Download and unarchive tar into directory\"\ninputs:\n url:\n description: \"URL of t"
},
{
"path": ".github/actions/windows-setup-cuda/action.yml",
"chars": 12074,
"preview": "name: \"Windows - Setup CUDA Toolkit\"\ndescription: \"Setup CUDA Toolkit for Windows\"\ninputs:\n cuda_version:\n descripti"
},
{
"path": ".github/actions/windows-setup-rocm/action.yml",
"chars": 401,
"preview": "name: \"Windows - Setup ROCm\"\ndescription: \"Setup ROCm for Windows\"\ninputs:\n version:\n description: \"ROCm version\"\n "
},
{
"path": ".github/labeler.yml",
"chars": 3197,
"preview": "# https://github.com/actions/labeler\nApple Metal:\n - changed-files:\n - any-glob-to-any-file:\n - ggm"
},
{
"path": ".github/pull_request_template.md",
"chars": 741,
"preview": "## Overview\n\n<!-- Describe what this PR does and why. Be concise but complete -->\n\n## Additional information\n\n<!-- You c"
},
{
"path": ".github/workflows/ai-issues.yml",
"chars": 2904,
"preview": "name: AI review (issues)\n\non:\n issues:\n types: [opened]\n\njobs:\n find-related:\n if: github.event.action == 'opene"
},
{
"path": ".github/workflows/bench.yml.disabled",
"chars": 10467,
"preview": "# TODO: there have been some issues with the workflow, so disabling for now\n# https://github.com/ggml-org/llama.cp"
},
{
"path": ".github/workflows/build-3rd-party.yml",
"chars": 1201,
"preview": "name: CI (3rd-party)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths"
},
{
"path": ".github/workflows/build-android.yml",
"chars": 2730,
"preview": "name: CI (android)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: "
},
{
"path": ".github/workflows/build-apple.yml",
"chars": 6325,
"preview": "name: CI (apple)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: [\n"
},
{
"path": ".github/workflows/build-cache.yml",
"chars": 3349,
"preview": "name: Build Actions Cache\n\non:\n workflow_dispatch: # allows manual triggering\n schedule:\n - cron: '0 * * * *'\n\nconc"
},
{
"path": ".github/workflows/build-cann.yml",
"chars": 3029,
"preview": "name: CI (cann)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: [\n "
},
{
"path": ".github/workflows/build-cmake-pkg.yml",
"chars": 1781,
"preview": "name: Build relocatable cmake package\non:\n workflow_dispatch:\n workflow_call:\n\njobs:\n linux:\n runs-on: ubuntu-slim"
},
{
"path": ".github/workflows/build-cross.yml",
"chars": 13600,
"preview": "name: CI (cross)\non:\n # only manual triggers due to low-importance of the workflows\n # TODO: for regular runs, provisi"
},
{
"path": ".github/workflows/build-msys.yml",
"chars": 1950,
"preview": "name: CI (msys)\n\non:\n # only manual triggers due to low-importance of the workflows\n # TODO: for regular runs, provisi"
},
{
"path": ".github/workflows/build-riscv.yml",
"chars": 4117,
"preview": "name: CI (riscv)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: [\n"
},
{
"path": ".github/workflows/build-sanitize.yml",
"chars": 2247,
"preview": "name: CI (sanitize)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths:"
},
{
"path": ".github/workflows/build-self-hosted.yml",
"chars": 6489,
"preview": "name: CI (self-hosted)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n pat"
},
{
"path": ".github/workflows/build-vulkan.yml",
"chars": 2574,
"preview": "name: CI (vulkan)\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: ["
},
{
"path": ".github/workflows/build.yml",
"chars": 42379,
"preview": "name: CI\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: [\n '."
},
{
"path": ".github/workflows/check-vendor.yml",
"chars": 1224,
"preview": "name: Check vendor\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n paths: "
},
{
"path": ".github/workflows/close-issue.yml",
"chars": 942,
"preview": "name: Close inactive issues\non:\n schedule:\n - cron: \"42 0 * * *\"\n\n# Fine-grant permission\n# https://docs.github.com/"
},
{
"path": ".github/workflows/copilot-setup-steps.yml",
"chars": 2180,
"preview": "name: \"Copilot Setup Steps\"\n\n# Automatically run the setup steps when they are changed to allow for easy validation, and"
},
{
"path": ".github/workflows/docker.yml",
"chars": 19166,
"preview": "# This workflow uses actions that are not certified by GitHub.\n# They are provided by a third-party and are governed by\n"
},
{
"path": ".github/workflows/editorconfig.yml",
"chars": 690,
"preview": "name: EditorConfig Checker\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n create_release:\n "
},
{
"path": ".github/workflows/gguf-publish.yml",
"chars": 1281,
"preview": "# This workflow will upload a Python Package using Twine when a GGUF release is created\n# For more information see: http"
},
{
"path": ".github/workflows/hip-quality-check.yml",
"chars": 2308,
"preview": "name: HIP quality check\n\non:\n workflow_dispatch: # allows manual triggering\n push:\n branches:\n - master\n pa"
},
{
"path": ".github/workflows/labeler.yml",
"chars": 352,
"preview": "name: \"Pull Request Labeler\"\non:\n- pull_request_target\n\njobs:\n labeler:\n permissions:\n contents: read\n pul"
},
{
"path": ".github/workflows/pre-tokenizer-hashes.yml",
"chars": 1606,
"preview": "name: Check Pre-Tokenizer Hashes\n\non:\n push:\n paths:\n - 'convert_hf_to_gguf.py'\n - 'conv"
},
{
"path": ".github/workflows/python-check-requirements.yml",
"chars": 914,
"preview": "name: Python check requirements.txt\n\non:\n push:\n paths:\n - '.github/workflows/python-check-requirements.yml'\n "
},
{
"path": ".github/workflows/python-lint.yml",
"chars": 833,
"preview": "name: flake8 Lint\n\non:\n push:\n branches:\n - master\n paths: [\n '.github/workflows/python-lint.yml',\n "
},
{
"path": ".github/workflows/python-type-check.yml",
"chars": 1134,
"preview": "name: Python Type-Check\n\non:\n push:\n paths:\n - '.github/workflows/python-type-check.yml'\n - 'ty.toml'\n "
},
{
"path": ".github/workflows/release.yml",
"chars": 43156,
"preview": "name: Release\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n create_release:\n description"
},
{
"path": ".github/workflows/server-sanitize.yml",
"chars": 3026,
"preview": "name: Server (sanitize)\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n sha:\n description:"
},
{
"path": ".github/workflows/server-self-hosted.yml",
"chars": 3577,
"preview": "name: Server (self-hosted)\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n sha:\n descripti"
},
{
"path": ".github/workflows/server-webui.yml",
"chars": 3267,
"preview": "name: Server WebUI\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n sha:\n description: 'Com"
},
{
"path": ".github/workflows/server.yml",
"chars": 4450,
"preview": "name: Server\n\non:\n workflow_dispatch: # allows manual triggering\n inputs:\n sha:\n description: 'Commit SH"
},
{
"path": ".github/workflows/update-ops-docs.yml",
"chars": 1351,
"preview": "name: Update Operations Documentation\n\non:\n push:\n paths:\n - 'docs/ops.md'\n - 'docs/ops/"
},
{
"path": ".github/workflows/winget.yml",
"chars": 1556,
"preview": "name: Update Winget Package\n\non:\n workflow_dispatch: # allows manual triggering\n schedule:\n - cron: '28 5 * * *' # "
},
{
"path": ".gitignore",
"chars": 1798,
"preview": "# Extensions\n\n*.a\n*.bat\n*.bin\n*.d\n*.dll\n*.dot\n*.etag\n*.exe\n*.gcda\n*.gcno\n*.gcov\n*.gguf\n*.gguf.json\n*.lastModified\n*.log\n"
},
{
"path": ".gitmodules",
"chars": 0,
"preview": ""
},
{
"path": ".pre-commit-config.yaml",
"chars": 447,
"preview": "# See https://pre-commit.com for more information\n# See https://pre-commit.com/hooks.html for more hooks\nexclude: prompt"
},
{
"path": "AGENTS.md",
"chars": 6857,
"preview": "# Instructions for llama.cpp\n\n> [!IMPORTANT]\n> This project does **not** accept pull requests that are fully or predomin"
},
{
"path": "AUTHORS",
"chars": 65628,
"preview": "# date: Mon Feb 2 08:45:04 EET 2026\n# this file is auto-generated by scripts/gen-authors.sh\n\nНияз Гарифзянов <112617865"
},
{
"path": "CLAUDE.md",
"chars": 104,
"preview": "IMPORTANT: Ensure you’ve thoroughly reviewed the [AGENTS.md](AGENTS.md) file before beginning any work.\n"
},
{
"path": "CMakeLists.txt",
"chars": 8948,
"preview": "cmake_minimum_required(VERSION 3.14...3.28) # for add_link_options and implicit target directories.\nproject(\"llama.cpp\" "
},
{
"path": "CMakePresets.json",
"chars": 4570,
"preview": "{\n \"version\": 4,\n \"configurePresets\": [\n {\n \"name\": \"base\",\n \"hidden\": true,\n \"generator\": "
},
{
"path": "CODEOWNERS",
"chars": 5455,
"preview": "# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs\n# multiplie "
},
{
"path": "CONTRIBUTING.md",
"chars": 11898,
"preview": "# Contributors\n\nThe project differentiates between 3 levels of contributors:\n\n- Contributors: people who have contribute"
},
{
"path": "LICENSE",
"chars": 1078,
"preview": "MIT License\n\nCopyright (c) 2023-2026 The ggml authors\n\nPermission is hereby granted, free of charge, to any person obtai"
},
{
"path": "Makefile",
"chars": 257,
"preview": "define newline\n\n\nendef\n\n$(error Build system changed:$(newline)\\\nThe Makefile build has been replaced by CMake.$(newline"
},
{
"path": "README.md",
"chars": 30152,
"preview": "# llama.cpp\n\n\n - [**Requirements**](#requirements)\n "
},
{
"path": "benches/dgx-spark/aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.html",
"chars": 773361,
"preview": "<!DOCTYPE html>\n<html>\n <head>\n <meta charset=\"utf-8\">\n <style>\n .message {\n "
},
{
"path": "benches/dgx-spark/aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547.json",
"chars": 120,
"preview": "{\n \"chars\": 2296.1916666666666,\n \"chars:std\": 986.051306946325,\n \"score\": 0.925,\n \"score:std\": 0.26339134382131846\n}"
},
{
"path": "benches/dgx-spark/aime25_openai__gpt-oss-120b-high_temp1.0_20251109_094547_allresults.json",
"chars": 1613851,
"preview": "{\n \"score\": 0.925,\n \"metrics\": {\n \"chars\": 2296.1916666666666,\n \"chars:std\": 986.051306946325,\n \"score:std\": "
},
{
"path": "benches/dgx-spark/dgx-spark.md",
"chars": 26156,
"preview": "## System info\n\n```bash\nuname --all\nLinux spark-17ed 6.11.0-1016-nvidia #16-Ubuntu SMP PREEMPT_DYNAMIC Sun Sep 21 16:52:"
},
{
"path": "benches/mac-m2-ultra/mac-m2-ultra.md",
"chars": 24581,
"preview": "## System info\n\n```bash\nuname -a\nDarwin gg-studio 25.2.0 Darwin Kernel Version 25.2.0: Tue Nov 18 21:07:05 PST 2025; roo"
},
{
"path": "benches/nemotron/nemotron-dgx-spark.md",
"chars": 8996,
"preview": "# NVIDIA DGX Spark\n\n## System info\n\n```bash\nuname --all\nLinux spark-17ed 6.11.0-1016-nvidia #16-Ubuntu SMP PREEMPT_DYNAM"
},
{
"path": "ci/README-MUSA.md",
"chars": 1052,
"preview": "## Running MUSA CI in a Docker Container\n\nAssuming `$PWD` is the root of the `llama.cpp` repository, follow these steps "
},
{
"path": "ci/README.md",
"chars": 1297,
"preview": "# CI\n\nThis CI implements heavy-duty workflows that run on self-hosted runners. Typically the purpose of these workflows "
},
{
"path": "ci/run.sh",
"chars": 28219,
"preview": "#!/usr/bin/env bash\n#\n# sample usage:\n#\n# mkdir tmp\n#\n# # CPU-only build\n# bash ./ci/run.sh ./tmp/results ./tmp/mnt\n#\n# "
},
{
"path": "cmake/arm64-apple-clang.cmake",
"chars": 555,
"preview": "set( CMAKE_SYSTEM_NAME Darwin )\nset( CMAKE_SYSTEM_PROCESSOR arm64 )\n\nset( target arm64-apple-darwin-macho )\n\nset( CMAKE_"
},
{
"path": "cmake/arm64-windows-llvm.cmake",
"chars": 592,
"preview": "set( CMAKE_SYSTEM_NAME Windows )\nset( CMAKE_SYSTEM_PROCESSOR arm64 )\n\nset( target arm64-pc-windows-msvc )\n\nset( CMAKE_C_"
},
{
"path": "cmake/build-info.cmake",
"chars": 1319,
"preview": "set(BUILD_NUMBER 0)\nset(BUILD_COMMIT \"unknown\")\nset(BUILD_COMPILER \"unknown\")\nset(BUILD_TARGET \"unknown\")\n\n# Look for gi"
},
{
"path": "cmake/common.cmake",
"chars": 2044,
"preview": "include(\"ggml/cmake/common.cmake\")\n\nfunction(llama_add_compile_flags)\n if (LLAMA_FATAL_WARNINGS)\n if (CMAKE_CX"
},
{
"path": "cmake/download-models.cmake",
"chars": 505,
"preview": "get_filename_component(DEST_DIR \"${DEST}\" DIRECTORY)\nfile(MAKE_DIRECTORY \"${DEST_DIR}\")\n\nif(NOT EXISTS \"${DEST}\")\n me"
},
{
"path": "cmake/git-vars.cmake",
"chars": 717,
"preview": "find_package(Git)\n\n# the commit's SHA1\nexecute_process(COMMAND\n \"${GIT_EXECUTABLE}\" describe --match=NeVeRmAtCh --alw"
},
{
"path": "cmake/license.cmake",
"chars": 1445,
"preview": "define_property(GLOBAL PROPERTY LICENSE_TEXT\n BRIEF_DOCS \"Embedded licenses\"\n FULL_DOCS \"Global string containing"
},
{
"path": "cmake/llama-config.cmake.in",
"chars": 979,
"preview": "set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@)\nset(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)\nset(LLAMA_BUILD_NUMBER @LLA"
},
{
"path": "cmake/llama.pc.in",
"chars": 311,
"preview": "prefix=@CMAKE_INSTALL_PREFIX@\nexec_prefix=@CMAKE_INSTALL_PREFIX@\nlibdir=@CMAKE_INSTALL_FULL_LIBDIR@\nincludedir=@CMAKE_IN"
},
{
"path": "cmake/riscv64-spacemit-linux-gnu-gcc.cmake",
"chars": 1357,
"preview": "set(CMAKE_SYSTEM_NAME Linux)\nset(CMAKE_SYSTEM_PROCESSOR riscv64)\nset(CMAKE_SYSTEM_VERSION 1)\n\nif (CMAKE_HOST_SYSTEM_PROC"
},
{
"path": "cmake/x64-windows-llvm.cmake",
"chars": 139,
"preview": "set( CMAKE_SYSTEM_NAME Windows )\nset( CMAKE_SYSTEM_PROCESSOR x86_64 )\n\nset( CMAKE_C_COMPILER clang )\nset( CMAKE_CXX_C"
},
{
"path": "common/CMakeLists.txt",
"chars": 4225,
"preview": "# common\n\nfind_package(Threads REQUIRED)\n\nllama_add_compile_flags()\n\n# Build info header\n\nif(EXISTS \"${PROJECT_SOURCE_DI"
},
{
"path": "common/arg.cpp",
"chars": 176632,
"preview": "#include \"arg.h\"\n\n#include \"chat.h\"\n#include \"common.h\"\n#include \"download.h\"\n#include \"hf-cache.h\"\n#include \"json-schem"
},
{
"path": "common/arg.h",
"chars": 5288,
"preview": "#pragma once\n\n#include \"common.h\"\n\n#include <set>\n#include <map>\n#include <string>\n#include <vector>\n#include <cstring>\n"
},
{
"path": "common/base64.hpp",
"chars": 12878,
"preview": "/*\nThis is free and unencumbered software released into the public domain.\n\nAnyone is free to copy, modify, publish, use"
},
{
"path": "common/build-info.cpp.in",
"chars": 198,
"preview": "int LLAMA_BUILD_NUMBER = @LLAMA_BUILD_NUMBER@;\nchar const *LLAMA_COMMIT = \"@LLAMA_BUILD_COMMIT@\";\nchar const *LLAMA_COMP"
},
{
"path": "common/chat-auto-parser-generator.cpp",
"chars": 20142,
"preview": "#include \"chat-auto-parser-helpers.h\"\n#include \"chat-auto-parser.h\"\n#include \"chat-peg-parser.h\"\n#include \"chat.h\"\n#incl"
},
{
"path": "common/chat-auto-parser-helpers.cpp",
"chars": 12940,
"preview": "#include \"chat-auto-parser-helpers.h\"\n\n#include \"chat-auto-parser.h\"\n#include \"chat-peg-parser.h\"\n#include \"chat.h\"\n#inc"
},
{
"path": "common/chat-auto-parser-helpers.h",
"chars": 4541,
"preview": "#pragma once\n\n#include \"chat-auto-parser.h\"\n#include \"peg-parser.h\"\n#include <functional>\n#include <optional>\n#include <"
},
{
"path": "common/chat-auto-parser.h",
"chars": 16499,
"preview": "#pragma once\n\n#include \"chat.h\"\n#include \"common.h\"\n#include \"jinja/caps.h\"\n#include \"peg-parser.h\"\n\n#include <chrono>\n#"
},
{
"path": "common/chat-diff-analyzer.cpp",
"chars": 61798,
"preview": "#include \"chat-auto-parser.h\"\n#include \"chat-auto-parser-helpers.h\"\n#include \"chat-peg-parser.h\"\n#include \"chat.h\"\n#incl"
},
{
"path": "common/chat-peg-parser.cpp",
"chars": 35700,
"preview": "#include \"chat-peg-parser.h\"\n\n#include \"chat-auto-parser.h\"\n#include \"ggml.h\"\n#include \"peg-parser.h\"\n\n#include <nlohman"
},
{
"path": "common/chat-peg-parser.h",
"chars": 10166,
"preview": "#pragma once\n\n#include \"chat.h\"\n#include \"peg-parser.h\"\n\n#include <map>\n#include <optional>\n#include <vector>\n\nclass com"
},
{
"path": "common/chat.cpp",
"chars": 81506,
"preview": "#include \"chat.h\"\n\n#include \"chat-auto-parser-helpers.h\"\n#include \"chat-auto-parser.h\"\n#include \"chat-peg-parser.h\"\n#inc"
},
{
"path": "common/chat.h",
"chars": 13443,
"preview": "// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.\n\n#pragma "
},
{
"path": "common/common.cpp",
"chars": 65272,
"preview": "#include \"ggml.h\"\n#include \"gguf.h\"\n\n#include \"common.h\"\n#include \"log.h\"\n#include \"llama.h\"\n#include \"sampling.h\"\n#incl"
},
{
"path": "common/common.h",
"chars": 43682,
"preview": "// Various helper functions and utilities\n\n#pragma once\n\n#include \"ggml-opt.h\"\n#include \"ggml.h\"\n#include \"llama-cpp.h\"\n"
},
{
"path": "common/console.cpp",
"chars": 41576,
"preview": "#include \"console.h\"\n#include \"log.h\"\n#include <vector>\n#include <iostream>\n#include <cassert>\n#include <cstddef>\n#inclu"
},
{
"path": "common/console.h",
"chars": 1170,
"preview": "// Console functions\n\n#pragma once\n\n#include \"common.h\"\n\n#include <functional>\n#include <string>\n#include <vector>\n\nenum"
},
{
"path": "common/debug.cpp",
"chars": 5849,
"preview": "#include \"debug.h\"\n\n#include \"log.h\"\n\n#include <cmath>\n#include <string>\n\nstatic std::string common_ggml_ne_string(const"
},
{
"path": "common/debug.h",
"chars": 1937,
"preview": "#pragma once\n#include \"common.h\"\n#include <string>\n#include <vector>\n#include <regex>\n\n// common debug functions and str"
},
{
"path": "common/download.cpp",
"chars": 29461,
"preview": "#include \"arg.h\"\n\n#include \"common.h\"\n#include \"log.h\"\n#include \"download.h\"\n#include \"hf-cache.h\"\n\n#define JSON_ASSERT "
},
{
"path": "common/download.h",
"chars": 3348,
"preview": "#pragma once\n\n#include <string>\n#include <vector>\n\nstruct common_params_model;\n\nusing common_header = std::pair<std"
},
{
"path": "common/hf-cache.cpp",
"chars": 24067,
"preview": "#include \"hf-cache.h\"\n\n#include \"common.h\"\n#include \"log.h\"\n#include \"http.h\"\n\n#define JSON_ASSERT GGML_ASSERT\n#include "
},
{
"path": "common/hf-cache.h",
"chars": 801,
"preview": "#pragma once\n\n#include <string>\n#include <vector>\n\n// Ref: https://huggingface.co/docs/hub/local-cache.md\n\nnamespace hf_"
},
{
"path": "common/http.h",
"chars": 2899,
"preview": "#pragma once\n\n#include <cpp-httplib/httplib.h>\n\nstruct common_http_url {\n std::string scheme;\n std::string user;\n "
},
{
"path": "common/jinja/README.md",
"chars": 4188,
"preview": "# llama.cpp Jinja Engine\n\nA Jinja template engine implementation in C++, originally inspired by [huggingface.js's jinja "
},
{
"path": "common/jinja/caps.cpp",
"chars": 16401,
"preview": "#include \"log.h\"\n#include \"value.h\"\n#include \"runtime.h\"\n#include \"caps.h\"\n\n// note: the json dependency is only for def"
},
{
"path": "common/jinja/caps.h",
"chars": 737,
"preview": "#pragma once\n\n#include \"runtime.h\"\n\n#include <string>\n#include <map>\n\nnamespace jinja {\n\nstruct caps {\n bool supports"
},
{
"path": "common/jinja/lexer.cpp",
"chars": 12850,
"preview": "#include \"lexer.h\"\n#include \"runtime.h\"\n\n#include <cctype>\n#include <functional>\n#include <map>\n#include <string>\n#inclu"
},
{
"path": "common/jinja/lexer.h",
"chars": 5354,
"preview": "#pragma once\n\n#include \"utils.h\"\n\n#include <cctype>\n#include <map>\n#include <stdexcept>\n#include <string>\n#include <vect"
},
{
"path": "common/jinja/parser.cpp",
"chars": 22873,
"preview": "#include \"lexer.h\"\n#include \"runtime.h\"\n#include \"parser.h\"\n\n#include <algorithm>\n#include <memory>\n#include <stdexcept>"
},
{
"path": "common/jinja/parser.h",
"chars": 533,
"preview": "#pragma once\n\n#include \"lexer.h\"\n#include \"runtime.h\"\n#include \"utils.h\"\n\n#include <string>\n#include <stdexcept>\n\nnamesp"
},
{
"path": "common/jinja/runtime.cpp",
"chars": 35848,
"preview": "#include \"lexer.h\"\n#include \"runtime.h\"\n#include \"value.h\"\n#include \"utils.h\"\n\n#include <string>\n#include <vector>\n#incl"
},
{
"path": "common/jinja/runtime.h",
"chars": 21068,
"preview": "#pragma once\n\n#include \"lexer.h\"\n#include \"value.h\"\n\n#include <cassert>\n#include <ctime>\n#include <memory>\n#include <sst"
},
{
"path": "common/jinja/string.cpp",
"chars": 5399,
"preview": "#include \"jinja/string.h\"\n#include \"jinja/value.h\"\n\n#include <algorithm>\n#include <functional>\n#include <optional>\n#incl"
},
{
"path": "common/jinja/string.h",
"chars": 1763,
"preview": "#pragma once\n\n#include <optional>\n#include <string>\n#include <vector>\n\n#include \"utils.h\"\n\nnamespace jinja {\n\n// allow d"
},
{
"path": "common/jinja/utils.h",
"chars": 5152,
"preview": "#pragma once\n\n#include <string>\n#include <sstream>\n#include <algorithm>\n#include <cstdint>\n#include <cstring>\n\nnamespace"
},
{
"path": "common/jinja/value.cpp",
"chars": 59322,
"preview": "#include \"runtime.h\"\n#include \"value.h\"\n\n// for converting from JSON to jinja values\n#include <nlohmann/json.hpp>\n\n#incl"
},
{
"path": "common/jinja/value.h",
"chars": 29803,
"preview": "#pragma once\n\n#include \"string.h\"\n#include \"utils.h\"\n\n#include <algorithm>\n#include <cmath>\n#include <cstdint>\n#include "
},
{
"path": "common/json-partial.cpp",
"chars": 15950,
"preview": "#include \"json-partial.h\"\n\n#include \"log.h\"\n\n#include <nlohmann/json.hpp>\n\n#include <string>\n#include <regex>\n\nusing jso"
},
{
"path": "common/json-partial.h",
"chars": 1816,
"preview": "#pragma once\n\n// TODO: use json_fwd.hpp when possible\n#include <nlohmann/json.hpp>\n\n// Healing marker (empty if the JSON"
},
{
"path": "common/json-schema-to-grammar.cpp",
"chars": 50160,
"preview": "#include \"json-schema-to-grammar.h\"\n#include \"common.h\"\n\n#include <nlohmann/json.hpp>\n\n#include <algorithm>\n#include <ma"
},
{
"path": "common/json-schema-to-grammar.h",
"chars": 1434,
"preview": "#pragma once\n\n#include <nlohmann/json_fwd.hpp>\n\n#include <functional>\n#include <memory>\n#include <string>\n\nstd::string j"
},
{
"path": "common/llguidance.cpp",
"chars": 8702,
"preview": "#include \"sampling.h\"\n#include \"log.h\"\n\n#ifdef LLAMA_USE_LLGUIDANCE\n\n# include \"llguidance.h\"\n# include <cmath>\n\ns"
},
{
"path": "common/log.cpp",
"chars": 11499,
"preview": "#include \"common.h\"\n#include \"log.h\"\n\n#include <chrono>\n#include <condition_variable>\n#include <cstdarg>\n#include <cstdi"
},
{
"path": "common/log.h",
"chars": 5274,
"preview": "#pragma once\n\n#include \"ggml.h\" // for ggml_log_level\n\n#define LOG_CLR_TO_EOL \"\\033[K\\r\"\n#define LOG_COL_DEFAULT \"\\033["
},
{
"path": "common/ngram-cache.cpp",
"chars": 11652,
"preview": "#include \"ngram-cache.h\"\n#include \"common.h\"\n#include \"log.h\"\n\n#include <cinttypes>\n#include <cstdint>\n#include <cstdio>"
},
{
"path": "common/ngram-cache.h",
"chars": 4147,
"preview": "#pragma once\n\n#include \"llama.h\"\n\n#include <unordered_map>\n#include <string>\n#include <vector>\n\n#define LLAMA_NGRAM_MIN "
},
{
"path": "common/ngram-map.cpp",
"chars": 19948,
"preview": "#include \"common.h\"\n#include \"log.h\"\n#include \"ngram-map.h\"\n\n#include <cinttypes>\n#include <cstdint>\n#include <cstdio>\n#"
},
{
"path": "common/ngram-map.h",
"chars": 4869,
"preview": "#pragma once\n//\n// common/ngram-map.h: structures used to manage a map from n-grams to a list of m-grams\n//\n// These str"
},
{
"path": "common/ngram-mod.cpp",
"chars": 1110,
"preview": "#include \"ngram-mod.h\"\n\n//\n// common_ngram_mod\n//\n\ncommon_ngram_mod::common_ngram_mod(uint16_t n, size_t size) : n(n), u"
},
{
"path": "common/ngram-mod.h",
"chars": 728,
"preview": "#pragma once\n\n#include <cstdint>\n#include <vector>\n#include <cstddef>\n\n//\n// common_ngram_mod\n// ref: https://github.com"
},
{
"path": "common/peg-parser.cpp",
"chars": 78208,
"preview": "#include \"peg-parser.h\"\n\n#include \"common.h\"\n#include \"json-schema-to-grammar.h\"\n#include \"log.h\"\n#include \"unicode.h\"\n\n"
},
{
"path": "common/peg-parser.h",
"chars": 18876,
"preview": "#pragma once\n\n#include <nlohmann/json_fwd.hpp>\n\n#include <memory>\n#include <unordered_map>\n#include <unordered_set>\n#inc"
},
{
"path": "common/preset.cpp",
"chars": 16468,
"preview": "#include \"arg.h\"\n#include \"preset.h\"\n#include \"peg-parser.h\"\n#include \"log.h\"\n#include \"download.h\"\n\n#include <fstream>\n"
},
{
"path": "common/preset.h",
"chars": 2825,
"preview": "#pragma once\n\n#include \"common.h\"\n#include \"arg.h\"\n\n#include <string>\n#include <vector>\n#include <map>\n#include <set>\n\n/"
},
{
"path": "common/reasoning-budget.cpp",
"chars": 9917,
"preview": "#include \"reasoning-budget.h\"\n#include \"common.h\"\n#include \"unicode.h\"\n\n#include \"log.h\"\n\n#include <cmath>\n#include <cst"
},
{
"path": "common/reasoning-budget.h",
"chars": 2677,
"preview": "#pragma once\n\n#include \"llama.h\"\n\n#include <cstdint>\n#include <vector>\n\nenum common_reasoning_budget_state {\n REASONI"
},
{
"path": "common/regex-partial.cpp",
"chars": 8363,
"preview": "#include \"regex-partial.h\"\n#include \"common.h\"\n#include <functional>\n#include <optional>\n\ncommon_regex::common_regex(con"
},
{
"path": "common/regex-partial.h",
"chars": 1507,
"preview": "#pragma once\n\n#include <regex>\n#include <string>\n\nenum common_regex_match_type {\n COMMON_REGEX_MATCH_TYPE_NONE,\n C"
},
{
"path": "common/sampling.cpp",
"chars": 31261,
"preview": "#include \"sampling.h\"\n\n#include \"common.h\"\n#include \"ggml.h\"\n#include \"log.h\"\n#include \"reasoning-budget.h\"\n\n#include <a"
},
{
"path": "common/sampling.h",
"chars": 5275,
"preview": "#pragma once\n\n#include \"llama.h\"\n\n#include \"common.h\"\n\n#include <string>\n#include <vector>\n\n// common_sampler extends ll"
},
{
"path": "common/speculative.cpp",
"chars": 38688,
"preview": "#include \"speculative.h\"\n\n#include \"common.h\"\n#include \"ggml.h\"\n#include \"llama.h\"\n#include \"log.h\"\n#include \"ngram-cach"
},
{
"path": "common/speculative.h",
"chars": 1520,
"preview": "#pragma once\n\n#include \"llama.h\"\n#include \"common.h\"\n\nstruct common_speculative;\n\n// comma separated list of all types\ns"
},
{
"path": "common/unicode.cpp",
"chars": 4160,
"preview": "#include \"unicode.h\"\n\n#include <algorithm>\n#include <cassert>\n#include <stdexcept>\n#include <string>\n#include <vector>\n\n"
},
{
"path": "common/unicode.h",
"chars": 1083,
"preview": "#pragma once\n\n#include <cstdint>\n#include <string_view>\n#include <vector>\n#include <string>\n\n// UTF-8 parsing utilities "
},
{
"path": "convert_hf_to_gguf.py",
"chars": 610905,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nfrom __future__ import annotations\n\nimport ast\nimport logging\nimport arg"
},
{
"path": "convert_hf_to_gguf_update.py",
"chars": 26542,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nimport logging\nimport os\nimport pathlib\nimport re\n\nimport requests\nimpor"
},
{
"path": "convert_llama_ggml_to_gguf.py",
"chars": 19112,
"preview": "#!/usr/bin/env python3\nfrom __future__ import annotations\n\nimport logging\nimport argparse\nimport os\nimport struct\nimport"
},
{
"path": "convert_lora_to_gguf.py",
"chars": 20917,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n\nfrom __future__ import annotations\n\nfrom dataclasses import dataclass\nim"
},
{
"path": "docs/android.md",
"chars": 6080,
"preview": "\n# Android\n\n## Build GUI binding using Android Studio\n\nImport the `examples/llama.android` directory into Android Studio"
},
{
"path": "docs/autoparser.md",
"chars": 31780,
"preview": "# Auto-Parser Architecture\n\nThe auto-parser automatically analyzes chat templates to determine how to parse model output"
},
{
"path": "docs/backend/BLIS.md",
"chars": 1657,
"preview": "BLIS Installation Manual\n------------------------\n\nBLIS is a portable software framework for high-performance BLAS-like "
},
{
"path": "docs/backend/CANN.md",
"chars": 13682,
"preview": "# llama.cpp for CANN\n\n - [Background](#background)\n - [News](#news)\n - [OS](#os)\n - [Hardware](#hardware)\n - [Model Supp"
},
{
"path": "docs/backend/CUDA-FEDORA.md",
"chars": 10557,
"preview": "# Setting Up CUDA on Fedora\n\nIn this guide we setup [Nvidia CUDA](https://docs.nvidia.com/cuda/) in a toolbox container."
},
{
"path": "docs/backend/OPENCL.md",
"chars": 10267,
"preview": "# llama.cpp for OpenCL\n\n- [Background](#background)\n- [OS](#os)\n- [Hardware](#hardware)\n- [DataType Supports](#datatype-"
},
{
"path": "docs/backend/OPENVINO.md",
"chars": 19997,
"preview": "# OpenVINO Backend for llama.cpp\n\n> [!NOTE]\n> Performance and memory optimizations, accuracy validation, broader quantiz"
},
{
"path": "docs/backend/SYCL.md",
"chars": 32818,
"preview": "# llama.cpp for SYCL\n\n- [Background](#background)\n- [Recommended Release](#recommended-release)\n- [News](#news)\n- [OS](#"
},
{
"path": "docs/backend/VirtGPU/configuration.md",
"chars": 6814,
"preview": "# GGML-VirtGPU Backend Configuration\n\nThis document describes the environment variables used by the ggml-virtgpu backend"
},
{
"path": "docs/backend/VirtGPU/development.md",
"chars": 6159,
"preview": "# Development and Testing\n\n## Development\n\n### Code Generation\n\nThe backend uses code generation from YAML configuration"
},
{
"path": "docs/backend/VirtGPU.md",
"chars": 6225,
"preview": "# GGML-VirtGPU Backend\n\nThe GGML-VirtGPU backend enables GGML applications to run machine\nlearning computations on host "
},
{
"path": "docs/backend/ZenDNN.md",
"chars": 8883,
"preview": "# llama.cpp for AMD ZenDNN\n\n> [!WARNING]\n> **Note:** ZenDNN is **not** the same as zDNN.\n> - **ZenDNN** (this page): AMD"
},
{
"path": "docs/backend/snapdragon/CMakeUserPresets.json",
"chars": 3047,
"preview": "{\n \"version\": 5,\n \"configurePresets\": [\n {\n \"name\": \"arm64-android-snapdragon\",\n \"hidden\": true,\n "
}
]
// ... and 2336 more files (download for full content)
About this extraction
This page contains the full source code of the ggml-org/llama.cpp GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 2536 files (77.2 MB), approximately 17.7M tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub-repo-to-text converter for AI. Built by Nikandr Surkov.