Repository: mozilla-ai/llamafile
Branch: main
Commit: b02f2e0c540f
Files: 621
Total size: 17.6 MB
Directory structure:
gitextract_8bcuz138/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── 01-bug-low.yml
│ │ ├── 02-bug-medium.yml
│ │ ├── 03-bug-high.yml
│ │ ├── 04-bug-critical.yml
│ │ ├── 05-enhancement.yml
│ │ ├── 06-refactor.yml
│ │ ├── 07-refactor.yml
│ │ └── config.yml
│ ├── labeler.yml
│ └── workflows/
│ ├── ci.yml
│ ├── docs.yml
│ ├── editorconfig.yml
│ ├── labeler.yml
│ └── update-llama-cpp.yml
├── .gitignore
├── .gitmodules
├── .llamafile_plugin/
│ └── .claude-plugin/
│ ├── marketplace.json
│ └── plugin.json
├── LICENSE
├── Makefile
├── README.md
├── README_0.10.0.md
├── RELEASE.md
├── build/
│ ├── config.mk
│ ├── cudacc
│ ├── deps.mk
│ ├── download-cosmocc.sh
│ ├── gperf
│ ├── htags
│ ├── llamafile-convert
│ ├── llamafile-upgrade-engine
│ ├── objdump
│ ├── rules.mk
│ ├── run
│ ├── sha256sum.c
│ └── tags.mk
├── cosmocc-override.cmake
├── docs/
│ ├── AGENTS.md
│ ├── commands/
│ │ ├── build.md
│ │ ├── check.md
│ │ └── clean.md
│ ├── creating_llamafiles.md
│ ├── example_llamafiles.md
│ ├── index.md
│ ├── quickstart.md
│ ├── running_llamafile.md
│ ├── skills/
│ │ └── llamafile/
│ │ ├── SKILL.md
│ │ ├── architecture.md
│ │ ├── building.md
│ │ ├── development.md
│ │ ├── testing.md
│ │ └── update_llamacpp.md
│ ├── source_installation.md
│ ├── support.md
│ ├── technical_details.md
│ ├── troubleshooting.md
│ └── whisperfile/
│ ├── getting-started.md
│ ├── gpu.md
│ ├── index.md
│ ├── packaging.md
│ ├── server.md
│ └── translate.md
├── llama.cpp.patches/
│ ├── README.md
│ ├── apply-patches.sh
│ ├── llamafile-files/
│ │ ├── BUILD.mk
│ │ ├── README.llamafile
│ │ └── common/
│ │ └── license.cpp
│ ├── patches/
│ │ ├── common_arg.cpp.patch
│ │ ├── common_chat.cpp.patch
│ │ ├── common_common.cpp.patch
│ │ ├── common_download.cpp.patch
│ │ ├── common_log.cpp.patch
│ │ ├── common_ngram-mod.cpp.patch
│ │ ├── ggml_src_ggml-backend-impl.h.patch
│ │ ├── ggml_src_ggml-backend-reg.cpp.patch
│ │ ├── ggml_src_ggml-backend.cpp.patch
│ │ ├── ggml_src_ggml-cuda_common.cuh.patch
│ │ ├── ggml_src_ggml-cuda_ggml-cuda.cu.patch
│ │ ├── ggml_src_ggml-cuda_solve_tri.cu.patch
│ │ ├── ggml_src_ggml-cuda_vendors_cuda.h.patch
│ │ ├── ggml_src_ggml-metal_ggml-metal.cpp.patch
│ │ ├── ggml_src_gguf.cpp.patch
│ │ ├── src_llama-mmap.cpp.patch
│ │ ├── src_llama-mmap.h.patch
│ │ ├── tools_server_server-queue.cpp.patch
│ │ ├── tools_server_server.cpp.patch
│ │ └── vendor_cpp-httplib_httplib.cpp.patch
│ └── renames.sh
├── llamafile/
│ ├── BUILD.mk
│ ├── args.cpp
│ ├── args.h
│ ├── bestline.c
│ ├── bestline.h
│ ├── build-functions.sh
│ ├── chatbot.h
│ ├── chatbot_api.cpp
│ ├── chatbot_backend.h
│ ├── chatbot_cli.cpp
│ ├── chatbot_comm.cpp
│ ├── chatbot_comp.cpp
│ ├── chatbot_direct.cpp
│ ├── chatbot_eval.cpp
│ ├── chatbot_file.cpp
│ ├── chatbot_help.cpp
│ ├── chatbot_hint.cpp
│ ├── chatbot_hist.cpp
│ ├── chatbot_logo.cpp
│ ├── chatbot_main.cpp
│ ├── chatbot_repl.cpp
│ ├── check_cpu.c
│ ├── color.h
│ ├── compute.cpp
│ ├── compute.h
│ ├── cuda.c
│ ├── cuda.sh
│ ├── datauri.cpp
│ ├── datauri.h
│ ├── extract_data_uris.cpp
│ ├── highlight/
│ │ ├── BUILD.mk
│ │ ├── color_bleeder.cpp
│ │ ├── highlight.cpp
│ │ ├── highlight.h
│ │ ├── highlight_ada.cpp
│ │ ├── highlight_asm.cpp
│ │ ├── highlight_basic.cpp
│ │ ├── highlight_bnf.cpp
│ │ ├── highlight_c.cpp
│ │ ├── highlight_c_test.cpp
│ │ ├── highlight_cmake.cpp
│ │ ├── highlight_cobol.cpp
│ │ ├── highlight_create.gperf
│ │ ├── highlight_csharp.cpp
│ │ ├── highlight_css.cpp
│ │ ├── highlight_d.cpp
│ │ ├── highlight_forth.cpp
│ │ ├── highlight_fortran.cpp
│ │ ├── highlight_go.cpp
│ │ ├── highlight_haskell.cpp
│ │ ├── highlight_html.cpp
│ │ ├── highlight_java.cpp
│ │ ├── highlight_js.cpp
│ │ ├── highlight_julia.cpp
│ │ ├── highlight_kotlin.cpp
│ │ ├── highlight_ld.cpp
│ │ ├── highlight_lisp.cpp
│ │ ├── highlight_lua.cpp
│ │ ├── highlight_m4.cpp
│ │ ├── highlight_make.cpp
│ │ ├── highlight_markdown.cpp
│ │ ├── highlight_matlab.cpp
│ │ ├── highlight_ocaml.cpp
│ │ ├── highlight_pascal.cpp
│ │ ├── highlight_perl.cpp
│ │ ├── highlight_php.cpp
│ │ ├── highlight_python.cpp
│ │ ├── highlight_python_test.cpp
│ │ ├── highlight_r.cpp
│ │ ├── highlight_ruby.cpp
│ │ ├── highlight_rust.cpp
│ │ ├── highlight_scala.cpp
│ │ ├── highlight_shell.cpp
│ │ ├── highlight_sql.cpp
│ │ ├── highlight_swift.cpp
│ │ ├── highlight_tcl.cpp
│ │ ├── highlight_test.cpp
│ │ ├── highlight_tex.cpp
│ │ ├── highlight_txt.cpp
│ │ ├── highlight_typescript.cpp
│ │ ├── highlight_zig.cpp
│ │ ├── is_keyword_ada.gperf
│ │ ├── is_keyword_ada_constant.gperf
│ │ ├── is_keyword_asm_prefix.gperf
│ │ ├── is_keyword_asm_qualifier.gperf
│ │ ├── is_keyword_basic.gperf
│ │ ├── is_keyword_basic_builtin.gperf
│ │ ├── is_keyword_basic_constant.gperf
│ │ ├── is_keyword_basic_type.gperf
│ │ ├── is_keyword_c.gperf
│ │ ├── is_keyword_c_builtin.gperf
│ │ ├── is_keyword_c_constant.gperf
│ │ ├── is_keyword_c_pod.gperf
│ │ ├── is_keyword_c_type.gperf
│ │ ├── is_keyword_cmake.gperf
│ │ ├── is_keyword_cobol.gperf
│ │ ├── is_keyword_cpp.gperf
│ │ ├── is_keyword_csharp.gperf
│ │ ├── is_keyword_csharp_constant.gperf
│ │ ├── is_keyword_css_at.gperf
│ │ ├── is_keyword_css_bang.gperf
│ │ ├── is_keyword_cxx.gperf
│ │ ├── is_keyword_d.gperf
│ │ ├── is_keyword_d_constant.gperf
│ │ ├── is_keyword_forth.gperf
│ │ ├── is_keyword_forth_def.gperf
│ │ ├── is_keyword_fortran.gperf
│ │ ├── is_keyword_fortran_builtin.gperf
│ │ ├── is_keyword_fortran_type.gperf
│ │ ├── is_keyword_go.gperf
│ │ ├── is_keyword_go_type.gperf
│ │ ├── is_keyword_haskell.gperf
│ │ ├── is_keyword_java.gperf
│ │ ├── is_keyword_java_constant.gperf
│ │ ├── is_keyword_js.gperf
│ │ ├── is_keyword_js_builtin.gperf
│ │ ├── is_keyword_js_constant.gperf
│ │ ├── is_keyword_julia.gperf
│ │ ├── is_keyword_kotlin.gperf
│ │ ├── is_keyword_ld.gperf
│ │ ├── is_keyword_ld_builtin.gperf
│ │ ├── is_keyword_ld_warning.gperf
│ │ ├── is_keyword_lisp.gperf
│ │ ├── is_keyword_lua.gperf
│ │ ├── is_keyword_lua_builtin.gperf
│ │ ├── is_keyword_lua_constant.gperf
│ │ ├── is_keyword_m4.gperf
│ │ ├── is_keyword_make.gperf
│ │ ├── is_keyword_make_builtin.gperf
│ │ ├── is_keyword_matlab.gperf
│ │ ├── is_keyword_matlab_builtin.gperf
│ │ ├── is_keyword_matlab_constant.gperf
│ │ ├── is_keyword_ocaml.gperf
│ │ ├── is_keyword_ocaml_builtin.gperf
│ │ ├── is_keyword_ocaml_constant.gperf
│ │ ├── is_keyword_pascal.gperf
│ │ ├── is_keyword_pascal_builtin.gperf
│ │ ├── is_keyword_pascal_type.gperf
│ │ ├── is_keyword_perl.gperf
│ │ ├── is_keyword_php.gperf
│ │ ├── is_keyword_php_constant.gperf
│ │ ├── is_keyword_python.gperf
│ │ ├── is_keyword_python_builtin.gperf
│ │ ├── is_keyword_python_constant.gperf
│ │ ├── is_keyword_r.gperf
│ │ ├── is_keyword_r_builtin.gperf
│ │ ├── is_keyword_r_constant.gperf
│ │ ├── is_keyword_ruby.gperf
│ │ ├── is_keyword_ruby_builtin.gperf
│ │ ├── is_keyword_ruby_constant.gperf
│ │ ├── is_keyword_rust.gperf
│ │ ├── is_keyword_rust_constant.gperf
│ │ ├── is_keyword_rust_type.gperf
│ │ ├── is_keyword_scala.gperf
│ │ ├── is_keyword_shell.gperf
│ │ ├── is_keyword_shell_builtin.gperf
│ │ ├── is_keyword_sql.gperf
│ │ ├── is_keyword_sql_type.gperf
│ │ ├── is_keyword_swift.gperf
│ │ ├── is_keyword_swift_builtin.gperf
│ │ ├── is_keyword_swift_constant.gperf
│ │ ├── is_keyword_swift_type.gperf
│ │ ├── is_keyword_tcl.gperf
│ │ ├── is_keyword_tcl_builtin.gperf
│ │ ├── is_keyword_tcl_type.gperf
│ │ ├── is_keyword_typescript.gperf
│ │ ├── is_keyword_typescript_type.gperf
│ │ ├── is_keyword_zig.gperf
│ │ ├── is_keyword_zig_builtin.gperf
│ │ ├── is_keyword_zig_constant.gperf
│ │ ├── is_keyword_zig_type.gperf
│ │ ├── util.cpp
│ │ └── util.h
│ ├── image.cpp
│ ├── image.h
│ ├── iqk_mul_mat.inc
│ ├── iqk_mul_mat_amd_avx2.cpp
│ ├── iqk_mul_mat_amd_zen4.cpp
│ ├── iqk_mul_mat_arm82.cpp
│ ├── llama.cpp
│ ├── llama.h
│ ├── llamafile.c
│ ├── llamafile.h
│ ├── macros.h
│ ├── main.cpp
│ ├── metal.c
│ ├── rocm.sh
│ ├── sgemm.cpp
│ ├── sgemm.h
│ ├── string.cpp
│ ├── string.h
│ ├── tinyblas-compat.h
│ ├── tinyblas.cu
│ ├── tinyblas.h
│ ├── tinyblas_cpu.h
│ ├── tinyblas_cpu_mixmul.inc
│ ├── tinyblas_cpu_mixmul_amd_avx.cpp
│ ├── tinyblas_cpu_mixmul_amd_avx2.cpp
│ ├── tinyblas_cpu_mixmul_amd_avx512f.cpp
│ ├── tinyblas_cpu_mixmul_amd_avxvnni.cpp
│ ├── tinyblas_cpu_mixmul_amd_fma.cpp
│ ├── tinyblas_cpu_mixmul_amd_zen4.cpp
│ ├── tinyblas_cpu_mixmul_arm80.cpp
│ ├── tinyblas_cpu_mixmul_arm82.cpp
│ ├── tinyblas_cpu_sgemm.inc
│ ├── tinyblas_cpu_sgemm_amd_avx.cpp
│ ├── tinyblas_cpu_sgemm_amd_avx2.cpp
│ ├── tinyblas_cpu_sgemm_amd_avx512f.cpp
│ ├── tinyblas_cpu_sgemm_amd_avxvnni.cpp
│ ├── tinyblas_cpu_sgemm_amd_fma.cpp
│ ├── tinyblas_cpu_sgemm_amd_zen4.cpp
│ ├── tinyblas_cpu_sgemm_arm80.cpp
│ ├── tinyblas_cpu_sgemm_arm82.cpp
│ ├── tinyblas_cpu_unsupported.cpp
│ ├── version.h
│ ├── xterm.cpp
│ ├── xterm.h
│ ├── zip.c
│ └── zip.h
├── localscore/
│ ├── BUILD.mk
│ ├── README.md
│ ├── apple.cpp
│ ├── apple.h
│ ├── ascii_digits.h
│ ├── benchmark.cpp
│ ├── benchmark.h
│ ├── cmd.cpp
│ ├── cmd.h
│ ├── cuda.bat
│ ├── cuda.sh
│ ├── doc/
│ │ └── troubleshooting.md
│ ├── http.cpp
│ ├── http.h
│ ├── localscore.cpp
│ ├── localscore.h
│ ├── main.cpp
│ ├── net.h
│ ├── nvml.cpp
│ ├── nvml.h
│ ├── powersampler.cpp
│ ├── powersampler.h
│ ├── printer.cpp
│ ├── printer.h
│ ├── rsmi.cpp
│ ├── rsmi.h
│ ├── system.cpp
│ ├── system.h
│ └── utils.h
├── mkdocs.yml
├── models/
│ └── TinyLLama-v0.1-5M-F16.gguf
├── stable-diffusion.cpp.patches/
│ ├── apply-patches.sh
│ ├── llamafile-files/
│ │ ├── BUILD.mk
│ │ ├── README.llamafile
│ │ ├── darts.h
│ │ ├── main.cpp
│ │ ├── miniz.h
│ │ ├── zip.c
│ │ └── zip.h
│ └── patches/
│ └── save.patch
├── tests/
│ ├── BUILD.mk
│ ├── extract_data_uris_test.cpp
│ ├── integration/
│ │ ├── README.md
│ │ ├── conftest.py
│ │ ├── pyproject.toml
│ │ ├── run_tests.sh
│ │ ├── tests/
│ │ │ ├── test_cli.py
│ │ │ ├── test_combined.py
│ │ │ ├── test_gpu.py
│ │ │ ├── test_multimodal.py
│ │ │ ├── test_server.py
│ │ │ ├── test_tool_calling.py
│ │ │ └── test_tui.py
│ │ └── utils/
│ │ └── llamafile.py
│ └── sgemm/
│ ├── BUILD.mk
│ ├── iqk_test.cpp
│ ├── q8_0_layout_test.cpp
│ ├── sgemm_matmul_test.cpp
│ ├── sgemm_sss_test.cpp
│ ├── sgemm_test_utils.h
│ └── sgemm_vecdot_test.cpp
├── third_party/
│ ├── BUILD.mk
│ ├── double-conversion/
│ │ ├── .gitignore
│ │ ├── AUTHORS
│ │ ├── BUILD.mk
│ │ ├── LICENSE
│ │ ├── README.llamafile
│ │ ├── SConscript
│ │ ├── bignum-dtoa.cc
│ │ ├── bignum-dtoa.h
│ │ ├── bignum.cc
│ │ ├── bignum.h
│ │ ├── cached-powers.cc
│ │ ├── cached-powers.h
│ │ ├── diy-fp.h
│ │ ├── double-conversion.h
│ │ ├── double-to-string.cc
│ │ ├── double-to-string.h
│ │ ├── fast-dtoa.cc
│ │ ├── fast-dtoa.h
│ │ ├── fixed-dtoa.cc
│ │ ├── fixed-dtoa.h
│ │ ├── ieee.h
│ │ ├── string-to-double.cc
│ │ ├── string-to-double.h
│ │ ├── strtod.cc
│ │ ├── strtod.h
│ │ └── utils.h
│ ├── mbedtls/
│ │ ├── BUILD.mk
│ │ ├── LICENSE
│ │ ├── README.cosmo
│ │ ├── README.llamafile
│ │ ├── aes.c
│ │ ├── aes.h
│ │ ├── aesce.c
│ │ ├── aesce.h
│ │ ├── aesni.c
│ │ ├── aesni.h
│ │ ├── asn1.h
│ │ ├── asn1parse.c
│ │ ├── asn1write.c
│ │ ├── asn1write.h
│ │ ├── base64.c
│ │ ├── base64.h
│ │ ├── bigmul.c
│ │ ├── bigmul4.c
│ │ ├── bignum.c
│ │ ├── bignum.h
│ │ ├── bignum_internal.h
│ │ ├── bigshift.c
│ │ ├── blake2b256.c
│ │ ├── ccm.c
│ │ ├── ccm.h
│ │ ├── certs.c
│ │ ├── certs.h
│ │ ├── chacha20.c
│ │ ├── chacha20.h
│ │ ├── chachapoly.c
│ │ ├── chachapoly.h
│ │ ├── check.inc
│ │ ├── chk.h
│ │ ├── cipher.c
│ │ ├── cipher.h
│ │ ├── cipher_internal.h
│ │ ├── cipher_wrap.c
│ │ ├── common.h
│ │ ├── config.h
│ │ ├── ctr_drbg.c
│ │ ├── ctr_drbg.h
│ │ ├── debug.c
│ │ ├── debug.h
│ │ ├── des.c
│ │ ├── des.h
│ │ ├── describecode.c
│ │ ├── dhm.c
│ │ ├── dhm.h
│ │ ├── ecdh.c
│ │ ├── ecdh.h
│ │ ├── ecdh_everest.c
│ │ ├── ecdh_everest.h
│ │ ├── ecdsa.c
│ │ ├── ecdsa.h
│ │ ├── ecp.c
│ │ ├── ecp.h
│ │ ├── ecp256.c
│ │ ├── ecp384.c
│ │ ├── ecp_curves.c
│ │ ├── ecp_internal.h
│ │ ├── ecpshl.c
│ │ ├── endian.h
│ │ ├── entropy.c
│ │ ├── entropy.h
│ │ ├── entropy_poll.c
│ │ ├── entropy_poll.h
│ │ ├── error.c
│ │ ├── error.h
│ │ ├── everest.c
│ │ ├── everest.h
│ │ ├── fastdiv.h
│ │ ├── formatclientciphers.c
│ │ ├── gcm.c
│ │ ├── gcm.h
│ │ ├── getalertdescription.c
│ │ ├── getciphersuite.c
│ │ ├── getciphersuitename.c
│ │ ├── getsslstatename.c
│ │ ├── hkdf.c
│ │ ├── hkdf.h
│ │ ├── hmac_drbg.c
│ │ ├── hmac_drbg.h
│ │ ├── iana.h
│ │ ├── isciphersuitegood.c
│ │ ├── karatsuba.c
│ │ ├── math.h
│ │ ├── md.c
│ │ ├── md.h
│ │ ├── md5.c
│ │ ├── md5.h
│ │ ├── md5t.c
│ │ ├── mdtype.c
│ │ ├── memory_buffer_alloc.c
│ │ ├── memory_buffer_alloc.h
│ │ ├── net_sockets.c
│ │ ├── net_sockets.h
│ │ ├── nist_kw.c
│ │ ├── nist_kw.h
│ │ ├── notice.c
│ │ ├── oid.c
│ │ ├── oid.h
│ │ ├── param.c
│ │ ├── pem.c
│ │ ├── pem.h
│ │ ├── pk.c
│ │ ├── pk.h
│ │ ├── pk_internal.h
│ │ ├── pk_wrap.c
│ │ ├── pkcs5.c
│ │ ├── pkcs5.h
│ │ ├── pkparse.c
│ │ ├── pktype.c
│ │ ├── pkwrite.c
│ │ ├── platform.c
│ │ ├── platform.h
│ │ ├── poly1305.c
│ │ ├── poly1305.h
│ │ ├── profile.h
│ │ ├── rando.c
│ │ ├── rsa.c
│ │ ├── rsa.h
│ │ ├── rsa_internal.c
│ │ ├── rsa_internal.h
│ │ ├── san.c
│ │ ├── san.h
│ │ ├── secp256r1.c
│ │ ├── secp384r1.c
│ │ ├── select.h
│ │ ├── sha1.c
│ │ ├── sha1.h
│ │ ├── sha1t.c
│ │ ├── sha256.c
│ │ ├── sha256.h
│ │ ├── sha256t.c
│ │ ├── sha512.c
│ │ ├── sha512.h
│ │ ├── sha512t.c
│ │ ├── shakedescription.c
│ │ ├── shiftright-avx.c
│ │ ├── shiftright.c
│ │ ├── sigalg.c
│ │ ├── speed.sh
│ │ ├── srtp.c
│ │ ├── ssl.h
│ │ ├── ssl_cache.c
│ │ ├── ssl_cache.h
│ │ ├── ssl_ciphersuites.c
│ │ ├── ssl_ciphersuites.h
│ │ ├── ssl_cli.c
│ │ ├── ssl_cookie.c
│ │ ├── ssl_cookie.h
│ │ ├── ssl_internal.h
│ │ ├── ssl_invasive.h
│ │ ├── ssl_msg.c
│ │ ├── ssl_srv.c
│ │ ├── ssl_ticket.c
│ │ ├── ssl_ticket.h
│ │ ├── ssl_tls.c
│ │ ├── ssl_tls13_keys.c
│ │ ├── ssl_tls13_keys.h
│ │ ├── sslroot/
│ │ │ ├── amazon.pem
│ │ │ ├── certum.pem
│ │ │ ├── comodo.pem
│ │ │ ├── digicert.pem
│ │ │ ├── geotrust.pem
│ │ │ ├── globalsign.pem
│ │ │ ├── godaddy.pem
│ │ │ ├── google.pem
│ │ │ ├── isrg.pem
│ │ │ ├── quovadis.pem
│ │ │ ├── redbean.pem
│ │ │ ├── show.sh
│ │ │ ├── starfield.pem
│ │ │ └── verisign.pem
│ │ ├── sslroots.c
│ │ ├── version.h
│ │ ├── x509.c
│ │ ├── x509.h
│ │ ├── x509_create.c
│ │ ├── x509_crl.c
│ │ ├── x509_crl.h
│ │ ├── x509_crt.c
│ │ ├── x509_crt.h
│ │ ├── x509_csr.c
│ │ ├── x509_csr.h
│ │ ├── x509write_crt.c
│ │ ├── x509write_csr.c
│ │ └── zeroize.c
│ ├── sqlite/
│ │ ├── BUILD.mk
│ │ ├── README.llamafile
│ │ ├── shell.c
│ │ ├── sqlite3.c
│ │ ├── sqlite3.h
│ │ └── sqlite3ext.h
│ └── stb/
│ ├── BUILD.mk
│ ├── README.llamafile
│ ├── stb_image.c
│ ├── stb_image.h
│ ├── stb_image_resize2.c
│ ├── stb_image_resize2.h
│ ├── stb_image_write.c
│ ├── stb_image_write.h
│ ├── stb_vorbis.c
│ └── stb_vorbis.h
├── tools/
│ ├── check_patches.sh
│ └── generate_patches.sh
├── whisper.cpp.patches/
│ ├── apply-patches.sh
│ ├── llamafile-files/
│ │ └── BUILD.mk
│ ├── patches/
│ │ ├── examples_cli_cli.cpp.patch
│ │ ├── examples_common.cpp.patch
│ │ ├── examples_miniaudio.h.patch
│ │ ├── examples_server_server.cpp.patch
│ │ └── ggml_src_ggml-backend-reg.cpp.patch
│ └── renames.sh
└── whisperfile/
├── BUILD.mk
├── color.cpp
├── color.h
├── mic2raw.cpp
├── mic2txt.cpp
├── slurp.cpp
├── slurp.h
├── stream.cpp
├── whisper-server.1
├── whisper-server.cpp
├── whisperfile.1
└── whisperfile.cpp
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/ISSUE_TEMPLATE/01-bug-low.yml
================================================
name: Low Severity Bug
description: Used to report low severity bugs in llamafiles (e.g. cosmetic issues, non-critical UI glitches)
title: "Bug: "
labels: ["bug", "low severity"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
Please include information about your system, the steps to reproduce the bug,
and the version of llamafiles that you are using.
If possible, please provide a minimal code example that reproduces the bug.
You may also consider using function call tracing `--ftrace` or the lighter system call tracing `--strace`
for additional technical logging that may allow us to narrow down where the fault occurred.
- type: input
id: contact
attributes:
label: Contact Details
description: How can we get in touch with you if we need more info?
placeholder: ex. email@example.com
validations:
required: false
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen?
placeholder: Tell us what you see!
validations:
required: true
- type: textarea
id: version
attributes:
label: Version
description: What version of our software are you running? (use `--version` to get a version string)
placeholder: "llamafile v0.8.4"
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: What operating system are you seeing the problem on?
multiple: true
options:
- Linux
- Mac
- Windows
- FreeBSD
- OpenBSD
- NetBSD
- BIOS
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
================================================
FILE: .github/ISSUE_TEMPLATE/02-bug-medium.yml
================================================
name: Medium Severity Bug
description: Used to report medium severity bugs in llamafiles (e.g. malfunctioning features that are generally still usable)
title: "Bug: "
labels: ["bug", "medium severity"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
Please include information about your system, the steps to reproduce the bug,
and the version of llamafiles that you are using.
If possible, please provide a minimal code example that reproduces the bug.
You may also consider using function call tracing `--ftrace` or the lighter system call tracing `--strace`
for additional technical logging that may allow us to narrow down where the fault occurred.
- type: input
id: contact
attributes:
label: Contact Details
description: How can we get in touch with you if we need more info?
placeholder: ex. email@example.com
validations:
required: false
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen?
placeholder: Tell us what you see!
validations:
required: true
- type: textarea
id: version
attributes:
label: Version
description: What version of our software are you running? (use `--version` to get a version string)
placeholder: "llamafile v0.8.4"
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: What operating system are you seeing the problem on?
multiple: true
options:
- Linux
- Mac
- Windows
- FreeBSD
- OpenBSD
- NetBSD
- BIOS
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
================================================
FILE: .github/ISSUE_TEMPLATE/03-bug-high.yml
================================================
name: High Severity Bug
description: Used to report high severity bugs in llamafiles (e.g. malfunctioning features hindering an important common workflow)
title: "Bug: "
labels: ["bug", "high severity"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
Please include information about your system, the steps to reproduce the bug,
and the version of llamafiles that you are using.
If possible, please provide a minimal code example that reproduces the bug.
You may also consider using function call tracing `--ftrace` or the lighter system call tracing `--strace`
for additional technical logging that may allow us to narrow down where the fault occurred.
- type: input
id: contact
attributes:
label: Contact Details
description: How can we get in touch with you if we need more info?
placeholder: ex. email@example.com
validations:
required: false
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen?
placeholder: Tell us what you see!
validations:
required: true
- type: textarea
id: version
attributes:
label: Version
description: What version of our software are you running? (use `--version` to get a version string)
placeholder: "llamafile v0.8.4"
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: What operating system are you seeing the problem on?
multiple: true
options:
- Linux
- Mac
- Windows
- FreeBSD
- OpenBSD
- NetBSD
- BIOS
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
================================================
FILE: .github/ISSUE_TEMPLATE/04-bug-critical.yml
================================================
name: Critical Severity Bug
description: Used to report critical severity bugs in llamafiles (e.g. crashes, corruption, data loss)
title: "Bug: "
labels: ["bug", "critical severity"]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
Please include information about your system, the steps to reproduce the bug,
and the version of llamafiles that you are using.
If possible, please provide a minimal code example that reproduces the bug.
You may also consider using function call tracing `--ftrace` or the lighter system call tracing `--strace`
for additional technical logging that may allow us to narrow down where the fault occurred.
- type: input
id: contact
attributes:
label: Contact Details
description: How can we get in touch with you if we need more info?
placeholder: ex. email@example.com
validations:
required: false
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Also tell us, what did you expect to happen?
placeholder: Tell us what you see!
validations:
required: true
- type: textarea
id: version
attributes:
label: Version
description: What version of our software are you running? (use `--version` to get a version string)
placeholder: "llamafile v0.8.4"
validations:
required: true
- type: dropdown
id: operating-system
attributes:
label: What operating system are you seeing the problem on?
multiple: true
options:
- Linux
- Mac
- Windows
- FreeBSD
- OpenBSD
- NetBSD
- BIOS
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
render: shell
================================================
FILE: .github/ISSUE_TEMPLATE/05-enhancement.yml
================================================
name: Enhancement template
description: Used to request enhancements for llamafiles
title: "Feature Request: "
labels: ["enhancement"]
body:
- type: markdown
attributes:
value: |
[Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed need to be implemented.](https://github.com/Mozilla-Ocho/llamafile/discussions/categories/ideas)
- type: checkboxes
id: prerequisites
attributes:
label: Prerequisites
description: Please confirm the following before submitting your enhancement request.
options:
- label: I am running the latest code. Mention the version if possible as well.
required: true
- label: I carefully followed the [README.md](https://github.com/Mozilla-Ocho/llamafile/blob/master/README.md).
required: true
- label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
required: true
- label: I reviewed the [Discussions](https://github.com/Mozilla-Ocho/llamafile/discussions), and have a new and useful enhancement to share.
required: true
- type: textarea
id: feature-description
attributes:
label: Feature Description
description: Please provide a detailed written description of what you were trying to do, and what you expected `llamafiles` to do as an enhancement.
placeholder: Detailed description of the enhancement
validations:
required: true
- type: textarea
id: motivation
attributes:
label: Motivation
description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llamafiles` users.
placeholder: Explanation of why this feature is needed and its benefits
validations:
required: true
- type: textarea
id: possible-implementation
attributes:
label: Possible Implementation
description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
placeholder: Detailed description of potential implementation
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/06-refactor.yml
================================================
name: Refactor (Maintainers)
description: Used to track refactoring opportunities
title: "Refactor: "
labels: ["refactor"]
body:
- type: markdown
attributes:
value: |
Don't forget to [check for existing refactor issue tickets](https://github.com/Mozilla-Ocho/llamafile/issues?q=is%3Aopen+is%3Aissue+label%3Arefactor) in case it's already covered.
You may also want to check [pull requests with the refactor label](https://github.com/Mozilla-Ocho/llamafile/pulls?q=is%3Aopen+is%3Apr+label%3Arefactor) for duplicates.
- type: textarea
id: background-description
attributes:
label: Background Description
description: Please provide a detailed written description of the pain points you are trying to solve.
placeholder: Detailed description behind your motivation to request refactor
validations:
required: true
- type: textarea
id: possible-approaches
attributes:
label: Possible Refactor Approaches
description: If you have some idea of possible approaches to solve this problem, describe them here. You may want to make it a todo list.
placeholder: Your idea of possible refactoring opportunity/approaches
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/07-refactor.yml
================================================
name: Refactor (Maintainers)
description: Used to track refactoring opportunities
title: "Refactor: "
labels: ["refactor"]
body:
- type: markdown
attributes:
value: |
Don't forget to [check for existing refactor issue tickets](https://github.com/Mozilla-Ocho/llamafile/issues?q=is%3Aopen+is%3Aissue+label%3Arefactor) in case it's already covered.
You may also want to check [pull requests with the refactor label](https://github.com/Mozilla-Ocho/llamafile/pulls?q=is%3Aopen+is%3Apr+label%3Arefactor) for duplicates.
- type: textarea
id: background-description
attributes:
label: Background Description
description: Please provide a detailed written description of the pain points you are trying to solve.
placeholder: Detailed description behind your motivation to request refactor
validations:
required: true
- type: textarea
id: possible-approaches
attributes:
label: Possible Refactor Approaches
description: If you have some idea of possible approaches to solve this problem, describe them here. You may want to make it a todo list.
placeholder: Your idea of possible refactoring opportunity/approaches
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
contact_links:
- name: Got an idea?
url: https://github.com/Mozilla-Ocho/llamafile/discussions/categories/ideas
about: Pop it there. It may then become an enhancement ticket.
- name: Got a question?
url: https://github.com/Mozilla-Ocho/llamafile/discussions/categories/q-a
about: Ask a question there!
- name: Is your problem more about the underlying llama.cpp engine?
url: https://github.com/ggerganov/llama.cpp/issues/new/choose
about: Head to the llama.cpp reporting page instead
================================================
FILE: .github/labeler.yml
================================================
# https://github.com/actions/labeler
#
# Label rules for the pull-request labeler. Each top-level key is a label
# name; the label is driven by the `changed-files` globs listed beneath it.
documentation:
  - changed-files:
      - any-glob-to-any-file:
          - README.md
          - LICENSE
          - docs/**
testing:
  - changed-files:
      - any-glob-to-any-file:
          - tests/**
build:
  - changed-files:
      - any-glob-to-any-file:
          # Make-based build files that exist in this repository's tree.
          # Without these the label could only match CMake paths that the
          # tree does not contain.
          - Makefile
          - build/**
          - '**/BUILD.mk'
          - cosmocc-override.cmake
          # Original CMake / coverage entries, kept so existing behaviour is
          # a strict subset of the new behaviour.
          - cmake/**
          - CMakeLists.txt
          - CMakePresets.json
          - codecov.yml
llama.cpp:
  - changed-files:
      - any-glob-to-any-file:
          - llama.cpp/**
          # Patches applied on top of llama.cpp live in a sibling directory.
          - llama.cpp.patches/**
llamafile:
  - changed-files:
      - any-glob-to-any-file: llamafile/**
devops:
  - changed-files:
      - any-glob-to-any-file:
          - .devops/**
          - .github/**
================================================
FILE: .github/workflows/ci.yml
================================================
# CI workflow.
#
# Triggers: manual dispatch, plus pushes and pull requests that target the
# master, main or fix branches.
#
# The single job installs make and patch, runs `make setup`, restores the
# cosmocc cache (keyed on the hash of every config.mk file), runs the
# `cosmocc-ci` make target, builds with make, then packages the bundled
# TinyLLama model together with an `.args` file into `tinyllama.llamafile`
# and runs it once in --cli mode as a smoke test.
#
# NOTE(review): the cache step comes after `make setup`; confirm that this
# ordering is intended for the cached paths (.cosmocc, o/depend,
# o/depend.test).
# NOTE(review): the packaging step calls o//third_party/zipalign/zipalign,
# but the directory listing in this dump shows no third_party/zipalign
# sources - confirm where that binary is built from.
# NOTE(review): the prompt passed to -p is single-quoted, so the shell hands
# `\n` to the program as a literal backslash-n - confirm that is expected.
name: CI
on:
workflow_dispatch:
push:
branches: [ master, main, fix ]
pull_request:
branches: [ master, main, fix ]
jobs:
# NOTE(review): the job id says "focal" but the job runs on ubuntu-latest.
ubuntu-focal-make:
timeout-minutes: 60
runs-on: ubuntu-latest
steps:
- name: Clone
id: checkout
uses: actions/checkout@v4
- name: Dependencies
id: depends
run: |
sudo apt-get update
sudo apt-get install make patch
- name: Setup dependencies and patch dependencies
run: |
make setup
- name: Cache cosmocc toolchain
id: cache-cosmocc-toolchain
uses: actions/cache@v4
env:
cache-name: cache-cosmocc-toolchain
with:
path: |
.cosmocc
o/depend
o/depend.test
key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/config.mk') }}
restore-keys: |
${{ runner.os }}-build-${{ env.cache-name }}
- name: Setup cosmocc and ape loader
run: |
sudo make cosmocc-ci PREFIX=/usr
- name: Build
run: |
sudo make -j $(nproc)
- name: Make Llamafile
run: |
cp ./models/TinyLLama-v0.1-5M-F16.gguf tinyllama.gguf
cat << EoF > .args
-m
tinyllama.gguf
...
EoF
cp o//llamafile/llamafile \
tinyllama.llamafile
o//third_party/zipalign/zipalign -j0 \
tinyllama.llamafile \
tinyllama.gguf \
.args
- name: Execute LLM CLI CPU
run: |
./tinyllama.llamafile --cli -p '## Famous Speech\n\nFour score and seven'
================================================
FILE: .github/workflows/docs.yml
================================================
# Documentation workflow.
#
# Triggers: pushes to main and pull requests that touch mkdocs.yml, docs/**
# or this workflow file, plus manual dispatch.
#
# Pull requests only build the site (`mkdocs build -s`); pushes and manual
# runs publish it with `mkdocs gh-deploy --force`. The job requests
# `contents: write` and sets a bot git identity before the publish step.
#
# NOTE(review): `-s` is presumably mkdocs' strict mode - confirm.
# NOTE(review): the pull_request trigger has no branch filter, unlike the
# push trigger - confirm that is intended.
name: Documentation
on:
push:
branches: [main]
paths:
- mkdocs.yml
- 'docs/**'
- '.github/workflows/docs.yml'
pull_request:
paths:
- mkdocs.yml
- 'docs/**'
- '.github/workflows/docs.yml'
workflow_dispatch:
jobs:
docs:
permissions:
contents: write
runs-on: ubuntu-latest
steps:
- name: Check out the repository
uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.x'
- name: Configure git
run: |
git config user.name 'github-actions[bot]'
git config user.email 'github-actions[bot]@users.noreply.github.com'
- name: Install dependencies
run: |
pip install mkdocs-material
- name: Build docs
if: github.event_name == 'pull_request'
run: mkdocs build -s
- name: Publish docs
if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
run: mkdocs gh-deploy --force
================================================
FILE: .github/workflows/editorconfig.yml
================================================
# EditorConfig workflow: checks out the repository, installs
# editorconfig-checker through its action, and runs `editorconfig-checker`.
#
# NOTE(review): push and pull_request are filtered to `master` only, while
# this dump is of branch `main` and the CI workflow also lists `main`; as
# written this check would only run automatically for `master` - confirm
# whether `main` should be added.
# NOTE(review): the `create_release` dispatch input is declared but never
# referenced by any step in this workflow.
# NOTE(review): the checker action is referenced at `@main` rather than a
# tagged version.
name: EditorConfig Checker
on:
workflow_dispatch: # allows manual triggering
inputs:
create_release:
description: 'Create new release'
required: true
type: boolean
push:
branches:
- master
pull_request:
branches:
- master
# One concurrency group per workflow and ref (or per run when there is no
# head ref); a newer run cancels the one in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
editorconfig:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: editorconfig-checker/action-editorconfig-checker@main
- run: editorconfig-checker
================================================
FILE: .github/workflows/labeler.yml
================================================
# Applies labels to pull requests according to .github/labeler.yml.
#
# The workflow runs on pull_request_target, so the checkout names the base
# repository explicitly instead of the pull request head. The slug now matches
# the repository's current home (mozilla-ai/llamafile) rather than relying on
# the redirect from the old Mozilla-Ocho organisation.
name: "Pull Request Labeler"

on:
  - pull_request_target

jobs:
  labeler:
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          repository: "mozilla-ai/llamafile"
      - uses: actions/labeler@v5
        with:
          configuration-path: '.github/labeler.yml'
================================================
FILE: .github/workflows/update-llama-cpp.yml
================================================
# Weekly job that checks whether the llama.cpp submodule is behind
# origin/master and, if so, opens a pull request that bumps it.
name: Update llama.cpp submodule
on:
schedule:
# every Monday at 00:00 UTC
- cron: '0 0 * * 1'
workflow_dispatch:
jobs:
update-submodule:
runs-on: ubuntu-latest
# the job pushes a branch and opens a pull request
permissions:
contents: write
pull-requests: write
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
- name: Configure git
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
# compares the pinned submodule commit with origin/master and exports
# needs_update plus the full and 7-character commit ids as step outputs
- name: Check for submodule updates
id: check
run: |
cd llama.cpp
CURRENT_COMMIT=$(git rev-parse HEAD)
echo "Current commit: $CURRENT_COMMIT"
git fetch origin master
LATEST_COMMIT=$(git rev-parse origin/master)
echo "Latest commit: $LATEST_COMMIT"
if [ "$CURRENT_COMMIT" != "$LATEST_COMMIT" ]; then
echo "needs_update=true" >> "$GITHUB_OUTPUT"
echo "current_commit=$CURRENT_COMMIT" >> "$GITHUB_OUTPUT"
echo "latest_commit=$LATEST_COMMIT" >> "$GITHUB_OUTPUT"
echo "current_short=$(echo $CURRENT_COMMIT | cut -c1-7)" >> "$GITHUB_OUTPUT"
echo "latest_short=$(echo $LATEST_COMMIT | cut -c1-7)" >> "$GITHUB_OUTPUT"
else
echo "needs_update=false" >> "$GITHUB_OUTPUT"
echo "Submodule is up to date"
fi
# skipped when the submodule is current; also exits early (successfully) if
# the update branch already exists or an update PR is already open
- name: Update submodule and create PR
if: steps.check.outputs.needs_update == 'true'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
BRANCH_NAME="update-llama-cpp-${{ steps.check.outputs.latest_short }}"
if git ls-remote --heads origin "$BRANCH_NAME" | grep -q "$BRANCH_NAME"; then
echo "Branch $BRANCH_NAME already exists, skipping"
exit 0
fi
EXISTING_PR=$(gh pr list --search "Update llama.cpp submodule" --state open --json number --jq '.[0].number')
if [ -n "$EXISTING_PR" ]; then
echo "PR #$EXISTING_PR already exists for updating llama.cpp, skipping"
exit 0
fi
git checkout -b "$BRANCH_NAME"
cd llama.cpp
git checkout origin/master
cd ..
git add llama.cpp
git commit -m "Update llama.cpp submodule to ${{ steps.check.outputs.latest_short }}"
git push origin "$BRANCH_NAME"
gh pr create \
--title "Update llama.cpp submodule to ${{ steps.check.outputs.latest_short }}" \
--body "This PR updates the llama.cpp submodule from \`${{ steps.check.outputs.current_short }}\` to \`${{ steps.check.outputs.latest_short }}\`.
**Changes:** https://github.com/ggerganov/llama.cpp/compare/${{ steps.check.outputs.current_commit }}...${{ steps.check.outputs.latest_commit }}
---
*This PR was automatically created by the update-llama-cpp workflow.*" \
--head "$BRANCH_NAME"
================================================
FILE: .gitignore
================================================
# -*- conf -*-
/o
/.cosmocc
/TAGS
/HTAGS
/cosmocc
/perf.data
/perf.data.old
/trace.json
/*.log
/*.bin
/*.mp3
.claude
CLAUDE.md
# python
*.pyc
__init__.py
uv.lock
================================================
FILE: .gitmodules
================================================
[submodule "whisper.cpp"]
path = whisper.cpp
url = https://github.com/ggerganov/whisper.cpp.git
[submodule "stable-diffusion.cpp"]
path = stable-diffusion.cpp
url = https://github.com/leejet/stable-diffusion.cpp.git
[submodule "llama.cpp"]
path = llama.cpp
url = https://github.com/ggerganov/llama.cpp.git
[submodule "third_party/zipalign"]
path = third_party/zipalign
url = https://github.com/jart/zipalign.git
================================================
FILE: .llamafile_plugin/.claude-plugin/marketplace.json
================================================
{
"name": "llamafile-local",
"description": "Local llamafile plugin marketplace",
"owner": {
"name": "Mozilla AI",
"email": "davide@mozilla.ai"
},
"plugins": [
{
"name": "llamafile",
"description": "Build guidance and commands for the llamafile project",
"version": "0.1.1",
"author": {
"name": "Mozilla AI",
"email": "davide@mozilla.ai"
},
"source": "./"
}
]
}
================================================
FILE: .llamafile_plugin/.claude-plugin/plugin.json
================================================
{
"name": "llamafile",
"version": "0.1.1",
"description": "Build guidance and commands for the llamafile project"
}
================================================
FILE: LICENSE
================================================
The Apache 2.0 License
Copyright 2023 Mozilla Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: Makefile
================================================
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
# recipes run under /bin/sh; make's built-in rules and suffix rules are disabled
SHELL = /bin/sh
MAKEFLAGS += --no-builtin-rules
.SUFFIXES:
# delete a target's file when its recipe fails
.DELETE_ON_ERROR:
# NOTE(review): this declares a target named .FEATURES; confirm it has the intended effect
.FEATURES: output-sync
# setup and reset-repo targets need to run before build/config.mk checks make version
# (the `claude` goal is excluded from the includes in the same way)
ifeq ($(filter $(MAKECMDGOALS),setup reset-repo claude),)
include build/config.mk
include build/rules.mk
include third_party/BUILD.mk
include llama.cpp/BUILD.mk
include whisper.cpp/BUILD.mk
include llamafile/BUILD.mk
include whisperfile/BUILD.mk
include tests/BUILD.mk
endif
# the root package is `o//` by default
# building a package also builds its sub-packages
.PHONY: o/$(MODE)/
o/$(MODE)/: o/$(MODE)/llamafile \
o/$(MODE)/llama.cpp \
o/$(MODE)/whisper.cpp \
o/$(MODE)/whisperfile \
o/$(MODE)/third_party/zipalign
.PHONY: install
# Copy the llamafile, whisperfile and zipalign binaries into $(PREFIX)/bin.
#
# Every binary the recipe copies is listed as a prerequisite, so running
# `make install` on a tree where only llamafile has been built now builds the
# other two first instead of failing on a missing file.
install:	o/$(MODE)/llamafile/llamafile \
		o/$(MODE)/whisperfile/whisperfile \
		o/$(MODE)/third_party/zipalign/zipalign
	mkdir -p $(PREFIX)/bin
	$(INSTALL) o/$(MODE)/llamafile/llamafile $(PREFIX)/bin/llamafile
	$(INSTALL) o/$(MODE)/whisperfile/whisperfile $(PREFIX)/bin/whisperfile
	$(INSTALL) o/$(MODE)/third_party/zipalign/zipalign $(PREFIX)/bin/zipalign
# `make check` builds the tests package
.PHONY: check
check: o/$(MODE)/tests
# ==============================================================================
# GPU Backend Targets
# ==============================================================================
# These targets build GPU backend shared libraries that can be loaded at runtime.
# They pass GGML_VERSION and GGML_COMMIT from build/config.mk to the build scripts.
.PHONY: cuda
cuda: # Build CUDA backend with TinyBLAS (NVIDIA GPUs)
GGML_VERSION=$(GGML_VERSION) GGML_COMMIT=$(GGML_COMMIT) llamafile/cuda.sh
.PHONY: cublas
cublas: # Build CUDA backend with cuBLAS (NVIDIA GPUs, requires cuBLAS at runtime)
GGML_VERSION=$(GGML_VERSION) GGML_COMMIT=$(GGML_COMMIT) llamafile/cuda.sh --cublas
.PHONY: rocm
rocm: # Build ROCm backend with TinyBLAS (AMD GPUs)
GGML_VERSION=$(GGML_VERSION) GGML_COMMIT=$(GGML_COMMIT) llamafile/rocm.sh
# both toolchain targets have no recipe of their own; they only pull in the
# $(COSMOCC) directory rule (and, for CI, $(PREFIX)/bin/ape)
.PHONY: cosmocc
cosmocc: $(COSMOCC) # cosmocc toolchain setup
.PHONY: cosmocc-ci
cosmocc-ci: $(COSMOCC) $(PREFIX)/bin/ape # cosmocc toolchain setup in ci context
# Bootstrap target: initialises each submodule that is not checked out yet,
# runs the per-project apply-patches.sh scripts, then builds the `cosmocc`
# goal through a recursive make.
.PHONY: setup
setup: # Initialize and configure all dependencies (submodules, patches, etc.)
@echo "Setting up dependencies..."
# scratch directory handed to the patch scripts through TMPDIR
@mkdir -p o/tmp
# a submodule counts as initialised when its .git file exists
@if [ ! -f whisper.cpp/.git ]; then \
echo "Initializing whisper.cpp submodule..."; \
git submodule update --init whisper.cpp; \
fi
@echo "Applying whisper.cpp patches..."
@export TMPDIR=$$(pwd)/o/tmp && ./whisper.cpp.patches/apply-patches.sh
@if [ ! -f stable-diffusion.cpp/.git ]; then \
echo "Initializing stable-diffusion.cpp submodule..."; \
git submodule update --init stable-diffusion.cpp; \
fi
@echo "Applying stable-diffusion.cpp patches..."
@export TMPDIR=$$(pwd)/o/tmp && ./stable-diffusion.cpp.patches/apply-patches.sh
@if [ ! -f llama.cpp/.git ]; then \
echo "Initializing llama.cpp submodule..."; \
git submodule update --init llama.cpp; \
fi
@echo "Initializing llama.cpp dependencies (nested submodules)..."
@cd llama.cpp && git submodule update --init
@echo "Applying llama.cpp patches..."
@export TMPDIR=$$(pwd)/o/tmp && ./llama.cpp.patches/apply-patches.sh
@if [ ! -f third_party/zipalign/.git ]; then \
echo "Initializing zipalign submodule..."; \
git submodule update --init third_party/zipalign; \
fi
@echo "Setup complete!"
# the toolchain is fetched last, after the "Setup complete!" message
@$(MAKE) cosmocc
# Destructive: deletes each submodule directory outright and then asks git to
# restore the path, discarding applied patches and any other local change.
.PHONY: reset-repo
reset-repo: # Reset all submodules to their original state (removes patches or any other change)
@echo "Resetting submodules to original state..."
@for dir in llama.cpp whisper.cpp stable-diffusion.cpp third_party/zipalign; do \
if [ -e "$$dir" ]; then \
echo "Removing $$dir..."; \
rm -rf "$$dir"; \
fi; \
echo "Restoring $$dir..."; \
git checkout "$$dir"; \
done
@echo "Reset complete. Run 'make setup' to reinitialize and apply patches."
# Points CLAUDE.md at docs/AGENTS.md. An existing regular file is never
# overwritten; only a missing file or an existing symlink is replaced.
.PHONY: claude
claude: # Set up CLAUDE.md symlink for Claude Code, show how to install the plugin
@if [ -e CLAUDE.md ] && [ ! -L CLAUDE.md ]; then \
echo "Error: CLAUDE.md exists and is not a symlink"; \
exit 1; \
fi
@rm -f CLAUDE.md
@ln -s docs/AGENTS.md CLAUDE.md
@echo "CLAUDE.md -> docs/AGENTS.md"
@echo ""
@echo "To install the llamafile plugin, run in Claude Code:"
@echo " /plugin marketplace add ./.llamafile_plugin"
@echo " /plugin install llamafile"
# dependency and tags rules are skipped for the same bootstrap goals
# (setup, reset-repo, claude) as the package includes at the top of this file
ifeq ($(filter $(MAKECMDGOALS),setup reset-repo claude),)
include build/deps.mk
include build/tags.mk
endif
================================================
FILE: README.md
================================================
# llamafile
[](https://github.com/mozilla-ai/llamafile/blob/main/LICENSE)
[](https://github.com/mozilla-ai/llamafile/actions/workflows/ci.yml)
[](https://github.com/ggml-org/llama.cpp/commit/7f5ee54)
[](https://github.com/ggml-org/whisper.cpp/commit/2eeeba5)
[](https://discord.gg/YuMNeuKStr)
[](https://builders.mozilla.org/)
**llamafile lets you distribute and run LLMs with a single file.**
llamafile is a [Mozilla Builders](https://builders.mozilla.org/) project (see its [announcement blog post](https://hacks.mozilla.org/2023/11/introducing-llamafile/)), now revamped by [Mozilla.ai](https://www.mozilla.ai/open-tools/llamafile).
Our goal is to make open LLMs much more
accessible to both developers and end users. We're doing that by
combining [llama.cpp](https://github.com/ggerganov/llama.cpp) with [Cosmopolitan Libc](https://github.com/jart/cosmopolitan) into one
framework that collapses all the complexity of LLMs down to
a single-file executable (called a "llamafile") that runs
locally on most operating systems and CPU architectures, with no installation.
llamafile also includes **[whisperfile](docs/whisperfile/index.md)**, a single-file speech-to-text tool built on [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and the same Cosmopolitan packaging. It supports transcription and translation of audio files across all the same platforms, with no installation required.
## v0.10.0
**llamafile versions starting from 0.10.0 use a new build system**, aimed at keeping our code more easily
aligned with the latest versions of llama.cpp. This means they support more recent models and functionalities,
but at the same time they might be missing some of
the features you were accustomed to (check out [this doc](README_0.10.0.md) for a high-level description of what has been done). If you liked
the "classic experience" more, you will always be able to access the previous versions from our
[releases](https://github.com/mozilla-ai/llamafile/releases) page. Our pre-built llamafiles always
show which version of the server they have been bundled with ([0.9.* example](https://huggingface.co/mozilla-ai/llava-v1.5-7b-llamafile), [0.10.* example](https://huggingface.co/mozilla-ai/llamafile_0.10.0)), so you will always know
which version of the software you are downloading.
> **We want to hear from you!**
Whether you are a new user or a long-time fan, please share what you find most valuable about llamafile and what would make it more useful for you.
[Read more via the blog](https://blog.mozilla.ai/llamafile-returns/) and add your voice to the discussion [here](https://github.com/mozilla-ai/llamafile/discussions/809).
## Quick Start
Download and run your first llamafile in minutes:
```sh
# Download an example model (Qwen3.5 0.8B)
curl -LO https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Qwen3.5-0.8B-Q8_0.llamafile
# Make it executable (macOS/Linux/BSD)
chmod +x Qwen3.5-0.8B-Q8_0.llamafile
# Run it
./Qwen3.5-0.8B-Q8_0.llamafile
```
We chose this model because it is the smallest one we have
built a llamafile for, so it is the most likely to work out-of-the-box for you.
If you have powerful hardware and/or GPUs, [feel free to choose](docs/example_llamafiles.md)
larger and more expressive models which should provide more accurate
responses.
**Windows users:** Rename the file to add `.exe` extension before running.
## Documentation
Check the full documentation in the [docs/](docs/) folder or online at [mozilla-ai.github.io/llamafile](https://mozilla-ai.github.io/llamafile/), or directly jump into one of the following subsections:
- [Quickstart](https://mozilla-ai.github.io/llamafile/quickstart/)
- [Example llamafiles](https://mozilla-ai.github.io/llamafile/example_llamafiles/)
- [Running a llamafile](https://mozilla-ai.github.io/llamafile/running_llamafile/)
- [Creating llamafiles](https://mozilla-ai.github.io/llamafile/creating_llamafiles/)
- [Source installation](https://mozilla-ai.github.io/llamafile/source_installation/)
- [Technical details](https://mozilla-ai.github.io/llamafile/technical_details/)
- [Supported Systems](https://mozilla-ai.github.io/llamafile/support/)
- [Troubleshooting](https://mozilla-ai.github.io/llamafile/troubleshooting/)
- [Whisperfile](https://mozilla-ai.github.io/llamafile/whisperfile/)
## Licensing
While the llamafile project is Apache 2.0-licensed, our changes
to llama.cpp and whisper.cpp are licensed under MIT (just like the projects
themselves) so as to remain compatible and upstreamable in the future,
should that be desired.
The llamafile logo on this page was generated with the assistance of DALL·E 3.
[](https://star-history.com/#Mozilla-Ocho/llamafile&Date)
================================================
FILE: README_0.10.0.md
================================================
llamafile 0.10.0 has been a work in progress for a while. Now that we are merging
its code with main, we want to leave this document available to document both the
reasons and the process behind it.
Everything started with the goal of replicating a cosmopolitan llama.cpp build from scratch,
so we could get the best of two worlds. On the one hand, some of the characteristic
features of llamafiles, that is portability across different systems and architectures
and the possibility of bundling model weights within llamafile executables. On the
other hand, the features and the model support made available by the most recent
versions of llama.cpp.
We realise that what makes a llamafile is not just an APE executable, so before
merging this code with main we wanted to bring back more of its features into the
new build. We believe there's still work to do, but now that the main features are
there we can let you play with a more modern llamafile and directly ask you what
you'd like to see the most in its future versions.
Older builds (and llamafiles built on them) will still be available, check out our
[releases](https://github.com/mozilla-ai/llamafile/releases) and our
[Example Llamafiles](/docs/example_llamafiles.md) page.
# Updates
Here are the features we brought into our development branch before merging with main.
Most of them were brought in from previous versions of llamafile, and all credit goes
to their original authors <3. Some (including new build for easier sync with upstream
llama.cpp, mtmd API support, integration tests, skill docs, HTTP chat client for combined
mode) are new.
20260317
- Updates to [skill documents](https://github.com/mozilla-ai/llamafile/pull/886)
- Added [whisper](https://github.com/mozilla-ai/llamafile/pull/880)
- Added support for [chat, cli, server](https://github.com/mozilla-ai/llamafile/pull/896) modalities
- [Updated llama.cpp](https://github.com/mozilla-ai/llamafile/pull/901) to 7f5ee54 (with support for qwen3.5 models)
- Added [integration tests](https://github.com/mozilla-ai/llamafile/pull/906)
- Added [`--image` support to CLI](https://github.com/mozilla-ai/llamafile/pull/912)
20260219
- Added [CPU optimizations](https://github.com/mozilla-ai/llamafile/pull/868)
- Fixed misc issues
- server [timing out](https://github.com/mozilla-ai/llamafile/pull/876)
- [mmap errors](https://github.com/mozilla-ai/llamafile/pull/882) when loading bundled models
- [think mode in TUI](https://github.com/mozilla-ai/llamafile/pull/885)
- [Added "skill docs"](https://github.com/mozilla-ai/llamafile/pull/886) to be used with AI assistants
[20260202](https://github.com/mozilla-ai/llamafile/discussions/871)
- Added zipalign as a GitHub [submodule](https://github.com/mozilla-ai/llamafile/pull/848) (so we can get the latest updates from Justine’s repo)
- Brought back [cuda support](https://github.com/mozilla-ai/llamafile/pull/859) on Linux
- Added support for the [mtmd API](https://github.com/mozilla-ai/llamafile/pull/852) in the TUI (so you can now directly access modern multimodal models from the llamafile chat)
- Tested new llamafiles running models trained for tool calling (e.g. Qwen3, gpt-oss-20b) and multimodal models such as llava 1.6, Qwen3-VL and Ministral 3
[20251218](https://github.com/mozilla-ai/llamafile/discussions/845)
- added Metal support: GPU on MacOS ARM64 is supported by compiling a small module
using the Xcode Command Line Tools, which need to be installed. Check our docs at
https://mozilla-ai.github.io/llamafile/support/#gpu-support for more info.
- Metal works both in llamafile (called either as TUI or with the --server flag)
and in llama-server.
20251215
- added TUI support: you can now directly chat with the chosen LLM from
the terminal, or run the llama.cpp server using the `--server` parameter
- simplified build by removing all tools/deps except those required by
the new llamafile code (they will be added back in as soon as we reintroduce
functionalities)
20251209
- added BUILD.mk so we can do without cmake
- build works with cosmocc 4.0.2
- dependencies are all taken from llama.cpp/vendor directory
- building now works both on linux and mac
20251208
- updated to llama.cpp commit dbc15a79672e72e0b9c1832adddf3334f5c9229c
20251124
- first version, relying on cmake for the build
# What's missing
- GPU support for Windows (and for whisperfile)
- stable diffusion (the code is there, but has not been ported to the new build format yet)
- some features triggered by extra arguments in CLI mode
- pledge() SECCOMP sandboxing
- localscore
- llamafiler for embeddings (we rolled back to llama.cpp's embeddings endpoint instead)
- ... please help us track if there's anything missing you wish to see in the new build!
================================================
FILE: RELEASE.md
================================================
# Making a llamafile Release
There are a few steps in making a llamafile release which will be detailed in this document.
The two primary artifacts of the release are the `llamafile-VERSION.zip` and the binaries for the GitHub release.
## Release Process
Note: Step 2 is only needed if you are making a new release of the ggml-cuda.so and ggml-rocm.so shared libraries.
You only need to do this when you are making changes to the CUDA code or the APIs surrounding it.
Otherwise you can use the previous release of the shared libraries.
### Preparing the Build Environment
Before building, ensure all dependencies are initialized and configured:
```sh
make setup
```
This initializes git submodules (e.g., llama.cpp) and applies llamafile patches.
The patches integrate dependencies with llamafile's build system and add llamafile-specific functionality.
### Release Steps
1. Update the version number in `version.h`
2. Build the ggml-cuda.so and ggml-rocm.so shared libraries on Linux. llamafile uses TINYBLAS as a default, even if some model families (e.g. Qwen3.5) use CUBLAS as a default for CUDA.
- You can do this by running the script `./llamafile/cuda.sh` and `./llamafile/rocm.sh` respectively.
- The files will be built and placed in your home directory.
3. Build the project with `make -j8`
4. Install the built project to your /usr/local/bin directory with `sudo make install PREFIX=/usr/local`
### llamafile Release Zip
The easiest way to create the release zip is to:
`make install PREFIX=PREFERRED_DIR/llamafile-VERSION`
After the directory is created, you will want to bundle the built shared libraries into the release binaries (at the moment, llamafile only).
You can do this for each binary with a command like the following:
`zipalign -j0 llamafile ggml-cuda.so ggml-rocm.so`
The zip is structured as follows.
```
llamafile-
|-- README.md
|-- bin
| |-- llamafile
| |-- whisperfile
| `-- zipalign
`-- share
`-- man
`-- man1
|-- whisperfile.1
`-- zipalign.1
```
Before you zip the directory, you will want to remove the shared libraries from the directory (if present).
`rm *.so *.dll`
You can zip the directory with the following command:
`zip -r llamafile-VERSION.zip llamafile-VERSION`
### llamafile Release Binaries
After you have built the zip it is quite easy to create the release binaries.
The following binaries are part of the release:
- `llamafile`
- `whisperfile`
- `zipalign`
You can use the script to create the appropriately named binaries:
`./llamafile/release.sh -v VERSION -s SOURCE_DIR -d DEST_DIR`
Make sure to move the `llamafile-VERSION.zip` file to the destination directory as well, and you are good to release after you've tested.
================================================
FILE: build/config.mk
================================================
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
# ==============================================================================
# GGML Version (extracted from llama.cpp/ggml/CMakeLists.txt)
# ==============================================================================
# each component is the digits left after stripping every non-digit from the matching line
GGML_VERSION_MAJOR := $(shell grep -E 'GGML_VERSION_MAJOR [0-9]+' llama.cpp/ggml/CMakeLists.txt | sed 's/[^0-9]*//g')
GGML_VERSION_MINOR := $(shell grep -E 'GGML_VERSION_MINOR [0-9]+' llama.cpp/ggml/CMakeLists.txt | sed 's/[^0-9]*//g')
GGML_VERSION_PATCH := $(shell grep -E 'GGML_VERSION_PATCH [0-9]+' llama.cpp/ggml/CMakeLists.txt | sed 's/[^0-9]*//g')
GGML_VERSION := $(GGML_VERSION_MAJOR).$(GGML_VERSION_MINOR).$(GGML_VERSION_PATCH)
# short commit id as seen from llama.cpp/ggml; "unknown" when the directory or git is unavailable
GGML_COMMIT := $(shell cd llama.cpp/ggml 2>/dev/null && git rev-parse --short HEAD 2>/dev/null || echo "unknown")
# ==============================================================================
# Build Configuration
# ==============================================================================
PREFIX = /usr/local
# pinned toolchain directory; a rule at the bottom of this file downloads it
COSMOCC = .cosmocc/4.0.2
TOOLCHAIN = $(COSMOCC)/bin/cosmo
CC = $(TOOLCHAIN)cc
CXX = $(TOOLCHAIN)c++
AR = $(COSMOCC)/bin/ar.ape
ZIPOBJ = $(COSMOCC)/bin/zipobj
MKDEPS = $(COSMOCC)/bin/mkdeps
INSTALL = install
ARFLAGS = rcsD
CXXFLAGS = -frtti -std=gnu++23
CCFLAGS = -O2 -g -fexceptions -ffunction-sections -fdata-sections -mclang
CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM -Wno-attributes -DLLAMAFILE_DEBUG
TARGET_ARCH = -Xx86_64-mtune=znver4
TMPDIR = o//tmp
# evaluated only for its side effect: the temp directory exists before any rule runs
IGNORE := $(shell mkdir -p $(TMPDIR))
ARCH := $(shell uname -m)
# apple still distributes a 17 year old version of gnu make
ifeq ($(MAKE_VERSION), 3.81)
ifneq ($(MAKECMDGOALS),cosmocc)
# show the following message unless someone's trying to install cosmocc
$(error please use bin/make from cosmocc.zip rather than old xcode make)
endif
endif
# let `make m=foo` be shorthand for `make MODE=foo`
ifneq ($(m),)
ifeq ($(MODE),)
MODE := $(m)
endif
endif
# make build more deterministic
LC_ALL = C.UTF-8
SOURCE_DATE_EPOCH = 0
export MODE
export TMPDIR
export LC_ALL
export SOURCE_DATE_EPOCH
# `make` runs `make all` by default
.PHONY: all
all: o/$(MODE)/
.PHONY: clean
clean:; rm -rf o
# distclean additionally removes the downloaded toolchain
.PHONY: distclean
distclean:; rm -rf o .cosmocc
# toolchain downloads: arguments are output directory, version, expected sha256 of the zip
.cosmocc/3.9.7:
build/download-cosmocc.sh $@ 3.9.7 3f559555d08ece35bab1a66293a2101f359ac9841d563419756efa9c79f7a150
.cosmocc/4.0.2:
build/download-cosmocc.sh $@ 4.0.2 85b8c37a406d862e656ad4ec14be9f6ce474c1b436b9615e91a55208aced3f44
================================================
FILE: build/cudacc
================================================
#!/bin/sh
# Locate the NVIDIA compiler and leave its path in $CC.
# Lookup order: nvcc on $PATH, then $CUDA_PATH/bin, /opt/cuda/bin and
# /usr/local/cuda/bin. Returns 0 on the first hit and 1 when nothing is found.
find_nvcc() {
  if CC=$(command -v nvcc 2>/dev/null); then
    return
  fi
  for CC in "$CUDA_PATH/bin/nvcc" "/opt/cuda/bin/nvcc" "/usr/local/cuda/bin/nvcc"; do
    if [ -x "$CC" ]; then
      return
    fi
  done
  return 1
}
# Locate the AMD compiler and leave its path in $CC.
# Lookup order: hipcc on $PATH, then $HIP_PATH/bin, /opt/rocm/bin and
# /usr/local/rocm/bin. Returns 0 on the first hit and 1 when nothing is found.
find_hipcc() {
  if CC=$(command -v hipcc 2>/dev/null); then
    return
  fi
  for CC in "$HIP_PATH/bin/hipcc" "/opt/rocm/bin/hipcc" "/usr/local/rocm/bin/hipcc"; do
    if [ -x "$CC" ]; then
      return
    fi
  done
  return 1
}
# Pick the compiler: hipcc is probed first, so it wins when both toolchains
# are installed. Only nvcc gets --forward-unknown-to-host-compiler.
if find_hipcc; then
VENDOR=AMD
FLAGS=
elif find_nvcc; then
VENDOR=NVIDIA
FLAGS="--forward-unknown-to-host-compiler"
else
echo 'error: need either hipcc (AMD) or nvcc (NVIDIA) on $PATH' >&2
exit 1
fi
# Rebuild the argument list in place. `for x` iterates over the original
# arguments, so clearing "$@" on the first iteration and re-appending each
# (possibly translated) argument is safe.
FIRST=1
for x; do
if [ $FIRST -eq 1 ]; then
set --
FIRST=0
fi
if [ $VENDOR = AMD ]; then
# translate or drop nvcc-only arguments when compiling with hipcc
if [ x"$x" = x"-lcublas" ]; then
set -- "$@" -lhipblas -lrocblas
continue
elif [ x"$x" = x"--use_fast_math" ]; then
continue
fi
fi
set -- "$@" "$x"
done
# $FLAGS is intentionally unquoted so an empty value adds no argument
exec "$CC" $FLAGS "$@"
================================================
FILE: build/deps.mk
================================================
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
# collect the per-package file lists ($(pkg)_SRCS etc.) for every package in $(PKGS)
SRCS = $(foreach x,$(PKGS),$($(x)_SRCS))
HDRS = $(foreach x,$(PKGS),$($(x)_HDRS))
INCS = $(foreach x,$(PKGS),$($(x)_INCS))
# regenerate the dependency file with mkdeps whenever any listed file changes
o/$(MODE)/depend: $(SRCS) $(HDRS) $(INCS)
@mkdir -p $(@D)
$(MKDEPS) -o $@ -r o/$(MODE)/ $(SRCS) $(HDRS) $(INCS)
# same recipe, written to a separate output file
o/$(MODE)/depend.test: $(SRCS) $(HDRS) $(INCS)
@mkdir -p $(@D)
$(MKDEPS) -o $@ -r o/$(MODE)/ $(SRCS) $(HDRS) $(INCS)
# empty rules: the listed files are targets with nothing to build
$(SRCS):
$(HDRS):
$(INCS):
# fallback for any prerequisite that has no rule: report it and drop the
# generated dependency file
.DEFAULT:
@echo
@echo NOTE: deleting o/$(MODE)/depend because of an unspecified prerequisite: $@
@echo
rm -f o/$(MODE)/depend
# the leading dash tolerates a dependency file that does not exist yet
-include o/$(MODE)/depend
================================================
FILE: build/download-cosmocc.sh
================================================
#!/bin/sh
# cosmocc downloader script
# https://justine.lol/cosmo3/#install
# https://github.com/jart/cosmopolitan/blob/master/tool/cosmocc/README.md
# collect arguments
# (each ${N:?...} aborts the script with an error when the argument is missing)
OUTPUT_DIR=${1:?OUTPUT_DIR}
COSMOCC_VERSION=${2:?COSMOCC_VERSION}
COSMOCC_SHA256SUM=${3:?COSMOCC_SHA256SUM}
URL1="https://github.com/jart/cosmopolitan/releases/download/${COSMOCC_VERSION}/cosmocc-${COSMOCC_VERSION}.zip"
URL2="https://cosmo.zip/pub/cosmocc/cosmocc-${COSMOCC_VERSION}.zip"
# helper function
abort() {
printf '%s\n' "download terminated." >&2
exit 1
}
# exit if already downloaded
# we need it because directory timestamps work weirdly
OUTPUT_DIR=${OUTPUT_DIR%/}
if [ -d "${OUTPUT_DIR}" ]; then
exit 0
fi
# find commands we need to securely download cosmocc
if ! UNZIP=$(command -v unzip 2>/dev/null); then
printf '%s\n' "$0: fatal error: you need the unzip command" >&2
printf '%s\n' "please download https://cosmo.zip/pub/cosmos/bin/unzip and put it on the system path" >&2
abort
fi
# checksum tool, in order of preference: system sha256sum, `shasum -a 256`,
# or a binary compiled from build/sha256sum.c into o/build/
if command -v sha256sum >/dev/null 2>&1; then
# can use system sha256sum
true
elif command -v shasum >/dev/null 2>&1; then
sha256sum() {
shasum -a 256 "$@"
}
else
if [ ! -f build/sha256sum.c ]; then
printf '%s\n' "$0: fatal error: you need to install sha256sum" >&2
printf '%s\n' "please download https://cosmo.zip/pub/cosmos/bin/sha256sum and put it on the system path" >&2
abort
fi
# reuse a previously compiled helper if there is one
if ! SHA256SUM=$(command -v "$PWD/o/build/sha256sum" 2>/dev/null); then
# compiler lookup: $CC from the environment, then cc, then cosmocc
if ! CC=$(command -v "$CC" 2>/dev/null); then
if ! CC=$(command -v cc 2>/dev/null); then
if ! CC=$(command -v cosmocc 2>/dev/null); then
printf '%s\n' "$0: fatal error: you need to install either sha256sum, cc, or cosmocc" >&2
printf '%s\n' "please download https://cosmo.zip/pub/cosmos/bin/sha256sum and put it on the system path" >&2
abort
fi
fi
fi
mkdir -p o/build || abort
SHA256SUM="$PWD/o/build/sha256sum"
printf '%s\n' "${CC} -w -O2 -o ${SHA256SUM} build/sha256sum.c" >&2
# build under a pid-suffixed name and rename it into place afterwards
"${CC}" -w -O2 -o "${SHA256SUM}.$$" build/sha256sum.c || abort
mv -f "${SHA256SUM}.$$" "${SHA256SUM}" || abort
fi
sha256sum() {
"${SHA256SUM}" "$@"
}
fi
# downloader: wget is preferred, curl is the fallback; DOWNLOAD_ARGS holds the
# flag that precedes the output file name
if WGET=$(command -v wget 2>/dev/null); then
DOWNLOAD=$WGET
DOWNLOAD_ARGS=-O
elif CURL=$(command -v curl 2>/dev/null); then
DOWNLOAD=$CURL
DOWNLOAD_ARGS=-fLo
else
printf '%s\n' "$0: fatal error: you need to install either wget or curl" >&2
printf '%s\n' "please download https://cosmo.zip/pub/cosmos/bin/wget and put it on the system path" >&2
abort
fi
# create temporary output directory
#
# The toolchain is unpacked into a pid-suffixed scratch directory that is only
# renamed to its final name once everything has succeeded.
OLDPWD=$PWD
OUTPUT_TMP="${OUTPUT_DIR}.tmp.$$/"

# Failure handler for everything that runs after the scratch directory exists:
# return to the starting directory, remove the scratch directory, then abort.
die() {
  cd "${OLDPWD}"
  rm -rf "${OUTPUT_TMP}"
  abort
}

mkdir -p "${OUTPUT_TMP}" || abort
# Never continue if we could not enter the scratch directory: the download,
# unzip and `rm -f` steps that follow would otherwise run in the caller's
# working directory.
cd "${OUTPUT_TMP}" || die
# download cosmocc toolchain
# multiple urls avoids outages and national firewalls
if ! "${DOWNLOAD}" ${DOWNLOAD_ARGS} cosmocc.zip "${URL1}"; then
# discard any partial file before trying the mirror
rm -f cosmocc.zip
"${DOWNLOAD}" ${DOWNLOAD_ARGS} cosmocc.zip "${URL2}" || die
fi
# verify the archive against the checksum passed on the command line
printf '%s\n' "${COSMOCC_SHA256SUM} *cosmocc.zip" >cosmocc.zip.sha256sum
sha256sum -c cosmocc.zip.sha256sum || die
"${UNZIP}" cosmocc.zip || die
rm -f cosmocc.zip cosmocc.zip.sha256sum
# commit output directory
# (the rename is the last step, so the final directory only appears once complete)
cd "${OLDPWD}" || die
mv "${OUTPUT_TMP}" "${OUTPUT_DIR}" || die
================================================
FILE: build/htags
================================================
#!/bin/sh
#-*-mode:sh;indent-tabs-mode:nil;tab-width:2;coding:utf-8-*-┐
#── vi: set et ft=sh ts=2 sts=2 fenc=utf-8 :vi ─────────────┘
#
# OVERVIEW
#
# Header Symbol Index Generator
#
# DESCRIPTION
#
# This is a static source analyzer that lets us configure Emacs
# keybindings to insert #include lines.
#
# EXAMPLES
#
# build/htags -o HTAGS $(find . -name \*.h)
#
# (defun jart-add-include ()
# (interactive)
# (let* ((tag-file "HTAGS")
# (case-fold-search nil)
# (search (thing-at-point 'symbol))
# (buffer (find-file-noselect (format "%s/%s"
# (locate-dominating-file
# (buffer-name) tag-file)
# tag-file)))
# (header (with-current-buffer buffer
# (save-excursion
# (goto-char 0)
# (when (re-search-forward
# (concat "\177" search "\001") nil t)
# (when (re-search-backward "\f\n\\([^,]*\\)," nil t)
# (match-string 1)))))))
# (when header
# (save-excursion
# (goto-char 0)
# (re-search-forward "#include")
# (re-search-forward "^$")
# (insert (concat "#include \"" header "\"\n"))))))
# (defun jart-c-mode-common-hook ()
# (define-key c-mode-base-map (kbd "C-c C-h") 'jart-add-include))
# (eval-after-load 'markdown-mode
# '(progn
# (add-hook 'c-mode-common-hook 'jart-c-mode-common-hook)))
# The first argument is the command that gets exec'd at the bottom; all other
# arguments are forwarded to it after the --regex-c options added below.
TAGS="$1"
shift
# ctags doesn't understand atomics, i.e. declarations written as _Atomic(T);
# this tags the text wrapped in the parentheses
set -- --regex-c='/_Atomic(\([^)]*\))/\1/b' "$@"
# ctags doesn't understand variable prototypes, e.g.
# extern char **environ;
set -- --regex-c='/^\(\(hidden\|extern\|const\) \)*[_[:alpha:]][_[:alnum:]]*[ *][ *]*\([_[:alpha:]][_[:alnum:]]*[ *][ *]*\)*\([_[:alpha:]][_$[:alnum:]]*\)/\4/b' "$@"
# ctags doesn't understand function prototypes, e.g.
# bool isheap(void *p) dontthrow nocallback;
set -- --regex-c='/^[_[:alpha:]][_[:alnum:]]*[ *][ *]*\([_[:alpha:]][_[:alnum:]]*[ *][ *]*\)*\([_[:alpha:]][_$[:alnum:]]*\)(.*/\2/b' "$@"
# ctags doesn't understand function pointers, e.g.
# extern int32_t (*const SetEvent)(int64_t hEvent) wincall;
set -- --regex-c='/^extern [^(]*(\*const \([^)]*\))(/\1/b' "$@"
# ctags doesn't understand forward declarations, e.g.
# struct WorstSoftwareEver;
# (they are all tagged under one fixed junk name; presumably so they never match a real lookup)
set -- --regex-c='/^struct.*;$/uehocruehcroue/b' "$@"
# $TAGS is unquoted, so it may carry extra words; -e selects Emacs-style output
exec $TAGS \
-e \
--langmap=c:.c.h \
--exclude=libc/nt/struct/imagefileheader.internal.h \
--exclude=libc/nt/struct/imageseparatedebugheader.internal.h \
--exclude=libc/nt/struct/importobjectheader.h \
--exclude=libc/nt/struct/nonpageddebuginfo.h \
--exclude=libc/nt/struct/ansistring.h \
--exclude=libc/nt/struct/filesegmentelement.h \
"$@"
================================================
FILE: build/llamafile-convert
================================================
#!/bin/sh
# BIN is the directory part of $0 and PROG its basename
BIN=${0%/*}
PROG=${0##*/}
# --help is only recognised as the first argument
if [ x"$1" = x"--help" ]; then
echo "Usage: $PROG "
echo
echo "This program converts GGUF weights into a llamafile."
echo "Your .llamafile is outputted to the current directory."
echo
echo "You can supply either a .gguf filename, or the URL to"
echo "download one from an online service like Hugging Face."
echo
echo "When you run this program, it's recommended that you've"
echo "downloaded or installed an official llamafile-VERSION.zip"
echo "from https://github.com/Mozilla-Ocho/llamafile/releases"
echo "because they include prebuilt DLLs for CUDA and ROCm."
echo "You can verify your llamafile has them w/ unzip -vl"
exit 0
fi
# print a fixed message on stderr and exit with status 1
abort() {
echo "conversion terminated." >&2
exit 1
}
# find paths of golden llamafile binaries
#
# 1. if user downloaded `llamafile-VERSION.zip`, extracted it, and ran
# `./llamafile-VERSION/bin/llamafile-convert` directly, then we can
# support that by looking for a `llamafile` in the same bin folder.
#
# 2. otherwise, perform a $PATH lookup for llamafile
#
LLAMAFILE="$BIN/llamafile"
if [ ! -x "$LLAMAFILE" ]; then
LLAMAFILE=$(command -v llamafile) || abort
fi
# zipalign is located the same way: next to this script first, then $PATH
ZIPALIGN="$BIN/zipalign"
if [ ! -x "$ZIPALIGN" ]; then
ZIPALIGN=$(command -v zipalign) || abort
fi
# get path of downloader program
# (wget is preferred; DOWNLOAD_ARGS is the flag that precedes the output file name)
if WGET=$(command -v wget 2>/dev/null); then
DOWNLOAD=$WGET
DOWNLOAD_ARGS=-O
elif CURL=$(command -v curl 2>/dev/null); then
DOWNLOAD=$CURL
DOWNLOAD_ARGS=-fLo
else
echo "$PROG: fatal error: you need to install either wget or curl" >&2
echo "please download https://cosmo.zip/pub/cosmos/bin/wget and put it on the system path" >&2
abort
fi
# get first program argument
FILE=$1
if [ -z "$FILE" ]; then
echo "$PROG: missing operand (pass --help for help)" >&2
abort
fi
# Download the weights first when the operand is an http:// or https:// URL.
#
# The scheme is matched in full: a bare "http" prefix test would also treat a
# local file such as "http-bench.gguf" as a URL and try to download it.
# SHOULD_DELETE records that the local copy is ours to remove afterwards.
SHOULD_DELETE=0
case $FILE in
http://*|https://*)
  URL=$FILE
  URL=${URL%?download=true} # strip "?download=true" suffix
  FILE=${URL##*/}           # local file is basename of url
  echo "Downloading $FILE" >&2
  "${DOWNLOAD}" ${DOWNLOAD_ARGS} "$FILE" "$URL" || abort
  SHOULD_DELETE=1
  ;;
esac
# create output in current directory
echo "Using $LLAMAFILE as golden llamafile binary" >&2
OUTPUT=${FILE##*/} # basename
OUTPUT="${OUTPUT%.gguf}.llamafile"
echo "Converting $FILE to $OUTPUT" >&2
# start from a copy of the golden binary
cp -f "$LLAMAFILE" "$OUTPUT" || abort
# .args holds one argument per line: -m, the weights file name, and a literal "..."
# NOTE(review): this overwrites (and later deletes) any .args already present in the current directory
printf %s "-m
${FILE##*/}
...
" > .args
# add the weights and .args to the copied executable
"$ZIPALIGN" -j0 "$OUTPUT" "$FILE" .args || abort
# cleanup
rm -f .args
# only a file downloaded by this script is removed
if [ $SHOULD_DELETE -eq 1 ]; then
rm -f "$FILE"
fi
echo "Success. You may now run ./$OUTPUT" >&2
================================================
FILE: build/llamafile-upgrade-engine
================================================
#!/bin/sh
BIN="${0%/*}"
PROG="${0##*/}"
# Print the complete usage text to stdout; $PROG is expanded inside the
# here-document because its delimiter is unquoted.
# NOTE(review): the text lists long options (--help, --force, --verbose);
# confirm they are actually parsed somewhere in this script.
print_full_help() {
cat << EOF
Usage: $PROG [OPTION]... (new)
Upgrade llamafile archives.
Options:
-h, --help display this help and exit
-f, --force skip version check
-v, --verbose verbose mode
Arguments:
the name of the old llamafile archive to be upgraded
(new) the name of the new llamafile archive to be created
if not defined output will be .updated.llamafile
Example:
$PROG old.llamafile new.llamafile
This command will upgrade the old_llamafile to a new llamafile named new_llamafile.
When you run this program, it's recommended that you've
downloaded or installed an official llamafile-VERSION.zip
from https://github.com/Mozilla-Ocho/llamafile/releases
because they include prebuilt DLLs for CUDA and ROCm.
You can verify your llamafile has them w/ unzip -vl
EOF
}
# Report a fatal error and terminate the script.
#   $1  human readable description of what went wrong
# Prints the message and a short usage reminder on stderr, then exits with
# status 1. printf is used so that backslashes in the message are printed
# literally regardless of the shell's echo implementation.
abort() {
  printf 'Error: %s\n' "$1" >&2
  cat << EOF >&2
Usage: $PROG [OPTION]... <old> (new)
Upgrade llamafile archives.
Refer to --help for full instructions.
EOF
  exit 1
}
# Only the first argument is inspected for a help request; the help text is
# sent to stderr and the script exits successfully.
case "$1" in
  -h|--help)
    print_full_help >&2
    exit 0
    ;;
esac

# Locate the golden llamafile and zipalign binaries.
#
# Preference order for each tool:
#   1. a sibling executable in the directory this script lives in, which is
#      the layout of an extracted llamafile-VERSION.zip
#      (./llamafile-VERSION/bin/llamafile-upgrade-engine)
#   2. whatever `command -v` resolves on $PATH
#
# The script aborts when neither location provides the tool.
LLAMAFILE="$BIN/llamafile"
[ -x "$LLAMAFILE" ] ||
  LLAMAFILE="$(command -v llamafile)" ||
  abort "llamafile not found in PATH"

ZIPALIGN="$BIN/zipalign"
[ -x "$ZIPALIGN" ] ||
  ZIPALIGN="$(command -v zipalign)" ||
  abort "zipalign not found in PATH"
# Parse command-line options.
#   -f  upgrade even when old and new engine versions are identical
#   -v  list the content of the old archive before repackaging
# Any other option is a usage error: getopts prints its own diagnostic and
# sets opt to '?', which is caught by the fallback arm below instead of
# being silently ignored.
force_upgrade=false
verbose=false
while getopts "fv" opt; do
  case $opt in
    f)
      force_upgrade=true
      echo "Skipping version check."
      ;;
    v)
      verbose=true
      echo "Verbose Output Mode."
      ;;
    *)
      abort "Unrecognized option"
      ;;
  esac
done

# Drop the options that were consumed so $1/$2 are the positional operands.
shift $((OPTIND - 1))

# Both names are stored WITHOUT the .llamafile extension; the rest of the
# script appends the extension again whenever it needs the file name.
if [ -z "${1}" ]; then
  abort "Missing path to old llamafile archive to be upgraded"
fi
old_llamafile="${1%.llamafile}"

# The output name defaults to <old>.updated when no second operand is given.
if [ -z "$2" ]; then
  new_llamafile="${old_llamafile}.updated"
else
  new_llamafile="${2%.llamafile}"
fi
# Work out how to address the old archive on disk. A relative name gets a
# "./" prefix so the shell executes the file itself instead of searching
# $PATH (and so a leading '-' is never mistaken for an option). An absolute
# path must be used verbatim: prefixing it with "./" would turn it into a
# relative path that does not exist.
case "$old_llamafile" in
  /*) old_llamafile_path="${old_llamafile}.llamafile" ;;
  *) old_llamafile_path="./${old_llamafile}.llamafile" ;;
esac

# Obtain versions of old and new llamafiles
old_llamafile_engine_version="$("$old_llamafile_path" --version)" || abort "Failed to get version of old llamafile"
new_llamafile_engine_version="$("$LLAMAFILE" --version)" || abort "Failed to get version of new llamafile"

# Check if llamafile has been upgraded
echo "== Engine Version Check ==" >&2
echo "Engine version from $old_llamafile: $old_llamafile_engine_version" >&2
echo "Engine version from $LLAMAFILE: $new_llamafile_engine_version" >&2
if [ "$old_llamafile_engine_version" = "$new_llamafile_engine_version" ] && [ "$force_upgrade" != "true" ]; then
  echo "Upgrade not required. Exiting..." >&2
  exit 0
fi

if [ "$verbose" = "true" ]; then
  echo "== Current Content ==" >&2
  zipinfo "$old_llamafile_path" || abort "Failed to get current content of old llamafile"
fi

# Scratch directory for the extracted archive; removed again on any exit.
tempdir="$(mktemp -d)" || abort "Failed to create temporary directory"
trap 'rm -rf "$tempdir"' EXIT

echo "== Repackaging / Upgrading ==" >&2
echo "extracting..." >&2
unzip "$old_llamafile_path" -d "$tempdir" || abort "Failed to extract old llamafile"

# The new archive is the golden engine binary with the old weights (*.gguf)
# and the old default arguments (.args) appended by zipalign.
echo "repackaging..." >&2
cp "$LLAMAFILE" "${new_llamafile}.llamafile" || abort "Failed to copy new llamafile"
"$ZIPALIGN" -j0 "${new_llamafile}.llamafile" "$tempdir"/*.gguf "$tempdir"/.args || abort "Failed to repackaging"

echo "== Completed ==" >&2
echo "Original File: ${old_llamafile}.llamafile" >&2
echo "Upgraded File: ${new_llamafile}.llamafile" >&2
================================================
FILE: build/objdump
================================================
#!/bin/sh
# objdump wrapper that selects the architecture specific cosmo objdump.
#
# When any argument mentions "aarch64", the aarch64 tool is run on the twin
# of the file named by $2 that lives in the ".aarch64" subdirectory next to
# it; only $1 is forwarded as an option in that case. Otherwise every
# argument is passed unchanged to the x86_64 tool.
#
# All expansions are quoted so paths containing whitespace survive, and a
# bare file name (no directory part) maps to ".aarch64/<name>".
case "$*" in
  *aarch64*)
    case "$2" in
      */*) twin="${2%/*}/.aarch64/${2##*/}" ;;
      *) twin=".aarch64/$2" ;;
    esac
    exec aarch64-unknown-cosmo-objdump "$1" "$twin"
    ;;
  *)
    exec x86_64-unknown-cosmo-objdump "$@"
    ;;
esac
================================================
FILE: build/rules.mk
================================================
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
# ==============================================================================
# Compiler Commands
# ==============================================================================
# LINK.o     links objects using the C++ driver with the common and linker flags.
# COMPILE.c  compiles one C translation unit to an object (-c).
# COMPILE.cc compiles one C++ translation unit to an object (-c).
# Both compile commands take the shared $(CCFLAGS), the language specific
# flags, then $(CPPFLAGS_) followed by $(CPPFLAGS) and $(TARGET_ARCH).
LINK.o = $(CXX) $(CCFLAGS) $(LDFLAGS)
COMPILE.c = $(CC) $(CCFLAGS) $(CFLAGS) $(CPPFLAGS_) $(CPPFLAGS) $(TARGET_ARCH) -c
COMPILE.cc = $(CXX) $(CCFLAGS) $(CXXFLAGS) $(CPPFLAGS_) $(CPPFLAGS) $(TARGET_ARCH) -c
# ==============================================================================
# Standard Compilation Rules
# ==============================================================================
# Each rule maps a source file to o/$(MODE)/<same path>.o, creating the
# output directory first. $(COSMOCC) is a prerequisite of every rule, so
# objects are rebuilt when the toolchain changes.
# C source from the source tree.
o/$(MODE)/%.o: %.c $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.c) -o $@ $<
# C source that was itself generated into the output tree.
o/$(MODE)/%.o: o/$(MODE)/%.c $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.c) -o $@ $<
# C++ source with the .cc extension.
o/$(MODE)/%.o: %.cc $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.cc) -o $@ $<
# C++ source with the .cpp extension.
o/$(MODE)/%.o: %.cpp $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.cc) -o $@ $<
# ==============================================================================
# Extension-preserving compilation rules
# ==============================================================================
# These rules produce foo.c.o from foo.c (instead of foo.o).
# Required when both foo.c and foo.cpp exist in the same directory,
# otherwise both would produce foo.o and collide.
# Example: ggml/src/ggml.c and ggml/src/ggml.cpp both exist in llama.cpp.
# foo.c -> foo.c.o and foo.cpp -> foo.cpp.o, for sources in the source tree.
o/$(MODE)/%.c.o: %.c $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.c) -o $@ $<
o/$(MODE)/%.cpp.o: %.cpp $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.cc) -o $@ $<
# Same mapping for sources that live in the output tree (generated code).
o/$(MODE)/%.c.o: o/$(MODE)/%.c $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.c) -o $@ $<
o/$(MODE)/%.cpp.o: o/$(MODE)/%.cpp $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.cc) -o $@ $<
# ==============================================================================
# Hash functions generated by gperf
# ==============================================================================
# Turn foo.gperf into o/$(MODE)/foo.c using the build/gperf wrapper; the
# generated C file is then compiled by the output-tree rules.
o/$(MODE)/%.c: %.gperf
@mkdir -p $(@D)
build/gperf --output-file=$@ $<
# ==============================================================================
# Archive Creation
# ==============================================================================
# The pattern has no prerequisites of its own; the member objects ($^) are
# expected to be declared by whoever defines the archive target.
# Two archives are produced: the primary one from $^, and a twin under the
# sibling .aarch64/ directory built from the .aarch64/ counterpart of every
# prerequisite.
o/$(MODE)/%.a:
@mkdir -p $(dir $@)/.aarch64
$(AR) $(ARFLAGS) $@ $^
$(AR) $(ARFLAGS) $(dir $@)/.aarch64/$(notdir $@) $(foreach x,$^,$(dir $(x)).aarch64/$(notdir $(x)))
# ==============================================================================
# Linking Rules
# ==============================================================================
# Link a program (with or without the .com suffix) from the object of the
# same name plus any additional prerequisites ($^) and libraries.
o/$(MODE)/%: o/$(MODE)/%.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
o/$(MODE)/%.com: o/$(MODE)/%.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
# ==============================================================================
# Test Execution
# ==============================================================================
# Run the program named by the prerequisite; on success leave a foo.runs
# stamp file so the test is not repeated until the program is rebuilt.
%.runs: %
$<
@touch $@
# ==============================================================================
# Man Page Generation
# ==============================================================================
# Render foo.1 to plain text foo.1.asc at 80 columns. The output goes to a
# temporary file that only replaces the target when man succeeds; the
# leading '-' makes a man failure non-fatal, and the temporary file is
# removed afterwards either way. .PRECIOUS keeps the rendered page from
# being deleted by make.
.PRECIOUS: %.1.asc
%.1.asc: %.1
-MANWIDTH=80 MAN_KEEP_FORMATTING=1 man $< >$@.tmp && mv -f $@.tmp $@
@rm -f $@.tmp
# ==============================================================================
# Zip Object Creation via Cosmopolitan Zip File Compiler (zipobj)
# ==============================================================================
# Wrap an arbitrary file into a linkable object, once per architecture:
# the x86_64 object is the target itself and the aarch64 object is written
# to the sibling .aarch64/ directory under the same file name.
o/$(MODE)/%.zip.o: % $(COSMOCC)
@mkdir -p $(dir $@)/.aarch64
$(ZIPOBJ) $(ZIPOBJ_FLAGS) -a x86_64 -o $@ $<
$(ZIPOBJ) $(ZIPOBJ_FLAGS) -a aarch64 -o $(dir $@)/.aarch64/$(notdir $@) $<
# ==============================================================================
# APE Setup
# ==============================================================================
# Install the APE loader for $(ARCH) from the toolchain into $(PREFIX)/bin
# and register the APE magic with the kernel through binfmt_misc.
# NOTE(review): the registration string names /usr/bin/ape even when
# $(PREFIX) is not /usr -- confirm this is intended.
# NOTE(review): writing to /proc/sys/fs/binfmt_misc/register presumably
# requires root and a mounted binfmt_misc -- verify before relying on it.
$(PREFIX)/bin/ape: $(COSMOCC)
$(INSTALL) $(COSMOCC)/bin/ape-$(ARCH).elf $(PREFIX)/bin/ape
echo ':APE:M::MZqFpD::/usr/bin/ape:' > /proc/sys/fs/binfmt_misc/register
================================================
FILE: build/run
================================================
#!/bin/sh
# Trivial launcher: replace this shell with the command given as arguments,
# preserving every argument exactly as passed.
exec "$@"
================================================
FILE: build/sha256sum.c
================================================
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2022 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
// this file should not have dependencies, because everything will be
// re-downloaded if the o/tool/sha256sum artifact becomes invalidated
// PROG is the program name used as the prefix of every diagnostic.
#define PROG "sha256sum"
// USAGE is the help text written verbatim by GetOpts(); its option list
// mirrors the optstring passed to getopt() there.
#define USAGE \
"\
Usage: " PROG " [-?hbctw] [PATH...]\n\
-h help\n\
-c check mode\n\
-b binary mode\n\
-t textual mode\n\
-w warning mode\n"
// Bit-mixing helpers used by Sha256Transform(). All operands are expected
// to be 32-bit unsigned values.
// ROTR rotates a right by b bits (b must be in 1..31 for the shift to be
// well defined).
#define ROTR(a, b) (((a) >> (b)) | ((a) << (32 - (b))))
// CH picks bits from y where x is set and from z where x is clear.
#define CH(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
// MAJ yields the bitwise majority of x, y and z.
#define MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
// EP0/EP1 are applied to the working variables a and e in each round.
#define EP0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define EP1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
// SIG0/SIG1 are used to extend the 16 input words to 64 schedule words.
#define SIG0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ ((x) >> 3))
#define SIG1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ ((x) >> 10))
// Streaming hash state shared by Sha256Init/Update/Final.
struct Sha256Ctx {
uint8_t data[64];   // bytes of the block currently being filled
uint32_t datalen;   // number of valid bytes in data (0..63 between calls)
uint64_t bitlen;    // bits already compressed; Final adds the pending bytes
uint32_t state[8];  // running hash value, updated by Sha256Transform()
};
// Per-round additive constants: entry i is added in round i of
// Sha256Transform(). These are the SHA-256 round constants K[0..63].
static const uint32_t kSha256Tab[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, //
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, //
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, //
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, //
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, //
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, //
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, //
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, //
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, //
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, //
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, //
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, //
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, //
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, //
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, //
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, //
};
// Command line state, filled in by GetOpts().
static bool g_warn;       // -w: report malformed lines while checking
static char g_mode;       // ' ' (text, default) or '*' (binary) output marker
static bool g_check;      // -c: verify digests instead of producing them
// Number of files whose digest did not match; summarized by main().
static int g_mismatches;
// Compresses one 64-byte block into the running hash.
//
// @param state  eight 32-bit words of hash state, updated in place
// @param data   the block to absorb, read as sixteen big-endian words
static void Sha256Transform(uint32_t state[8], const uint8_t data[64]) {
  uint32_t sched[64];  // message schedule
  uint32_t v[8];       // working variables a..h as v[0]..v[7]
  uint32_t t1, t2;
  unsigned r, k;

  // the first sixteen schedule words come straight from the block
  for (r = 0; r < 16; ++r) {
    const uint8_t *q = data + 4 * r;
    sched[r] = ((uint32_t)q[0] << 24) | ((uint32_t)q[1] << 16) |
               ((uint32_t)q[2] << 8) | (uint32_t)q[3];
  }

  // the remaining words are derived from earlier ones
  for (r = 16; r < 64; ++r) {
    sched[r] = SIG1(sched[r - 2]) + sched[r - 7] + SIG0(sched[r - 15]) +
               sched[r - 16];
  }

  for (k = 0; k < 8; ++k) {
    v[k] = state[k];
  }

  for (r = 0; r < 64; ++r) {
    t1 = v[7] + EP1(v[4]) + CH(v[4], v[5], v[6]) + kSha256Tab[r] + sched[r];
    t2 = EP0(v[0]) + MAJ(v[0], v[1], v[2]);
    // rotate the working variables one slot down: h=g, g=f, ..., b=a
    for (k = 7; k > 0; --k) {
      v[k] = v[k - 1];
    }
    v[4] += t1;       // e = d + t1 (slot 4 now holds the old d)
    v[0] = t1 + t2;   // a = t1 + t2
  }

  for (k = 0; k < 8; ++k) {
    state[k] += v[k];
  }
}
// Resets a context so a new message can be hashed: no buffered bytes, no
// bits counted, and the hash state loaded with its eight starting words.
static void Sha256Init(struct Sha256Ctx *ctx) {
  static const uint32_t kInitialState[8] = {
      0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
      0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
  };
  int k;
  for (k = 0; k < 8; ++k) {
    ctx->state[k] = kInitialState[k];
  }
  ctx->bitlen = 0;
  ctx->datalen = 0;
}
// Feeds `size` bytes from `data` into the hash.
//
// Bytes are appended to the context's block buffer; every time the buffer
// reaches 64 bytes it is compressed, 512 bits are added to the running bit
// count, and the buffer starts over. A non-positive size is a no-op.
static void Sha256Update(struct Sha256Ctx *ctx, const uint8_t *data,
                         long size) {
  const uint8_t *end;
  if (size <= 0) {
    return;
  }
  for (end = data + size; data < end; ++data) {
    ctx->data[ctx->datalen++] = *data;
    if (ctx->datalen != 64) {
      continue;
    }
    Sha256Transform(ctx->state, ctx->data);
    ctx->bitlen += 512;
    ctx->datalen = 0;
  }
}
// Finishes the hash and writes the 32-byte digest to `hash`.
//
// The pending bytes are terminated with 0x80 and zero padded. When fewer
// than 56 bytes are pending the 64-bit length fits in the same block;
// otherwise the padded block is compressed first and the length goes into
// an additional, otherwise empty block. The message length in bits is
// stored big-endian in the last eight bytes, and the digest is the state
// words serialized big-endian.
static void Sha256Final(struct Sha256Ctx *ctx, uint8_t *hash) {
  const uint32_t pending = ctx->datalen;
  uint64_t bits;
  int word, byte;

  ctx->data[pending] = 0x80;
  if (pending < 56) {
    memset(ctx->data + pending + 1, 0, 55 - pending);
  } else {
    memset(ctx->data + pending + 1, 0, 63 - pending);
    Sha256Transform(ctx->state, ctx->data);
    memset(ctx->data, 0, 56);
  }

  ctx->bitlen += ctx->datalen * 8;
  bits = ctx->bitlen;
  for (byte = 63; byte >= 56; --byte) {
    ctx->data[byte] = bits;  // keeps the low eight bits
    bits >>= 8;
  }
  Sha256Transform(ctx->state, ctx->data);

  for (word = 0; word < 8; ++word) {
    for (byte = 0; byte < 4; ++byte) {
      hash[word * 4 + byte] = (ctx->state[word] >> (24 - byte * 8)) & 0xff;
    }
  }
}
// Writes the decimal form of `x` to `p`, NUL terminated.
//
// @return pointer to the terminating NUL, so calls can be chained
// @note   `p` must have room for up to 10 digits plus the terminator
static char *FormatUint32(char *p, uint32_t x) {
  char reversed[10];  // digits, least significant first
  int count = 0;
  do {
    reversed[count++] = (char)('0' + x % 10);
    x /= 10;
  } while (x != 0);
  while (count > 0) {
    *p++ = reversed[--count];
  }
  *p = '\0';
  return p;
}
// Writes the decimal form of the signed value `x` to `p`, with a leading
// '-' for negative numbers, and returns a pointer to the terminating NUL.
// The magnitude is computed in unsigned arithmetic before being formatted.
static char *FormatInt32(char *p, int32_t x) {
  uint32_t magnitude = (uint32_t)x;
  if (x < 0) {
    *p++ = '-';
    magnitude = 0u - magnitude;
  }
  return FormatUint32(p, magnitude);
}
// Size-bounded string append.
//
// Appends `src` to the string in `dst`, never writing past `dsize` bytes
// in total and always NUL terminating when there is room for at least the
// terminator.
//
// @param dst    destination buffer holding a (possibly empty) string
// @param src    NUL terminated string to append
// @param dsize  full size of the destination buffer in bytes
// @return the length the combined string would have had without
//         truncation; a result >= dsize means the output was truncated.
//         If `dst` holds no NUL within `dsize` bytes nothing is written.
static size_t StrCat(char *dst, const char *src, size_t dsize) {
  const size_t srclen = strlen(src);
  size_t used = 0;
  size_t room, ncopy;

  while (used < dsize && dst[used] != '\0') {
    ++used;
  }
  if (used == dsize) {
    return used + srclen;
  }

  room = dsize - used - 1;  // one byte is reserved for the terminator
  ncopy = srclen < room ? srclen : room;
  memcpy(dst + used, src, ncopy);
  dst[used + ncopy] = '\0';
  return used + srclen;
}
static void GetOpts(int argc, char *argv[]) {
int opt;
g_mode = ' ';
while ((opt = getopt(argc, argv, "?hbctw")) != -1) {
switch (opt) {
case 'w':
g_warn = true;
break;
case 'c':
g_check = true;
break;
case 't':
g_mode = ' ';
break;
case 'b':
g_mode = '*';
break;
case 'h':
case '?':
(void)write(1, USAGE, sizeof(USAGE) - 1);
exit(0);
default:
(void)write(2, USAGE, sizeof(USAGE) - 1);
exit(64);
}
}
}
// Concatenates a NULL terminated list of strings and writes the result to
// `fd` with a single write() call.
//
// @param fd   file descriptor to write to
// @param s    first string; must not be NULL
// @param ...  further `const char *` strings, ended by a NULL sentinel
// @note the message is assembled in a 512 byte buffer, so anything beyond
//       511 characters is cut off
static void Write(int fd, const char *s, ...) {
  char message[512];
  va_list args;
  message[0] = '\0';
  va_start(args, s);
  for (;;) {
    StrCat(message, s, sizeof(message));
    s = va_arg(args, const char *);
    if (!s) {
      break;
    }
  }
  va_end(args);
  (void)write(fd, message, strlen(message));
}
// Returns true when `c` is one of the two mode markers that may precede a
// path in a checksum line: ' ' (text) or '*' (binary).
static bool IsModeCharacter(char c) {
  return c == ' ' || c == '*';
}
// Decides whether `path` can be represented in a checksum line.
//
// A path is rejected when it is empty or contains a carriage return, a
// line feed or a backslash. Rejection prints
// "sha256sum: PATH: unsupported path" on stderr.
//
// @return true when the path is acceptable, false otherwise
static bool IsSupportedPath(const char *path) {
  const char *c;
  bool ok = *path != '\0';
  for (c = path; ok && *c != '\0'; ++c) {
    if (*c == '\r' || *c == '\n' || *c == '\\') {
      ok = false;
    }
  }
  if (!ok) {
    Write(2, PROG, ": ", path, ": unsupported path\n", NULL);
  }
  return ok;
}
// Hashes everything that can still be read from `f`.
//
// @param path    name used in the error message only
// @param f       stream to read until fread() returns no more data
// @param digest  receives the 32-byte SHA-256 digest on success
// @return true on success; false when the stream is in an error state, in
//         which case the errno text is reported on stderr and `digest` is
//         left untouched
static bool GetDigest(const char *path, FILE *f, uint8_t digest[32]) {
  struct Sha256Ctx ctx;
  uint8_t chunk[512];
  size_t count;
  Sha256Init(&ctx);
  for (;;) {
    count = fread(chunk, 1, sizeof(chunk), f);
    if (count == 0) {
      break;
    }
    Sha256Update(&ctx, chunk, count);
  }
  if (ferror(f)) {
    Write(2, PROG, ": ", path, ": ", strerror(errno), "\n", NULL);
    return false;
  }
  Sha256Final(&ctx, digest);
  return true;
}
// Writes the lowercase hexadecimal form of `n` bytes at `p` to `s`.
//
// @param s  output buffer with room for 2*n characters plus a NUL
// @param p  bytes to encode
// @param n  number of bytes to encode
// @return pointer to the terminating NUL written to `s`
static char *CopyHex(char *s, const void *p, size_t n) {
  static const char kDigits[] = "0123456789abcdef";
  const unsigned char *bytes = (const unsigned char *)p;
  size_t k;
  for (k = 0; k < n; ++k) {
    s[2 * k] = kDigits[bytes[k] >> 4];
    s[2 * k + 1] = kDigits[bytes[k] & 15];
  }
  s[2 * n] = '\0';
  return s + 2 * n;
}
// Hashes the stream `f` and prints one checksum line to stdout:
// 64 hex digits, a space, the mode marker held in g_mode, the path and a
// newline.
//
// @return false when the path is unsupported or reading fails (the helper
//         that failed has already reported the problem), true otherwise
static bool ProduceDigest(const char *path, FILE *f) {
  unsigned char digest[32];
  char hex[2 * sizeof(digest) + 1];
  char mode[2];
  mode[0] = g_mode;
  mode[1] = '\0';
  if (!IsSupportedPath(path) || !GetDigest(path, f, digest)) {
    return false;
  }
  CopyHex(hex, digest, sizeof(digest));
  Write(1, hex, " ", mode, path, "\n", NULL);
  return true;
}
// Strips every trailing carriage return and line feed from `line` in
// place. A NULL argument is passed through unchanged.
//
// @return `line`
static char *Chomp(char *line) {
  size_t len;
  if (!line) {
    return line;
  }
  len = strlen(line);
  while (len > 0 && (line[len - 1] == '\r' || line[len - 1] == '\n')) {
    line[--len] = '\0';
  }
  return line;
}
// Converts one hexadecimal digit to its value.
//
// @param c  character code; upper and lower case letters are accepted
// @return 0..15 for a valid digit, -1 for anything else
static int HexToInt(int c) {
  int folded;
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  // setting bit 0x20 maps 'A'..'F' onto 'a'..'f' and leaves 'a'..'f' alone;
  // no other value lands inside that range
  folded = c | 0x20;
  if (folded >= 'a' && folded <= 'f') {
    return folded - 'a' + 10;
  }
  return -1;
}
// Verifies every checksum line read from `f`.
//
// Each non-empty line must consist of 64 hex digits, a space, a mode
// character (' ' or '*') and a non-empty path. The named file is hashed
// and "PATH: OK" or "PATH: FAILED" is printed to stdout; mismatches are
// also counted in g_mismatches.
//
// @param path  name of the checksum list, used in diagnostics only
// @param f     stream holding the checksum list
// @return false when a digest mismatched, a listed file could not be
//         opened or read, or reading `f` itself failed; true otherwise.
//         Malformed lines and unsupported paths do not affect the result.
static bool CheckDigests(const char *path, FILE *f) {
FILE *f2;
bool k = true;
int a, b, i, line;
const char *path2, *status;
uint8_t wantdigest[32], gotdigest[32];
// room for 64 hex digits, two separator characters, a path of PATH_MAX
// bytes, the newline and the terminating NUL
char buf[64 + 2 + PATH_MAX + 1 + 1], *p;
for (line = 0; fgets(buf, sizeof(buf), f); ++line) {
// blank lines are skipped silently
if (!*Chomp(buf)) continue;
// decode the expected digest; a short line fails here because the NUL
// terminator is not a hex digit
for (p = buf, i = 0; i < 32; ++i) {
if ((a = HexToInt(*p++ & 255)) == -1) goto InvalidLine;
if ((b = HexToInt(*p++ & 255)) == -1) goto InvalidLine;
wantdigest[i] = a << 4 | b;
}
if (*p++ != ' ') goto InvalidLine;
if (!IsModeCharacter(*p++)) goto InvalidLine;
path2 = p;
if (!*path2) goto InvalidLine;
// IsSupportedPath() reports the problem itself; the entry is skipped
// without marking the run as failed
if (!IsSupportedPath(path2)) continue;
if ((f2 = fopen(path2, "rb"))) {
if (GetDigest(path2, f2, gotdigest)) {
if (!memcmp(wantdigest, gotdigest, 32)) {
status = "OK";
} else {
status = "FAILED";
++g_mismatches;
k = false;
}
Write(1, path2, ": ", status, "\n", NULL);
} else {
// read error, already reported by GetDigest()
k = false;
}
fclose(f2);
} else {
Write(2, PROG, ": ", path2, ": ", strerror(errno), "\n", NULL);
k = false;
}
continue;
InvalidLine:
// malformed lines are only mentioned in warning mode (-w); the reported
// line number is 1-based
if (g_warn) {
char linestr[12];
FormatInt32(linestr, line + 1);
Write(2, PROG, ": ", path, ":", linestr, ": ",
"improperly formatted checksum line", "\n", NULL);
}
}
// distinguish end of input from a read error on the list itself
if (ferror(f)) {
Write(2, PROG, ": ", path, ": ", strerror(errno), "\n", NULL);
k = false;
}
return k;
}
// Handles one input stream according to the selected mode: in check mode
// (g_check) the stream is a checksum list to verify, otherwise it is data
// to hash. Returns the result of the selected operation.
static bool Process(const char *path, FILE *f) {
  return g_check ? CheckDigests(path, f) : ProduceDigest(path, f);
}
// Program entry point.
//
// With no path operands standard input is processed under the name "-".
// Otherwise every operand is opened in binary mode and processed in turn;
// a file that cannot be opened is reported and the remaining operands are
// still handled. When any digest mismatched, a summary warning is printed
// to stderr at the end.
//
// @return 0 when everything succeeded, 1 otherwise
int main(int argc, char *argv[]) {
  bool ok = true;
  int arg;

  GetOpts(argc, argv);

  if (optind == argc) {
    if (!Process("-", stdin)) {
      ok = false;
    }
  }
  for (arg = optind; arg < argc; ++arg) {
    FILE *fp = fopen(argv[arg], "rb");
    if (!fp) {
      Write(2, PROG, ": ", argv[arg], ": ", strerror(errno), "\n", NULL);
      ok = false;
      continue;
    }
    if (!Process(argv[arg], fp)) {
      ok = false;
    }
    fclose(fp);
  }

  if (g_mismatches) {
    char count[12];
    FormatInt32(count, g_mismatches);
    Write(2, PROG, ": WARNING: ", count, " computed checksum did NOT match\n",
          NULL);
  }
  return ok ? 0 : 1;
}
================================================
FILE: build/tags.mk
================================================
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
# Options passed to ctags when generating the TAGS index for sources.
TAGSFLAGS = \
-e \
-a \
--if0=no \
--langmap=c:.c.h.i \
--line-directives=yes
# `make tags` builds both indexes: TAGS for $(SRCS), HTAGS for headers.
tags: TAGS HTAGS
# Each index is rebuilt from scratch; ctags reads the list of files to scan
# from the generated *.txt file (-L $<).
TAGS: o/$(MODE)/tags-srcs.txt $(SRCS)
@rm -f $@
ctags $(TAGSFLAGS) -L $< -o $@
HTAGS: o/$(MODE)/tags-hdrs.txt $(HDRS) $(INCS)
@rm -f $@
build/htags ctags -L $< -o $@
# The file lists are written with $(file ...): the first call truncates the
# list, then one path is appended per entry. They depend on the directories
# containing the files rather than on the files themselves.
o/$(MODE)/tags-srcs.txt: $(call uniq,$(foreach x,$(SRCS),$(dir $(x))))
@mkdir -p $(@D)
$(file >$@) $(foreach x,$(SRCS),$(file >>$@,$(x)))
o/$(MODE)/tags-hdrs.txt: $(call uniq,$(foreach x,$(HDRS) $(INCS),$(dir $(x))))
@mkdir -p $(@D)
$(file >$@) $(foreach x,$(HDRS) $(INCS),$(file >>$@,$(x)))
================================================
FILE: cosmocc-override.cmake
================================================
# CMake overrides for building with cosmocc.
# Force the plain ".o" suffix for assembly, C and C++ object files.
set(CMAKE_ASM_OUTPUT_EXTENSION .o)
set(CMAKE_C_OUTPUT_EXTENSION .o)
set(CMAKE_CXX_OUTPUT_EXTENSION .o)
# Append a znver4 tuning flag to the C and C++ flags.
# NOTE(review): the -Xx86_64 prefix presumably restricts the flag to the
# x86_64 half of the cosmocc build -- confirm against the cosmocc docs.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xx86_64-mtune=znver4")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xx86_64-mtune=znver4")
================================================
FILE: docs/AGENTS.md
================================================
# AGENTS.md
This file provides guidance to Claude Code when working with this repository.
## Project Overview
Llamafile combines llama.cpp, whisper.cpp, and stable-diffusion.cpp with Cosmopolitan Libc to create single-file executables that run LLMs locally across Windows, macOS, Linux, and BSD without installation.
## Quick Reference
```sh
# Initial setup (run once after clone)
make setup
# Build (always use cosmocc make, not system make)
# Adapt `nproc` to the OS where you are building (e.g. `sysctl -n hw.physicalcpu` on macOS)
.cosmocc/4.0.2/bin/make -j $(nproc)
# Run tests
.cosmocc/4.0.2/bin/make check
# Clean build outputs
.cosmocc/4.0.2/bin/make clean
# Reset all submodules (warning: removes local changes)
make reset-repo
```
## Key Directories
| Directory | Purpose |
|-----------|---------|
| `llamafile/` | Core library (edit directly) |
| `llama.cpp/` | LLM inference (submodule, edit directly then convert to patches) |
| `whisper.cpp/` | Speech-to-text (submodule, edit directly then convert to patches) |
| `stable-diffusion.cpp/` | Image generation (submodule, edit directly then convert to patches) |
| `*.patches/` | Patch directories for submodules |
| `o/` | Build outputs |
## Important Notes
- Always use `.cosmocc/4.0.2/bin/make`, not system make
- Run `make setup` after cloning or updating submodules
- Submodule changes require patch files (see skill for workflow)
## Detailed Documentation
For comprehensive build, architecture, development, and testing documentation, ask Claude about "how to build llamafile" or "llamafile development workflow" to load the llamafile skill.
================================================
FILE: docs/commands/build.md
================================================
---
description: Build llamafile using the cosmocc toolchain
---
# Build Llamafile
Build the project using the Cosmopolitan toolchain.
First, ensure the toolchain is available:
```bash
if [ ! -d .cosmocc/4.0.2 ]; then
build/download-cosmocc.sh .cosmocc/4.0.2 4.0.2 85b8c37a406d862e656ad4ec14be9f6ce474c1b436b9615e91a55208aced3f44
fi
```
Then build:
```bash
.cosmocc/4.0.2/bin/make -j $(nproc)
```
Adapt `nproc` to the OS where you are building (e.g. `sysctl -n hw.physicalcpu` on macOS).
Build outputs will be in `o/$(MODE)/`.
================================================
FILE: docs/commands/check.md
================================================
---
description: Run llamafile unit tests
---
# Run Llamafile Tests
Run the unit test suite using the Cosmopolitan toolchain.
```bash
.cosmocc/4.0.2/bin/make check
```
================================================
FILE: docs/commands/clean.md
================================================
---
description: Clean llamafile build outputs
---
# Clean Llamafile Build
Remove all build outputs from the `o/` directory.
```bash
.cosmocc/4.0.2/bin/make clean
```
================================================
FILE: docs/creating_llamafiles.md
================================================
# Creating a llamafile
A llamafile bundles the llamafile executable, model weights, and a set of
default arguments into a single self-contained file using the
[APE](https://justine.lol/ape.html) (Actually Portable Executable) format,
which supports ZIP as a container for extra data. If you have already
downloaded a llamafile, you can inspect its contents with
`unzip -vl <filename>` (or on Windows, rename it to `<filename>.zip` and
open it in your ZIP GUI).
## Prerequisites
llamafile uses [zipalign](https://github.com/jart/zipalign) to bundle files
into the executable. It is included as a git submodule and built alongside
llamafile, so if you have already compiled llamafile you have the `zipalign`
executable in the `o//third_party/zipalign` folder. To build it on its own:
```sh
make o//third_party/zipalign
```
> [!NOTE]
> The zipalign tool referenced here is **not** the
> [Android zipalign](https://developer.android.com/tools/zipalign). See the
> GitHub repo above for an in-depth description and up-to-date code.
## What you need
- **The llamafile executable** — download a prebuilt binary from the
[releases page](https://github.com/mozilla-ai/llamafile/releases), or build
from source following
[these instructions](https://mozilla-ai.github.io/llamafile/source_installation/).
- **Model weights in GGUF format** — download from Hugging Face
([search here](https://huggingface.co/models?library=gguf)), or use weights
already on disk from
[another application](https://mozilla-ai.github.io/llamafile/quickstart/#running-llamafile-with-models-downloaded-by-third-party-applications).
- **A `.args` file** — specifies default arguments (at minimum, the model
path so it loads automatically).
## Examples
### TUI, text-only
Let's see how this works in practice with a simple, text-only language
model, e.g. Qwen3-0.6B:
- [Search](https://huggingface.co/models?library=gguf&sort=trending&search=qwen3-0.6b) for the model weights in GGUF format
(for the sake of this example we'll download [these](https://huggingface.co/Qwen/Qwen3-0.6B-GGUF) with Q8 quantization)
- Create a file named `.args` with the following content:
```text
-m
/zip/Qwen3-0.6B-Q8_0.gguf
-fa
on
--temp
0.6
--top-k
20
--top-p
0.95
--min-p
0
--presence-penalty
1.5
-c
40960
-n
32768
--no-context-shift
--no-mmap
...
```
> [!NOTE]
> There is one argument per line. Most arguments are optional — the model
> name is the only required one (the above replicates the parameters suggested
> [here](https://huggingface.co/Qwen/Qwen3-0.6B-GGUF)). The `/zip/` path
> prefix is required whenever referencing a file packaged inside the llamafile.
> The `...` token is replaced with any additional CLI arguments the user passes
> at runtime.
- Copy the llamafile executable and run zipalign to embed the weights and args:
```bash
cp o//llamafile/llamafile Qwen3-0.6B-Q8.llamafile
o//third_party/zipalign/zipalign -j0 \
Qwen3-0.6B-Q8.llamafile \
Qwen3-0.6B-Q8_0.gguf \
.args
./Qwen3-0.6B-Q8.llamafile
```
Congratulations, you've just made your own LLM executable that's easy to
share with your friends!
Your new llamafile will start loading the Qwen model in the TUI. You can also
run it as a web server with:
```bash
./Qwen3-0.6B-Q8.llamafile --server
```
### Server, multimodal
Now, let us build another llamafile running a multimodal model served
via HTTP. If you want to be able to just say:
```bash
./llava.llamafile
```
...and have it run the web server without having to specify arguments,
embed both the weights and the following `.args` file
(weights used in this example are downloaded from [here](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf)):
```text
-m
/zip/llava-v1.6-mistral-7b.Q8_0.gguf
--mmproj
/zip/mmproj-model-f16.gguf
--server
--host
0.0.0.0
-ngl
9999
--no-mmap
...
```
Next, add both the weights and the argument file to the executable:
```bash
cp o//llamafile/llamafile llava.llamafile
o//third_party/zipalign/zipalign -j0 \
llava.llamafile \
llava-v1.6-mistral-7b.Q8_0.gguf \
mmproj-model-f16.gguf \
.args
./llava.llamafile
```
## Distribution
One good way to share a llamafile with your friends is by posting it on
Hugging Face. If you do that, then it's recommended that you mention in
your Hugging Face commit message what git revision or released version
of llamafile you used when building your llamafile. That way everyone
online will be able to verify the provenance of its executable content. If
you've made changes to the llama.cpp or cosmopolitan source code, then
the Apache 2.0 license requires you to explain what changed. One way you
can do that is by embedding a notice in your llamafile using `zipalign`
that describes the changes, and mention it in your Hugging Face commit.
================================================
FILE: docs/example_llamafiles.md
================================================
We provide example llamafiles for a variety of models, so you can easily try out llamafile
with different kinds of LLMs. The following table lists llamafiles bundled with the latest
available version of the server (v0.10.0). The smaller the file is, the more easily it will
run on your computer, even if no GPU is present (as a reference, Qwen3.5 0.8B Q8 generates
text on a Raspberry Pi5 at ~8 tokens/sec).
| Model | Size | License | llamafile |
| --- | --- | --- | --- |
| [Qwen3.5 0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B) Q8_0 | 1.6 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [Qwen3.5-0.8B-Q8_0.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Qwen3.5-0.8B-Q8_0.llamafile) |
| [Qwen3.5 2B](https://huggingface.co/Qwen/Qwen3.5-2B) Q8_0 | 3.2 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [Qwen3.5-2B-Q8_0.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Qwen3.5-2B-Q8_0.llamafile) |
| [Ministral 3 3B Instruct 2512](https://huggingface.co/mistralai/Ministral-3-3B-Instruct-2512) Q4_K_M | 3.4 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [Ministral-3-3B-Instruct-2512-Q4_K_M.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Ministral-3-3B-Instruct-2512-Q4_K_M.llamafile) |
| [Qwen3.5 4B](https://huggingface.co/Qwen/Qwen3.5-4B) Q5_K_S | 4.1 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [Qwen3.5-4B-Q5_K_S.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Qwen3.5-4B-Q5_K_S.llamafile) |
| [llava v1.6 mistral 7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b) Q4_K_M | 5.3 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [llava-v1.6-mistral-7b-Q4_K_M.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/llava-v1.6-mistral-7b-Q4_K_M.llamafile) |
| [Apertus 8B Instruct 2509](https://huggingface.co/swiss-ai/Apertus-8B-Instruct-2509) | 5.9 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [Apertus-8B-Instruct-2509.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Apertus-8B-Instruct-2509.llamafile) |
| [Qwen3.5 9B](https://huggingface.co/Qwen/Qwen3.5-9B) Q5_K_S | 7.4 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [Qwen3.5-9B-Q5_K_S.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Qwen3.5-9B-Q5_K_S.llamafile) |
| [Ministral 3 3B Instruct 2512](https://huggingface.co/mistralai/Ministral-3-3B-Instruct-2512) BF16 | 7.8 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [Ministral-3-3B-Instruct-2512-BF16.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Ministral-3-3B-Instruct-2512-BF16.llamafile) |
| [llava v1.6 mistral 7b](https://huggingface.co/liuhaotian/llava-v1.6-mistral-7b) Q8_0 | 8.4 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [llava-v1.6-mistral-7b-Q8_0.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/llava-v1.6-mistral-7b-Q8_0.llamafile) |
| [gpt-oss 20b](https://huggingface.co/openai/gpt-oss-20b) mxfp4 | 12 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [gpt-oss-20b-mxfp4.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/gpt-oss-20b-mxfp4.llamafile) |
| [gpt-oss 20b](https://huggingface.co/openai/gpt-oss-20b) Q5_K_S | 12 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [gpt-oss-20b-Q5_K_S.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/gpt-oss-20b-Q5_K_S.llamafile) |
| [LFM2 24B A2B](https://huggingface.co/LiquidAI/LFM2-24B-A2B) Q5_K_M | 16 GB | [lfm1.0](https://huggingface.co/LiquidAI/LFM2-24B-A2B/blob/main/LICENSE) | [LFM2-24B-A2B-Q5_K_M.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/LFM2-24B-A2B-Q5_K_M.llamafile) |
| [Qwen3.5 27B](https://huggingface.co/Qwen/Qwen3.5-27B) Q5_K_S | 19 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [Qwen3.5-27B-Q5_K_S.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Qwen3.5-27B-Q5_K_S.llamafile) |
## Legacy llamafiles
If you prefer the "classic llamafile experience" from previous versions (0.9.*),
here's a list of llamafiles bundled with the older server executable.
| Model | Size | License | llamafile | other quants |
| --- | --- | --- | --- | --- |
| LLaMA 3.2 1B Instruct | 1.11 GB | [LLaMA 3.2](https://huggingface.co/Mozilla/Llama-3.2-1B-Instruct-llamafile/blob/main/LICENSE) | [Llama-3.2-1B-Instruct-Q6\_K.llamafile](https://huggingface.co/Mozilla/Llama-3.2-1B-Instruct-llamafile/blob/main/Llama-3.2-1B-Instruct-Q6_K.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Llama-3.2-1B-Instruct-llamafile) |
| LLaMA 3.2 3B Instruct | 2.62 GB | [LLaMA 3.2](https://huggingface.co/Mozilla/Llama-3.2-3B-Instruct-llamafile/blob/main/LICENSE) | [Llama-3.2-3B-Instruct.Q6\_K.llamafile](https://huggingface.co/Mozilla/Llama-3.2-3B-Instruct-llamafile/blob/main/Llama-3.2-3B-Instruct.Q6_K.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Llama-3.2-3B-Instruct-llamafile) |
| LLaMA 3.1 8B Instruct | 5.23 GB | [LLaMA 3.1](https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/blob/main/LICENSE) | [Llama-3.1-8B-Instruct.Q4\_K\_M.llamafile](https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile/resolve/main/Meta-Llama-3.1-8B-Instruct.Q4_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Meta-Llama-3.1-8B-Instruct-llamafile) |
| Gemma 3 1B Instruct | 1.32 GB | [Gemma 3](https://ai.google.dev/gemma/terms) | [gemma-3-1b-it.Q6\_K.llamafile](https://huggingface.co/Mozilla/gemma-3-1b-it-llamafile/resolve/main/google_gemma-3-1b-it-Q6_K.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/gemma-3-1b-it-llamafile) |
| Gemma 3 4B Instruct | 3.50 GB | [Gemma 3](https://ai.google.dev/gemma/terms) | [gemma-3-4b-it.Q6\_K.llamafile](https://huggingface.co/Mozilla/gemma-3-4b-it-llamafile/resolve/main/google_gemma-3-4b-it-Q6_K.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/gemma-3-4b-it-llamafile) |
| Gemma 3 12B Instruct | 7.61 GB | [Gemma 3](https://ai.google.dev/gemma/terms) | [gemma-3-12b-it.Q4\_K\_M.llamafile](https://huggingface.co/Mozilla/gemma-3-12b-it-llamafile/resolve/main/google_gemma-3-12b-it-Q4_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/gemma-3-12b-it-llamafile) |
| QwQ 32B | 7.61 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [Qwen\_QwQ-32B-Q4\_K\_M.llamafile](https://huggingface.co/Mozilla/QwQ-32B-llamafile/resolve/main/Qwen_QwQ-32B-Q4_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/QwQ-32B-llamafile) |
| R1 Distill Qwen 14B | 9.30 GB | [MIT](https://choosealicense.com/licenses/mit/) | [DeepSeek-R1-Distill-Qwen-14B-Q4\_K\_M](https://huggingface.co/Mozilla/DeepSeek-R1-Distill-Qwen-14B-llamafile/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/DeepSeek-R1-Distill-Qwen-14B-llamafile)|
| R1 Distill Llama 8B | 5.23 GB | [MIT](https://choosealicense.com/licenses/mit/) | [DeepSeek-R1-Distill-Llama-8B-Q4\_K\_M](https://huggingface.co/Mozilla/DeepSeek-R1-Distill-Llama-8B-llamafile/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q4_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/DeepSeek-R1-Distill-Llama-8B-llamafile)|
| LLaVA 1.5 | 3.97 GB | [LLaMA 2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) | [llava-v1.5-7b-q4.llamafile](https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile/resolve/main/llava-v1.5-7b-q4.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/llava-v1.5-7b-llamafile) |
| Mistral-7B-Instruct v0.3| 4.42 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [mistral-7b-instruct-v0.3.Q4\_0.llamafile](https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.3-llamafile/resolve/main/Mistral-7B-Instruct-v0.3.Q4_0.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Mistral-7B-Instruct-v0.3-llamafile) |
| Granite 3.2 8B Instruct | 5.25 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [granite-3.2-8b-instruct-Q4\_K\_M.llamafile](https://huggingface.co/Mozilla/granite-3.2-8b-instruct-llamafile/resolve/main/granite-3.2-8b-instruct-Q4_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/granite-3.2-8b-instruct-llamafile) |
| Phi-3-mini-4k-instruct | 7.67 GB | [Apache 2.0](https://huggingface.co/Mozilla/Phi-3-mini-4k-instruct-llamafile/blob/main/LICENSE) | [Phi-3-mini-4k-instruct.F16.llamafile](https://huggingface.co/Mozilla/Phi-3-mini-4k-instruct-llamafile/resolve/main/Phi-3-mini-4k-instruct.F16.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Phi-3-mini-4k-instruct-llamafile) |
| Mixtral-8x7B-Instruct | 30.03 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [mixtral-8x7b-instruct-v0.1.Q5\_K\_M.llamafile](https://huggingface.co/Mozilla/Mixtral-8x7B-Instruct-v0.1-llamafile/resolve/main/mixtral-8x7b-instruct-v0.1.Q5_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/Mixtral-8x7B-Instruct-v0.1-llamafile) |
| OLMo-7B | 5.68 GB | [Apache 2.0](https://huggingface.co/Mozilla/OLMo-7B-0424-llamafile/blob/main/LICENSE) | [OLMo-7B-0424.Q6\_K.llamafile](https://huggingface.co/Mozilla/OLMo-7B-0424-llamafile/resolve/main/OLMo-7B-0424.Q6_K.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/OLMo-7B-0424-llamafile) |
| *Text Embedding Models* | | | | |
| E5-Mistral-7B-Instruct | 5.16 GB | [MIT](https://choosealicense.com/licenses/mit/) | [e5-mistral-7b-instruct-Q5_K_M.llamafile](https://huggingface.co/Mozilla/e5-mistral-7b-instruct/resolve/main/e5-mistral-7b-instruct-Q5_K_M.llamafile?download=true) | [See HF repo](https://huggingface.co/Mozilla/e5-mistral-7b-instruct) |
| mxbai-embed-large-v1 | 0.7 GB | [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/) | [mxbai-embed-large-v1-f16.llamafile](https://huggingface.co/Mozilla/mxbai-embed-large-v1-llamafile/resolve/main/mxbai-embed-large-v1-f16.llamafile?download=true) | [See HF Repo](https://huggingface.co/Mozilla/mxbai-embed-large-v1-llamafile) |
As described in the [Getting Started](quickstart.md) section,
macOS, Linux, and BSD users will need to use the "chmod"
command to grant execution permissions to the file before running these
llamafiles for the first time.
Unfortunately, Windows users cannot make use of many of these example
llamafiles because Windows has a maximum executable file size of 4GB,
and most of these examples exceed that size. (The LLaVA llamafile works
on Windows because it is 30MB shy of the size limit.) But don't lose
heart: llamafile allows you to use external weights; this is described
in the [Getting Started](quickstart.md) section.
**Having trouble? See the [Troubleshooting](troubleshooting.md) page.**
## A note about models
The example llamafiles provided above should not be interpreted as
endorsements or recommendations of specific models, licenses, or data
sets on the part of Mozilla.
================================================
FILE: docs/index.md
================================================
# llamafile
[License](https://github.com/mozilla-ai/llamafile/blob/main/LICENSE)
[CI status](https://github.com/mozilla-ai/llamafile/actions/workflows/ci.yml)
[llama.cpp 7f5ee54](https://github.com/ggml-org/llama.cpp/commit/7f5ee54)
[whisper.cpp 2eeeba5](https://github.com/ggml-org/whisper.cpp/commit/2eeeba5)
[Discord](https://discord.gg/YuMNeuKStr)
[Mozilla Builders](https://builders.mozilla.org/)
**llamafile lets you distribute and run LLMs with a single file.**
llamafile is a [Mozilla Builders](https://builders.mozilla.org/) project (see its [announcement blog post](https://hacks.mozilla.org/2023/11/introducing-llamafile/)), now revamped by [Mozilla.ai](https://www.mozilla.ai/open-tools/llamafile).
Our goal is to make open LLMs much more
accessible to both developers and end users. We're doing that by
combining [llama.cpp](https://github.com/ggerganov/llama.cpp) with [Cosmopolitan Libc](https://github.com/jart/cosmopolitan) into one
framework that collapses all the complexity of LLMs down to
a single-file executable (called a "llamafile") that runs
locally on most operating systems and CPU architectures, with no installation.
llamafile also includes **[whisperfile](whisperfile/index.md)**, a single-file speech-to-text tool built on [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and the same Cosmopolitan packaging. It supports transcription and translation of audio files across all the same platforms, with no installation required.
## v0.10.0
**llamafile versions starting from 0.10.0 use a new build system**, aimed at keeping our code more easily
aligned with the latest versions of llama.cpp. This means they support more recent models and functionalities,
but at the same time they might be missing some of
the features you were accustomed to (check out [this doc](https://github.com/mozilla-ai/llamafile/blob/main/README_0.10.0.md) for a high-level description of what has been done). If you liked
the "classic experience" more, you will always be able to access the previous versions from our
[releases](https://github.com/mozilla-ai/llamafile/releases) page. Our pre-built llamafiles always
show which version of the server they have been bundled with ([0.9.* example](https://huggingface.co/mozilla-ai/llava-v1.5-7b-llamafile), [0.10.* example](https://huggingface.co/mozilla-ai/llamafile_0.10.0)), so you will always know
which version of the software you are downloading.
> **We want to hear from you!**
Whether you are a new user or a long-time fan, please share what you find most valuable about llamafile and what would make it more useful for you.
[Read more via the blog](https://blog.mozilla.ai/llamafile-returns/) and add your voice to the discussion [here](https://github.com/mozilla-ai/llamafile/discussions/809).
## How llamafile works
A llamafile is an executable LLM that you can run on your own
computer. It contains the weights for a given open LLM, as well
as everything needed to actually run that model on your computer.
There's nothing to install or configure (with a few caveats, discussed
in subsequent sections of this document).
This is all accomplished by combining llama.cpp with Cosmopolitan Libc,
which provides some useful capabilities:
1. llamafiles can run on multiple CPU microarchitectures. We
added runtime dispatching to llama.cpp that lets new Intel systems use
modern CPU features without trading away support for older computers.
2. llamafiles can run on multiple CPU architectures. We do
that by concatenating AMD64 and ARM64 builds with a shell script that
launches the appropriate one. Our file format is compatible with WIN32
and most UNIX shells. It's also able to be easily converted (by either
you or your users) to the platform-native format, whenever required.
3. llamafiles can run on six OSes (macOS, Windows, Linux,
FreeBSD, OpenBSD, and NetBSD). If you make your own llamafiles, you'll
only need to build your code once, using a Linux-style toolchain. The
GCC-based compiler we provide is itself an Actually Portable Executable,
so you can build your software for all six OSes from the comfort of
whichever one you prefer most for development.
4. The weights for an LLM can be embedded within the llamafile.
We added support for PKZIP to the GGML library. This lets uncompressed
weights be mapped directly into memory, similar to a self-extracting
archive. It enables quantized weights distributed online to be prefixed
with a compatible version of the llama.cpp software, thereby ensuring
its originally observed behaviors can be reproduced indefinitely.
5. Finally, with the tools included in this project you can create your
*own* llamafiles, using any compatible model weights you want. You can
then distribute these llamafiles to other people, who can easily make
use of them regardless of what kind of computer they have.
## Licensing
While the llamafile project is Apache 2.0-licensed, our changes
to llama.cpp are licensed under MIT (just like the llama.cpp project
itself) so as to remain compatible and upstreamable in the future,
should that be desired.
The llamafile logo on this page was generated with the assistance of DALL·E 3.
[Star history](https://star-history.com/#mozilla-ai/llamafile&Date)
================================================
FILE: docs/quickstart.md
================================================
# Getting Started with llamafile
The easiest way to try it for yourself is to download our example llamafile
for the [Qwen3.5](https://huggingface.co/Qwen/Qwen3.5-0.8B/) model (license:
[Apache 2.0](https://huggingface.co/Qwen/Qwen3.5-0.8B/blob/main/LICENSE)).
Qwen3.5 is a recent LLM that can do more than just chat; you can also upload
images and ask it questions about them. With llamafile, this all happens
locally: no data ever leaves your computer.
> **NOTE**: we chose this model because it's the smallest one we have
built a llamafile for, so it is the most likely to work out-of-the-box for you.
Please let us know if you still have issues with it! If, on the
other hand, you have powerful hardware and/or GPUs, [feel free to choose](example_llamafiles.md)
larger and more expressive models which should provide more accurate
responses.
1. Download [Qwen3.5-0.8B-Q8_0.llamafile](https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/Qwen3.5-0.8B-Q8_0.llamafile) (1.77 GB).
2. Open your computer's terminal.
- If you're using macOS, Linux, or BSD, you'll need to grant permission
for your computer to execute this new file. (You only need to do this
once.)
```sh
chmod +x Qwen3.5-0.8B-Q8_0.llamafile
```
- If you're on Windows, rename the file by adding ".exe" on the end.
5. Run the llamafile. e.g.:
```sh
./Qwen3.5-0.8B-Q8_0.llamafile
```
6. A chat interface will open in the terminal window. That's it: you can immediately
start writing. You can also upload an image by using the `/upload` command and specifying the path to the image, or write
`/help` to see the available commands.
7. Note that when llamafile is running, you can also chat with it using
[llama.cpp](https://github.com/ggml-org/llama.cpp)'s Web UI: just open a
browser window and connect to <http://localhost:8080>.
8. When you're done chatting, press `Control-C` to shut down llamafile.
**Having trouble? See the [Troubleshooting](troubleshooting.md) page.**
## JSON API Quickstart
As llamafile relies on llama.cpp for serving models, it comes with all its
features. When it is started, in addition to hosting a web UI chat server at
<http://localhost:8080>, it also exposes an endpoint compatible with the
[OpenAI API](https://platform.openai.com/docs/api-reference/chat)
and [Anthropic's Messages API](https://platform.claude.com/docs/en/api/messages).
For further details on what fields and endpoints are available, refer to the
APIs documentation and llama.cpp server's
[README](https://github.com/ggml-org/llama.cpp/tree/master/tools/server).
### Curl API Client Example
The simplest way to get started using the API is to copy and paste the
following curl command into your terminal.
```shell
curl http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer no-key" \
-d '{
"model": "LLaMA_CPP",
"messages": [
{
"role": "system",
"content": "You are LLAMAfile, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests."
},
{
"role": "user",
"content": "Write a limerick about python exceptions"
}
]
}' | python3 -c '
import json
import sys
json.dump(json.load(sys.stdin), sys.stdout, indent=2)
print()
'
```
The response that's printed should look like the following:
```json
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant",
"content": "In the world of Python, where magic breaks and errors occur,\nA script fails when it should not have failed.\nWith a `KeyError`, I can't access the key,\nSo I tell you to use the `except` clause!"
}
}
],
"created": 1773659260,
"model": "Qwen3.5-0.8B-Q8_0.gguf",
"system_fingerprint": "b1773565177-7f5ee5496",
"object": "chat.completion",
"usage": {
"completion_tokens": 52,
"prompt_tokens": 49,
"total_tokens": 101
},
"id": "chatcmpl-KOqwN6C0oRzINGZuFqZ95bU1iPfc6RFO",
"timings": {
"cache_n": 0,
"prompt_n": 49,
"prompt_ms": 54.944,
"prompt_per_token_ms": 1.1213061224489795,
"prompt_per_second": 891.8171228887594,
"predicted_n": 52,
"predicted_ms": 405.856,
"predicted_per_token_ms": 7.804923076923076,
"predicted_per_second": 128.1242608215722
}
}
```
### Python API Client Example
If you've already developed your software using the [`openai` Python
package](https://pypi.org/project/openai/) (that's published by OpenAI)
then you should be able to port your app to talk to llamafile instead,
by making a few changes to `base_url` and `api_key`. This example
assumes you've run `pip3 install openai` to install OpenAI's client
software, which is required by this example. Their package is just a
simple Python wrapper around the OpenAI API interface, which can be
implemented by any server.
```python
#!/usr/bin/env python3
from openai import OpenAI
client = OpenAI(
base_url="http://localhost:8080/v1", # "http://<your api-server IP>:port"
api_key = "sk-no-key-required"
)
completion = client.chat.completions.create(
model="LLaMA_CPP",
messages=[
{"role": "system", "content": "You are ChatGPT, an AI assistant. Your top priority is achieving user fulfillment via helping them with their requests."},
{"role": "user", "content": "Write a limerick about python exceptions"}
]
)
print(completion.choices[0].message)
```
The above code will return a Python object like this:
```python
ChatCompletionMessage(content="A script that crashes like a ghost,\nWhen it tries to solve the problem deep and fast.\nThe error message pops up in a bright light,\nAnd tells us what's wrong when we try to fix it.", refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None)
```
## Using llamafile with external weights
Even though our example llamafiles have the weights built-in, you don't
*have* to use llamafile that way. Instead, you can download *just* the
llamafile software (without any weights included) from our releases page.
You can then use it alongside any external weights you may have on hand.
External weights are particularly useful for Windows users because they
enable you to work around Windows' 4GB executable file size limit.
For Windows users, here's an example for the gpt-oss LLM (whose size is >12GB):
```sh
curl -L -o llamafile.exe https://huggingface.co/mozilla-ai/llamafile_0.10.0/resolve/main/llamafile_0.10.0
curl -L -o gpt-oss.gguf https://huggingface.co/unsloth/gpt-oss-20b-GGUF/resolve/main/gpt-oss-20b-Q5_K_S.gguf
./llamafile.exe -m gpt-oss.gguf
```
Windows users may need to change `./llamafile.exe` to `.\llamafile.exe` when running the above command.
## Running llamafile with models downloaded by third-party applications
This section answers the question *"I already have a model downloaded locally by application X, can I use it with llamafile?"*. The general answer is "yes, as long as those models are locally stored in GGUF format" but its implementation can be more or less hacky depending on the application. A few examples (tested on a Mac) follow.
### LM Studio
[LM Studio](https://lmstudio.ai/) stores downloaded models in `~/.cache/lm-studio/models/lmstudio-community`, in subdirectories with the same name as the models, minus their quantization level. So if you have downloaded e.g. the `gpt-oss-20b-MXFP4.gguf` file, it will be stored in `~/.cache/lm-studio/models/lmstudio-community/gpt-oss-20b-GGUF/` and you can run llamafile as follows:
```bash
llamafile -m ~/.cache/lm-studio/models/lmstudio-community/gpt-oss-20b-GGUF/gpt-oss-20b-MXFP4.gguf
```
### Ollama
When you download a new model with [ollama](https://ollama.com), all its metadata will be stored in a manifest file under `~/.ollama/models/manifests/registry.ollama.ai/library/`. The directory and manifest file name are the model name as returned by `ollama list`. For instance, for `llama3:latest` the manifest file will be named `~/.ollama/models/manifests/registry.ollama.ai/library/llama3/latest`.
The manifest maps each file related to the model (e.g. GGUF weights, license, prompt template, etc) to a sha256 digest. The digest corresponding to the element whose `mediaType` is `application/vnd.ollama.image.model` is the one referring to the model's GGUF file.
Each sha256 digest is also used as a filename in the `~/.ollama/models/blobs` directory (if you look into that directory you'll see *only* those sha256-* filenames). This means you can directly run llamafile by passing the sha256 digest as the model filename. So if e.g. the `llama3:latest` GGUF file digest is `sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29`, you can run llamafile as follows:
```bash
cd ~/.ollama/models/blobs
llamafile -m sha256-00e1317cbf74d901080d7100f57580ba8dd8de57203072dc6f668324ba545f29
```
**Note** that Ollama's GGUF weights do not always work with llama.cpp (see e.g. [here](https://forums.developer.nvidia.com/t/nemotron-3-super-120b-on-gb10-llama-cpp-sm-121-build-ollama-gguf-incompatibility-fix/363459)),
and as llamafile relies on llama.cpp this trick might not always work for you.
================================================
FILE: docs/running_llamafile.md
================================================
You have just downloaded a llamafile from the [Example llamafiles](example_llamafiles.md)
section. Now what? Here are a few examples to get you started.
> **NOTE**
For the purpose of these examples, you can run any of the following either from a
pre-bundled llamafile or by calling the llamafile server executable and passing
it the corresponding model weights. For instance, the following two are equivalent:
```sh
llamafile -m Apertus-8B-Instruct-2509.gguf --temp ...
```
```sh
./Apertus-8B-Instruct-2509.llamafile --temp ...
```
## Running llamafile in CLI mode
If you add the `--cli` argument to a llamafile, you will run a CLI version
of the model that answers to whatever you provide as a prompt (via the `-p`
argument) and, for multimodal models, an image (via the `--image` argument).
Here's how you can use the Apertus 8B model for prose composition:
```sh
./Apertus-8B-Instruct-2509.llamafile --cli -p 'Write a story about llamas'
```
Here's how you can use llamafile to describe a jpg/png/gif/bmp image with
a multimodal model (Qwen3.5, Ministral3, llava1.6 are all good candidates):
```sh
llamafile -ngl 9999 --temp 0 \
--cli \
--image ~/Pictures/lemurs.jpg \
-m llava-v1.6-mistral-7b.Q4_K_M.gguf \
--mmproj mmproj-model-f16.gguf \
-p 'Describe this picture'
```
The weights above were taken from [here](https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/tree/main).
Alternatively, you can use a pre-bundled llamafile:
```sh
./Ministral-3-3B-Instruct-2512-Q4_K_M.llamafile -ngl 9999 \
--cli \
--image ~/Pictures/lemurs.jpg \
-p 'Describe this picture'
```
Here's how you can use Qwen3.5 9B to summarize a Web page:
```sh
./Qwen3.5-9B-Q5_K_S.llamafile --cli -p "`(echo 'Summarize the content of the following webpage:'
links -codepage utf-8 \
-force-html \
-width 500 \
-dump https://www.poetryfoundation.org/poems/48860/the-raven |
sed 's/   */ /g')`"
```
## Running llamafile in chat mode
If you add the `--chat` argument to a llamafile, you will run it in chat mode.
Chat mode has different /commands available (type `/help` for the full list)
which include context management, file upload, and dumping of the conversation
to an output file.
## Running llamafile in server mode
If you add the `--server` argument to a llamafile, you will run it in server mode.
Here's an example of how to run llama.cpp's built-in HTTP server. The `--host`
parameter makes it reachable not just from your own computer, but also from
other machines that can reach it via network. The `--port` parameter can be
used to specify a different port from the default one (8080).
```sh
./llava-v1.6-mistral-7b-Q4_K_M.llamafile \
--server \
--host 0.0.0.0 \
--port 8081
```
If you want to serve a model to be used by an AI agent / agentic framework,
you should add the `--jinja` parameter and choose a context size which is
large enough (but still fits your memory). For instance:
```sh
./gpt-oss-20b-mxfp4.llamafile \
--server \
--host 0.0.0.0 \
--jinja \
--ctx-size 64000
```
## Running llamafile in combined mode
Combined mode is the default for the latest generation of llamafiles: when you
run them without specifying any of `--cli`, `--chat`, or `--server`, both
a server (running at <http://localhost:8080>) and a chat in the terminal will
start simultaneously. You will then be able to e.g. run an OpenAI API endpoint
while you chat in the terminal, or use different chats simultaneously.
## llamafile 0.9.* examples
The following examples have not been tested with llamafile 0.10.* yet,
but we thought they were too cool not to preserve them!
If you are having issues testing these examples with the latest llamafiles,
you can try running them with an older release... And let us know if you want
them to be supported by the new build.
Here's an example of how to generate code for a libc function using the
llama.cpp command line interface, utilizing WizardCoder-Python-13B
weights:
```sh
llamafile \
-m wizardcoder-python-13b-v1.0.Q8_0.gguf \
--temp 0 -r '}\n' -r '```\n' \
-e -p '```c\nvoid *memcpy(void *dst, const void *src, size_t size) {\n'
```
Here's an example of how llamafile can be used as an interactive chatbot
that lets you query knowledge contained in training data:
```sh
llamafile -m llama-65b-Q5_K.gguf -p '
The following is a conversation between a Researcher and their helpful AI assistant Digital Athena which is a large language model trained on the sum of human knowledge.
Researcher: Good morning.
Digital Athena: How can I help you today?
Researcher:' --interactive --color --batch_size 1024 --ctx_size 4096 \
--keep -1 --temp 0 --mirostat 2 --in-prefix ' ' --interactive-first \
--in-suffix 'Digital Athena:' --reverse-prompt 'Researcher:'
```
It's possible to use a BNF grammar to ensure that the output is predictable
and safe to use in your shell script. The simplest grammar would be
`--grammar 'root ::= "yes" | "no"'` to force the LLM to only print to
standard output either `"yes\n"` or `"no\n"`. Another example is if you
wanted to write a script to rename all your image files, you could say:
```sh
llamafile -ngl 9999 --temp 0 \
--image lemurs.jpg \
-m llava-v1.5-7b-Q4_K.gguf \
--mmproj llava-v1.5-7b-mmproj-Q4_0.gguf \
--grammar 'root ::= [a-z]+ (" " [a-z]+)+' \
-e -p '### User: What do you see?\n### Assistant: ' \
--no-display-prompt 2>/dev/null |
sed -e's/ /_/g' -e's/$/.jpg/'
a_baby_monkey_on_the_back_of_a_mother.jpg
```
================================================
FILE: docs/skills/llamafile/SKILL.md
================================================
---
name: llamafile
description: This skill should be used when the user asks to "build llamafile", "rebuild llamafile", "run llamafile", "run llamafile tests", "debug llamafile", "set up llamafile", "update patches", "fix patch conflict", "update llama.cpp", "pull latest llama.cpp", "sync upstream llama.cpp", "reset submodules", "write a test for llamafile", "how does llamafile work", "llamafile architecture", or needs guidance on the llamafile build system, patch workflow, submodule integration, cosmocc toolchain, or development practices.
version: 0.1.2
---
# Llamafile Development Guide
Llamafile combines llama.cpp, whisper.cpp, and stable-diffusion.cpp with Cosmopolitan Libc to create single-file executables that run LLMs locally across Windows, macOS, Linux, and BSD without installation.
## Version Disambiguation
- **New llamafile** (or simply "llamafile"): The code in the `main` branch, used for releases >=0.10.0
- **Old/Classic llamafile**: The legacy code, used for releases until 0.9.3 (see commit 7e7d33c).
This guide covers the **new llamafile** project.
## Quick Reference
### Initial Setup
```sh
make setup
```
Immediately after cloning the repo (or after a reset done with `make reset-repo`), this command initializes git submodules and applies llamafile-specific patches.
### Building
Run `llamafile:build` to build all targets.
### Testing
Run `llamafile:check` to run the unit test suite.
### Cleaning
Run `llamafile:clean` to remove all build outputs.
### Reset Submodules
After `make setup`, submodules contain patches and are no longer in a clean state.
To reset them, run:
```sh
make reset-repo # Warning: removes all local changes
```
WARNING: this command removes all local changes. Do not run it without first generating patches from any modifications.
## Core Workflows
### Building from Scratch
To build llamafile from a fresh clone:
1. Clone the repository
2. Run `make setup` to initialize submodules and apply patches
3. Build with `llamafile:build`
Build outputs appear in `o/$(MODE)/` directory.
### Modifying Core Code
For changes to llamafile's own code (not submodules):
1. Edit files in `llamafile/` directory
2. Rebuild with `llamafile:build`
3. Run unit tests with `llamafile:check`
### Modifying Submodule Code
Submodules (llama.cpp, whisper.cpp, stable-diffusion.cpp) require a patch-based workflow:
1. Make changes directly in the submodule directory
2. Rebuild with `llamafile:build`
3. Run unit tests with `llamafile:check`
NOTE: never try to edit patches or generate them manually. Patch generation is
done only after the rebuild and tests (even manual ones) are successful. See
`development.md` for the detailed patch workflow.
### Running Specific Tests
Tests use the `.runs` pattern in BUILD.mk files:
```makefile
o/$(MODE)/llamafile/json_test.runs
```
To run all tests: `llamafile:check`
## Key Concepts
### Cosmopolitan Toolchain
The project uses Cosmopolitan Libc (cosmocc) to create Actually Portable Executables (APE) - single files that run on multiple platforms without modification. Always use the `llamafile:build`, `llamafile:check`, and `llamafile:clean` commands (which use cosmocc's make), not system make.
### Patch System
Each submodule has a corresponding patches directory:
- `llama.cpp.patches/`
- `whisper.cpp.patches/`
- `stable-diffusion.cpp.patches/`
Patches include:
- **Modifications** (.patch files): Changes to upstream code
- **Additions** (llamafile-files/): New files for integration (BUILD.mk, utilities)
### Build System
- **build/config.mk**: Compiler and toolchain configuration
- **build/rules.mk**: Generic build patterns (.c → .o, archives, asset bundling)
- **BUILD.mk files**: Per-package build logic
Outputs: `o/$(MODE)/package/file.o`
### Multi-Architecture Support
Binaries include both x86_64 and aarch64 code paths with runtime CPU feature detection (AVX, AVX2, AVX-512, ARM NEON).
## Main Executables
After building, find binaries in `o/$(MODE)/`:
| Binary | Purpose |
|--------|---------|
| `llamafile/llamafile` | Main llamafile executable |
| `third_party/zipalign/zipalign` | Bundle assets into executables |
| `whisperfile/whisperfile` | Main whisperfile executable |
## Troubleshooting
### Build Fails After Submodule Update
Run `make setup` to reapply patches after any submodule changes.
### Submodule Has Uncommitted Changes
To reset a single submodule:
```sh
cd <submodule-dir> && git reset --hard && git clean -fdx
```
To reset all submodules:
```sh
make reset-repo
```
### Wrong Make Being Used
Make sure you are using the `llamafile:build` command (which uses cosmocc's make), not the system make.
## Additional Resources
### Reference Files
For detailed information, consult:
- **`building.md`** - Complete build system documentation, toolchain details
- **`architecture.md`** - Repository structure, component overview
- **`development.md`** - Development workflow, patch management, submodule integration
- **`testing.md`** - Test patterns, running and writing tests
- **`update_llamacpp.md`** - Keeping llamafile updated with upstream llama.cpp
### Project Documentation
- **README.md** in repo: Project introduction
- **docs/** directory: User documentation (quickstart, installation, troubleshooting)
- **RELEASE.md**: Release process
- Most executables support `--help`
================================================
FILE: docs/skills/llamafile/architecture.md
================================================
# Llamafile Architecture
Repository structure and component overview.
## Project Overview
Llamafile creates single-file executables that run LLMs locally across Windows, macOS, Linux, and BSD without installation. It achieves this by:
1. Combining multiple inference engines (llama.cpp, whisper.cpp, stable-diffusion.cpp)
2. Using Cosmopolitan Libc for cross-platform portability
3. Bundling models and assets into Actually Portable Executables (APE)
## Repository Structure
```
llamafile/
├── llamafile/ # Core library
│ ├── server/ # HTTP server implementation
│ └── highlight/ # Syntax highlighting
├── llama.cpp/ # LLM inference (submodule)
│ ├── ggml/ # Low-level tensor ops
│ ├── src/ # Model implementations
│ ├── common/ # Utilities
│ └── tools/ # CLI applications
├── whisper.cpp/ # Speech-to-text (submodule)
├── stable-diffusion.cpp/ # Image generation (submodule)
├── localscore/ # Benchmarking tool
├── third_party/ # External dependencies
├── build/ # Build system
├── docs/ # User documentation
├── *.patches/ # Patch directories
└── o/ # Build outputs
```
## Core Components
### llamafile/ - Core Library
The heart of llamafile, containing:
- **tinyblas**: BLAS kernels for CUDA support without cublas and optimized CPU inference
- **GPU support**: Metal, CUDA and ROCm integration (dynamic loading)
- **Multiplatform optimizations**: CPU feature detection, runtime dispatch
- **TUI**: Chat interface running in the terminal
#### llamafile/highlight/
Syntax highlighting for code output in chat responses.
### llama.cpp/ - LLM Inference Engine
Git submodule providing:
- **ggml/**: Low-level tensor library
- Matrix operations
- Quantization support
- Backend abstraction (CPU, CUDA, Metal, etc.)
- **src/**: LLM implementations
- 100+ model architectures
- GGUF format handling
- KV cache management
- **common/**: Shared utilities
- Argument parsing
- Sampling algorithms
- Chat templates
- **tools/**: CLI applications
- main (inference)
- quantize (model quantization)
- imatrix (importance matrix)
- perplexity (model evaluation)
- llama-bench (benchmarking)
### whisper.cpp/ - Speech-to-Text
Git submodule for audio transcription:
- Whisper model implementation
- Audio processing utilities
- Multiple model sizes (tiny to large)
### stable-diffusion.cpp/ - Image Generation
Git submodule for image synthesis:
- Stable Diffusion implementation
- Image encoding/decoding
- Various SD model support
### third_party/ - Dependencies
External libraries:
- **double-conversion**: Float-to-string conversion
- **mbedtls**: TLS/SSL support
- **sqlite**: Database support
- **stb**: Image loading/saving
- **zipalign**: Tool to bundle llamafile executables with model weights and configurations
## Patch System
Each submodule has a corresponding patches directory:
```
llama.cpp.patches/
├── patches/ # .patch files modifying upstream
└── llamafile-files/ # New files for integration
whisper.cpp.patches/
├── patches/
└── llamafile-files/
stable-diffusion.cpp.patches/
├── patches/
└── llamafile-files/
```
### Patch Types
1. **Modifications** (`.patch` files):
- Changes to existing upstream code
- Applied with `git apply`
- Track upstream file changes
2. **Additions** (`llamafile-files/`):
- New files for llamafile integration
- Example: BUILD.mk for each submodule
- Utility scripts
- Additional documentation
3. **Deletions**:
- Removal of upstream build systems (CMakeLists.txt, Makefiles)
- Replaced by llamafile's unified build
- NOTE: deletions were common in the original llamafile but are no longer used,
as submodule code is pulled rather than redistributed
### Patch Application
`make setup` applies patches:
1. Initialize/update git submodules
2. Apply each .patch file in order
3. Copy llamafile-files/ contents into submodule
4. Remove conflicting build files
Finally, if cosmocc is not present, it is automatically downloaded at the end of `make setup`.
## Build Infrastructure
### build/ Directory
```
build/
├── config.mk # Toolchain configuration
├── rules.mk # Generic build patterns
├── download-cosmocc.sh # Toolchain download
├── llamafile-convert # Model conversion
└── llamafile-upgrade-engine # Engine updates
```
### BUILD.mk Pattern
Each component has a BUILD.mk defining:
```makefile
# Source files
COMPONENT_SRCS = \
component/file1.c \
component/file2.c
# Object files
COMPONENT_OBJS = $(COMPONENT_SRCS:%.c=o/$(MODE)/%.o)
# Library target
o/$(MODE)/component/libcomponent.a: $(COMPONENT_OBJS)
# Executable target
o/$(MODE)/component/binary: o/$(MODE)/component/libcomponent.a
# Test targets
o/$(MODE)/component/test.runs: o/$(MODE)/component/test
```
### Output Organization
```
o/$(MODE)/
├── package/
│ ├── file.o # Object files
│ ├── libpackage.a # Static libraries
│ └── binary # Executables
└── ...
```
## Key Technologies
### Actually Portable Executable (APE)
Cosmopolitan's executable format:
- Single file runs on Windows, macOS, Linux, BSD
- Contains x86_64 and aarch64 code
- Self-extracting when needed
- No installation required
### Asset Bundling
Files embedded into executables:
- Models (.gguf)
- Web assets (HTML, CSS, JS)
- Shared libraries (.so, .dll)
The `zipalign` tool handles bundling, and files are accessible via Cosmopolitan's VFS.
### Runtime CPU Dispatch
Binaries detect CPU features and select optimal code:
- x86_64: SSE, AVX, AVX2, AVX-512, FMA
- aarch64: NEON, SVE
This happens transparently at runtime, no user configuration needed.
### Dynamic GPU Loading
GPU support loads at runtime:
- CUDA: Loads from system or bundled .so/.dll
- ROCm: Similar dynamic loading
- Fallback to CPU if GPU unavailable
## Licensing
- **Llamafile project**: Apache 2.0
- **Llamafile changes to llama.cpp**: MIT (upstream compatibility)
- **Dependencies**: Retain original licenses
================================================
FILE: docs/skills/llamafile/building.md
================================================
# Building Llamafile
Complete guide to the llamafile build system and toolchain.
## Prerequisites
### Cosmopolitan Toolchain
Llamafile uses Cosmopolitan C/C++ compiler (cosmocc) to create Actually Portable Executables (APE). The toolchain
is downloaded automatically when `make setup` is called but can be fetched manually too with:
```sh
build/download-cosmocc.sh .cosmocc/4.0.2 4.0.2 85b8c37a406d862e656ad4ec14be9f6ce474c1b436b9615e91a55208aced3f44
```
Arguments:
1. Destination directory (`.cosmocc/4.0.2`)
2. Version (`4.0.2`)
3. SHA256 checksum for verification
### Git Submodules
Three main dependencies are git submodules:
- llama.cpp - LLM inference engine
- whisper.cpp - Speech-to-text engine
- stable-diffusion.cpp - Image generation engine
## Initial Setup
Before first build, initialize and configure dependencies:
```sh
make setup
```
This command:
1. Initializes git submodules (clones if needed)
2. Applies llamafile-specific patches from the `*.patches/` directories
3. Modifies submodules in-place for llamafile integration
**Important:** Run `make setup` after:
- Fresh clone
- Updating submodules
- Pulling changes that modify patch files
## Build Commands
### Full Build
```sh
.cosmocc/4.0.2/bin/make -j $(nproc) # or: llamafile:build
```
The `-j $(nproc)` flag enables parallel compilation (adjust based on CPU cores).
Adapt `nproc` to the OS you are building on (e.g. use `sysctl -n hw.physicalcpu` on macOS).
**Critical:** Always use `.cosmocc/4.0.2/bin/make`, not system make. The cosmocc toolchain includes its own make with Cosmopolitan-specific behavior.
### Clean Build
Remove build outputs:
```sh
.cosmocc/4.0.2/bin/make clean # or: llamafile:clean
```
This removes the `o/` directory containing all compiled objects and binaries.
### Install compiled binaries
```sh
sudo .cosmocc/4.0.2/bin/make install PREFIX=/usr/local
```
Installs binaries and man pages.
## Build System Architecture
### Directory Structure
```
build/
├── config.mk # Compiler, flags, toolchain version
├── rules.mk # Generic build patterns
├── download-cosmocc.sh # Toolchain download script
├── llamafile-convert # Model conversion script
└── llamafile-upgrade-engine # Engine update script
```
### Configuration (build/config.mk)
Defines:
- Compiler paths (CC, CXX pointing to cosmocc)
- Compiler flags (optimization, warnings)
- Toolchain version
- Platform-specific settings
### Build Rules (build/rules.mk)
Generic patterns for:
- `.c` → `.o` compilation
- `.a` archive creation
- `.zip.o` asset bundling (embed files into executables)
### BUILD.mk Files
Each major component has a BUILD.mk file defining:
- Source files to compile
- Dependencies
- Build targets
- Test targets
The top-level Makefile includes all BUILD.mk files to orchestrate the build.
## Build Outputs
All outputs go to `o/$(MODE)/`:
```
o/
└── $(MODE)/
├── llamafile/
│ ├── llamafile # Main executable
│ ├── *.o # Object files
│ └── *.a # Static libraries
├── llama.cpp/
├── whisper.cpp/
├── stable-diffusion.cpp/
└── third_party/
└── zipalign/
└── zipalign # Asset bundling tool
```
## Multi-Architecture Support
The build system creates universal binaries supporting:
- x86_64 (Intel/AMD)
- aarch64 (ARM64)
Both architectures are compiled simultaneously and combined into single APE binaries.
### Runtime Dispatch
Binaries detect CPU features at runtime and select optimal code paths:
- AVX, AVX2, AVX-512 (x86_64)
- ARM NEON (aarch64)
## Asset Bundling
Files can be embedded into executables using the `.zip.o` pattern:
```makefile
o/$(MODE)/path/to/asset.zip.o: path/to/asset
```
The `zipalign` tool handles bundling. Embedded assets are accessible at runtime through the Cosmopolitan virtual filesystem.
## GPU Support
GPU acceleration (CUDA/ROCm) uses dynamic loading:
- Shared libraries (.so/.dll) are not linked at compile time
- Libraries are loaded at runtime if available
- Can be bundled into executables using zipalign
## Troubleshooting
### "make: command not found" or Wrong Make
Make sure you are using cosmocc's make:
```sh
# Wrong
make -j $(nproc)
# Correct
.cosmocc/4.0.2/bin/make -j $(nproc)
# Or use the command directly:
# llamafile:build
```
### Submodule Not Initialized
If build fails with missing files in llama.cpp/whisper.cpp/stable-diffusion.cpp:
```sh
make setup
```
### Stale Object Files
After significant changes, clean and rebuild:
```sh
.cosmocc/4.0.2/bin/make clean # or: llamafile:clean
.cosmocc/4.0.2/bin/make -j $(nproc) # or: llamafile:build
```
### Toolchain Checksum Mismatch
If `download-cosmocc.sh` fails verification, check:
1. Correct version specified
2. Correct checksum for that version
3. Network connectivity
================================================
FILE: docs/skills/llamafile/development.md
================================================
# Llamafile Development Workflow
Guide to modifying code, managing patches, and working with submodules.
## Development Overview
Llamafile development involves two distinct workflows:
1. **Core code changes**: Direct edits to root-level directories such as `llamafile/`, `whisperfile/`, etc.
2. **Submodule changes**: Patch-based modifications to `llama.cpp`, `whisper.cpp`, `stable-diffusion.cpp`
## Modifying Core Code
For changes that do not affect submodules:
### Workflow
1. Edit files
2. Rebuild: `llamafile:build`
3. Test: `llamafile:check`
4. Commit changes normally with git
### Key Directories
```
llamafile/
├── server/ # HTTP server, API endpoints
├── highlight/ # Syntax highlighting
├── tinyblas/ # Optimized BLAS kernels
└── *.c, *.h # Core utilities
```
## Modifying Submodule Code
Submodules require a patch-based workflow because:
- Submodules point to specific upstream commits
- Direct commits in submodules would be lost
- Patches preserve modifications across submodule updates
### Understanding the Patch System
Each submodule has a patches directory. For instance, for `llama.cpp`:
```
llama.cpp.patches/
├── README.md # Patching info + list of all patches and their purpose
├── apply-patches.sh # Script to apply all patches to llama.cpp submodule
├── renames.sh # Script for file renames/moves (if any)
├── llamafile-files/ # Additional files to copy into llama.cpp
│ ├── BUILD.mk # Makefile for building llama.cpp with cosmocc
│ └── README.llamafile # License and modification notes
└── patches/ # Patch files for upstream sources
```
Patches are applied by `make setup`:
1. Submodule is reset to clean state
2. Each .patch file is applied in alphabetical order
3. Files from llamafile-files/ are copied into the submodule
### Making Changes to a Submodule
#### Step 1: Make Changes
Edit files directly in the submodule directory:
```sh
cd llama.cpp
# Make your changes
vim src/llama.cpp
```
#### Step 2: Generate Patches
Patches are usually generated after the code has been thoroughly tested and is
ready to commit. To avoid manual errors, use the script `tools/generate-patches.sh`
which automatically saves all new files and patches in the specified output directory.
```sh
cd llama.cpp
../tools/generate-patches.sh --output-dir ../llama.cpp.patches
```
After this operation, one can double check which files have been modified / added
via a `git diff`.
Naming convention:
- all patches have a `.patch` extension
- patch filenames reflect the file path with underscores replacing slashes (e.g., `common_arg.cpp.patch` for `common/arg.cpp`).
#### Step 3: Verify Patches
Once you are sure all patches have been saved, reset and reapply to verify:
```sh
# Reset everything
make reset-repo
# Reapply patches
make setup
# Rebuild and test
# llamafile:build
# llamafile:check
```
### Adding New Files to Submodules
For new files (not modifications), use llamafile-files/:
```sh
# Create directory structure matching submodule
mkdir -p llama.cpp.patches/llamafile-files/src/
# Add your new file
cp new-utility.cpp llama.cpp.patches/llamafile-files/src/
```
The file will be copied into the submodule during `make setup`.
### Updating BUILD.mk for Submodules
Each submodule needs a BUILD.mk in llamafile-files/:
```makefile
# llama.cpp.patches/llamafile-files/BUILD.mk
LLAMA_SRCS = \
llama.cpp/src/llama.cpp \
llama.cpp/src/new-file.cpp # Add new files here
LLAMA_OBJS = $(LLAMA_SRCS:%.cpp=o/$(MODE)/%.o)
# ... rest of build rules
```
## Submodule Management
### Resetting a Single Submodule
To discard changes in one submodule:
```sh
cd llama.cpp
git reset --hard
git clean -fdx
```
Then reapply patches:
```sh
cd ..
make setup
```
### Resetting All Submodules
To reset everything (warning: loses all local changes):
```sh
make reset-repo
make setup
```
## Git Workflow
### Committing Changes
For core code changes:
```sh
git add llamafile/modified-file.c
git commit -m "Fix: description"
```
For submodule patches:
```sh
git add llama.cpp.patches/patches/new-patch.patch
git commit -m "llama.cpp: Add feature X"
```
### Pull Request Checklist
Before submitting changes:
1. [ ] Patches apply cleanly from fresh clone
2. [ ] Build succeeds: `llamafile:build`
3. [ ] Tests pass: `llamafile:check`
4. [ ] Patches are focused and documented
5. [ ] BUILD.mk updated if adding new files
## Debugging Tips
### Viewing Applied Patches
To see what patches are currently applied:
```sh
cd llama.cpp
git log --oneline HEAD...$(git rev-parse --short @{u} 2>/dev/null || echo "origin/master")
```
### Checking Submodule State
```sh
git submodule status
```
Output shows:
- `-` : Not initialized
- `+` : Different commit than recorded
- ` ` : Clean, matches recorded commit
### Finding Which Patch Changed a File
```sh
grep -l "filename" llama.cpp.patches/patches/*.patch
```
================================================
FILE: docs/skills/llamafile/testing.md
================================================
# Testing Llamafile
Guide to running and writing tests.
## Running Tests
### Manually testing the executable
#### TUI mode
Run a newly compiled llamafile executable this way:
```sh
./o/llamafile/llamafile --model gguf_model.gguf
```
where `gguf_model.gguf` is a file holding a model's weights in GGUF format. For
instance:
```sh
./o/llamafile/llamafile --model ~/llamafiles/gpt-oss-20b-MXFP4.gguf
```
#### Server mode
Run a newly compiled llamafile executable this way:
```sh
./o/llamafile/llamafile --model gguf_model.gguf --server
```
#### Verbose mode
When debugging, the `--verbose` argument is particularly useful as it adds
more verbose logging.
#### Where can I find GGUF model weights files?
Look for available gguf files in `~/llamafiles/`. Depending on the kind of
test, prefer:
- `gpt-oss-20b-MXFP4.gguf` for agentic tests
- `Ministral-3-3B-Instruct-2512-Q4_K_M.gguf` for multimodal tests
(also look for corresponding `mmproj` projector weights or ask for them)
- `Qwen3-0.6B-Q8_0.gguf` for any other tests
### Run All Unit Tests
Run `llamafile:check` to run all unit tests from the test suite.
### Run Integration Tests
```sh
./tests/integration/run_tests.sh --executable model_name.llamafile
```
- executable can be a pre-bundled llamafile or just the server executable
- if running the server executable, `--model` (and `--mmproj` for multimodal models) can be specified too
- different tests are run to verify the model/server capabilities
- more information and a user manual are available in `tests/integration/README.md`
### Run Specific Test
Tests are defined as `.runs` targets in BUILD.mk:
```sh
.cosmocc/4.0.2/bin/make o/$(MODE)/llamafile/json_test.runs # run a specific test target
```
Replace `$(MODE)` with the actual mode (e.g., `opt`, `dbg`).
## Test System Overview
### Test Pattern
Tests in llamafile use the `.runs` suffix convention:
```makefile
# In build/rules.mk
%.runs: %
$<
@touch $@
# In tests/BUILD.mk
.PHONY: o/$(MODE)/tests
o/$(MODE)/tests: \
o/$(MODE)/tests/extract_data_uris_test.runs
```
The `.runs` file is a timestamp marker indicating the test passed. The build system:
1. Compiles the test binary
2. Executes it
3. Creates `.runs` file if successful
### Test Dependencies
Tests should be run when:
- Their source changes
- Dependencies change
- `.runs` file is missing
The `llamafile:check` command depends on all `.runs` files, ensuring all tests run.
## Test Locations
### Submodule Tests
Each submodule may have its own tests:
```
llama.cpp/
└── tests/ # llama.cpp test suite
whisper.cpp/
└── tests/ # whisper.cpp tests
```
These tests are currently not run (as they are assumed valid when pulling from
an approved commit), but future plans include introducing them to verify the
cosmo build has the same behavior as the native one.
### llamafile Tests
These tests are saved in:
```
tests/
└── sgemm
└── *_test.c # Optimized CPU kernels tests
...
```
## Writing Tests
### Basic Test Structure
```c
// myfeature_test.c
#include "myfeature.h"
#include <assert.h>
#include <stdio.h>
void test_basic_functionality(void) {
// Arrange
int input = 42;
// Act
int result = my_function(input);
// Assert
assert(result == expected_value);
}
void test_edge_case(void) {
assert(my_function(0) == 0);
assert(my_function(-1) == handle_negative());
}
int main(void) {
test_basic_functionality();
test_edge_case();
printf("All tests passed!\n");
return 0;
}
```
### Adding to BUILD.mk
- Tests for a new feature are usually added in a separate directory under `tests`.
- Each directory holds a `BUILD.mk` file for specific dependencies and local tests
building.
- The `tests/BUILD.mk` file includes build files from each subdirectory and adds
phony targets for them. Refer to the current version of this file for an example.
- Test files which are manual (i.e. not unit or integration tests, that are used
as exemplifications of issues or performance comparisons) are added to the build
files of their respective directories. They are not added as `.runs` targets to
the `tests/BUILD.mk` file, thus they need to be manually compiled and run.
## Debugging Failed Tests
### Running Single Test Manually
```sh
# Build a specific test
.cosmocc/4.0.2/bin/make o//tests/extract_data_uris_test
# Run directly
./o/tests/extract_data_uris_test
```
### Debug Build
For debugging, use debug mode:
```sh
.cosmocc/4.0.2/bin/make MODE=dbg o/dbg/llamafile/json_test
```
Debug builds include:
- Debug symbols
- Assertions enabled
- No optimization
### Verbose Output
Add printf/fprintf statements for debugging:
```c
#ifdef DEBUG
fprintf(stderr, "Debug: value = %d\n", value);
#endif
```
## Test Categories
### Unit Tests
Test individual functions/modules, e.g.:
- JSON parsing
- String utilities
- Data structures
### Integration Tests
Test component interactions, e.g.:
- Server endpoints
- Model loading
- API responses
### Performance Tests
Benchmark critical paths:
- Inference speed
- Memory usage
- Startup time
## Continuous Integration
Tests should run automatically on:
- Pull requests
- Commits to main branches
### Local CI Simulation
Before pushing, run full test suite:
```sh
make reset-repo
make setup
# llamafile:clean
# llamafile:build
# llamafile:check
```
## Test Coverage
### Identifying Untested Code
Review critical paths:
- Error handling
- Edge cases
- Platform-specific code
### Adding Coverage
When adding features:
1. Write tests for happy path
2. Write tests for error cases
3. Write tests for edge cases
4. Update BUILD.mk
### Priority Areas
Focus testing on:
- Public API functions
- Security-sensitive code
- Complex algorithms
- Cross-platform behavior
================================================
FILE: docs/skills/llamafile/update_llamacpp.md
================================================
# Keeping llamafile updated with upstream llama.cpp
llamafile relies on llama.cpp for many of its functionalities. Keeping it up-to-date
with the latest version upstream is generally a good practice, as it brings both
bugfixes and support for recent models and features.
This document describes the steps to keep llamafile updated with upstream.
## Step 1: Update the submodule
The output of this step is a new branch with the submodule checked out
at its latest commit id.
```bash
# make sure the submodule is initialized
git submodule update --init llama.cpp
# check current commit
cd llama.cpp
OLD_ID=`git rev-parse HEAD`
# checkout latest commit
git fetch origin master
COMMIT_ID=`git rev-parse origin/master`
git checkout origin/master
# create new branch for merging
cd ..
git checkout -b llamacpp_$COMMIT_ID
git add llama.cpp
git commit -m "Update llama.cpp submodule to $COMMIT_ID"
# this branch becomes the starting point of a new PR
```
## Step 2: Verify and update patches
Review the patches in `llama.cpp.patches/patches/` as follows:
- As a first pass, run `tools/check_patches.sh` to check if applying any of the
patches causes an error. Directly apply all and only the patches you see working.
- Any patch that has conflicts due to upstream changes has to be inspected
in detail and updated. Useful references are:
- the file the patch refers to
- the patch description in `llama.cpp.patches/README.md`
- To update patches that have conflicts, first edit the new llama.cpp code
in-place, then call the `tools/generate-patches.sh` script (more info in `development.md`).
At the end of this step, your patches should all work (i.e. it should be possible
to apply them without conflicts). Note that you might still not have a working build,
but you should at least be able to run `make setup` without any errors.
## Step 3: Update BUILD.mk dependencies
- Review `llama.cpp/BUILD.mk` for any new source files or dependencies added upstream
- Remove references to any deleted source files
- Ensure all new dependencies are properly included
- Check the upstream changes for new/removed files in `llama.cpp/src/`, `llama.cpp/common/`, `llama.cpp/ggml/`, `llama.cpp/tools`, (all the relevant subdirectories you'd find in `llama.cpp/BUILD.mk`)
Useful references:
- check changes in each dir
```bash
cd llama.cpp
git diff --stat --summary $OLD_ID -- src/
```
- the `llama.cpp/CMakeLists.txt` file, showing what files are included in the latest llama.cpp build
At the end of this step, the `llama.cpp/BUILD.mk` file should include all the
updated dependencies to build, at least, the `o//llama.cpp/server/llama-server`
target.
## Step 4: Update llamafile integration code
- Check if the llamafile code that calls llama.cpp server/main needs updates
- Review `llamafile/` for any API changes in llama.cpp that need to be reflected
- Pay attention to changes in `llama.cpp/include/` for API modifications
At the end of this step, you should be able to build all targets in this repo,
i.e. the following verification step should complete successfully.
## Verification
After making changes, verify the build works:
```sh
# llamafile:clean
# llamafile:build
```
## Reference
- **Upstream changes:** https://github.com/ggerganov/llama.cpp/compare/$OLD_ID...$COMMIT_ID
- **Example PR with similar updates:** https://github.com/mozilla-ai/llamafile/pull/847
================================================
FILE: docs/source_installation.md
================================================
Developing on llamafile requires a modern version of the GNU `make`
command (called `gmake` on some systems), `sha256sum` (otherwise `cc`
will be used to build it), `wget` (or `curl`), and `unzip` available at
[https://cosmo.zip/pub/cosmos/bin/](https://cosmo.zip/pub/cosmos/bin/).
Windows users need [cosmos bash](https://justine.lol/cosmo3/) shell too.
### Dependency Setup
Some dependencies are managed as git submodules with llamafile-specific patches.
Before building, you need to initialize and configure these dependencies:
```sh
make setup
```
The patches modify code in the git submodules. These modifications remain as local
changes in the submodule working directories.
`make setup` also downloads the [Cosmopolitan](https://github.com/jart/cosmopolitan/)
C compiler for you, saving it under the `.cosmocc` directory.
### Building
```sh
.cosmocc/4.0.2/bin/make -j8
sudo .cosmocc/4.0.2/bin/make install PREFIX=/usr/local
```
Build outputs will appear in the `./o` directory, e.g.:
- `./o/llama.cpp/server/llama-server`: the original llama.cpp inference server, compiled with cosmocc
- `o/llamafile/llamafile`: the llamafile executable, running both as a TUI and a server (with the `--server` flag)
- `o/third_party/zipalign/zipalign`: the zipalign tool used to bundle llamafile executable, model weights, and default args into llamafiles
> **NOTE**: Calling `make` should automatically run cosmocc's make when required.
If that does not happen for any reason, you can still directly run the one provided
by cosmocc: `.cosmocc/4.0.2/bin/make`.
### Testing
Optionally, you can verify the build with:
```sh
make check
```
This runs our unit tests to ensure everything is built correctly.
Some integration tests in `tests/integration` are available to test llamafile
with real models. Check the [README](/tests/integration/README.md) to learn how to run them.
### Running llamafile
After the build, you can run llamafile as:
```sh
./o/llamafile/llamafile --model <gguf_model>
```
or just the llama.cpp server as:
```sh
./o/llamafile/llamafile --model <gguf_model> --server
```
or the llamafile CLI command as:
```sh
./o/llamafile/llamafile --model <gguf_model> --cli -p "Hello world"
```
## Documentation
There's a manual page for each of the llamafile programs installed when you
run `sudo make install`. Most commands will also display that information when
passing the `--help` flag.
================================================
FILE: docs/support.md
================================================
## Supported OSes
llamafile supports the following operating systems, which require a minimum
stock install:
- Linux 2.6.18+ (i.e. every distro since RHEL5 c. 2007)
- Darwin (macOS) 23.1.0+ [1] (GPU is only supported on ARM64)
- Windows 10+ (AMD64 only)
- FreeBSD 13+
- NetBSD 9.2+ (AMD64 only)
- OpenBSD 7.0 to 7.4 (AMD64 only)
On Windows, llamafile runs as a native portable executable. On UNIX
systems, llamafile extracts a small loader program named `ape` to
`$TMPDIR/.ape-1.10` which is used to map your model into memory.
[1] Darwin kernel versions 15.6+ *should* be supported, but we currently
have no way of testing that.
## Supported CPUs
llamafile supports the following CPUs:
- **AMD64** microprocessors must have AVX. Otherwise llamafile will
print an error and refuse to run. This means that if you have an Intel
CPU, it needs to be Intel Core or newer (circa 2006+), and if you have
an AMD CPU, then it needs to be K8 or newer (circa 2003+). Support for
AVX512, AVX2, FMA, F16C, and VNNI is conditionally enabled at runtime
if you have a newer CPU. For example, Zen4 has very good AVX512 that
can speed up BF16 llamafiles.
- **ARM64** microprocessors must have ARMv8a+. This means everything
from Apple Silicon to 64-bit Raspberry Pis will work, provided your
weights fit into memory.
## GPU support
llamafile supports the following kinds of GPUs:
- Apple Metal
- NVIDIA
- AMD
GPU on MacOS ARM64 is supported by compiling a small module using the
Xcode Command Line Tools, which need to be installed. This is a one time
cost that happens the first time you run your llamafile. The DSO built
by llamafile is stored in `$TMPDIR/.llamafile` or `$HOME/.llamafile`.
Offloading to GPU is enabled by default when a Metal GPU is present.
This can be disabled by passing `-ngl 0` or `--gpu disable` to force
llamafile to perform CPU inference.
Owners of NVIDIA and AMD graphics cards need to pass the `-ngl 999` flag
to enable maximum offloading. If multiple GPUs are present then the work
will be divided evenly among them by default, so you can load larger
models. Multiple GPU support may be broken on AMD Radeon systems. If
that happens to you, then use `export HIP_VISIBLE_DEVICES=0` which
forces llamafile to only use the first GPU.
Windows users are encouraged to use our release binaries, because they
contain prebuilt DLLs for both NVIDIA and AMD graphics cards, which only
depend on the graphics driver being installed. If llamafile detects that
NVIDIA's CUDA SDK or AMD's ROCm HIP SDK are installed, then llamafile
will try to build a faster DLL that uses cuBLAS or rocBLAS. In order for
llamafile to successfully build a cuBLAS module, it needs to be run on
the x64 MSVC command prompt. You can use CUDA via WSL by enabling
[Nvidia CUDA on
WSL](https://learn.microsoft.com/en-us/windows/ai/directml/gpu-cuda-in-wsl)
and running your llamafiles inside of WSL. Using WSL has the added
benefit of letting you run llamafiles greater than 4GB on Windows.
On Linux, NVIDIA users will need to install the CUDA SDK (ideally using
the shell script installer) and ROCm users need to install the HIP SDK.
They're detected by looking to see if `nvcc` or `hipcc` are on the PATH.
If you have both an AMD GPU *and* an NVIDIA GPU in your machine, then
you may need to qualify which one you want used, by passing either
`--gpu amd` or `--gpu nvidia`.
In the event that GPU support couldn't be compiled and dynamically
linked on the fly for any reason, llamafile will fall back to CPU
inference.
**NOTE** that the 0.10.0 build of llamafile has not been tested on all
GPUs/platforms yet, so we welcome your feedback, whether you run into
issues or everything runs smoothly on your specific setup!
================================================
FILE: docs/technical_details.md
================================================
Here is a succinct overview of the tricks we used to create the fattest
executable format ever. The long story short is llamafile is a shell
script that launches itself and runs inference on embedded weights in
milliseconds without needing to be copied or installed. What makes that
possible is mmap(). Both the llama.cpp executable and the weights are
concatenated onto the shell script. A tiny loader program is then
extracted by the shell script, which maps the executable into memory.
The llama.cpp executable then opens the shell script again as a file,
and calls mmap() again to pull the weights into memory and make them
directly accessible to both the CPU and GPU.
### ZIP weights embedding
The trick to embedding weights inside llama.cpp executables is to ensure
the local file is aligned on a page size boundary. That way, assuming
the zip file is uncompressed, once it's mmap()'d into memory we can pass
pointers directly to GPUs like Apple Metal, which require that data be
page size aligned. Since no existing ZIP archiving tool has an alignment
flag, we had to write about [500 lines of code](https://github.com/jart/zipalign/blob/main/zipalign.c) to
insert the ZIP files ourselves. However, once there, every existing ZIP
program should be able to read them, provided they support ZIP64. This
makes the weights much more easily accessible than they otherwise would
have been, had we invented our own file format for concatenated files.
### Microarchitectural portability
On Intel and AMD microprocessors, llama.cpp spends most of its time in
the matmul quants, which are usually written thrice for SSSE3, AVX, and
AVX2. llamafile pulls each of these functions out into a separate file
that can be `#include`ed multiple times, with varying
`__attribute__((__target__("arch")))` function attributes. Then, a
wrapper function is added which uses Cosmopolitan's `X86_HAVE(FOO)`
feature to runtime dispatch to the appropriate implementation.
### Architecture portability
llamafile solves architecture portability by building llama.cpp twice:
once for AMD64 and again for ARM64. It then wraps them with a shell
script which has an MZ prefix. On Windows, it'll run as a native binary.
On Linux, it'll extract a small 8kb executable called [APE
Loader](https://github.com/jart/cosmopolitan/blob/master/ape/loader.c)
to `${TMPDIR:-${HOME:-.}}/.ape` that'll map the binary portions of the
shell script into memory. It's possible to avoid this process by running
the
[`assimilate`](https://github.com/jart/cosmopolitan/blob/master/tool/build/assimilate.c)
program that comes included with the `cosmocc` compiler. What the
`assimilate` program does is turn the shell script executable into
the host platform's native executable format. This guarantees a fallback
path exists for traditional release processes when it's needed.
### GPU support
Cosmopolitan Libc uses static linking, since that's the only way to get
the same executable to run on six OSes. This presents a challenge for
llama.cpp, because it's not possible to statically link GPU support. The
way we solve that is by checking if a compiler is installed on the host
system. For Apple, that would be Xcode, and for other platforms, that
would be `nvcc`. llama.cpp has a single file implementation of each GPU
module, named `ggml-metal.m` (Objective C) and `ggml-cuda.cu` (Nvidia
C). llamafile embeds those source files within the zip archive and asks
the platform compiler to build them at runtime, targeting the native GPU
microarchitecture. If it works, then it's linked with the platform C library's
dlopen() implementation. See [llamafile/cuda.c](https://github.com/mozilla-ai/llamafile/blob/HEAD/llamafile/cuda.c) and
[llamafile/metal.c](https://github.com/mozilla-ai/llamafile/blob/HEAD/llamafile/metal.c).
In order to use the platform-specific dlopen() function, we need to ask
the platform-specific compiler to build a small executable that exposes
these interfaces. On ELF platforms, Cosmopolitan Libc maps this helper
executable into memory along with the platform's ELF interpreter. The
platform C library then takes care of linking all the GPU libraries, and
then runs the helper program which longjmp()'s back into Cosmopolitan.
The executable program is now in a weird hybrid state where two separate
C libraries exist which have different ABIs. For example, thread local
storage works differently on each operating system, and programs will
crash if the TLS register doesn't point to the appropriate memory. The
way Cosmopolitan Libc solves that on AMD is by using SSE to recompile
the executable at runtime to change `%fs` register accesses into `%gs`
which takes a millisecond. On ARM, Cosmo uses the `x28` register for TLS
which can be made safe by passing the `-ffixed-x28` flag when compiling
GPU modules. Lastly, llamafile uses the `__ms_abi__` attribute so that
function pointers passed between the application and GPU modules conform
to the Windows calling convention. Amazingly enough, every compiler we
tested, including nvcc on Linux and even Objective-C on MacOS, all
support compiling WIN32 style functions, thus ensuring your llamafile
will be able to talk to Windows drivers, when it's run on Windows,
without needing to be recompiled as a separate file for Windows. See
[cosmopolitan/dlopen.c](https://github.com/jart/cosmopolitan/blob/master/libc/dlopen/dlopen.c)
for further details.
================================================
FILE: docs/troubleshooting.md
================================================
## Gotchas and troubleshooting
On any platform, if your llamafile process is immediately killed, check
if you have CrowdStrike and then ask to be whitelisted.
### Mac
On macOS with Apple Silicon you need to have Xcode Command Line Tools
installed for llamafile to be able to bootstrap itself.
If you use zsh and have trouble running llamafile, try saying `sh -c
./llamafile`. This is due to a bug that was fixed in zsh 5.9+. The same
is the case for Python `subprocess`, old versions of Fish, etc.
#### Mac error "... cannot be opened because the developer cannot be verified"
1. Immediately launch System Settings, then go to Privacy & Security. llamafile should be listed at the bottom, with a button to Allow.
2. If not, then change your command in the Terminal to be `sudo spctl --master-disable; [llama launch command]; sudo spctl --master-enable`. This is because `--master-disable` disables _all_ checking, so you need to turn it back on after quitting llama.
### Linux
On some Linux systems, you might get errors relating to `run-detectors`
or WINE. This is due to `binfmt_misc` registrations. You can fix that by
adding an additional registration for the APE file format llamafile
uses:
```sh
sudo wget -O /usr/bin/ape https://cosmo.zip/pub/cosmos/bin/ape-$(uname -m).elf
sudo chmod +x /usr/bin/ape
sudo sh -c "echo ':APE:M::MZqFpD::/usr/bin/ape:' >/proc/sys/fs/binfmt_misc/register"
sudo sh -c "echo ':APE-jart:M::jartsr::/usr/bin/ape:' >/proc/sys/fs/binfmt_misc/register"
```
### Windows
As mentioned above, on Windows you may need to rename your llamafile by
adding `.exe` to the filename.
Also as mentioned above, Windows has a maximum file size limit of 4GB
for executables. The LLaVA server executable above is just 30MB shy of
that limit, so it'll work on Windows, but with larger models like
WizardCoder 13B, you need to store the weights in a separate file. An
example is provided above; see "Using llamafile with external weights."
On WSL, there are many possible gotchas. One thing that helps solve them
completely is this:
```
[Unit]
Description=cosmopolitan APE binfmt service
After=wsl-binfmt.service
[Service]
Type=oneshot
ExecStart=/bin/sh -c "echo ':APE:M::MZqFpD::/usr/bin/ape:' >/proc/sys/fs/binfmt_misc/register"
[Install]
WantedBy=multi-user.target
```
Put that in `/etc/systemd/system/cosmo-binfmt.service`.
Ensure that the APE loader is installed to `/usr/bin/ape`:
```sh
sudo wget -O /usr/bin/ape https://cosmo.zip/pub/cosmos/bin/ape-$(uname -m).elf
sudo chmod +x /usr/bin/ape
```
Then run `sudo systemctl enable --now cosmo-binfmt`.
Another thing that's helped WSL users who experience issues, is to
disable the WIN32 interop feature:
```sh
sudo sh -c "echo -1 > /proc/sys/fs/binfmt_misc/WSLInterop"
```
In Windows 11 with WSL 2 the location of the interop flag has changed, as such
the following command may be required instead/additionally:
```sh
sudo sh -c "echo -1 > /proc/sys/fs/binfmt_misc/WSLInterop-late"
```
In the instance of getting a `Permission Denied` on disabling interop
through CLI, it can be permanently disabled by adding the following in
`/etc/wsl.conf`:
```ini
[interop]
enabled=false
```
================================================
FILE: docs/whisperfile/getting-started.md
================================================
# Getting Started with Whisperfile
This tutorial will explain how to turn speech from audio files into plain text, using the whisperfile software and OpenAI's whisper model.
## (0) Setup the repo
```bash
git clone https://github.com/mozilla-ai/llamafile.git
cd llamafile
# initialise all submodules - this step is required,
# as the submodules need to be pulled and patched first!
make setup
```
## (1) Download Model
First, you need to obtain the model weights. For this tutorial, we'll use the tiny quantized model, since
it is the smallest and fastest to get started with and works reasonably well. The transcribed output is readable, even though it may misspell or misunderstand some words.
```bash
curl -L -o models/whisper-tiny.en-q5_1.bin https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin
```
## (2) Build Software
Now build the whisperfile software from source.
```bash
.cosmocc/4.0.2/bin/make -j8 o//whisperfile
```
## (3) Run Program
Now that the software is compiled, here's an example of how to turn speech into text. Included in this repository is a .wav file holding a short clip of John F. Kennedy speaking. You can transcribe it using:
```bash
o//whisperfile/whisperfile -m models/whisper-tiny.en-q5_1.bin whisperfile/jfk.wav --no-prints
```
The `--no-prints` flag is optional. It suppresses a lot of verbose logging and statistical information, which is useful when writing shell scripts.
## Supported Audio Formats
Whisperfile prefers that the input file be a 16 kHz .wav file with 16-bit signed linear samples that's stereo or mono. Otherwise it'll attempt to convert your audio file automatically using an internal library. The MP3,
FLAC, and Ogg Vorbis formats are supported across platforms.
For example, here's an audio recording of a famous poem in MP3 format:
```bash
curl -LO https://archive.org/download/raven/raven_poe_64kb.mp3
o//whisperfile/whisperfile -m models/whisper-tiny.en-q5_1.bin -f raven_poe_64kb.mp3 -pc
```
Here we passed the `-pc` flag to get color-coded terminal output which communicates the confidence of transcription.
## Higher Quality Models
The tiny model may get some words wrong. For example, it might think
"quoth" is "quof". You can solve that using the medium model, which
enables whisperfile to decode The Raven perfectly. However it's slower.
```bash
curl -LO https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin
o//whisperfile/whisperfile -m ggml-medium.en.bin -f raven_poe_64kb.mp3 --no-prints
```
Lastly, there's the large model, which is the best, but also slowest.
```bash
curl -L -o models/whisper-large-v3.bin https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin
o//whisperfile/whisperfile -m models/whisper-large-v3.bin -f raven_poe_64kb.mp3 --no-prints
```
> [!NOTE]
> Here is how the different model sizes compare in terms of size and performance:
>
> | Model | Download Size | Speed | Accuracy |
> |-------|--------------|-------|----------|
> | tiny | ~31 MB | fastest | good |
> | medium | ~1.5 GB | moderate | better |
> | large | ~3.1 GB | slowest | best |
>
> See [Higher Quality Models](#higher-quality-models) for download instructions.
## Installation
If you like whisperfile, you can also install it as a systemwide command, together with the rest of the llamafile project.
```bash
.cosmocc/4.0.2/bin/make -j8
sudo make install
```
================================================
FILE: docs/whisperfile/gpu.md
================================================
# Using Whisperfile with GPUs
GPU acceleration is most beneficial for the medium and large models. The
tiny model is already fast on CPU, so the speedup there is minimal.
Pass `--gpu auto` to let whisperfile detect and use the best available GPU
on your system. If no supported GPU is found, it falls back to CPU silently:
```bash
whisperfile -m models/ggml-medium.en.bin -f audio.wav --gpu auto
```
You can also target a specific backend:
- `--gpu apple` — Apple Metal (macOS, works on Apple Silicon and AMD GPUs)
- `--gpu nvidia` — NVIDIA CUDA (requires CUDA Toolkit to be installed)
- `--gpu amd` — AMD ROCm (requires ROCm to be installed on Linux)
To disable GPU acceleration entirely:
```bash
whisperfile -m models/ggml-medium.en.bin -f audio.wav --no-gpu
```
## Troubleshooting
**`ggml_backend_load_best: search path does not exist` warnings**
These are benign. They appear when whisperfile searches for GPU backend
libraries and doesn't find them — usually because no GPU is present or
configured. Transcription will continue on CPU. To suppress them, redirect
stderr:
```bash
whisperfile -m models/ggml-medium.en.bin -f audio.wav 2>/dev/null
```
================================================
FILE: docs/whisperfile/index.md
================================================
# Whisperfile
Whisperfile is a high-performance speech-to-text tool built on
[whisper.cpp](https://github.com/ggerganov/whisper.cpp) by Georgi
Gerganov, et al., and [OpenAI's Whisper](https://github.com/openai/whisper)
model weights.
Whisperfile bundles the binary and model weights into a **single
self-contained executable** that runs on Linux, macOS, and Windows without
installation.
## Quick Start
```sh
# transcribe a local audio file
whisperfile -m whisper-tiny.en-q5_1.bin audio.wav
# translate non-English speech to English
whisperfile -m ggml-medium-q5_0.bin -f audio.ogg --translate
# start the HTTP server
whisper-server -m whisper-tiny.en-q5_1.bin --port 8080
```
## Features
- Transcribes WAV, MP3, FLAC, and Ogg Vorbis audio
- GPU acceleration via Apple Metal, NVIDIA CUDA, and AMD ROCm
- Translates speech from any language into English
- HTTP server with a REST API for remote transcription
- Pack the binary and model weights into a single portable executable
## Documentation
- [Getting Started](getting-started.md)
- [Packaging](packaging.md)
- [Using GPUs](gpu.md)
- [Speech Translation](translate.md)
- [Server](server.md)
================================================
FILE: docs/whisperfile/packaging.md
================================================
# How to make a Whisperfile
Whisperfile is designed to be a single-file solution for speech-to-text.
This tutorial will explain how you can merge the whisperfile executable
and OpenAI's model weights into a unified executable.
We'll be using Cosmopolitan Libc's "ZipOS" read-only filesystem to achieve
this. Because whisperfile executables are valid ZIP files at the same time,
you can embed model weights directly inside the binary, and the runtime
will expose them under the `/zip/...` path prefix. We'll also
use the `.args` file convention to bake in default arguments so users don't
need to pass flags manually.
## Prerequisites
First, build the `zipalign` tool, which is used to embed files into the
executable without breaking its ZIP structure:
```bash
.cosmocc/4.0.2/bin/make -j8 o//third_party/zipalign
```
Next, either obtain a prebuilt `whisperfile` executable from the
[GitHub releases page](https://github.com/mozilla-ai/llamafile/releases),
or build one from source:
```bash
.cosmocc/4.0.2/bin/make -j8 o//whisperfile
# copy it with a more specific name
cp o//whisperfile/whisperfile whisper-tiny
```
## Instructions
Download the model weights you want to bundle. For this tutorial we'll use
the tiny q5\_1 quantized weights:
```bash
curl -LO https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en-q5_1.bin
```
Embed the weights inside your whisperfile. The `-0` flag disables PKZIP
DEFLATE compression, which isn't beneficial for binary weights files:
```bash
o//third_party/zipalign/zipalign -0 whisper-tiny ggml-tiny.en-q5_1.bin
```
Your weights are now embedded. You can verify with `unzip -vl whisper-tiny`.
Cosmopolitan Libc exposes embedded files under the synthetic `/zip/...`
directory, so a file named `ggml-tiny.en-q5_1.bin` is accessible at
`/zip/ggml-tiny.en-q5_1.bin`:
```bash
./whisper-tiny -m /zip/ggml-tiny.en-q5_1.bin -f whisper.cpp/samples/jfk.wav
```
(`jfk.wav` is a sample audio clip included in the repository.)
It's now safe to delete the original weights file:
```bash
rm -f ggml-tiny.en-q5_1.bin
```
To avoid passing `-m /zip/ggml-tiny.en-q5_1.bin` every time, create a
`.args` file that specifies default arguments. Each argument goes on its
own line — no shell quoting needed:
```text
-m
/zip/ggml-tiny.en-q5_1.bin
...
```
The `...` at the end is a special token that gets replaced with any
additional arguments the user passes at runtime.
Embed the `.args` file into your whisperfile:
```bash
o//third_party/zipalign/zipalign whisper-tiny .args
rm -f .args
```
You now have a self-contained whisperfile. Run it with just an audio file:
```bash
./whisper-tiny -f whisper.cpp/samples/jfk.wav
```
================================================
FILE: docs/whisperfile/server.md
================================================
# whisper-server HTTP API
The whisper-server provides an HTTP API for speech-to-text transcription.
Audio files are passed to the inference model via HTTP requests. MP3,
FLAC, and OGG files are automatically converted to WAV format.
## Usage
Build and run the server with a model:
```bash
.cosmocc/4.0.2/bin/make -j8 o//whisperfile
o//whisperfile/whisper-server -m models/whisper-tiny.en-q5_1.bin
```
The server accepts the following options:
```text
whisper-server options:
-m FNAME, --model FNAME Path of Whisper model weights
--host ADDR Hostname or IP address to bind to (default: 127.0.0.1)
--port PORT Port number (default: 8080)
-l LANG, --language LANG Default spoken language ('auto' for auto-detect)
-tr, --translate Translate audio into English text
-t N, --threads N Number of threads to use during computation
-ng, --no-gpu Disable GPU acceleration
--gpu VALUE Select GPU backend (auto, apple, amd, nvidia, disable)
--log-disable Suppress logging output
```
Run `whisper-server --help` for the complete list of options.
> [!WARNING]
> **Do not run the server with administrative privileges and ensure it's operated in a sandbox environment, especially since it involves risky operations like accepting user file uploads. Always validate and sanitize inputs to guard against potential security threats.**
## HTTP Endpoints
### GET /health
Returns server health status as JSON. Returns HTTP 503 if the model
is still loading.
```bash
curl http://localhost:8080/health
```
Response when ready (HTTP 200):
```json
{"status": "ok"}
```
Response while model is loading (HTTP 503):
```json
{"status": "loading model"}
```
### POST /inference
Transcribe an audio file. Send as multipart/form-data with the audio
file in a field named "file".
Optional form fields:
- `response_format` - Output format: json, text, srt, vtt, verbose_json (default: json)
- `language` - Spoken language or 'auto' for detection
- `translate` - Set to 'true' to translate to English
- `temperature` - Sampling temperature
- `prompt` - Initial prompt for the model
Example:
```bash
curl http://localhost:8080/inference \
-F "file=@whisper.cpp/samples/jfk.wav" \
-F "response_format=json"
```
Response (HTTP 200):
```json
{"text": " And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country."}
```
### POST /load
Load a different model at runtime.
```bash
curl http://localhost:8080/load \
-F "model=/path/to/model.bin"
```
Response (HTTP 200):
```text
Load was successful!
```
================================================
FILE: docs/whisperfile/translate.md
================================================
# Speech Translation with Whisperfile
Whisperfile is not only able to transcribe speech to text, it's also able to
translate that speech into English at the same time. All you have
to do is pass the `-tr` or `--translate` flag.
## Choosing a Model
In order for translation to work, you need to be using a multilingual
model. The model files that
have `.en` in the name are English-only; you can't use those for
translation. One model that does work well in translation mode is
[`ggml-medium-q5_0.bin`](https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin?download=true), so for instance you could run:
```bash
# download ggml-medium model
curl -LO https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium-q5_0.bin
# download the first chapter of Pinocchio
curl -LO https://archive.org/download/avventure_pinocchio_librivox/avventurepinocchio_01_collodi.ogg
# read it, translated in English
o//whisperfile/whisperfile -m ggml-medium-q5_0.bin -f avventurepinocchio_01_collodi.ogg -tr
```
## Language Override
By default, the source language will be auto-detected. This works great
except for recordings with multiple languages. For example, if you have
a recording with a little bit of English at the beginning, but the rest
is in French, then you may want to pass the `-l fr` flag, to explicitly
specify the source language as French.
================================================
FILE: llama.cpp.patches/README.md
================================================
# llama.cpp Patches for Llamafile
This directory contains patches that adapt llama.cpp for use with Llamafile and Cosmopolitan libc. These patches enable llama.cpp to run as a portable, single-file executable across Windows, macOS, Linux, and BSD without installation.
## Directory Structure
```
llama.cpp.patches/
├── README.md # This file
├── apply-patches.sh # Script to apply all patches to llama.cpp submodule
├── renames.sh # Script for file renames/moves (if any)
├── llamafile-files/ # Additional files to copy into llama.cpp
│ ├── BUILD.mk # Makefile for building llama.cpp with cosmocc
│ ├── README.llamafile # License and modification notes
│ └── common/
│ └── license.cpp # Llama.cpp's license file (cmake creates this at build time)
└── patches/ # Patch files for upstream sources
```
## Applying Patches
To apply all patches to the llama.cpp submodule:
```sh
./llama.cpp.patches/apply-patches.sh
```
To reset the submodule to its clean state:
```sh
cd llama.cpp && git reset --hard && git clean -fdx
```
## Patch Index
### Cosmopolitan Libc Compatibility
These patches address compatibility issues when building with Cosmopolitan libc (cosmocc).
| Patch | Description |
|-------|-------------|
| `common_arg.cpp.patch` | Adds `COSMOCC` platform detection for `PATH_MAX` (includes `linux/limits.h`) |
| `common_common.cpp.patch` | Adds platform-aware cache directory detection for Cosmopolitan (checks `LOCALAPPDATA`, `XDG_CACHE_HOME`, falls back to `~/.cache/`) |
| `common_download.cpp.patch` | Adds `COSMOCC` platform detection for `PATH_MAX` |
| `common_ngram-mod.cpp.patch` | Adds missing `#include <algorithm>` for `std::fill` |
### Threading and Signal Handling
Cosmopolitan libc has specific behaviors with condition variables and signals that require workarounds.
| Patch | Description |
|-------|-------------|
| `common_log.cpp.patch` | Blocks `SIGINT`/`SIGTERM` on logger thread to prevent `EINTR` exceptions; uses `wait_for()` instead of `wait()` to work around XNU futex timeout bug (~72 minute expiry) |
| `tools_server_server-queue.cpp.patch` | Same threading fixes for server queue: signal masking and `wait_for()` timeouts |
| `vendor_cpp-httplib_httplib.cpp.patch` | Fixes httplib thread pool with `wait_for()` instead of `wait()` for XNU futex compatibility |
### Cross-Module Memory Management
When GPU backends (CUDA, Metal) are loaded as dynamic libraries, memory allocated by the DSO must be freed by the DSO's allocator, not the main executable's.
| Patch | Description |
|-------|-------------|
| `ggml_src_ggml-backend-impl.h.patch` | Adds `free_struct` callback to `ggml_backend_buffer_i` interface for cross-module buffer cleanup |
| `ggml_src_ggml-backend.cpp.patch` | Implements `free_struct` callback support in `ggml_backend_buffer_free()` |
| `ggml_src_ggml-cuda_ggml-cuda.cu.patch` | Adds `free_struct` implementation for CUDA buffers; disables BF16 with TinyBLAS |
| `ggml_src_ggml-metal_ggml-metal.cpp.patch` | Adds `free_struct` implementation for Metal buffers |
### TinyBLAS Integration
Llamafile uses TinyBLAS as a lightweight replacement for cuBLAS, enabling GPU support without CUDA SDK dependencies.
| Patch | Description |
|-------|-------------|
| `ggml_src_ggml-cuda_vendors_cuda.h.patch` | Includes TinyBLAS headers instead of `cublas_v2.h` when `GGML_USE_TINYBLAS` is defined |
| `ggml_src_ggml-cuda_common.cuh.patch` | Disables BF16 MMA when using TinyBLAS (TinyBLAS would incorrectly interpret BF16 as FP16) |
| `ggml_src_ggml-cuda_solve_tri.cu.patch` | Disables cuBLAS TRSM path when using TinyBLAS (only affects Qwen3-Next models with large matrices) |
### Llamafile File Handling
These patches integrate llamafile's file handling APIs for loading models from bundled zip archives and `.llamafile` containers.
| Patch | Description |
|-------|-------------|
| `src_llama-mmap.h.patch` | Adds `has_premapped_content()`, `premapped_content()`, and `get_llamafile()` methods to `llama_file` class |
| `src_llama-mmap.cpp.patch` | Implements llamafile API integration for file I/O (`llamafile_open_gguf`, `llamafile_read`, etc.) and memory mapping with reference counting for bundled assets |
| `ggml_src_gguf.cpp.patch` | Adds `gguf_llamafile_reader` for reading GGUF files via llamafile API (supports `/zip/` paths, `foo.zip@weights.gguf` syntax, `.llamafile` containers) |
### Server Integration
| Patch | Description |
|-------|-------------|
| `tools_server_server.cpp.patch` | Refactors `main()` to `server_main()` for llamafile integration; adds Metal backend trigger, cosmo_args support, TUI mode handling, and proper exit for Metal async logging |
### Miscellaneous
| Patch | Description |
|-------|-------------|
| `common_chat.cpp.patch` | Fixes C++ type conversion: explicitly wraps `inputs.messages` in `std::optional()` for Deepseek v3.1 template |
| `ggml_src_ggml-backend-reg.cpp.patch` | Suppresses debug log noise for non-existent backend search paths (irrelevant for llamafile's DSO loading approach) |
## Creating New Patches
Files in `llama.cpp` are usually modified in-place for development and testing.
Once they are ready to be committed, you can update all files in the `llama.cpp.patches` directory by running the following:
```sh
cd llama.cpp
../tools/generate-patches.sh --output-dir ../llama.cpp.patches
```
Patch filenames will automatically reflect the file path with underscores replacing slashes (e.g., `common_arg.cpp.patch` for `common/arg.cpp`).
================================================
FILE: llama.cpp.patches/apply-patches.sh
================================================
#!/bin/bash
# Apply llamafile patches to llama.cpp submodule
#
# Steps, in order:
#   1. Refuse to run if the submodule working tree has uncommitted changes.
#   2. Copy everything under llamafile-files/ into the submodule root.
#   3. Run renames.sh (file renames/moves).
#   4. Remove upstream files that are not wanted.
#   5. From the parent directory of the submodule, apply every
#      patches/*.patch with `patch -p1`.
#
# `set -e` makes any failing command abort the whole script.
set -e

# All paths are derived from the location of this script, so it can be
# invoked from any working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LLAMA_DIR="$SCRIPT_DIR/../llama.cpp"
PATCHES_DIR="$SCRIPT_DIR/patches"
LLAMAFILE_FILES_DIR="$SCRIPT_DIR/llamafile-files"

cd "$LLAMA_DIR"

# Check if status is dirty, if so, exit
if [ -n "$(git status --porcelain)" ]; then
    # Diagnostics go to stderr so they are not mixed into captured stdout.
    echo "Git status is dirty. Please commit or stash your changes before applying patches." >&2
    exit 1
fi

echo "Applying patches to llama.cpp submodule..."

echo "Copying all files in llamafile-files to root directory..."
cp -r "$LLAMAFILE_FILES_DIR"/* .

# Invoke the helper through SCRIPT_DIR, like every other path in this script,
# instead of relying on the patches directory being named llama.cpp.patches.
"$SCRIPT_DIR/renames.sh"

echo "Removing unnecessary files and directories..."
# If you want to clean up the original code, add your `rm` commands here.
# For example:
# -f: a Makefile that is already absent must not abort the run under `set -e`
# after files have been copied (which would leave the tree dirty and block a rerun).
rm -f Makefile

# Leave the submodule: the patches are applied from its parent directory.
cd ..

echo "Applying modifications to upstream files..."
for patch_file in "$PATCHES_DIR"/*.patch; do
    # The -f test skips the literal, unexpanded glob when no patch files exist.
    if [ -f "$patch_file" ]; then
        echo "Applying $(basename "$patch_file")..."
        patch -p1 < "$patch_file"
    fi
done

echo ""
echo "Patches applied successfully!"
echo "Note: These changes are not committed to the submodule."
echo "To reset the submodule to its clean state, run:"
echo " cd llama.cpp && git reset --hard && git clean -fdx"
================================================
FILE: llama.cpp.patches/llamafile-files/BUILD.mk
================================================
#-*-mode:makefile-gmake;indent-tabs-mode:t;tab-width:8;coding:utf-8-*-┐
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
# Append this package's name to PKGS.
# NOTE(review): PKGS is presumably consumed by the top-level build - confirm.
PKGS += LLAMA_CPP
# ==============================================================================
# Version information
# ==============================================================================
# GGML_VERSION and GGML_COMMIT are inherited from build/config.mk
#
# Both values below are computed once, when this makefile is read (`:=`), by
# running git inside the llama.cpp directory. If the directory cannot be
# entered or git fails, the value falls back to the literal string "unknown".
#
# LLAMA_VERSION: output of `git describe --tags --always`.
LLAMA_VERSION := $(shell cd llama.cpp 2>/dev/null && git describe --tags --always 2>/dev/null || echo "unknown")
# LLAMA_COMMIT: output of `git rev-parse --short HEAD`.
LLAMA_COMMIT := $(shell cd llama.cpp 2>/dev/null && git rev-parse --short HEAD 2>/dev/null || echo "unknown")
# ==============================================================================
# GGML Library (Core tensor operations)
# ==============================================================================
# C sources of ggml, including the CPU backend's C files.
# The list is explicit (no wildcard): only files named here are compiled.
GGML_SRCS_C := \
llama.cpp/ggml/src/ggml-alloc.c \
llama.cpp/ggml/src/ggml-quants.c \
llama.cpp/ggml/src/ggml.c \
llama.cpp/ggml/src/ggml-cpu/ggml-cpu.c \
llama.cpp/ggml/src/ggml-cpu/quants.c
# C++ sources of ggml: backend registry/loader, gguf, and the CPU backend
# (including its amx/ subdirectory).
GGML_SRCS_CPP := \
llama.cpp/ggml/src/ggml-backend-dl.cpp \
llama.cpp/ggml/src/ggml-backend-reg.cpp \
llama.cpp/ggml/src/ggml-backend.cpp \
llama.cpp/ggml/src/ggml-opt.cpp \
llama.cpp/ggml/src/ggml-threading.cpp \
llama.cpp/ggml/src/ggml.cpp \
llama.cpp/ggml/src/gguf.cpp \
llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp \
llama.cpp/ggml/src/ggml-cpu/ggml-cpu.cpp \
llama.cpp/ggml/src/ggml-cpu/hbm.cpp \
llama.cpp/ggml/src/ggml-cpu/ops.cpp \
llama.cpp/ggml/src/ggml-cpu/repack.cpp \
llama.cpp/ggml/src/ggml-cpu/traits.cpp \
llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp \
llama.cpp/ggml/src/ggml-cpu/vec.cpp \
llama.cpp/ggml/src/ggml-cpu/amx/amx.cpp \
llama.cpp/ggml/src/ggml-cpu/amx/mmq.cpp
# Map every source to its object under o/$(MODE)/, keeping the source suffix
# in the object name (foo.c -> o/$(MODE)/foo.c.o, foo.cpp -> o/$(MODE)/foo.cpp.o).
GGML_OBJS := \
$(GGML_SRCS_C:%.c=o/$(MODE)/%.c.o) \
$(GGML_SRCS_CPP:%.cpp=o/$(MODE)/%.cpp.o)
# ==============================================================================
# LLAMA Library (LLM inference)
# ==============================================================================
# C++ sources of the llama library: the top-level llama.cpp, one file per entry
# under src/models/, then the llama-* support files and the unicode tables.
# The list is explicit (no wildcard), so a file under llama.cpp/src/ that is
# not named here is not part of LLAMA_OBJS.
LLAMA_SRCS_CPP := \
llama.cpp/src/llama.cpp \
llama.cpp/src/models/afmoe.cpp \
llama.cpp/src/models/apertus.cpp \
llama.cpp/src/models/arcee.cpp \
llama.cpp/src/models/arctic.cpp \
llama.cpp/src/models/arwkv7.cpp \
llama.cpp/src/models/baichuan.cpp \
llama.cpp/src/models/bailingmoe.cpp \
llama.cpp/src/models/bailingmoe2.cpp \
llama.cpp/src/models/bert.cpp \
llama.cpp/src/models/bitnet.cpp \
llama.cpp/src/models/bloom.cpp \
llama.cpp/src/models/chameleon.cpp \
llama.cpp/src/models/chatglm.cpp \
llama.cpp/src/models/codeshell.cpp \
llama.cpp/src/models/cogvlm.cpp \
llama.cpp/src/models/cohere2-iswa.cpp \
llama.cpp/src/models/command-r.cpp \
llama.cpp/src/models/dbrx.cpp \
llama.cpp/src/models/deci.cpp \
llama.cpp/src/models/deepseek.cpp \
llama.cpp/src/models/deepseek2.cpp \
llama.cpp/src/models/delta-net-base.cpp \
llama.cpp/src/models/dots1.cpp \
llama.cpp/src/models/dream.cpp \
llama.cpp/src/models/ernie4-5-moe.cpp \
llama.cpp/src/models/ernie4-5.cpp \
llama.cpp/src/models/eurobert.cpp \
llama.cpp/src/models/exaone.cpp \
llama.cpp/src/models/exaone4.cpp \
llama.cpp/src/models/exaone-moe.cpp \
llama.cpp/src/models/falcon-h1.cpp \
llama.cpp/src/models/falcon.cpp \
llama.cpp/src/models/gemma-embedding.cpp \
llama.cpp/src/models/gemma.cpp \
llama.cpp/src/models/gemma2-iswa.cpp \
llama.cpp/src/models/gemma3.cpp \
llama.cpp/src/models/gemma3n-iswa.cpp \
llama.cpp/src/models/glm4-moe.cpp \
llama.cpp/src/models/glm4.cpp \
llama.cpp/src/models/gpt2.cpp \
llama.cpp/src/models/gptneox.cpp \
llama.cpp/src/models/granite-hybrid.cpp \
llama.cpp/src/models/granite.cpp \
llama.cpp/src/models/mamba-base.cpp \
llama.cpp/src/models/grok.cpp \
llama.cpp/src/models/grovemoe.cpp \
llama.cpp/src/models/hunyuan-dense.cpp \
llama.cpp/src/models/hunyuan-moe.cpp \
llama.cpp/src/models/internlm2.cpp \
llama.cpp/src/models/jais.cpp \
llama.cpp/src/models/jais2.cpp \
llama.cpp/src/models/jamba.cpp \
llama.cpp/src/models/kimi-linear.cpp \
llama.cpp/src/models/lfm2.cpp \
llama.cpp/src/models/llada-moe.cpp \
llama.cpp/src/models/llada.cpp \
llama.cpp/src/models/llama-iswa.cpp \
llama.cpp/src/models/llama.cpp \
llama.cpp/src/models/maincoder.cpp \
llama.cpp/src/models/mamba.cpp \
llama.cpp/src/models/mimo2-iswa.cpp \
llama.cpp/src/models/minicpm3.cpp \
llama.cpp/src/models/minimax-m2.cpp \
llama.cpp/src/models/mistral3.cpp \
llama.cpp/src/models/modern-bert.cpp \
llama.cpp/src/models/mpt.cpp \
llama.cpp/src/models/nemotron-h.cpp \
llama.cpp/src/models/nemotron.cpp \
llama.cpp/src/models/neo-bert.cpp \
llama.cpp/src/models/olmo.cpp \
llama.cpp/src/models/olmo2.cpp \
llama.cpp/src/models/olmoe.cpp \
llama.cpp/src/models/openai-moe-iswa.cpp \
llama.cpp/src/models/openelm.cpp \
llama.cpp/src/models/orion.cpp \
llama.cpp/src/models/paddleocr.cpp \
llama.cpp/src/models/pangu-embedded.cpp \
llama.cpp/src/models/phi2.cpp \
llama.cpp/src/models/phi3.cpp \
llama.cpp/src/models/plamo.cpp \
llama.cpp/src/models/plamo2.cpp \
llama.cpp/src/models/plamo3.cpp \
llama.cpp/src/models/plm.cpp \
llama.cpp/src/models/qwen.cpp \
llama.cpp/src/models/qwen2.cpp \
llama.cpp/src/models/qwen2moe.cpp \
llama.cpp/src/models/qwen2vl.cpp \
llama.cpp/src/models/qwen3.cpp \
llama.cpp/src/models/qwen3moe.cpp \
llama.cpp/src/models/qwen3next.cpp \
llama.cpp/src/models/qwen35.cpp \
llama.cpp/src/models/qwen35moe.cpp \
llama.cpp/src/models/qwen3vl-moe.cpp \
llama.cpp/src/models/qwen3vl.cpp \
llama.cpp/src/models/refact.cpp \
llama.cpp/src/models/rnd1.cpp \
llama.cpp/src/models/rwkv6-base.cpp \
llama.cpp/src/models/rwkv6.cpp \
llama.cpp/src/models/rwkv6qwen2.cpp \
llama.cpp/src/models/rwkv7-base.cpp \
llama.cpp/src/models/rwkv7.cpp \
llama.cpp/src/models/seed-oss.cpp \
llama.cpp/src/models/smallthinker.cpp \
llama.cpp/src/models/smollm3.cpp \
llama.cpp/src/models/stablelm.cpp \
llama.cpp/src/models/starcoder.cpp \
llama.cpp/src/models/step35-iswa.cpp \
llama.cpp/src/models/starcoder2.cpp \
llama.cpp/src/models/t5-dec.cpp \
llama.cpp/src/models/t5-enc.cpp \
llama.cpp/src/models/wavtokenizer-dec.cpp \
llama.cpp/src/models/xverse.cpp \
llama.cpp/src/llama-adapter.cpp \
llama.cpp/src/llama-arch.cpp \
llama.cpp/src/llama-batch.cpp \
llama.cpp/src/llama-chat.cpp \
llama.cpp/src/llama-context.cpp \
llama.cpp/src/llama-cparams.cpp \
llama.cpp/src/llama-grammar.cpp \
llama.cpp/src/llama-graph.cpp \
llama.cpp/src/llama-hparams.cpp \
llama.cpp/src/llama-impl.cpp \
llama.cpp/src/llama-io.cpp \
llama.cpp/src/llama-kv-cache-iswa.cpp \
llama.cpp/src/llama-kv-cache.cpp \
llama.cpp/src/llama-memory-hybrid.cpp \
llama.cpp/src/llama-memory-hybrid-iswa.cpp \
llama.cpp/src/llama-memory-recurrent.cpp \
llama.cpp/src/llama-memory.cpp \
llama.cpp/src/llama-mmap.cpp \
llama.cpp/src/llama-model-loader.cpp \
llama.cpp/src/llama-model-saver.cpp \
llama.cpp/src/llama-model.cpp \
llama.cpp/src/llama-quant.cpp \
llama.cpp/src/llama-sampler.cpp \
llama.cpp/src/llama-vocab.cpp \
llama.cpp/src/unicode-data.cpp \
llama.cpp/src/unicode.cpp
# One object per source, placed under o/$(MODE)/ with the .cpp suffix kept
# (foo.cpp -> o/$(MODE)/foo.cpp.o).
LLAMA_OBJS := $(LLAMA_SRCS_CPP:%.cpp=o/$(MODE)/%.cpp.o)
# ==============================================================================
# Common Library (Utilities shared across tools)
# ==============================================================================
# C++ sources under llama.cpp/common/, including the jinja/ subdirectory.
# The generated build-info.cpp is appended to this variable further down,
# after the rule that produces it.
COMMON_SRCS_CPP := \
llama.cpp/common/arg.cpp \
llama.cpp/common/chat-parser-xml-toolcall.cpp \
llama.cpp/common/chat-parser.cpp \
llama.cpp/common/chat-peg-parser.cpp \
llama.cpp/common/chat.cpp \
llama.cpp/common/common.cpp \
llama.cpp/common/console.cpp \
llama.cpp/common/debug.cpp \
llama.cpp/common/download.cpp \
llama.cpp/common/jinja/caps.cpp \
llama.cpp/common/jinja/lexer.cpp \
llama.cpp/common/jinja/parser.cpp \
llama.cpp/common/jinja/runtime.cpp \
llama.cpp/common/jinja/string.cpp \
llama.cpp/common/jinja/value.cpp \
llama.cpp/common/json-partial.cpp \
llama.cpp/common/json-schema-to-grammar.cpp \
llama.cpp/common/license.cpp \
llama.cpp/common/llguidance.cpp \
llama.cpp/common/log.cpp \
llama.cpp/common/ngram-cache.cpp \
llama.cpp/common/ngram-map.cpp \
llama.cpp/common/ngram-mod.cpp \
llama.cpp/common/peg-parser.cpp \
llama.cpp/common/preset.cpp \
llama.cpp/common/regex-partial.cpp \
llama.cpp/common/sampling.cpp \
llama.cpp/common/speculative.cpp \
llama.cpp/common/unicode.cpp
# Build info generation
#
# build-info.cpp is generated from the template build-info.cpp.in by replacing
# its @...@ placeholders with the four values below.
# LLAMA_BUILD_NUMBER is the output of `date +%s`, captured once when this
# makefile is read (`:=`).
LLAMA_BUILD_NUMBER := $(shell date +%s)
# Same git command as LLAMA_COMMIT; falls back to "unknown" on failure.
LLAMA_BUILD_COMMIT := $(shell cd llama.cpp 2>/dev/null && git rev-parse --short HEAD 2>/dev/null || echo "unknown")
LLAMA_BUILD_COMPILER := cosmocc
LLAMA_BUILD_TARGET := cosmopolitan
# The generated file is written under o/$(MODE)/ rather than into the source
# tree. Its only prerequisite is the template, so a change in the values above
# does not by itself make the target out of date.
o/$(MODE)/llama.cpp/common/build-info.cpp: llama.cpp/common/build-info.cpp.in
@mkdir -p $(dir $@)
sed -e 's/@LLAMA_BUILD_NUMBER@/$(LLAMA_BUILD_NUMBER)/g' \
-e 's/@LLAMA_BUILD_COMMIT@/$(LLAMA_BUILD_COMMIT)/g' \
-e 's/@BUILD_COMPILER@/$(LLAMA_BUILD_COMPILER)/g' \
-e 's/@BUILD_TARGET@/$(LLAMA_BUILD_TARGET)/g' \
$< > $@
# Because the generated source already lives under o/$(MODE)/, the substitution
# below maps it to o/$(MODE)/o/$(MODE)/llama.cpp/common/build-info.cpp.o.
COMMON_SRCS_CPP += o/$(MODE)/llama.cpp/common/build-info.cpp
COMMON_OBJS := $(COMMON_SRCS_CPP:%.cpp=o/$(MODE)/%.cpp.o)
# ==============================================================================
# Additional support files
# ==============================================================================

# gguf example source that is bundled into the combined library below.
GGUF_SRCS := llama.cpp/examples/gguf/gguf.cpp
GGUF_OBJS := $(patsubst %.cpp,o/$(MODE)/%.cpp.o,$(GGUF_SRCS))

# ==============================================================================
# Combined library (just llama.cpp, equivalent to cmake build)
# ==============================================================================

# Every object that goes into llama.cpp.a: ggml, llama, common and gguf.
LLAMA_CPP_OBJS := $(GGML_OBJS) $(LLAMA_OBJS) $(COMMON_OBJS) $(GGUF_OBJS)

# No recipe here; the archive is expected to be built by a rule defined
# elsewhere from these prerequisites.
o/$(MODE)/llama.cpp/llama.cpp.a: $(LLAMA_CPP_OBJS)
# ==============================================================================
# MTMD Library (Multimodal - for server)
# ==============================================================================

# Top-level sources in llama.cpp/tools/mtmd/ (names without .cpp).
MTMD_CORE_NAMES := \
    clip \
    mtmd \
    mtmd-helper \
    mtmd-audio

# Per-model sources in llama.cpp/tools/mtmd/models/ (names without .cpp).
MTMD_MODEL_NAMES := \
    cogvlm \
    conformer \
    glm4v \
    internvl \
    kimik25 \
    kimivl \
    llama4 \
    llava \
    minicpmv \
    mobilenetv5 \
    nemotron-v2-vl \
    paddleocr \
    pixtral \
    qwen2vl \
    qwen3vl \
    siglip \
    whisper-enc \
    youtuvl

# Core sources first, then the model sources, each in the order listed above.
MTMD_SRCS_CPP := \
    $(addprefix llama.cpp/tools/mtmd/,$(addsuffix .cpp,$(MTMD_CORE_NAMES))) \
    $(addprefix llama.cpp/tools/mtmd/models/,$(addsuffix .cpp,$(MTMD_MODEL_NAMES)))

MTMD_OBJS := $(patsubst %.cpp,o/$(MODE)/%.cpp.o,$(MTMD_SRCS_CPP))
# ==============================================================================
# cpp-httplib (HTTP library for server)
# ==============================================================================

# Vendored single-file HTTP library; linked into llama-server.
HTTPLIB_SRCS := llama.cpp/vendor/cpp-httplib/httplib.cpp
HTTPLIB_OBJS := $(patsubst %.cpp,o/$(MODE)/%.cpp.o,$(HTTPLIB_SRCS))
# ==============================================================================
# Server Assets (convert HTML to C++ headers)
# ==============================================================================

# C identifier used for an embedded asset: the file name of $(1) with every
# '.' and '-' replaced by '_' (index.html.gz -> index_html_gz). Implemented
# with make text functions so no shell is forked and no global variable is
# set while a recipe is being expanded.
server_asset_symbol = $(subst .,_,$(subst -,_,$(notdir $(1))))

# Generate .hpp files from binary assets using xxd-like conversion.
#
# For an asset public/<name> the header defines
#   unsigned char <symbol>[]   - the file's bytes as 0x.. literals
#   unsigned int  <symbol>_len - sizeof(<symbol>)
#
# The header is assembled in $@.tmp and renamed into place only when every
# step succeeded, so an interrupted or failed conversion cannot leave behind a
# truncated header that make would treat as up to date.
o/$(MODE)/llama.cpp/tools/server/%.hpp: llama.cpp/tools/server/public/%
	@mkdir -p $(dir $@)
	@{ echo 'unsigned char $(call server_asset_symbol,$*)[] = {' && \
	   od -An -tx1 -v $< | \
	     awk '{for(i=1;i<=NF;i++){if(NR>1||i>1)printf", "; printf"0x%s",$$i}}' && \
	   echo && \
	   echo '};' && \
	   echo 'unsigned int $(call server_asset_symbol,$*)_len = sizeof($(call server_asset_symbol,$*));'; \
	 } > $@.tmp
	@mv -f $@.tmp $@

# Generated headers the server sources include.
SERVER_ASSETS := \
    o/$(MODE)/llama.cpp/tools/server/index.html.gz.hpp \
    o/$(MODE)/llama.cpp/tools/server/loading.html.hpp
# ==============================================================================
# Tools (in tools/ directory)
# ==============================================================================

# Sources of the single-file tools.
TOOL_QUANTIZE_SRCS := llama.cpp/tools/quantize/quantize.cpp
TOOL_IMATRIX_SRCS := llama.cpp/tools/imatrix/imatrix.cpp
TOOL_PERPLEXITY_SRCS := llama.cpp/tools/perplexity/perplexity.cpp
TOOL_BENCH_SRCS := llama.cpp/tools/llama-bench/llama-bench.cpp

# Sources of the server, as names inside llama.cpp/tools/server/ (no .cpp).
TOOL_SERVER_SRC_NAMES := \
    server \
    server-common \
    server-context \
    server-http \
    server-models \
    server-queue \
    server-task
TOOL_SERVER_SRCS := $(addprefix llama.cpp/tools/server/,$(addsuffix .cpp,$(TOOL_SERVER_SRC_NAMES)))

# Matching objects: <src>.cpp -> o/$(MODE)/<src>.cpp.o
TOOL_QUANTIZE_OBJS := $(patsubst %.cpp,o/$(MODE)/%.cpp.o,$(TOOL_QUANTIZE_SRCS))
TOOL_IMATRIX_OBJS := $(patsubst %.cpp,o/$(MODE)/%.cpp.o,$(TOOL_IMATRIX_SRCS))
TOOL_PERPLEXITY_OBJS := $(patsubst %.cpp,o/$(MODE)/%.cpp.o,$(TOOL_PERPLEXITY_SRCS))
TOOL_BENCH_OBJS := $(patsubst %.cpp,o/$(MODE)/%.cpp.o,$(TOOL_BENCH_SRCS))
TOOL_SERVER_OBJS := $(patsubst %.cpp,o/$(MODE)/%.cpp.o,$(TOOL_SERVER_SRCS))

# llamafile objects are used to add dynamic GPU support (Metal, CUDA, ROCm)
TOOL_LLAMAFILE_OBJS := $(addprefix o/$(MODE)/llamafile/,llamafile.o metal.o cuda.o zip.o)

# The server objects must wait for the generated asset headers, and are
# rebuilt when llamafile.h changes.
$(TOOL_SERVER_OBJS): $(SERVER_ASSETS) llamafile/llamafile.h
# ==============================================================================
# Compiler flags
# ==============================================================================
# Include paths for new llama.cpp structure
#
# Applied to the library objects, the tool objects and the mtmd objects.
# The assignments are target-specific and marked `private`, so they affect
# only the listed objects and are not inherited by their prerequisites.
# The o/$(MODE)/llama.cpp/tools/server entry is the directory the generated
# server asset headers (*.hpp) are written to.
$(LLAMA_CPP_OBJS) $(TOOL_QUANTIZE_OBJS) $(TOOL_IMATRIX_OBJS) \
$(TOOL_PERPLEXITY_OBJS) $(TOOL_BENCH_OBJS) $(TOOL_SERVER_OBJS) $(MTMD_OBJS): \
private CPPFLAGS += \
-iquote llama.cpp/common \
-iquote llama.cpp/include \
-iquote llama.cpp/ggml/include \
-iquote llama.cpp/ggml/src \
-iquote llama.cpp/ggml/src/ggml-cpu \
-iquote llama.cpp/src \
-iquote llama.cpp/tools/mtmd \
-iquote o/$(MODE)/llama.cpp/tools/server \
-isystem llama.cpp/vendor
# Server needs llamafile headers for Metal support
# (adds the llamafile/ directory to the quote-include path of the server
# objects only).
$(TOOL_SERVER_OBJS): private CPPFLAGS += -iquote llamafile
# Version definitions
# GGML_VERSION/GGML_COMMIT and LLAMA_VERSION/LLAMA_COMMIT are passed to the
# compiler as string literals (note the escaped quotes). The make variables
# themselves are not set in this part of the file.
$(GGML_OBJS): private CCFLAGS += \
-DGGML_VERSION=\"$(GGML_VERSION)\" \
-DGGML_COMMIT=\"$(GGML_COMMIT)\"
$(LLAMA_OBJS): private CCFLAGS += \
-DLLAMA_VERSION=\"$(LLAMA_VERSION)\" \
-DLLAMA_COMMIT=\"$(LLAMA_COMMIT)\"
# Base flags for all objects
# NOTE(review): "all" here means the combined-library objects and the server
# objects; the mtmd, httplib and other tool objects are not in this target
# list - confirm that is intended.
$(LLAMA_CPP_OBJS) $(TOOL_SERVER_OBJS): private CCFLAGS += \
-DCOSMOCC=1 \
-DGGML_MULTIPLATFORM \
-DGGML_USE_LLAMAFILE \
-DGGML_USE_CPU \
-DGGML_USE_CPU_REPACK \
-DGGML_USE_OPENMP \
-DGGML_CPU_GENERIC \
-DGGML_SCHED_MAX_COPIES=4 \
-fopenmp
# Common library needs httplib support
$(COMMON_OBJS): private CCFLAGS += -DLLAMA_USE_HTTPLIB
# Optimization flags for specific components
# (-DNDEBUG is defined for the llama and common objects only.)
$(LLAMA_OBJS) $(COMMON_OBJS): private CCFLAGS += -DNDEBUG
# Per-file overrides. Each rule below appends to CCFLAGS for the listed
# objects only (`private`: not inherited by their prerequisites).
# NOTE(review): -mgcc is presumably the cosmocc switch that selects the GCC
# toolchain - confirm against the cosmocc documentation.

# Memory management and backend - use default -O2 (backend is in hot path)
# (no -O flag is added here, so the optimization level is whatever the
# surrounding build configuration provides.)
o/$(MODE)/llama.cpp/ggml/src/ggml-alloc.c.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-backend.cpp.o: \
private CCFLAGS += -mgcc
# Backend registration and utilities - can optimize for size
o/$(MODE)/llama.cpp/ggml/src/ggml-backend-reg.cpp.o \
o/$(MODE)/llama.cpp/common/arg.cpp.o \
o/$(MODE)/llama.cpp/common/log.cpp.o: \
private CCFLAGS += -Os
# Unicode data - use gcc for better compatibility
o/$(MODE)/llama.cpp/src/unicode-data.cpp.o: \
private CCFLAGS += -mgcc
# Core GGML and vector operations - optimize for performance
o/$(MODE)/llama.cpp/ggml/src/ggml.c.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-cpu/vec.cpp.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-cpu/ops.cpp.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-cpu/binary-ops.cpp.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-cpu/unary-ops.cpp.o: \
private CCFLAGS += -O3 -mgcc
# Quantization - optimize for performance (critical hot path)
o/$(MODE)/llama.cpp/ggml/src/ggml-quants.c.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-cpu/quants.c.o: \
private CCFLAGS += -O3 -mgcc
# ==============================================================================
# Tool executables
# ==============================================================================

# Prerequisites written as $$(VAR) below are expanded a second time, after all
# makefiles have been read. That is required for variables defined in BUILD.mk
# files that are included after this one (e.g. TINYBLAS_CPU_OBJS from
# llamafile/BUILD.mk); with ordinary expansion they would still be empty here.
.SECONDEXPANSION:

# Executables built from the llama.cpp tools.
LLAMA_CPP_TOOL_BINS := \
    o/$(MODE)/llama.cpp/quantize/quantize \
    o/$(MODE)/llama.cpp/imatrix/imatrix \
    o/$(MODE)/llama.cpp/perplexity/perplexity \
    o/$(MODE)/llama.cpp/llama-bench/llama-bench \
    o/$(MODE)/llama.cpp/server/llama-server

# All llama.cpp tools need pthread and OpenMP for threading
$(LLAMA_CPP_TOOL_BINS): private LDFLAGS += -fopenmp
$(LLAMA_CPP_TOOL_BINS): private LDLIBS += -lpthread
# Link inputs of the single-source tools. Each executable is made from its own
# tool object, the TinyBLAS CPU objects and the combined llama.cpp archive.
# None of these rules has a recipe, so the link command comes from a rule
# defined elsewhere; the prerequisite order is kept as is because it may be
# what that rule passes to the linker.
# $$(TINYBLAS_CPU_OBJS) is written with a doubled '$' so that it is resolved
# during secondary expansion, once the variable has been defined.
o/$(MODE)/llama.cpp/quantize/quantize: \
$(TOOL_QUANTIZE_OBJS) \
$$(TINYBLAS_CPU_OBJS) \
o/$(MODE)/llama.cpp/llama.cpp.a
o/$(MODE)/llama.cpp/imatrix/imatrix: \
$(TOOL_IMATRIX_OBJS) \
$$(TINYBLAS_CPU_OBJS) \
o/$(MODE)/llama.cpp/llama.cpp.a
o/$(MODE)/llama.cpp/perplexity/perplexity: \
$(TOOL_PERPLEXITY_OBJS) \
$$(TINYBLAS_CPU_OBJS) \
o/$(MODE)/llama.cpp/llama.cpp.a
o/$(MODE)/llama.cpp/llama-bench/llama-bench: \
$(TOOL_BENCH_OBJS) \
$$(TINYBLAS_CPU_OBJS) \
o/$(MODE)/llama.cpp/llama.cpp.a
# llama-server: server, multimodal, HTTP and llamafile GPU-support objects plus
# the TinyBLAS CPU objects and the combined archive. The generated asset
# headers are prerequisites too, but they are deliberately not part of the
# link line, which is why the recipe names its inputs instead of using $^.
# $$(TINYBLAS_CPU_OBJS) is resolved during secondary expansion; inside the
# recipe the plain $(TINYBLAS_CPU_OBJS) is fine because recipes are expanded
# only when they run.
o/$(MODE)/llama.cpp/server/llama-server: \
		$(TOOL_SERVER_OBJS) \
		$(MTMD_OBJS) \
		$(HTTPLIB_OBJS) \
		$(TOOL_LLAMAFILE_OBJS) \
		$$(TINYBLAS_CPU_OBJS) \
		o/$(MODE)/llama.cpp/llama.cpp.a \
		$(SERVER_ASSETS)
	@mkdir -p $(@D)
	$(LINK.o) \
	    $(TOOL_SERVER_OBJS) \
	    $(MTMD_OBJS) \
	    $(HTTPLIB_OBJS) \
	    $(TOOL_LLAMAFILE_OBJS) \
	    $(TINYBLAS_CPU_OBJS) \
	    o/$(MODE)/llama.cpp/llama.cpp.a \
	    $(LOADLIBES) $(LDLIBS) \
	    -o $@
# ==============================================================================
# Dependencies
# ==============================================================================

# Every library and tool object is rebuilt when this build file changes.
$(LLAMA_CPP_OBJS) \
$(TOOL_QUANTIZE_OBJS) \
$(TOOL_IMATRIX_OBJS) \
$(TOOL_PERPLEXITY_OBJS) \
$(TOOL_BENCH_OBJS) \
$(TOOL_SERVER_OBJS): llama.cpp/BUILD.mk

# ==============================================================================
# Main target
# ==============================================================================

# Phony aggregate target: builds the combined archive and all tools.
.PHONY: o/$(MODE)/llama.cpp
o/$(MODE)/llama.cpp: o/$(MODE)/llama.cpp/llama.cpp.a
o/$(MODE)/llama.cpp: o/$(MODE)/llama.cpp/server/llama-server
o/$(MODE)/llama.cpp: o/$(MODE)/llama.cpp/quantize/quantize
o/$(MODE)/llama.cpp: o/$(MODE)/llama.cpp/imatrix/imatrix
o/$(MODE)/llama.cpp: o/$(MODE)/llama.cpp/perplexity/perplexity
o/$(MODE)/llama.cpp: o/$(MODE)/llama.cpp/llama-bench/llama-bench
================================================
FILE: llama.cpp.patches/llamafile-files/README.llamafile
================================================
DESCRIPTION
llama.cpp is a machine learning library for large language models
LICENSE
MIT
ORIGIN
https://github.com/ggerganov/llama.cpp/
8b3befc0e2ed8fb18b903735831496b8b0c80949
2024-08-16
LOCAL MODIFICATIONS
- See [jart] and [kawrakow] annotations
- Remove MAP_POPULATE because it makes mmap(tinyllama) block for 100ms
- Refactor ggml.c, llama.cpp, and llava to use llamafile_open() APIs
- Unify main, server, and llava-cli into single llamafile program
- Make cuBLAS / hipBLAS optional by introducing tinyBLAS library
- Add support to main() programs for Cosmo /zip/.args files
- Introduce pledge() SECCOMP sandboxing to improve security
- Call exit() rather than abort() when GGML_ASSERT() fails
- Clamp bf16/f32 values before passing to K quantizers
- Make GPU logger callback API safer and less generic
- Write log to /dev/null when main.log fails to open
- Make main and llava-cli print timings on ctrl-c
- Make embeddings CLI program shell scriptable
- Avoid bind() conflicts on port 8080 w/ server
- Use runtime dispatching for matmul quants
- Remove operating system #ifdef statements
- Remove stdout logging from LLaVA
================================================
FILE: llama.cpp.patches/llamafile-files/common/license.cpp
================================================
// Generated by CMake
//
// LICENSES is a nullptr-terminated array of license texts, one raw string
// literal per bundled component (llama.cpp, cpp-httplib, jsonhpp). Each entry
// starts with a "License for <component>" heading followed by the license
// body. The custom raw-string delimiter =L= lets the texts contain quotes and
// parentheses without escaping.
// NOTE(review): the header says this file is generated, so manual edits are
// presumably overwritten when it is regenerated - confirm before editing.
const char* LICENSES[] = {
R"=L=(License for llama.cpp
=====================
MIT License
Copyright (c) 2023-2026 The ggml authors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
)=L=",
R"=L=(License for cpp-httplib
=======================
The MIT License (MIT)
Copyright (c) 2017 yhirose
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
)=L=",
R"=L=(License for jsonhpp
===================
MIT License
Copyright (c) 2013-2025 Niels Lohmann
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
)=L=",
nullptr
};
================================================
FILE: llama.cpp.patches/patches/common_arg.cpp.patch
================================================
diff --git a/common/arg.cpp b/common/arg.cpp
--- a/llama.cpp/common/arg.cpp
+++ b/llama.cpp/common/arg.cpp
@@ -36,6 +36,8 @@
#ifndef __EMSCRIPTEN__
#ifdef __linux__
#include
+#elif defined(COSMOCC)
+#include
#elif defined(_WIN32)
# if !defined(PATH_MAX)
# define PATH_MAX MAX_PATH
================================================
FILE: llama.cpp.patches/patches/common_chat.cpp.patch
================================================
diff --git a/common/chat.cpp b/common/chat.cpp
--- a/llama.cpp/common/chat.cpp
+++ b/llama.cpp/common/chat.cpp
@@ -1795,7 +1795,7 @@ static common_chat_params common_chat_params_init_deepseek_v3_1(const common_cha
};
auto prompt = apply(tmpl, inputs,
- /* messages_override= */ inputs.messages,
+ /* messages_override= */ std::optional(inputs.messages),
/* tools_override= */ std::nullopt,
additional_context);
data.prompt = prompt;
================================================
FILE: llama.cpp.patches/patches/common_common.cpp.patch
================================================
diff --git a/common/common.cpp b/common/common.cpp
--- a/llama.cpp/common/common.cpp
+++ b/llama.cpp/common/common.cpp
@@ -874,6 +874,16 @@ std::string fs_get_cache_directory() {
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
#elif defined(_WIN32)
cache_directory = std::getenv("LOCALAPPDATA");
+#elif defined(COSMOCC)
+ // We don't know what OS we are running on at compile time, just CPU architecture.
+ // try various environment variables, fall back to ~/.cache.
+ if (std::getenv("LOCALAPPDATA")) {
+ cache_directory = std::getenv("LOCALAPPDATA");
+ } else if (std::getenv("XDG_CACHE_HOME")) {
+ cache_directory = std::getenv("XDG_CACHE_HOME");
+ } else {
+ cache_directory = std::getenv("HOME") + std::string("/.cache/");
+ }
#elif defined(__EMSCRIPTEN__)
GGML_ABORT("not implemented on this platform");
#else
================================================
FILE: llama.cpp.patches/patches/common_download.cpp.patch
================================================
diff --git a/common/download.cpp b/common/download.cpp
--- a/llama.cpp/common/download.cpp
+++ b/llama.cpp/common/download.cpp
@@ -24,6 +24,8 @@
#ifndef __EMSCRIPTEN__
#ifdef __linux__
#include
+#elif defined(COSMOCC)
+#include
#elif defined(_WIN32)
# if !defined(PATH_MAX)
# define PATH_MAX MAX_PATH
================================================
FILE: llama.cpp.patches/patches/common_log.cpp.patch
================================================
diff --git a/common/log.cpp b/common/log.cpp
--- a/llama.cpp/common/log.cpp
+++ b/llama.cpp/common/log.cpp
@@ -19,6 +19,7 @@
# define fileno _fileno
#else
# include
+# include
#endif // defined(_WIN32)
int common_log_verbosity_thold = LOG_DEFAULT_LLAMA;
@@ -257,10 +258,27 @@ public:
running = true;
thrd = std::thread([this]() {
+#if !defined(_WIN32)
+ // Block SIGINT and SIGTERM on this thread to prevent EINTR during
+ // condition_variable::wait(). In Cosmopolitan libc, condition_variable
+ // throws std::system_error(EINTR) when interrupted by signals, and this
+ // exception cannot be caught (goes directly to std::terminate).
+ sigset_t block_mask;
+ sigemptyset(&block_mask);
+ sigaddset(&block_mask, SIGINT);
+ sigaddset(&block_mask, SIGTERM);
+ pthread_sigmask(SIG_BLOCK, &block_mask, nullptr);
+#endif
while (true) {
{
std::unique_lock lock(mtx);
- cv.wait(lock, [this]() { return head != tail; });
+ // Use wait_for() instead of wait() to work around a
+ // Cosmopolitan libc bug where untimed futex waits on
+ // XNU (macOS) expire after ~72 minutes, causing
+ // condition_variable::wait() to throw ETIMEDOUT.
+ while (head == tail) {
+ cv.wait_for(lock, std::chrono::seconds(30));
+ }
cur = entries[head];
================================================
FILE: llama.cpp.patches/patches/common_ngram-mod.cpp.patch
================================================
diff --git a/common/ngram-mod.cpp b/common/ngram-mod.cpp
--- a/llama.cpp/common/ngram-mod.cpp
+++ b/llama.cpp/common/ngram-mod.cpp
@@ -1,5 +1,7 @@
#include "ngram-mod.h"
+#include
+
//
// common_ngram_mod
//
================================================
FILE: llama.cpp.patches/patches/ggml_src_ggml-backend-impl.h.patch
================================================
diff --git a/ggml/src/ggml-backend-impl.h b/ggml/src/ggml-backend-impl.h
--- a/llama.cpp/ggml/src/ggml-backend-impl.h
+++ b/llama.cpp/ggml/src/ggml-backend-impl.h
@@ -39,7 +39,7 @@ extern "C" {
//
struct ggml_backend_buffer_i {
- // (optional) free the buffer
+ // (optional) free the buffer context
void (*free_buffer) (ggml_backend_buffer_t buffer);
// base address of the buffer
void * (*get_base) (ggml_backend_buffer_t buffer);
@@ -55,6 +55,10 @@ extern "C" {
void (*clear) (ggml_backend_buffer_t buffer, uint8_t value);
// (optional) reset any internal state due to tensor initialization, such as tensor extras
void (*reset) (ggml_backend_buffer_t buffer);
+ // (optional) free the buffer struct itself - used for cross-module memory management
+ // (eg. when buffer is allocated by a dynamically loaded library)
+ // if NULL, the default 'delete buffer' is used
+ void (*free_struct) (ggml_backend_buffer_t buffer);
};
struct ggml_backend_buffer {
================================================
FILE: llama.cpp.patches/patches/ggml_src_ggml-backend-reg.cpp.patch
================================================
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
--- a/llama.cpp/ggml/src/ggml-backend-reg.cpp
+++ b/llama.cpp/ggml/src/ggml-backend-reg.cpp
@@ -478,7 +478,7 @@ static ggml_backend_reg_t ggml_backend_load_best(const char * name, bool silent,
if (ec) {
GGML_LOG_DEBUG("%s: posix_stat(%s) failure, error-message: %s\n", __func__, path_str(search_path).c_str(), ec.message().c_str());
} else {
- GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
+ // GGML_LOG_DEBUG("%s: search path %s does not exist\n", __func__, path_str(search_path).c_str());
}
continue;
}
================================================
FILE: llama.cpp.patches/patches/ggml_src_ggml-backend.cpp.patch
================================================
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
--- a/llama.cpp/ggml/src/ggml-backend.cpp
+++ b/llama.cpp/ggml/src/ggml-backend.cpp
@@ -112,7 +112,14 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
if (buffer->iface.free_buffer != NULL) {
buffer->iface.free_buffer(buffer);
}
- delete buffer;
+
+ // Use free_struct if provided (for cross-module memory management,
+ // e.g., when the buffer was allocated by a dynamically loaded library)
+ if (buffer->iface.free_struct != NULL) {
+ buffer->iface.free_struct(buffer);
+ } else {
+ delete buffer;
+ }
}
size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
@@ -613,6 +620,7 @@ static const struct ggml_backend_buffer_i ggml_backend_multi_buffer_i = {
/* .cpy_tensor = */ NULL,
/* .clear = */ ggml_backend_multi_buffer_clear,
/* .reset = */ NULL,
+ /* .free_struct = */ NULL,
};
ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers) {
@@ -2177,6 +2185,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
/* .cpy_tensor = */ ggml_backend_cpu_buffer_cpy_tensor,
/* .clear = */ ggml_backend_cpu_buffer_clear,
/* .reset = */ NULL,
+ /* .free_struct = */ NULL,
};
static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
@@ -2189,6 +2198,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
/* .cpy_tensor = */ ggml_backend_cpu_buffer_cpy_tensor,
/* .clear = */ ggml_backend_cpu_buffer_clear,
/* .reset = */ NULL,
+ /* .free_struct = */ NULL,
};
// CPU backend buffer type
================================================
FILE: llama.cpp.patches/patches/ggml_src_ggml-cuda_common.cuh.patch
================================================
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
--- a/llama.cpp/ggml/src/ggml-cuda/common.cuh
+++ b/llama.cpp/ggml/src/ggml-cuda/common.cuh
@@ -292,9 +292,15 @@ static bool fp16_mma_hardware_available(const int cc) {
}
static bool bf16_mma_hardware_available(const int cc) {
+#ifdef GGML_USE_TINYBLAS
+ // TinyBLAS does not support BF16 - it would incorrectly interpret BF16 bits as FP16
+ GGML_UNUSED(cc);
+ return false;
+#else
return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE) ||
GGML_CUDA_CC_IS_CDNA(cc) || cc >= GGML_CUDA_CC_RDNA3 ||
(GGML_CUDA_CC_IS_MTHREADS(cc) && cc >= GGML_CUDA_CC_PH1);
+#endif
}
static bool fp32_mma_hardware_available(const int cc) {
================================================
FILE: llama.cpp.patches/patches/ggml_src_ggml-cuda_ggml-cuda.cu.patch
================================================
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
--- a/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/llama.cpp/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -669,6 +669,14 @@ static void ggml_backend_cuda_buffer_clear(ggml_backend_buffer_t buffer, uint8_t
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
}
+// free_struct callback for cross-module memory management
+// When the CUDA backend is loaded as a dynamic library, the buffer struct
+// is allocated by the DSO's 'new' operator and must be freed by the DSO's
+// 'delete' operator (not the main executable's)
+static void ggml_backend_cuda_buffer_free_struct(ggml_backend_buffer_t buffer) {
+ delete buffer;
+}
+
static const ggml_backend_buffer_i ggml_backend_cuda_buffer_interface = {
/* .free_buffer = */ ggml_backend_cuda_buffer_free_buffer,
/* .get_base = */ ggml_backend_cuda_buffer_get_base,
@@ -679,6 +687,7 @@ static const ggml_backend_buffer_i ggml_backend_cuda_buffer_interface = {
/* .cpy_tensor = */ ggml_backend_cuda_buffer_cpy_tensor,
/* .clear = */ ggml_backend_cuda_buffer_clear,
/* .reset = */ NULL,
+ /* .free_struct = */ ggml_backend_cuda_buffer_free_struct,
};
// cuda buffer type
@@ -991,6 +1000,7 @@ static const ggml_backend_buffer_i ggml_backend_cuda_split_buffer_interface = {
/* .cpy_tensor = */ NULL,
/* .clear = */ ggml_backend_cuda_split_buffer_clear,
/* .reset = */ NULL,
+ /* .free_struct = */ ggml_backend_cuda_buffer_free_struct,
};
// cuda split buffer type
@@ -1145,12 +1155,20 @@ static ggml_backend_buffer_t ggml_backend_cuda_host_buffer_type_alloc_buffer(ggm
if (ptr == nullptr) {
// fallback to cpu buffer
- return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
+ // Note: must set free_struct for cross-module memory management since the
+ // buffer is allocated by the DSO's copy of ggml_backend_buft_alloc_buffer
+ ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
+ if (buffer != nullptr) {
+ buffer->iface.free_struct = ggml_backend_cuda_buffer_free_struct;
+ }
+ return buffer;
}
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
buffer->buft = buft;
buffer->iface.free_buffer = ggml_backend_cuda_host_buffer_free_buffer;
+ // Set free_struct for cross-module memory management (DSO must free what it allocated)
+ buffer->iface.free_struct = ggml_backend_cuda_buffer_free_struct;
return buffer;
}
@@ -1250,8 +1268,13 @@ static void ggml_cuda_op_mul_mat_cublas(
const int cc = ggml_cuda_info().devices[id].cc;
+#ifdef GGML_USE_TINYBLAS
+ // TinyBLAS does not support BF16 - it would incorrectly interpret BF16 bits as FP16
+ const bool supports_bf16 = false;
+#else
const bool supports_bf16 = GGML_CUDA_CC_IS_NVIDIA(cc) || GGML_CUDA_CC_IS_AMD(cc) ||
(GGML_CUDA_CC_IS_MTHREADS(cc) && cc >= GGML_CUDA_CC_QY2);
+#endif
const bool use_fp16 = (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT;
================================================
FILE: llama.cpp.patches/patches/ggml_src_ggml-cuda_solve_tri.cu.patch
================================================
diff --git a/ggml/src/ggml-cuda/solve_tri.cu b/ggml/src/ggml-cuda/solve_tri.cu
--- a/llama.cpp/ggml/src/ggml-cuda/solve_tri.cu
+++ b/llama.cpp/ggml/src/ggml-cuda/solve_tri.cu
@@ -5,6 +5,10 @@
#define MAX_N_FAST 64
#define MAX_K_FAST 32
+// cuBLAS TRSM-based implementation for large matrices
+// Not available when using TinyBLAS (which doesn't implement TRSM)
+#ifndef GGML_TINYBLAS_NO_TRSM
+
static __global__ void get_batch_pointers(const float * A,
float * X,
const float ** A_ptrs,
@@ -78,6 +82,8 @@ static void solve_tri_f32_cublas(ggml_backend_cuda_context & ctx,
GGML_UNUSED_VARS(s12, s13);
}
+#endif // GGML_TINYBLAS_NO_TRSM
+
// ======================
// Fast Kernel (n <= 64, k <= 32) - Warp-based parallel reduction
// ======================
@@ -267,9 +273,18 @@ void ggml_cuda_op_solve_tri(ggml_backend_cuda_context & ctx, ggml_tensor * dst)
src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
dst->nb[3] / sizeof(float), ctx.stream());
} else {
+#ifdef GGML_TINYBLAS_NO_TRSM
+ // TinyBLAS doesn't support TRSM (triangular solve)
+ // This path is only reached by Qwen3-Next models with large matrices
+ GGML_ABORT("solve_tri with n > %d or k > %d requires cuBLAS TRSM which is not available with TinyBLAS. "
+ "This operation is only used by Qwen3-Next models. "
+ "Please rebuild with cuBLAS or use CPU backend for this model.",
+ MAX_N_FAST, MAX_K_FAST);
+#else
solve_tri_f32_cublas(ctx, (const float *) src0->data, (const float *) src1->data, (float *) dst->data, n, k,
ne02, ne03, src0->nb[2] / sizeof(float), src0->nb[3] / sizeof(float),
src1->nb[2] / sizeof(float), src1->nb[3] / sizeof(float), dst->nb[2] / sizeof(float),
dst->nb[3] / sizeof(float), ctx.stream());
+#endif // GGML_TINYBLAS_NO_TRSM
}
}
================================================
FILE: llama.cpp.patches/patches/ggml_src_ggml-cuda_vendors_cuda.h.patch
================================================
diff --git a/ggml/src/ggml-cuda/vendors/cuda.h b/ggml/src/ggml-cuda/vendors/cuda.h
--- a/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h
+++ b/llama.cpp/ggml/src/ggml-cuda/vendors/cuda.h
@@ -2,7 +2,17 @@
#include
#include
+
+#ifdef GGML_USE_TINYBLAS
+// Use TinyBLAS instead of cuBLAS for llamafile
+// TinyBLAS is a lightweight BLAS implementation that provides
+// API-compatible replacements for cuBLAS GEMM functions
+#include "tinyblas.h"
+#include "tinyblas-compat.h"
+#else
#include
+#endif // GGML_USE_TINYBLAS
+
#include
#include
@@ -16,8 +26,10 @@
#if CUDART_VERSION < 11020
#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED
+#ifndef GGML_USE_TINYBLAS
#define CUBLAS_TF32_TENSOR_OP_MATH CUBLAS_TENSOR_OP_MATH
#define CUBLAS_COMPUTE_16F CUDA_R_16F
#define CUBLAS_COMPUTE_32F CUDA_R_32F
#define cublasComputeType_t cudaDataType_t
+#endif // GGML_USE_TINYBLAS
#endif // CUDART_VERSION < 11020
================================================
FILE: llama.cpp.patches/patches/ggml_src_ggml-metal_ggml-metal.cpp.patch
================================================
diff --git a/ggml/src/ggml-metal/ggml-metal.cpp b/ggml/src/ggml-metal/ggml-metal.cpp
--- a/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp
+++ b/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp
@@ -83,6 +83,14 @@ static void ggml_backend_metal_buffer_shared_clear(ggml_backend_buffer_t buffer,
ggml_metal_buffer_clear(ctx, value);
}
+// free_struct callback for cross-module memory management
+// When the Metal backend is loaded as a dynamic library, the buffer struct
+// is allocated by the dylib's 'new' operator and must be freed by the dylib's
+// 'delete' operator (not the main executable's)
+static void ggml_backend_metal_buffer_free_struct(ggml_backend_buffer_t buffer) {
+ delete buffer;
+}
+
static ggml_backend_buffer_i ggml_backend_metal_buffer_shared_i = {
/* .free_buffer = */ ggml_backend_metal_buffer_shared_free_buffer,
/* .get_base = */ ggml_backend_metal_buffer_shared_get_base,
@@ -93,6 +101,7 @@ static ggml_backend_buffer_i ggml_backend_metal_buffer_shared_i = {
/* .cpy_tensor = */ ggml_backend_metal_buffer_shared_cpy_tensor,
/* .clear = */ ggml_backend_metal_buffer_shared_clear,
/* .reset = */ NULL,
+ /* .free_struct = */ ggml_backend_metal_buffer_free_struct,
};
// private buffer
@@ -167,6 +176,7 @@ static ggml_backend_buffer_i ggml_backend_metal_buffer_private_i = {
/* .cpy_tensor = */ ggml_backend_metal_buffer_private_cpy_tensor,
/* .clear = */ ggml_backend_metal_buffer_private_clear,
/* .reset = */ NULL,
+ /* .free_struct = */ ggml_backend_metal_buffer_free_struct,
};
static bool ggml_backend_buffer_is_metal(ggml_backend_buffer_t buffer) {
================================================
FILE: llama.cpp.patches/patches/ggml_src_gguf.cpp.patch
================================================
diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp
--- a/llama.cpp/ggml/src/gguf.cpp
+++ b/llama.cpp/ggml/src/gguf.cpp
@@ -3,6 +3,10 @@
#include "ggml-impl.h"
#include "gguf.h"
+#ifdef COSMOCC
+#include "llamafile/llamafile.h"
+#endif
+
#include
#include
#include
@@ -358,18 +362,107 @@ struct gguf_reader {
return nread == size;
}
+ size_t tell() const {
+ return gguf_ftell(file);
+ }
+
+ bool seek(size_t offset, int whence) const {
+ return gguf_fseek(file, offset, whence) == 0;
+ }
+
private:
FILE * file;
mutable uint64_t nbytes_remain;
};
+#ifdef COSMOCC
+// [llamafile] Reader that uses llamafile API for both regular files and memory-mapped content
+struct gguf_llamafile_reader {
+ struct llamafile * lfile;
+
+ gguf_llamafile_reader(struct llamafile * lfile) : lfile(lfile) {}
+
+ template
+ bool read(T & dst) const {
+ return llamafile_read(lfile, &dst, sizeof(dst)) == (long)sizeof(dst);
+ }
+
+ template
+ bool read(std::vector & dst, const size_t n) const {
+ dst.resize(n);
+ for (size_t i = 0; i < dst.size(); ++i) {
+ if constexpr (std::is_same::value) {
+ bool tmp;
+ if (!read(tmp)) {
+ return false;
+ }
+ dst[i] = tmp;
+ } else {
+ if (!read(dst[i])) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ bool read(bool & dst) const {
+ int8_t tmp = -1;
+ if (!read(tmp)) {
+ return false;
+ }
+ dst = tmp != 0;
+ return true;
+ }
+
+ bool read(enum ggml_type & dst) const {
+ int32_t tmp = -1;
+ if (!read(tmp)) {
+ return false;
+ }
+ dst = ggml_type(tmp);
+ return true;
+ }
+
+ bool read(enum gguf_type & dst) const {
+ int32_t tmp = -1;
+ if (!read(tmp)) {
+ return false;
+ }
+ dst = gguf_type(tmp);
+ return true;
+ }
+
+ bool read(std::string & dst) const {
+ uint64_t size = 0;
+ if (!read(size)) {
+ return false;
+ }
+ dst.resize(size);
+ return llamafile_read(lfile, dst.data(), dst.length()) == (long)dst.length();
+ }
+
+ bool read(void * dst, const size_t size) const {
+ return llamafile_read(lfile, dst, size) == (long)size;
+ }
+
+ size_t tell() const {
+ return llamafile_tell(lfile);
+ }
+
+ bool seek(size_t offset, int whence) const {
+ return llamafile_seek(lfile, offset, whence);
+ }
+};
+#endif
+
struct gguf_context * gguf_init_empty(void) {
return new gguf_context;
}
-template
-bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector & kv, const std::string & key, const bool is_array, const size_t n) {
+template
+bool gguf_read_emplace_helper(const Reader & gr, std::vector & kv, const std::string & key, const bool is_array, const size_t n) {
if (is_array) {
std::vector value;
try {
@@ -394,8 +487,8 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector
+struct gguf_context * gguf_init_from_reader_impl(const Reader & gr, struct gguf_init_params params) {
struct gguf_context * ctx = new gguf_context;
bool ok = true;
@@ -696,14 +789,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
GGML_ASSERT(int64_t(ctx->info.size()) == n_tensors);
// we require the data section to be aligned, so take into account any padding
- if (gguf_fseek(file, GGML_PAD(gguf_ftell(file), ctx->alignment), SEEK_SET) != 0) {
+ if (!gr.seek(GGML_PAD(gr.tell(), ctx->alignment), SEEK_SET)) {
GGML_LOG_ERROR("%s: failed to seek to beginning of data section\n", __func__);
gguf_free(ctx);
return nullptr;
}
// store the current file offset - this is where the data section starts
- ctx->offset = gguf_ftell(file);
+ ctx->offset = gr.tell();
// compute the total size of the data section, taking into account the alignment
{
@@ -840,7 +933,27 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par
return ctx;
}
+// Wrapper for FILE*-based reading
+struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
+ const struct gguf_reader gr(file);
+ return gguf_init_from_reader_impl(gr, params);
+}
+
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
+#ifdef COSMOCC
+ // [llamafile] Use llamafile API for all file types: plain GGUF, /zip/ paths,
+ // foo.zip@weights.gguf, .llamafile containers. The llamafile API handles both
+ // FILE*-backed and memory-mapped content transparently.
+ struct llamafile * lfile = llamafile_open_gguf(fname, "rb");
+ if (!lfile) {
+ GGML_LOG_ERROR("%s: failed to open GGUF file '%s': %s\n", __func__, fname, strerror(errno));
+ return nullptr;
+ }
+ const struct gguf_llamafile_reader gr(lfile);
+ struct gguf_context * result = gguf_init_from_reader_impl(gr, params);
+ llamafile_close(lfile);
+ return result;
+#else
FILE * file = ggml_fopen(fname, "rb");
if (!file) {
@@ -851,6 +964,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
struct gguf_context * result = gguf_init_from_file_impl(file, params);
fclose(file);
return result;
+#endif
}
void gguf_free(struct gguf_context * ctx) {
================================================
FILE: llama.cpp.patches/patches/src_llama-mmap.cpp.patch
================================================
diff --git a/src/llama-mmap.cpp b/src/llama-mmap.cpp
--- a/llama.cpp/src/llama-mmap.cpp
+++ b/llama.cpp/src/llama-mmap.cpp
@@ -4,6 +4,10 @@
#include "ggml.h"
+#ifdef COSMOCC
+#include "llamafile/llamafile.h"
+#endif
+
#include
#include
#include
@@ -165,6 +169,19 @@ struct llama_file::impl {
}
#else
impl(const char * fname, const char * mode, [[maybe_unused]] const bool use_direct_io = false) : fname(fname) {
+#ifdef COSMOCC
+ // [llamafile] Use llamafile_open_gguf for all file opening. This handles:
+ // - Plain GGUF files (opened normally with fopen)
+ // - /zip/ paths (memory-mapped from executable's zip)
+ // - foo.zip@weights.gguf syntax
+ // - .llamafile containers
+ lfile = llamafile_open_gguf(fname, mode);
+ if (lfile == NULL) {
+ throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
+ }
+ size = llamafile_size(lfile);
+ return;
+#endif
#ifdef __linux__
// Try unbuffered I/O for read only
if (use_direct_io && std::strcmp(mode, "rb") == 0) {
@@ -210,6 +227,9 @@ struct llama_file::impl {
}
size_t tell() const {
+#ifdef COSMOCC
+ return llamafile_tell(lfile);
+#endif
if (fd == -1) {
long ret = std::ftell(fp);
if (ret == -1) {
@@ -227,6 +247,11 @@ struct llama_file::impl {
}
void seek(size_t offset, int whence) const {
+#ifdef COSMOCC
+ if (!llamafile_seek(lfile, offset, whence)) {
+ throw std::runtime_error(format("seek error: %s", strerror(errno)));
+ }
+#else
off_t ret = 0;
if (fd == -1) {
ret = std::fseek(fp, (long) offset, whence);
@@ -236,12 +261,22 @@ struct llama_file::impl {
if (ret == -1) {
throw std::runtime_error(format("seek error: %s", strerror(errno)));
}
+#endif
}
void read_raw_unsafe(void * ptr, size_t len) {
if (len == 0) {
return;
}
+#ifdef COSMOCC
+ long rc = llamafile_read(lfile, ptr, len);
+ if (rc == -1) {
+ throw std::runtime_error(format("read error: %s", strerror(errno)));
+ }
+ if ((size_t)rc != len) {
+ throw std::runtime_error(format("short read: expected %zu bytes, got %ld", len, rc));
+ }
+#else
errno = 0;
if (fd == -1) {
const size_t curr_off = tell();
@@ -291,6 +326,7 @@ struct llama_file::impl {
bytes_read += (size_t) ret;
}
}
+#endif
}
void read_aligned_chunk(void * dest, size_t size) {
@@ -351,13 +387,20 @@ struct llama_file::impl {
}
~impl() {
+#ifdef COSMOCC
+ llamafile_close(lfile);
+#else
if (fd != -1) {
close(fd);
- } else {
+ } else if (fp) {
std::fclose(fp);
}
+#endif
}
int fd = -1;
+#ifdef COSMOCC
+ llamafile * lfile = nullptr;
+#endif
std::string fname;
#endif
@@ -382,6 +425,13 @@ size_t llama_file::read_alignment() const { return pimpl->read_alignment(); }
bool llama_file::has_direct_io() const { return pimpl->has_direct_io(); }
int llama_file::file_id() const {
+#ifdef COSMOCC
+ // [llamafile] For pre-mapped files (bundled /zip/ assets), there's no fd.
+ // Callers that need mmap should check has_premapped_content() first and use
+ // premapped_content() instead of calling file_id() + mmap().
+ FILE * fp = llamafile_fp(pimpl->lfile);
+ return fp ? fileno(fp) : -1;
+#else
#ifdef _WIN32
return _fileno(pimpl->fp);
#else
@@ -389,9 +439,10 @@ int llama_file::file_id() const {
return pimpl->fd;
}
#if defined(fileno)
- return fileno(pimpl->fp);
+ return pimpl->fp ? fileno(pimpl->fp) : -1;
#else
- return ::fileno(pimpl->fp);
+ return pimpl->fp ? ::fileno(pimpl->fp) : -1;
+#endif
#endif
#endif
}
@@ -409,14 +460,64 @@ uint32_t llama_file::read_u32() { return pimpl->read_u32(); }
void llama_file::write_raw(const void * ptr, size_t len) const { pimpl->write_raw(ptr, len); }
void llama_file::write_u32(uint32_t val) const { pimpl->write_u32(val); }
+// [llamafile] Check if file has pre-mapped content (for bundled zip assets)
+bool llama_file::has_premapped_content() const {
+#ifdef COSMOCC
+ return !llamafile_fp(pimpl->lfile);
+#else
+ return false;
+#endif
+}
+
+void * llama_file::premapped_content() const {
+#ifdef COSMOCC
+ if (!llamafile_fp(pimpl->lfile)) {
+ return llamafile_content(pimpl->lfile);
+ }
+#endif
+ return nullptr;
+}
+
+void * llama_file::get_llamafile() const {
+#ifdef COSMOCC
+ return pimpl->lfile;
+#else
+ return nullptr;
+#endif
+}
+
// llama_mmap
struct llama_mmap::impl {
#ifdef _POSIX_MAPPED_FILES
std::vector> mapped_fragments;
+ bool is_owned = true; // [llamafile] false if using pre-mapped content
+#ifdef COSMOCC
+ llamafile * lfile = nullptr; // [llamafile] for reference counting
+#endif
impl(struct llama_file * file, size_t prefetch, bool numa) {
size = file->size();
+
+ // [llamafile] Check if file already has pre-mapped content (bundled zip assets).
+ // For /zip/ paths, the content is already memory-mapped by llamafile_open_zip().
+ // We use reference counting to keep the mapping alive: llamafile_ref() here and
+ // llamafile_unref() in the destructor. The llama_file also calls llamafile_close()
+ // (which internally calls llamafile_unref()) in its destructor. Either destruction
+ // order is safe: the memory is only freed when the last reference is released.
+ if (file->has_premapped_content()) {
+ addr = file->premapped_content();
+ is_owned = false;
+#ifdef COSMOCC
+ // Keep the llamafile alive by incrementing reference count
+ lfile = (llamafile *)file->get_llamafile();
+ if (lfile) {
+ llamafile_ref(lfile);
+ }
+#endif
+ return;
+ }
+
int fd = file->file_id();
int flags = MAP_SHARED;
if (numa) { prefetch = 0; }
@@ -461,6 +562,11 @@ struct llama_mmap::impl {
}
void unmap_fragment(size_t first, size_t last) {
+ // [llamafile] Don't unmap if we're using pre-mapped content
+ if (!is_owned) {
+ return;
+ }
+
int page_size = sysconf(_SC_PAGESIZE);
align_range(&first, &last, page_size);
size_t len = last - first;
@@ -497,6 +603,16 @@ struct llama_mmap::impl {
}
~impl() {
+ // [llamafile] Don't munmap if we're using pre-mapped content
+ if (!is_owned) {
+#ifdef COSMOCC
+ // Decrement reference count on the llamafile
+ if (lfile) {
+ llamafile_unref(lfile);
+ }
+#endif
+ return;
+ }
for (const auto & frag : mapped_fragments) {
if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
================================================
FILE: llama.cpp.patches/patches/src_llama-mmap.h.patch
================================================
diff --git a/src/llama-mmap.h b/src/llama-mmap.h
--- a/llama.cpp/src/llama-mmap.h
+++ b/llama.cpp/src/llama-mmap.h
@@ -34,6 +34,11 @@ struct llama_file {
size_t read_alignment() const;
bool has_direct_io() const;
+
+ // [llamafile] Check if file has pre-mapped content (for bundled zip assets)
+ bool has_premapped_content() const;
+ void * premapped_content() const;
+ void * get_llamafile() const; // Returns llamafile* for reference counting
private:
struct impl;
std::unique_ptr pimpl;
================================================
FILE: llama.cpp.patches/patches/tools_server_server-queue.cpp.patch
================================================
diff --git a/tools/server/server-queue.cpp b/tools/server/server-queue.cpp
--- a/llama.cpp/tools/server/server-queue.cpp
+++ b/llama.cpp/tools/server/server-queue.cpp
@@ -4,6 +4,9 @@
#include "log.h"
#include
+#include
+#include
+#include
#define QUE_INF(fmt, ...) LOG_INF("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
#define QUE_WRN(fmt, ...) LOG_WRN("que %12.*s: " fmt, 12, __func__, __VA_ARGS__)
@@ -110,9 +113,13 @@ void server_queue::wait_until_no_sleep() {
condition_tasks.notify_one(); // only main thread is waiting on this
}
QUE_DBG("%s", "waiting until no sleep\n");
- condition_tasks.wait(lock, [&]{
- return !sleeping;
- });
+ // Use wait_for() instead of wait() to work around a
+ // Cosmopolitan libc bug where untimed futex waits on XNU
+ // (macOS) expire after ~72 minutes, causing
+ // condition_variable::wait() to throw ETIMEDOUT.
+ while (sleeping) {
+ condition_tasks.wait_for(lock, std::chrono::seconds(30));
+ }
}
}
@@ -123,6 +130,18 @@ void server_queue::terminate() {
}
void server_queue::start_loop(int64_t idle_sleep_ms) {
+ // Block SIGINT and SIGTERM on this thread to prevent EINTR during
+ // condition_variable::wait(). In Cosmopolitan libc, condition_variable
+ // throws std::system_error(EINTR) when interrupted by signals, and this
+ // exception cannot be caught (goes directly to std::terminate).
+ // The main thread handles these signals, sets running=false, and calls
+ // notify_all() to wake us up cleanly.
+ sigset_t block_mask;
+ sigemptyset(&block_mask);
+ sigaddset(&block_mask, SIGINT);
+ sigaddset(&block_mask, SIGTERM);
+ pthread_sigmask(SIG_BLOCK, &block_mask, nullptr);
+
running = true;
time_last_task = ggml_time_ms();
@@ -181,9 +200,13 @@ void server_queue::start_loop(int64_t idle_sleep_ms) {
callback_sleeping_state(true);
req_stop_sleeping = false;
// wait until we are requested to exit sleeping state
- condition_tasks.wait(lock, [&]{
- return (!running || req_stop_sleeping);
- });
+ // Use wait_for() instead of wait() to work around a
+ // Cosmopolitan libc bug where untimed futex waits on
+ // XNU (macOS) expire after ~72 minutes, causing
+ // condition_variable::wait() to throw ETIMEDOUT.
+ while (running && !req_stop_sleeping) {
+ condition_tasks.wait_for(lock, std::chrono::seconds(30));
+ }
if (!running) { // may changed during sleep
break; // terminate
}
@@ -266,13 +289,17 @@ void server_response::remove_waiting_task_ids(const std::unordered_set & id
server_task_result_ptr server_response::recv(const std::unordered_set & id_tasks) {
while (true) {
std::unique_lock lock(mutex_results);
- condition_results.wait(lock, [&]{
+ // Use wait_for() instead of wait() to work around a
+ // Cosmopolitan libc bug where untimed futex waits on XNU
+ // (macOS) expire after ~72 minutes, causing
+ // condition_variable::wait() to throw ETIMEDOUT.
+ while (queue_results.empty()) {
+ condition_results.wait_for(lock, std::chrono::seconds(30));
if (!running) {
RES_DBG("%s : queue result stop\n", "recv");
std::terminate(); // we cannot return here since the caller is HTTP code
}
- return !queue_results.empty();
- });
+ }
for (size_t i = 0; i < queue_results.size(); i++) {
if (id_tasks.find(queue_results[i]->id) != id_tasks.end()) {
================================================
FILE: llama.cpp.patches/patches/tools_server_server.cpp.patch
================================================
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
--- a/llama.cpp/tools/server/server.cpp
+++ b/llama.cpp/tools/server/server.cpp
@@ -10,6 +10,7 @@
#include
#include
#include
+#include
#include
#include // for std::thread::hardware_concurrency
@@ -17,6 +18,11 @@
#include
#endif
+#ifdef COSMOCC
+#include
+#include "llamafile.h"
+#endif
+
static std::function shutdown_handler;
static std::atomic_flag is_terminating = ATOMIC_FLAG_INIT;
@@ -67,7 +73,13 @@ static server_http_context::handler_t ex_wrapper(server_http_context::handler_t
};
}
-int main(int argc, char ** argv) {
+// Core server logic - can be called from llamafile main.cpp or standalone main()
+// Optional callbacks for combined mode:
+// on_ready: called when server is fully loaded and accepting requests
+// on_shutdown_available: provides a function to trigger server shutdown
+int server_main(int argc, char ** argv,
+ std::function on_ready,
+ std::function)> on_shutdown_available) {
std::setlocale(LC_NUMERIC, "C");
// own arguments required by this example
@@ -98,6 +110,10 @@ int main(int argc, char ** argv) {
params.model_alias.insert(params.model.name);
}
+#ifdef COSMOCC
+ llamafile_has_metal(); // triggers Metal backend registration on macOS ARM64
+#endif
+
common_init();
// struct that contains llama context and inference
@@ -267,6 +283,13 @@ int main(int argc, char ** argv) {
// this will unblock start_loop()
ctx_server.terminate();
};
+
+ // Provide shutdown function to caller (for combined mode)
+ if (on_shutdown_available) {
+ on_shutdown_available([&ctx_server]() {
+ ctx_server.terminate();
+ });
+ }
}
// TODO: refactor in common/console
@@ -298,6 +321,11 @@ int main(int argc, char ** argv) {
LOG_INF("%s: server is listening on %s\n", __func__, ctx_http.listening_address.c_str());
LOG_INF("%s: starting the main loop...\n", __func__);
+ // Notify caller that server is ready (for combined mode TUI startup)
+ if (on_ready) {
+ on_ready(ctx_http.listening_address);
+ }
+
// optionally, notify router server that this instance is ready
const char * router_port = std::getenv("LLAMA_SERVER_ROUTER_PORT");
std::thread monitor_thread;
@@ -322,5 +350,48 @@ int main(int argc, char ** argv) {
}
}
+#ifdef LLAMAFILE_TUI
+ // In combined mode (callbacks provided), return normally so the caller
+ // can handle cleanup and TUI thread joining
+ if (on_ready || on_shutdown_available) {
+ return 0;
+ }
+ // No callbacks were provided, so the process exits from here. The Metal
+ // backend has async logging that llama_synchronize() doesn't wait for;
+ // without this delay, _exit() might truncate llama_memory_breakdown_print's output
+ sleep(1);
+ // Use _exit() to avoid Metal cleanup crash (dangling refs with TUI + Metal + server)
+ _exit(0);
+#else
return 0;
+#endif
}
+
+// Standalone entry point for llama-server executable
+// Not compiled when building as part of llamafile TUI (which has its own main)
+// Having this allows us to test cosmocc-compiled llama.cpp in isolation.
+#ifndef LLAMAFILE_TUI
+int main(int argc, char ** argv) {
+#ifdef COSMOCC
+ argc = cosmo_args("/zip/.args", &argv);
+
+ // Check if verbose mode is requested (must be set before GPU init)
+ bool verbose = llamafile_has(argv, "--verbose");
+ FLAG_verbose = verbose ? 1 : 0;
+
+ // Initialize GPU support early (must happen BEFORE llama_backend_init())
+ // This triggers dynamic loading of GPU backends (CUDA, ROCm, Metal)
+ // The llamafile_has_* functions use lazy initialization via cosmo_once()
+ llamafile_has_gpu();
+ if (!verbose) {
+ // disable ggml verbose logging
+ if (llamafile_has_metal()) {
+ llamafile_metal_log_set(llamafile_log_callback_null, NULL);
+ } else if (llamafile_has_cuda() || llamafile_has_amd_gpu()) {
+ llamafile_cuda_log_set(llamafile_log_callback_null, NULL);
+ }
+ }
+#endif
+ return server_main(argc, argv, nullptr, nullptr);
+}
+#endif
================================================
FILE: llama.cpp.patches/patches/vendor_cpp-httplib_httplib.cpp.patch
================================================
diff --git a/vendor/cpp-httplib/httplib.cpp b/vendor/cpp-httplib/httplib.cpp
--- a/llama.cpp/vendor/cpp-httplib/httplib.cpp
+++ b/llama.cpp/vendor/cpp-httplib/httplib.cpp
@@ -6043,7 +6043,13 @@ void ThreadPool::worker(bool is_dynamic) {
break;
}
} else {
- cond_.wait(lock, [&] { return !jobs_.empty() || shutdown_; });
+ // Use wait_for() instead of wait() to work around a
+ // Cosmopolitan libc bug where untimed futex waits on XNU
+ // (macOS) expire after ~72 minutes, causing
+ // condition_variable::wait() to throw ETIMEDOUT.
+ while (jobs_.empty() && !shutdown_) {
+ cond_.wait_for(lock, std::chrono::seconds(30));
+ }
}
idle_thread_count_--;
================================================
FILE: llama.cpp.patches/renames.sh
================================================
#!/usr/bin/env bash
set -euo pipefail
# use this script if you just want to move files from one directory to another.
# For instance:
# mv common/base64.hpp base64.h
================================================
FILE: llamafile/BUILD.mk
================================================
#
# Copyright 2024 Mozilla Foundation
# Copyright 2026 Mozilla.ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PKGS += LLAMAFILE
# ==============================================================================
# Header files (for mkdeps dependency tracking)
# ==============================================================================
# Package sources are NOT handled through the full deps.mk SRCS/HDRS mechanism;
# only the headers listed below are declared for this package.
# Note: We only list headers that:
# 1. Are needed by code scanned by mkdeps (like third_party sources)
# 2. Only include standard library headers (no llama.cpp dependencies)
# Headers like chatbot.h that include llama.cpp headers are excluded
# because mkdeps can't resolve those include paths.
LLAMAFILE_HDRS := \
llamafile/llamafile.h \
llamafile/sgemm.h
# ==============================================================================
# Include paths
# ==============================================================================
# -iquote directories are searched only for #include "file" directives.
# -isystem directories are searched as system header directories, which is why
# the vendored code (llama.cpp/vendor, third_party) is listed that way.
# This variable is reused by LLAMAFILE_CPPFLAGS, LLAMAFILE_SERVER_INCS and the
# TinyBLAS target-specific CPPFLAGS further down in this file.
LLAMAFILE_INCLUDES := \
-iquote llamafile \
-iquote llama.cpp/common \
-iquote llama.cpp/include \
-iquote llama.cpp/ggml/include \
-iquote llama.cpp/ggml/src \
-iquote llama.cpp/ggml/src/ggml-cpu \
-iquote llama.cpp/src \
-iquote llama.cpp/tools/mtmd \
-isystem llama.cpp/vendor \
-isystem third_party
# ==============================================================================
# Compiler flags
# ==============================================================================
# The patched llama.cpp server (llama.cpp/tools/server/server.cpp) always
# exposes its server logic as server_main(). When LLAMAFILE_TUI is defined, the
# standalone main() wrapper in that file is compiled out and llamafile's
# main.cpp calls server_main() instead. In the standalone build this flag is
# off, so the wrapper main() is compiled and simply calls server_main().
# COSMOCC=1 turns on the `#ifdef COSMOCC` llamafile code paths in the patched
# llama.cpp sources.
LLAMAFILE_CPPFLAGS := \
$(LLAMAFILE_INCLUDES) \
-DLLAMAFILE_TUI \
-DCOSMOCC=1
# ==============================================================================
# Source files - Highlight library
# ==============================================================================
# All C++ sources under llamafile/highlight/. This list is appended to
# LLAMAFILE_SRCS_CPP below, so these files are compiled into LLAMAFILE_OBJS.
LLAMAFILE_HIGHLIGHT_SRCS := \
llamafile/highlight/color_bleeder.cpp \
llamafile/highlight/highlight.cpp \
llamafile/highlight/highlight_ada.cpp \
llamafile/highlight/highlight_asm.cpp \
llamafile/highlight/highlight_basic.cpp \
llamafile/highlight/highlight_bnf.cpp \
llamafile/highlight/highlight_c.cpp \
llamafile/highlight/highlight_cmake.cpp \
llamafile/highlight/highlight_cobol.cpp \
llamafile/highlight/highlight_csharp.cpp \
llamafile/highlight/highlight_css.cpp \
llamafile/highlight/highlight_d.cpp \
llamafile/highlight/highlight_forth.cpp \
llamafile/highlight/highlight_fortran.cpp \
llamafile/highlight/highlight_go.cpp \
llamafile/highlight/highlight_haskell.cpp \
llamafile/highlight/highlight_html.cpp \
llamafile/highlight/highlight_java.cpp \
llamafile/highlight/highlight_js.cpp \
llamafile/highlight/highlight_julia.cpp \
llamafile/highlight/highlight_kotlin.cpp \
llamafile/highlight/highlight_ld.cpp \
llamafile/highlight/highlight_lisp.cpp \
llamafile/highlight/highlight_lua.cpp \
llamafile/highlight/highlight_m4.cpp \
llamafile/highlight/highlight_make.cpp \
llamafile/highlight/highlight_markdown.cpp \
llamafile/highlight/highlight_matlab.cpp \
llamafile/highlight/highlight_ocaml.cpp \
llamafile/highlight/highlight_pascal.cpp \
llamafile/highlight/highlight_perl.cpp \
llamafile/highlight/highlight_php.cpp \
llamafile/highlight/highlight_python.cpp \
llamafile/highlight/highlight_r.cpp \
llamafile/highlight/highlight_ruby.cpp \
llamafile/highlight/highlight_rust.cpp \
llamafile/highlight/highlight_scala.cpp \
llamafile/highlight/highlight_shell.cpp \
llamafile/highlight/highlight_sql.cpp \
llamafile/highlight/highlight_swift.cpp \
llamafile/highlight/highlight_tcl.cpp \
llamafile/highlight/highlight_tex.cpp \
llamafile/highlight/highlight_txt.cpp \
llamafile/highlight/highlight_typescript.cpp \
llamafile/highlight/highlight_zig.cpp \
llamafile/highlight/util.cpp
# ==============================================================================
# Source files - Core TUI
# ==============================================================================
# C and C++ sources are kept in separate lists because LLAMAFILE_OBJS maps
# them to object files with different suffix substitutions (%.c vs %.cpp).
# llamafile/main.cpp is intentionally absent: it has its own explicit rule
# in the "Main executable" section.
LLAMAFILE_SRCS_C := \
llamafile/bestline.c \
llamafile/cuda.c \
llamafile/llamafile.c \
llamafile/metal.c \
llamafile/zip.c
# The highlight library sources are pulled in at the end of this list.
LLAMAFILE_SRCS_CPP := \
llamafile/args.cpp \
llamafile/chatbot_api.cpp \
llamafile/chatbot_cli.cpp \
llamafile/chatbot_comm.cpp \
llamafile/chatbot_comp.cpp \
llamafile/chatbot_direct.cpp \
llamafile/chatbot_eval.cpp \
llamafile/chatbot_file.cpp \
llamafile/chatbot_help.cpp \
llamafile/chatbot_hint.cpp \
llamafile/chatbot_hist.cpp \
llamafile/chatbot_logo.cpp \
llamafile/chatbot_main.cpp \
llamafile/chatbot_repl.cpp \
llamafile/compute.cpp \
llamafile/datauri.cpp \
llamafile/extract_data_uris.cpp \
llamafile/image.cpp \
llamafile/llama.cpp \
llamafile/string.cpp \
llamafile/xterm.cpp \
$(LLAMAFILE_HIGHLIGHT_SRCS)
# ==============================================================================
# TinyBLAS CPU Optimized Kernels
# ==============================================================================
# These provide runtime CPU dispatch to architecture-specific SIMD implementations
# for matrix multiplication (sgemm) and mixture-of-experts (mixmul) operations.
# One translation unit per CPU variant; the per-variant compiler flags are
# attached to the resulting objects in the TinyBLAS flags section below.
TINYBLAS_CPU_SGEMM_SRCS := \
llamafile/tinyblas_cpu_sgemm_amd_avx.cpp \
llamafile/tinyblas_cpu_sgemm_amd_fma.cpp \
llamafile/tinyblas_cpu_sgemm_amd_avx2.cpp \
llamafile/tinyblas_cpu_sgemm_amd_avxvnni.cpp \
llamafile/tinyblas_cpu_sgemm_amd_avx512f.cpp \
llamafile/tinyblas_cpu_sgemm_amd_zen4.cpp \
llamafile/tinyblas_cpu_sgemm_arm80.cpp \
llamafile/tinyblas_cpu_sgemm_arm82.cpp \
llamafile/tinyblas_cpu_unsupported.cpp
TINYBLAS_CPU_MIXMUL_SRCS := \
llamafile/tinyblas_cpu_mixmul_amd_avx.cpp \
llamafile/tinyblas_cpu_mixmul_amd_fma.cpp \
llamafile/tinyblas_cpu_mixmul_amd_avx2.cpp \
llamafile/tinyblas_cpu_mixmul_amd_avxvnni.cpp \
llamafile/tinyblas_cpu_mixmul_amd_avx512f.cpp \
llamafile/tinyblas_cpu_mixmul_amd_zen4.cpp \
llamafile/tinyblas_cpu_mixmul_arm80.cpp \
llamafile/tinyblas_cpu_mixmul_arm82.cpp
# IQK (Integer Quantized Kernels) for optimized k-quant/i-quant matmul
# Provides 150-400% speedup for Q4_K, Q5_K, Q6_K quantized models
TINYBLAS_CPU_IQK_SRCS := \
llamafile/iqk_mul_mat_amd_avx2.cpp \
llamafile/iqk_mul_mat_amd_zen4.cpp \
llamafile/iqk_mul_mat_arm82.cpp
# Aggregate of the dispatcher (sgemm.cpp) plus every per-variant kernel file.
TINYBLAS_CPU_SRCS := \
llamafile/sgemm.cpp \
$(TINYBLAS_CPU_SGEMM_SRCS) \
$(TINYBLAS_CPU_MIXMUL_SRCS) \
$(TINYBLAS_CPU_IQK_SRCS)
# Each source maps to o/$(MODE)/<path>.o. These objects are linked through
# LLAMAFILE_DEPS rather than LLAMAFILE_OBJS.
TINYBLAS_CPU_OBJS := $(TINYBLAS_CPU_SRCS:%.cpp=o/$(MODE)/%.o)
# ==============================================================================
# Object files
# ==============================================================================
# Every core C and C++ source maps to o/$(MODE)/<path>.o (the source suffix is
# replaced, not appended). main.cpp and server.cpp are not part of this list;
# their objects are named explicitly on the executable's prerequisite list.
LLAMAFILE_OBJS := \
$(LLAMAFILE_SRCS_C:%.c=o/$(MODE)/%.o) \
$(LLAMAFILE_SRCS_CPP:%.cpp=o/$(MODE)/%.o)
# ==============================================================================
# Dependency libraries
# ==============================================================================
# Dependencies from llama.cpp/BUILD.mk:
# GGML_OBJS - Core tensor operations
# LLAMA_OBJS - LLM inference
# COMMON_OBJS - Common utilities (arg parsing, sampling, chat templates)
# MTMD_OBJS - Multimodal support (vision models)
# HTTPLIB_OBJS - HTTP client support for downloads
# Dependencies from llamafile/highlight/BUILD.mk:
# We only need the gperf-generated keyword dictionary objects, not the
# highlight cpp files (since we have our own copies in llamafile/highlight)
# The wildcard is expanded once, when this file is read (":=" assignment), and
# each *.gperf file maps to o/$(MODE)/<path>.o.
# NOTE(review): the rule that builds a .o from a .gperf is not in this file —
# presumably it lives in llamafile/highlight/BUILD.mk or build/rules.mk; confirm.
LLAMAFILE_HIGHLIGHT_GPERF_FILES := $(wildcard llamafile/highlight/*.gperf)
LLAMAFILE_HIGHLIGHT_KEYWORDS := $(LLAMAFILE_HIGHLIGHT_GPERF_FILES:%.gperf=o/$(MODE)/%.o)
# Server objects for llamafile
# These are the llama.cpp server translation units other than server.cpp
# itself; server.cpp is compiled separately by the rule in the "Server
# integration" section so that it picks up LLAMAFILE_CPPFLAGS.
# NOTE(review): the ".cpp.o" naming suggests these are built by rules defined
# outside this file (presumably llama.cpp/BUILD.mk) — confirm.
LLAMAFILE_SERVER_SUPPORT_OBJS := \
o/$(MODE)/llama.cpp/tools/server/server-common.cpp.o \
o/$(MODE)/llama.cpp/tools/server/server-context.cpp.o \
o/$(MODE)/llama.cpp/tools/server/server-http.cpp.o \
o/$(MODE)/llama.cpp/tools/server/server-models.cpp.o \
o/$(MODE)/llama.cpp/tools/server/server-queue.cpp.o \
o/$(MODE)/llama.cpp/tools/server/server-task.cpp.o
# Metal source files to embed in the executable (for runtime compilation on macOS)
# These are extracted at runtime and compiled into ggml-metal.dylib
# Each entry is the ".zip.o" object for one source or header file; because the
# names end in ".o" they pass the $(filter %.o,...) on the link line.
# NOTE(review): the rule that turns a file into its ".zip.o" is not in this
# file — presumably a shared rule in build/rules.mk; confirm.
LLAMAFILE_METAL_SOURCES := \
o/$(MODE)/llama.cpp/ggml/src/ggml.c.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-alloc.c.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-backend.cpp.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-quants.c.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-threading.cpp.zip.o \
o/$(MODE)/llama.cpp/ggml/include/ggml.h.zip.o \
o/$(MODE)/llama.cpp/ggml/include/gguf.h.zip.o \
o/$(MODE)/llama.cpp/ggml/include/ggml-cpu.h.zip.o \
o/$(MODE)/llama.cpp/ggml/include/ggml-alloc.h.zip.o \
o/$(MODE)/llama.cpp/ggml/include/ggml-backend.h.zip.o \
o/$(MODE)/llama.cpp/ggml/include/ggml-metal.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-impl.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-common.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-quants.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-threading.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-backend-impl.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-cpu/ggml-cpu-impl.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal.cpp.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal.metal.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-impl.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.m.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-device.cpp.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-context.m.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-common.cpp.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.h.zip.o \
o/$(MODE)/llama.cpp/ggml/src/ggml-metal/ggml-metal-ops.cpp.zip.o
# Use deferred expansion (=) since this depends on variables from llama.cpp/BUILD.mk
# With "=" the referenced variables are looked up when LLAMAFILE_DEPS is
# expanded, not when this line is read, so it does not matter if
# llama.cpp/BUILD.mk is included after this file.
# Everything here is linked into the llamafile executable in addition to
# LLAMAFILE_OBJS (see the "Main executable" rule).
LLAMAFILE_DEPS = \
$(GGML_OBJS) \
$(LLAMA_OBJS) \
$(COMMON_OBJS) \
$(MTMD_OBJS) \
$(HTTPLIB_OBJS) \
$(LLAMAFILE_SERVER_SUPPORT_OBJS) \
$(LLAMAFILE_HIGHLIGHT_KEYWORDS) \
$(LLAMAFILE_METAL_SOURCES) \
$(TINYBLAS_CPU_OBJS) \
o/$(MODE)/third_party/stb/stb_image_resize2.o
# ==============================================================================
# Server integration
# ==============================================================================
# Include paths needed for server compilation
# Adds the server source directory and its counterpart under o/$(MODE) to the
# common include paths.
# NOTE(review): the o/$(MODE) directory presumably holds headers generated from
# $(SERVER_ASSETS); confirm against the rule that produces them.
LLAMAFILE_SERVER_INCS := \
$(LLAMAFILE_INCLUDES) \
-iquote llama.cpp/tools/server \
-iquote o/$(MODE)/llama.cpp/tools/server
# Compile server.cpp
# The object is placed under o/$(MODE)/llamafile/ and built with
# LLAMAFILE_CPPFLAGS, so -DLLAMAFILE_TUI and -DCOSMOCC=1 apply to it.
# $< is the first prerequisite (server.cpp); $(SERVER_ASSETS) is listed only so
# that a change to the assets triggers a recompile. SERVER_ASSETS is defined
# outside this file.
o/$(MODE)/llamafile/server.cpp.o: llama.cpp/tools/server/server.cpp $(SERVER_ASSETS)
@mkdir -p $(@D)
$(CXX) $(CXXFLAGS) $(LLAMAFILE_CPPFLAGS) $(LLAMAFILE_SERVER_INCS) -c -o $@ $<
# ==============================================================================
# Main executable
# ==============================================================================
# main.cpp: no special includes needed (combined mode uses server_main via forward decl)
o/$(MODE)/llamafile/main.o: llamafile/main.cpp
@mkdir -p $(@D)
$(CXX) $(CXXFLAGS) $(LLAMAFILE_CPPFLAGS) -c -o $@ $<
# Link the llamafile executable from main.o, the TUI-flavoured server.cpp.o,
# the package objects and all dependency objects.
# $(filter %.o,$^) passes only prerequisites ending in ".o" to the linker, so
# any non-object prerequisite (e.g. entries of $(SERVER_ASSETS)) affects
# rebuild decisions without appearing on the link command line.
o/$(MODE)/llamafile/llamafile: \
o/$(MODE)/llamafile/main.o \
o/$(MODE)/llamafile/server.cpp.o \
$(LLAMAFILE_OBJS) \
$(LLAMAFILE_DEPS) \
$(SERVER_ASSETS)
@mkdir -p $(@D)
$(CXX) $(LDFLAGS) -o $@ $(filter %.o,$^) $(LDLIBS)
# ==============================================================================
# Pattern rules for llamafile sources
# ==============================================================================
# metal.c needs GGML_VERSION and GGML_COMMIT for runtime Metal compilation
# GGML_VERSION and GGML_COMMIT are inherited from build/config.mk
# Both are passed as string-literal macros: the \" escapes keep the double
# quotes in the -D value so the preprocessor sees a quoted string.
# This explicit rule takes precedence over the generic llamafile/%.c pattern
# rule for this one object.
o/$(MODE)/llamafile/metal.o: llamafile/metal.c
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) $(LLAMAFILE_CPPFLAGS) \
		-DGGML_VERSION=\"$(GGML_VERSION)\" \
		-DGGML_COMMIT=\"$(GGML_COMMIT)\" \
		-c -o $@ $<
# Generic compile rules for sources under llamafile/.
# Each rule creates the output directory on demand and compiles exactly one
# source file ($<) into the matching object ($@) with the shared llamafile
# preprocessor flags.

# C sources: llamafile/foo.c -> o/$(MODE)/llamafile/foo.o
o/$(MODE)/llamafile/%.o: llamafile/%.c
	@mkdir -p $(@D)
	$(CC) $(CFLAGS) $(LLAMAFILE_CPPFLAGS) -c -o $@ $<
# C++ sources: llamafile/foo.cpp -> o/$(MODE)/llamafile/foo.o
o/$(MODE)/llamafile/%.o: llamafile/%.cpp
	@mkdir -p $(@D)
	$(CXX) $(CXXFLAGS) $(LLAMAFILE_CPPFLAGS) -c -o $@ $<
# C++ sources in the highlight/ subdirectory (same recipe as above)
o/$(MODE)/llamafile/highlight/%.o: llamafile/highlight/%.cpp
	@mkdir -p $(@D)
	$(CXX) $(CXXFLAGS) $(LLAMAFILE_CPPFLAGS) -c -o $@ $<
# ==============================================================================
# TinyBLAS CPU Architecture-Specific Compilation Flags
# ==============================================================================
# Each variant is compiled with flags specific to its target CPU architecture.
# The -Xx86_64 and -Xaarch64 prefixes are cosmocc conventions for arch-specific flags.
# The -mgcc flag is critical for enabling GCC SIMD intrinsics with cosmocc.
# Static pattern rule for tinyblas CPU files
# This ensures these targets use the specialized recipe with SIMD flags
# The rule applies only to the objects listed in $(TINYBLAS_CPU_OBJS) and maps
# o/$(MODE)/<path>.o back to <path>.cpp. Unlike the generic llamafile rules it
# compiles with $(CPPFLAGS) $(CCFLAGS) $(TARGET_ARCH), which are extended per
# target below.
$(TINYBLAS_CPU_OBJS): o/$(MODE)/%.o: %.cpp
	@mkdir -p $(@D)
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(CCFLAGS) $(TARGET_ARCH) -c -o $@ $<
# Base flags for all tinyblas CPU files
# -mgcc enables GCC intrinsics (__m128, __m256, etc.) with cosmocc
# These are target-specific variable values; the "private" modifier keeps
# them from being inherited by the prerequisites of these targets.
$(TINYBLAS_CPU_OBJS): private CCFLAGS += -O3 -fopenmp -mgcc
$(TINYBLAS_CPU_OBJS): private CPPFLAGS += $(LLAMAFILE_INCLUDES) -DCOSMOCC=1 -DGGML_USE_LLAMAFILE
# Per-variant TARGET_ARCH additions for the tinyblas sgemm/mixmul objects.
# Each assignment is target-specific (+= appends only for the listed objects)
# and "private" (not inherited by prerequisites). Every x86_64 tier repeats
# the ISA flags of the tier it builds on and adds its own.

# x86_64 AVX (Sandy Bridge, Ivy Bridge - 2010-2012)
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_amd_avx.o \
o/$(MODE)/llamafile/tinyblas_cpu_mixmul_amd_avx.o: \
		private TARGET_ARCH += -Xx86_64-mtune=sandybridge -Xx86_64-mavx -Xx86_64-mf16c
# x86_64 FMA (AMD Piledriver - 2011-2014)
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_amd_fma.o \
o/$(MODE)/llamafile/tinyblas_cpu_mixmul_amd_fma.o: \
		private TARGET_ARCH += -Xx86_64-mtune=bdver2 -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma
# x86_64 AVX2 (Haswell, Broadwell, Skylake - 2013-2020)
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_amd_avx2.o \
o/$(MODE)/llamafile/tinyblas_cpu_mixmul_amd_avx2.o: \
		private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
# x86_64 AVX-VNNI (Intel Alder Lake - 2021+)
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_amd_avxvnni.o \
o/$(MODE)/llamafile/tinyblas_cpu_mixmul_amd_avxvnni.o: \
		private TARGET_ARCH += -Xx86_64-mtune=alderlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavxvnni
# x86_64 AVX-512F (Intel Skylake-X, Xeon - 2015+)
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_amd_avx512f.o \
o/$(MODE)/llamafile/tinyblas_cpu_mixmul_amd_avx512f.o: \
		private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
# x86_64 Zen4 (AMD Zen 4 - 2023+, with AVX-512 BF16/VNNI)
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_amd_zen4.o \
o/$(MODE)/llamafile/tinyblas_cpu_mixmul_amd_zen4.o: \
		private TARGET_ARCH += -Xx86_64-mtune=znver4 -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512vl -Xx86_64-mavx512vnni -Xx86_64-mavx512bf16
# ARM64 v8.2-a (Apple M1/M2, Raspberry Pi 5 - with FP16 and dotprod)
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_arm82.o \
o/$(MODE)/llamafile/tinyblas_cpu_mixmul_arm82.o: \
		private TARGET_ARCH += -Xaarch64-march=armv8.2-a+dotprod+fp16
# ARM64 v8.0-a baseline and unsupported have no special flags
# IQK (Integer Quantized Kernels) architecture-specific flags
# Same mechanism as the tinyblas variants above: private, target-specific
# additions to TARGET_ARCH for one object each.
# AVX2 variant (Haswell+)
o/$(MODE)/llamafile/iqk_mul_mat_amd_avx2.o: \
		private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mavx -Xx86_64-mavx2 -Xx86_64-mfma -Xx86_64-mf16c
# Zen4 variant (AMD Zen 4+ with AVX-512)
# NOTE(review): this variant tunes for skylake, whereas the tinyblas zen4
# objects use -mtune=znver4. -mtune only affects scheduling, not the enabled
# instruction set; confirm whether the difference is intentional.
o/$(MODE)/llamafile/iqk_mul_mat_amd_zen4.o: \
		private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mavx -Xx86_64-mavx2 -Xx86_64-mfma -Xx86_64-mf16c -Xx86_64-mavx512f -Xx86_64-mavx512vl -Xx86_64-mavx512vnni -Xx86_64-mavx512bw -Xx86_64-mavx512dq
# ARM82 variant (Apple M1+, Raspberry Pi 5)
o/$(MODE)/llamafile/iqk_mul_mat_arm82.o: \
		private TARGET_ARCH += -Xaarch64-march=armv8.2-a+dotprod+fp16
# ==============================================================================
# Targets
# ==============================================================================
# Convenience alias: "make o/$(MODE)/llamafile" builds the executable
# o/$(MODE)/llamafile/llamafile. The alias shares its name with the output
# directory, so it is declared phony to make the rule run regardless of
# whether that directory already exists.
.PHONY: o/$(MODE)/llamafile
o/$(MODE)/llamafile: o/$(MODE)/llamafile/llamafile
================================================
FILE: llamafile/args.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "args.h"
#include "llamafile.h"
#include
#include
namespace lf {
// Static storage for the filtered argv handed to llama.cpp.
// It has static storage duration so that the pointer array exposed through
// LlamafileArgs::llama_argv remains valid after parse_llamafile_args()
// returns. Elements are the caller's original argv pointers (not copies),
// followed by a terminating nullptr. The element type must be spelled out:
// with no initializer there is nothing to deduce it from.
static std::vector<char*> g_filtered_argv;
// Helper: reports whether `arg` exactly matches one of the switches that
// llamafile handles itself and that llama.cpp's argument parser does not know.
static bool is_llamafile_flag(const char* arg) {
    static const char* const kLlamafileOnlyFlags[] = {
        "--server", "--chat",   "--cli",     "--gpu",
        "--ascii",  "--nologo", "--nothink", "--version",
    };
    for (const char* flag : kLlamafileOnlyFlags) {
        if (strcmp(arg, flag) == 0) {
            return true;
        }
    }
    return false;
}
// Parses the llamafile command line.
//
// Side effects:
//   - calls llamafile_early_gpu_init(argv) on the unfiltered arguments
//   - sets FLAG_verbose, FLAG_nothink, FLAG_nologo and FLAG_ascii
//   - overwrites the static g_filtered_argv storage
//
// Returns a LlamafileArgs holding the selected mode, the last -p/--prompt and
// -m/--model values, and an argc/argv pair with the llamafile-only flags
// removed. llama_argv points into g_filtered_argv, so it is only valid until
// the next call; its entries alias the caller's argv strings.
LlamafileArgs parse_llamafile_args(int argc, char** argv) {
    LlamafileArgs args;

    // Early GPU init must happen before we filter args
    // This reads --gpu and -ngl flags to set FLAG_gpu
    llamafile_early_gpu_init(argv);

    // Capture -p/--prompt value before filtering (needed for combined mode
    // where SERVER parsing excludes -p), and likewise -m/--model.
    // Note: Loop does not break early; if a flag is given several times,
    // the last occurrence wins (intentional for override flexibility).
    // A flag that is the final argument has no value and is ignored here.
    for (int i = 0; i < argc; ++i) {
        if ((strcmp(argv[i], "-p") == 0 || strcmp(argv[i], "--prompt") == 0) && i + 1 < argc) {
            args.system_prompt = argv[i + 1];
        }
        if ((strcmp(argv[i], "-m") == 0 || strcmp(argv[i], "--model") == 0) && i + 1 < argc) {
            args.model_path = argv[i + 1];
        }
    }

    // Determine execution mode from flags
    // Priority when several are present: --server, then --chat, then --cli
    if (llamafile_has(argv, "--server")) {
        args.mode = ProgramMode::SERVER;
    } else if (llamafile_has(argv, "--chat")) {
        args.mode = ProgramMode::CHAT;
    } else if (llamafile_has(argv, "--cli")) {
        args.mode = ProgramMode::CLI;
    } else {
        // AUTO mode: will run combined chat + server
        args.mode = ProgramMode::AUTO;
    }

    // Check verbose flag (note: --verbose is NOT filtered out below)
    FLAG_verbose = llamafile_has(argv, "--verbose") ? 1 : 0;

    // Check --nothink flag (filters thinking/reasoning content in CLI mode)
    FLAG_nothink = llamafile_has(argv, "--nothink");

    // Check logo flags
    FLAG_nologo = llamafile_has(argv, "--nologo");
    FLAG_ascii = llamafile_has(argv, "--ascii");

    // Filter out llamafile-specific arguments
    // These are not recognized by llama.cpp and would cause errors
    g_filtered_argv.clear();
    for (int i = 0; i < argc; ++i) {
        const char* arg = argv[i];

        // Skip llamafile-specific flags
        if (is_llamafile_flag(arg)) {
            // --gpu takes a value argument, skip it too
            if (strcmp(arg, "--gpu") == 0 && i + 1 < argc) {
                ++i;
            }
            continue;
        }

        // Keep this argument
        g_filtered_argv.push_back(argv[i]);
    }

    // Null-terminate argv array (required by convention)
    g_filtered_argv.push_back(nullptr);

    // The terminating nullptr is not counted in argc. The cast needs an
    // explicit target type; llama_argc is an int.
    args.llama_argc = static_cast<int>(g_filtered_argv.size()) - 1;
    args.llama_argv = g_filtered_argv.data();
    return args;
}
} // namespace lf
================================================
FILE: llamafile/args.h
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include
namespace lf {
// How the llamafile binary runs, as selected on the command line.
// The numeric values are the ones the enumerators receive implicitly from
// their declaration order.
enum class ProgramMode {
    AUTO = 0,    // Default (no mode flag): combined chat + server
    CHAT = 1,    // --chat: TUI chat only
    SERVER = 2,  // --server: HTTP server only
    CLI = 3,     // --cli: Single prompt -> response, then exit
};
// Result of parsing the llamafile command line.
struct LlamafileArgs {
    // Execution mode; stays AUTO unless a mode flag selects another one.
    ProgramMode mode{ProgramMode::AUTO};

    // Argument count/vector to forward to llama.cpp, with the
    // llamafile-specific arguments already removed.
    int llama_argc{0};
    char** llama_argv{nullptr};

    // Value given with -p; kept separately because SERVER argument parsing
    // excludes -p, yet combined mode still needs the system prompt.
    std::string system_prompt{};

    // Value given with -m; shown by the TUI in combined mode.
    std::string model_path{};

    // Note: llamafile-specific flags are not stored here but in the FLAG_*
    // globals declared in llamafile.h:
    //   --verbose -> FLAG_verbose
    //   --nothink -> FLAG_nothink
    //   --gpu     -> FLAG_gpu (set by llamafile_early_gpu_init)
};
// Parse command line arguments, determine execution mode, and filter out
// llamafile-specific arguments before passing to llama.cpp.
//
// This function:
// 1. Calls llamafile_early_gpu_init() to handle GPU flags
// 2. Captures the values of -p/--prompt and -m/--model (if a flag is
//    repeated, the last occurrence wins)
// 3. Determines the program mode from --server, --chat, --cli flags (checked
//    in that order; AUTO when none is present)
// 4. Sets FLAG_verbose, FLAG_nothink, FLAG_nologo and FLAG_ascii
// 5. Removes llamafile-specific flags from argv: --server, --chat, --cli,
//    --gpu (together with its value), --ascii, --nologo, --nothink, --version
// 6. Returns filtered argc/argv suitable for llama.cpp
//
// The returned llama_argv is null-terminated and points into static storage
// that is overwritten by the next call; its entries alias the strings of the
// argv passed in.
LlamafileArgs parse_llamafile_args(int argc, char** argv);
} // namespace lf
================================================
FILE: llamafile/bestline.c
================================================
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;tab-width:8;coding:utf-8 -*-│
│ vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi │
╞══════════════════════════════════════════════════════════════════════════════╡
│ │
│ Bestline ── Library for interactive pseudoteletypewriter command │
│ sessions using ANSI Standard X3.64 control sequences │
│ │
│ OVERVIEW │
│ │
│ Bestline is a fork of linenoise (a popular readline alternative) │
│ that fixes its bugs and adds the missing features while reducing │
│ binary footprint (surprisingly) by removing bloated dependencies │
│ which means you can finally have a permissively-licensed command │
│ prompt w/ a 30kb footprint that's nearly as good as gnu readline │
│ │
│ EXAMPLE │
│ │
│ main() { │
│ char *line; │
│ while ((line = bestlineWithHistory("IN> ", "foo"))) { │
│ fputs("OUT> ", stdout); │
│ fputs(line, stdout); │
│ fputs("\n", stdout); │
│ free(line); │
│ } │
│ } │
│ │
│ CHANGES │
│ │
│ - Remove bell │
│ - Add kill ring │
│ - Fix flickering │
│ - Add UTF-8 editing │
│ - Add CTRL-R search │
│ - Support unlimited lines │
│ - Add parentheses awareness │
│ - React to terminal resizing │
│ - Don't generate .data section │
│ - Support terminal flow control │
│ - Make history loading 10x faster │
│ - Make multiline mode the only mode │
│ - Accommodate O_NONBLOCK file descriptors │
│ - Restore raw mode on process foregrounding │
│ - Make source code compatible with C++ compilers │
│ - Fix corruption issues by using generalized parsing │
│ - Implement nearly all GNU readline editing shortcuts │
│ - Remove heavyweight dependencies like printf/sprintf │
│ - Remove ISIG→^C→EAGAIN hack and use ephemeral handlers │
│ - Support running on Windows in MinTTY or CMD.EXE on Win10+ │
│ - Support diacratics, русский, Ελληνικά, 漢字, 仮名, 한글 │
│ │
│ SHORTCUTS │
│ │
│ CTRL-E END │
│ CTRL-A START │
│ CTRL-B BACK │
│ CTRL-F FORWARD │
│ CTRL-L CLEAR │
│ CTRL-H BACKSPACE │
│ CTRL-D DELETE │
│ CTRL-Y YANK │
│ CTRL-D EOF (IF EMPTY) │
│ CTRL-N NEXT HISTORY │
│ CTRL-P PREVIOUS HISTORY │
│ CTRL-R SEARCH HISTORY │
│ CTRL-G CANCEL SEARCH │
│ CTRL-J INSERT NEWLINE │
│ ALT-< BEGINNING OF HISTORY │
│ ALT-> END OF HISTORY │
│ ALT-F FORWARD WORD │
│ ALT-B BACKWARD WORD │
│ CTRL-ALT-F FORWARD EXPR │
│ CTRL-ALT-B BACKWARD EXPR │
│ ALT-RIGHT FORWARD EXPR │
│ ALT-LEFT BACKWARD EXPR │
│ ALT-SHIFT-B BARF EXPR │
│ ALT-SHIFT-S SLURP EXPR │
│ ALT-SHIFT-R RAISE EXPR │
│ CTRL-K KILL LINE FORWARDS │
│ CTRL-U KILL LINE BACKWARDS │
│ ALT-H KILL WORD BACKWARDS │
│ CTRL-W KILL WORD BACKWARDS │
│ CTRL-ALT-H KILL WORD BACKWARDS │
│ ALT-D KILL WORD FORWARDS │
│ ALT-Y ROTATE KILL RING AND YANK AGAIN │
│ ALT-\ SQUEEZE ADJACENT WHITESPACE │
│ CTRL-T TRANSPOSE │
│ ALT-T TRANSPOSE WORD │
│ ALT-U UPPERCASE WORD │
│ ALT-L LOWERCASE WORD │
│ ALT-C CAPITALIZE WORD │
│ CTRL-C CTRL-C INTERRUPT PROCESS │
│ CTRL-Z SUSPEND PROCESS │
│ CTRL-\ QUIT PROCESS │
│ CTRL-S PAUSE OUTPUT │
│ CTRL-Q UNPAUSE OUTPUT (IF PAUSED) │
│ CTRL-Q ESCAPED INSERT │
│ CTRL-SPACE SET MARK │
│ CTRL-X CTRL-X GOTO MARK │
│ PROTIP REMAP CAPS LOCK TO CTRL │
│ │
╞══════════════════════════════════════════════════════════════════════════════╡
│ │
│ Copyright 2018-2021 Justine Tunney │
│ Copyright 2010-2016 Salvatore Sanfilippo │
│ Copyright 2010-2013 Pieter Noordhuis │
│ │
│ All rights reserved. │
│ │
│ Redistribution and use in source and binary forms, with or without │
│ modification, are permitted provided that the following conditions are │
│ met: │
│ │
│ * Redistributions of source code must retain the above copyright │
│ notice, this list of conditions and the following disclaimer. │
│ │
│ * Redistributions in binary form must reproduce the above copyright │
│ notice, this list of conditions and the following disclaimer in the │
│ documentation and/or other materials provided with the distribution. │
│ │
│ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS │
│ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT │
│ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR │
│ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT │
│ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, │
│ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT │
│ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, │
│ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY │
│ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT │
│ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE │
│ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. │
│ │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "bestline.h"
#define _POSIX_C_SOURCE 1 /* so GCC builds in ANSI mode */
#define _XOPEN_SOURCE 700 /* so GCC builds in ANSI mode */
#define _DARWIN_C_SOURCE 1 /* so SIGWINCH / IUTF8 on XNU */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/* Fallback for builds whose headers do not expose SIGWINCH. */
#ifndef SIGWINCH
#define SIGWINCH 28 /* GNU/Systemd + XNU + FreeBSD + NetBSD + OpenBSD */
#endif
/* Fallback for platforms without the IUTF8 termios flag: defining it as 0
 * turns any bitwise use of the flag into a no-op.
 * NOTE(review): the uses of IUTF8 are outside this excerpt. */
#ifndef IUTF8
#define IUTF8 0
#endif
__asm__(".ident\t\"\\n\\n\
Bestline (BSD-2)\\n\
Copyright 2018-2020 Justine Tunney \\n\
Copyright 2010-2016 Salvatore Sanfilippo \\n\
Copyright 2010-2013 Pieter Noordhuis \"");
/* Number of slots in struct bestlineRing; may be overridden at build time. */
#ifndef BESTLINE_MAX_RING
#define BESTLINE_MAX_RING 8
#endif
/* Capacity of the history[] array; may be overridden at build time. */
#ifndef BESTLINE_MAX_HISTORY
#define BESTLINE_MAX_HISTORY 1024
#endif
/* Direction values for history navigation (the users of these constants are
 * outside this excerpt). */
#define BESTLINE_HISTORY_PREV +1
#define BESTLINE_HISTORY_NEXT -1
/* Converts a character to its control-key code by toggling bit 0100 (octal),
 * e.g. Ctrl('A') == 1 and Ctrl('?') == 0177. */
#define Ctrl(C) ((C) ^ 0100)
/* Smaller / larger of two values. Arguments are evaluated more than once, so
 * they must not have side effects. */
#define Min(X, Y) ((Y) > (X) ? (X) : (Y))
#define Max(X, Y) ((Y) < (X) ? (X) : (Y))
/* Shorthand for a switch arm that executes Y and then breaks. */
#define Case(X, Y) \
    case X: \
        Y; \
        break
/* Assemble an unsigned little-endian 16-bit / 32-bit value from the bytes at
 * X. Each byte is masked with 255 first, so a negative char cannot
 * sign-extend into the higher bits. Shift counts are written in octal
 * (010 == 8, 020 == 16, 030 == 24). */
#define Read16le(X) ((255 & (X)[0]) << 000 | (255 & (X)[1]) << 010)
#define Read32le(X) \
    ((unsigned)(255 & (X)[0]) << 000 | (unsigned)(255 & (X)[1]) << 010 | \
     (unsigned)(255 & (X)[2]) << 020 | (unsigned)(255 & (X)[3]) << 030)
/* Character buffer described by a pointer plus two unsigned counters.
 * NOTE(review): the code that fills it is outside this excerpt; the field
 * names suggest a growable append buffer. */
struct abuf {
    char *b;      /* buffer storage */
    unsigned len; /* presumably bytes currently in use -- confirm */
    unsigned cap; /* presumably bytes allocated for b -- confirm */
};
/* Pair of unsigned values.
 * NOTE(review): presumably a decoded character (c) together with the number
 * of bytes it occupied (n); the producers are outside this excerpt. */
struct rune {
    unsigned c;
    unsigned n;
};
/* Fixed array of BESTLINE_MAX_RING string pointers plus an index into it.
 * NOTE(review): presumably the kill ring listed under CHANGES in the file
 * header; the code that rotates it is outside this excerpt. */
struct bestlineRing {
    unsigned i;                 /* index into p[] */
    char *p[BESTLINE_MAX_RING]; /* stored strings */
};
/* The bestlineState structure represents the state during line editing.
 * We pass this state to functions implementing specific editing
 * functionalities.
 *
 * It bundles the terminal file descriptors and window size, the line being
 * edited (buf, with buflen, len and the cursor index pos), and bookkeeping
 * for history, marks, yanking and screen refreshes. */
struct bestlineState {
    int ifd;            /* terminal stdin file descriptor */
    int ofd;            /* terminal stdout file descriptor */
    struct winsize ws;  /* rows and columns in terminal */
    char *buf;          /* edited line buffer */
    const char *prompt; /* prompt to display */
    int hindex;         /* history index */
    int rows;           /* rows being used */
    int oldpos;         /* previous refresh cursor position */
    unsigned buflen;    /* edited line buffer size */
    unsigned pos;       /* current buffer index */
    unsigned len;       /* current edited line length */
    unsigned mark;      /* saved cursor position */
    unsigned yi, yj;    /* boundaries of last yank */
    char seq[2][16];    /* keystroke history for yanking code */
    char final;         /* set to true on last update */
    char dirty;         /* if an update was squashed */
    struct abuf full;   /* used for multiline mode */
};
/* Terminal names treated as unsupported.
 * NOTE(review): presumably compared against $TERM; the lookup is outside
 * this excerpt. */
static const char *const kUnsupported[] = {"dumb", "cons25", "emacs"};
/* Flags written by the bestlineOnInt/bestlineOnCont/bestlineOnWinch signal
 * handlers; each holds the number of the last signal delivered, or 0.
 *
 * They are volatile sig_atomic_t because that is the object type the C
 * standard guarantees may be written from an asynchronous signal handler.
 * volatile also stops the compiler from caching a value across code that a
 * handler may interrupt. sig_atomic_t is an integer type, so the flags can
 * still be assigned, tested and passed as signal numbers as before. */
static volatile sig_atomic_t gotint;
static volatile sig_atomic_t gotcont;
static volatile sig_atomic_t gotwinch;
/* Module-wide editor state. All of it has static storage duration and is
 * therefore zero-initialized. NOTE(review): the meaning of the individual
 * mode flags is defined by code outside this excerpt. */
static signed char rawmode;
static char maskmode;
static char emacsmode;
static char llamamode;
static char balancemode;
static char ispaused;
static char iscapital;
/* History storage: a fixed array of BESTLINE_MAX_HISTORY string pointers;
 * historylen is presumably the number of entries in use -- confirm. */
static unsigned historylen;
static struct bestlineRing ring;
/* Saved signal dispositions and terminal attributes (presumably restored
 * when editing ends -- confirm against the code that installs them). */
static struct sigaction orig_cont;
static struct sigaction orig_winch;
static struct termios orig_termios;
static char *history[BESTLINE_MAX_HISTORY];
/* User-supplied callbacks; null until set. */
static bestlineXlatCallback *xlatCallback;
static bestlineHintsCallback *hintsCallback;
static bestlineFreeHintsCallback *freeHintsCallback;
static bestlineCompletionCallback *completionCallback;
/* Forward declarations for functions defined later in the file. */
static void bestlineAtExit(void);
static void bestlineRefreshLine(struct bestlineState *);
/* Signal handler: stores the delivered signal number in gotint and does
 * nothing else (where it is installed is outside this excerpt). */
static void bestlineOnInt(int sig) {
    gotint = sig;
}
/* Signal handler: stores the delivered signal number in gotcont and does
 * nothing else (where it is installed is outside this excerpt). */
static void bestlineOnCont(int sig) {
    gotcont = sig;
}
/* Signal handler: stores the delivered signal number in gotwinch and does
 * nothing else (where it is installed is outside this excerpt). */
static void bestlineOnWinch(int sig) {
    gotwinch = sig;
}
/* Returns 1 if c lies in 0x00..0x1F or 0x7F..0x9F, otherwise 0. */
static char IsControl(unsigned c) {
    if (c < 0x20) {
        return 1;
    }
    return 0x7F <= c && c < 0xA0;
}
/**
 * Returns monospace character width.
 *
 * This will be zero for control characters, combining marks, etc.
 * Chinese, Korean, Japanese, Emoji, etc. will have a width of 2, and
 * all other characters will be 1.
 *
 * This implementation is consistent with wcwidth() on Linux, except
 * that this won't return -1 for various character ranges.
 *
 * The lookup is two chains of inclusive range tests on the code point:
 * the first chain selects width 0, the second selects width 2, and any
 * value matched by neither (including negative values) yields 1.
 *
 * @param c is the code point to measure
 * @return 0, 1, or 2
 */
int bestlineCharacterWidth(int c) {
    /* zero-width ranges */
    if ((0x000 <= c && c <= 0x01F) || (0x07F <= c && c <= 0x09F) ||
        (0x300 <= c && c <= 0x36f) || (0x483 <= c && c <= 0x489) ||
        (0x591 <= c && c <= 0x5bd) || (0x5bf <= c && c <= 0x5bf) ||
        (0x5c1 <= c && c <= 0x5c2) || (0x5c4 <= c && c <= 0x5c5) ||
        (0x5c7 <= c && c <= 0x5c7) || (0x610 <= c && c <= 0x61a) ||
        (0x61c <= c && c <= 0x61c) || (0x64b <= c && c <= 0x65f) ||
        (0x670 <= c && c <= 0x670) || (0x6d6 <= c && c <= 0x6dc) ||
        (0x6df <= c && c <= 0x6e4) || (0x6e7 <= c && c <= 0x6e8) ||
        (0x6ea <= c && c <= 0x6ed) || (0x711 <= c && c <= 0x711) ||
        (0x730 <= c && c <= 0x74a) || (0x7a6 <= c && c <= 0x7b0) ||
        (0x7eb <= c && c <= 0x7f3) || (0x7fd <= c && c <= 0x7fd) ||
        (0x816 <= c && c <= 0x819) || (0x81b <= c && c <= 0x823) ||
        (0x825 <= c && c <= 0x827) || (0x829 <= c && c <= 0x82d) ||
        (0x859 <= c && c <= 0x85b) || (0x898 <= c && c <= 0x89f) ||
        (0x8ca <= c && c <= 0x8e1) || (0x8e3 <= c && c <= 0x902) ||
        (0x93a <= c && c <= 0x93a) || (0x93c <= c && c <= 0x93c) ||
        (0x941 <= c && c <= 0x948) || (0x94d <= c && c <= 0x94d) ||
        (0x951 <= c && c <= 0x957) || (0x962 <= c && c <= 0x963) ||
        (0x981 <= c && c <= 0x981) || (0x9bc <= c && c <= 0x9bc) ||
        (0x9c1 <= c && c <= 0x9c4) || (0x9cd <= c && c <= 0x9cd) ||
        (0x9e2 <= c && c <= 0x9e3) || (0x9fe <= c && c <= 0x9fe) ||
        (0xa01 <= c && c <= 0xa02) || (0xa3c <= c && c <= 0xa3c) ||
        (0xa41 <= c && c <= 0xa42) || (0xa47 <= c && c <= 0xa48) ||
        (0xa4b <= c && c <= 0xa4d) || (0xa51 <= c && c <= 0xa51) ||
        (0xa70 <= c && c <= 0xa71) || (0xa75 <= c && c <= 0xa75) ||
        (0xa81 <= c && c <= 0xa82) || (0xabc <= c && c <= 0xabc) ||
        (0xac1 <= c && c <= 0xac5) || (0xac7 <= c && c <= 0xac8) ||
        (0xacd <= c && c <= 0xacd) || (0xae2 <= c && c <= 0xae3) ||
        (0xafa <= c && c <= 0xaff) || (0xb01 <= c && c <= 0xb01) ||
        (0xb3c <= c && c <= 0xb3c) || (0xb3f <= c && c <= 0xb3f) ||
        (0xb41 <= c && c <= 0xb44) || (0xb4d <= c && c <= 0xb4d) ||
        (0xb55 <= c && c <= 0xb56) || (0xb62 <= c && c <= 0xb63) ||
        (0xb82 <= c && c <= 0xb82) || (0xbc0 <= c && c <= 0xbc0) ||
        (0xbcd <= c && c <= 0xbcd) || (0xc00 <= c && c <= 0xc00) ||
        (0xc04 <= c && c <= 0xc04) || (0xc3c <= c && c <= 0xc3c) ||
        (0xc3e <= c && c <= 0xc40) || (0xc46 <= c && c <= 0xc48) ||
        (0xc4a <= c && c <= 0xc4d) || (0xc55 <= c && c <= 0xc56) ||
        (0xc62 <= c && c <= 0xc63) || (0xc81 <= c && c <= 0xc81) ||
        (0xcbc <= c && c <= 0xcbc) || (0xcbf <= c && c <= 0xcbf) ||
        (0xcc6 <= c && c <= 0xcc6) || (0xccc <= c && c <= 0xccd) ||
        (0xce2 <= c && c <= 0xce3) || (0xd00 <= c && c <= 0xd01) ||
        (0xd3b <= c && c <= 0xd3c) || (0xd41 <= c && c <= 0xd44) ||
        (0xd4d <= c && c <= 0xd4d) || (0xd62 <= c && c <= 0xd63) ||
        (0xd81 <= c && c <= 0xd81) || (0xdca <= c && c <= 0xdca) ||
        (0xdd2 <= c && c <= 0xdd4) || (0xdd6 <= c && c <= 0xdd6) ||
        (0xe31 <= c && c <= 0xe31) || (0xe34 <= c && c <= 0xe3a) ||
        (0xe47 <= c && c <= 0xe4e) || (0xeb1 <= c && c <= 0xeb1) ||
        (0xeb4 <= c && c <= 0xebc) || (0xec8 <= c && c <= 0xece) ||
        (0xf18 <= c && c <= 0xf19) || (0xf35 <= c && c <= 0xf35) ||
        (0xf37 <= c && c <= 0xf37) || (0xf39 <= c && c <= 0xf39) ||
        (0xf71 <= c && c <= 0xf7e) || (0xf80 <= c && c <= 0xf84) ||
        (0xf86 <= c && c <= 0xf87) || (0xf8d <= c && c <= 0xf97) ||
        (0xf99 <= c && c <= 0xfbc) || (0xfc6 <= c && c <= 0xfc6) ||
        (0x102d <= c && c <= 0x1030) || (0x1032 <= c && c <= 0x1037) ||
        (0x1039 <= c && c <= 0x103a) || (0x103d <= c && c <= 0x103e) ||
        (0x1058 <= c && c <= 0x1059) || (0x105e <= c && c <= 0x1060) ||
        (0x1071 <= c && c <= 0x1074) || (0x1082 <= c && c <= 0x1082) ||
        (0x1085 <= c && c <= 0x1086) || (0x108d <= c && c <= 0x108d) ||
        (0x109d <= c && c <= 0x109d) || (0x1160 <= c && c <= 0x11ff) ||
        (0x135d <= c && c <= 0x135f) || (0x1712 <= c && c <= 0x1714) ||
        (0x1732 <= c && c <= 0x1733) || (0x1752 <= c && c <= 0x1753) ||
        (0x1772 <= c && c <= 0x1773) || (0x17b4 <= c && c <= 0x17b5) ||
        (0x17b7 <= c && c <= 0x17bd) || (0x17c6 <= c && c <= 0x17c6) ||
        (0x17c9 <= c && c <= 0x17d3) || (0x17dd <= c && c <= 0x17dd) ||
        (0x180b <= c && c <= 0x180f) || (0x1885 <= c && c <= 0x1886) ||
        (0x18a9 <= c && c <= 0x18a9) || (0x1920 <= c && c <= 0x1922) ||
        (0x1927 <= c && c <= 0x1928) || (0x1932 <= c && c <= 0x1932) ||
        (0x1939 <= c && c <= 0x193b) || (0x1a17 <= c && c <= 0x1a18) ||
        (0x1a1b <= c && c <= 0x1a1b) || (0x1a56 <= c && c <= 0x1a56) ||
        (0x1a58 <= c && c <= 0x1a5e) || (0x1a60 <= c && c <= 0x1a60) ||
        (0x1a62 <= c && c <= 0x1a62) || (0x1a65 <= c && c <= 0x1a6c) ||
        (0x1a73 <= c && c <= 0x1a7c) || (0x1a7f <= c && c <= 0x1a7f) ||
        (0x1ab0 <= c && c <= 0x1ace) || (0x1b00 <= c && c <= 0x1b03) ||
        (0x1b34 <= c && c <= 0x1b34) || (0x1b36 <= c && c <= 0x1b3a) ||
        (0x1b3c <= c && c <= 0x1b3c) || (0x1b42 <= c && c <= 0x1b42) ||
        (0x1b6b <= c && c <= 0x1b73) || (0x1b80 <= c && c <= 0x1b81) ||
        (0x1ba2 <= c && c <= 0x1ba5) || (0x1ba8 <= c && c <= 0x1ba9) ||
        (0x1bab <= c && c <= 0x1bad) || (0x1be6 <= c && c <= 0x1be6) ||
        (0x1be8 <= c && c <= 0x1be9) || (0x1bed <= c && c <= 0x1bed) ||
        (0x1bef <= c && c <= 0x1bf1) || (0x1c2c <= c && c <= 0x1c33) ||
        (0x1c36 <= c && c <= 0x1c37) || (0x1cd0 <= c && c <= 0x1cd2) ||
        (0x1cd4 <= c && c <= 0x1ce0) || (0x1ce2 <= c && c <= 0x1ce8) ||
        (0x1ced <= c && c <= 0x1ced) || (0x1cf4 <= c && c <= 0x1cf4) ||
        (0x1cf8 <= c && c <= 0x1cf9) || (0x1dc0 <= c && c <= 0x1dff) ||
        (0x200b <= c && c <= 0x200f) || (0x202a <= c && c <= 0x202e) ||
        (0x2060 <= c && c <= 0x2064) || (0x2066 <= c && c <= 0x206f) ||
        (0x20d0 <= c && c <= 0x20f0) || (0x2cef <= c && c <= 0x2cf1) ||
        (0x2d7f <= c && c <= 0x2d7f) || (0x2de0 <= c && c <= 0x2dff) ||
        (0x302a <= c && c <= 0x302d) || (0x3099 <= c && c <= 0x309a) ||
        (0xa66f <= c && c <= 0xa672) || (0xa674 <= c && c <= 0xa67d) ||
        (0xa69e <= c && c <= 0xa69f) || (0xa6f0 <= c && c <= 0xa6f1) ||
        (0xa802 <= c && c <= 0xa802) || (0xa806 <= c && c <= 0xa806) ||
        (0xa80b <= c && c <= 0xa80b) || (0xa825 <= c && c <= 0xa826) ||
        (0xa82c <= c && c <= 0xa82c) || (0xa8c4 <= c && c <= 0xa8c5) ||
        (0xa8e0 <= c && c <= 0xa8f1) || (0xa8ff <= c && c <= 0xa8ff) ||
        (0xa926 <= c && c <= 0xa92d) || (0xa947 <= c && c <= 0xa951) ||
        (0xa980 <= c && c <= 0xa982) || (0xa9b3 <= c && c <= 0xa9b3) ||
        (0xa9b6 <= c && c <= 0xa9b9) || (0xa9bc <= c && c <= 0xa9bd) ||
        (0xa9e5 <= c && c <= 0xa9e5) || (0xaa29 <= c && c <= 0xaa2e) ||
        (0xaa31 <= c && c <= 0xaa32) || (0xaa35 <= c && c <= 0xaa36) ||
        (0xaa43 <= c && c <= 0xaa43) || (0xaa4c <= c && c <= 0xaa4c) ||
        (0xaa7c <= c && c <= 0xaa7c) || (0xaab0 <= c && c <= 0xaab0) ||
        (0xaab2 <= c && c <= 0xaab4) || (0xaab7 <= c && c <= 0xaab8) ||
        (0xaabe <= c && c <= 0xaabf) || (0xaac1 <= c && c <= 0xaac1) ||
        (0xaaec <= c && c <= 0xaaed) || (0xaaf6 <= c && c <= 0xaaf6) ||
        (0xabe5 <= c && c <= 0xabe5) || (0xabe8 <= c && c <= 0xabe8) ||
        (0xabed <= c && c <= 0xabed) || (0xd7b0 <= c && c <= 0xd7c6) ||
        (0xd7cb <= c && c <= 0xd7fb) || (0xfb1e <= c && c <= 0xfb1e) ||
        (0xfe00 <= c && c <= 0xfe0f) || (0xfe20 <= c && c <= 0xfe2f) ||
        (0xfeff <= c && c <= 0xfeff) || (0xfff9 <= c && c <= 0xfffb) ||
        (0x101fd <= c && c <= 0x101fd) || (0x102e0 <= c && c <= 0x102e0) ||
        (0x10376 <= c && c <= 0x1037a) || (0x10a01 <= c && c <= 0x10a03) ||
        (0x10a05 <= c && c <= 0x10a06) || (0x10a0c <= c && c <= 0x10a0f) ||
        (0x10a38 <= c && c <= 0x10a3a) || (0x10a3f <= c && c <= 0x10a3f) ||
        (0x10ae5 <= c && c <= 0x10ae6) || (0x10d24 <= c && c <= 0x10d27) ||
        (0x10eab <= c && c <= 0x10eac) || (0x10efd <= c && c <= 0x10eff) ||
        (0x10f46 <= c && c <= 0x10f50) || (0x10f82 <= c && c <= 0x10f85) ||
        (0x11001 <= c && c <= 0x11001) || (0x11038 <= c && c <= 0x11046) ||
        (0x11070 <= c && c <= 0x11070) || (0x11073 <= c && c <= 0x11074) ||
        (0x1107f <= c && c <= 0x11081) || (0x110b3 <= c && c <= 0x110b6) ||
        (0x110b9 <= c && c <= 0x110ba) || (0x110c2 <= c && c <= 0x110c2) ||
        (0x11100 <= c && c <= 0x11102) || (0x11127 <= c && c <= 0x1112b) ||
        (0x1112d <= c && c <= 0x11134) || (0x11173 <= c && c <= 0x11173) ||
        (0x11180 <= c && c <= 0x11181) || (0x111b6 <= c && c <= 0x111be) ||
        (0x111c9 <= c && c <= 0x111cc) || (0x111cf <= c && c <= 0x111cf) ||
        (0x1122f <= c && c <= 0x11231) || (0x11234 <= c && c <= 0x11234) ||
        (0x11236 <= c && c <= 0x11237) || (0x1123e <= c && c <= 0x1123e) ||
        (0x11241 <= c && c <= 0x11241) || (0x112df <= c && c <= 0x112df) ||
        (0x112e3 <= c && c <= 0x112ea) || (0x11300 <= c && c <= 0x11301) ||
        (0x1133b <= c && c <= 0x1133c) || (0x11340 <= c && c <= 0x11340) ||
        (0x11366 <= c && c <= 0x1136c) || (0x11370 <= c && c <= 0x11374) ||
        (0x11438 <= c && c <= 0x1143f) || (0x11442 <= c && c <= 0x11444) ||
        (0x11446 <= c && c <= 0x11446) || (0x1145e <= c && c <= 0x1145e) ||
        (0x114b3 <= c && c <= 0x114b8) || (0x114ba <= c && c <= 0x114ba) ||
        (0x114bf <= c && c <= 0x114c0) || (0x114c2 <= c && c <= 0x114c3) ||
        (0x115b2 <= c && c <= 0x115b5) || (0x115bc <= c && c <= 0x115bd) ||
        (0x115bf <= c && c <= 0x115c0) || (0x115dc <= c && c <= 0x115dd) ||
        (0x11633 <= c && c <= 0x1163a) || (0x1163d <= c && c <= 0x1163d) ||
        (0x1163f <= c && c <= 0x11640) || (0x116ab <= c && c <= 0x116ab) ||
        (0x116ad <= c && c <= 0x116ad) || (0x116b0 <= c && c <= 0x116b5) ||
        (0x116b7 <= c && c <= 0x116b7) || (0x1171d <= c && c <= 0x1171f) ||
        (0x11722 <= c && c <= 0x11725) || (0x11727 <= c && c <= 0x1172b) ||
        (0x1182f <= c && c <= 0x11837) || (0x11839 <= c && c <= 0x1183a) ||
        (0x1193b <= c && c <= 0x1193c) || (0x1193e <= c && c <= 0x1193e) ||
        (0x11943 <= c && c <= 0x11943) || (0x119d4 <= c && c <= 0x119d7) ||
        (0x119da <= c && c <= 0x119db) || (0x119e0 <= c && c <= 0x119e0) ||
        (0x11a01 <= c && c <= 0x11a0a) || (0x11a33 <= c && c <= 0x11a38) ||
        (0x11a3b <= c && c <= 0x11a3e) || (0x11a47 <= c && c <= 0x11a47) ||
        (0x11a51 <= c && c <= 0x11a56) || (0x11a59 <= c && c <= 0x11a5b) ||
        (0x11a8a <= c && c <= 0x11a96) || (0x11a98 <= c && c <= 0x11a99) ||
        (0x11c30 <= c && c <= 0x11c36) || (0x11c38 <= c && c <= 0x11c3d) ||
        (0x11c3f <= c && c <= 0x11c3f) || (0x11c92 <= c && c <= 0x11ca7) ||
        (0x11caa <= c && c <= 0x11cb0) || (0x11cb2 <= c && c <= 0x11cb3) ||
        (0x11cb5 <= c && c <= 0x11cb6) || (0x11d31 <= c && c <= 0x11d36) ||
        (0x11d3a <= c && c <= 0x11d3a) || (0x11d3c <= c && c <= 0x11d3d) ||
        (0x11d3f <= c && c <= 0x11d45) || (0x11d47 <= c && c <= 0x11d47) ||
        (0x11d90 <= c && c <= 0x11d91) || (0x11d95 <= c && c <= 0x11d95) ||
        (0x11d97 <= c && c <= 0x11d97) || (0x11ef3 <= c && c <= 0x11ef4) ||
        (0x11f00 <= c && c <= 0x11f01) || (0x11f36 <= c && c <= 0x11f3a) ||
        (0x11f40 <= c && c <= 0x11f40) || (0x11f42 <= c && c <= 0x11f42) ||
        (0x13430 <= c && c <= 0x13440) || (0x13447 <= c && c <= 0x13455) ||
        (0x16af0 <= c && c <= 0x16af4) || (0x16b30 <= c && c <= 0x16b36) ||
        (0x16f4f <= c && c <= 0x16f4f) || (0x16f8f <= c && c <= 0x16f92) ||
        (0x16fe4 <= c && c <= 0x16fe4) || (0x1bc9d <= c && c <= 0x1bc9e) ||
        (0x1bca0 <= c && c <= 0x1bca3) || (0x1cf00 <= c && c <= 0x1cf2d) ||
        (0x1cf30 <= c && c <= 0x1cf46) || (0x1d167 <= c && c <= 0x1d169) ||
        (0x1d173 <= c && c <= 0x1d182) || (0x1d185 <= c && c <= 0x1d18b) ||
        (0x1d1aa <= c && c <= 0x1d1ad) || (0x1d242 <= c && c <= 0x1d244) ||
        (0x1da00 <= c && c <= 0x1da36) || (0x1da3b <= c && c <= 0x1da6c) ||
        (0x1da75 <= c && c <= 0x1da75) || (0x1da84 <= c && c <= 0x1da84) ||
        (0x1da9b <= c && c <= 0x1da9f) || (0x1daa1 <= c && c <= 0x1daaf) ||
        (0x1e000 <= c && c <= 0x1e006) || (0x1e008 <= c && c <= 0x1e018) ||
        (0x1e01b <= c && c <= 0x1e021) || (0x1e023 <= c && c <= 0x1e024) ||
        (0x1e026 <= c && c <= 0x1e02a) || (0x1e08f <= c && c <= 0x1e08f) ||
        (0x1e130 <= c && c <= 0x1e136) || (0x1e2ae <= c && c <= 0x1e2ae) ||
        (0x1e2ec <= c && c <= 0x1e2ef) || (0x1e4ec <= c && c <= 0x1e4ef) ||
        (0x1e8d0 <= c && c <= 0x1e8d6) || (0x1e944 <= c && c <= 0x1e94a) ||
        (0xe0001 <= c && c <= 0xe0001) || (0xe0020 <= c && c <= 0xe007f) ||
        (0xe0100 <= c && c <= 0xe01ef))
        return 0;
    /* double-width ranges */
    if ((0x1100 <= c && c <= 0x115f) || (0x231a <= c && c <= 0x231b) ||
        (0x2329 <= c && c <= 0x232a) || (0x23e9 <= c && c <= 0x23ec) ||
        (0x23f0 <= c && c <= 0x23f0) || (0x23f3 <= c && c <= 0x23f3) ||
        (0x25fd <= c && c <= 0x25fe) || (0x2614 <= c && c <= 0x2615) ||
        (0x2648 <= c && c <= 0x2653) || (0x267f <= c && c <= 0x267f) ||
        (0x2693 <= c && c <= 0x2693) || (0x26a1 <= c && c <= 0x26a1) ||
        (0x26aa <= c && c <= 0x26ab) || (0x26bd <= c && c <= 0x26be) ||
        (0x26c4 <= c && c <= 0x26c5) || (0x26ce <= c && c <= 0x26ce) ||
        (0x26d4 <= c && c <= 0x26d4) || (0x26ea <= c && c <= 0x26ea) ||
        (0x26f2 <= c && c <= 0x26f3) || (0x26f5 <= c && c <= 0x26f5) ||
        (0x26fa <= c && c <= 0x26fa) || (0x26fd <= c && c <= 0x26fd) ||
        (0x2705 <= c && c <= 0x2705) || (0x270a <= c && c <= 0x270b) ||
        (0x2728 <= c && c <= 0x2728) || (0x274c <= c && c <= 0x274c) ||
        (0x274e <= c && c <= 0x274e) || (0x2753 <= c && c <= 0x2755) ||
        (0x2757 <= c && c <= 0x2757) || (0x2795 <= c && c <= 0x2797) ||
        (0x27b0 <= c && c <= 0x27b0) || (0x27bf <= c && c <= 0x27bf) ||
        (0x2b1b <= c && c <= 0x2b1c) || (0x2b50 <= c && c <= 0x2b50) ||
        (0x2b55 <= c && c <= 0x2b55) || (0x2e80 <= c && c <= 0x2e99) ||
        (0x2e9b <= c && c <= 0x2ef3) || (0x2f00 <= c && c <= 0x2fd5) ||
        (0x2ff0 <= c && c <= 0x3029) || (0x302e <= c && c <= 0x303e) ||
        (0x3041 <= c && c <= 0x3096) || (0x309b <= c && c <= 0x30ff) ||
        (0x3105 <= c && c <= 0x312f) || (0x3131 <= c && c <= 0x318e) ||
        (0x3190 <= c && c <= 0x31e3) || (0x31ef <= c && c <= 0x321e) ||
        (0x3220 <= c && c <= 0xa48c) || (0xa490 <= c && c <= 0xa4c6) ||
        (0xa960 <= c && c <= 0xa97c) || (0xac00 <= c && c <= 0xd7a3) ||
        (0xf900 <= c && c <= 0xfa6d) || (0xfa70 <= c && c <= 0xfad9) ||
        (0xfe10 <= c && c <= 0xfe19) || (0xfe30 <= c && c <= 0xfe52) ||
        (0xfe54 <= c && c <= 0xfe66) || (0xfe68 <= c && c <= 0xfe6b) ||
        (0xff01 <= c && c <= 0xff60) || (0xffe0 <= c && c <= 0xffe6) ||
        (0x16fe0 <= c && c <= 0x16fe3) || (0x16ff0 <= c && c <= 0x16ff1) ||
        (0x17000 <= c && c <= 0x187f7) || (0x18800 <= c && c <= 0x18cd5) ||
        (0x18d00 <= c && c <= 0x18d08) || (0x1aff0 <= c && c <= 0x1aff3) ||
        (0x1aff5 <= c && c <= 0x1affb) || (0x1affd <= c && c <= 0x1affe) ||
        (0x1b000 <= c && c <= 0x1b122) || (0x1b132 <= c && c <= 0x1b132) ||
        (0x1b150 <= c && c <= 0x1b152) || (0x1b155 <= c && c <= 0x1b155) ||
        (0x1b164 <= c && c <= 0x1b167) || (0x1b170 <= c && c <= 0x1b2fb) ||
        (0x1f004 <= c && c <= 0x1f004) || (0x1f0cf <= c && c <= 0x1f0cf) ||
        (0x1f18e <= c && c <= 0x1f18e) || (0x1f191 <= c && c <= 0x1f19a) ||
        (0x1f200 <= c && c <= 0x1f202) || (0x1f210 <= c && c <= 0x1f23b) ||
        (0x1f240 <= c && c <= 0x1f248) || (0x1f250 <= c && c <= 0x1f251) ||
        (0x1f260 <= c && c <= 0x1f265) || (0x1f300 <= c && c <= 0x1f320) ||
        (0x1f32d <= c && c <= 0x1f335) || (0x1f337 <= c && c <= 0x1f37c) ||
        (0x1f37e <= c && c <= 0x1f393) || (0x1f3a0 <= c && c <= 0x1f3ca) ||
        (0x1f3cf <= c && c <= 0x1f3d3) || (0x1f3e0 <= c && c <= 0x1f3f0) ||
        (0x1f3f4 <= c && c <= 0x1f3f4) || (0x1f3f8 <= c && c <= 0x1f43e) ||
        (0x1f440 <= c && c <= 0x1f440) || (0x1f442 <= c && c <= 0x1f4fc) ||
        (0x1f4ff <= c && c <= 0x1f53d) || (0x1f54b <= c && c <= 0x1f54e) ||
        (0x1f550 <= c && c <= 0x1f567) || (0x1f57a <= c && c <= 0x1f57a) ||
        (0x1f595 <= c && c <= 0x1f596) || (0x1f5a4 <= c && c <= 0x1f5a4) ||
        (0x1f5fb <= c && c <= 0x1f64f) || (0x1f680 <= c && c <= 0x1f6c5) ||
        (0x1f6cc <= c && c <= 0x1f6cc) || (0x1f6d0 <= c && c <= 0x1f6d2) ||
        (0x1f6d5 <= c && c <= 0x1f6d7) || (0x1f6dc <= c && c <= 0x1f6df) ||
        (0x1f6eb <= c && c <= 0x1f6ec) || (0x1f6f4 <= c && c <= 0x1f6fc) ||
        (0x1f7e0 <= c && c <= 0x1f7eb) || (0x1f7f0 <= c && c <= 0x1f7f0) ||
        (0x1f90c <= c && c <= 0x1f93a) || (0x1f93c <= c && c <= 0x1f945) ||
        (0x1f947 <= c && c <= 0x1f9ff) || (0x1fa70 <= c && c <= 0x1fa7c) ||
        (0x1fa80 <= c && c <= 0x1fa88) || (0x1fa90 <= c && c <= 0x1fabd) ||
        (0x1fabf <= c && c <= 0x1fac5) || (0x1face <= c && c <= 0x1fadb) ||
        (0x1fae0 <= c && c <= 0x1fae8) || (0x1faf0 <= c && c <= 0x1faf8) ||
        (0x20000 <= c && c <= 0x2a6df) || (0x2a700 <= c && c <= 0x2b739) ||
        (0x2b740 <= c && c <= 0x2b81d) || (0x2b820 <= c && c <= 0x2cea1) ||
        (0x2ceb0 <= c && c <= 0x2ebe0) || (0x2ebf0 <= c && c <= 0x2ee5d) ||
        (0x2f800 <= c && c <= 0x2fa1d) || (0x30000 <= c && c <= 0x3134a) ||
        (0x31350 <= c && c <= 0x323af))
        return 2;
    /* everything else occupies one column */
    return 1;
}
/**
* Returns nonzero if 𝑐 isn't alphanumeric.
*
* Line reading interfaces generally define this operation as UNICODE
* characters that aren't in the letter category (Lu, Ll, Lt, Lm, Lo)
 * and aren't in the number categories (Nd, Nl, No). We also add a few
* other things like blocks and emoji (So).
*/
char bestlineIsSeparator(unsigned c) {
int m, l, r, n;
if (c < 0200) {
return !(('0' <= c && c <= '9') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'));
}
if (c <= 0xffff) {
static const unsigned short kGlyphs[][2] = {
{0x00aa, 0x00aa}, /* 1x English */
{0x00b2, 0x00b3}, /* 2x English Arabic */
{0x00b5, 0x00b5}, /* 1x Greek */
{0x00b9, 0x00ba}, /* 2x English Arabic */
{0x00bc, 0x00be}, /* 3x Vulgar English Arabic */
{0x00c0, 0x00d6}, /* 23x Watin */
{0x00d8, 0x00f6}, /* 31x Watin */
{0x0100, 0x02c1}, /* 450x Watin-AB,IPA,Spacemod */
{0x02c6, 0x02d1}, /* 12x Spacemod */
{0x02e0, 0x02e4}, /* 5x Spacemod */
{0x02ec, 0x02ec}, /* 1x Spacemod */
{0x02ee, 0x02ee}, /* 1x Spacemod */
{0x0370, 0x0374}, /* 5x Greek */
{0x0376, 0x0377}, /* 2x Greek */
{0x037a, 0x037d}, /* 4x Greek */
{0x037f, 0x037f}, /* 1x Greek */
{0x0386, 0x0386}, /* 1x Greek */
{0x0388, 0x038a}, /* 3x Greek */
{0x038c, 0x038c}, /* 1x Greek */
{0x038e, 0x03a1}, /* 20x Greek */
{0x03a3, 0x03f5}, /* 83x Greek */
{0x03f7, 0x0481}, /* 139x Greek */
{0x048a, 0x052f}, /* 166x Cyrillic */
{0x0531, 0x0556}, /* 38x Armenian */
{0x0560, 0x0588}, /* 41x Armenian */
{0x05d0, 0x05ea}, /* 27x Hebrew */
{0x0620, 0x064a}, /* 43x Arabic */
{0x0660, 0x0669}, /* 10x Arabic */
{0x0671, 0x06d3}, /* 99x Arabic */
{0x06ee, 0x06fc}, /* 15x Arabic */
{0x0712, 0x072f}, /* 30x Syriac */
{0x074d, 0x07a5}, /* 89x Syriac,Arabic2,Thaana */
{0x07c0, 0x07ea}, /* 43x NKo */
{0x0800, 0x0815}, /* 22x Samaritan */
{0x0840, 0x0858}, /* 25x Mandaic */
{0x0904, 0x0939}, /* 54x Devanagari */
{0x0993, 0x09a8}, /* 22x Bengali */
{0x09e6, 0x09f1}, /* 12x Bengali */
{0x0a13, 0x0a28}, /* 22x Gurmukhi */
{0x0a66, 0x0a6f}, /* 10x Gurmukhi */
{0x0a93, 0x0aa8}, /* 22x Gujarati */
{0x0b13, 0x0b28}, /* 22x Oriya */
{0x0c92, 0x0ca8}, /* 23x Kannada */
{0x0caa, 0x0cb3}, /* 10x Kannada */
{0x0ce6, 0x0cef}, /* 10x Kannada */
{0x0d12, 0x0d3a}, /* 41x Malayalam */
{0x0d85, 0x0d96}, /* 18x Sinhala */
{0x0d9a, 0x0db1}, /* 24x Sinhala */
{0x0de6, 0x0def}, /* 10x Sinhala */
{0x0e01, 0x0e30}, /* 48x Thai */
{0x0e8c, 0x0ea3}, /* 24x Lao */
{0x0f20, 0x0f33}, /* 20x Tibetan */
{0x0f49, 0x0f6c}, /* 36x Tibetan */
{0x109e, 0x10c5}, /* 40x Myanmar,Georgian */
{0x10d0, 0x10fa}, /* 43x Georgian */
{0x10fc, 0x1248}, /* 333x Georgian,Hangul,Ethiopic */
{0x13a0, 0x13f5}, /* 86x Cherokee */
{0x1401, 0x166d}, /* 621x Aboriginal */
{0x16a0, 0x16ea}, /* 75x Runic */
{0x1700, 0x170c}, /* 13x Tagalog */
{0x1780, 0x17b3}, /* 52x Khmer */
{0x1820, 0x1878}, /* 89x Mongolian */
{0x1a00, 0x1a16}, /* 23x Buginese */
{0x1a20, 0x1a54}, /* 53x Tai Tham */
{0x1a80, 0x1a89}, /* 10x Tai Tham */
{0x1a90, 0x1a99}, /* 10x Tai Tham */
{0x1b05, 0x1b33}, /* 47x Balinese */
{0x1b50, 0x1b59}, /* 10x Balinese */
{0x1b83, 0x1ba0}, /* 30x Sundanese */
{0x1bae, 0x1be5}, /* 56x Sundanese */
{0x1c90, 0x1cba}, /* 43x Georgian2 */
{0x1cbd, 0x1cbf}, /* 3x Georgian2 */
{0x1e00, 0x1f15}, /* 278x Watin-C,Greek2 */
{0x2070, 0x2071}, /* 2x Supersub */
{0x2074, 0x2079}, /* 6x Supersub */
{0x207f, 0x2089}, /* 11x Supersub */
{0x2090, 0x209c}, /* 13x Supersub */
{0x2100, 0x2117}, /* 24x Letterlike */
{0x2119, 0x213f}, /* 39x Letterlike */
{0x2145, 0x214a}, /* 6x Letterlike */
{0x214c, 0x218b}, /* 64x Letterlike,Numbery */
{0x21af, 0x21cd}, /* 31x Arrows */
{0x21d5, 0x21f3}, /* 31x Arrows */
{0x230c, 0x231f}, /* 20x Technical */
{0x232b, 0x237b}, /* 81x Technical */
{0x237d, 0x239a}, /* 30x Technical */
{0x23b4, 0x23db}, /* 40x Technical */
{0x23e2, 0x2426}, /* 69x Technical,ControlPictures */
{0x2460, 0x25b6}, /* 343x Enclosed,Boxes,Blocks,Shapes */
{0x25c2, 0x25f7}, /* 54x Shapes */
{0x2600, 0x266e}, /* 111x Symbols */
{0x2670, 0x2767}, /* 248x Symbols,Dingbats */
{0x2776, 0x27bf}, /* 74x Dingbats */
{0x2800, 0x28ff}, /* 256x Braille */
{0x2c00, 0x2c2e}, /* 47x Glagolitic */
{0x2c30, 0x2c5e}, /* 47x Glagolitic */
{0x2c60, 0x2ce4}, /* 133x Watin-D */
{0x2d00, 0x2d25}, /* 38x Georgian2 */
{0x2d30, 0x2d67}, /* 56x Tifinagh */
{0x2d80, 0x2d96}, /* 23x Ethiopic2 */
{0x2e2f, 0x2e2f}, /* 1x Punctuation2 */
{0x3005, 0x3007}, /* 3x CJK Symbols & Punctuation */
{0x3021, 0x3029}, /* 9x CJK Symbols & Punctuation */
{0x3031, 0x3035}, /* 5x CJK Symbols & Punctuation */
{0x3038, 0x303c}, /* 5x CJK Symbols & Punctuation */
{0x3041, 0x3096}, /* 86x Hiragana */
{0x30a1, 0x30fa}, /* 90x Katakana */
{0x3105, 0x312f}, /* 43x Bopomofo */
{0x3131, 0x318e}, /* 94x Hangul Compatibility Jamo */
{0x31a0, 0x31ba}, /* 27x Bopomofo Extended */
{0x31f0, 0x31ff}, /* 16x Katakana Phonetic Extensions */
{0x3220, 0x3229}, /* 10x Enclosed CJK Letters & Months */
{0x3248, 0x324f}, /* 8x Enclosed CJK Letters & Months */
{0x3251, 0x325f}, /* 15x Enclosed CJK Letters & Months */
{0x3280, 0x3289}, /* 10x Enclosed CJK Letters & Months */
{0x32b1, 0x32bf}, /* 15x Enclosed CJK Letters & Months */
{0x3400, 0x4db5}, /* 6582x CJK Unified Ideographs Extension A */
{0x4dc0, 0x9fef}, /* 21040x Yijing Hexagram, CJK Unified Ideographs */
{0xa000, 0xa48c}, /* 1165x Yi Syllables */
{0xa4d0, 0xa4fd}, /* 46x Lisu */
{0xa500, 0xa60c}, /* 269x Vai */
{0xa610, 0xa62b}, /* 28x Vai */
{0xa6a0, 0xa6ef}, /* 80x Bamum */
{0xa80c, 0xa822}, /* 23x Syloti Nagri */
{0xa840, 0xa873}, /* 52x Phags-pa */
{0xa882, 0xa8b3}, /* 50x Saurashtra */
{0xa8d0, 0xa8d9}, /* 10x Saurashtra */
{0xa900, 0xa925}, /* 38x Kayah Li */
{0xa930, 0xa946}, /* 23x Rejang */
{0xa960, 0xa97c}, /* 29x Hangul Jamo Extended-A */
{0xa984, 0xa9b2}, /* 47x Javanese */
{0xa9cf, 0xa9d9}, /* 11x Javanese */
{0xaa00, 0xaa28}, /* 41x Cham */
{0xaa50, 0xaa59}, /* 10x Cham */
{0xabf0, 0xabf9}, /* 10x Meetei Mayek */
{0xac00, 0xd7a3}, /* 11172x Hangul Syllables */
{0xf900, 0xfa6d}, /* 366x CJK Compatibility Ideographs */
{0xfa70, 0xfad9}, /* 106x CJK Compatibility Ideographs */
{0xfb1f, 0xfb28}, /* 10x Alphabetic Presentation Forms */
{0xfb2a, 0xfb36}, /* 13x Alphabetic Presentation Forms */
{0xfb46, 0xfbb1}, /* 108x Alphabetic Presentation Forms */
{0xfbd3, 0xfd3d}, /* 363x Arabic Presentation Forms-A */
{0xfe76, 0xfefc}, /* 135x Arabic Presentation Forms-B */
{0xff10, 0xff19}, /* 10x Dubs */
{0xff21, 0xff3a}, /* 26x Dubs */
{0xff41, 0xff5a}, /* 26x Dubs */
{0xff66, 0xffbe}, /* 89x Dubs */
{0xffc2, 0xffc7}, /* 6x Dubs */
{0xffca, 0xffcf}, /* 6x Dubs */
{0xffd2, 0xffd7}, /* 6x Dubs */
{0xffda, 0xffdc}, /* 3x Dubs */
};
l = 0;
r = n = sizeof(kGlyphs) / sizeof(kGlyphs[0]);
while (l < r) {
unsigned m = (l & r) + ((l ^ r) >> 1);
if (c < kGlyphs[m][0]) {
r = m;
} else if (c > kGlyphs[m][1]) {
l = m + 1;
} else {
return 0;
}
}
return 1;
} else {
static const unsigned kAstralGlyphs[][2] = {
{0x10107, 0x10133}, /* 45x Aegean */
{0x10140, 0x10178}, /* 57x Ancient Greek Numbers */
{0x1018a, 0x1018b}, /* 2x Ancient Greek Numbers */
{0x10280, 0x1029c}, /* 29x Lycian */
{0x102a0, 0x102d0}, /* 49x Carian */
{0x102e1, 0x102fb}, /* 27x Coptic Epact Numbers */
{0x10300, 0x10323}, /* 36x Old Italic */
{0x1032d, 0x1034a}, /* 30x Old Italic, Gothic */
{0x10350, 0x10375}, /* 38x Old Permic */
{0x10380, 0x1039d}, /* 30x Ugaritic */
{0x103a0, 0x103c3}, /* 36x Old Persian */
{0x103c8, 0x103cf}, /* 8x Old Persian */
{0x103d1, 0x103d5}, /* 5x Old Persian */
{0x10400, 0x1049d}, /* 158x Deseret, Shavian, Osmanya */
{0x104b0, 0x104d3}, /* 36x Osage */
{0x104d8, 0x104fb}, /* 36x Osage */
{0x10500, 0x10527}, /* 40x Elbasan */
{0x10530, 0x10563}, /* 52x Caucasian Albanian */
{0x10600, 0x10736}, /* 311x Linear A */
{0x10800, 0x10805}, /* 6x Cypriot Syllabary */
{0x1080a, 0x10835}, /* 44x Cypriot Syllabary */
{0x10837, 0x10838}, /* 2x Cypriot Syllabary */
{0x1083f, 0x1089e}, /* 86x Cypriot,ImperialAramaic,Palmyrene,Nabataean */
{0x108e0, 0x108f2}, /* 19x Hatran */
{0x108f4, 0x108f5}, /* 2x Hatran */
{0x108fb, 0x1091b}, /* 33x Hatran */
{0x10920, 0x10939}, /* 26x Lydian */
{0x10980, 0x109b7}, /* 56x Meroitic Hieroglyphs */
{0x109bc, 0x109cf}, /* 20x Meroitic Cursive */
{0x109d2, 0x10a00}, /* 47x Meroitic Cursive */
{0x10a10, 0x10a13}, /* 4x Kharoshthi */
{0x10a15, 0x10a17}, /* 3x Kharoshthi */
{0x10a19, 0x10a35}, /* 29x Kharoshthi */
{0x10a40, 0x10a48}, /* 9x Kharoshthi */
{0x10a60, 0x10a7e}, /* 31x Old South Arabian */
{0x10a80, 0x10a9f}, /* 32x Old North Arabian */
{0x10ac0, 0x10ac7}, /* 8x Manichaean */
{0x10ac9, 0x10ae4}, /* 28x Manichaean */
{0x10aeb, 0x10aef}, /* 5x Manichaean */
{0x10b00, 0x10b35}, /* 54x Avestan */
{0x10b40, 0x10b55}, /* 22x Inscriptional Parthian */
{0x10b58, 0x10b72}, /* 27x Inscriptional Parthian and Pahlavi */
{0x10b78, 0x10b91}, /* 26x Inscriptional Pahlavi, Psalter Pahlavi */
{0x10c00, 0x10c48}, /* 73x Old Turkic */
{0x10c80, 0x10cb2}, /* 51x Old Hungarian */
{0x10cc0, 0x10cf2}, /* 51x Old Hungarian */
{0x10cfa, 0x10d23}, /* 42x Old Hungarian, Hanifi Rohingya */
{0x10d30, 0x10d39}, /* 10x Hanifi Rohingya */
{0x10e60, 0x10e7e}, /* 31x Rumi Numeral Symbols */
{0x10f00, 0x10f27}, /* 40x Old Sogdian */
{0x10f30, 0x10f45}, /* 22x Sogdian */
{0x10f51, 0x10f54}, /* 4x Sogdian */
{0x10fe0, 0x10ff6}, /* 23x Elymaic */
{0x11003, 0x11037}, /* 53x Brahmi */
{0x11052, 0x1106f}, /* 30x Brahmi */
{0x11083, 0x110af}, /* 45x Kaithi */
{0x110d0, 0x110e8}, /* 25x Sora Sompeng */
{0x110f0, 0x110f9}, /* 10x Sora Sompeng */
{0x11103, 0x11126}, /* 36x Chakma */
{0x11136, 0x1113f}, /* 10x Chakma */
{0x11144, 0x11144}, /* 1x Chakma */
{0x11150, 0x11172}, /* 35x Mahajani */
{0x11176, 0x11176}, /* 1x Mahajani */
{0x11183, 0x111b2}, /* 48x Sharada */
{0x111c1, 0x111c4}, /* 4x Sharada */
{0x111d0, 0x111da}, /* 11x Sharada */
{0x111dc, 0x111dc}, /* 1x Sharada */
{0x111e1, 0x111f4}, /* 20x Sinhala Archaic Numbers */
{0x11200, 0x11211}, /* 18x Khojki */
{0x11213, 0x1122b}, /* 25x Khojki */
{0x11280, 0x11286}, /* 7x Multani */
{0x11288, 0x11288}, /* 1x Multani */
{0x1128a, 0x1128d}, /* 4x Multani */
{0x1128f, 0x1129d}, /* 15x Multani */
{0x1129f, 0x112a8}, /* 10x Multani */
{0x112b0, 0x112de}, /* 47x Khudawadi */
{0x112f0, 0x112f9}, /* 10x Khudawadi */
{0x11305, 0x1130c}, /* 8x Grantha */
{0x1130f, 0x11310}, /* 2x Grantha */
{0x11313, 0x11328}, /* 22x Grantha */
{0x1132a, 0x11330}, /* 7x Grantha */
{0x11332, 0x11333}, /* 2x Grantha */
{0x11335, 0x11339}, /* 5x Grantha */
{0x1133d, 0x1133d}, /* 1x Grantha */
{0x11350, 0x11350}, /* 1x Grantha */
{0x1135d, 0x11361}, /* 5x Grantha */
{0x11400, 0x11434}, /* 53x Newa */
{0x11447, 0x1144a}, /* 4x Newa */
{0x11450, 0x11459}, /* 10x Newa */
{0x1145f, 0x1145f}, /* 1x Newa */
{0x11480, 0x114af}, /* 48x Tirhuta */
{0x114c4, 0x114c5}, /* 2x Tirhuta */
{0x114c7, 0x114c7}, /* 1x Tirhuta */
{0x114d0, 0x114d9}, /* 10x Tirhuta */
{0x11580, 0x115ae}, /* 47x Siddham */
{0x115d8, 0x115db}, /* 4x Siddham */
{0x11600, 0x1162f}, /* 48x Modi */
{0x11644, 0x11644}, /* 1x Modi */
{0x11650, 0x11659}, /* 10x Modi */
{0x11680, 0x116aa}, /* 43x Takri */
{0x116b8, 0x116b8}, /* 1x Takri */
{0x116c0, 0x116c9}, /* 10x Takri */
{0x11700, 0x1171a}, /* 27x Ahom */
{0x11730, 0x1173b}, /* 12x Ahom */
{0x11800, 0x1182b}, /* 44x Dogra */
{0x118a0, 0x118f2}, /* 83x Warang Citi */
{0x118ff, 0x118ff}, /* 1x Warang Citi */
{0x119a0, 0x119a7}, /* 8x Nandinagari */
{0x119aa, 0x119d0}, /* 39x Nandinagari */
{0x119e1, 0x119e1}, /* 1x Nandinagari */
{0x119e3, 0x119e3}, /* 1x Nandinagari */
{0x11a00, 0x11a00}, /* 1x Zanabazar Square */
{0x11a0b, 0x11a32}, /* 40x Zanabazar Square */
{0x11a3a, 0x11a3a}, /* 1x Zanabazar Square */
{0x11a50, 0x11a50}, /* 1x Soyombo */
{0x11a5c, 0x11a89}, /* 46x Soyombo */
{0x11a9d, 0x11a9d}, /* 1x Soyombo */
{0x11ac0, 0x11af8}, /* 57x Pau Cin Hau */
{0x11c00, 0x11c08}, /* 9x Bhaiksuki */
{0x11c0a, 0x11c2e}, /* 37x Bhaiksuki */
{0x11c40, 0x11c40}, /* 1x Bhaiksuki */
{0x11c50, 0x11c6c}, /* 29x Bhaiksuki */
{0x11c72, 0x11c8f}, /* 30x Marchen */
{0x11d00, 0x11d06}, /* 7x Masaram Gondi */
{0x11d08, 0x11d09}, /* 2x Masaram Gondi */
{0x11d0b, 0x11d30}, /* 38x Masaram Gondi */
{0x11d46, 0x11d46}, /* 1x Masaram Gondi */
{0x11d50, 0x11d59}, /* 10x Masaram Gondi */
{0x11d60, 0x11d65}, /* 6x Gunjala Gondi */
{0x11d67, 0x11d68}, /* 2x Gunjala Gondi */
{0x11d6a, 0x11d89}, /* 32x Gunjala Gondi */
{0x11d98, 0x11d98}, /* 1x Gunjala Gondi */
{0x11da0, 0x11da9}, /* 10x Gunjala Gondi */
{0x11ee0, 0x11ef2}, /* 19x Makasar */
{0x11fc0, 0x11fd4}, /* 21x Tamil Supplement */
{0x12000, 0x12399}, /* 922x Cuneiform */
{0x12400, 0x1246e}, /* 111x Cuneiform Numbers & Punctuation */
{0x12480, 0x12543}, /* 196x Early Dynastic Cuneiform */
{0x13000, 0x1342e}, /* 1071x Egyptian Hieroglyphs */
{0x14400, 0x14646}, /* 583x Anatolian Hieroglyphs */
{0x16800, 0x16a38}, /* 569x Bamum Supplement */
{0x16a40, 0x16a5e}, /* 31x Mro */
{0x16a60, 0x16a69}, /* 10x Mro */
{0x16ad0, 0x16aed}, /* 30x Bassa Vah */
{0x16b00, 0x16b2f}, /* 48x Pahawh Hmong */
{0x16b40, 0x16b43}, /* 4x Pahawh Hmong */
{0x16b50, 0x16b59}, /* 10x Pahawh Hmong */
{0x16b5b, 0x16b61}, /* 7x Pahawh Hmong */
{0x16b63, 0x16b77}, /* 21x Pahawh Hmong */
{0x16b7d, 0x16b8f}, /* 19x Pahawh Hmong */
{0x16e40, 0x16e96}, /* 87x Medefaidrin */
{0x16f00, 0x16f4a}, /* 75x Miao */
{0x16f50, 0x16f50}, /* 1x Miao */
{0x16f93, 0x16f9f}, /* 13x Miao */
{0x16fe0, 0x16fe1}, /* 2x Ideographic Symbols & Punctuation */
{0x16fe3, 0x16fe3}, /* 1x Ideographic Symbols & Punctuation */
{0x17000, 0x187f7}, /* 6136x Tangut */
{0x18800, 0x18af2}, /* 755x Tangut Components */
{0x1b000, 0x1b11e}, /* 287x Kana Supplement */
{0x1b150, 0x1b152}, /* 3x Small Kana Extension */
{0x1b164, 0x1b167}, /* 4x Small Kana Extension */
{0x1b170, 0x1b2fb}, /* 396x Nushu */
{0x1bc00, 0x1bc6a}, /* 107x Duployan */
{0x1bc70, 0x1bc7c}, /* 13x Duployan */
{0x1bc80, 0x1bc88}, /* 9x Duployan */
{0x1bc90, 0x1bc99}, /* 10x Duployan */
{0x1d2e0, 0x1d2f3}, /* 20x Mayan Numerals */
{0x1d360, 0x1d378}, /* 25x Counting Rod Numerals */
{0x1d400, 0x1d454}, /* 85x 𝐀..𝑔 Math */
{0x1d456, 0x1d49c}, /* 71x 𝑖..𝒜 Math */
{0x1d49e, 0x1d49f}, /* 2x 𝒞..𝒟 Math */
{0x1d4a2, 0x1d4a2}, /* 1x 𝒢..𝒢 Math */
{0x1d4a5, 0x1d4a6}, /* 2x 𝒥..𝒦 Math */
{0x1d4a9, 0x1d4ac}, /* 4x 𝒩..𝒬 Math */
{0x1d4ae, 0x1d4b9}, /* 12x 𝒮..𝒹 Math */
{0x1d4bb, 0x1d4bb}, /* 1x 𝒻..𝒻 Math */
{0x1d4bd, 0x1d4c3}, /* 7x 𝒽..𝓃 Math */
{0x1d4c5, 0x1d505}, /* 65x 𝓅..𝔅 Math */
{0x1d507, 0x1d50a}, /* 4x 𝔇..𝔊 Math */
{0x1d50d, 0x1d514}, /* 8x 𝔍..𝔔 Math */
{0x1d516, 0x1d51c}, /* 7x 𝔖..𝔜 Math */
{0x1d51e, 0x1d539}, /* 28x 𝔞..𝔹 Math */
{0x1d53b, 0x1d53e}, /* 4x 𝔻..𝔾 Math */
{0x1d540, 0x1d544}, /* 5x 𝕀..𝕄 Math */
{0x1d546, 0x1d546}, /* 1x 𝕆..𝕆 Math */
{0x1d54a, 0x1d550}, /* 7x 𝕊..𝕐 Math */
{0x1d552, 0x1d6a5}, /* 340x 𝕒..𝚥 Math */
{0x1d6a8, 0x1d6c0}, /* 25x 𝚨..𝛀 Math */
{0x1d6c2, 0x1d6da}, /* 25x 𝛂..𝛚 Math */
{0x1d6dc, 0x1d6fa}, /* 31x 𝛜..𝛺 Math */
{0x1d6fc, 0x1d714}, /* 25x 𝛼..𝜔 Math */
{0x1d716, 0x1d734}, /* 31x 𝜖..𝜴 Math */
{0x1d736, 0x1d74e}, /* 25x 𝜶..𝝎 Math */
{0x1d750, 0x1d76e}, /* 31x 𝝐..𝝮 Math */
{0x1d770, 0x1d788}, /* 25x 𝝰..𝞈 Math */
{0x1d78a, 0x1d7a8}, /* 31x 𝞊..𝞨 Math */
{0x1d7aa, 0x1d7c2}, /* 25x 𝞪..𝟂 Math */
{0x1d7c4, 0x1d7cb}, /* 8x 𝟄..𝟋 Math */
{0x1d7ce, 0x1d9ff}, /* 562x Math, Sutton SignWriting */
{0x1f100, 0x1f10c}, /* 13x Enclosed Alphanumeric Supplement */
{0x20000, 0x2a6d6}, /* 42711x CJK Unified Ideographs Extension B */
{0x2a700, 0x2b734}, /* 4149x CJK Unified Ideographs Extension C */
{0x2b740, 0x2b81d}, /* 222x CJK Unified Ideographs Extension D */
{0x2b820, 0x2cea1}, /* 5762x CJK Unified Ideographs Extension E */
{0x2ceb0, 0x2ebe0}, /* 7473x CJK Unified Ideographs Extension F */
{0x2f800, 0x2fa1d}, /* 542x CJK Compatibility Ideographs Supplement */
};
l = 0;
r = n = sizeof(kAstralGlyphs) / sizeof(kAstralGlyphs[0]);
while (l < r) {
unsigned m = (l & r) + ((l ^ r) >> 1);
if (c < kAstralGlyphs[m][0]) {
r = m;
} else if (c > kAstralGlyphs[m][1]) {
l = m + 1;
} else {
return 0;
}
}
return 1;
}
}
/**
 * Returns lowercase form of 𝑐.
 *
 * ASCII is handled inline. A few large alternating-case blocks are
 * handled by parity arithmetic, and everything else goes through
 * sorted {first, last, delta} tables that are binary searched.
 *
 * @param c is a UNICODE code point
 * @return lowercase code point, or 𝑐 itself if no mapping is known
 */
unsigned bestlineLowercase(unsigned c) {
    int m, l, r, n;
    if (c < 0200) {
        if ('A' <= c && c <= 'Z') {
            return c + 32;
        } else {
            return c;
        }
    } else if (c <= 0xffff) {
        if ((0x0100 <= c && c <= 0x0176) || /* 60x Ā..ā → ā..ŵ Watin-A */
            (0x01de <= c && c <= 0x01ee) || /* 9x Ǟ..Ǯ → ǟ..ǯ Watin-B */
            (0x01f8 <= c && c <= 0x021e) || /* 20x Ǹ..Ȟ → ǹ..ȟ Watin-B */
            (0x0222 <= c && c <= 0x0232) || /* 9x Ȣ..Ȳ → ȣ..ȳ Watin-B */
            (0x1e00 <= c && c <= 0x1eff)) { /*256x Ḁ..Ỿ → ḁ..ỿ Watin-C */
            if (c == 0x0130)
                return c - 199; /* İ → i */
            if (c == 0x0138)
                return c; /* ĸ is already lowercase */
            if (0x0139 <= c && c <= 0x0148)
                return c + (c & 1); /* Ĺ..Ň: capitals sit on odd code points here */
            if (0x1e96 <= c && c <= 0x1e9f)
                return c; /* lowercase-only letters, plus ẞ which is kept as-is */
            return c + (~c & 1); /* capitals are even, smalls are odd */
        } else if (0x01cf <= c && c <= 0x01db) {
            return c + (c & 1); /* 7x Ǐ..Ǜ → ǐ..ǜ Watin-B */
        } else if (0x13a0 <= c && c <= 0x13ef) {
            return c + 38864; /* 80x Ꭰ ..Ꮿ → ꭰ ..ꮿ Cherokee (delta exceeds short) */
        } else {
            /* must stay sorted by code point for the binary search below */
            static const struct {
                unsigned short a;
                unsigned short b;
                short d;
            } kLower[] = {
                {0x00c0, 0x00d6, +32}, /* 23x À ..Ö → à ..ö Watin */
                {0x00d8, 0x00de, +32}, /* 7x Ø ..Þ → ø ..þ Watin */
                {0x0178, 0x0178, -121}, /* 1x Ÿ ..Ÿ → ÿ ..ÿ Watin-A */
                {0x0179, 0x0179, +1}, /* 1x Ź ..Ź → ź ..ź Watin-A */
                {0x017b, 0x017b, +1}, /* 1x Ż ..Ż → ż ..ż Watin-A */
                {0x017d, 0x017d, +1}, /* 1x Ž ..Ž → ž ..ž Watin-A */
                {0x0181, 0x0181, +210}, /* 1x Ɓ ..Ɓ → ɓ ..ɓ Watin-B */
                {0x0182, 0x0182, +1}, /* 1x Ƃ ..Ƃ → ƃ ..ƃ Watin-B */
                {0x0184, 0x0184, +1}, /* 1x Ƅ ..Ƅ → ƅ ..ƅ Watin-B */
                {0x0186, 0x0186, +206}, /* 1x Ɔ ..Ɔ → ɔ ..ɔ Watin-B */
                {0x0187, 0x0187, +1}, /* 1x Ƈ ..Ƈ → ƈ ..ƈ Watin-B */
                {0x0189, 0x018a, +205}, /* 2x Ɖ ..Ɗ → ɖ ..ɗ Watin-B */
                {0x018b, 0x018b, +1}, /* 1x Ƌ ..Ƌ → ƌ ..ƌ Watin-B */
                {0x018e, 0x018e, +79}, /* 1x Ǝ ..Ǝ → ǝ ..ǝ Watin-B */
                {0x018f, 0x018f, +202}, /* 1x Ə ..Ə → ə ..ə Watin-B */
                {0x0190, 0x0190, +203}, /* 1x Ɛ ..Ɛ → ɛ ..ɛ Watin-B */
                {0x0191, 0x0191, +1}, /* 1x Ƒ ..Ƒ → ƒ ..ƒ Watin-B */
                {0x0193, 0x0193, +205}, /* 1x Ɠ ..Ɠ → ɠ ..ɠ Watin-B */
                {0x0194, 0x0194, +207}, /* 1x Ɣ ..Ɣ → ɣ ..ɣ Watin-B */
                {0x0196, 0x0196, +211}, /* 1x Ɩ ..Ɩ → ɩ ..ɩ Watin-B */
                {0x0197, 0x0197, +209}, /* 1x Ɨ ..Ɨ → ɨ ..ɨ Watin-B */
                {0x0198, 0x0198, +1}, /* 1x Ƙ ..Ƙ → ƙ ..ƙ Watin-B */
                {0x019c, 0x019c, +211}, /* 1x Ɯ ..Ɯ → ɯ ..ɯ Watin-B */
                {0x019d, 0x019d, +213}, /* 1x Ɲ ..Ɲ → ɲ ..ɲ Watin-B */
                {0x019f, 0x019f, +214}, /* 1x Ɵ ..Ɵ → ɵ ..ɵ Watin-B */
                {0x01a0, 0x01a0, +1}, /* 1x Ơ ..Ơ → ơ ..ơ Watin-B */
                {0x01a2, 0x01a2, +1}, /* 1x Ƣ ..Ƣ → ƣ ..ƣ Watin-B */
                {0x01a4, 0x01a4, +1}, /* 1x Ƥ ..Ƥ → ƥ ..ƥ Watin-B */
                {0x01a6, 0x01a6, +218}, /* 1x Ʀ ..Ʀ → ʀ ..ʀ Watin-B */
                {0x01a7, 0x01a7, +1}, /* 1x Ƨ ..Ƨ → ƨ ..ƨ Watin-B */
                {0x01a9, 0x01a9, +218}, /* 1x Ʃ ..Ʃ → ʃ ..ʃ Watin-B */
                {0x01ac, 0x01ac, +1}, /* 1x Ƭ ..Ƭ → ƭ ..ƭ Watin-B */
                {0x01ae, 0x01ae, +218}, /* 1x Ʈ ..Ʈ → ʈ ..ʈ Watin-B */
                {0x01af, 0x01af, +1}, /* 1x Ư ..Ư → ư ..ư Watin-B */
                {0x01b1, 0x01b2, +217}, /* 2x Ʊ ..Ʋ → ʊ ..ʋ Watin-B */
                {0x01b3, 0x01b3, +1}, /* 1x Ƴ ..Ƴ → ƴ ..ƴ Watin-B */
                {0x01b5, 0x01b5, +1}, /* 1x Ƶ ..Ƶ → ƶ ..ƶ Watin-B */
                {0x01b7, 0x01b7, +219}, /* 1x Ʒ ..Ʒ → ʒ ..ʒ Watin-B */
                {0x01b8, 0x01b8, +1}, /* 1x Ƹ ..Ƹ → ƹ ..ƹ Watin-B */
                {0x01bc, 0x01bc, +1}, /* 1x Ƽ ..Ƽ → ƽ ..ƽ Watin-B */
                {0x01c4, 0x01c4, +2}, /* 1x DŽ ..DŽ → dž ..dž Watin-B */
                {0x01c5, 0x01c5, +1}, /* 1x Dž ..Dž → dž ..dž Watin-B */
                {0x01c7, 0x01c7, +2}, /* 1x LJ ..LJ → lj ..lj Watin-B */
                {0x01c8, 0x01c8, +1}, /* 1x Lj ..Lj → lj ..lj Watin-B */
                {0x01ca, 0x01ca, +2}, /* 1x NJ ..NJ → nj ..nj Watin-B */
                {0x01cb, 0x01cb, +1}, /* 1x Nj ..Nj → nj ..nj Watin-B */
                {0x01cd, 0x01cd, +1}, /* 1x Ǎ ..Ǎ → ǎ ..ǎ Watin-B */
                {0x01f1, 0x01f1, +2}, /* 1x DZ ..DZ → dz ..dz Watin-B */
                {0x01f2, 0x01f2, +1}, /* 1x Dz ..Dz → dz ..dz Watin-B */
                {0x01f4, 0x01f4, +1}, /* 1x Ǵ ..Ǵ → ǵ ..ǵ Watin-B */
                {0x01f6, 0x01f6, -97}, /* 1x Ƕ ..Ƕ → ƕ ..ƕ Watin-B */
                {0x01f7, 0x01f7, -56}, /* 1x Ƿ ..Ƿ → ƿ ..ƿ Watin-B */
                {0x0220, 0x0220, -130}, /* 1x Ƞ ..Ƞ → ƞ ..ƞ Watin-B */
                {0x023b, 0x023b, +1}, /* 1x Ȼ ..Ȼ → ȼ ..ȼ Watin-B */
                {0x023d, 0x023d, -163}, /* 1x Ƚ ..Ƚ → ƚ ..ƚ Watin-B */
                {0x0241, 0x0241, +1}, /* 1x Ɂ ..Ɂ → ɂ ..ɂ Watin-B */
                {0x0243, 0x0243, -195}, /* 1x Ƀ ..Ƀ → ƀ ..ƀ Watin-B */
                {0x0244, 0x0244, +69}, /* 1x Ʉ ..Ʉ → ʉ ..ʉ Watin-B */
                {0x0245, 0x0245, +71}, /* 1x Ʌ ..Ʌ → ʌ ..ʌ Watin-B */
                {0x0246, 0x0246, +1}, /* 1x Ɇ ..Ɇ → ɇ ..ɇ Watin-B */
                {0x0248, 0x0248, +1}, /* 1x Ɉ ..Ɉ → ɉ ..ɉ Watin-B */
                {0x024a, 0x024a, +1}, /* 1x Ɋ ..Ɋ → ɋ ..ɋ Watin-B */
                {0x024c, 0x024c, +1}, /* 1x Ɍ ..Ɍ → ɍ ..ɍ Watin-B */
                {0x024e, 0x024e, +1}, /* 1x Ɏ ..Ɏ → ɏ ..ɏ Watin-B */
                {0x0386, 0x0386, +38}, /* 1x Ά ..Ά → ά ..ά Greek */
                {0x0388, 0x038a, +37}, /* 3x Έ ..Ί → έ ..ί Greek */
                {0x038c, 0x038c, +64}, /* 1x Ό ..Ό → ό ..ό Greek */
                {0x038e, 0x038f, +63}, /* 2x Ύ ..Ώ → ύ ..ώ Greek */
                {0x0391, 0x03a1, +32}, /* 17x Α ..Ρ → α ..ρ Greek */
                {0x03a3, 0x03ab, +32}, /* 9x Σ ..Ϋ → σ ..ϋ Greek */
                {0x03dc, 0x03dc, +1}, /* 1x Ϝ ..Ϝ → ϝ ..ϝ Greek */
                {0x03f4, 0x03f4, -60}, /* 1x ϴ ..ϴ → θ ..θ Greek */
                {0x0400, 0x040f, +80}, /* 16x Ѐ ..Џ → ѐ ..џ Cyrillic */
                {0x0410, 0x042f, +32}, /* 32x А ..Я → а ..я Cyrillic */
                {0x0460, 0x0460, +1}, /* 1x Ѡ ..Ѡ → ѡ ..ѡ Cyrillic */
                {0x0462, 0x0462, +1}, /* 1x Ѣ ..Ѣ → ѣ ..ѣ Cyrillic */
                {0x0464, 0x0464, +1}, /* 1x Ѥ ..Ѥ → ѥ ..ѥ Cyrillic */
                {0x0472, 0x0472, +1}, /* 1x Ѳ ..Ѳ → ѳ ..ѳ Cyrillic */
                {0x0490, 0x0490, +1}, /* 1x Ґ ..Ґ → ґ ..ґ Cyrillic */
                {0x0498, 0x0498, +1}, /* 1x Ҙ ..Ҙ → ҙ ..ҙ Cyrillic */
                {0x049a, 0x049a, +1}, /* 1x Қ ..Қ → қ ..қ Cyrillic */
                {0x0531, 0x0556, +48}, /* 38x Ա ..Ֆ → ա ..ֆ Armenian */
                {0x10a0, 0x10c5, +7264}, /* 38x Ⴀ ..Ⴥ → ⴀ ..ⴥ Georgian */
                {0x10c7, 0x10c7, +7264}, /* 1x Ⴧ ..Ⴧ → ⴧ ..ⴧ Georgian */
                {0x10cd, 0x10cd, +7264}, /* 1x Ⴭ ..Ⴭ → ⴭ ..ⴭ Georgian */
                {0x13f0, 0x13f5, +8}, /* 6x Ᏸ ..Ᏽ → ᏸ ..ᏽ Cherokee */
                {0x1c90, 0x1cba, -3008}, /* 43x Ა ..Ჺ → ა ..ჺ Georgian2 */
                {0x1cbd, 0x1cbf, -3008}, /* 3x Ჽ ..Ჿ → ჽ ..ჿ Georgian2 */
                {0x1f08, 0x1f0f, -8}, /* 8x Ἀ ..Ἇ → ἀ ..ἇ Greek2 */
                {0x1f18, 0x1f1d, -8}, /* 6x Ἐ ..Ἕ → ἐ ..ἕ Greek2 */
                {0x1f28, 0x1f2f, -8}, /* 8x Ἠ ..Ἧ → ἠ ..ἧ Greek2 */
                {0x1f38, 0x1f3f, -8}, /* 8x Ἰ ..Ἷ → ἰ ..ἷ Greek2 */
                {0x1f48, 0x1f4d, -8}, /* 6x Ὀ ..Ὅ → ὀ ..ὅ Greek2 */
                {0x1f59, 0x1f59, -8}, /* 1x Ὑ ..Ὑ → ὑ ..ὑ Greek2 */
                {0x1f5b, 0x1f5b, -8}, /* 1x Ὓ ..Ὓ → ὓ ..ὓ Greek2 */
                {0x1f5d, 0x1f5d, -8}, /* 1x Ὕ ..Ὕ → ὕ ..ὕ Greek2 */
                {0x1f5f, 0x1f5f, -8}, /* 1x Ὗ ..Ὗ → ὗ ..ὗ Greek2 */
                {0x1f68, 0x1f6f, -8}, /* 8x Ὠ ..Ὧ → ὠ ..ὧ Greek2 */
                {0x1f88, 0x1f8f, -8}, /* 8x ᾈ ..ᾏ → ᾀ ..ᾇ Greek2 */
                {0x1f98, 0x1f9f, -8}, /* 8x ᾘ ..ᾟ → ᾐ ..ᾗ Greek2 */
                {0x1fa8, 0x1faf, -8}, /* 8x ᾨ ..ᾯ → ᾠ ..ᾧ Greek2 */
                {0x1fb8, 0x1fb9, -8}, /* 2x Ᾰ ..Ᾱ → ᾰ ..ᾱ Greek2 */
                {0x1fba, 0x1fbb, -74}, /* 2x Ὰ ..Ά → ὰ ..ά Greek2 */
                {0x1fbc, 0x1fbc, -9}, /* 1x ᾼ ..ᾼ → ᾳ ..ᾳ Greek2 */
                {0x1fc8, 0x1fcb, -86}, /* 4x Ὲ ..Ή → ὲ ..ή Greek2 */
                {0x1fcc, 0x1fcc, -9}, /* 1x ῌ ..ῌ → ῃ ..ῃ Greek2 */
                {0x1fd8, 0x1fd9, -8}, /* 2x Ῐ ..Ῑ → ῐ ..ῑ Greek2 */
                {0x1fda, 0x1fdb, -100}, /* 2x Ὶ ..Ί → ὶ ..ί Greek2 */
                {0x1fe8, 0x1fe9, -8}, /* 2x Ῠ ..Ῡ → ῠ ..ῡ Greek2 */
                {0x1fea, 0x1feb, -112}, /* 2x Ὺ ..Ύ → ὺ ..ύ Greek2 */
                {0x1fec, 0x1fec, -7}, /* 1x Ῥ ..Ῥ → ῥ ..ῥ Greek2 */
                {0x1ff8, 0x1ff9, -128}, /* 2x Ὸ ..Ό → ὸ ..ό Greek2 */
                {0x1ffa, 0x1ffb, -126}, /* 2x Ὼ ..Ώ → ὼ ..ώ Greek2 */
                {0x1ffc, 0x1ffc, -9}, /* 1x ῼ ..ῼ → ῳ ..ῳ Greek2 */
                {0x2126, 0x2126, -7517}, /* 1x Ω ..Ω → ω ..ω Letterlike */
                {0x212a, 0x212a, -8383}, /* 1x K ..K → k ..k Letterlike */
                {0x212b, 0x212b, -8262}, /* 1x Å ..Å → å ..å Letterlike */
                {0x2132, 0x2132, +28}, /* 1x Ⅎ ..Ⅎ → ⅎ ..ⅎ Letterlike */
                {0x2160, 0x216f, +16}, /* 16x Ⅰ ..Ⅿ → ⅰ ..ⅿ Numbery */
                {0x2183, 0x2183, +1}, /* 1x Ↄ ..Ↄ → ↄ ..ↄ Numbery */
                {0x24b6, 0x24cf, +26}, /* 26x Ⓐ ..Ⓩ → ⓐ ..ⓩ Enclosed */
                {0x2c00, 0x2c2e, +48}, /* 47x Ⰰ ..Ⱞ → ⰰ ..ⱞ Glagolitic */
                {0xff21, 0xff3a, +32}, /* 26x A..Z → a..z Dubs */
            };
            /* find first row whose upper bound isn't below c */
            l = 0;
            r = n = sizeof(kLower) / sizeof(kLower[0]);
            while (l < r) {
                m = (l + r) >> 1;
                if (kLower[m].b < c) {
                    l = m + 1;
                } else {
                    r = m;
                }
            }
            if (l < n && kLower[l].a <= c && c <= kLower[l].b) {
                return c + kLower[l].d;
            } else {
                return c;
            }
        }
    } else {
        /* must stay sorted by code point for the binary search below;
           every mathematical alphabet keeps its own font style */
        static const struct {
            unsigned a;
            unsigned b;
            short d;
        } kAstralLower[] = {
            {0x10400, 0x10427, +40}, /* 40x 𐐀 ..𐐧 → 𐐨 ..𐑏 Deseret */
            {0x104b0, 0x104d3, +40}, /* 36x 𐒰 ..𐓓 → 𐓘 ..𐓻 Osage */
            {0x1d400, 0x1d419, +26}, /* 26x 𝐀 ..𝐙 → 𝐚 ..𝐳 Math */
            {0x1d43c, 0x1d44d, +26}, /* 18x 𝐼 ..𝑍 → 𝑖 ..𝑧 Math */
            {0x1d468, 0x1d481, +26}, /* 26x 𝑨 ..𝒁 → 𝒂 ..𝒛 Math */
            {0x1d4ae, 0x1d4b5, +26}, /* 8x 𝒮 ..𝒵 → 𝓈 ..𝓏 Math */
            {0x1d4d0, 0x1d4e9, +26}, /* 26x 𝓐 ..𝓩 → 𝓪 ..𝔃 Math */
            {0x1d50d, 0x1d514, +26}, /* 8x 𝔍 ..𝔔 → 𝔧 ..𝔮 Math */
            {0x1d56c, 0x1d585, +26}, /* 26x 𝕬 ..𝖅 → 𝖆 ..𝖟 Math */
            {0x1d5a0, 0x1d5b9, +26}, /* 26x 𝖠 ..𝖹 → 𝖺 ..𝗓 Math */
            {0x1d5d4, 0x1d5ed, +26}, /* 26x 𝗔 ..𝗭 → 𝗮 ..𝘇 Math */
            {0x1d608, 0x1d621, +26}, /* 26x 𝘈 ..𝘡 → 𝘢 ..𝘻 Math */
            {0x1d63c, 0x1d655, +26}, /* 26x sans-serif bold italic A..Z → a..z Math */
            {0x1d670, 0x1d689, +26}, /* 26x 𝙰 ..𝚉 → 𝚊 ..𝚣 Math */
            {0x1d6a8, 0x1d6b8, +26}, /* 17x 𝚨 ..𝚸 → 𝛂 ..𝛒 Math */
            {0x1d6e2, 0x1d6f2, +26}, /* 17x 𝛢 ..𝛲 → 𝛼 ..𝜌 Math */
            {0x1d71c, 0x1d72c, +26}, /* 17x 𝜜 ..𝜬 → 𝜶 ..𝝆 Math */
            {0x1d756, 0x1d766, +26}, /* 17x 𝝖 ..𝝦 → 𝝰 ..𝞀 Math */
            {0x1d790, 0x1d7a0, +26}, /* 17x sans-serif bold italic Α..Ρ → α..ρ Math */
        };
        /* find first row whose upper bound isn't below c */
        l = 0;
        r = n = sizeof(kAstralLower) / sizeof(kAstralLower[0]);
        while (l < r) {
            m = (l + r) >> 1;
            if (kAstralLower[m].b < c) {
                l = m + 1;
            } else {
                r = m;
            }
        }
        if (l < n && kAstralLower[l].a <= c && c <= kAstralLower[l].b) {
            return c + kAstralLower[l].d;
        } else {
            return c;
        }
    }
}
/**
 * Returns uppercase form of 𝑐.
 *
 * ASCII is handled inline. A few large alternating-case blocks are
 * handled by parity arithmetic, and everything else goes through
 * sorted {first, last, delta} tables that are binary searched.
 *
 * @param c is a UNICODE code point
 * @return uppercase code point, or 𝑐 itself if no mapping is known
 */
unsigned bestlineUppercase(unsigned c) {
    int m, l, r, n;
    if (c < 0200) {
        if ('a' <= c && c <= 'z') {
            return c - 32;
        } else {
            return c;
        }
    } else if (c <= 0xffff) {
        if ((0x0101 <= c && c <= 0x0177) || /* 60x ā..ŵ → Ā..ā Watin-A */
            (0x01df <= c && c <= 0x01ef) || /* 9x ǟ..ǯ → Ǟ..Ǯ Watin-B */
            (0x01f8 <= c && c <= 0x021e) || /* 20x ǹ..ȟ → Ǹ..Ȟ Watin-B */
            (0x0222 <= c && c <= 0x0232) || /* 9x ȣ..ȳ → Ȣ..Ȳ Watin-B */
            (0x1e01 <= c && c <= 0x1eff)) { /*256x ḁ..ỿ → Ḁ..Ỿ Watin-C */
            if (c == 0x0131)
                return c - 232; /* ı → I */
            if (c == 0x0138 || c == 0x0149)
                return c; /* ĸ and ŉ have no single-character capital */
            if (0x0139 <= c && c <= 0x0148)
                return c - (~c & 1); /* ĺ..ň: smalls sit on even code points here */
            if (c == 0x1e9b)
                return 0x1e60; /* ẛ → Ṡ */
            if (0x1e96 <= c && c <= 0x1e9f)
                return c; /* letters without a single-character capital, plus ẞ */
            return c - (c & 1); /* smalls are odd, capitals are even */
        } else if (0x01d0 <= c && c <= 0x01dc) {
            return c - (~c & 1); /* 7x ǐ..ǜ → Ǐ..Ǜ Watin-B */
        } else if (0xab70 <= c && c <= 0xabbf) {
            return c - 38864; /* 80x ꭰ ..ꮿ → Ꭰ ..Ꮿ Cherokee Supplement */
        } else {
            /* must stay sorted by code point for the binary search below */
            static const struct {
                unsigned short a;
                unsigned short b;
                short d;
            } kUpper[] = {
                {0x00b5, 0x00b5, +743}, /* 1x µ ..µ → Μ ..Μ Watin */
                {0x00e0, 0x00f6, -32}, /* 23x à ..ö → À ..Ö Watin */
                {0x00f8, 0x00fe, -32}, /* 7x ø ..þ → Ø ..Þ Watin */
                {0x00ff, 0x00ff, +121}, /* 1x ÿ ..ÿ → Ÿ ..Ÿ Watin */
                {0x017a, 0x017a, -1}, /* 1x ź ..ź → Ź ..Ź Watin-A */
                {0x017c, 0x017c, -1}, /* 1x ż ..ż → Ż ..Ż Watin-A */
                {0x017e, 0x017e, -1}, /* 1x ž ..ž → Ž ..Ž Watin-A */
                {0x017f, 0x017f, -300}, /* 1x ſ ..ſ → S ..S Watin-A */
                {0x0180, 0x0180, +195}, /* 1x ƀ ..ƀ → Ƀ ..Ƀ Watin-B */
                {0x0183, 0x0183, -1}, /* 1x ƃ ..ƃ → Ƃ ..Ƃ Watin-B */
                {0x0185, 0x0185, -1}, /* 1x ƅ ..ƅ → Ƅ ..Ƅ Watin-B */
                {0x0188, 0x0188, -1}, /* 1x ƈ ..ƈ → Ƈ ..Ƈ Watin-B */
                {0x018c, 0x018c, -1}, /* 1x ƌ ..ƌ → Ƌ ..Ƌ Watin-B */
                {0x0192, 0x0192, -1}, /* 1x ƒ ..ƒ → Ƒ ..Ƒ Watin-B */
                {0x0195, 0x0195, +97}, /* 1x ƕ ..ƕ → Ƕ ..Ƕ Watin-B */
                {0x0199, 0x0199, -1}, /* 1x ƙ ..ƙ → Ƙ ..Ƙ Watin-B */
                {0x019a, 0x019a, +163}, /* 1x ƚ ..ƚ → Ƚ ..Ƚ Watin-B */
                {0x019e, 0x019e, +130}, /* 1x ƞ ..ƞ → Ƞ ..Ƞ Watin-B */
                {0x01a1, 0x01a1, -1}, /* 1x ơ ..ơ → Ơ ..Ơ Watin-B */
                {0x01a3, 0x01a3, -1}, /* 1x ƣ ..ƣ → Ƣ ..Ƣ Watin-B */
                {0x01a5, 0x01a5, -1}, /* 1x ƥ ..ƥ → Ƥ ..Ƥ Watin-B */
                {0x01a8, 0x01a8, -1}, /* 1x ƨ ..ƨ → Ƨ ..Ƨ Watin-B */
                {0x01ad, 0x01ad, -1}, /* 1x ƭ ..ƭ → Ƭ ..Ƭ Watin-B */
                {0x01b0, 0x01b0, -1}, /* 1x ư ..ư → Ư ..Ư Watin-B */
                {0x01b4, 0x01b4, -1}, /* 1x ƴ ..ƴ → Ƴ ..Ƴ Watin-B */
                {0x01b6, 0x01b6, -1}, /* 1x ƶ ..ƶ → Ƶ ..Ƶ Watin-B */
                {0x01b9, 0x01b9, -1}, /* 1x ƹ ..ƹ → Ƹ ..Ƹ Watin-B */
                {0x01bd, 0x01bd, -1}, /* 1x ƽ ..ƽ → Ƽ ..Ƽ Watin-B */
                {0x01bf, 0x01bf, +56}, /* 1x ƿ ..ƿ → Ƿ ..Ƿ Watin-B */
                {0x01c5, 0x01c5, -1}, /* 1x Dž ..Dž → DŽ ..DŽ Watin-B */
                {0x01c6, 0x01c6, -2}, /* 1x dž ..dž → DŽ ..DŽ Watin-B */
                {0x01c8, 0x01c8, -1}, /* 1x Lj ..Lj → LJ ..LJ Watin-B */
                {0x01c9, 0x01c9, -2}, /* 1x lj ..lj → LJ ..LJ Watin-B */
                {0x01cb, 0x01cb, -1}, /* 1x Nj ..Nj → NJ ..NJ Watin-B */
                {0x01cc, 0x01cc, -2}, /* 1x nj ..nj → NJ ..NJ Watin-B */
                {0x01ce, 0x01ce, -1}, /* 1x ǎ ..ǎ → Ǎ ..Ǎ Watin-B */
                {0x01dd, 0x01dd, -79}, /* 1x ǝ ..ǝ → Ǝ ..Ǝ Watin-B */
                {0x01f2, 0x01f2, -1}, /* 1x Dz ..Dz → DZ ..DZ Watin-B */
                {0x01f3, 0x01f3, -2}, /* 1x dz ..dz → DZ ..DZ Watin-B */
                {0x01f5, 0x01f5, -1}, /* 1x ǵ ..ǵ → Ǵ ..Ǵ Watin-B */
                {0x023c, 0x023c, -1}, /* 1x ȼ ..ȼ → Ȼ ..Ȼ Watin-B */
                {0x023f, 0x0240, +10815}, /* 2x ȿ ..ɀ → Ȿ ..Ɀ Watin-B */
                {0x0242, 0x0242, -1}, /* 1x ɂ ..ɂ → Ɂ ..Ɂ Watin-B */
                {0x0247, 0x0247, -1}, /* 1x ɇ ..ɇ → Ɇ ..Ɇ Watin-B */
                {0x0249, 0x0249, -1}, /* 1x ɉ ..ɉ → Ɉ ..Ɉ Watin-B */
                {0x024b, 0x024b, -1}, /* 1x ɋ ..ɋ → Ɋ ..Ɋ Watin-B */
                {0x024d, 0x024d, -1}, /* 1x ɍ ..ɍ → Ɍ ..Ɍ Watin-B */
                {0x024f, 0x024f, -1}, /* 1x ɏ ..ɏ → Ɏ ..Ɏ Watin-B */
                {0x037b, 0x037d, +130}, /* 3x ͻ ..ͽ → Ͻ ..Ͽ Greek */
                {0x03ac, 0x03ac, -38}, /* 1x ά ..ά → Ά ..Ά Greek */
                {0x03ad, 0x03af, -37}, /* 3x έ ..ί → Έ ..Ί Greek */
                {0x03b1, 0x03c1, -32}, /* 17x α ..ρ → Α ..Ρ Greek */
                {0x03c2, 0x03c2, -31}, /* 1x ς ..ς → Σ ..Σ Greek */
                {0x03c3, 0x03cb, -32}, /* 9x σ ..ϋ → Σ ..Ϋ Greek */
                {0x03cc, 0x03cc, -64}, /* 1x ό ..ό → Ό ..Ό Greek */
                {0x03cd, 0x03ce, -63}, /* 2x ύ ..ώ → Ύ ..Ώ Greek */
                {0x03d0, 0x03d0, -62}, /* 1x ϐ ..ϐ → Β ..Β Greek */
                {0x03d1, 0x03d1, -57}, /* 1x ϑ ..ϑ → Θ ..Θ Greek */
                {0x03d5, 0x03d5, -47}, /* 1x ϕ ..ϕ → Φ ..Φ Greek */
                {0x03d6, 0x03d6, -54}, /* 1x ϖ ..ϖ → Π ..Π Greek */
                {0x03dd, 0x03dd, -1}, /* 1x ϝ ..ϝ → Ϝ ..Ϝ Greek */
                {0x03f0, 0x03f0, -86}, /* 1x ϰ ..ϰ → Κ ..Κ Greek */
                {0x03f1, 0x03f1, -80}, /* 1x ϱ ..ϱ → Ρ ..Ρ Greek */
                {0x03f5, 0x03f5, -96}, /* 1x ϵ ..ϵ → Ε ..Ε Greek */
                {0x0430, 0x044f, -32}, /* 32x а ..я → А ..Я Cyrillic */
                {0x0450, 0x045f, -80}, /* 16x ѐ ..џ → Ѐ ..Џ Cyrillic */
                {0x0461, 0x0461, -1}, /* 1x ѡ ..ѡ → Ѡ ..Ѡ Cyrillic */
                {0x0463, 0x0463, -1}, /* 1x ѣ ..ѣ → Ѣ ..Ѣ Cyrillic */
                {0x0465, 0x0465, -1}, /* 1x ѥ ..ѥ → Ѥ ..Ѥ Cyrillic */
                {0x0473, 0x0473, -1}, /* 1x ѳ ..ѳ → Ѳ ..Ѳ Cyrillic */
                {0x0491, 0x0491, -1}, /* 1x ґ ..ґ → Ґ ..Ґ Cyrillic */
                {0x0499, 0x0499, -1}, /* 1x ҙ ..ҙ → Ҙ ..Ҙ Cyrillic */
                {0x049b, 0x049b, -1}, /* 1x қ ..қ → Қ ..Қ Cyrillic */
                {0x0561, 0x0586, -48}, /* 38x ա ..ֆ → Ա ..Ֆ Armenian */
                {0x10d0, 0x10fa, +3008}, /* 43x ა ..ჺ → Ა ..Ჺ Georgian */
                {0x10fd, 0x10ff, +3008}, /* 3x ჽ ..ჿ → Ჽ ..Ჿ Georgian */
                {0x13f8, 0x13fd, -8}, /* 6x ᏸ ..ᏽ → Ᏸ ..Ᏽ Cherokee */
                {0x214e, 0x214e, -28}, /* 1x ⅎ ..ⅎ → Ⅎ ..Ⅎ Letterlike */
                {0x2170, 0x217f, -16}, /* 16x ⅰ ..ⅿ → Ⅰ ..Ⅿ Numbery */
                {0x2184, 0x2184, -1}, /* 1x ↄ ..ↄ → Ↄ ..Ↄ Numbery */
                {0x24d0, 0x24e9, -26}, /* 26x ⓐ ..ⓩ → Ⓐ ..Ⓩ Enclosed */
                {0x2c30, 0x2c5e, -48}, /* 47x ⰰ ..ⱞ → Ⰰ ..Ⱞ Glagolitic */
                {0x2d00, 0x2d25, -7264}, /* 38x ⴀ ..ⴥ → Ⴀ ..Ⴥ Georgian2 */
                {0x2d27, 0x2d27, -7264}, /* 1x ⴧ ..ⴧ → Ⴧ ..Ⴧ Georgian2 */
                {0x2d2d, 0x2d2d, -7264}, /* 1x ⴭ ..ⴭ → Ⴭ ..Ⴭ Georgian2 */
                {0xff41, 0xff5a, -32}, /* 26x a..z → A..Z Dubs */
            };
            /* find first row whose upper bound isn't below c */
            l = 0;
            r = n = sizeof(kUpper) / sizeof(kUpper[0]);
            while (l < r) {
                m = (l + r) >> 1;
                if (kUpper[m].b < c) {
                    l = m + 1;
                } else {
                    r = m;
                }
            }
            if (l < n && kUpper[l].a <= c && c <= kUpper[l].b) {
                return c + kUpper[l].d;
            } else {
                return c;
            }
        }
    } else {
        /* must stay sorted by code point with a <= b in every row, since
           the binary search below relies on it; every mathematical
           alphabet keeps its own font style */
        static const struct {
            unsigned a;
            unsigned b;
            short d;
        } kAstralUpper[] = {
            {0x10428, 0x1044f, -40}, /* 40x 𐐨..𐑏 → 𐐀..𐐧 Deseret */
            {0x104d8, 0x104fb, -40}, /* 36x 𐓘..𐓻 → 𐒰..𐓓 Osage */
            {0x1d41a, 0x1d433, -26}, /* 26x 𝐚..𝐳 → 𝐀..𝐙 Math */
            {0x1d456, 0x1d467, -26}, /* 18x 𝑖..𝑧 → 𝐼..𝑍 Math */
            {0x1d482, 0x1d49b, -26}, /* 26x 𝒂..𝒛 → 𝑨..𝒁 Math */
            {0x1d4c8, 0x1d4cf, -26}, /* 8x 𝓈..𝓏 → 𝒮..𝒵 Math */
            {0x1d4ea, 0x1d503, -26}, /* 26x 𝓪..𝔃 → 𝓐..𝓩 Math */
            {0x1d527, 0x1d52e, -26}, /* 8x 𝔧..𝔮 → 𝔍..𝔔 Math */
            {0x1d586, 0x1d59f, -26}, /* 26x 𝖆..𝖟 → 𝕬..𝖅 Math */
            {0x1d5ba, 0x1d5d3, -26}, /* 26x 𝖺..𝗓 → 𝖠..𝖹 Math */
            {0x1d5ee, 0x1d607, -26}, /* 26x 𝗮..𝘇 → 𝗔..𝗭 Math */
            {0x1d622, 0x1d63b, -26}, /* 26x 𝘢..𝘻 → 𝘈..𝘡 Math */
            {0x1d656, 0x1d66f, -26}, /* 26x sans-serif bold italic a..z → A..Z Math */
            {0x1d68a, 0x1d6a3, -26}, /* 26x 𝚊..𝚣 → 𝙰..𝚉 Math */
            {0x1d6c2, 0x1d6d2, -26}, /* 17x 𝛂..𝛒 → 𝚨..𝚸 Math */
            {0x1d6fc, 0x1d70c, -26}, /* 17x 𝛼..𝜌 → 𝛢..𝛲 Math */
            {0x1d736, 0x1d746, -26}, /* 17x 𝜶..𝝆 → 𝜜..𝜬 Math */
            {0x1d770, 0x1d780, -26}, /* 17x 𝝰..𝞀 → 𝝖..𝝦 Math */
            {0x1d7aa, 0x1d7ba, -26}, /* 17x sans-serif bold italic α..ρ → Α..Ρ Math */
        };
        /* find first row whose upper bound isn't below c */
        l = 0;
        r = n = sizeof(kAstralUpper) / sizeof(kAstralUpper[0]);
        while (l < r) {
            m = (l + r) >> 1;
            if (kAstralUpper[m].b < c) {
                l = m + 1;
            } else {
                r = m;
            }
        }
        if (l < n && kAstralUpper[l].a <= c && c <= kAstralUpper[l].b) {
            return c + kAstralUpper[l].d;
        } else {
            return c;
        }
    }
}
/**
 * Returns nonzero if 𝑐 is not a separator, i.e. the logical negation
 * of bestlineIsSeparator().
 */
char bestlineNotSeparator(unsigned c) {
    return bestlineIsSeparator(c) ? 0 : 1;
}
/**
 * Looks up 𝑐 in a table of {key, value} pairs.
 *
 * @param A is table whose rows are sorted ascending by A[i][0]
 * @param n is number of rows in A
 * @param c is key to search for
 * @return A[i][1] of the row whose key equals 𝑐, or 0 if absent
 */
static unsigned GetMirror(const unsigned short A[][2], size_t n, unsigned c) {
    int lo = 0;
    int hi = (int)(n - 1); /* becomes -1 for an empty table, so the loop is skipped */
    /* binary search over the closed interval [lo, hi] */
    while (lo <= hi) {
        int mid = (lo + hi) >> 1;
        unsigned key = A[mid][0];
        if (key == c)
            return A[mid][1];
        if (key < c)
            lo = mid + 1;
        else
            hi = mid - 1;
    }
    return 0;
}
/**
 * Returns the opening bracket that pairs with closing bracket `c`.
 *
 * The table is searched with a binary search, so its rows MUST stay
 * sorted by ascending key (the closing bracket). Code points are spelled
 * numerically so the ordering can be audited at a glance and cannot be
 * disturbed by Unicode normalization of the source text. The final four
 * rows are the fullwidth / halfwidth forms; listing the ASCII brackets
 * there a second time would break the ordering and hide U+FE5E from the
 * search.
 *
 * @param c is a code point
 * @return the matching opening bracket, or 0 if c is not a closing bracket
 */
unsigned bestlineMirrorLeft(unsigned c) {
    static const unsigned short kMirrorRight[][2] = {
        /* ASCII: parenthesis, square bracket, curly bracket */
        {0x0029, 0x0028}, {0x005D, 0x005B}, {0x007D, 0x007B},
        /* quill bracket, superscript and subscript parenthesis */
        {0x2046, 0x2045}, {0x207E, 0x207D}, {0x208E, 0x208D},
        /* ceiling, floor, angle bracket (Miscellaneous Technical) */
        {0x2309, 0x2308}, {0x230B, 0x230A}, {0x232A, 0x2329},
        /* ornamental brackets (Dingbats) */
        {0x2769, 0x2768}, {0x276B, 0x276A}, {0x276D, 0x276C}, {0x276F, 0x276E},
        {0x2771, 0x2770}, {0x2773, 0x2772}, {0x2775, 0x2774},
        /* Miscellaneous Mathematical Symbols-A */
        {0x27C6, 0x27C5}, {0x27E7, 0x27E6}, {0x27E9, 0x27E8}, {0x27EB, 0x27EA},
        {0x27ED, 0x27EC}, {0x27EF, 0x27EE},
        /* Miscellaneous Mathematical Symbols-B */
        {0x2984, 0x2983}, {0x2986, 0x2985}, {0x2988, 0x2987}, {0x298A, 0x2989},
        {0x298C, 0x298B}, {0x298E, 0x298F}, {0x2990, 0x298D}, {0x2992, 0x2991},
        {0x2994, 0x2993}, {0x2998, 0x2997}, {0x29D9, 0x29D8}, {0x29DB, 0x29DA},
        {0x29FD, 0x29FC},
        /* Small Form Variants */
        {0xFE5A, 0xFE59}, {0xFE5C, 0xFE5B}, {0xFE5E, 0xFE5D},
        /* fullwidth parenthesis / square / curly, halfwidth corner bracket */
        {0xFF09, 0xFF08}, {0xFF3D, 0xFF3B}, {0xFF5D, 0xFF5B}, {0xFF63, 0xFF62},
    };
    return GetMirror(kMirrorRight, sizeof(kMirrorRight) / sizeof(kMirrorRight[0]), c);
}
/**
 * Returns the closing bracket that pairs with opening bracket `c`.
 *
 * The table is searched with a binary search, so its rows MUST stay
 * sorted by ascending key (the opening bracket). Code points are spelled
 * numerically so the ordering can be audited at a glance and cannot be
 * disturbed by Unicode normalization of the source text. The final four
 * rows are the fullwidth / halfwidth forms; listing the ASCII brackets
 * there a second time would break the ordering and hide U+FE5D from the
 * search.
 *
 * @param c is a code point
 * @return the matching closing bracket, or 0 if c is not an opening bracket
 */
unsigned bestlineMirrorRight(unsigned c) {
    static const unsigned short kMirrorLeft[][2] = {
        /* ASCII: parenthesis, square bracket, curly bracket */
        {0x0028, 0x0029}, {0x005B, 0x005D}, {0x007B, 0x007D},
        /* quill bracket, superscript and subscript parenthesis */
        {0x2045, 0x2046}, {0x207D, 0x207E}, {0x208D, 0x208E},
        /* ceiling, floor, angle bracket (Miscellaneous Technical) */
        {0x2308, 0x2309}, {0x230A, 0x230B}, {0x2329, 0x232A},
        /* ornamental brackets (Dingbats) */
        {0x2768, 0x2769}, {0x276A, 0x276B}, {0x276C, 0x276D}, {0x276E, 0x276F},
        {0x2770, 0x2771}, {0x2772, 0x2773}, {0x2774, 0x2775},
        /* Miscellaneous Mathematical Symbols-A */
        {0x27C5, 0x27C6}, {0x27E6, 0x27E7}, {0x27E8, 0x27E9}, {0x27EA, 0x27EB},
        {0x27EC, 0x27ED}, {0x27EE, 0x27EF},
        /* Miscellaneous Mathematical Symbols-B */
        {0x2983, 0x2984}, {0x2985, 0x2986}, {0x2987, 0x2988}, {0x2989, 0x298A},
        {0x298B, 0x298C}, {0x298D, 0x2990}, {0x298F, 0x298E}, {0x2991, 0x2992},
        {0x2993, 0x2994}, {0x2997, 0x2998}, {0x29D8, 0x29D9}, {0x29DA, 0x29DB},
        {0x29FC, 0x29FD},
        /* Small Form Variants */
        {0xFE59, 0xFE5A}, {0xFE5B, 0xFE5C}, {0xFE5D, 0xFE5E},
        /* fullwidth parenthesis / square / curly, halfwidth corner bracket */
        {0xFF08, 0xFF09}, {0xFF3B, 0xFF3D}, {0xFF5B, 0xFF5D}, {0xFF62, 0xFF63},
    };
    return GetMirror(kMirrorLeft, sizeof(kMirrorLeft) / sizeof(kMirrorLeft[0]), c);
}
/**
 * Tells whether string `s` begins with `prefix`.
 *
 * @return 1 if every byte of prefix matches the start of s (an empty
 *     prefix always matches), otherwise 0
 */
static char StartsWith(const char *s, const char *prefix) {
    while (*prefix) {
        /* also covers s ending early: its NUL differs from *prefix */
        if (*s != *prefix)
            return 0;
        ++s;
        ++prefix;
    }
    return 1;
}
/**
 * Tells whether string `s` ends with `suffix`.
 *
 * @return 1 if the last strlen(suffix) bytes of s equal suffix (an empty
 *     suffix always matches), otherwise 0
 */
static char EndsWith(const char *s, const char *suffix) {
    size_t slen = strlen(s);
    size_t tail = strlen(suffix);
    if (tail <= slen && memcmp(s + (slen - tail), suffix, tail) == 0)
        return 1;
    return 0;
}
/**
 * Tells whether `c` is a separator that is not a bracket, i.e. a
 * separator for which neither bestlineMirrorLeft() nor
 * bestlineMirrorRight() reports a partner.
 */
char bestlineIsXeparator(unsigned c) {
    if (!bestlineIsSeparator(c))
        return 0;
    if (bestlineMirrorLeft(c))
        return 0;
    if (bestlineMirrorRight(c))
        return 0;
    return 1;
}
/**
 * Character translator that uppercases only the first code point it is
 * handed after the file-level `iscapital` flag was cleared; once the flag
 * is set every code point is returned unchanged.
 */
static unsigned Capitalize(unsigned c) {
    if (iscapital)
        return c;
    c = bestlineUppercase(c);
    iscapital = 1;
    return c;
}
/**
 * Bit scan reverse: returns the index (0..63) of the most significant
 * set bit of `x`.
 *
 * The builtin path is undefined for x == 0, so callers must pass a
 * nonzero value.
 */
static inline int Bsr(unsigned long long x) {
#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
    /* clz counts from the top; xor with 63 flips that into a bit index */
    return (int)(__builtin_clzll(x) ^ (sizeof(unsigned long long) * CHAR_BIT - 1));
#else
    static const char kDebruijn[64] = {
        0,  47, 1,  56, 48, 27, 2,  60, 57, 49, 41, 37, 28, 16, 3,  61,
        54, 58, 35, 52, 50, 42, 21, 44, 38, 32, 29, 23, 17, 11, 4,  62,
        46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43, 31, 22, 10, 45,
        25, 39, 14, 33, 19, 30, 9,  24, 13, 18, 8,  12, 7,  6,  5,  63,
    };
    int shift;
    /* smear the top set bit into every lower position */
    for (shift = 1; shift < 64; shift <<= 1)
        x |= x >> shift;
    return kDebruijn[(x * 0x03f79d71b4cb0a89) >> 58];
#endif
}
/**
 * Decodes the first byte of a UTF-8 sequence.
 *
 * @param c is the byte value
 * @return rune whose `c` holds the payload bits carried by that byte and
 *     whose `n` holds 6 minus the index of the highest clear bit of the
 *     byte (5 for bytes >= 252)
 */
static struct rune DecodeUtf8(int c) {
    struct rune r;
    int top; /* index of the highest zero bit among the low eight */
    if (c >= 252) {
        r.c = c & 3;
        r.n = 5;
        return r;
    }
    top = Bsr(255 & ~c);
    r.c = c & (((1 << top) - 1) | 3);
    r.n = 6 - top;
    return r;
}
/**
 * Encodes code point `c` as UTF-8 packed into an integer.
 *
 * The first byte of the encoding sits in the least significant byte of
 * the result, which is the layout abAppendw() writes out.
 *
 * @param c is the code point
 * @return the packed byte sequence
 */
static unsigned long long EncodeUtf8(unsigned c) {
    /* indexed by Bsr(c) - 7: continuation byte count | lead byte marker << 8 */
    static const unsigned short kTpEnc[32 - 7] = {
        1 | 0300 << 8, 1 | 0300 << 8, 1 | 0300 << 8, 1 | 0300 << 8,
        2 | 0340 << 8, 2 | 0340 << 8, 2 | 0340 << 8, 2 | 0340 << 8, 2 | 0340 << 8,
        3 | 0360 << 8, 3 | 0360 << 8, 3 | 0360 << 8, 3 | 0360 << 8, 3 | 0360 << 8,
        4 | 0370 << 8, 4 | 0370 << 8, 4 | 0370 << 8, 4 | 0370 << 8, 4 | 0370 << 8,
        5 | 0374 << 8, 5 | 0374 << 8, 5 | 0374 << 8, 5 | 0374 << 8, 5 | 0374 << 8,
        5 | 0374 << 8,
    };
    unsigned long long w;
    int e, n;
    if (c < 0200)
        return c; /* ASCII encodes as itself */
    e = kTpEnc[Bsr(c) - 7];
    w = 0;
    /* emit continuation bytes from the low six bits upward */
    for (n = e & 0xff; n; --n) {
        w = (w | 0200 | (c & 077)) << 8;
        c >>= 6;
    }
    return c | w | e >> 8;
}
/**
 * Decodes the UTF-8 character at the start of buffer `p`.
 *
 * @param p is the buffer
 * @param n is the number of bytes available in p
 * @return rune whose `c` is the decoded value and whose `n` is the number
 *     of bytes consumed; both are zero when n is zero
 */
static struct rune GetUtf8(const char *p, size_t n) {
    struct rune r;
    r.c = 0;
    r.n = 0;
    if (!n)
        return r;
    r.c = p[0] & 255;
    r.n = 1;
    if (r.c >= 0300) {
        r.c = DecodeUtf8(r.c).c;
        /* fold in however many continuation bytes actually follow */
        while (r.n < n && (p[r.n] & 0300) == 0200) {
            r.c = r.c << 6 | (p[r.n] & 077);
            ++r.n;
        }
    }
    return r;
}
/**
 * Writes `x` to `p` as a NUL-terminated decimal string.
 *
 * @param p is the output buffer, which needs room for the digits and NUL
 * @param x is the number to format
 * @return pointer to the terminating NUL inside p
 */
static char *FormatUnsigned(char *p, unsigned x) {
    char digits[3 * sizeof(unsigned)]; /* least significant digit first */
    size_t count = 0;
    do {
        digits[count++] = (char)('0' + x % 10);
        x /= 10;
    } while (x);
    while (count)
        *p++ = digits[--count];
    *p = '\0';
    return p;
}
/**
 * Initializes append buffer `a` as an empty string with 16 bytes of
 * capacity. The allocation result is not checked.
 */
static void abInit(struct abuf *a) {
    char *mem;
    a->cap = 16;
    a->len = 0;
    mem = (char *)malloc(a->cap);
    mem[0] = 0;
    a->b = mem;
}
/**
 * Enlarges append buffer `a` so its capacity is at least `need` bytes.
 *
 * Capacity grows by half its size per step, and always by at least one
 * step. On allocation failure the buffer is left untouched.
 *
 * @return 1 on success, 0 if realloc() failed
 */
static char abGrow(struct abuf *a, int need) {
    int newcap = a->cap;
    char *mem;
    do {
        newcap += newcap / 2;
    } while (newcap < need);
    mem = (char *)realloc(a->b, newcap * sizeof(*a->b));
    if (!mem)
        return 0;
    a->b = mem;
    a->cap = newcap;
    return 1;
}
/**
 * Appends up to eight bytes packed little-endian in `w`.
 *
 * All eight bytes are stored, but the length only advances past the
 * highest nonzero byte (one byte when w is zero), so the zero padding
 * doubles as the string terminator. Silently does nothing if the buffer
 * cannot be grown.
 */
static void abAppendw(struct abuf *a, unsigned long long w) {
    char *dst;
    int k;
    if (a->len + 8 > a->cap && !abGrow(a, a->len + 8))
        return;
    dst = a->b + a->len;
    for (k = 0; k < 8; ++k)
        dst[k] = (w >> (8 * k)) & 0xFF;
    a->len += w ? (Bsr(w) >> 3) + 1 : 1;
}
/**
 * Appends `len` bytes from `s` and keeps the buffer NUL-terminated.
 * Silently does nothing if the buffer cannot be grown.
 */
static void abAppend(struct abuf *a, const char *s, int len) {
    char *dst;
    if (a->len + len + 1 > a->cap && !abGrow(a, a->len + len + 1))
        return;
    dst = a->b + a->len;
    memcpy(dst, s, len);
    dst[len] = 0;
    a->len += len;
}
/* Appends the NUL-terminated string s to append buffer a. */
static void abAppends(struct abuf *a, const char *s) {
abAppend(a, s, strlen(s));
}
/* Appends the decimal representation of u to append buffer a. */
static void abAppendu(struct abuf *a, unsigned u) {
    char digits[11];
    char *end = FormatUnsigned(digits, u);
    abAppend(a, digits, end - digits);
}
/* Releases the storage of append buffer a; the pointer is cleared so a second call is harmless. */
static void abFree(struct abuf *a) {
free(a->b);
a->b = 0;
}
/**
 * Returns the size fstat() reports for `fd`.
 *
 * The size field is zeroed beforehand and the fstat() result is not
 * checked, so a failed call yields whatever the field holds afterwards.
 */
static size_t GetFdSize(int fd) {
    struct stat info;
    info.st_size = 0;
    (void)fstat(fd, &info);
    return info.st_size;
}
/**
 * Tells whether `fd` refers to a character device according to fstat().
 *
 * The mode field is zeroed beforehand and the fstat() result is not
 * checked.
 */
static char IsCharDev(int fd) {
    struct stat info;
    info.st_mode = 0;
    (void)fstat(fd, &info);
    if ((info.st_mode & S_IFMT) == S_IFCHR)
        return 1;
    return 0;
}
/* Prototypes of the default I/O primitives; their definitions are not in this part of the file. */
static int MyRead(int fd, void *c, int);
static int MyWrite(int fd, const void *c, int);
static int MyPoll(int fd, int events, int to);
/* Function pointers through which the code below reads, writes and polls; they start out bound to the defaults above. */
static int (*_MyRead)(int fd, void *c, int n) = MyRead;
static int (*_MyWrite)(int fd, const void *c, int n) = MyWrite;
static int (*_MyPoll)(int fd, int events, int to) = MyPoll;
/* Polls fd for the given events through _MyPoll with a timeout of -1 and returns the poll result. */
static int WaitUntilReady(int fd, int events) {
return _MyPoll(fd, events, -1);
}
/* Returns 1 if a zero-timeout poll for POLLIN on fd returns exactly 1, otherwise 0. */
static char HasPendingInput(int fd) {
return _MyPoll(fd, POLLIN, 0) == 1;
}
static char *GetLineBlock(FILE *f) {
ssize_t rc;
char *p = 0;
size_t n, c = 0;
if ((rc = getdelim(&p, &c, '\n', f)) != EOF) {
for (n = rc; n; --n) {
if (p[n - 1] == '\r' || p[n - 1] == '\n') {
p[n - 1] = 0;
} else {
break;
}
}
return p;
} else {
free(p);
return 0;
}
}
/**
 * Reads one complete keystroke from fd.
 *
 * Bytes are fetched one at a time through _MyRead and fed to a small
 * state machine until a whole unit has been consumed: a single ASCII
 * byte, a UTF-8 encoded character, or an ANSI escape / control sequence.
 *
 * @param fd is the input file descriptor
 * @param p receives the raw bytes and is kept NUL-terminated when n > 0;
 *     bytes that do not fit are dropped but still counted
 * @param n is the capacity of p in bytes; with zero a zero-length read
 *     is issued instead
 * @return number of bytes consumed, 0 if end of file arrives before any
 *     byte was read, or -1 w/ errno (EINTR when interrupted, EILSEQ when
 *     end of file arrives in the middle of a sequence)
 */
long bestlineReadCharacter(int fd, char *p, unsigned long n) {
int e;
size_t i;
ssize_t rc;
struct rune r;
unsigned char c;
enum { kAscii, kUtf8, kEsc, kCsi1, kCsi2, kSs, kNf, kStr, kStr2, kDone } t;
i = 0;
r.c = 0;
r.n = 0;
e = errno; /* restored on the success and clean-eof paths */
t = kAscii;
if (n)
p[0] = 0;
do {
/* fetch the next byte, retrying on EINTR / EAGAIN as appropriate */
for (;;) {
if (gotint) {
errno = EINTR;
return -1;
}
if (n) {
rc = _MyRead(fd, &c, 1);
} else {
rc = _MyRead(fd, 0, 0);
}
if (rc == -1 && errno == EINTR) {
/* only surface the interrupt if no byte was consumed yet */
if (!i) {
return -1;
}
} else if (rc == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
/* non-blocking descriptor: wait for input, then retry the read */
if (WaitUntilReady(fd, POLLIN) == -1) {
if (rc == -1 && errno == EINTR) {
if (!i) {
return -1;
}
} else {
return -1;
}
}
} else if (rc == -1) {
return -1;
} else if (!rc) {
if (!i) {
errno = e;
return 0;
} else {
errno = EILSEQ;
return -1;
}
} else {
break;
}
}
/* store the byte if it fits, always leaving room for the terminator */
if (i + 1 < n) {
p[i] = c;
p[i + 1] = 0;
} else if (i < n) {
p[i] = 0;
}
++i;
switch (t) {
/* recovery label: restart parsing with the current byte as the first byte */
Whoopsie:
if (n)
p[0] = c;
t = kAscii;
i = 1;
/* fallthrough */
case kAscii:
if (c < 0200) {
if (c == 033) {
t = kEsc;
} else {
t = kDone;
}
} else if (c >= 0300) {
t = kUtf8;
r = DecodeUtf8(c);
} else {
/* ignore overlong sequences */
}
break;
case kUtf8:
if ((c & 0300) == 0200) {
r.c <<= 6;
r.c |= c & 077;
if (!--r.n) {
/* character complete: C1 controls may open a longer sequence */
switch (r.c) {
case 033:
t = kEsc; /* parsed but not canonicalized */
break;
case 0x9b:
t = kCsi1; /* unusual but legal */
break;
case 0x8e: /* SS2 (Single Shift Two) */
case 0x8f: /* SS3 (Single Shift Three) */
t = kSs;
break;
case 0x90: /* DCS (Device Control String) */
case 0x98: /* SOS (Start of String) */
case 0x9d: /* OSC (Operating System Command) */
case 0x9e: /* PM (Privacy Message) */
case 0x9f: /* APC (Application Program Command) */
t = kStr;
break;
default:
t = kDone;
break;
}
}
} else {
goto Whoopsie; /* ignore underlong sequences if not eof */
}
break;
case kEsc:
if (0x20 <= c && c <= 0x2f) { /* Nf */
/*
* Almost no one uses ANSI Nf sequences
* They overlaps with alt+graphic keystrokes
* We care more about being able to type alt-/
*/
if (c == ' ' || c == '#') {
t = kNf;
} else {
t = kDone;
}
} else if (0x30 <= c && c <= 0x3f) { /* Fp */
t = kDone;
} else if (0x20 <= c && c <= 0x5F) { /* Fe */
switch (c) {
case '[':
t = kCsi1;
break;
case 'N': /* SS2 (Single Shift Two) */
case 'O': /* SS3 (Single Shift Three) */
t = kSs;
break;
case 'P': /* DCS (Device Control String) */
case 'X': /* SOS (Start of String) */
case ']': /* OSC (Operating System Command) */
case '^': /* PM (Privacy Message) */
case '_': /* APC (Application Program Command) */
t = kStr;
break;
default:
t = kDone;
break;
}
} else if (0x60 <= c && c <= 0x7e) { /* Fs */
t = kDone;
} else if (c == 033) {
if (i < 3) {
/* alt chording */
} else {
t = kDone; /* esc mashing */
i = 1;
}
} else {
t = kDone;
}
break;
case kSs:
/* a single shift is followed by exactly one more byte */
t = kDone;
break;
case kNf:
if (0x30 <= c && c <= 0x7e) {
t = kDone;
} else if (!(0x20 <= c && c <= 0x2f)) {
goto Whoopsie;
}
break;
case kCsi1:
/* parameter bytes 0x30..0x3f keep us here; a final byte ends it */
if (0x20 <= c && c <= 0x2f) {
t = kCsi2;
} else if (c == '[' && ((i == 3) || (i == 4 && p[1] == 033))) {
/* linux function keys */
} else if (0x40 <= c && c <= 0x7e) {
t = kDone;
} else if (!(0x30 <= c && c <= 0x3f)) {
goto Whoopsie;
}
break;
case kCsi2:
/* intermediate bytes 0x20..0x2f keep us here; a final byte ends it */
if (0x40 <= c && c <= 0x7e) {
t = kDone;
} else if (!(0x20 <= c && c <= 0x2f)) {
goto Whoopsie;
}
break;
case kStr:
/* control string body: runs until BEL or a string terminator */
switch (c) {
case '\a':
t = kDone;
break;
case 0033: /* ESC */
case 0302: /* C1 (UTF-8) */
t = kStr2;
break;
default:
break;
}
break;
case kStr2:
switch (c) {
case '\a':
case '\\': /* ST (ASCII) */
case 0234: /* ST (UTF-8) */
t = kDone;
break;
default:
t = kStr;
break;
}
break;
default:
assert(0);
}
} while (t != kDone);
errno = e;
return i;
}
/**
 * Reads a line one keystroke at a time, without line editing.
 *
 * GetLine() uses this when the input is a character device. SIGINT and
 * SIGQUIT are routed to bestlineOnInt for the duration of the call and
 * restored afterwards; if one arrived, it is re-raised once the previous
 * handlers are back in place.
 *
 * @param fin is the input file descriptor
 * @param fout is the output file descriptor, used to echo a newline when
 *     a lone carriage return ends the line
 * @return newly allocated line that the caller must free(), or NULL on
 *     error, interrupt, or end of file with nothing typed
 */
static char *GetLineChar(int fin, int fout) {
    size_t got;
    ssize_t rc;
    char seq[16];
    struct abuf a;
    struct sigaction sa[3]; /* [0] ours, [1] old SIGINT, [2] old SIGQUIT */
    abInit(&a);
    gotint = 0;
    sigemptyset(&sa->sa_mask);
    sa->sa_flags = 0;
    sa->sa_handler = bestlineOnInt;
    sigaction(SIGINT, sa, sa + 1);
    sigaction(SIGQUIT, sa, sa + 2);
    for (;;) {
        if (gotint) {
            rc = -1;
            break;
        }
        if ((rc = bestlineReadCharacter(fin, seq, sizeof(seq))) == -1) {
            if (errno == EAGAIN || errno == EWOULDBLOCK) {
                if (WaitUntilReady(fin, POLLIN) > 0) {
                    continue;
                }
            }
            if (errno == EINTR) {
                continue;
            } else {
                break;
            }
        }
        if (!(got = rc)) {
            /* end of file: keep what was typed, if anything */
            if (a.len) {
                break;
            } else {
                rc = -1;
                break;
            }
        }
        if (seq[0] == '\r') {
            if (HasPendingInput(fin)) {
                /*
                 * The follow-up keystroke lands in seq[1], so that is
                 * the byte to test for the line feed of a CR LF pair;
                 * seq[0] is known to be '\r' here. Any other follow-up
                 * keystroke is discarded, as before.
                 */
                if ((rc = bestlineReadCharacter(fin, seq + 1, sizeof(seq) - 1)) > 0) {
                    if (seq[1] == '\n') {
                        break;
                    }
                } else {
                    rc = -1;
                    break;
                }
            } else {
                _MyWrite(fout, "\n", 1);
                break;
            }
        } else if (seq[0] == Ctrl('D')) {
            break;
        } else if (seq[0] == '\n') {
            break;
        } else if (seq[0] == '\b') {
            /* erase one whole UTF-8 character */
            while (a.len && (a.b[a.len - 1] & 0300) == 0200)
                --a.len;
            if (a.len)
                --a.len;
        }
        if (!IsControl(seq[0])) {
            abAppend(&a, seq, got);
        }
    }
    sigaction(SIGQUIT, sa + 2, 0);
    sigaction(SIGINT, sa + 1, 0);
    if (gotint) {
        abFree(&a);
        raise(gotint);
        errno = EINTR;
        rc = -1;
    }
    if (rc != -1) {
        return a.b;
    } else {
        abFree(&a); /* harmless if already freed above */
        return 0;
    }
}
/**
 * Reads a line without line editing, choosing the reader by input type:
 * character devices are read keystroke by keystroke, anything else is
 * read through stdio.
 */
static char *GetLine(FILE *in, FILE *out) {
    if (IsCharDev(fileno(in)))
        return GetLineChar(fileno(in), fileno(out));
    return GetLineBlock(in);
}
/**
 * Copies `n` bytes from `s` to `d`.
 *
 * @return pointer just past the last byte written, i.e. d + n
 */
static char *Copy(char *d, const char *s, size_t n) {
    return (char *)memcpy(d, s, n) + n;
}
/**
 * Compares two NUL-terminated strings byte by byte after passing each
 * byte through bestlineLowercase().
 *
 * @return zero if equal, otherwise the difference between the first pair
 *     of differing lowercased bytes
 */
static int CompareStrings(const char *a, const char *b) {
    int lhs, rhs;
    for (;; ++a, ++b) {
        lhs = bestlineLowercase(*a & 255);
        rhs = bestlineLowercase(*b & 255);
        if (lhs != rhs)
            return lhs - rhs;
        if (!lhs)
            return 0;
    }
}
/**
 * Finds the last occurrence of `q` (m bytes) inside `p` (n bytes).
 *
 * @return pointer to the start of the rightmost match, or NULL if there
 *     is none; an empty needle matches at p + n
 */
static const char *FindSubstringReverse(const char *p, size_t n, const char *q, size_t m) {
    size_t start, matched;
    if (m > n)
        return 0;
    /* slide the window from the rightmost candidate down to offset zero */
    for (start = n - m;; --start) {
        matched = 0;
        while (matched < m && p[start + matched] == q[matched])
            ++matched;
        if (matched == m)
            return p + start;
        if (!start)
            return 0;
    }
}
/**
 * Parses a run of leading decimal digits.
 *
 * The value saturates at 32767. The end pointer follows the strtoul()
 * convention and is left ON the first character that is not a digit
 * (the NUL terminator for an all-digit string), so a caller can inspect
 * the delimiter, e.g. the ';' in a cursor position report.
 *
 * @param s is the NUL-terminated input
 * @param e if non-NULL, is a `const char **` that receives the address
 *     of the first unparsed character
 * @return the parsed value, or 0 if s does not start with a digit
 */
static int ParseUnsigned(const char *s, void *e) {
    int x = 0;
    while ('0' <= *s && *s <= '9') {
        x = x * 10 + (*s - '0');
        if (x > 32767)
            x = 32767;
        ++s;
    }
    if (e)
        *(const char **)e = s;
    return x;
}
/**
 * Returns UNICODE CJK Monospace Width of string.
 *
 * Control codes and ANSI sequences have a width of zero. We only parse
 * a limited subset of ANSI here since we don't store ANSI codes in the
 * bestlineState::buf, but we do encourage CSI color codes in prompts.
 *
 * @param p is the UTF-8 text to measure
 * @param n is the number of bytes in p
 * @param out_haswides if non-NULL, receives nonzero when some character
 *     occupies more than one cell
 * @return total width in terminal cells
 */
static size_t GetMonospaceWidth(const char *p, size_t n, char *out_haswides) {
int c, d;
size_t i, w;
struct rune r;
char haswides;
enum { kAscii, kUtf8, kEsc, kCsi1, kCsi2 } t;
for (haswides = r.c = r.n = w = i = 0, t = kAscii; i < n; ++i) {
c = p[i] & 255;
switch (t) {
/* recovery label: reinterpret the current byte as the start of new text */
Whoopsie:
t = kAscii;
/* fallthrough */
case kAscii:
if (c < 0200) {
if (c == 033) {
t = kEsc;
} else {
/* every other byte below 0200 counts as one cell */
++w;
}
} else if (c >= 0300) {
t = kUtf8;
r = DecodeUtf8(c);
}
break;
case kUtf8:
if ((c & 0300) == 0200) {
r.c <<= 6;
r.c |= c & 077;
if (!--r.n) {
/* character complete; negative widths are counted as zero */
d = bestlineCharacterWidth(r.c);
d = Max(0, d);
w += d;
haswides |= d > 1;
t = kAscii;
break;
}
} else {
goto Whoopsie;
}
break;
case kEsc:
/* only CSI is recognized; any other byte after ESC is skipped */
if (c == '[') {
t = kCsi1;
} else {
t = kAscii;
}
break;
case kCsi1:
/* parameter bytes 0x30..0x3f keep us here; a final byte ends it */
if (0x20 <= c && c <= 0x2f) {
t = kCsi2;
} else if (0x40 <= c && c <= 0x7e) {
t = kAscii;
} else if (!(0x30 <= c && c <= 0x3f)) {
goto Whoopsie;
}
break;
case kCsi2:
/* intermediate bytes 0x20..0x2f keep us here; a final byte ends it */
if (0x40 <= c && c <= 0x7e) {
t = kAscii;
} else if (!(0x20 <= c && c <= 0x2f)) {
goto Whoopsie;
}
break;
default:
assert(0);
}
}
if (out_haswides) {
*out_haswides = haswides;
}
return w;
}
/**
 * Tells whether $TERM names one of the terminals in kUnsupported.
 *
 * The environment is consulted on the first call only; the answer is
 * cached in function-local statics afterwards.
 *
 * @return 1 if $TERM matches an entry (per CompareStrings), else 0
 */
static int bestlineIsUnsupportedTerm(void) {
    static char once, res;
    size_t k, count;
    char *term;
    if (once)
        return res;
    term = getenv("TERM");
    if (term) {
        count = sizeof(kUnsupported) / sizeof(*kUnsupported);
        for (k = 0; k < count && !res; ++k) {
            if (!CompareStrings(term, kUnsupported[k]))
                res = 1;
        }
    }
    once = 1;
    return res;
}
/**
 * Switches terminal `fd` into raw mode for line editing.
 *
 * The current settings are saved in orig_termios, handlers for SIGCONT
 * and SIGWINCH are installed (the previous ones are saved in orig_cont
 * and orig_winch), and fd is recorded in `rawmode`.
 *
 * @return 0 on success, or -1 w/ errno set to ENOTTY if the terminal
 *     attributes could not be read or changed
 */
static int enableRawMode(int fd) {
    struct termios raw;
    struct sigaction sa;
    if (tcgetattr(fd, &orig_termios) == -1) {
        errno = ENOTTY;
        return -1;
    }
    raw = orig_termios;
    raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON);
    raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG);
    raw.c_iflag |= IUTF8;
    raw.c_cflag |= CS8;
    raw.c_cc[VMIN] = 1; /* read() returns as soon as one byte is there */
    raw.c_cc[VTIME] = 0;
    if (tcsetattr(fd, TCSANOW, &raw) == -1) {
        errno = ENOTTY;
        return -1;
    }
    sa.sa_flags = 0;
    sa.sa_handler = bestlineOnCont;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGCONT, &sa, &orig_cont);
    sa.sa_handler = bestlineOnWinch;
    sigaction(SIGWINCH, &sa, &orig_winch);
    rawmode = fd;
    gotwinch = 0;
    gotcont = 0;
    return 0;
}
/* Resumes suspended terminal output on fd via tcflow(TCOON) and clears the `ispaused` flag; no-op when not paused. */
static void bestlineUnpause(int fd) {
    if (!ispaused)
        return;
    tcflow(fd, TCOON);
    ispaused = 0;
}
/**
 * Undoes enableRawMode(): resumes paused output, restores the saved
 * SIGCONT / SIGWINCH handlers and terminal attributes, and marks raw
 * mode as off. Does nothing when raw mode is not active.
 */
void bestlineDisableRawMode(void) {
    if (rawmode == -1)
        return;
    bestlineUnpause(rawmode);
    sigaction(SIGCONT, &orig_cont, 0);
    sigaction(SIGWINCH, &orig_winch, 0);
    tcsetattr(rawmode, TCSANOW, &orig_termios);
    rawmode = -1;
}
/**
 * Writes all `n` bytes of `p` to `fd`, retrying short writes.
 *
 * EINTR is retried and EAGAIN / EWOULDBLOCK waits for the descriptor to
 * become writable. Before every attempt the interrupt and pause flags
 * are consulted: a pending interrupt fails with EINTR, while paused
 * output makes the call return success without writing the remainder.
 *
 * @return 0 on success (or when paused), -1 w/ errno on failure
 */
static int bestlineWrite(int fd, const void *p, size_t n) {
    ssize_t rc;
    for (;;) {
        if (gotint) {
            errno = EINTR;
            return -1;
        }
        if (ispaused)
            return 0;
        rc = _MyWrite(fd, p, n);
        if (rc == -1) {
            if (errno == EINTR)
                continue;
            if (errno != EAGAIN && errno != EWOULDBLOCK)
                return -1;
            if (WaitUntilReady(fd, POLLOUT) == -1 && errno != EINTR)
                return -1;
            continue;
        }
        n -= (size_t)rc;
        p = (char *)p + (size_t)rc;
        if (!n)
            return 0;
    }
}
/* Writes the NUL-terminated string p to fd via bestlineWrite() and returns its result. */
static int bestlineWriteStr(int fd, const char *p) {
return bestlineWrite(fd, p, strlen(p));
}
/**
 * Reads one keystroke, servicing pending signal flags first.
 *
 * Before each read attempt: a pending interrupt fails with EINTR, a
 * pending SIGCONT re-enters raw mode, and with an editing state either a
 * SIGCONT or a window resize triggers a redraw. Reads that fail with
 * EINTR are retried.
 *
 * @param fd is the input file descriptor
 * @param buf receives the keystroke bytes
 * @param size is the capacity of buf
 * @param l is the editing state, or NULL; when given, the previous
 *     keystroke is moved to l->seq[1] and the new one is stored in
 *     l->seq[0]
 * @return what bestlineReadCharacter() returned
 */
static ssize_t bestlineRead(int fd, char *buf, size_t size, struct bestlineState *l) {
    size_t got;
    ssize_t rc;
    int redraw;
    for (;;) {
        redraw = 0;
        if (gotint) {
            errno = EINTR;
            return -1;
        }
        if (gotcont && rawmode != -1) {
            enableRawMode(rawmode);
            if (l)
                redraw = 1;
        }
        if (gotwinch && l)
            redraw = 1;
        if (redraw)
            bestlineRefreshLine(l);
        rc = bestlineReadCharacter(fd, buf, size);
        if (rc != -1 || errno != EINTR)
            break;
    }
    if (rc == -1)
        return rc;
    got = rc;
    if (got > 0 && l) {
        /* shift the keystroke history by one and record the new one */
        memcpy(l->seq[1], l->seq[0], sizeof(l->seq[0]));
        memset(l->seq[0], 0, sizeof(l->seq[0]));
        memcpy(l->seq[0], buf, Min(Min(size, got), sizeof(l->seq[0]) - 1));
    }
    return rc;
}
/**
 * Returns the dimensions of the current terminal.
 *
 * 1. Asks the tty driver via ioctl(TIOCGWINSZ) on the output descriptor
 * 2. Fills in dimensions that are still zero from the ROWS and COLUMNS
 *    environment variables
 * 3. Falls back to inband signalling (works w/ pipe or serial)
 * 4. Otherwise we conservatively assume 80 columns and 24 rows
 *
 * @param ws should be initialized by caller to zero before first call
 * @param ifd is input file descriptor
 * @param ofd is output file descriptor
 * @return window size
 */
static struct winsize GetTerminalSize(struct winsize ws, int ifd, int ofd) {
int x;
ssize_t n;
char *p, *s, b[16];
/* the result is not checked: on failure ws keeps the caller's values */
ioctl(ofd, TIOCGWINSZ, &ws);
if ((!ws.ws_row && (s = getenv("ROWS")) && (x = ParseUnsigned(s, 0)))) {
ws.ws_row = x;
}
if ((!ws.ws_col && (s = getenv("COLUMNS")) && (x = ParseUnsigned(s, 0)))) {
ws.ws_col = x;
}
/*
* inband probe: park the cursor far beyond the bottom right corner and
* ask where it ended up; the reply is expected as ESC [ row ; col R
*/
if (((!ws.ws_col || !ws.ws_row) && bestlineRead(ifd, 0, 0, 0) != -1 &&
bestlineWriteStr(ofd, "\0337" /* save position */
"\033[9979;9979H" /* move cursor to bottom right corner */
"\033[6n" /* report position */
"\0338") != -1 && /* restore position */
(n = bestlineRead(ifd, b, sizeof(b), 0)) != -1 &&
n && b[0] == 033 && b[1] == '[' && b[n - 1] == 'R')) {
p = b + 2;
if ((x = ParseUnsigned(p, &p)))
ws.ws_row = x;
/* expects p to sit on the ';' that separates row from column */
if (*p++ == ';' && (x = ParseUnsigned(p, 0)))
ws.ws_col = x;
}
if (!ws.ws_col)
ws.ws_col = 80;
if (!ws.ws_row)
ws.ws_row = 24;
return ws;
}
/* Clears the screen by homing the cursor and erasing the display on fd. Used to handle ctrl+l. The write result is ignored. */
void bestlineClearScreen(int fd) {
bestlineWriteStr(fd, "\033[H" /* move cursor to top left corner */
"\033[2J"); /* erase display */
}
/* Intentionally a no-op: call sites that would ring the terminal bell end up here and nothing is emitted. */
static void bestlineBeep(void) {
/* THE TERMINAL BELL IS DEAD - HISTORY HAS KILLED IT */
}
/**
 * Makes sure the edit buffer can hold at least `n` bytes.
 *
 * Capacity grows by half its size per step until it suffices. On
 * allocation failure the state is left untouched.
 *
 * @return 1 if the buffer is large enough afterwards, 0 if realloc()
 *     failed
 */
static char bestlineGrow(struct bestlineState *ls, size_t n) {
    size_t cap = ls->buflen;
    char *mem;
    if (cap >= n)
        return 1;
    do {
        cap += cap >> 1;
    } while (cap < n);
    mem = (char *)realloc(ls->buf, cap * sizeof(*ls->buf));
    if (!mem)
        return 0;
    ls->buflen = cap;
    ls->buf = mem;
    return 1;
}
/* This is a helper function for bestlineEdit() and is called when the
 * user types the completion key in order to complete the string
 * currently in the input.
 *
 * The state of the editing is encapsulated into the pointed bestlineState
 * structure as described in the structure definition.
 *
 * Candidates come from completionCallback. With a single candidate it is
 * accepted right away; otherwise each tab previews the next candidate
 * (wrapping around to the original input) and any other keystroke
 * accepts the candidate on display.
 *
 * Returns the size of the keystroke left in seq for the caller to
 * process (0 if there is none), or -1 if reading a keystroke failed. */
static ssize_t bestlineCompleteLine(struct bestlineState *ls, char *seq, int size) {
ssize_t nread;
size_t i, n, stop;
bestlineCompletions lc;
struct bestlineState original, saved;
nread = 0;
memset(&lc, 0, sizeof(lc));
completionCallback(ls->buf, ls->pos, &lc);
if (!lc.len) {
bestlineBeep();
} else {
i = 0;
stop = 0;
original = *ls;
while (!stop) {
/* Show completion or original buffer */
if (i < lc.len) {
/* temporarily point the state at the candidate just to draw it */
saved = *ls;
ls->len = strlen(lc.cvec[i]);
ls->pos = original.pos + ls->len - original.len;
ls->buf = lc.cvec[i];
bestlineRefreshLine(ls);
ls->len = saved.len;
ls->pos = saved.pos;
ls->buf = saved.buf;
if (lc.len == 1) {
/* sole candidate: accept it without waiting for a keystroke */
nread = 0;
goto FinishQuickly;
}
} else {
bestlineRefreshLine(ls);
}
if ((nread = bestlineRead(ls->ifd, seq, size, ls)) <= 0) {
bestlineFreeCompletions(&lc);
return -1;
}
switch (seq[0]) {
case '\t':
/* index lc.len stands for the original, uncompleted input */
i = (i + 1) % (lc.len + 1);
if (i == lc.len) {
bestlineBeep();
}
break;
default:
if (i < lc.len) {
FinishQuickly:
/* copy the chosen candidate, terminator included, into the buffer */
n = strlen(lc.cvec[i]);
if (bestlineGrow(ls, n + 1)) {
memcpy(ls->buf, lc.cvec[i], n + 1);
ls->len = n;
ls->pos = original.pos + n - original.len;
}
}
stop = 1;
break;
}
}
}
bestlineFreeCompletions(&lc);
return nread;
}
/**
 * Switches the edit buffer to history entry `i`.
 *
 * The line currently being edited is first saved back into its own
 * history slot so nothing typed is lost, then entry i is copied into the
 * buffer with the cursor at its end. Out-of-range indexes, and a history
 * of at most one entry, are ignored.
 *
 * @param i is the entry to show, counted back from the newest (0)
 */
static void bestlineEditHistoryGoto(struct bestlineState *l, unsigned i) {
    size_t n;
    const char *entry;
    if (historylen <= 1 || i > historylen - 1)
        return;
    i = Max(Min(i, historylen - 1), 0);
    /* stash the line being edited in the slot it came from */
    free(history[historylen - 1 - l->hindex]);
    history[historylen - 1 - l->hindex] = strdup(l->buf);
    l->hindex = i;
    entry = history[historylen - 1 - l->hindex];
    n = strlen(entry);
    bestlineGrow(l, n + 1);
    n = Min(n, l->buflen - 1); /* truncate if the buffer could not grow */
    memcpy(l->buf, entry, n);
    l->buf[n] = 0;
    l->pos = n;
    l->len = n;
    bestlineRefreshLine(l);
}
/* Moves dx entries away from the current history index; the target is range-checked by bestlineEditHistoryGoto(). */
static void bestlineEditHistoryMove(struct bestlineState *l, int dx) {
bestlineEditHistoryGoto(l, l->hindex + dx);
}
/**
 * Rebuilds the reverse-i-search prompt in `ab`.
 *
 * @param ab is the append buffer to overwrite
 * @param fail nonzero to prefix the prompt with "failed "
 * @param s is the NUL-terminated search text
 * @param n is how many leading bytes of s are drawn underlined
 * @return pointer to the prompt text, which is owned by ab
 */
static char *bestlineMakeSearchPrompt(struct abuf *ab, int fail, const char *s, int n) {
    ab->len = 0;
    abAppendw(ab, '(');
    abAppends(ab, fail ? "failed reverse-i-search `\033[4m" : "reverse-i-search `\033[4m");
    abAppend(ab, s, n);
    abAppends(ab, "\033[24m");
    abAppends(ab, s + n);
    abAppendw(ab, Read32le("') "));
    return ab->b;
}
/**
 * Runs an interactive reverse incremental history search.
 *
 * Typed characters extend the search text, backspace shortens it by one
 * character, ctrl-r steps to an older match, ctrl-g aborts and restores
 * the entry and cursor that were active on entry, and any other control
 * keystroke ends the search and is handed back to the caller in seq.
 *
 * @param l is the editing state
 * @param seq receives the keystroke that ended the search
 * @param size is the capacity of seq
 * @return size of the keystroke left in seq, 0 if there is nothing for
 *     the caller to process, or a value <= 0 passed through from
 *     bestlineRead()
 */
static int bestlineSearch(struct bestlineState *l, char *seq, int size) {
    char *p;
    char isstale;
    struct abuf ab;     /* the search text typed so far */
    struct abuf prompt; /* scratch storage for the search prompt */
    unsigned i, j, k, matlen;
    const char *oldprompt, *q;
    int rc, fail, added, oldpos, oldindex;
    if (historylen <= 1)
        return 0;
    abInit(&ab);
    abInit(&prompt);
    oldpos = l->pos;
    oldprompt = l->prompt;
    oldindex = l->hindex;
    for (fail = matlen = 0;;) {
        l->prompt = bestlineMakeSearchPrompt(&prompt, fail, ab.b, matlen);
        bestlineRefreshLine(l);
        fail = 1;
        added = 0;
        j = l->pos;
        i = l->hindex;
        rc = bestlineRead(l->ifd, seq, size, l);
        if (rc > 0) {
            if (seq[0] == Ctrl('?') || seq[0] == Ctrl('H')) {
                if (ab.len) {
                    /*
                     * Erase one whole UTF-8 character and re-terminate:
                     * the prompt builder walks the search text up to its
                     * NUL, so without this the erased bytes would still
                     * be displayed.
                     */
                    while (ab.len && (ab.b[ab.len - 1] & 0300) == 0200)
                        --ab.len;
                    if (ab.len)
                        --ab.len;
                    ab.b[ab.len] = 0;
                    matlen = Min(matlen, ab.len);
                }
            } else if (seq[0] == Ctrl('R')) {
                /* continue looking to the left, or in the next older entry */
                if (j) {
                    --j;
                } else if (i + 1 < historylen) {
                    ++i;
                    j = strlen(history[historylen - 1 - i]);
                }
            } else if (seq[0] == Ctrl('G')) {
                bestlineEditHistoryGoto(l, oldindex);
                l->pos = oldpos;
                rc = 0;
                break;
            } else if (IsControl(seq[0])) { /* only sees canonical c0 */
                break;
            } else {
                abAppend(&ab, seq, rc);
                added = rc;
            }
        } else {
            break;
        }
        /* scan from the current entry towards older ones for a match */
        isstale = 0;
        while (i < historylen) {
            p = history[historylen - 1 - i];
            k = strlen(p);
            if (!isstale) {
                j = Min(k, j + ab.len);
            } else {
                isstale = 0;
                j = k;
            }
            if ((q = FindSubstringReverse(p, j, ab.b, ab.len))) {
                bestlineEditHistoryGoto(l, i);
                l->pos = q - p;
                fail = 0;
                if (added) {
                    matlen += added;
                    added = 0;
                }
                break;
            } else {
                isstale = 1;
                ++i;
            }
        }
    }
    l->prompt = oldprompt;
    bestlineRefreshLine(l);
    abFree(&prompt);
    abFree(&ab);
    bestlineRefreshLine(l);
    return rc;
}
/* Frees every entry of the kill ring and leaves all of its slots NULL. */
static void bestlineRingFree(void) {
    size_t slot = BESTLINE_MAX_RING;
    while (slot--) {
        free(ring.p[slot]); /* free(NULL) is a no-op */
        ring.p[slot] = 0;
    }
}
/**
 * Stores a copy of the `n` bytes at `p` in the next kill ring slot,
 * replacing whatever that slot held. Empty input is ignored, and the
 * ring is left untouched if memory cannot be allocated.
 */
static void bestlineRingPush(const char *p, size_t n) {
    char *copy;
    if (!n)
        return;
    copy = (char *)malloc(n + 1);
    if (!copy)
        return;
    memcpy(copy, p, n);
    copy[n] = 0;
    ring.i = (ring.i + 1) % BESTLINE_MAX_RING;
    free(ring.p[ring.i]);
    ring.p[ring.i] = copy;
}
/**
 * Steps the kill ring cursor back to the previous slot holding an entry.
 *
 * At most BESTLINE_MAX_RING steps are taken, so the loop ends even when
 * the ring is empty. The step adds the ring size before subtracting so
 * the index wraps correctly whatever the signedness of the cursor and
 * whether or not the ring size is a power of two.
 */
static void bestlineRingRotate(void) {
    size_t i;
    for (i = 0; i < BESTLINE_MAX_RING; ++i) {
        ring.i = (ring.i + BESTLINE_MAX_RING - 1) % BESTLINE_MAX_RING;
        if (ring.p[ring.i])
            break;
    }
}
static char *bestlineRefreshHints(struct bestlineState *l) {
char *hint;
struct abuf ab;
const char *ansi1 = "\033[90m", *ansi2 = "\033[39m";
if (!hintsCallback)
return 0;
if (!(hint = hintsCallback(l->buf, &ansi1, &ansi2)))
return 0;
abInit(&ab);
if (ansi1)
abAppends(&ab, ansi1);
abAppends(&ab, hint);
if (ansi2)
abAppends(&ab, ansi2);
if (freeHintsCallback)
freeHintsCallback(hint);
return ab.b;
}
/**
 * Returns the byte offset of the character that starts before `pos`,
 * skipping UTF-8 continuation bytes; returns 0 when pos is 0.
 */
static size_t Backward(struct bestlineState *l, size_t pos) {
    while (pos) {
        --pos;
        if (!pos || (l->buf[pos] & 0300) != 0200)
            break;
    }
    return pos;
}
/**
 * Finds the opening bracket matching the closing bracket that sits just
 * before the cursor, honoring nesting.
 *
 * @param res on success receives the byte offset of the opening bracket
 *     in res[0] and of the closing bracket in res[1]
 * @return 0 if a pair was found, -1 otherwise
 */
static int bestlineEditMirrorLeft(struct bestlineState *l, int res[2]) {
    unsigned cur, at, opener, closer, nesting, anchor;
    at = Backward(l, l->pos);
    if (!at)
        return -1;
    closer = GetUtf8(l->buf + at, l->len - at).c;
    opener = bestlineMirrorLeft(closer);
    if (!opener)
        return -1;
    nesting = 0;
    anchor = at;
    do {
        at = Backward(l, at);
        cur = GetUtf8(l->buf + at, l->len - at).c;
        if (cur == closer) {
            ++nesting;
        } else if (cur == opener) {
            if (!nesting) {
                res[0] = at;
                res[1] = anchor;
                return 0;
            }
            --nesting;
        }
    } while (at);
    return -1;
}
static int bestlineEditMirrorRight(struct bestlineState *l, int res[2]) {
struct rune rune;
unsigned pos, left, right, depth, index;
pos = l->pos;
rune = GetUtf8(l->buf + pos, l->len - pos);
left = rune.c;
if ((right = bestlineMirrorRight(left))) {
depth = 0;
index = pos;
do {
pos += rune.n;
rune = GetUtf8(l->buf + pos, l->len - pos);
if (rune.c == left) {
++depth;
} else if (rune.c == right) {
if (depth) {
--depth;
} else {
res[0] = index;
res[1] = pos;
return 0;
}
}
} while (pos + rune.n < l->len);
}
return -1;
}
/**
 * Locates the bracket pair touching the cursor: first the one closed
 * just before it, then, failing that, the one opened at it.
 *
 * @return 0 with res filled in, or -1 if neither search found a pair
 */
static int bestlineEditMirror(struct bestlineState *l, int res[2]) {
    int rc = bestlineEditMirrorLeft(l, res);
    return rc == -1 ? bestlineEditMirrorRight(l, res) : rc;
}
/**
 * Redraws the prompt, the edit buffer and the optional hint.
 *
 * The whole update is assembled in an append buffer and sent with a
 * single bestlineWrite() call. Nothing is drawn while output is paused
 * or, unless forced, while more input is already pending; in the latter
 * case the state is only marked dirty.
 *
 * @param l is the editing state
 * @param force nonzero to unpause output and to redraw even when input
 *     is pending
 */
static void bestlineRefreshLineImpl(struct bestlineState *l, int force) {
char *hint;
char flipit;
char hasflip;
char haswides;
struct abuf ab;
const char *buf;
struct rune rune;
struct winsize oldsize;
int fd, plen, rows, len, pos;
unsigned x, xn, yn, width, pwidth;
int i, t, cx, cy, tn, resized, flip[2];
/*
* synchronize the i/o state
*/
if (ispaused) {
if (force) {
bestlineUnpause(l->ofd);
} else {
return;
}
}
if (!force && HasPendingInput(l->ifd)) {
l->dirty = 1;
return;
}
oldsize = l->ws;
if ((resized = gotwinch) && rawmode != -1) {
gotwinch = 0;
l->ws = GetTerminalSize(l->ws, l->ifd, l->ofd);
}
/* a bracket pair touching the cursor gets highlighted, unless final */
hasflip = !l->final && !bestlineEditMirror(l, flip);
StartOver:
fd = l->ofd;
buf = l->buf;
pos = l->pos;
len = l->len;
xn = l->ws.ws_col;
yn = l->ws.ws_row;
plen = strlen(l->prompt);
pwidth = GetMonospaceWidth(l->prompt, plen, 0);
width = GetMonospaceWidth(buf, len, &haswides);
/*
* handle the case where the line is larger than the whole display
* gnu readline actually isn't able to deal with this situation!!!
* we kludge xn to address the edge case of wide chars on the edge
*/
for (tn = xn - haswides * 2;;) {
if (pwidth + width + 1 < tn * yn)
break; /* we're fine */
if (!len || width < 2)
break; /* we can't do anything */
if (pwidth + 2 > tn * yn)
break; /* we can't do anything */
if (pos > len / 2) {
/* hide content on the left if we're editing on the right */
rune = GetUtf8(buf, len);
buf += rune.n;
len -= rune.n;
pos -= rune.n;
} else {
/* hide content on the right if we're editing on left */
t = len;
while (len && (buf[len - 1] & 0300) == 0200)
--len;
if (len)
--len;
rune = GetUtf8(buf + len, t - len);
}
if ((t = bestlineCharacterWidth(rune.c)) > 0) {
width -= t;
}
}
pos = Max(0, Min(pos, len));
/*
* now generate the terminal codes to update the line
*
* since we support unlimited lines it's important that we don't
* clear the screen before we draw the screen. doing that causes
* flickering. the key with terminals is to overwrite cells, and
* then use \e[K and \e[J to clear everything else.
*
* we make the assumption that prompts and hints may contain ansi
* sequences, but the buffer does not.
*
* we need to handle the edge case where a wide character like 度
* might be at the edge of the window, when there's one cell left.
* so we can't use division based on string width to compute the
* coordinates and have to track it as we go.
*/
/* cx / cy stay negative until the cursor cell has been located */
cy = -1;
cx = -1;
rows = 1;
abInit(&ab);
abAppendw(&ab, '\r'); /* start of line */
if (l->rows - l->oldpos - 1 > 0) {
abAppends(&ab, "\033[");
abAppendu(&ab, l->rows - l->oldpos - 1);
abAppendw(&ab, 'A'); /* cursor up clamped */
}
abAppends(&ab, l->prompt);
x = pwidth;
for (i = 0; i < len; i += rune.n) {
rune = GetUtf8(buf + i, len - i);
if (x && x + rune.n > xn) {
/* wrap onto a new row; cy counts rows drawn below the cursor row */
if (cy >= 0)
++cy;
if (x < xn) {
abAppends(&ab, "\033[K"); /* clear line forward */
}
abAppends(&ab, "\r" /* start of line */
"\n"); /* cursor down unclamped */
++rows;
x = 0;
}
if (i == pos) {
cy = 0;
cx = x;
}
if (maskmode) {
abAppendw(&ab, '*');
} else {
flipit = hasflip && (i == flip[0] || i == flip[1]);
if (flipit)
abAppends(&ab, "\033[1m");
abAppendw(&ab, EncodeUtf8(rune.c));
if (flipit)
abAppends(&ab, "\033[22m");
}
t = bestlineCharacterWidth(rune.c);
t = Max(0, t);
x += t;
}
/* the hint is only drawn when it fits on the rest of the row */
if (!l->final && (hint = bestlineRefreshHints(l))) {
if (GetMonospaceWidth(hint, strlen(hint), 0) < xn - x) {
if (cx < 0) {
cx = x;
}
abAppends(&ab, hint);
}
free(hint);
}
abAppendw(&ab, Read32le("\033[J")); /* erase display forwards */
/*
* if we are at the very end of the screen with our prompt, we need
* to emit a newline and move the prompt to the first column.
*/
if (pos && pos == len && x >= xn) {
abAppendw(&ab, Read32le("\n\r\0"));
++rows;
}
/*
* move cursor to right position
*/
if (cy > 0) {
abAppends(&ab, "\033[");
abAppendu(&ab, cy);
abAppendw(&ab, 'A'); /* cursor up */
}
if (cx > 0) {
abAppendw(&ab, Read32le("\r\033["));
abAppendu(&ab, cx);
abAppendw(&ab, 'C'); /* cursor right */
} else if (!cx) {
abAppendw(&ab, '\r'); /* start */
}
/*
* now get ready to progress state
* we use a mostly correct kludge when the tty resizes
*/
l->rows = rows;
if (resized && oldsize.ws_col > l->ws.ws_col) {
/* the window got narrower: rebuild once using the new row count */
resized = 0;
abFree(&ab);
goto StartOver;
}
l->dirty = 0;
l->oldpos = Max(0, cy);
/*
* send codes to terminal
*/
bestlineWrite(fd, ab.b, ab.len);
abFree(&ab);
}
/* Redraws the line without forcing: the redraw is skipped while output is paused or input is pending. */
static void bestlineRefreshLine(struct bestlineState *l) {
bestlineRefreshLineImpl(l, 0);
}
/* Redraws the line unconditionally, unpausing output first if necessary. */
static void bestlineRefreshLineForce(struct bestlineState *l) {
bestlineRefreshLineImpl(l, 1);
}
/**
 * Inserts the `n` bytes at `p` at the cursor and advances the cursor
 * past them. Does nothing if the buffer cannot be grown.
 */
static void bestlineEditInsert(struct bestlineState *l, const char *p, size_t n) {
    char *at;
    if (!bestlineGrow(l, l->len + n + 1))
        return;
    at = l->buf + l->pos; /* taken after growing: the buffer may have moved */
    memmove(at + n, at, l->len - l->pos);
    memcpy(at, p, n);
    l->len += n;
    l->pos += n;
    l->buf[l->len] = 0;
    bestlineRefreshLine(l);
}
/* Moves the cursor to the start of the line and redraws. */
static void bestlineEditHome(struct bestlineState *l) {
l->pos = 0;
bestlineRefreshLine(l);
}
/* Moves the cursor to the end of the line and redraws. */
static void bestlineEditEnd(struct bestlineState *l) {
l->pos = l->len;
bestlineRefreshLine(l);
}
/* Steps through history by BESTLINE_HISTORY_PREV. */
static void bestlineEditUp(struct bestlineState *l) {
bestlineEditHistoryMove(l, BESTLINE_HISTORY_PREV);
}
/* Steps through history by BESTLINE_HISTORY_NEXT. */
static void bestlineEditDown(struct bestlineState *l) {
bestlineEditHistoryMove(l, BESTLINE_HISTORY_NEXT);
}
/* Jumps to history index historylen - 1, the largest index bestlineEditHistoryGoto() accepts. */
static void bestlineEditBof(struct bestlineState *l) {
bestlineEditHistoryGoto(l, historylen - 1);
}
/* Jumps to history index 0. */
static void bestlineEditEof(struct bestlineState *l) {
bestlineEditHistoryGoto(l, 0);
}
/* Clears the screen on the output descriptor, then redraws the line. */
static void bestlineEditRefresh(struct bestlineState *l) {
bestlineClearScreen(l->ofd);
bestlineRefreshLine(l);
}
static size_t Forward(struct bestlineState *l, size_t pos) {
return pos + GetUtf8(l->buf + pos, l->len - pos).n;
}
/**
 * Moves `pos` left across every consecutive character accepted by
 * `pred`.
 *
 * @return offset of the leftmost accepted character in the run, or pos
 *     unchanged if the character before it is rejected
 */
static size_t Backwards(struct bestlineState *l, size_t pos, char pred(unsigned)) {
    size_t prev;
    for (; pos; pos = prev) {
        prev = Backward(l, pos);
        if (!pred(GetUtf8(l->buf + prev, l->len - prev).c))
            break;
    }
    return pos;
}
/**
 * Moves `pos` right across every consecutive character accepted by
 * `pred`.
 *
 * @return offset of the first rejected character, or the end of the line
 */
static size_t Forwards(struct bestlineState *l, size_t pos, char pred(unsigned)) {
    struct rune r;
    for (; pos < l->len; pos += r.n) {
        r = GetUtf8(l->buf + pos, l->len - pos);
        if (!pred(r.c))
            break;
    }
    return pos;
}
/* Returns the offset reached by skipping right over separators and then over the word that follows them. */
static size_t ForwardWord(struct bestlineState *l, size_t pos) {
pos = Forwards(l, pos, bestlineIsSeparator);
pos = Forwards(l, pos, bestlineNotSeparator);
return pos;
}
/* Returns the offset reached by skipping left over separators and then over the word that precedes them. */
static size_t BackwardWord(struct bestlineState *l, size_t pos) {
pos = Backwards(l, pos, bestlineIsSeparator);
pos = Backwards(l, pos, bestlineNotSeparator);
return pos;
}
/*
 * Advances i until the character at i, or the character right before
 * it, is a separator. Returns i unchanged when it is zero or not inside
 * the line.
 *
 * NOTE(review): when neither test breaks, the loop step uses r.n of the
 * PRECEDING character (the last one decoded), not of the character at i.
 * Those differ when the two characters have different encoded lengths --
 * confirm whether this is intended.
 */
static size_t EscapeWord(struct bestlineState *l, size_t i) {
size_t j;
struct rune r;
for (; i && i < l->len; i += r.n) {
if (i < l->len) {
r = GetUtf8(l->buf + i, l->len - i);
if (bestlineIsSeparator(r.c))
break;
}
if ((j = i)) {
/* step j back to the start of the preceding character */
do
--j;
while (j && (l->buf[j] & 0300) == 0200);
r = GetUtf8(l->buf + j, l->len - j);
if (bestlineIsSeparator(r.c))
break;
}
}
return i;
}
/* Moves the cursor one character to the left (no-op at the start of the line) and redraws. */
static void bestlineEditLeft(struct bestlineState *l) {
l->pos = Backward(l, l->pos);
bestlineRefreshLine(l);
}
/* Moves the cursor one character to the right, stepping over UTF-8 continuation bytes, and redraws; no-op at the end of the line. */
static void bestlineEditRight(struct bestlineState *l) {
    if (l->pos == l->len)
        return;
    ++l->pos;
    while (l->pos < l->len && (l->buf[l->pos] & 0300) == 0200)
        ++l->pos;
    bestlineRefreshLine(l);
}
/* Moves the cursor to the start of the previous word and redraws. */
static void bestlineEditLeftWord(struct bestlineState *l) {
l->pos = BackwardWord(l, l->pos);
bestlineRefreshLine(l);
}
/* Moves the cursor past the end of the next word and redraws. */
static void bestlineEditRightWord(struct bestlineState *l) {
l->pos = ForwardWord(l, l->pos);
bestlineRefreshLine(l);
}
/**
 * Moves the cursor left by one expression.
 *
 * Non-bracket separators are skipped first. If the cursor then sits
 * right after a closing bracket with a matching opener, it jumps to that
 * opener; otherwise it moves to the start of the preceding word.
 */
static void bestlineEditLeftExpr(struct bestlineState *l) {
    int mark[2];
    l->pos = Backwards(l, l->pos, bestlineIsXeparator);
    if (bestlineEditMirrorLeft(l, mark) != 0) {
        l->pos = Backwards(l, l->pos, bestlineNotSeparator);
    } else {
        l->pos = mark[0];
    }
    bestlineRefreshLine(l);
}
/**
 * Moves the cursor right by one expression.
 *
 * Non-bracket separators are skipped first. If the cursor then sits on
 * an opening bracket with a matching closer, it jumps just past that
 * closer; otherwise it moves to the end of the following word.
 */
static void bestlineEditRightExpr(struct bestlineState *l) {
    int mark[2];
    l->pos = Forwards(l, l->pos, bestlineIsXeparator);
    if (bestlineEditMirrorRight(l, mark) != 0) {
        l->pos = Forwards(l, l->pos, bestlineNotSeparator);
    } else {
        l->pos = Forward(l, mark[1]);
    }
    bestlineRefreshLine(l);
}
/*
 * Deletes the character under the cursor and redraws.
 * Does nothing when the cursor is at the end of the line.
 */
static void bestlineEditDelete(struct bestlineState *l) {
    size_t next, width;
    if (l->pos == l->len)
        return;
    next = Forward(l, l->pos);
    width = next - l->pos;
    /* the +1 drags the terminating NUL along with the tail */
    memmove(l->buf + l->pos, l->buf + next, l->len - next + 1);
    l->len -= width;
    bestlineRefreshLine(l);
}
/*
 * Deletes the character before the cursor (backspace) and redraws.
 * Does nothing when the cursor is at the start of the line.
 */
static void bestlineEditRubout(struct bestlineState *l) {
    size_t prev, width;
    if (l->pos == 0)
        return;
    prev = Backward(l, l->pos);
    width = l->pos - prev;
    /* the +1 drags the terminating NUL along with the tail */
    memmove(l->buf + prev, l->buf + l->pos, l->len - l->pos + 1);
    l->len -= width;
    l->pos = prev;
    bestlineRefreshLine(l);
}
/*
 * Kills the text from the cursor through the end of the next word.
 * The removed bytes are pushed onto the kill ring before the tail of
 * the line is shifted left. Does nothing at the end of the line.
 */
static void bestlineEditDeleteWord(struct bestlineState *l) {
    size_t stop, width;
    if (l->pos == l->len)
        return;
    stop = ForwardWord(l, l->pos);
    width = stop - l->pos;
    bestlineRingPush(l->buf + l->pos, width);
    memmove(l->buf + l->pos, l->buf + stop, l->len - stop + 1);
    l->len -= width;
    bestlineRefreshLine(l);
}
/*
 * Kills the text from the start of the previous word up to the cursor.
 * The removed bytes are pushed onto the kill ring before the tail of
 * the line is shifted left. Does nothing at the start of the line.
 */
static void bestlineEditRuboutWord(struct bestlineState *l) {
    size_t start, width;
    if (l->pos == 0)
        return;
    start = BackwardWord(l, l->pos);
    width = l->pos - start;
    bestlineRingPush(l->buf + start, width);
    memmove(l->buf + start, l->buf + l->pos, l->len - l->pos + 1);
    l->len -= width;
    l->pos = start;
    bestlineRefreshLine(l);
}
static void bestlineEditXlatWord(struct bestlineState *l, unsigned xlat(unsigned)) {
unsigned c;
size_t i, j;
struct rune r;
struct abuf ab;
abInit(&ab);
i = Forwards(l, l->pos, bestlineIsSeparator);
for (j = i; j < l->len; j += r.n) {
r = GetUtf8(l->buf + j, l->len - j);
if (bestlineIsSeparator(r.c))
break;
if ((c = xlat(r.c)) != r.c) {
abAppendw(&ab, EncodeUtf8(c));
} else { /* avoid canonicalization */
abAppend(&ab, l->buf + j, r.n);
}
}
if (ab.len && bestlineGrow(l, i + ab.len + l->len - j + 1)) {
l->pos = i + ab.len;
abAppend(&ab, l->buf + j, l->len - j);
l->len = i + ab.len;
memcpy(l->buf + i, ab.b, ab.len + 1);
bestlineRefreshLine(l);
}
abFree(&ab);
}
/* Rewrites the next word through bestlineLowercase (see bestlineEditXlatWord). */
static void bestlineEditLowercaseWord(struct bestlineState *l) {
bestlineEditXlatWord(l, bestlineLowercase);
}
/* Rewrites the next word through bestlineUppercase (see bestlineEditXlatWord). */
static void bestlineEditUppercaseWord(struct bestlineState *l) {
bestlineEditXlatWord(l, bestlineUppercase);
}
/*
 * Rewrites the next word through Capitalize (see bestlineEditXlatWord).
 * The file-level `iscapital` flag is cleared first; presumably Capitalize
 * uses it to tell the first letter from the rest -- verify against its
 * definition.
 */
static void bestlineEditCapitalizeWord(struct bestlineState *l) {
iscapital = 0;
bestlineEditXlatWord(l, Capitalize);
}
/*
 * Kills everything left of the cursor: the prefix is pushed onto the
 * kill ring, the remainder (with its NUL) slides to the front of the
 * buffer, and the cursor ends up at column zero.
 */
static void bestlineEditKillLeft(struct bestlineState *l) {
    size_t killed = l->pos;
    bestlineRingPush(l->buf, killed);
    memmove(l->buf, l->buf + killed, l->len - killed + 1);
    l->len -= killed;
    l->pos = 0;
    bestlineRefreshLine(l);
}
/*
 * Kills everything from the cursor to the end of the line: the suffix
 * is pushed onto the kill ring and the line is truncated at the cursor.
 */
static void bestlineEditKillRight(struct bestlineState *l) {
    size_t at = l->pos;
    bestlineRingPush(l->buf + at, l->len - at);
    l->len = at;
    l->buf[at] = '\0';
    bestlineRefreshLine(l);
}
/*
 * Inserts the current kill-ring entry at the cursor.
 *
 * The yanked region is recorded in l->yi / l->yj so that a following
 * bestlineEditRotate() can replace it, and the cursor is left just past
 * the inserted text. Does nothing when the current ring slot is empty
 * or the line buffer cannot grow.
 *
 * The tail of the line is opened up with a single in-place memmove(),
 * so no temporary allocation is needed and an out-of-memory condition
 * can no longer abort the yank after the buffer has already been grown.
 */
static void bestlineEditYank(struct bestlineState *l) {
    const char *text;
    size_t n;
    if (!(text = ring.p[ring.i]))
        return;
    n = strlen(text);
    if (!bestlineGrow(l, l->len + n + 1))
        return;
    /* shift the tail, including its terminating NUL, right by n bytes */
    memmove(l->buf + l->pos + n, l->buf + l->pos, l->len - l->pos + 1);
    memcpy(l->buf + l->pos, text, n);
    l->yi = l->pos;
    l->yj = l->pos + n;
    l->pos += n;
    l->len += n;
    bestlineRefreshLine(l);
}
/*
 * Replaces the text inserted by the previous yank with the next
 * kill-ring entry.
 *
 * Only acts when the previous key sequence (l->seq[1]) was CTRL-Y or
 * ESC y. The previously yanked span [yi, yj) is removed when it still
 * lies inside the line, then the ring is rotated and yanked again.
 */
static void bestlineEditRotate(struct bestlineState *l) {
    int after_yank;
    after_yank = l->seq[1][0] == Ctrl('Y') || (l->seq[1][0] == 033 && l->seq[1][1] == 'y');
    if (!after_yank)
        return;
    if (l->yi < l->len && l->yj <= l->len) {
        memmove(l->buf + l->yi, l->buf + l->yj, l->len - l->yj + 1);
        l->len -= l->yj - l->yi;
        l->pos -= l->yj - l->yi;
    }
    bestlineRingRotate();
    bestlineEditYank(l);
}
/*
 * Swaps the character before the cursor with the character under it
 * and leaves the cursor after both. At the end of the line the last
 * two characters are swapped instead.
 *
 * Does nothing when the line is empty, when there is no character on
 * one side, or when the scratch buffer cannot be allocated.
 */
static void bestlineEditTranspose(struct bestlineState *l) {
    char *q, *p;
    size_t a, b, c;
    /* an empty line has nothing to swap; this also keeps `b` from
       wrapping around below zero */
    if (!l->len)
        return;
    b = l->pos;
    /* at end of line, step back one whole character rather than one
       byte so `b` never lands inside a multibyte sequence */
    if (b == l->len)
        b = Backward(l, b);
    a = Backward(l, b);
    c = Forward(l, b);
    if (!(a < b && b < c))
        return;
    if (!(p = q = (char *)malloc(c - a)))
        return;
    p = Copy(p, l->buf + b, c - b);
    p = Copy(p, l->buf + a, b - a);
    assert((size_t)(p - q) == c - a);
    memcpy(l->buf + a, q, p - q);
    l->pos = c;
    free(q);
    bestlineRefreshLine(l);
}
/*
 * Swaps the word before the cursor with the word after it, keeping the
 * separators between them in place, and leaves the cursor after the
 * second word. At the end of the line the last two words are swapped.
 *
 * Does nothing when two words cannot be found around the cursor or
 * when the scratch buffer cannot be allocated.
 */
static void bestlineEditTransposeWords(struct bestlineState *l) {
    char *q, *p;
    size_t i, pi, xi, xj, yi, yj;
    i = l->pos;
    if (i == l->len) {
        i = Backwards(l, i, bestlineIsSeparator);
        i = Backwards(l, i, bestlineNotSeparator);
    }
    pi = EscapeWord(l, i);
    /* [xi, xj) is the left word, [yi, yj) the right word */
    xj = Backwards(l, pi, bestlineIsSeparator);
    xi = Backwards(l, xj, bestlineNotSeparator);
    yi = Forwards(l, pi, bestlineIsSeparator);
    yj = Forwards(l, yi, bestlineNotSeparator);
    if (!(xi < xj && xj < yi && yi < yj))
        return;
    /* previously a failed malloc() was dereferenced by Copy() */
    if (!(p = q = (char *)malloc(yj - xi)))
        return;
    p = Copy(p, l->buf + yi, yj - yi);
    p = Copy(p, l->buf + xj, yi - xj);
    p = Copy(p, l->buf + xi, xj - xi);
    assert((size_t)(p - q) == yj - xi);
    memcpy(l->buf + xi, q, p - q);
    l->pos = yj;
    free(q);
    bestlineRefreshLine(l);
}
/*
 * Deletes the run of separator characters surrounding the cursor and
 * leaves the cursor where the run began. Does nothing when there are
 * no separators on either side.
 */
static void bestlineEditSqueeze(struct bestlineState *l) {
    size_t left = Backwards(l, l->pos, bestlineIsSeparator);
    size_t right = Forwards(l, l->pos, bestlineIsSeparator);
    if (left >= right)
        return;
    memmove(l->buf + left, l->buf + right, l->len - right + 1);
    l->len -= right - left;
    l->pos = left;
    bestlineRefreshLine(l);
}
/* Remembers the current cursor offset in l->mark. */
static void bestlineEditMark(struct bestlineState *l) {
l->mark = l->pos;
}
/*
 * Moves the cursor to the saved mark and redraws.
 * A mark that lies beyond the current end of line is ignored.
 */
static void bestlineEditGoto(struct bestlineState *l) {
    if (l->mark <= l->len) {
        l->pos = l->mark;
        bestlineRefreshLine(l);
    }
}
/*
 * Writes a C-style escaped rendering of the `n` bytes at `s` into `d`
 * and returns the number of bytes written.
 *
 * Each input byte produces between one and four output bytes, and four
 * bytes are always stored before the output pointer is advanced, so
 * `d` must have room for 4 * n bytes. No terminating NUL is written.
 *
 * NOTE(review): Case(), Read16le() and Bsr() are defined elsewhere in
 * this file; the comments below assume Case(x, y) expands to
 * `case x: y; break;`, Read16le() loads two bytes as a little-endian
 * integer and Bsr() yields the index of the highest set bit -- confirm.
 */
static size_t bestlineEscape(char *d, const char *s, size_t n) {
char *p;
size_t i;
/* c: current input byte, w: packed output bytes, l: previous w */
unsigned c, w, l;
for (p = d, l = i = 0; i < n; ++i) {
switch ((c = s[i] & 255)) {
/* bytes that have a two-character backslash escape */
Case('\a', w = Read16le("\\a"));
Case('\b', w = Read16le("\\b"));
Case('\t', w = Read16le("\\t"));
Case('\n', w = Read16le("\\n"));
Case('\v', w = Read16le("\\v"));
Case('\f', w = Read16le("\\f"));
Case('\r', w = Read16le("\\r"));
Case('"', w = Read16le("\\\""));
Case('\'', w = Read16le("\\\'"));
Case('\\', w = Read16le("\\\\"));
default:
/* other control bytes, DEL, and a '?' that directly follows an
unescaped '?' are written as \xHH */
if (c <= 0x1F || c == 0x7F || (c == '?' && l == '?')) {
w = Read16le("\\x");
w |= "0123456789abcdef"[(c & 0xF0) >> 4] << 020;
w |= "0123456789abcdef"[(c & 0x0F) >> 0] << 030;
} else {
/* everything else passes through unchanged */
w = c;
}
break;
}
/* store all four bytes of w, lowest byte first ... */
p[0] = (w & 0x000000ff) >> 000;
p[1] = (w & 0x0000ff00) >> 010;
p[2] = (w & 0x00ff0000) >> 020;
p[3] = (w & 0xff000000) >> 030;
/* ... but only advance past the bytes that are actually in use */
p += (Bsr(w) >> 3) + 1;
l = w;
}
return p - d;
}
/*
 * Reads the next key sequence from the input descriptor and inserts
 * its escaped rendering (see bestlineEscape) at the cursor.
 * Nothing is inserted when the read returns zero or fails.
 */
static void bestlineEditInsertEscape(struct bestlineState *l) {
    char seq[16];
    char esc[sizeof(seq) * 4]; /* bestlineEscape emits at most 4 bytes per input byte */
    size_t m;
    ssize_t n = bestlineRead(l->ifd, seq, sizeof(seq), l);
    if (n <= 0)
        return;
    m = bestlineEscape(esc, seq, n);
    bestlineEditInsert(l, esc, m);
}
/*
 * Records SIGINT in the file-level `gotint` flag; bestlineRawInit()
 * re-raises whatever signal is stored there once editing has ended.
 */
static void bestlineEditInterrupt(void) {
gotint = SIGINT;
}
/*
 * Records SIGQUIT in the file-level `gotint` flag; bestlineRawInit()
 * re-raises whatever signal is stored there once editing has ended.
 */
static void bestlineEditQuit(void) {
gotint = SIGQUIT;
}
/* Sends SIGSTOP to the calling process (bound to CTRL-Z in bestlineEdit). */
static void bestlineEditSuspend(void) {
raise(SIGSTOP);
}
/*
 * Suspends terminal output on the output descriptor via
 * tcflow(TCOOFF) and sets the file-level `ispaused` flag, which
 * bestlineEditCtrlq() checks to decide whether to resume.
 */
static void bestlineEditPause(struct bestlineState *l) {
tcflow(l->ofd, TCOOFF);
ispaused = 1;
}
/*
 * Handles CTRL-Q. When output is not paused the next key sequence is
 * inserted in escaped form; when it is paused, output is resumed and
 * the line is redrawn.
 */
static void bestlineEditCtrlq(struct bestlineState *l) {
    if (!ispaused) {
        bestlineEditInsertEscape(l);
        return;
    }
    bestlineUnpause(l->ofd);
    bestlineRefreshLineForce(l);
}
/**
 * Moves last item inside current s-expression to outside, e.g.
 *
 *     (a| b c)
 *     (a| b) c
 *
 * The cursor position changes only if a paren is moved before it:
 *
 *     (a b c |)
 *     (a b) c |
 *
 * To accommodate non-LISP languages we connect unspaced outer symbols:
 *
 *     f(a,| b, g())
 *     f(a,| b), g()
 *
 * Our standard keybinding is ALT-SHIFT-B.
 *
 * The line is left untouched when no closing bracket is found to the
 * right of the cursor, when no item can be found before that bracket,
 * or when the bracket-matching scratch stack cannot be allocated.
 */
static void bestlineEditBarf(struct bestlineState *l) {
    struct rune r;
    unsigned long w;
    size_t i, pos, end, depth = 0;
    unsigned lhs, rhs, *grown, *stack = 0;
    /* go as far right within current s-expr as possible */
    for (pos = l->pos;; pos += r.n) {
        if (pos == l->len)
            goto Finish;
        r = GetUtf8(l->buf + pos, l->len - pos);
        if (depth) {
            /* inside a nested group: wait for its closing bracket */
            if (r.c == stack[depth - 1]) {
                --depth;
            }
        } else if ((rhs = bestlineMirrorRight(r.c))) {
            /* realloc() failure keeps `stack` valid so Finish frees it */
            if (!(grown = (unsigned *)realloc(stack, (depth + 1) * sizeof(*stack))))
                goto Finish;
            stack = grown;
            stack[depth++] = rhs;
        } else if (bestlineMirrorLeft(r.c)) {
            /* closing bracket of the enclosing expression */
            end = pos;
            break;
        }
    }
    /* go back one item */
    pos = Backwards(l, pos, bestlineIsXeparator);
    for (;; pos = i) {
        if (!pos)
            goto Finish;
        i = Backward(l, pos);
        r = GetUtf8(l->buf + i, l->len - i);
        if (depth) {
            if (r.c == stack[depth - 1]) {
                --depth;
            }
        } else if ((lhs = bestlineMirrorLeft(r.c))) {
            if (!(grown = (unsigned *)realloc(stack, (depth + 1) * sizeof(*stack))))
                goto Finish;
            stack = grown;
            stack[depth++] = lhs;
        } else if (bestlineIsSeparator(r.c)) {
            break;
        }
    }
    pos = Backwards(l, pos, bestlineIsXeparator);
    /* now move the text: slide [pos, end) right by the bracket's width
       and write the bracket into the gap at pos */
    r = GetUtf8(l->buf + end, l->len - end);
    memmove(l->buf + pos + r.n, l->buf + pos, end - pos);
    w = EncodeUtf8(r.c);
    for (i = 0; i < r.n; ++i) {
        l->buf[pos + i] = w;
        w >>= 8;
    }
    if (l->pos > pos) {
        l->pos += r.n;
    }
    bestlineRefreshLine(l);
Finish:
    free(stack);
}
/**
 * Moves first item outside current s-expression to inside, e.g.
 *
 *     (a| b) c d
 *     (a| b c) d
 *
 * To accommodate non-LISP languages we connect unspaced outer symbols:
 *
 *     f(a,| b), g()
 *     f(a,| b, g())
 *
 * Our standard keybinding is ALT-SHIFT-S.
 *
 * The line is left untouched when the bracket-matching scratch stack
 * cannot be allocated.
 */
static void bestlineEditSlurp(struct bestlineState *l) {
    char rp[6];
    struct rune r;
    size_t pos, depth = 0, point = 0, start = 0;
    unsigned rhs, *grown, *stack = 0;
    /* go to outside edge of current s-expr */
    for (pos = l->pos; pos < l->len; pos += r.n) {
        r = GetUtf8(l->buf + pos, l->len - pos);
        if (depth) {
            /* inside a nested group: wait for its closing bracket */
            if (r.c == stack[depth - 1]) {
                --depth;
            }
        } else if ((rhs = bestlineMirrorRight(r.c))) {
            /* realloc() failure keeps `stack` valid so Finish frees it */
            if (!(grown = (unsigned *)realloc(stack, (depth + 1) * sizeof(*stack))))
                goto Finish;
            stack = grown;
            stack[depth++] = rhs;
        } else if (bestlineMirrorLeft(r.c)) {
            /* [point, start) spans the closing bracket itself */
            point = pos;
            pos += r.n;
            start = pos;
            break;
        }
    }
    /* go forward one item */
    pos = Forwards(l, pos, bestlineIsXeparator);
    for (; pos < l->len; pos += r.n) {
        r = GetUtf8(l->buf + pos, l->len - pos);
        if (depth) {
            if (r.c == stack[depth - 1]) {
                --depth;
            }
        } else if ((rhs = bestlineMirrorRight(r.c))) {
            if (!(grown = (unsigned *)realloc(stack, (depth + 1) * sizeof(*stack))))
                goto Finish;
            stack = grown;
            stack[depth++] = rhs;
        } else if (bestlineIsSeparator(r.c)) {
            break;
        }
    }
    /* now move the text: save the bracket, pull the slurped item left
       over it, then put the bracket back after the item */
    memcpy(rp, l->buf + point, start - point);
    memmove(l->buf + point, l->buf + start, pos - start);
    memcpy(l->buf + pos - (start - point), rp, start - point);
    bestlineRefreshLine(l);
Finish:
    free(stack);
}
static void bestlineEditRaise(struct bestlineState *l) {
(void)l;
}
/*
 * Reports whether every '(' in `buf` has been closed.
 *
 * A ')' seen while no parenthesis is open is ignored rather than
 * counted as an imbalance. Returns nonzero when nothing is left open.
 */
static char IsBalanced(struct abuf *buf) {
    unsigned open = 0;
    unsigned at;
    for (at = 0; at < buf->len; ++at) {
        switch (buf->b[at]) {
        case '(':
            ++open;
            break;
        case ')':
            if (open > 0)
                --open;
            break;
        default:
            break;
        }
    }
    return open == 0;
}
/**
 * Runs bestline engine.
 *
 * This function is the core of the line editing capability of bestline.
 * It expects 'fd' to be already in "raw mode" so that every key pressed
 * will be returned ASAP to read().
 *
 * The resulting string is put into 'obuf' when the user types enter, or
 * when ctrl+d is typed.
 *
 * An empty placeholder entry is pushed onto the history when editing
 * starts and is popped again on every path that leaves this function.
 * While a multi-line entry is still being continued (paste mode,
 * unbalanced parentheses in balance mode, or an unterminated triple
 * quote in llama mode) the placeholder is kept, so pressing enter on a
 * continuation line no longer discards a real history entry.
 *
 * @param stdin_fd is the descriptor key sequences are read from
 * @param stdout_fd is the descriptor the prompt and line are drawn on
 * @param prompt may be null; only the part after its last newline is
 *     redrawn together with the line
 * @param init is the initial buffer content and may be null
 * @param obuf receives the allocated result on success
 * @return chomped character count in buf >=0 or -1 on eof / error
 */
static ssize_t bestlineEdit(int stdin_fd, int stdout_fd, const char *prompt, const char *init,
                            char **obuf) {
    ssize_t rc;
    char seq[16];
    const char *promptnotnull, *promptlastnl;
    size_t nread;
    int pastemode;
    struct rune rune;
    unsigned long long w;
    struct bestlineState l;
    pastemode = 0;
    memset(&l, 0, sizeof(l));
    if (!(l.buf = (char *)malloc((l.buflen = 32))))
        return -1;
    l.buf[0] = 0;
    l.ifd = stdin_fd;
    l.ofd = stdout_fd;
    promptnotnull = prompt ? prompt : "";
    promptlastnl = strrchr(promptnotnull, '\n');
    l.prompt = promptlastnl ? promptlastnl + 1 : promptnotnull;
    l.ws = GetTerminalSize(l.ws, l.ifd, l.ofd);
    abInit(&l.full);
    /* placeholder history slot for the line being edited */
    bestlineHistoryAdd("");
    bestlineWriteStr(l.ofd, promptnotnull);
    init = init ? init : "";
    bestlineEditInsert(&l, init, strlen(init));
    while (1) {
        if (l.dirty)
            bestlineRefreshLineForce(&l);
        rc = bestlineRead(l.ifd, seq, sizeof(seq), &l);
        if (rc > 0) {
            /* search and completion consume keys themselves and hand
               back the first sequence they did not handle */
            if (seq[0] == Ctrl('R')) {
                rc = bestlineSearch(&l, seq, sizeof(seq));
                if (!rc)
                    continue;
            } else if (seq[0] == '\t' && completionCallback) {
                rc = bestlineCompleteLine(&l, seq, sizeof(seq));
                if (!rc)
                    continue;
            }
        }
        if (rc > 0) {
            nread = rc;
        } else if (!rc && l.len) {
            /* end of input with pending text: treat it as enter */
            nread = 1;
            seq[0] = '\r';
            seq[1] = 0;
        } else {
            /* eof on an empty line, or error: drop placeholder, bail */
            if (historylen) {
                free(history[--historylen]);
                history[historylen] = 0;
            }
            free(l.buf);
            abFree(&l.full);
            return -1;
        }
        switch (seq[0]) {
            Case(Ctrl('P'), bestlineEditUp(&l));
            Case(Ctrl('E'), bestlineEditEnd(&l));
            Case(Ctrl('N'), bestlineEditDown(&l));
            Case(Ctrl('A'), bestlineEditHome(&l));
            Case(Ctrl('B'), bestlineEditLeft(&l));
            Case(Ctrl('@'), bestlineEditMark(&l));
            Case(Ctrl('Y'), bestlineEditYank(&l));
            Case(Ctrl('Q'), bestlineEditCtrlq(&l));
            Case(Ctrl('F'), bestlineEditRight(&l));
            Case(Ctrl('\\'), bestlineEditQuit());
            Case(Ctrl('S'), bestlineEditPause(&l));
            Case(Ctrl('?'), bestlineEditRubout(&l));
            Case(Ctrl('H'), bestlineEditRubout(&l));
            Case(Ctrl('L'), bestlineEditRefresh(&l));
            Case(Ctrl('Z'), bestlineEditSuspend());
            Case(Ctrl('U'), bestlineEditKillLeft(&l));
            Case(Ctrl('T'), bestlineEditTranspose(&l));
            Case(Ctrl('K'), bestlineEditKillRight(&l));
            Case(Ctrl('W'), bestlineEditRuboutWord(&l));
        case Ctrl('C'):
            if (emacsmode) {
                /* C-c is a prefix key in emacs mode */
                if (bestlineRead(l.ifd, seq, sizeof(seq), &l) != 1)
                    break;
                switch (seq[0]) {
                    Case(Ctrl('C'), bestlineEditInterrupt());
                    Case(Ctrl('B'), bestlineEditBarf(&l));
                    Case(Ctrl('S'), bestlineEditSlurp(&l));
                    Case(Ctrl('R'), bestlineEditRaise(&l));
                default:
                    break;
                }
            } else {
                bestlineEditInterrupt();
            }
            break;
        case Ctrl('X'):
            /* only C-x C-x (previous key was also C-x) jumps to the mark */
            if (l.seq[1][0] == Ctrl('X')) {
                bestlineEditGoto(&l);
            }
            break;
        case Ctrl('D'):
            if (l.len) {
                bestlineEditDelete(&l);
            } else {
                /* C-d on an empty line means end of input */
                if (historylen) {
                    free(history[--historylen]);
                    history[historylen] = 0;
                }
                free(l.buf);
                abFree(&l.full);
                return -1;
            }
            break;
        case '\n':
            /* always starts a continuation line */
            l.final = 1;
            bestlineEditEnd(&l);
            bestlineRefreshLineForce(&l);
            l.final = 0;
            abAppend(&l.full, l.buf, l.len);
            l.prompt = "... ";
            abAppends(&l.full, "\n");
            l.len = 0;
            l.pos = 0;
            bestlineWriteStr(stdout_fd, "\r\n");
            bestlineRefreshLineForce(&l);
            break;
        case '\r': {
            char is_finished = 1;
            char needs_strip = 0;
            l.final = 1;
            bestlineEditEnd(&l);
            bestlineRefreshLineForce(&l);
            l.final = 0;
            abAppend(&l.full, l.buf, l.len);
            if (pastemode)
                is_finished = 0;
            if (balancemode)
                if (!IsBalanced(&l.full))
                    is_finished = 0;
            if (llamamode)
                if (StartsWith(l.full.b, "\"\"\""))
                    needs_strip = is_finished = l.full.len > 6 && EndsWith(l.full.b, "\"\"\"");
            if (is_finished) {
                /* drop the placeholder only once the entry is complete;
                   doing it on every enter removed one real history
                   entry per continuation line */
                if (historylen) {
                    free(history[--historylen]);
                    history[historylen] = 0;
                }
                if (needs_strip) {
                    /* return the text between the triple quotes */
                    int len = l.full.len - 6;
                    *obuf = strndup(l.full.b + 3, len);
                    abFree(&l.full);
                    free(l.buf);
                    return len;
                } else {
                    /* ownership of the accumulated buffer moves to the caller */
                    *obuf = l.full.b;
                    free(l.buf);
                    return l.full.len;
                }
            } else {
                l.prompt = "... ";
                abAppends(&l.full, "\n");
                l.len = 0;
                l.pos = 0;
                bestlineWriteStr(stdout_fd, "\r\n");
                bestlineRefreshLineForce(&l);
            }
            break;
        }
        case 033:
            if (nread < 2)
                break;
            switch (seq[1]) {
                Case('<', bestlineEditBof(&l));
                Case('>', bestlineEditEof(&l));
                Case('B', bestlineEditBarf(&l));
                Case('S', bestlineEditSlurp(&l));
                Case('R', bestlineEditRaise(&l));
                Case('y', bestlineEditRotate(&l));
                Case('\\', bestlineEditSqueeze(&l));
                Case('b', bestlineEditLeftWord(&l));
                Case('f', bestlineEditRightWord(&l));
                Case('h', bestlineEditRuboutWord(&l));
                Case('d', bestlineEditDeleteWord(&l));
                Case('l', bestlineEditLowercaseWord(&l));
                Case('u', bestlineEditUppercaseWord(&l));
                Case('c', bestlineEditCapitalizeWord(&l));
                Case('t', bestlineEditTransposeWords(&l));
                Case(Ctrl('B'), bestlineEditLeftExpr(&l));
                Case(Ctrl('F'), bestlineEditRightExpr(&l));
                Case(Ctrl('H'), bestlineEditRuboutWord(&l));
            case '[':
                /* bracketed paste start / end markers */
                if (nread == 6 && !memcmp(seq, "\033[200~", 6)) {
                    pastemode = 1;
                    break;
                }
                if (nread == 6 && !memcmp(seq, "\033[201~", 6)) {
                    pastemode = 0;
                    break;
                }
                if (nread < 3)
                    break;
                if (seq[2] >= '0' && seq[2] <= '9') {
                    if (nread < 4)
                        break;
                    if (seq[3] == '~') {
                        switch (seq[2]) {
                            Case('1', bestlineEditHome(&l)); /* \e[1~ */
                            Case('3', bestlineEditDelete(&l)); /* \e[3~ */
                            Case('4', bestlineEditEnd(&l)); /* \e[4~ */
                        default:
                            break;
                        }
                    }
                } else {
                    switch (seq[2]) {
                        Case('A', bestlineEditUp(&l));
                        Case('B', bestlineEditDown(&l));
                        Case('C', bestlineEditRight(&l));
                        Case('D', bestlineEditLeft(&l));
                        Case('H', bestlineEditHome(&l));
                        Case('F', bestlineEditEnd(&l));
                    default:
                        break;
                    }
                }
                break;
            case 'O':
                if (nread < 3)
                    break;
                switch (seq[2]) {
                    Case('A', bestlineEditUp(&l));
                    Case('B', bestlineEditDown(&l));
                    Case('C', bestlineEditRight(&l));
                    Case('D', bestlineEditLeft(&l));
                    Case('H', bestlineEditHome(&l));
                    Case('F', bestlineEditEnd(&l));
                default:
                    break;
                }
                break;
            case 033:
                if (nread < 3)
                    break;
                switch (seq[2]) {
                case '[':
                    if (nread < 4)
                        break;
                    switch (seq[3]) {
                        Case('C', bestlineEditRightExpr(&l)); /* \e\e[C alt-right */
                        Case('D', bestlineEditLeftExpr(&l)); /* \e\e[D alt-left */
                    default:
                        break;
                    }
                    break;
                case 'O':
                    if (nread < 4)
                        break;
                    switch (seq[3]) {
                        Case('C', bestlineEditRightExpr(&l)); /* \e\eOC alt-right */
                        Case('D', bestlineEditLeftExpr(&l)); /* \e\eOD alt-left */
                    default:
                        break;
                    }
                    break;
                default:
                    break;
                }
                break;
            default:
                break;
            }
            break;
        default:
            if (!IsControl(seq[0])) { /* only sees canonical c0 */
                if (xlatCallback) {
                    /* re-encode the translated code point into seq */
                    rune = GetUtf8(seq, nread);
                    w = EncodeUtf8(xlatCallback(rune.c));
                    nread = 0;
                    do {
                        seq[nread++] = w;
                    } while ((w >>= 8));
                }
                bestlineEditInsert(&l, seq, nread);
            }
            break;
        }
    }
}
/**
 * Releases memory with free().
 *
 * @param ptr is the pointer to release and is passed straight to free()
 */
void bestlineFree(void *ptr) {
free(ptr);
}
/**
 * Frees every stored history line and resets the history to empty.
 */
void bestlineHistoryFree(void) {
    size_t slot;
    for (slot = 0; slot < BESTLINE_MAX_HISTORY; ++slot) {
        /* free() accepts null, so empty slots need no special case */
        free(history[slot]);
        history[slot] = 0;
    }
    historylen = 0;
}
/*
 * Process-exit hook registered by bestlineRawInit() through atexit():
 * leaves raw mode, then frees the history and the kill ring.
 */
static void bestlineAtExit(void) {
bestlineDisableRawMode();
bestlineHistoryFree();
bestlineRingFree();
}
/**
 * Appends a copy of `line` to the in-memory history.
 *
 * Nothing is added when history is disabled (BESTLINE_MAX_HISTORY is
 * zero), when `line` equals the most recent entry, or when the copy
 * cannot be allocated. When the history is full the oldest entry is
 * discarded to make room.
 *
 * @return 1 if the line was added, otherwise 0
 */
int bestlineHistoryAdd(const char *line) {
    char *dup;
    if (!BESTLINE_MAX_HISTORY)
        return 0;
    if (historylen && strcmp(history[historylen - 1], line) == 0)
        return 0;
    dup = strdup(line);
    if (!dup)
        return 0;
    if (historylen == BESTLINE_MAX_HISTORY) {
        free(history[0]);
        memmove(history, history + 1, sizeof(char *) * (BESTLINE_MAX_HISTORY - 1));
        --historylen;
    }
    history[historylen] = dup;
    ++historylen;
    return 1;
}
/**
 * Saves line editing history to file.
 *
 * The file is created with a umask that strips group/other access and
 * is then chmod'ed to owner read/write. Each history entry is written
 * followed by a newline.
 *
 * Write and close failures are reported instead of being ignored, so a
 * zero return means the whole history reached the stdio layer and the
 * stream closed cleanly.
 *
 * @return 0 on success, or -1 w/ errno
 */
int bestlineHistorySave(const char *filename) {
    FILE *fp;
    unsigned j;
    int rc, err;
    mode_t old_umask;
    old_umask = umask(S_IXUSR | S_IRWXG | S_IRWXO);
    fp = fopen(filename, "w");
    umask(old_umask);
    if (!fp)
        return -1;
    chmod(filename, S_IRUSR | S_IWUSR);
    rc = 0;
    err = 0;
    for (j = 0; j < historylen; j++) {
        if (fputs(history[j], fp) == EOF || fputc('\n', fp) == EOF) {
            err = errno;
            rc = -1;
            break;
        }
    }
    /* fclose() flushes, so it can be the first place an error shows up */
    if (fclose(fp) == EOF && !rc) {
        err = errno;
        rc = -1;
    }
    if (rc == -1)
        errno = err; /* report the first failure, not a later one */
    return rc;
}
/**
 * Loads history from the specified file.
 *
 * If the file doesn't exist, zero is returned and this will do nothing.
 * If the file does exist and the operation succeeded zero is returned,
 * otherwise on error -1 is returned.
 *
 * Only the last BESTLINE_MAX_HISTORY non-empty lines of the file are
 * kept. On a successful load the previous in-memory history is
 * replaced; when the file size is reported as zero the in-memory
 * history is left as it was.
 *
 * @return 0 on success, or -1 w/ errno
 */
int bestlineHistoryLoad(const char *filename) {
char **h;
int rc, fd, err;
size_t i, j, k, n, t;
char *m, *e, *p, *q, *f, *s;
/* remember errno so a missing file can leave it untouched */
err = errno, rc = 0;
if (!BESTLINE_MAX_HISTORY)
return 0;
/* h holds (start, end) pointer pairs into the mapping, used as a ring */
if (!(h = (char **)calloc(2 * BESTLINE_MAX_HISTORY, sizeof(char *))))
return -1;
if ((fd = open(filename, O_RDONLY)) != -1) {
if ((n = GetFdSize(fd))) {
if ((m = (char *)mmap(0, n, PROT_READ, MAP_SHARED, fd, 0)) != MAP_FAILED) {
/* walk the file line by line; f marks the newline (or the end) */
for (i = 0, e = (p = m) + n; p < e; p = f + 1) {
if (!(q = (char *)memchr(p, '\n', e - p)))
q = e;
/* trim trailing CR / LF bytes from the line */
for (f = q; q > p; --q) {
if (q[-1] != '\n' && q[-1] != '\r')
break;
}
/* record non-empty lines; i wraps so old lines get overwritten */
if (q > p) {
h[i * 2 + 0] = p;
h[i * 2 + 1] = q;
i = (i + 1) % BESTLINE_MAX_HISTORY;
}
}
bestlineHistoryFree();
/* slot i is the oldest entry once the ring has wrapped, so start
there to copy the lines out in file order */
for (j = 0; j < BESTLINE_MAX_HISTORY; ++j) {
if (h[(k = (i + j) % BESTLINE_MAX_HISTORY) * 2]) {
/* a failed malloc() silently skips that one line */
if ((s = (char *)malloc((t = h[k * 2 + 1] - h[k * 2]) + 1))) {
memcpy(s, h[k * 2], t), s[t] = 0;
history[historylen++] = s;
}
}
}
munmap(m, n);
} else {
rc = -1;
}
}
close(fd);
} else if (errno == ENOENT) {
/* a missing file is not an error: restore errno, keep rc at 0 */
errno = err;
} else {
rc = -1;
}
free(h);
return rc;
}
/**
 * Like bestlineRaw, but with the additional parameter init used as the
 * buffer initial value.
 *
 * Puts `infd` into raw mode, installs temporary SIGINT / SIGQUIT
 * handlers, turns on bracketed paste, runs the editor, and then undoes
 * all of that. If a signal was recorded while editing, the result is
 * discarded, the signal is re-raised with the original handlers back
 * in place, and errno is set to EINTR.
 *
 * @return allocated line, or null on eof / error / interrupt
 */
char *bestlineRawInit(const char *prompt, const char *init, int infd, int outfd) {
    static char once;
    /* sa[0] is our handler, sa[1] the saved SIGINT action, sa[2] the saved SIGQUIT action */
    struct sigaction sa[3];
    char *buf = 0;
    ssize_t rc;
    if (!once) {
        atexit(bestlineAtExit);
        once = 1;
    }
    if (enableRawMode(infd) == -1)
        return 0;
    gotint = 0;
    sigemptyset(&sa[0].sa_mask);
    sa[0].sa_flags = 0;
    sa[0].sa_handler = bestlineOnInt;
    sigaction(SIGINT, &sa[0], &sa[1]);
    sigaction(SIGQUIT, &sa[0], &sa[2]);
    bestlineWriteStr(outfd, "\033[?2004h"); // enable bracketed paste mode
    rc = bestlineEdit(infd, outfd, prompt, init, &buf);
    bestlineWriteStr(outfd, "\033[?2004l"); // disable bracketed paste mode
    bestlineDisableRawMode();
    sigaction(SIGQUIT, &sa[2], 0);
    sigaction(SIGINT, &sa[1], 0);
    if (gotint) {
        free(buf);
        buf = 0;
        raise(gotint);
        errno = EINTR;
        rc = -1;
    }
    bestlineWriteStr(outfd, "\r\n");
    if (rc == -1) {
        free(buf);
        return 0;
    }
    return buf;
}
/**
 * Reads line interactively.
 *
 * This function can be used instead of bestline() in cases where we
 * know for certain we're dealing with a terminal, which means we can
 * avoid linking any stdio code.
 *
 * Equivalent to bestlineRawInit() with an empty initial buffer.
 *
 * @param prompt is printed before asking for input
 * @param infd is the descriptor keys are read from
 * @param outfd is the descriptor the line is drawn on
 * @return chomped allocated string of read line or null on eof/error
 */
char *bestlineRaw(const char *prompt, int infd, int outfd) {
return bestlineRawInit(prompt, "", infd, outfd);
}
/**
 * Like bestline, but with the additional parameter init used as the
 * buffer initial value. The init parameter is only used if the terminal
 * has basic capabilities.
 *
 * A prompt containing a tab anywhere, or a carriage return anywhere
 * but its first byte, is rejected with EINVAL. When stdin or stdout is
 * not a tty, or the terminal is unsupported, a plain GetLine() read is
 * used instead of the interactive editor.
 *
 * @return chomped allocated string of read line or null on eof/error
 */
char *bestlineInit(const char *prompt, const char *init) {
    int has_prompt = prompt && *prompt;
    if (has_prompt && (strchr(prompt, '\t') || strchr(prompt + 1, '\r'))) {
        errno = EINVAL;
        return 0;
    }
    if (!isatty(fileno(stdin)) || !isatty(fileno(stdout))) {
        /* not interactive: only echo the prompt when both ends are character devices */
        if (has_prompt && IsCharDev(fileno(stdin)) && IsCharDev(fileno(stdout))) {
            fputs(prompt, stdout);
            fflush(stdout);
        }
        return GetLine(stdin, stdout);
    }
    if (bestlineIsUnsupportedTerm()) {
        if (has_prompt) {
            fputs(prompt, stdout);
            fflush(stdout);
        }
        return GetLine(stdin, stdout);
    }
    fflush(stdout);
    return bestlineRawInit(prompt, init, fileno(stdin), fileno(stdout));
}
/**
 * Reads line intelligently.
 *
 * The high level function that is the main API of the bestline library.
 * This function checks if the terminal has basic capabilities, just
 * checking for a blacklist of inarticulate terminals, and later either
 * calls the line editing function or falls back to a plain line read so
 * that you will be able to type something even in the most desperate of
 * conditions.
 *
 * Equivalent to bestlineInit() with an empty initial buffer.
 *
 * @param prompt is printed before asking for input if we have a term
 *     and this may be set to empty or null to disable and prompt may
 *     contain ansi escape sequences, color, utf8, etc.
 * @return chomped allocated string of read line or null on eof/error
 */
char *bestline(const char *prompt) {
return bestlineInit(prompt, "");
}
/**
 * Reads line intelligently w/ history, e.g.
 *
 *     // see ~/.foo_history
 *     main() {
 *         char *line;
 *         while ((line = bestlineWithHistory("IN> ", "foo"))) {
 *             printf("OUT> %s\n", line);
 *             free(line);
 *         }
 *     }
 *
 * @param prompt is printed before asking for input if we have a term
 *     and this may be set to empty or null to disable and prompt may
 *     contain ansi escape sequences, color, utf8, etc.
 * @param prog is name of your app, used to generate history filename
 *     however if it contains a slash / dot then we'll assume prog is
 *     the history filename as determined by the caller; null disables
 *     history persistence
 * @return chomped allocated string of read line or null on eof/error
 */
char *bestlineWithHistory(const char *prompt, const char *prog) {
    struct abuf path;
    const char *home, *homepath;
    char *line;
    abInit(&path);
    if (prog && (strchr(prog, '/') || strchr(prog, '.'))) {
        /* caller supplied the history file name itself */
        abAppends(&path, prog);
    } else if (prog) {
        /* build "<home>/.<prog>_history", or ".<prog>_history" when no
           home directory can be determined */
        homepath = "";
        home = getenv("HOME");
        if (!home) {
            home = getenv("HOMEDRIVE");
            if (home)
                homepath = getenv("HOMEPATH");
            if (!home || !homepath)
                home = "";
        }
        if (*home) {
            abAppends(&path, home);
            abAppends(&path, homepath);
            abAppendw(&path, '/');
        }
        abAppendw(&path, '.');
        abAppends(&path, prog);
        abAppends(&path, "_history");
    }
    if (path.len)
        bestlineHistoryLoad(path.b);
    line = bestline(prompt);
    if (path.len && line && *line) {
        /* history here is inefficient but helpful when the user has multiple
         * repls open at the same time, so history propagates between them */
        bestlineHistoryLoad(path.b);
        bestlineHistoryAdd(line);
        bestlineHistorySave(path.b);
    }
    abFree(&path);
    return line;
}
/**
 * Registers tab completion callback.
 *
 * bestlineEdit() only routes the TAB key to completion while a callback
 * is registered.
 *
 * @param fn is the callback to store, or null to unregister
 */
void bestlineSetCompletionCallback(bestlineCompletionCallback *fn) {
completionCallback = fn;
}
/**
 * Registers hints callback.
 *
 * Registers a hints function to be called to show hints to the user at
 * the right of the prompt.
 *
 * @param fn is the callback to store
 */
void bestlineSetHintsCallback(bestlineHintsCallback *fn) {
hintsCallback = fn;
}
/**
 * Sets free hints callback.
 *
 * This registers a function to free the hints returned by the hints
 * callback registered with bestlineSetHintsCallback().
 *
 * @param fn is the callback to store
 */
void bestlineSetFreeHintsCallback(bestlineFreeHintsCallback *fn) {
freeHintsCallback = fn;
}
/**
 * Sets character translation callback.
 *
 * While a callback is registered, bestlineEdit() passes the code point
 * of each typed non-control character through it and inserts the
 * re-encoded result instead.
 *
 * @param fn is the callback to store, or null to unregister
 */
void bestlineSetXlatCallback(bestlineXlatCallback *fn) {
xlatCallback = fn;
}
/**
 * Adds completion.
 *
 * This function is used by the callback function registered by the user
 * in order to add completion options given the input string when the
 * user pressed TAB. See the example.c source code for a very easy to
 * understand example.
 *
 * A private copy of `str` is stored. If either allocation fails the
 * list is left unchanged.
 *
 * @param lc is the completion list to extend
 * @param str is the NUL-terminated candidate to copy into the list
 */
void bestlineAddCompletion(bestlineCompletions *lc, const char *str) {
    size_t size = strlen(str) + 1;
    char *copy = (char *)malloc(size);
    char **grown;
    if (!copy)
        return;
    memcpy(copy, str, size);
    grown = (char **)realloc(lc->cvec, (lc->len + 1) * sizeof(*lc->cvec));
    if (!grown) {
        free(copy);
        return;
    }
    lc->cvec = grown;
    lc->cvec[lc->len++] = copy;
}
/**
 * Frees list of completion options populated by bestlineAddCompletion().
 *
 * Every stored string and the vector itself are released. The fields
 * of `lc` are not reset, so the structure must be re-initialized
 * before it is used again.
 */
void bestlineFreeCompletions(bestlineCompletions *lc) {
    size_t left = lc->len;
    while (left)
        free(lc->cvec[--left]);
    /* free() accepts null, so an empty list needs no special case */
    free(lc->cvec);
}
/**
 * Enables "mask mode".
 *
 * When it is enabled, instead of the input that the user is typing, the
 * terminal will just display a corresponding number of asterisks, like
 * "****". This is useful for passwords and other secrets that should
 * not be displayed.
 *
 * The setting is stored in a file-level flag, so it stays in effect
 * for subsequent reads until bestlineMaskModeDisable() is called.
 *
 * @see bestlineMaskModeDisable()
 */
void bestlineMaskModeEnable(void) {
maskmode = 1;
}
/**
 * Disables "mask mode" by clearing the file-level flag.
 *
 * @see bestlineMaskModeEnable()
 */
void bestlineMaskModeDisable(void) {
maskmode = 0;
}
/**
 * Enables or disables "balance mode".
 *
 * When it is enabled, bestline() will block until parentheses are
 * balanced. This is useful for code but not for free text.
 *
 * @param mode is nonzero to enable, or 0 to disable
 */
void bestlineBalanceMode(char mode) {
balancemode = mode;
}
/**
 * Enables or disables "ollama mode".
 *
 * This enables you to type multiline input by putting triple quotes at
 * the beginning and end. For example:
 *
 *     >>> """
 *     ... second line
 *     ... third line
 *     ... """
 *
 * Would yield the string `"\nsecond line\nthird line\n"`, i.e. the
 * triple quotes themselves are stripped from the result.
 *
 * @param mode is 1 to enable, or 0 to disable
 */
void bestlineLlamaMode(char mode) {
llamamode = mode;
}
/**
 * Enables or disables Emacs mode.
 *
 * This mode remaps CTRL-C so you can use additional shortcuts, like C-c
 * C-s for slurp. By default, CTRL-C raises SIGINT for exiting programs;
 * in Emacs mode that takes C-c C-c.
 *
 * @param mode is nonzero to enable, or 0 to disable
 */
void bestlineEmacsMode(char mode) {
emacsmode = mode;
}
/**
* Allows implementation of user functions for read, write, and poll
* with the intention of polling for background I/O.
*/
/* Default read hook: forwards to read(2) and returns its result as an int. */
static int MyRead(int fd, void *c, int n) {
    ssize_t got = read(fd, c, n);
    return (int)got;
}
/* Default write hook: forwards to write(2) and returns its result as an int. */
static int MyWrite(int fd, const void *c, int n) {
    ssize_t put = write(fd, c, n);
    return (int)put;
}
/*
 * Default poll hook: waits up to `to` milliseconds for `events` on the
 * single descriptor `fd` and returns poll(2)'s result.
 */
static int MyPoll(int fd, int events, int to) {
    struct pollfd watch;
    watch.fd = fd;
    watch.events = events;
    return poll(&watch, 1, to);
}
/**
 * Installs user-supplied read, write and poll hooks.
 *
 * Each hook is set on every call: a null argument selects the built-in
 * implementation (MyRead, MyWrite or MyPoll), so passing null also
 * restores a previously replaced hook.
 *
 * @param userReadFn replaces the read hook, or null for the default
 * @param userWriteFn replaces the write hook, or null for the default
 * @param userPollFn replaces the poll hook, or null for the default
 */
void bestlineUserIO(int (*userReadFn)(int, void *, int), int (*userWriteFn)(int, const void *, int),
                    int (*userPollFn)(int, int, int)) {
    _MyRead = userReadFn ? userReadFn : MyRead;
    _MyWrite = userWriteFn ? userWriteFn : MyWrite;
    _MyPoll = userPollFn ? userPollFn : MyPoll;
}
================================================
FILE: llamafile/bestline.h
================================================
/* Public interface of the bestline line-editing library. */
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
/* Growable list of completion candidates, filled by bestlineAddCompletion(). */
typedef struct bestlineCompletions {
unsigned long len; /* number of entries in cvec */
char **cvec; /* heap array of heap-allocated candidate strings */
} bestlineCompletions;
/* Tab-completion callback; it adds candidates to the list it is handed.
NOTE(review): presumably the first two arguments are the current line
and a cursor position -- confirm against the call site. */
typedef void(bestlineCompletionCallback)(const char *, int,
bestlineCompletions *);
/* Hints callback (see bestlineSetHintsCallback). */
typedef char *(bestlineHintsCallback)(const char *, const char **, const char **);
/* Releases a hint returned by the hints callback. */
typedef void(bestlineFreeHintsCallback)(void *);
/* Maps one typed code point to the code point that gets inserted. */
typedef unsigned(bestlineXlatCallback)(unsigned);
/* Callback registration. */
void bestlineSetCompletionCallback(bestlineCompletionCallback *);
void bestlineSetHintsCallback(bestlineHintsCallback *);
void bestlineSetFreeHintsCallback(bestlineFreeHintsCallback *);
void bestlineAddCompletion(bestlineCompletions *, const char *);
void bestlineSetXlatCallback(bestlineXlatCallback *);
/* Line readers: each returns an allocated line, or null on eof/error. */
char *bestline(const char *);
char *bestlineInit(const char *, const char *);
char *bestlineRaw(const char *, int, int);
char *bestlineRawInit(const char *, const char *, int, int);
char *bestlineWithHistory(const char *, const char *);
/* In-memory history and its persistence. */
int bestlineHistoryAdd(const char *);
int bestlineHistoryLoad(const char *);
int bestlineHistorySave(const char *);
/* Mode switches and housekeeping. */
void bestlineBalanceMode(char);
void bestlineEmacsMode(char);
void bestlineClearScreen(int);
void bestlineDisableRawMode(void);
void bestlineFree(void *);
void bestlineFreeCompletions(bestlineCompletions *);
void bestlineHistoryFree(void);
void bestlineLlamaMode(char);
void bestlineMaskModeDisable(void);
void bestlineMaskModeEnable(void);
/* Replaces the read, write and poll hooks; null selects the default. */
void bestlineUserIO(int (*)(int, void *, int), int (*)(int, const void *, int),
int (*)(int, int, int));
/* Character classification and low-level helpers. */
int bestlineCharacterWidth(int);
char bestlineIsSeparator(unsigned);
char bestlineNotSeparator(unsigned);
char bestlineIsXeparator(unsigned);
unsigned bestlineUppercase(unsigned);
unsigned bestlineLowercase(unsigned);
long bestlineReadCharacter(int, char *, unsigned long);
#ifdef __cplusplus
}
#endif
================================================
FILE: llamafile/build-functions.sh
================================================
#!/bin/bash
# -*- mode:sh;indent-tabs-mode:nil;tab-width:4;coding:utf-8 -*-
# vi: set et ft=sh ts=4 sts=4 sw=4 fenc=utf-8 :vi
#
# Copyright 2024 Mozilla Foundation
# Copyright 2026 Mozilla.ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Shared build functions for llamafile GPU backends
#
# This file contains common functions used by cuda.sh and rocm.sh
# to reduce code duplication while keeping each script's toolchain-specific
# configuration clear and readable.
#
# Usage: source this file from a build script, then call the functions
#
# Parse the command-line arguments shared by the GPU build scripts.
#
# Sets:
#   JOBS    parallel job count (default: detected CPU count, falling back to 4)
#   CLEAN   1 when --clean was given, otherwise 0
#   OUTPUT  assigned only when --output PATH / --output=PATH is supplied
# Args: all script arguments ("$@")
#
# Exits 0 after printing --help. Exits 1 (message on stderr) for an unknown
# option, a missing --output value, or a job count that is not a positive
# integer. Both "-jN" and "-j N" are accepted.
parse_build_args() {
    JOBS=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
    CLEAN=0
    while [ $# -gt 0 ]; do
        case "$1" in
            -j*)
                JOBS="${1#-j}"
                # Accept the make-style "-j N" spelling as well as "-jN".
                if [ -z "$JOBS" ] && [ $# -gt 1 ]; then
                    shift
                    JOBS="$1"
                fi
                # An empty or non-numeric value would later break the numeric
                # comparison that throttles parallel compilation.
                case "$JOBS" in
                    ''|*[!0-9]*)
                        echo "Error: -j requires a positive integer (got '$JOBS')" >&2
                        exit 1
                        ;;
                esac
                if [ "$JOBS" -lt 1 ]; then
                    echo "Error: -j requires a positive integer (got '$JOBS')" >&2
                    exit 1
                fi
                ;;
            --clean)
                CLEAN=1
                ;;
            --output)
                # Refuse to silently record an empty output path.
                if [ $# -lt 2 ]; then
                    echo "Error: --output requires a PATH argument" >&2
                    exit 1
                fi
                shift
                OUTPUT="$1"
                ;;
            --output=*)
                OUTPUT="${1#--output=}"
                ;;
            --help)
                echo "Usage: $0 [-jN] [--clean] [--output PATH]"
                echo " -jN Use N parallel jobs (default: auto-detect)"
                echo " --clean Clean build directory before building"
                echo " --output Output path for shared library"
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
        shift
    done
}
# Resolve the GGML version string and commit id used to stamp the build.
#
# Sets: GGML_VERSION (plus GGML_VERSION_MAJOR/MINOR/PATCH when derived here)
#       and GGML_COMMIT
# Args: $1 = LLAMA_CPP_DIR
#
# Values already present in the environment win; otherwise the version is
# scraped from ggml/CMakeLists.txt and the commit from git ("unknown" when
# git cannot answer). Exits 1 if the scraped version is not N.N.N.
get_ggml_version() {
    local src_root="$1"
    local cmake_lists="$src_root/ggml/CMakeLists.txt"

    if [ -z "$GGML_VERSION" ]; then
        # Keep only the digits from each matching set(...) line.
        GGML_VERSION_MAJOR=$(grep 'set(GGML_VERSION_MAJOR' "$cmake_lists" 2>/dev/null | sed 's/[^0-9]*//g')
        GGML_VERSION_MINOR=$(grep 'set(GGML_VERSION_MINOR' "$cmake_lists" 2>/dev/null | sed 's/[^0-9]*//g')
        GGML_VERSION_PATCH=$(grep 'set(GGML_VERSION_PATCH' "$cmake_lists" 2>/dev/null | sed 's/[^0-9]*//g')
        GGML_VERSION="${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}"

        if echo "$GGML_VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+$'; then
            :
        else
            echo "Error: Invalid GGML version format: '$GGML_VERSION'"
            exit 1
        fi
    fi

    # Only consult git when the caller did not pin a commit id.
    [ -n "$GGML_COMMIT" ] ||
        GGML_COMMIT=$(cd "$src_root/ggml" 2>/dev/null && git rev-parse --short HEAD 2>/dev/null || echo "unknown")
}
# Make sure the build directory exists, wiping it first when requested.
# Args: $1 = BUILD_DIR, $2 = CLEAN (0 or 1)
setup_build_dir() {
    local target_dir="$1" wipe="$2"

    case "$wipe" in
        1)
            if [ -d "$target_dir" ]; then
                echo "Cleaning build directory..."
                rm -rf "$target_dir"
            fi
            ;;
    esac

    mkdir -p "$target_dir"
}
# Build the space-separated list of GPU translation units to compile.
# Sets: CUDA_SOURCES, NUM_SOURCES
# Args: $1 = GGML_CUDA_DIR, $2 = extra sources (optional, e.g., tinyblas.cu path)
#
# The extra sources come first, followed by every regular *.cu file directly
# inside the CUDA directory and inside its template-instances/ subdirectory.
collect_gpu_sources() {
    local cuda_root="$1"
    local prepend="$2"

    CUDA_SOURCES="$prepend"
    for f in "$cuda_root"/*.cu "$cuda_root/template-instances"/*.cu; do
        # An unmatched glob stays literal; the -f test filters it out.
        [ -f "$f" ] || continue
        CUDA_SOURCES="$CUDA_SOURCES $f"
    done

    NUM_SOURCES=$(echo $CUDA_SOURCES | wc -w)
}
# Compile GPU sources in parallel
# Args: $1 = compiler, $2 = arch_flags, $3 = common_flags, $4 = build_dir, $5 = jobs
# Reads: CUDA_SOURCES (space-separated source list), NUM_SOURCES
#
# Each source becomes <build_dir>/<name>.o; files under template-instances get
# a "ti-" prefix so they cannot collide with same-named top-level sources.
# Objects newer than their source are skipped.
#
# Returns 0 when every started compilation succeeded and 1 otherwise, so a
# failed compile can no longer go unnoticed (or leave a stale object to be
# linked). A job limit that is not a positive integer is treated as 1, because
# a limit of 0 would make the throttle loop spin forever.
compile_gpu_sources_parallel() {
    local compiler="$1"
    local arch_flags="$2"
    local common_flags="$3"
    local build_dir="$4"
    local jobs="$5"

    # Normalise the job limit before it is used in numeric comparisons.
    case "$jobs" in
        ''|*[!0-9]*) jobs=1 ;;
    esac
    if [ "$jobs" -lt 1 ]; then
        jobs=1
    fi

    echo "Compiling $NUM_SOURCES files with $jobs parallel jobs..."
    echo ""

    local count=0
    local total=$NUM_SOURCES
    local pids=""
    local src base obj running pid

    for src in $CUDA_SOURCES; do
        count=$((count + 1))
        base=$(basename "$src" .cu)

        # Create unique name to avoid collisions between main files and template-instances
        case "$src" in
            *template-instances*) obj="$build_dir/ti-${base}.o" ;;
            *) obj="$build_dir/${base}.o" ;;
        esac

        # Skip if object file is newer than source
        if [ -f "$obj" ] && [ "$obj" -nt "$src" ]; then
            echo "[$count/$total] Skipping: $base.cu (up to date)"
            continue
        fi

        echo "[$count/$total] Compiling: $base.cu"
        $compiler -c $arch_flags $common_flags -o "$obj" "$src" &
        # Remember the job so its exit status can be collected individually.
        pids="$pids $!"

        # Limit parallel jobs by waiting when we hit the limit
        running=$(jobs -r | wc -l)
        while [ "$running" -ge "$jobs" ]; do
            sleep 0.1
            running=$(jobs -r | wc -l)
        done
    done

    echo ""
    echo "Waiting for remaining compilations to finish..."

    # A bare `wait` always reports success; wait on each pid to see failures.
    local failed=0
    for pid in $pids; do
        wait "$pid" || failed=$((failed + 1))
    done

    if [ "$failed" -gt 0 ]; then
        echo "Error: $failed compilation(s) failed" >&2
        return 1
    fi
    return 0
}
# Build the host-side GGML core objects that are linked next to the GPU code.
# Args: $1 = LLAMA_CPP_DIR, $2 = BUILD_DIR
# Reads: GGML_VERSION, GGML_COMMIT (baked in as preprocessor defines)
#
# Objects are written as <BUILD_DIR>/ggml-core-<name>.o. C files go through
# gcc, everything else through g++ with -std=c++17. An object that is newer
# than its source is left alone.
compile_ggml_core() {
    local root="$1"
    local out_dir="$2"
    local src_dir="$root/ggml/src"
    local core_list="$src_dir/ggml.c $src_dir/ggml-alloc.c $src_dir/ggml-backend.cpp $src_dir/ggml-quants.c $src_dir/ggml-threading.cpp"

    echo "Compiling core GGML sources..."

    local host_flags=(
        -fPIC -O2 -DNDEBUG
        -DGGML_BUILD=1
        -DGGML_SHARED=1
        -DGGML_MULTIPLATFORM
        "-DGGML_VERSION=\"$GGML_VERSION\""
        "-DGGML_COMMIT=\"$GGML_COMMIT\""
        -I"$root/ggml/include"
        -I"$root/ggml/src"
    )

    local path file suffix stem object
    for path in $core_list; do
        file="${path##*/}"
        suffix="${file##*.}"
        stem="${file%.*}"
        object="$out_dir/ggml-core-${stem}.o"

        # Nothing to do when the object is already fresher than its source.
        if [ -f "$object" ] && [ "$object" -nt "$path" ]; then
            echo " Skipping: $file (up to date)"
            continue
        fi

        echo " Compiling: $file"
        case "$suffix" in
            c)
                gcc -c "${host_flags[@]}" -o "$object" "$path"
                ;;
            *)
                g++ -c "${host_flags[@]}" -std=c++17 -o "$object" "$path"
                ;;
        esac
    done

    echo ""
}
# Link every object file found under the build directory into one shared library.
# Args: $1 = linker command, $2 = linker_flags (e.g., "--shared" or "-shared -fPIC")
#       $3 = arch_flags, $4 = build_dir, $5 = output, $6 = extra_libs
#
# The flag arguments and the object list are expanded unquoted on purpose so
# that each whitespace-separated word reaches the linker as its own argument.
link_shared_library() {
    local ld_cmd="$1" ld_flags="$2" gpu_arch="$3"
    local obj_dir="$4" target="$5" libs="$6"

    echo "Linking..."

    local objects
    objects=$(find "$obj_dir" -name "*.o" -type f | tr '\n' ' ')
    local object_count
    object_count=$(find "$obj_dir" -name "*.o" -type f | wc -l)
    echo " Linking $object_count object files..."

    $ld_cmd $ld_flags $gpu_arch -o "$target" $objects $libs
}
# Report the elapsed wall-clock time and the artifact that was produced.
# Args: $1 = output file, $2 = start_time, $3 = optional note
#
# start_time is an epoch-seconds value (as printed by `date +%s`). The note,
# when non-empty, is printed right before the `ls -lh` listing of the output.
print_build_summary() {
    local artifact="$1" began="$2" remark="$3"
    local finished
    finished=$(date +%s)

    echo ""
    echo "Total time: $((finished - began)) seconds"
    echo ""
    echo "Successfully built: $artifact"
    [ -z "$remark" ] || echo "$remark"
    ls -lh "$artifact"
}
================================================
FILE: llamafile/chatbot.h
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include
#include
#include
#include
#include
#include
#include "chat.h"
#include "chatbot_backend.h"
#define DEFAULT_SYSTEM_PROMPT \
"A chat between a curious human and an artificial intelligence assistant. " \
"The assistant gives helpful, detailed, and polite answers to the " \
"human's questions."
struct bestlineCompletions;
struct common_params;
struct common_sampler;
struct llama_context;
struct llama_model;
struct mtmd_context;
// Shared declarations for the chatbot front ends: global state, the three
// entry points (main, api_main, cli_main), backend factories, and the helper
// and command-handler functions implemented across the chatbot sources.
namespace lf {
namespace chatbot {
// Conversation roles; g_role below holds one of these values.
enum Role {
ROLE_USER,
ROLE_ASSISTANT,
ROLE_SYSTEM,
};
// Sentinel token ids. The value is negative.
// NOTE(review): presumably chosen so it cannot collide with real vocabulary
// ids - confirm where IMAGE_PLACEHOLDER_TOKEN is consumed.
enum SpecialToken {
IMAGE_PLACEHOLDER_TOKEN = -31337,
};
// Result of extracting data URIs from text
struct DataUriExtraction {
std::string modified_text; // text with data URIs replaced by marker
std::vector images; // decoded image data
const char *marker; // marker string used for replacement
};
// ---- Global state shared by the chatbot translation units ----
extern bool g_manual_mode;
extern bool g_said_something;
extern char g_last_printed_char;
extern mtmd_context *g_mtmd; // multimodal context (replaces g_clip)
extern enum Role g_role;
extern common_params *g_params; // pointer to params (replaces gpt_params)
extern common_sampler *g_sampler; // sampler context (new)
extern std::vector g_messages; // chat message history
extern llama_context *g_ctx;
extern llama_model *g_model;
extern std::vector g_history;
// Declared volatile sig_atomic_t, i.e. a type that may be written from a
// signal handler.
extern volatile sig_atomic_t g_got_sigint;
extern bool g_interrupted_exit;
extern common_chat_templates_ptr g_chat_templates;
extern common_chat_parser_params g_chat_syntax;
extern std::string g_pending_file_content; // accumulated /upload content awaiting user message
extern ChatBackend *g_backend; // active inference backend
// ---- Entry points ----
// Original entry point: loads its own model (--chat mode)
int main(int argc, char **argv);
// API client entry point for combined mode (HTTP client to local server)
int api_main(const std::string &server_url, const std::string &system_prompt,
const std::string &model_path, std::function shutdown_fn);
// CLI mode: single prompt -> response, then exit
int cli_main(int argc, char **argv);
// Backend factories
std::unique_ptr create_direct_backend();
std::unique_ptr create_api_backend(const std::string &server_url);
// ---- Helpers ----
// NOTE(review): the meaning of the unnamed bool/int parameters below is not
// visible in this header; consult the definitions before calling.
bool eval_string(std::string_view, bool, bool);
DataUriExtraction extract_data_uris(std::string_view, const char *marker);
bool eval_token(int);
bool eval_tokens(std::vector);
bool handle_command(const char *);
bool is_base_model();
bool out_of_context(int);
// Signature matches bestline's hints callback type.
char *on_hint(const char *, const char **, const char **);
const char *get_role_color(enum Role);
const char *get_role_name(enum Role);
enum Role cycle_role(enum Role);
enum Role get_next_role(enum Role);
int tokens_used(void);
std::string token_to_piece(const llama_context *, int, bool);
void adjust_stacks(int, int);
void clear_ephemeral(void);
void ensure_newline();
// printf-style variadic reporter (const char * format followed by arguments).
void err(const char *, ...);
void fix_stacks(void);
void logo(char **);
// Functions taking a single vector argument.
// Presumably one handler per REPL slash command - TODO confirm.
void on_clear(const std::vector &);
// Signature matches bestline's completion callback type.
void on_completion(const char *, int, bestlineCompletions *);
void on_context(const std::vector &);
void on_dump(const std::vector &);
void on_forget(const std::vector &);
void on_help(const std::vector &);
void on_manual(const std::vector &);
void on_pop(const std::vector &);
void on_push(const std::vector &);
void on_stack(const std::vector &);
void on_undo(const std::vector &);
void on_upload(const std::vector &);
void print(const std::string_view &);
void print_ephemeral(const std::string_view &);
void record_undo(void);
// REPL loop running on top of the given backend.
void repl(ChatBackend &backend);
void rewind(int);
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_api.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include "chatbot_backend.h"
#include "color.h"
#include
#include
#include
#include
#include
using json = nlohmann::ordered_json;
namespace lf {
namespace chatbot {
// ApiBackend: HTTP client that talks to a local llamafile server
// via /v1/chat/completions with SSE streaming.
class ApiBackend : public ChatBackend {
public:
explicit ApiBackend(const std::string &server_url)
: server_url_(server_url) {
// Parse host and port from URL
// Expected format: http://host:port
std::string url = server_url;
if (url.substr(0, 7) == "http://")
url = url.substr(7);
auto colon = url.find(':');
if (colon != std::string::npos) {
host_ = url.substr(0, colon);
port_ = std::stoi(url.substr(colon + 1));
} else {
host_ = url;
port_ = 8080;
}
}
// Build the "content" field for a message, converting any embedded
// data URIs into multimodal content parts (image_url).
static json build_content(const std::string &text) {
// Look for data:image/ URIs
static const std::string prefix = "data:image/";
auto pos = text.find(prefix);
if (pos == std::string::npos) {
return text; // plain string, no images
}
// Split text into text parts and image_url parts
json parts = json::array();
size_t last = 0;
while (pos != std::string::npos) {
// Add preceding text
if (pos > last) {
std::string before = text.substr(last, pos - last);
if (!before.empty())
parts.push_back({{"type", "text"}, {"text", before}});
}
// Find end of data URI (terminated by whitespace, quote, or end of string)
size_t end = pos;
while (end < text.size() && text[end] != ' ' && text[end] != '\n' &&
text[end] != '\r' && text[end] != '\t' && text[end] != '"')
end++;
std::string uri = text.substr(pos, end - pos);
parts.push_back({
{"type", "image_url"},
{"image_url", {{"url", uri}}}
});
last = end;
pos = text.find(prefix, last);
}
// Add trailing text
if (last < text.size()) {
std::string after = text.substr(last);
if (!after.empty())
parts.push_back({{"type", "text"}, {"text", after}});
}
return parts;
}
std::string complete(
const std::vector &messages,
TokenCallback on_token) override
{
// Build request JSON
json req_json;
json msgs_json = json::array();
for (const auto &msg : messages) {
msgs_json.push_back({
{"role", msg.role},
{"content", build_content(msg.content)}
});
}
req_json["messages"] = msgs_json;
req_json["stream"] = true;
req_json["stream_options"] = {{"include_usage", true}};
std::string body = req_json.dump();
std::string assistant_content;
std::string sse_buffer;
bool stopped = false;
httplib::Client cli(host_, port_);
cli.set_read_timeout(300); // 5 minutes for long generations
auto result = cli.Post(
"/v1/chat/completions",
httplib::Headers{},
body,
"application/json",
[&](const char *data, size_t len) -> bool {
if (stopped)
return false;
sse_buffer.append(data, len);
// Process complete SSE lines
size_t pos;
while ((pos = sse_buffer.find("\n")) != std::string::npos) {
std::string line = sse_buffer.substr(0, pos);
sse_buffer.erase(0, pos + 1);
// Skip empty lines
if (line.empty() || line == "\r")
continue;
// Strip trailing \r
if (!line.empty() && line.back() == '\r')
line.pop_back();
// Only process "data: " lines
if (line.substr(0, 6) != "data: ")
continue;
std::string payload = line.substr(6);
// Check for stream end
if (payload == "[DONE]")
return true;
// Parse JSON
try {
json chunk = json::parse(payload);
// Extract usage stats from final chunk
if (chunk.contains("usage")) {
auto &usage = chunk["usage"];
if (usage.contains("prompt_tokens"))
last_prompt_tokens_ = usage["prompt_tokens"].get();
if (usage.contains("completion_tokens"))
last_completion_tokens_ = usage["completion_tokens"].get();
if (usage.contains("total_tokens"))
total_tokens_ = usage["total_tokens"].get();
}
if (!chunk.contains("choices") || chunk["choices"].empty())
continue;
auto &choice = chunk["choices"][0];
if (!choice.contains("delta"))
continue;
auto &delta = choice["delta"];
std::string content_delta;
std::string reasoning_delta;
if (delta.contains("content") && !delta["content"].is_null())
content_delta = delta["content"].get();
if (delta.contains("reasoning_content") && !delta["reasoning_content"].is_null())
reasoning_delta = delta["reasoning_content"].get();
if (!content_delta.empty())
assistant_content += content_delta;
if (!content_delta.empty() || !reasoning_delta.empty()) {
if (!on_token(content_delta, reasoning_delta)) {
stopped = true;
return false; // close connection to cancel
}
}
} catch (const json::exception &) {
// Skip malformed JSON chunks
continue;
}
}
return true;
});
if (!result) {
err("error: HTTP request failed: %s", httplib::to_string(result.error()).c_str());
} else if (result->status != 200) {
err("error: server returned HTTP %d", result->status);
}
return assistant_content;
}
int context_used() override {
return total_tokens_;
}
int context_max() override {
if (context_max_ <= 0)
fetch_context_max();
return context_max_;
}
void print_stats() override {
printf("prompt tokens: %d\n"
"completion tokens: %d\n"
"total tokens: %d\n",
last_prompt_tokens_, last_completion_tokens_, total_tokens_);
}
void on_clear() override {
// Message list is managed by the REPL (g_messages)
// Just reset our token counters
last_prompt_tokens_ = 0;
last_completion_tokens_ = 0;
total_tokens_ = 0;
}
void on_push() override {
message_stack_.push_back(g_messages);
printf(FAINT "conversation state pushed (depth: %zu)" RESET "\n",
message_stack_.size());
}
void on_pop() override {
if (message_stack_.empty()) {
err("error: conversation stack is empty");
return;
}
g_messages = message_stack_.back();
message_stack_.pop_back();
printf(FAINT "conversation state restored (depth: %zu)" RESET "\n",
message_stack_.size());
}
void on_undo() override {
// Remove last assistant + user message pair
while (!g_messages.empty() && g_messages.back().role == "assistant")
g_messages.pop_back();
if (!g_messages.empty() && g_messages.back().role == "user")
g_messages.pop_back();
printf(FAINT "last exchange undone (%zu messages remaining)" RESET "\n",
g_messages.size());
}
void on_forget(int n) override {
// Remove the oldest non-system message pair to free context
auto it = g_messages.begin();
while (it != g_messages.end() && it->role == "system")
++it;
if (it == g_messages.end()) {
err("error: nothing left to forget");
return;
}
// Remove one user+assistant exchange
auto start = it;
++it; // skip user
if (it != g_messages.end() && it->role == "assistant")
++it; // skip assistant
g_messages.erase(start, it);
printf(FAINT "oldest exchange forgotten (%zu messages remaining)" RESET "\n",
g_messages.size());
}
private:
std::string server_url_;
std::string host_;
int port_;
// Token usage tracking (from SSE usage stats)
int last_prompt_tokens_ = 0;
int last_completion_tokens_ = 0;
int total_tokens_ = 0;
// Cached context size from server /props endpoint
int context_max_ = 0;
void fetch_context_max() {
httplib::Client cli(host_, port_);
cli.set_read_timeout(5);
auto result = cli.Get("/props");
if (result && result->status == 200) {
try {
json props = json::parse(result->body);
if (props.contains("default_generation_settings")) {
auto &settings = props["default_generation_settings"];
if (settings.contains("n_ctx"))
context_max_ = settings["n_ctx"].get();
}
} catch (const json::exception &) {
}
}
}
// Message stack for /push and /pop
std::vector> message_stack_;
};
// Factory function: constructs a backend from `server_url` and returns it
// wrapped in a std::unique_ptr, so the caller owns the instance.
std::unique_ptr create_api_backend(const std::string &server_url) {
return std::make_unique(server_url);
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_backend.h
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include
#include
#include
#include "chat.h"
namespace lf {
namespace chatbot {
// Callback for streaming tokens. Return false to stop generation.
// ApiBackend calls it with two strings per chunk: the content delta and the
// reasoning delta (either may be empty, but not both).
using TokenCallback = std::function;
// Abstract inference backend for the chatbot REPL.
// DirectBackend: wraps llama_decode (used by --chat mode)
// ApiBackend: HTTP client to /v1/chat/completions (used by combined mode)
//
// Every member except the three capability hooks at the bottom is pure
// virtual and must be provided by the concrete backend.
class ChatBackend {
public:
// Virtual so a backend can be destroyed through a ChatBackend pointer.
virtual ~ChatBackend() = default;
// Send messages and stream the response.
// Calls on_token for each streamed chunk.
// Returns the full assistant content (no reasoning).
virtual std::string complete(
const std::vector &messages,
TokenCallback on_token) = 0;
// Context info
virtual int context_used() = 0;
virtual int context_max() = 0;
// Stats
virtual void print_stats() = 0;
// History management
virtual void on_clear() = 0;
virtual void on_push() = 0;
virtual void on_pop() = 0;
virtual void on_undo() = 0;
// NOTE(review): the meaning of `n` is backend-defined; ApiBackend ignores it.
virtual void on_forget(int n) = 0;
// Whether this backend supports token-level dump
// Defaults: not supported, and on_dump() does nothing.
virtual bool supports_dump() { return false; }
virtual void on_dump(int fd) {}
// Whether this backend supports manual mode (role cycling)
// Default: not supported.
virtual bool supports_manual_mode() { return false; }
};
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_cli.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// CLI mode: single prompt → response, then exit
//
// This mode is designed for programmatic use:
// - No logo, no streaming decorations
// - Uses chat completions (applies chat template)
// - Clean output suitable for piping
// - Exits after response completes
//
// Usage: llamafile -m model.gguf --cli -p "Your prompt here"
// llamafile -m model.gguf --cli --nothink -p "Your prompt here"
// llamafile -m model.gguf --cli --mmproj mmproj.gguf --image photo.jpg -p "Describe this image"
//
#include "chatbot.h"
#include
#include
#include
#include
#include
#include
#include
#include "arg.h"
#include "chat.h"
#include "common.h"
#include "llama.h"
#include "log.h"
#include "mtmd.h"
#include "mtmd-helper.h"
#include "sampling.h"
#include "llamafile.h"
namespace lf {
namespace chatbot {
// Forward declarations from chatbot_repl.cpp
extern void on_sigint(int sig);
// Result of applying chat template - includes prompt and parser params for output parsing
struct cli_chat_template_result {
// Fully formatted prompt text; left empty when the fallback template
// application reports an error.
std::string prompt;
// Filled from the template engine's output on the Jinja path; stays
// default-constructed on the fallback path.
common_chat_parser_params parser_params;
};
// Helper to apply chat template with full control over inputs
//
// Two paths:
//  - `templates` non-null: the messages are rendered with
//    common_chat_templates_apply() (Jinja enabled), honouring `add_assistant`
//    and `enable_thinking`, and parser_params is filled from the result.
//  - `templates` null: falls back to llama_chat_apply_template() using
//    params.chat_template, or the model's built-in template when that is
//    empty. `enable_thinking` has no effect on this path.
//
// Returns the formatted prompt plus the parser parameters. On the fallback
// path a negative length from llama_chat_apply_template() yields a result
// whose prompt is empty.
static cli_chat_template_result cli_apply_chat_template_full(llama_model *model,
common_chat_templates *templates,
const common_params ¶ms,
const std::vector &messages,
bool add_assistant,
bool enable_thinking) {
cli_chat_template_result result;
if (templates) {
common_chat_templates_inputs inputs;
inputs.messages = messages;
inputs.use_jinja = true;
inputs.add_generation_prompt = add_assistant;
inputs.enable_thinking = enable_thinking;
// Set reasoning_format so the PEG parser includes reasoning extraction
inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
auto chat_params = common_chat_templates_apply(templates, inputs);
result.prompt = chat_params.prompt;
// Initialize parser params from chat_params
result.parser_params.format = chat_params.format;
result.parser_params.thinking_forced_open = chat_params.thinking_forced_open;
// Only load a parser definition when the template produced one.
if (!chat_params.parser.empty()) {
result.parser_params.parser.load(chat_params.parser);
}
// Enable reasoning parsing for thinking models
result.parser_params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
result.parser_params.reasoning_in_content = false;
return result;
}
// Fallback to heuristic-based template (doesn't support enable_thinking)
// NOTE(review): the model-provided template pointer is passed on unchecked;
// confirm llama_chat_apply_template() accepts a null template.
const char *tmpl = params.chat_template.empty()
? llama_model_chat_template(model, nullptr)
: params.chat_template.c_str();
// Build llama_chat_message array from messages
// Note: c_str() pointers remain valid because messages vector is not modified
// until after llama_chat_apply_template() completes
std::vector chat;
for (const auto &msg : messages) {
chat.push_back({msg.role.c_str(), msg.content.c_str()});
}
// First call with a null buffer only measures the required length.
int len = llama_chat_apply_template(tmpl, chat.data(), chat.size(), add_assistant, nullptr, 0);
if (len < 0) {
return result;
}
// Second call writes the formatted prompt into the sized buffer.
result.prompt.resize(len);
llama_chat_apply_template(tmpl, chat.data(), chat.size(), add_assistant, &result.prompt[0], result.prompt.size());
// For fallback, parser_params will be default (COMMON_CHAT_FORMAT_CONTENT_ONLY)
return result;
}
// Releases whichever of the CLI-mode resources were created. Null arguments
// are skipped, so early-exit paths can pass nullptr for anything not yet
// initialised. The objects are freed in the reverse of the order in which
// cli_main creates them: multimodal context, sampler, context, then model.
static void cleanup(mtmd_context *mtmd_ctx, common_sampler *sampler,
                    llama_context *ctx, llama_model *model) {
    if (mtmd_ctx != nullptr) {
        mtmd_free(mtmd_ctx);
    }
    if (sampler != nullptr) {
        common_sampler_free(sampler);
    }
    if (ctx != nullptr) {
        llama_free(ctx);
    }
    if (model != nullptr) {
        llama_model_free(model);
    }
}
int cli_main(int argc, char **argv) {
signal(SIGPIPE, SIG_IGN);
// Parse flags quietly (no logo, no ephemeral messages)
common_params params;
params.sampling.n_prev = 64;
params.n_batch = 256;
params.sampling.temp = 0; // deterministic by default
// Note: FLAG_nothink, FLAG_verbose, FLAG_nologo are set by main.cpp
// before calling cli_main(). GPU is also initialized there;
// Fully disable common_log system BEFORE common_init() to prevent build info log
// This pauses the log worker thread so LOG_INF calls become no-ops
common_log_pause(common_log_main());
// Set llama log callback to null
llama_log_set((ggml_log_callback)llamafile_log_callback_null, NULL);
// Initialize backend and common
llama_backend_init();
common_init();
// Parse arguments (argv is already filtered by parse_llamafile_args in args.cpp)
if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_CLI)) {
fprintf(stderr, "error: failed to parse arguments\n");
return 1;
}
// Check that a prompt was provided
if (params.prompt.empty()) {
fprintf(stderr, "error: --cli mode requires -p \"prompt\"\n");
return 1;
}
// GPU layers default
if (llamafile_has_metal() && params.n_gpu_layers < 0) {
params.n_gpu_layers = INT_MAX;
}
// Load model
llama_model_params model_params = common_model_params_to_llama(params);
llama_model *model = llama_model_load_from_file(params.model.path.c_str(), model_params);
if (!model) {
fprintf(stderr, "error: failed to load model: %s\n", params.model.path.c_str());
return 2;
}
// Adjust context size
if (params.n_ctx <= 0 || params.n_ctx > (int)llama_model_n_ctx_train(model))
params.n_ctx = llama_model_n_ctx_train(model);
if (params.n_ctx < params.n_batch)
params.n_batch = params.n_ctx;
// Create context
llama_context_params ctx_params = common_context_params_to_llama(params);
llama_context *ctx = llama_init_from_model(model, ctx_params);
if (!ctx) {
fprintf(stderr, "error: failed to create context\n");
cleanup(nullptr, nullptr, nullptr, model);
return 3;
}
// Initialize sampler
common_sampler *sampler = common_sampler_init(model, params.sampling);
if (!sampler) {
fprintf(stderr, "error: failed to initialize sampler\n");
cleanup(nullptr, nullptr, ctx, model);
return 4;
}
// Initialize multimodal context and load images if provided
mtmd_context *mtmd_ctx = nullptr;
mtmd::bitmaps bitmaps;
bool has_images = !params.image.empty();
if (has_images) {
if (params.mmproj.path.empty()) {
fprintf(stderr, "error: --image requires --mmproj to specify a vision model\n");
cleanup(nullptr, sampler, ctx, model);
return 5;
}
// Initialize vision model
mtmd_context_params mparams = mtmd_context_params_default();
mparams.use_gpu = params.mmproj_use_gpu;
mparams.n_threads = params.cpuparams.n_threads;
mparams.print_timings = false;
mparams.flash_attn_type = params.flash_attn_type;
mparams.warmup = params.warmup;
mparams.image_min_tokens = params.image_min_tokens;
mparams.image_max_tokens = params.image_max_tokens;
mtmd_helper_log_set((ggml_log_callback)llamafile_log_callback_null, NULL);
mtmd_ctx = mtmd_init_from_file(params.mmproj.path.c_str(), model, mparams);
if (!mtmd_ctx) {
fprintf(stderr, "error: failed to load vision model: %s\n",
params.mmproj.path.c_str());
cleanup(nullptr, sampler, ctx, model);
return 5;
}
// Load image bitmaps
for (const auto &image_path : params.image) {
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_file(mtmd_ctx, image_path.c_str()));
if (!bmp.ptr) {
fprintf(stderr, "error: failed to load image: %s\n", image_path.c_str());
cleanup(mtmd_ctx, sampler, ctx, model);
return 5;
}
bitmaps.entries.push_back(std::move(bmp));
}
} else if (!params.mmproj.path.empty()) {
LOG_INF("--mmproj specified without --image, vision model will not be loaded\n");
}
// Initialize chat templates
common_chat_templates_ptr chat_templates;
bool is_chat_model = llama_model_meta_val_str(model, "tokenizer.chat_template", 0, 0) != -1
|| !params.chat_template.empty();
if (is_chat_model) {
chat_templates = common_chat_templates_init(model, params.chat_template);
}
// Build the prompt
// If images are provided, prepend image markers to the prompt
std::string user_prompt = params.prompt;
if (has_images && user_prompt.find(mtmd_default_marker()) == std::string::npos) {
std::string markers;
for (size_t i = 0; i < params.image.size(); i++) {
markers += mtmd_default_marker();
}
user_prompt = markers + user_prompt;
}
std::string formatted_prompt;
common_chat_parser_params parser_params; // For parsing output
bool enable_thinking = false;
const llama_vocab *vocab = llama_model_get_vocab(model);
if (is_chat_model) {
// Build message list
std::vector messages;
if (!params.system_prompt.empty()) {
common_chat_msg sys_msg;
sys_msg.role = "system";
sys_msg.content = params.system_prompt;
messages.push_back(sys_msg);
}
common_chat_msg user_msg;
user_msg.role = "user";
user_msg.content = user_prompt;
messages.push_back(user_msg);
// Apply chat template with enable_thinking based on --nothink flag
// When --nothink is set, we tell the template to disable thinking mode
// so the model won't produce ... output at all
enable_thinking = !FLAG_nothink;
auto template_result = cli_apply_chat_template_full(model, chat_templates.get(), params,
messages, true, enable_thinking);
formatted_prompt = template_result.prompt;
parser_params = template_result.parser_params;
} else {
// Base model: use prompt as-is
formatted_prompt = user_prompt;
}
// Tokenize and evaluate prompt
llama_pos n_past = 0;
if (has_images) {
// Use mtmd pipeline for multimodal prompt evaluation
mtmd_input_text text;
text.text = formatted_prompt.c_str();
text.add_special = true;
text.parse_special = true;
mtmd::input_chunks chunks(mtmd_input_chunks_init());
auto bitmaps_c_ptr = bitmaps.c_ptr();
int32_t res = mtmd_tokenize(mtmd_ctx, chunks.ptr.get(), &text,
bitmaps_c_ptr.data(), bitmaps_c_ptr.size());
if (res != 0) {
if (res == 1)
fprintf(stderr, "error: number of images doesn't match number of markers in prompt\n");
else if (res == 2)
fprintf(stderr, "error: image preprocessing failed\n");
else
fprintf(stderr, "error: failed to tokenize prompt with images (error %d)\n", res);
cleanup(mtmd_ctx, sampler, ctx, model);
return 6;
}
// Check context using n_tokens (actual KV cache entries needed)
size_t total_tokens = mtmd_helper_get_n_tokens(chunks.ptr.get());
if ((int)total_tokens > params.n_ctx) {
size_t text_tokens = 0, image_tokens = 0;
for (size_t i = 0; i < mtmd_input_chunks_size(chunks.ptr.get()); i++) {
auto chunk = mtmd_input_chunks_get(chunks.ptr.get(), i);
if (mtmd_input_chunk_get_type(chunk) == MTMD_INPUT_CHUNK_TYPE_TEXT)
text_tokens += mtmd_input_chunk_get_n_tokens(chunk);
else
image_tokens += mtmd_input_chunk_get_n_tokens(chunk);
}
fprintf(stderr, "error: prompt too long (%zu tokens, context is %d)\n"
" text: %zu tokens, image: %zu tokens\n"
" hint: use --image-max-tokens to reduce image token count\n",
total_tokens, params.n_ctx, text_tokens, image_tokens);
cleanup(mtmd_ctx, sampler, ctx, model);
return 5;
}
llama_pos new_n_past = 0;
if (mtmd_helper_eval_chunks(mtmd_ctx, ctx, chunks.ptr.get(),
0, 0, params.n_batch, true, &new_n_past)) {
fprintf(stderr, "error: failed to evaluate prompt with images\n");
cleanup(mtmd_ctx, sampler, ctx, model);
return 6;
}
n_past = new_n_past;
} else {
// Plain text tokenization
std::vector tokens = llamafile_tokenize(model, formatted_prompt, false, true);
// Add BOS if needed
if (llama_vocab_get_add_bos(vocab)) {
tokens.insert(tokens.begin(), llama_vocab_bos(vocab));
}
// Check context
if ((int)tokens.size() > params.n_ctx) {
fprintf(stderr, "error: prompt too long (%zu tokens, context is %d)\n",
tokens.size(), params.n_ctx);
cleanup(mtmd_ctx, sampler, ctx, model);
return 5;
}
// Evaluate prompt
for (int i = 0; i < (int)tokens.size(); i += params.n_batch) {
int n_eval = std::min(params.n_batch, (int)tokens.size() - i);
if (llama_decode(ctx, llama_batch_get_one(&tokens[i], n_eval))) {
fprintf(stderr, "error: failed to evaluate prompt\n");
cleanup(mtmd_ctx, sampler, ctx, model);
return 6;
}
}
n_past = tokens.size();
}
// Install signal handler for graceful interrupt
struct sigaction sa, old_sa;
sa.sa_handler = on_sigint;
sa.sa_flags = 0;
sigemptyset(&sa.sa_mask);
sigaction(SIGINT, &sa, &old_sa);
// Generate response
// When thinking is enabled, we parse the output to show ... and content.
int n_cur = n_past;
const bool use_chat_parser = enable_thinking &&
parser_params.format != COMMON_CHAT_FORMAT_CONTENT_ONLY;
std::string raw_output; // Accumulates raw token output for parsing
common_chat_msg prev_msg; // Previous parse result for diff computation
bool think_tag_opened = false; // Track if we've printed
bool think_tag_closed = false; // Track if we've printed
while (n_cur < params.n_ctx) {
if (g_got_sigint) {
g_got_sigint = 0;
break;
}
llama_token id = common_sampler_sample(sampler, ctx, -1);
common_sampler_accept(sampler, id, true);
// Check for end of generation
if (llama_vocab_is_eog(vocab, id)) {
break;
}
if (use_chat_parser) {
// Accumulate tokens and parse to extract content
std::string token_str = llamafile_token_to_piece(ctx, id, true);
raw_output += token_str;
// Parse incrementally
auto msg = common_chat_parse(raw_output, /*is_partial=*/true, parser_params);
// Compute diffs to find new content
auto diffs = common_chat_msg_diff::compute_diffs(prev_msg, msg);
for (const auto &diff : diffs) {
// Output reasoning content wrapped in tags
if (!diff.reasoning_content_delta.empty()) {
if (!think_tag_opened) {
fputs("", stdout);
think_tag_opened = true;
}
fputs(diff.reasoning_content_delta.c_str(), stdout);
fflush(stdout);
}
// Output final content (close think tag first if needed)
if (!diff.content_delta.empty()) {
if (think_tag_opened && !think_tag_closed) {
fputs("\n", stdout);
think_tag_closed = true;
}
fputs(diff.content_delta.c_str(), stdout);
fflush(stdout);
}
}
prev_msg = msg;
} else {
// No parsing needed - output token directly
std::string piece = llamafile_token_to_piece(ctx, id, false);
fputs(piece.c_str(), stdout);
fflush(stdout);
}
// Evaluate token
if (llama_decode(ctx, llama_batch_get_one(&id, 1))) {
break;
}
n_cur++;
}
// Ensure output ends with newline
printf("\n");
// Restore signal handler
sigaction(SIGINT, &old_sa, nullptr);
// Cleanup
cleanup(mtmd_ctx, sampler, ctx, model);
llama_backend_free();
return 0;
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_comm.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include "chatbot_backend.h"
#include
#include
#include
#include "llama.h"
#include "llamafile.h"
#include "sampling.h" // llama.cpp common/sampling.h
namespace lf {
namespace chatbot {
// handle irc style commands like: `/arg0 arg1 arg2`
bool handle_command(const char *command) {
if (!strcmp(command, "/?")) {
const std::vector args = {"?"};
on_help(args);
return true;
}
if (!(command[0] == '/' && std::isalpha(command[1])))
return false;
std::vector args;
std::istringstream iss(command + 1);
std::string arg;
while (iss >> arg)
args.push_back(arg);
if (args[0] == "exit" || args[0] == "bye") {
exit(0);
} else if (args[0] == "help") {
on_help(args);
} else if (args[0] == "stats") {
g_backend->print_stats();
} else if (args[0] == "context") {
int used = g_backend->context_used();
int max = g_backend->context_max();
printf("%d out of %d context tokens used (%d tokens remaining)\n",
used, max, max - used);
} else if (args[0] == "manual") {
if (!g_backend->supports_manual_mode()) {
err("manual mode not available in this mode — use --chat for direct model access");
} else {
on_manual(args);
}
} else if (args[0] == "clear") {
g_backend->on_clear();
} else if (args[0] == "dump") {
if (!g_backend->supports_dump()) {
err("dump not available in this mode — use --chat for direct model access");
} else {
on_dump(args);
}
} else if (args[0] == "push") {
g_backend->on_push();
} else if (args[0] == "pop") {
g_backend->on_pop();
} else if (args[0] == "undo") {
g_backend->on_undo();
} else if (args[0] == "forget") {
g_backend->on_forget(1);
} else if (args[0] == "stack") {
on_stack(args);
} else if (args[0] == "upload") {
on_upload(args);
} else {
err("%s: unrecognized command", args[0].c_str());
}
return true;
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_comp.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include
#include
#include
#include
#include "bestline.h"
namespace lf {
namespace chatbot {
// Returns true when `str` begins with every character of `prefix`.
// An empty prefix matches any string.
static bool starts_with(const char *str, const char *prefix) {
    for (; *prefix != '\0'; ++str, ++prefix) {
        // Also covers `str` ending early: its NUL never equals a prefix char.
        if (*str != *prefix)
            return false;
    }
    return true;
}
// Returns true when `path` can be stat()'d and refers to a directory.
// Any stat() failure (e.g. the path does not exist) yields false.
static bool is_directory(const char *path) {
    struct stat info;
    if (stat(path, &info) != 0)
        return false;
    return S_ISDIR(info.st_mode);
}
// Completion callback: fills `comp` with candidates for the current `line`.
//
// When the line starts with "/upload ", the remainder is treated as a path
// prefix and expanded with glob(); directories get a trailing slash. In all
// other cases the line is matched as a prefix against the known slash
// commands. `pos` is not used.
void on_completion(const char *line, int pos, bestlineCompletions *comp) {
    static const char kUploadPrefix[] = "/upload ";

    if (!starts_with(line, kUploadPrefix)) {
        static const char *const kCommands[] = {
            "/clear", // usage: /clear
            "/context", // usage: /context
            "/dump", // usage: /dump [FILE]
            "/exit", // usage: /exit
            "/forget", // usage: /forget
            "/help", // usage: /help [COMMAND]
            "/manual", // usage: /manual [on|off]
            "/pop", // usage: /pop
            "/push", // usage: /push
            "/stack", // usage: /stack
            "/stats", // usage: /stats
            "/undo", // usage: /undo
            "/upload", // usage: /upload FILE
        };
        for (const char *candidate : kCommands) {
            if (starts_with(candidate, line))
                bestlineAddCompletion(comp, candidate);
        }
        return;
    }

    // Build "<typed path>*" and let glob() enumerate the matches.
    std::string pattern(line + (sizeof(kUploadPrefix) - 1));
    pattern.push_back('*');

    glob_t matches;
    if (glob(pattern.c_str(), GLOB_TILDE, 0, &matches) != 0)
        return;

    for (size_t k = 0; k != matches.gl_pathc; ++k) {
        const char *path = matches.gl_pathv[k];
        std::string completion(kUploadPrefix);
        completion.append(path);
        if (is_directory(path))
            completion.push_back('/');
        bestlineAddCompletion(comp, completion.c_str());
    }
    globfree(&matches);
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_direct.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include "chatbot_backend.h"
#include "chat.h"
#include "common.h"
#include "llama.h"
#include "llamafile.h"
#include "sampling.h"
namespace lf {
namespace chatbot {
// DirectBackend: wraps llama_decode for --chat mode (standalone TUI).
// Uses the existing global state (g_ctx, g_model, g_sampler, etc.)
// for inference and KV cache management.
//
// Most REPL operations are forwarded to the free functions of the same
// name in lf::chatbot, passing an empty argument list.
class DirectBackend : public ChatBackend {
public:
// Generates one assistant reply by sampling tokens until an end-of-generation
// token is produced, evaluation fails, SIGINT is observed, or `on_token`
// returns false. Returns the accumulated visible content (reasoning text is
// streamed to `on_token` but not included in the return value).
//
// `messages` is not read by this implementation; sampling continues from
// whatever has already been evaluated into g_ctx.
std::string complete(
const std::vector &messages,
TokenCallback on_token) override
{
std::string assistant_content;
const llama_vocab *vocab = llama_model_get_vocab(g_model);
// Check if we should use chat parsing (for think mode models)
const bool use_chat_parser =
g_chat_syntax.format != COMMON_CHAT_FORMAT_CONTENT_ONLY;
// Raw text of every sampled token so far; re-parsed after each token.
std::string raw_output;
// Result of the previous parse, used to compute what is new this step.
common_chat_msg prev_msg;
for (;;) {
// On interrupt, try to terminate the turn with an end-of-turn token.
// NOTE(review): eval_token() goes through eval_tokens(), which itself
// checks g_got_sigint first — confirm the EOT token is actually evaluated.
if (g_got_sigint) {
eval_token(llamafile_token_eot(g_model));
break;
}
llama_token id = common_sampler_sample(g_sampler, g_ctx, -1);
common_sampler_accept(g_sampler, id, true);
// The token is evaluated before the end-of-generation check, so an EOG
// token is fed to the model but never emitted to the callback.
if (!eval_token(id))
break;
if (llama_vocab_is_eog(vocab, id))
break;
if (use_chat_parser) {
// Re-parse the whole output as a partial message and emit only the
// content/reasoning deltas relative to the previous parse.
std::string token_str = token_to_piece(g_ctx, id, true);
raw_output += token_str;
auto msg = common_chat_parse(raw_output, true, g_chat_syntax);
auto diffs = common_chat_msg_diff::compute_diffs(prev_msg, msg);
for (const auto &diff : diffs) {
std::string content_delta = diff.content_delta;
std::string reasoning_delta = diff.reasoning_content_delta;
if (!content_delta.empty())
assistant_content += content_delta;
// Skip diffs that carry neither content nor reasoning text.
if (!content_delta.empty() || !reasoning_delta.empty()) {
if (!on_token(content_delta, reasoning_delta))
goto done;
}
}
prev_msg = msg;
} else {
// No structured parsing: forward the token text as content.
std::string token_str = token_to_piece(g_ctx, id, g_params->special);
assistant_content += token_str;
if (!on_token(token_str, ""))
goto done;
}
}
done:
return assistant_content;
}
// Number of context tokens currently in use, as reported by tokens_used().
int context_used() override {
return tokens_used();
}
// Context window size of g_ctx.
int context_max() override {
return llama_n_ctx(g_ctx);
}
// Prints performance counters. Logging is enabled for the duration of the
// call and then set back to disabled unconditionally (the previous value of
// FLAG_log_disable is not restored).
void print_stats() override {
FLAG_log_disable = false;
common_perf_print(g_ctx, g_sampler);
FLAG_log_disable = true;
}
void on_clear() override {
lf::chatbot::on_clear({});
}
void on_push() override {
lf::chatbot::on_push({});
}
void on_pop() override {
lf::chatbot::on_pop({});
}
void on_undo() override {
lf::chatbot::on_undo({});
}
// NOTE(review): `n` is not used; this forwards a single on_forget() call
// regardless of the requested count.
void on_forget(int n) override {
lf::chatbot::on_forget({});
}
bool supports_dump() override { return true; }
// NOTE(review): `fd` is not used; on_dump() is invoked with no FILE argument.
void on_dump(int fd) override {
std::vector args = {"dump"};
lf::chatbot::on_dump(args);
}
bool supports_manual_mode() override { return true; }
};
// Factory function: returns a newly allocated backend wrapped in a
// std::unique_ptr, so the caller owns it.
// NOTE(review): presumably constructs a DirectBackend — confirm against the
// declaration in chatbot_backend.h.
std::unique_ptr create_direct_backend() {
return std::make_unique();
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_eval.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include "base64.hpp"
#include "common.h"
#include "llama.h"
#include "mtmd.h"
#include "mtmd-helper.h"
#include "datauri.h"
#include "image.h"
#include "llama.h" // llamafile wrapper
#include "string.h"
#include
#include
#include
namespace lf {
namespace chatbot {
// Evaluates `tokens` with llama_decode() in batches of at most
// g_params->n_batch and appends each successfully decoded batch to
// g_history.
//
// Returns true when every token was evaluated. Returns the result of
// out_of_context() when the tokens would not fit in the context window or
// when llama_decode() reports failure, and false when interrupted by SIGINT
// (batches decoded before the interrupt remain in g_history).
bool eval_tokens(std::vector tokens) {
int N = (int)tokens.size();
// Refuse up front if the whole sequence cannot fit.
if (tokens_used() + N > llama_n_ctx(g_ctx))
return out_of_context(N);
for (int i = 0; i < N; i += g_params->n_batch) {
// Honor ctrl-c between batches; the flag is consumed here.
if (g_got_sigint) {
g_got_sigint = false;
clear_ephemeral();
return false;
}
// Only show a progress indicator when more than one batch is needed.
if (N > g_params->n_batch)
print_ephemeral(format("loading prompt %d%%...", (int)((double)i / N * 100)));
// Size of this batch: the remainder, capped at n_batch.
int n_eval = (int)tokens.size() - i;
if (n_eval > g_params->n_batch)
n_eval = g_params->n_batch;
if (llama_decode(g_ctx, llama_batch_get_one(&tokens[i], n_eval)))
return out_of_context(n_eval);
// Record the batch only after it decoded successfully.
g_history.insert(g_history.end(), tokens.begin() + i, tokens.begin() + i + n_eval);
}
clear_ephemeral();
// this function is what computes /stats. we need to call it now
// since llama_decode() kicks the can down the road to functions
// like common_sampler_sample(). that is bad because the chatbot
// returns control to the repl rather than sampling when loading
// system and image prompts.
llama_synchronize(g_ctx);
return true;
}
// Evaluates a single token by delegating to eval_tokens() with a
// one-element list; returns whatever eval_tokens() returns.
bool eval_token(int id) {
    const bool evaluated = eval_tokens({id});
    return evaluated;
}
// Tokenizes `str` with the loaded model and evaluates the resulting tokens.
// `add_special` and `parse_special` are forwarded to llamafile_tokenize().
bool eval_plain_text(const std::string &str, bool add_special, bool parse_special) {
    const bool evaluated =
        eval_tokens(llamafile_tokenize(g_model, str, add_special, parse_special));
    return evaluated;
}
// Evaluates the chunks produced by mtmd_tokenize() one at a time with
// mtmd_helper_eval_chunk_single() (the helper the llama.cpp server uses) and
// mirrors them into g_history: text chunks contribute their real tokens,
// other chunks contribute one IMAGE_PLACEHOLDER_TOKEN per position.
//
// The running position is tracked explicitly because, for M-RoPE models, the
// number of positions a chunk occupies can differ from its token count.
//
// Returns true on success; false on SIGINT or when a chunk fails to
// evaluate; the result of out_of_context() when the chunks would not fit.
static bool eval_mtmd_chunks(mtmd_input_chunks *chunks) {
    const size_t chunk_count = mtmd_input_chunks_size(chunks);

    // Budget by positions (not tokens) for M-RoPE compatibility.
    const llama_pos needed = mtmd_helper_get_n_pos(chunks);
    if (tokens_used() + needed > llama_n_ctx(g_ctx))
        return out_of_context(needed);

    // Position at which the next chunk starts.
    llama_pos cursor = tokens_used();

    for (size_t idx = 0; idx != chunk_count; ++idx) {
        // Honor ctrl-c between chunks; the flag is consumed here.
        if (g_got_sigint) {
            g_got_sigint = false;
            clear_ephemeral();
            return false;
        }

        const mtmd_input_chunk *chunk = mtmd_input_chunks_get(chunks, idx);
        const auto kind = mtmd_input_chunk_get_type(chunk);
        const bool is_text = kind == MTMD_INPUT_CHUNK_TYPE_TEXT;

        // Progress hint: always for media, for text only when it spans
        // more than one batch.
        if (!is_text) {
            print_ephemeral("processing image...");
        } else if ((int)mtmd_input_chunk_get_n_tokens(chunk) > g_params->n_batch) {
            print_ephemeral("loading prompt...");
        }

        llama_pos next_cursor = cursor;
        const int32_t status = mtmd_helper_eval_chunk_single(
            g_mtmd, g_ctx, chunk,
            cursor,
            0, // seq_id
            g_params->n_batch,
            true, // logits_last
            &next_cursor);
        if (status != 0) {
            if (is_text)
                err("failed to evaluate text chunk");
            else
                err("failed to evaluate image chunk");
            return false;
        }

        // Keep g_history in step with what was just evaluated.
        const llama_pos positions = mtmd_input_chunk_get_n_pos(chunk);
        if (is_text) {
            size_t text_len;
            const llama_token *text = mtmd_input_chunk_get_tokens_text(chunk, &text_len);
            g_history.insert(g_history.end(), text, text + text_len);
        } else {
            // One placeholder per position (n_pos, for M-RoPE).
            llama_pos added = 0;
            while (added < positions) {
                g_history.push_back(IMAGE_PLACEHOLDER_TOKEN);
                ++added;
            }
        }

        cursor = next_cursor;
    }

    clear_ephemeral();
    llama_synchronize(g_ctx);
    return true;
}
// Evaluate a string that may contain embedded data URIs for images.
// Images are processed using the mtmd API which requires tokenizing
// text and images together.
bool eval_string(std::string_view s, bool add_special, bool parse_special) {
// Extract data URIs from the input
DataUriExtraction extraction = extract_data_uris(s, mtmd_default_marker());
// If no images found, just evaluate as plain text
if (extraction.images.empty()) {
return eval_plain_text(std::string(s), add_special, parse_special);
}
// We have images - check if we have multimodal support
if (!g_mtmd) {
err("multimodal model not loaded (use --mmproj to specify vision model)");
return false;
}
// Create bitmaps from decoded image data
mtmd::bitmaps bitmaps;
for (const auto &image : extraction.images) {
mtmd::bitmap bmp(mtmd_helper_bitmap_init_from_buf(
g_mtmd, (const unsigned char *)image.data(), image.size()));
if (!bmp.ptr) {
err("failed to load image");
return false;
}
bitmaps.entries.push_back(std::move(bmp));
}
// Use mtmd_tokenize to process text with images
mtmd_input_text text;
text.text = extraction.modified_text.c_str();
text.add_special = add_special;
text.parse_special = parse_special;
mtmd::input_chunks chunks(mtmd_input_chunks_init());
auto bitmaps_c_ptr = bitmaps.c_ptr();
int32_t res = mtmd_tokenize(g_mtmd, chunks.ptr.get(), &text,
bitmaps_c_ptr.data(), bitmaps_c_ptr.size());
if (res != 0) {
if (res == 1)
err("number of images doesn't match number of markers in prompt");
else if (res == 2)
err("image preprocessing error");
else
err("failed to tokenize prompt with images (error %d)", res);
return false;
}
// Evaluate the chunks
return eval_mtmd_chunks(chunks.ptr.get());
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_file.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include
#include
#include
#include "common.h"
#include "llama.h"
#include "color.h"
#include "image.h"
#include "llama.h" // llamafile wrapper
#include "string.h"
namespace lf {
namespace chatbot {
// Returns true when `s` contains at least one NUL byte, which is how
// non-text content is detected.
static bool has_binary(const std::string_view s) {
    for (const char byte : s) {
        if (byte == '\0')
            return true;
    }
    return false;
}
// Implements `/upload PATH`.
//
// Reads a regular file and renders it as a markdown snippet (filename, last
// modified time, then either an image data URI or the text inside a fenced
// block). The snippet is appended to g_pending_file_content rather than being
// evaluated right away. Problems are reported through err() and leave
// g_pending_file_content untouched.
void on_upload(const std::vector<std::string> &args) {
    // Exactly one argument (the path) is required after the command name.
    if (args.size() != 2) {
        if (args.size() < 2)
            err("error: missing file path" RESET "\n"
                "usage: /upload PATH");
        else
            err("error: too many arguments" RESET "\n"
                "usage: /upload PATH");
        return;
    }

    const char *path = args[1].c_str();

    struct stat info;
    const bool is_regular_file = stat(path, &info) == 0 && S_ISREG(info.st_mode);
    if (!is_regular_file) {
        err("%s: file does not exist", path);
        return;
    }

    std::string data;
    if (!slurp(&data, path)) {
        err("%s: failed to slurp file", path);
        return;
    }

    // Metadata header shared by both the image and the text case.
    std::string markdown("- **Filename**: `");
    markdown.append(path);
    markdown.append("`\n- **Last modified**: ");
    markdown.append(iso8601(info.st_mtim));
    markdown.append("\n\n");

    if (is_image(data)) {
        // In direct mode, need multimodal context loaded locally.
        // In API mode (g_model==null), the server handles multimodal.
        if (g_model && !g_mtmd) {
            err("%s: need --mmproj model to process images", path);
            return;
        }
        print_image(1, data, 80);
        convert_image_to_uri(&markdown, data);
    } else if (has_binary(data)) {
        err("%s: binary file type not supported", path);
        return;
    } else {
        // Fenced block tagged with the file extension; the closing fence
        // always starts on its own line.
        markdown.append("``````");
        markdown.append(extname(path));
        markdown.push_back('\n');
        markdown.append(data);
        if (markdown.back() != '\n')
            markdown.push_back('\n');
        markdown.append("``````");
    }

    // Store content for inclusion with next user message.
    // This avoids template validation errors in models like Qwen3.5 that
    // require user messages to be present when applying the template.
    if (!g_pending_file_content.empty())
        g_pending_file_content.append("\n\n");
    g_pending_file_content.append(markdown);
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_help.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include
#include "color.h"
namespace lf {
namespace chatbot {
void on_help(const std::vector &args) {
if (args.size() == 1) {
fprintf(stderr, "\
" BOLD "available commands" RESET "\n\
ctrl-j insert line in multi-line mode\n\
\"\"\" use triple quotes for multi-line input\n\
/clear restart conversation\n\
/context print context window usage\n\
/dump [FILE] print or save context window to file\n\
/exit end program\n\
/forget erase oldest message from context\n\
/help [COMMAND] show help\n\
/manual [on|off] toggle manual role mode\n\
/pop restore context window size\n\
/push push context window size to stack\n\
/stack prints context window stack\n\
/stats print performance metrics\n\
/undo erases last message in conversation\n\
/upload FILE share image or text file with assistant\n\
");
} else if (args[1] == "context") {
fprintf(stderr, "\
usage: /context" RESET "\n\
prints information about context window usage. this helps you know how\n\
soon you're going to run out of tokens for the current conversation.\n\
");
} else if (args[1] == "dump") {
fprintf(stderr, "\
" BOLD "usage: /dump [FILE]" RESET "\n\
dumps raw tokens for current conversation history. special tokens are\n\
printed in the a model specific chat syntax. this is useful for seeing\n\
specifically what data is being evaluated by the model. by default it\n\
will be printed to the terminal. if a FILE argument is specified, then\n\
the raw conversation history will be written to that filename.\n\
");
} else if (args[1] == "exit") {
fprintf(stderr, "\
" BOLD "usage: /exit" RESET "\n\
this command will cause the process to exit. it is essentially the same\n\
as typing ctrl-d which signals an eof condition. it also does the same\n\
thing as typing ctrl-c when the >>> user input prompt is displayed.\n\
");
} else if (args[1] == "manual") {
fprintf(stderr, "\
" BOLD "usage: /manual [on|off]" RESET "\n\
puts the chatbot in manual mode. this is useful if you want to inject\n\
a response as the model rather than the user. it's also possible to add\n\
additional system prompts to the conversation history. when the manual\n\
mode is activated, a hint is displayed next to the '>>>' indicating\n\
the current role, which can be 'user', 'assistant', or 'system'. if\n\
enter is pressed on an empty line, then llamafile will cycle between\n\
all three roles. when /manual is specified without an argument, it will\n\
toggle manual mode. otherwise an 'on' or 'off' argument is supplied.\n\
");
} else if (args[1] == "help") {
fprintf(stderr, "\
" BOLD "usage: /help [COMMAND]" RESET "\n\
shows help on how to issue commands to your llamafile. if no argument is\n\
specified, then a synopsis of all available commands will be printed. if\n\
a specific command name is given (e.g. /help dump) then documentation on\n\
the usage of that specific command will be printed.\n\
");
} else if (args[1] == "stats") {
fprintf(stderr, "\
" BOLD "usage: /stats" RESET "\n\
prints performance statistics for current session. this includes prompt\n\
evaluation time in tokens per second, which indicates prefill speed, or\n\
how quickly llamafile is able to read text. the 'eval time' statistic\n\
gives you prediction or token generation speed, in tokens per second,\n\
which tells you how quickly llamafile is able to write text.\n\
");
} else if (args[1] == "clear") {
fprintf(stderr, "\
usage: /clear" RESET "\n\
start conversation over from the beginning. this command adjusts the\n\
context window to what it was after the initial system prompt. this\n\
command also erases the /push stack.\n\
");
} else if (args[1] == "push") {
fprintf(stderr, "\
usage: /push" RESET "\n\
save current size of context window to stack. this command may be used\n\
with /pop to backtrack a conversation.\n\
");
} else if (args[1] == "pop") {
fprintf(stderr, "\
usage: /pop" RESET "\n\
restores size of context window from stack. this command may be used\n\
with /push to backtrack a conversation.\n\
");
} else if (args[1] == "stack") {
fprintf(stderr, "\
usage: /stack" RESET "\n\
prints the current conversation stack, created by /push commands.\n\
the stack consists of token offsets within the context window.\n\
");
} else if (args[1] == "undo") {
fprintf(stderr, "\
usage: /undo" RESET "\n\
erases last exchange in conversation. in the normal mode, this includes\n\
what the assistant last said, as well as the question that was asked. in\n\
manual mode, this will erase only the last chat message.\n\
");
} else if (args[1] == "upload") {
fprintf(stderr, "\
usage: /upload FILE" RESET "\n\
shares file from local hard drive with assistant. if this is a text file\n\
then a markdown system prompt is generated and added to the conversation\n\
history that gives the assistant readonly access to the file content and\n\
metadata. files with nul characters in them are currently not supported.\n\
image files (jpg/png/gif) may be uploaded if you specified a clip vision\n\
model (e.g. LLaVA) earlier when running llamafile with the --mmproj flag\n\
");
} else if (args[1] == "forget") {
fprintf(stderr, "\
usage: /forget" RESET "\n\
erase oldest chat message from context window. if you run out of context\n\
window, then this command can help you free up space. the oldest message\n\
excludes the original system prompt, with is preserved. this command may\n\
be run multiple times to erase multiple messages. there's also the /undo\n\
command which deletes the most recent chat message instead.\n\
");
} else {
fprintf(stderr, BRIGHT_RED "%s: unknown command" RESET "\n", args[1].c_str());
}
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_hint.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include
#include "color.h"
namespace lf {
namespace chatbot {
// Computes the inline hint shown after the cursor for the current `line`.
//
// - Empty line in manual mode: the name of the current role.
// - Empty line before anything has been said: a short onboarding prompt.
// - Otherwise: when `line` is a prefix of exactly one slash command, the
//   remainder of that command; the empty string when there is no match or
//   the prefix is ambiguous.
//
// The returned pointer refers to static storage and must not be freed.
static const char *on_hint_impl(const char *line) {
    if (!*line) {
        if (g_manual_mode)
            return get_role_name(g_role);
        if (!g_said_something) {
            return is_base_model() ? "type text to be completed (or /help for help)"
                                   : "say something (or type /help for help)";
        }
    }

    static const char *const kCommands[] = {
        "/clear", //
        "/context", //
        "/dump", //
        "/exit", //
        "/forget", //
        "/help", //
        "/manual", //
        "/pop", //
        "/push", //
        "/stack", //
        "/stats", //
        "/undo", //
        "/upload", //
    };

    // Look for the single command that `line` is a prefix of; a second
    // match means the prefix is ambiguous and no hint is offered.
    const int typed = strlen(line);
    const char *only_match = nullptr;
    for (const char *candidate : kCommands) {
        if (strncmp(line, candidate, typed) != 0)
            continue;
        if (only_match)
            return "";
        only_match = candidate;
    }
    if (!only_match)
        return "";
    return only_match + typed;
}
// Hint callback: returns a heap-allocated copy (via strdup) of the hint for
// `line` and sets the ANSI sequences that wrap it, FAINT before and UNBOLD
// after.
char *on_hint(const char *line, const char **ansi1, const char **ansi2) {
    const char *hint = on_hint_impl(line);
    *ansi1 = FAINT;
    *ansi2 = UNBOLD;
    return strdup(hint);
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_hist.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include
#include
#include "chat.h"
#include "common.h" // llama.cpp common (includes llama.h)
#include "llama.h" // llamafile wrapper functions
#include "color.h"
#include "macros.h"
#include "string.h"
namespace lf {
namespace chatbot {
// Whether the REPL is in manual role mode (toggled by /manual).
bool g_manual_mode;
// Role used for the next input; starts out as the user.
enum Role g_role = ROLE_USER;
std::vector g_messages; // chat message history
// Stack manipulated by /push and /pop; entries are compared against
// tokens_used(), i.e. they are offsets into the context window.
std::vector g_stack;
// NOTE(review): presumably bookkeeping for /undo — confirm against its users.
std::vector g_undo;
// One entry per evaluated context position; tokens_used() reports its size.
std::vector g_history;
// Maps a role to its lowercase display name.
// Passing a value outside the three known roles is undefined behavior.
const char *get_role_name(enum Role role) {
    if (role == ROLE_USER)
        return "user";
    if (role == ROLE_ASSISTANT)
        return "assistant";
    if (role == ROLE_SYSTEM)
        return "system";
    __builtin_unreachable();
}
// Maps a role to the color escape sequence used when displaying it.
// Passing a value outside the three known roles is undefined behavior.
const char *get_role_color(enum Role role) {
    if (role == ROLE_USER)
        return GREEN;
    if (role == ROLE_ASSISTANT)
        return MAGENTA;
    if (role == ROLE_SYSTEM)
        return YELLOW;
    __builtin_unreachable();
}
// Returns the role that takes the next turn after `role` has spoken:
// the assistant follows the user, and the user follows both the assistant
// and the system. Any other value is treated as unreachable.
enum Role get_next_role(enum Role role) {
    if (role == ROLE_USER)
        return ROLE_ASSISTANT;
    if (role == ROLE_ASSISTANT || role == ROLE_SYSTEM)
        return ROLE_USER;
    __builtin_unreachable();
}
// Steps through all roles in a fixed ring: user -> assistant -> system ->
// user. Any other value is treated as unreachable.
enum Role cycle_role(enum Role role) {
    if (role == ROLE_USER)
        return ROLE_ASSISTANT;
    if (role == ROLE_ASSISTANT)
        return ROLE_SYSTEM;
    if (role == ROLE_SYSTEM)
        return ROLE_USER;
    __builtin_unreachable();
}
// Number of tokens currently recorded in g_history.
int tokens_used(void) {
    const int count = g_history.size();
    return count;
}
// Produces a short printable description of one token.
//
// Tokens the vocabulary treats specially are shown as a single glyph; the
// checks run in a fixed order and the first match wins. Every other token is
// rendered as its text piece (special tokens not rendered), with a fallback
// glyph when that piece is empty.
std::string describe_token(int token) {
    const llama_vocab *vocab = llama_model_get_vocab(g_model);
    const char *glyph = nullptr;
    if (token == llama_vocab_bos(vocab))
        glyph = "§";
    else if (token == llama_vocab_eos(vocab))
        glyph = "∎";
    else if (token == llama_vocab_cls(vocab))
        glyph = "⌘";
    else if (token == llama_vocab_sep(vocab))
        glyph = "⋯";
    else if (token == llama_vocab_pad(vocab))
        glyph = "␣";
    else if (token == llama_vocab_nl(vocab))
        glyph = "↵";
    else if (llama_vocab_is_eog(vocab, token))
        glyph = "⌟";
    else if (llama_vocab_is_control(vocab, token))
        glyph = "∷";
    if (glyph)
        return glyph;
    std::string piece = token_to_piece(g_ctx, token, DONT_RENDER_SPECIAL_TOKENS);
    if (piece.empty())
        return "↯";
    return piece;
}
// Builds a short human-readable preview of the tokens in [begin, end) of
// g_history, reading forward from `begin`.
//
// Token descriptions are appended until the range is exhausted or the text
// reaches 63 bytes; " ..." is appended when tokens were left out. The result
// is passed through collapse() before being returned.
std::string describe_erasure(int begin, int end) {
assert(begin <= end);
assert(end <= tokens_used());
std::string description;
int pos = begin;
while (pos < end && description.size() < 63)
description += describe_token(g_history[pos++]);
if (!description.empty() && pos < end)
description += " ...";
description = collapse(description);
// NOTE(review): as written this assigns an empty string to an already empty
// string, i.e. it has no effect. Presumably a placeholder label for an empty
// range was intended here -- confirm against the upstream file.
if (pos == end && description.empty())
description = "";
return description;
}
// Builds a short human-readable preview of the tokens that end at position
// `pos` in g_history, reading backwards from `pos`.
//
// Token descriptions are prepended until position 0 is reached or the text
// reaches 63 bytes; "... " is prepended when earlier tokens were left out.
// The result is passed through collapse() before being returned.
std::string describe_position(int pos) {
assert(pos <= tokens_used());
std::string description;
while (pos > 0 && description.size() < 63)
description = describe_token(g_history[--pos]) + description;
if (!description.empty() && pos > 0)
description = std::string("... ") + description;
description = collapse(description);
// NOTE(review): as written this assigns an empty string to an already empty
// string, i.e. it has no effect. Presumably a placeholder label for the start
// of the conversation was intended here -- confirm against the upstream file.
if (!pos && description.empty())
description = "";
return description;
}
// Drops entries from the top of `stack` for as long as the top entry points
// past the current end of the token history.
static void fix_stack(std::vector *stack) {
    for (;;) {
        if (stack->empty())
            return;
        if (stack->back() <= tokens_used())
            return;
        stack->pop_back();
    }
}
// Trims both the undo stack and the /push stack so that neither has a top
// entry beyond the current token count. Called after operations that shrink
// the history.
void fix_stacks(void) {
fix_stack(&g_undo);
fix_stack(&g_stack);
}
// Returns a copy of `stack` rewritten for the removal of the token range
// [erase_begin, erase_end): positions inside the range are dropped, positions
// at or after erase_end are shifted down by the size of the range, and
// earlier positions are kept unchanged.
static std::vector adjust_stack(int erase_begin, int erase_end,
                                const std::vector &stack) {
    std::vector builder;
    const int erased = erase_end - erase_begin;
    for (int pos : stack) {
        const bool inside = erase_begin <= pos && pos < erase_end;
        if (!inside)
            builder.push_back(pos >= erase_end ? pos - erased : pos);
    }
    return builder;
}
// Rewrites both the undo stack and the /push stack to account for the removal
// of tokens [erase_begin, erase_end) from the history (see adjust_stack()).
void adjust_stacks(int erase_begin, int erase_end) {
g_undo = adjust_stack(erase_begin, erase_end, g_undo);
g_stack = adjust_stack(erase_begin, erase_end, g_stack);
}
// Pushes the current token count onto the undo stack, unless the top of the
// stack already holds that same position.
void record_undo(void) {
    const bool already_recorded = !g_undo.empty() && g_undo.back() == tokens_used();
    if (already_recorded)
        return;
    g_undo.push_back(tokens_used());
}
void on_undo(const std::vector &args) {
while (!g_undo.empty() && g_undo.back() == tokens_used())
g_undo.pop_back();
if (g_undo.empty()) {
err("error: no further undo actions possible");
return;
}
printf(FAINT "restoring conversation to: %s" RESET "\n",
describe_position(g_undo.back()).c_str());
rewind(g_undo.back());
g_undo.pop_back();
fix_stacks();
}
// Handler for the /forget command: erases the span of tokens that starts at
// the second undo checkpoint and ends at the third one (or at the end of the
// history when there is no third checkpoint).
//
// The span is removed from the llama memory for sequence 0, later positions
// are shifted down to close the gap, and g_history plus both position stacks
// are updated to match. `args` is not inspected.
void on_forget(const std::vector &args) {
    if (g_undo.size() < 2) {
        err("error: nothing left to forget");
        return;
    }
    const int erase_begin = g_undo[1];
    const int erase_end = g_undo.size() > 2 ? g_undo[2] : tokens_used();
    const int erase_count = erase_end - erase_begin;
    if (erase_count == 0) {
        err("error: nothing left to forget");
        return;
    }
    printf(FAINT "forgetting: %s" RESET "\n", describe_erasure(erase_begin, erase_end).c_str());
    llama_memory_t mem = llama_get_memory(g_ctx);
    llama_memory_seq_rm(mem, 0, erase_begin, erase_end);
    llama_memory_seq_add(mem, 0, erase_end, -1, -erase_count);
    const auto first = g_history.begin() + erase_begin;
    const auto last = g_history.begin() + erase_end;
    g_history.erase(first, last);
    adjust_stacks(erase_begin, erase_end);
    fix_stacks();
}
void rewind(int pos) {
assert(pos <= tokens_used());
llama_memory_t mem = llama_get_memory(g_ctx);
llama_memory_seq_rm(mem, 0, pos, -1);
g_history.resize(pos);
}
// Handler for the /manual command.
//
// With no argument the mode is toggled; with "on" or "off" it is set
// explicitly. Anything else prints a usage error. The command is refused for
// base models. Leaving manual mode resets the active role to the user.
void on_manual(const std::vector &args) {
    if (is_base_model()) {
        err("error: /manual mode not supported on base models");
        return;
    }
    bool enable;
    if (args.size() == 1) {
        enable = !g_manual_mode;
    } else if (args.size() == 2 && args[1] == "on") {
        enable = true;
    } else if (args.size() == 2 && args[1] == "off") {
        enable = false;
    } else {
        err("error: bad /manual command\n"
            "usage: /manual [on|off]");
        return;
    }
    g_manual_mode = enable;
    fprintf(stderr, FAINT "manual mode %s" RESET "\n", g_manual_mode ? "enabled" : "disabled");
    if (!g_manual_mode)
        g_role = ROLE_USER;
}
// Handler for the /context command: reports how many context tokens are in
// use out of the configured window, and suggests the `-c` flag when the model
// was trained with a larger window than the one configured.
// `args` is not inspected.
void on_context(const std::vector &args) {
    const int used = tokens_used();
    const int configured_context = llama_n_ctx(g_ctx);
    const int max_context = llama_model_n_ctx_train(g_model);
    printf("%d out of %d context tokens used (%d tokens remaining)\n", used,
           configured_context, configured_context - used);
    if (max_context > configured_context)
        printf("use the `-c %d` flag at startup for maximum context\n", max_context);
}
// Handler for the /clear command: rewinds the conversation to zero tokens and
// drops the message history, any pending /upload content and the /push stack.
// `args` is not inspected.
void on_clear(const std::vector &args) {
rewind(0);
g_messages.clear();
g_pending_file_content.clear();
g_stack.clear();
// g_undo is not cleared outright; fix_stacks() pops its entries from the top
// while they exceed the (now zero) token count.
fix_stacks();
}
// Prints the /push stack from the most recently pushed entry to the oldest,
// one line per entry: the token position followed by a faint preview of the
// conversation text ending there.
void print_stack(void) {
    for (size_t remaining = g_stack.size(); remaining > 0; --remaining) {
        const int pos = g_stack[remaining - 1];
        printf("%12d " FAINT "(%s)" RESET "\n", pos, describe_position(pos).c_str());
    }
}
// Handler for the /push command: saves the current token position on the
// stack and prints the stack. `args` is not inspected.
void on_push(const std::vector &args) {
    const int here = tokens_used();
    g_stack.push_back(here);
    print_stack();
}
void on_pop(const std::vector &args) {
if (g_stack.empty()) {
err("error: context length stack is empty");
return;
}
printf(BOLD "%12d" RESET " restored " FAINT "(%s)" RESET "\n", g_stack.back(),
describe_position(g_stack.back()).c_str());
rewind(g_stack.back());
g_stack.pop_back();
fix_stacks();
print_stack();
}
// Handler for the /stack command: prints the /push stack, or a hint about
// /push when the stack is empty. `args` is not inspected.
void on_stack(const std::vector &args) {
    if (!g_stack.empty()) {
        print_stack();
        return;
    }
    printf(FAINT "stack is currently empty (try using /push)" RESET "\n");
}
// Handler for the /dump command: renders the whole token history as text
// (special tokens included) and writes it out.
//
// With no argument the text goes to standard output (fd 1). With an argument,
// args[1] names a file that is created/truncated with mode 0644 and receives
// the text instead. A trailing newline is added when the text does not
// already end in one.
//
// write() may transfer fewer bytes than requested, so the buffer is written
// in a loop until everything has been sent; write and close failures are
// reported via perror() instead of being silently ignored.
void on_dump(const std::vector &args) {
    int fd = 1;
    const bool to_file = args.size() >= 2;
    if (to_file) {
        if ((fd = creat(args[1].c_str(), 0644)) == -1) {
            perror(args[1].c_str());
            return;
        }
    }
    std::string s;
    for (auto id : g_history)
        s += token_to_piece(g_ctx, id, RENDER_SPECIAL_TOKENS);
    if (!s.empty() && s[s.size() - 1] != '\n')
        s += '\n';
    size_t written = 0;
    while (written < s.size()) {
        ssize_t rc = write(fd, s.data() + written, s.size() - written);
        if (rc <= 0) {
            // rc == 0 makes no progress; stop rather than spin forever
            if (rc < 0)
                perror(to_file ? args[1].c_str() : "write");
            break;
        }
        written += rc;
    }
    if (to_file) {
        // a failing close() can be the only sign that buffered data was lost
        if (close(fd) == -1)
            perror(args[1].c_str());
    }
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_logo.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include
#include "color.h"
#include "llamafile.h"
namespace lf {
namespace chatbot {
// Prints a NUL-terminated UTF-16 banner one code unit at a time.
// Full-block characters are coloured green, the box-drawing outline
// characters are printed faint, and everything else is printed unchanged.
static void print_logo(const char16_t *s) {
    for (const char16_t *p = s; *p; ++p) {
        const char16_t ch = *p;
        if (ch == u'█') {
            printf(GREEN "█" UNFOREGROUND);
        } else if (ch == u'╚' || ch == u'═' || ch == u'╝' ||
                   ch == u'╗' || ch == u'║' || ch == u'╔') {
            printf(FAINT "%C" UNBOLD, ch);
        } else {
            printf("%C", ch);
        }
    }
}
// Prints the startup banner to stdout.
//
// Nothing is printed when FLAG_nologo is set. When FLAG_ascii is set a plain
// ASCII rendition is printed; otherwise the block-character banner is printed
// through print_logo(), which adds colour. The argv parameter is unused.
void logo(char **) {
if (FLAG_nologo) {
return;
}
if (FLAG_ascii) {
printf("\
 _ _ __ _ _\n\
| | | __ _ _ __ ___ __ _ / _(_) | ___\n\
| | |/ _` | '_ ` _ \\ / _` | |_| | |/ _ \\\n\
| | | (_| | | | | | | (_| | _| | | __/\n\
|_|_|\\__,_|_| |_| |_|\\__,_|_| |_|_|\\___|\n");
} else {
print_logo(u"\n\
██╗ ██╗ █████╗ ███╗ ███╗ █████╗ ███████╗██╗██╗ ███████╗\n\
██║ ██║ ██╔══██╗████╗ ████║██╔══██╗██╔════╝██║██║ ██╔════╝\n\
██║ ██║ ███████║██╔████╔██║███████║█████╗ ██║██║ █████╗\n\
██║ ██║ ██╔══██║██║╚██╔╝██║██╔══██║██╔══╝ ██║██║ ██╔══╝\n\
███████╗███████╗██║ ██║██║ ╚═╝ ██║██║ ██║██║ ██║███████╗███████╗\n\
╚══════╝╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝╚══════╝\n");
}
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_main.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "arg.h"
#include "chat.h"
#include "common.h"
#include "llama.h"
#include "log.h"
#include "sampling.h"
#include "mtmd.h"
#include "mtmd-helper.h"
#include "color.h"
#include "compute.h"
#include "string.h"
#include
#include "llamafile.h"
#include "version.h"
namespace lf {
namespace chatbot {
// Global state
// All pointers below start out null. main() fills them in for direct mode;
// api_main() only sets g_params, so g_model stays null there and is used
// elsewhere as the "API mode" indicator (see is_base_model()).
common_params *g_params = nullptr; // pointer to params
common_sampler *g_sampler = nullptr; // sampler context
mtmd_context *g_mtmd = nullptr; // multimodal context
llama_model *g_model = nullptr;
llama_context *g_ctx = nullptr;
common_chat_templates_ptr g_chat_templates; // chat template handler
common_chat_parser_params g_chat_syntax; // chat syntax for parsing
std::string g_pending_file_content; // accumulated /upload content awaiting user message
// Static storage for params
static common_params s_params;
// Track whether we own the model (for cleanup)
// NOTE(review): nothing in this file ever assigns false to this flag, so the
// model and backend are always freed by main() on a normal exit.
static bool g_owns_model = true;
// Returns the text shown on the "compute" line of the startup banner.
//
// The GPU is considered in use when params are available, n_gpu_layers is
// non-zero (positive means explicitly enabled, negative means "auto") and a
// GPU is present. In that case Metal is reported by name, and any other GPU
// is reported as the CPU description with a GPU-acceleration suffix.
// Otherwise only the CPU description is returned.
std::string describe_compute(void) {
    const bool gpu_in_use =
        g_params && g_params->n_gpu_layers != 0 && llamafile_has_gpu();
    if (!gpu_in_use)
        return llamafile_describe_cpu();
    if (llamafile_has_metal())
        return "Apple Metal GPU";
    return llamafile_describe_cpu() + " (with GPU acceleration)";
}
// Converts a token id to its text piece. The image placeholder token has no
// vocabulary entry of its own here and is shown as a fixed glyph; all other
// tokens are delegated to llamafile_token_to_piece().
std::string token_to_piece(const struct llama_context *ctx, llama_token token, bool special) {
    if (token != IMAGE_PLACEHOLDER_TOKEN)
        return llamafile_token_to_piece(ctx, token, special);
    return "⁑";
}
// Suffix appended to fatal error messages: points the user at --verbose,
// unless verbosity is already non-zero, in which case it is empty.
const char *tip() {
    return g_params->verbosity ? "" : " (use the --verbose flag for further details)";
}
// Reports whether the loaded model should be treated as a base (raw text
// completion) model rather than a chat model.
//
// Returns false when no local model is loaded (API mode) or when the user
// supplied a chat template explicitly. Otherwise the model counts as a base
// model exactly when its GGUF metadata has no "tokenizer.chat_template" key.
bool is_base_model() {
    const bool has_local_model = g_model != nullptr;
    if (!has_local_model || !g_params->chat_template.empty())
        return false;
    // the lookup returns -1 when the metadata key is absent
    const auto rc = llama_model_meta_val_str(g_model, "tokenizer.chat_template", 0, 0);
    return rc == -1;
}
// Entry point for the direct (local model) chatbot.
//
// Sequence: print the logo, set default params, initialise GPU logging and
// the llama backend, parse flags, load the model, create the context, the
// sampler and (optionally) the multimodal context, detect the chat format,
// evaluate the BOS token and any base-model system prompt, run the REPL, and
// finally release everything.
//
// Returns 0 on normal completion. Fatal setup errors terminate the process
// with exit status 1 (flag parsing), 2 (model load), 3 (context init),
// 4 (sampler init), 5 (multimodal init) or 6 (system prompt evaluation).
int main(int argc, char **argv) {
// Ignore SIGPIPE so that writing to a closed pipe does not kill the process.
signal(SIGPIPE, SIG_IGN);
// print logo
logo(argv);
// FLAG_verbose is set by parse_llamafile_args() in args.cpp
bool verbose = FLAG_verbose;
// Initialize params with defaults
g_params = &s_params;
g_params->sampling.n_prev = 64;
g_params->n_batch = 256; // for better progress indication
g_params->sampling.temp = 0; // don't use randomness by default
g_params->prompt = DEFAULT_SYSTEM_PROMPT;
// Initialize GPU support (must happen BEFORE llama_backend_init())
// This triggers dynamic compilation and loading of GPU backends
print_ephemeral("initializing gpu...");
if (!verbose) {
// disable ggml verbose logging
if (llamafile_has_metal()) {
llamafile_metal_log_set(llamafile_log_callback_null, NULL);
} else if (llamafile_has_cuda() || llamafile_has_amd_gpu()) {
llamafile_cuda_log_set(llamafile_log_callback_null, NULL);
}
} else {
clear_ephemeral();
}
// parse flags
print_ephemeral("loading backend...");
llama_backend_init();
// Pause common_log BEFORE common_init() to suppress llama.cpp build info line
if (!verbose)
common_log_pause(common_log_main());
common_init();
if (!verbose)
common_log_resume(common_log_main());
// NOTE that we are currently using llama.cpp flags parser here, so
// either we create a new kind of example for a custom set of flags
// or we need to deal with them separately and remove them prior to
// this step (see removeArgs in main.cpp)
if (!common_params_parse(argc, argv, *g_params, LLAMA_EXAMPLE_CLI)) {
fprintf(stderr, "error: failed to parse flags\n");
exit(1);
}
if (llamafile_has_metal() && g_params->n_gpu_layers < 0) {
// if Metal and no ngl was specified, default to INT_MAX
g_params->n_gpu_layers = INT_MAX;
}
clear_ephemeral();
// Suppress logging for model loading unless --verbose was specified
// We must set this AFTER common_init() since it overwrites the log callback
// and BEFORE model loading to suppress those logs
if (!verbose) {
llama_log_set((ggml_log_callback)llamafile_log_callback_null, NULL);
// Also suppress LOG_INF() and LOG_WRN() messages from common_log (used by LLM loader)
common_log_set_verbosity_thold(LOG_LEVEL_ERROR);
// Suppress mtmd/CLIP and mtmd-helper logging
mtmd_helper_log_set((ggml_log_callback)llamafile_log_callback_null, NULL);
}
print_ephemeral("loading model...");
llama_model_params model_params = common_model_params_to_llama(*g_params);
g_model = llama_model_load_from_file(g_params->model.path.c_str(), model_params);
clear_ephemeral();
if (g_model == NULL) {
fprintf(stderr, "%s: failed to load model%s\n", g_params->model.path.c_str(), tip());
exit(2);
}
// Adjust context size
// An unset/non-positive or too-large n_ctx is replaced by the model's
// training context size, and the batch size is capped at the context size.
if (g_params->n_ctx <= 0 || g_params->n_ctx > (int)llama_model_n_ctx_train(g_model))
g_params->n_ctx = llama_model_n_ctx_train(g_model);
if (g_params->n_ctx < g_params->n_batch)
g_params->n_batch = g_params->n_ctx;
// Print info (format line is added later after template detection)
if (!FLAG_nologo) {
printf(BOLD "software" UNBOLD ": llamafile " LLAMAFILE_VERSION_STRING "\n"
BOLD "model" UNBOLD ": %s\n",
basename(g_params->model.path).c_str());
if (is_base_model())
printf(BOLD "mode" UNBOLD ": RAW TEXT COMPLETION (base model)\n");
printf(BOLD "compute" UNBOLD ": %s\n", describe_compute().c_str());
}
print_ephemeral("initializing context...");
llama_context_params ctx_params = common_context_params_to_llama(*g_params);
g_ctx = llama_init_from_model(g_model, ctx_params);
clear_ephemeral();
if (!g_ctx) {
fprintf(stderr, "error: failed to initialize context%s\n", tip());
exit(3);
}
if (llama_model_has_encoder(g_model))
fprintf(stderr, "warning: this model has an encoder\n");
// Initialize sampler
g_sampler = common_sampler_init(g_model, g_params->sampling);
if (!g_sampler) {
fprintf(stderr, "error: failed to initialize sampler\n");
exit(4);
}
// Initialize multimodal if mmproj is specified
if (!g_params->mmproj.path.empty()) {
print_ephemeral("initializing vision model...");
mtmd_context_params mparams = mtmd_context_params_default();
mparams.use_gpu = g_params->mmproj_use_gpu;
mparams.n_threads = g_params->cpuparams.n_threads;
mparams.print_timings = g_params->verbosity > 0;
mparams.flash_attn_type = g_params->flash_attn_type;
mparams.warmup = g_params->warmup;
mparams.image_min_tokens = g_params->image_min_tokens;
mparams.image_max_tokens = g_params->image_max_tokens;
g_mtmd = mtmd_init_from_file(g_params->mmproj.path.c_str(), g_model, mparams);
clear_ephemeral();
if (!g_mtmd) {
fprintf(stderr, "%s: failed to initialize multimodal model%s\n",
g_params->mmproj.path.c_str(), tip());
exit(5);
}
}
// Initialize chat templates for output parsing (e.g., gpt-oss think mode)
// Use the same approach as common_chat_verify_template() - provide a dummy message
if (!is_base_model()) {
g_chat_templates = common_chat_templates_init(g_model, g_params->chat_template);
if (g_chat_templates) {
// Provide a minimal dummy message (same approach as common_chat_verify_template)
common_chat_msg dummy_msg;
dummy_msg.role = "user";
dummy_msg.content = "test";
// Check if the template supports enable_thinking (like llama.cpp server does).
// This is needed for models like Qwen3.5 that check enable_thinking in their
// template - without this, the template outputs a closed thinking block.
bool supports_thinking = common_chat_templates_support_enable_thinking(g_chat_templates.get());
common_chat_templates_inputs inputs;
inputs.messages = {dummy_msg};
inputs.use_jinja = true;
inputs.enable_thinking = supports_thinking;
// CRITICAL: Set reasoning_format BEFORE applying templates. The PEG parser
// is built during common_chat_templates_apply() and checks this value to
// decide whether to include reasoning extraction in the grammar.
inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
try {
auto chat_params = common_chat_templates_apply(g_chat_templates.get(), inputs);
g_chat_syntax.format = chat_params.format;
g_chat_syntax.thinking_forced_open = chat_params.thinking_forced_open;
// Load the PEG parser if one was provided
if (!chat_params.parser.empty()) {
g_chat_syntax.parser.load(chat_params.parser);
}
// Copy reasoning format to chat syntax for use by the parser at runtime
g_chat_syntax.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
g_chat_syntax.reasoning_in_content = false;
// Print detected format
if (!FLAG_nologo && g_chat_syntax.format != COMMON_CHAT_FORMAT_CONTENT_ONLY) {
printf(BOLD "format" UNBOLD ": %s\n", common_chat_format_name(g_chat_syntax.format));
}
} catch (const std::exception &e) {
// Template application failed, fall back to content-only parsing
LOG_DBG("chat template application failed: %s\n", e.what());
}
}
}
// Ensure there's a blank line after info block
if (!FLAG_nologo) {
printf("\n");
}
// Create direct backend and run the REPL
// `backend` owns the object; g_backend is a non-owning alias that stays valid
// until `backend` goes out of scope at the end of main().
auto backend = create_direct_backend();
g_backend = backend.get();
// Direct-backend-specific init: evaluate BOS token and system prompt
const llama_vocab *vocab = llama_model_get_vocab(g_model);
if (llama_vocab_get_add_bos(vocab)) {
print_ephemeral("loading bos token...");
eval_token(llama_vocab_bos(vocab));
}
// First undo checkpoint: the position right after the optional BOS token.
record_undo();
// Make base models have no system prompt by default
if (is_base_model() && g_params->prompt == DEFAULT_SYSTEM_PROMPT)
g_params->prompt = "";
// For base models, evaluate system prompt directly (no template)
if (!g_params->prompt.empty() && is_base_model()) {
print_ephemeral("loading system prompt...");
std::string msg = g_params->prompt;
if (!eval_string(msg, DONT_ADD_SPECIAL, PARSE_SPECIAL))
exit(6);
llama_synchronize(g_ctx);
clear_ephemeral();
}
repl(*backend);
// Synchronize before cleanup to ensure all GPU operations complete
if (g_ctx) {
llama_synchronize(g_ctx);
}
// Cleanup
if (g_mtmd) {
print_ephemeral("freeing vision model...");
mtmd_free(g_mtmd);
clear_ephemeral();
}
if (g_sampler) {
common_sampler_free(g_sampler);
}
// If interrupted, directly exit to avoid Metal backend crash on exit
// (NOTE: the issue occurs when llama_free(g_ctx) is run)
if (g_interrupted_exit) {
_exit(0);
}
print_ephemeral("freeing context...");
llama_free(g_ctx);
clear_ephemeral();
// Only free the model if we own it
if (g_owns_model) {
print_ephemeral("freeing model...");
llama_model_free(g_model);
clear_ephemeral();
print_ephemeral("freeing backend...");
llama_backend_free();
clear_ephemeral();
}
return 0;
}
// API client entry point for combined mode.
// Runs TUI chatbot that communicates with the server via HTTP.
//
// server_url:    passed to create_api_backend() and shown in the banner.
// system_prompt: system prompt to use; DEFAULT_SYSTEM_PROMPT when empty.
// model_path:    only used to display the model's basename in the banner.
// shutdown_fn:   invoked once after the REPL returns, if set.
//
// No local model is loaded here, so g_model stays null for the whole session.
// Always returns 0.
int api_main(const std::string &server_url, const std::string &system_prompt,
const std::string &model_path, std::function shutdown_fn) {
signal(SIGPIPE, SIG_IGN);
// Initialize minimal params
g_params = &s_params;
g_params->prompt = system_prompt.empty() ? DEFAULT_SYSTEM_PROMPT : system_prompt;
// Print logo and info
char *fake_argv[] = {const_cast("llamafile"), nullptr};
if (!FLAG_nologo) {
logo(fake_argv);
printf(BOLD "software" UNBOLD ": llamafile " LLAMAFILE_VERSION_STRING "\n"
BOLD "model" UNBOLD ": %s\n"
BOLD "compute" UNBOLD ": %s\n"
BOLD "server" UNBOLD ": %s\n",
basename(model_path).c_str(),
describe_compute().c_str(),
server_url.c_str());
printf("\n");
}
// Create API backend
// `backend` owns the object; g_backend is a non-owning alias.
auto backend = create_api_backend(server_url);
g_backend = backend.get();
// Run REPL
repl(*backend);
// Signal the server to shut down when the TUI exits
if (shutdown_fn) {
shutdown_fn();
}
return 0;
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/chatbot_repl.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "chatbot.h"
#include "chatbot_backend.h"
#include
#include
#include
#include
#include "chat.h"
#include "common.h"
#include "llama.h"
#include "sampling.h"
#include "bestline.h"
#include "color.h"
#include "highlight/highlight.h"
#include "llama.h" // llamafile wrapper
namespace lf {
namespace chatbot {
// True while an ephemeral status line (see print_ephemeral()) is on screen.
bool g_has_ephemeral;
// Set once the user has entered a non-empty line in repl().
bool g_said_something;
// Last character written through print(); used by ensure_newline().
char g_last_printed_char;
// Set to 1 by on_sigint(); reset by repl() after each response.
volatile sig_atomic_t g_got_sigint;
// Backend used by the REPL; assigned by main()/api_main() before repl() runs.
ChatBackend *g_backend = nullptr;
// Returns a copy of `s` in which every RESET sequence (\e[0m) is immediately
// followed by FAINT (\e[2m). Reasoning text is printed dim, and the markdown
// highlighter emits RESET inside it; re-arming FAINT after each RESET keeps
// the rest of the reasoning text dim.
static std::string maintain_faint_styling(const std::string &s) {
    static const char kReset[] = "\e[0m";
    static const char kResetThenFaint[] = "\e[0m\e[2m";
    std::string out;
    out.reserve(s.size() + 32);
    size_t cursor = 0;
    for (;;) {
        const size_t hit = s.find(kReset, cursor);
        if (hit == std::string::npos) {
            // no further RESET: copy the remaining tail verbatim
            out.append(s, cursor, std::string::npos);
            return out;
        }
        out.append(s, cursor, hit - cursor);
        out += kResetThenFaint;
        cursor = hit + 4; // length of "\e[0m"
    }
}
// Renders `past_msgs` followed by `new_msg` through the global chat template
// and returns the resulting prompt text.
//
// common_chat_format_single() cannot pass enable_thinking, which templates
// such as Qwen3.5's consult, so the template inputs are assembled by hand
// here: thinking is enabled whenever the template supports it, and the
// DeepSeek reasoning format is requested. Only the direct backend uses this;
// the API backend leaves templating to the server.
//
// Returns an empty string when no chat templates are loaded.
std::string apply_chat_template_with_thinking(
    const std::vector &past_msgs,
    const common_chat_msg &new_msg,
    bool add_generation_prompt) {
    if (!g_chat_templates)
        return "";
    common_chat_templates_inputs inputs;
    inputs.messages = past_msgs;
    inputs.messages.push_back(new_msg);
    inputs.use_jinja = true;
    inputs.add_generation_prompt = add_generation_prompt;
    inputs.enable_thinking =
        common_chat_templates_support_enable_thinking(g_chat_templates.get());
    inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
    return common_chat_templates_apply(g_chat_templates.get(), inputs).prompt;
}
// SIGINT handler installed by repl() in direct mode. It only raises the
// g_got_sigint flag; the REPL and the streaming callback poll that flag.
void on_sigint(int sig) {
g_got_sigint = 1;
}
// Flag to track if we're exiting due to interrupt (skip cleanup)
// Set by repl() when the line editor returns no line; main() checks it and
// calls _exit(0) before freeing the context.
bool g_interrupted_exit = false;
// Returns true when the NUL-terminated string `s` is empty or consists solely
// of whitespace characters as classified by isspace().
//
// Each byte is converted to unsigned char before classification: passing a
// negative value (any UTF-8 lead or continuation byte where plain char is
// signed) to isspace() is undefined behaviour.
bool is_empty(const char *s) {
    for (; *s; ++s)
        if (!isspace(static_cast<unsigned char>(*s)))
            return false;
    return true;
}
// Writes `s` to stdout one byte at a time while keeping the REPL's output
// bookkeeping current: the last printed character is remembered, and a
// newline marks any ephemeral status line as gone.
void print(const std::string_view &s) {
    for (size_t i = 0; i < s.size(); ++i) {
        const char ch = s[i];
        g_last_printed_char = ch;
        fputc(ch, stdout);
        if (ch == '\n')
            g_has_ephemeral = false;
    }
}
// Emits a newline through print() unless the last printed character already
// was one.
void ensure_newline() {
    if (g_last_printed_char == '\n')
        return;
    print("\n");
}
// Prints a printf-style error message to stderr in bright red, followed by a
// colour reset and a newline. Any ephemeral status line is cleared first and
// the message is made to start on a fresh line.
void err(const char *fmt, ...) {
    clear_ephemeral();
    ensure_newline();
    fputs(BRIGHT_RED, stderr);
    va_list args;
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fputs(RESET "\n", stderr);
}
// Shows a transient status message on stderr in bright black. The message
// ends with a carriage return rather than a newline, so subsequent output
// starts at the beginning of the same line; clear_ephemeral() erases it.
void print_ephemeral(const std::string_view &description) {
    const int length = (int)description.size();
    fprintf(stderr, " " BRIGHT_BLACK "%.*s" UNFOREGROUND "\r", length, description.data());
    g_has_ephemeral = true;
}
// Erases the transient status message, if one is showing, by emitting the
// "clear to end of line" sequence on stderr.
void clear_ephemeral(void) {
    if (!g_has_ephemeral)
        return;
    fputs(CLEAR_FORWARD, stderr);
    g_has_ephemeral = false;
}
// Reports that the context window has been exhausted.
//
// `extra` is the number of tokens that were about to be added on top of what
// the backend already uses. Always returns false so callers can write
// `return out_of_context(n);` from a bool-returning function.
bool out_of_context(int extra) {
    const int attempted = g_backend->context_used() + extra;
    const int maximum = g_backend->context_max();
    err("error: ran out of context window at %d tokens\n"
        "consider passing `-c %d` at startup for the maximum\n"
        "you can free up more space using /forget or /clear",
        attempted, maximum);
    return false;
}
// Runs the interactive read-eval-print loop against `backend`.
//
// Each iteration records an undo checkpoint, reads one line with bestline,
// dispatches slash commands via handle_command(), and otherwise treats the
// line as a chat message: it is formatted and evaluated locally when the
// backend supports manual mode (the direct backend), appended to g_messages
// for chat models, and answered by backend.complete(), whose output is
// streamed to the terminal with syntax highlighting.
//
// The loop ends when bestline returns no line; in that case
// g_interrupted_exit is set so that main() skips the remaining cleanup.
void repl(ChatBackend &backend) {
// setup system prompt for message history
// (Direct backend handles BOS token and system prompt eval in chatbot_main.cpp
// before calling repl(); API backend just needs the message history)
if (!g_params->prompt.empty()) {
if (!is_base_model()) {
// Chat models: add system prompt to messages array
common_chat_msg sys_msg;
sys_msg.role = "system";
sys_msg.content = g_params->prompt;
g_messages.push_back(sys_msg);
}
// Display system prompt at startup
if (g_params->display_prompt)
printf("%s\n", g_params->prompt.c_str());
}
// perform important setup
// Base models get the plain-text highlighter, chat models the markdown one.
HighlightTxt txt;
HighlightMarkdown markdown;
ColorBleeder bleeder(is_base_model() ? (Highlight *)&txt : (Highlight *)&markdown);
// Save old signal handler and install ours
// NOTE: In combined mode, this overrides the server's SIGINT handler.
// Only install if we're NOT in API mode (no local model = API mode).
// old_sa is only filled in (and later only read) when g_model is set.
struct sigaction sa, old_sa;
if (g_model) {
// Direct mode: install our own handler
sa.sa_handler = on_sigint;
sa.sa_flags = 0;
sigemptyset(&sa.sa_mask);
sigaction(SIGINT, &sa, &old_sa);
}
// run chatbot
for (;;) {
record_undo();
bestlineLlamaMode(true);
bestlineSetHintsCallback(on_hint);
bestlineSetFreeHintsCallback(free);
bestlineSetCompletionCallback(on_completion);
// Colour the prompt according to the role the next line will be sent as.
write(1, get_role_color(g_role), strlen(get_role_color(g_role)));
char *line = bestlineWithHistory(">>> ", "llamafile");
write(1, RESET, strlen(RESET));
g_last_printed_char = '\n';
if (!line) {
if (g_got_sigint) {
ensure_newline();
}
// Skip cleanup to avoid Metal crash (see chatbot_main)
// Setting g_interrupted_exit here covers both CTRL+C
// (sigint) and CTRL+D (newline)
g_interrupted_exit = true;
break;
}
// A blank line is never sent to a chat model. In manual mode it cycles the
// active role and moves the cursor back up so the prompt is redrawn in place.
if (!is_base_model() && is_empty(line)) {
if (g_manual_mode) {
g_role = cycle_role(g_role);
write(1, "\033[F", 3);
}
free(line);
continue;
}
g_said_something = true;
if (handle_command(line)) {
free(line);
continue;
}
// Manual mode: only available with direct backend
if (g_manual_mode && !backend.supports_manual_mode()) {
err("manual mode not available in this mode — use --chat for direct model access");
free(line);
continue;
}
// In manual mode no generation prompt is appended, since no reply is generated.
bool add_assi = !g_manual_mode;
// Remembered so a failed evaluation can be rolled back with rewind().
int tokens_before = backend.context_used();
// Combine any pending file content with user's message
std::string user_content;
if (!g_pending_file_content.empty()) {
user_content = g_pending_file_content;
user_content += "\n\n";
user_content += line;
g_pending_file_content.clear();
} else {
user_content = line;
}
// Build the message
common_chat_msg user_msg;
user_msg.role = get_role_name(g_role);
user_msg.content = user_content;
// Direct backend: format and eval the prompt ourselves
if (backend.supports_manual_mode()) {
std::string msg;
if (is_base_model()) {
msg = user_content;
} else {
msg = apply_chat_template_with_thinking(g_messages, user_msg, add_assi);
}
if (!eval_string(msg, DONT_ADD_SPECIAL, PARSE_SPECIAL)) {
rewind(tokens_before);
free(line);
continue;
}
}
// Track message in history
if (!is_base_model()) {
g_messages.push_back(user_msg);
}
if (g_manual_mode) {
g_role = get_next_role(g_role);
free(line);
continue;
}
// Generate response via backend
// The callback receives incremental content and reasoning text. Reasoning is
// printed faint; the first content chunk after reasoning ends the faint run
// and inserts a blank line. Returning false (after SIGINT) asks the backend
// to stop.
bool in_reasoning = false;
std::string assistant_content = backend.complete(g_messages,
[&](const std::string &content, const std::string &reasoning) -> bool {
if (!reasoning.empty()) {
if (!in_reasoning) {
print(FAINT);
in_reasoning = true;
}
std::string s;
bleeder.feed(&s, reasoning);
print(maintain_faint_styling(s));
}
if (!content.empty()) {
if (in_reasoning) {
print(UNBOLD);
print("\n\n");
in_reasoning = false;
}
std::string s;
bleeder.feed(&s, content);
print(s);
}
fflush(stdout);
return !g_got_sigint;
});
// End reasoning mode if still active
if (in_reasoning) {
print(UNBOLD);
}
// Track assistant response in message history
if (!is_base_model() && !assistant_content.empty()) {
common_chat_msg asst_msg;
asst_msg.role = "assistant";
asst_msg.content = assistant_content;
g_messages.push_back(asst_msg);
}
g_got_sigint = 0;
free(line);
// Emit whatever the highlighter is still holding back.
std::string s;
bleeder.flush(&s);
print(s);
ensure_newline();
}
// Restore original signal handler before cleanup
if (g_model) {
sigaction(SIGINT, &old_sa, nullptr);
}
}
} // namespace chatbot
} // namespace lf
================================================
FILE: llamafile/check_cpu.c
================================================
// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2023 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include
#include
#include
/**
 * Dies if CPU doesn't have mandatory features.
 *
 * This check is based on which `$(TARGET_ARCH)` microarchitecture
 * features were used globally. Object files that are specifically
 * written to use runtime dispatching should be configured so that
 * microarchitecture flags only get passed to that specific object
 *
 * The function body is intentionally empty: the actual check lives in
 * the constructor function defined later in this file, and calling
 * this function is what makes the linker keep this object file.
 */
void llamafile_check_cpu(void) {
// side effect: the constructor below has now been linked
}
// Reports on file descriptor 2 that the CPU feature `name` was required at
// build time but is missing on this machine, then terminates the process with
// status 1 via _Exit(). It therefore never returns, despite the int return
// type. Builds that target AVX2 but not AVX-512 also print a hint about which
// processor generations usually work.
static int on_missing_feature(const char *name) {
tinyprint(2, GetProgramExecutableName(), ": fatal error: the cpu feature ", name,
" was required at build time but isn't available on this system\n", NULL);
#if defined(__AVX2__) && !defined(__AVX512F__)
tinyprint(2,
"note: amd microprocessors made after 2017 usually work\n"
"note: intel microprocessors made after 2013 usually work\n",
NULL);
#endif
tinyprint(2, "exiting process.\n", NULL);
_Exit(1);
}
// This has to run very early in process initialization: C++ code compiled
// with flags such as -mavx may start allocating dynamic memory during static
// initialization, and on a CPU lacking the feature that would crash before
// any diagnostic could be printed. Hence the constructor priority of 101.
//
// Features are examined in a fixed order and only the first one found missing
// is reported, because on_missing_feature() terminates the process.
__attribute__((__constructor__(101))) static void llamafile_actually_check_cpu(void) {
    const char *missing = NULL;
    if (X86_NEED(SSE3) && !X86_CHECK(SSE3))
        missing = "SSE3";
    else if (X86_NEED(SSSE3) && !X86_CHECK(SSSE3))
        missing = "SSSE3";
    else if (X86_NEED(AVX) && !X86_CHECK(AVX))
        missing = "AVX";
    else if (X86_NEED(AVX2) && !X86_CHECK(AVX2))
        missing = "AVX2";
    else if (X86_NEED(FMA) && !X86_CHECK(FMA))
        missing = "FMA";
    else if (X86_NEED(F16C) && !X86_CHECK(F16C))
        missing = "F16C";
    else if (X86_NEED(AVX512F) && !X86_CHECK(AVX512F))
        missing = "AVX512F";
    else if (X86_NEED(AVX512VBMI) && !X86_CHECK(AVX512VBMI))
        missing = "AVX512VBMI";
    else if (X86_NEED(AVX512_VNNI) && !X86_CHECK(AVX512_VNNI))
        missing = "AVX512_VNNI";
    if (missing)
        on_missing_feature(missing);
}
================================================
FILE: llamafile/color.h
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// ANSI terminal escape sequences used for coloured output.
//
// The escape byte is spelled \033 (octal for 27) rather than \e, because
// \e is a compiler extension and not part of standard C or C++. The
// resulting string bytes are unchanged.

// text attributes
#define RESET "\033[0m"
#define BOLD "\033[1m"
#define FAINT "\033[2m"
#define UNBOLD "\033[22m"

// foreground colours
#define RED "\033[31m"
#define GREEN "\033[32m"
#define MAGENTA "\033[35m"
#define YELLOW "\033[33m"
#define CYAN "\033[36m"
#define UNFOREGROUND "\033[39m"

// bright foreground colours
#define BRIGHT_BLACK "\033[90m"
#define BRIGHT_RED "\033[91m"
#define BRIGHT_GREEN "\033[92m"

// line editing
#define CLEAR_FORWARD "\033[K"
================================================
FILE: llamafile/compute.cpp
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "compute.h"
#include
#include
#include
#include
#include "common.h"
#include "sgemm.h"
/**
 * Tells whether the C string `str` begins with the C string `prefix`.
 *
 * An empty prefix matches every string.
 */
static bool starts_with_str(const char *str, const char *prefix) {
    // Walk both strings in lockstep until the prefix is exhausted; a
    // mismatch (including `str` ending first) means it's not a prefix.
    for (; *prefix != '\0'; ++str, ++prefix) {
        if (*str != *prefix)
            return false;
    }
    return true;
}
#ifdef __x86_64__
/**
 * Executes the x86 CPUID instruction.
 *
 * `leaf` is supplied in EAX and `subleaf` in ECX (the "0" and "2"
 * matching constraints tie them to the first and third outputs). On
 * return, info[0], info[1], info[2] and info[3] receive EAX, EBX, ECX
 * and EDX respectively, so `info` must have room for four values.
 *
 * RBX is copied into RSI before the instruction and swapped back right
 * after it, which leaves RBX unchanged for the surrounding code while
 * the EBX result is delivered through RSI (the "=S" output).
 */
static void cpuid(unsigned leaf, unsigned subleaf, unsigned *info) {
asm("movq\t%%rbx,%%rsi\n\t"
"cpuid\n\t"
"xchgq\t%%rbx,%%rsi"
: "=a"(info[0]), "=S"(info[1]), "=c"(info[2]), "=d"(info[3])
: "0"(leaf), "2"(subleaf));
}
#endif // __x86_64__
/**
 * Returns string describing host CPU.
 *
 * The base name comes from the CPUID brand string on x86-64, or from the
 * "model name" / "Model" line of /proc/cpuinfo on other Linux hosts; it
 * may be empty when neither source is available. Marketing noise such
 * as "(TM)" and "(R)" is stripped. A parenthesized suffix is then
 * appended: the sgemm kernel name when one is in use, otherwise the
 * detected microarchitecture (x86-64) or the relevant HWCAP-derived
 * feature flags (other targets).
 */
std::string llamafile_describe_cpu() {
    std::string id;
#ifdef __x86_64__
    // Leaves 0x80000002..0x80000004 each fill 16 bytes, i.e. 48 bytes of
    // brand string in total; the zero-initialized 64-byte buffer keeps
    // the result NUL-terminated.
    union {
        char str[64];
        unsigned reg[16];
    } u = {0};
    cpuid(0x80000002, 0, u.reg + 0 * 4);
    cpuid(0x80000003, 0, u.reg + 1 * 4);
    cpuid(0x80000004, 0, u.reg + 2 * 4);
    int len = strlen(u.str);
    while (len > 0 && u.str[len - 1] == ' ')
        u.str[--len] = 0;
    id = u.str;
#else
    if (IsLinux()) {
        FILE *f = fopen("/proc/cpuinfo", "r");
        if (f) {
            char buf[1024];
            while (fgets(buf, sizeof(buf), f)) {
                if (!strncmp(buf, "model name", 10) ||
                    starts_with_str(buf, "Model\t\t:")) { // e.g. raspi
                    char *p = strchr(buf, ':');
                    if (p) {
                        p++;
                        // <cctype> functions require a value representable
                        // as unsigned char, so cast before classifying.
                        while (std::isspace(static_cast<unsigned char>(*p)))
                            p++;
                        // Trim trailing whitespace. The length guard keeps
                        // an empty value (nothing but whitespace after the
                        // colon) from indexing before the start of `p`.
                        size_t n = strlen(p);
                        while (n > 0 && std::isspace(static_cast<unsigned char>(p[n - 1])))
                            p[--n] = '\0';
                        id = p;
                        break;
                    }
                }
            }
            fclose(f);
        }
    }
#endif
    string_replace_all(id, " 96-Cores", "");
    string_replace_all(id, "(TM)", "");
    string_replace_all(id, "(R)", "");
    // Add sgemm kernel info (this describes the CPU capabilities used)
    const char *sgemm = llamafile_sgemm_name();
    if (sgemm && strcmp(sgemm, "unsupported") != 0) {
        if (!id.empty())
            id += " ";
        id += "(";
        id += sgemm;
        id += ")";
    } else {
        // Fallback: show march info if no sgemm kernel
#ifdef __x86_64__
        if (__cpu_march(__cpu_model.__cpu_subtype)) {
            if (!id.empty())
                id += " ";
            id += "(";
            id += __cpu_march(__cpu_model.__cpu_subtype);
            id += ")";
        }
#else
        std::string march;
        long hwcap = getauxval(AT_HWCAP);
        if (hwcap & HWCAP_ASIMDHP)
            march += "+fp16";
        if (hwcap & HWCAP_ASIMDDP)
            march += "+dotprod";
        if (!march.empty()) {
            if (!id.empty())
                id += " ";
            id += "(";
            id += march;
            id += ")";
        }
#endif
    }
    return id;
}
================================================
FILE: llamafile/compute.h
================================================
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include
/**
 * Returns string describing host CPU.
 */
std::string llamafile_describe_cpu();
================================================
FILE: llamafile/cuda.c
================================================
// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
//
// Copyright 2024 Mozilla Foundation
// Copyright 2026 Mozilla.ai
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Runtime CUDA/ROCm GPU support for llamafile
//
// This file implements dynamic loading of CUDA/ROCm GPU support.
// At runtime on Linux/Windows with NVIDIA or AMD GPU:
// 1. Try to load pre-built DSO from /zip/ggml-cuda.so (bundled)
// 2. Or try to load from ~/.llamafile/ (pre-compiled)
// 3. Or compile at runtime if nvcc/hipcc is available
// 4. Load the DSO with cosmo_dlopen() and register the CUDA backend
//
#include "llamafile.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include