SYMBOL INDEX (750 symbols across 38 files)

FILE: docker/open_llama/hug_model.py
  function make_request (line 7) | def make_request(url, params=None):
  function check_magic_and_version (line 16) | def check_magic_and_version(filename):
  function download_file (line 29) | def download_file(url, destination):
  function get_user_choice (line 51) | def get_user_choice(model_list):
  function main (line 73) | def main():

FILE: examples/batch-processing/server.py
  function create_chat_completions (line 30) | def create_chat_completions():

FILE: examples/gradio_chat/local.py
  function predict (line 18) | def predict(message, history):

FILE: examples/gradio_chat/server.py
  function predict (line 10) | def predict(message, history):

FILE: examples/high_level_api/langchain_custom_llm.py
  class LlamaLLM (line 9) | class LlamaLLM(LLM):
    method _llm_type (line 14) | def _llm_type(self) -> str:
    method __init__ (line 17) | def __init__(self, model_path: str, **kwargs: Any):
    method _call (line 22) | def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
    method _identifying_params (line 27) | def _identifying_params(self) -> Mapping[str, Any]:

FILE: examples/low_level_api/Chat.py
  function env_or_def (line 7) | def env_or_def(env, default):

FILE: examples/low_level_api/Miku.py
  function env_or_def (line 7) | def env_or_def(env, default):

FILE: examples/low_level_api/ReasonAct.py
  function env_or_def (line 7) | def env_or_def(env, default):

FILE: examples/low_level_api/common.py
  class GptParams (line 12) | class GptParams:
  function gpt_params_parse (line 77) | def gpt_params_parse(argv=None):
  function gpt_random_prompt (line 389) | def gpt_random_prompt(rng):

FILE: examples/low_level_api/low_level_api_chat_cpp.py
  class LLaMAInteract (line 25) | class LLaMAInteract:
    method __init__ (line 26) | def __init__(self, params: GptParams) -> None:
    method _tokenize (line 314) | def _tokenize(self, prompt, bos=True):
    method set_color (line 327) | def set_color(self, c):
    method use_antiprompt (line 331) | def use_antiprompt(self):
    method generate (line 335) | def generate(self):
    method __enter__ (line 625) | def __enter__(self):
    method __exit__ (line 628) | def __exit__(self, type, value, tb):
    method exit (line 631) | def exit(self):
    method token_to_str (line 635) | def token_to_str(self, token_id: int) -> bytes:
    method past (line 645) | def past(self):
    method input (line 650) | def input(self, prompt: str):
    method output (line 661) | def output(self):
    method read_input (line 689) | def read_input(self):
    method interact (line 696) | def interact(self):

FILE: examples/low_level_api/quantize.py
  function main (line 6) | def main(args):

FILE: examples/low_level_api/util.py
  class IterSearch (line 13) | class IterSearch:
    method __init__ (line 14) | def __init__(self, pattern):
    method __call__ (line 18) | def __call__(self, char):
  class Circle (line 31) | class Circle:
    method __init__ (line 32) | def __init__(self, size, default=0):
    method append (line 38) | def append(self, elem):
    method __getitem__ (line 46) | def __getitem__(self, val):

FILE: examples/ray/llm.py
  class LlamaDeployment (line 9) | class LlamaDeployment:
    method __init__ (line 10) | def __init__(self, model_path: str):
    method __call__ (line 13) | async def __call__(self, http_request: Request) -> Dict:
  function llm_builder (line 20) | def llm_builder(args: Dict[str, str]) -> Application:

FILE: llama_cpp/_ctypes_extensions.py
  function load_shared_library (line 23) | def load_shared_library(lib_base_name: str, base_path: pathlib.Path):
  class CtypesRef (line 93) | class CtypesRef(Generic[CtypesCData]):
  function ctypes_function_for_shared_library (line 105) | def ctypes_function_for_shared_library(lib: ctypes.CDLL):
  function _byref (line 126) | def _byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[...

FILE: llama_cpp/_internals.py
  class LlamaModel (line 31) | class LlamaModel:
    method __init__ (line 35) | def __init__(
    method close (line 77) | def close(self):
    method __del__ (line 85) | def __del__(self):
    method vocab_type (line 88) | def vocab_type(self) -> int:
    method n_vocab (line 91) | def n_vocab(self) -> int:
    method n_ctx_train (line 94) | def n_ctx_train(self) -> int:
    method n_embd (line 97) | def n_embd(self) -> int:
    method rope_freq_scale_train (line 100) | def rope_freq_scale_train(self) -> float:
    method desc (line 103) | def desc(self) -> str:
    method size (line 108) | def size(self) -> int:
    method n_params (line 111) | def n_params(self) -> int:
    method get_tensor (line 114) | def get_tensor(self, name: str) -> ctypes.c_void_p:
    method token_get_text (line 119) | def token_get_text(self, token: int) -> str:
    method token_get_score (line 122) | def token_get_score(self, token: int) -> float:
    method token_get_attr (line 125) | def token_get_attr(self, token: int) -> int:
    method token_bos (line 130) | def token_bos(self) -> int:
    method token_eos (line 133) | def token_eos(self) -> int:
    method token_cls (line 136) | def token_cls(self) -> int:
    method token_sep (line 139) | def token_sep(self) -> int:
    method token_nl (line 142) | def token_nl(self) -> int:
    method token_prefix (line 145) | def token_prefix(self) -> int:
    method token_middle (line 148) | def token_middle(self) -> int:
    method token_suffix (line 151) | def token_suffix(self) -> int:
    method token_eot (line 154) | def token_eot(self) -> int:
    method add_bos_token (line 157) | def add_bos_token(self) -> bool:
    method add_eos_token (line 160) | def add_eos_token(self) -> bool:
    method tokenize (line 165) | def tokenize(self, text: bytes, add_bos: bool, special: bool):
    method token_to_piece (line 183) | def token_to_piece(self, token: int, special: bool = False) -> bytes:
    method detokenize (line 188) | def detokenize(self, tokens: List[int], special: bool = False) -> bytes:
    method metadata (line 207) | def metadata(self) -> Dict[str, str]:
    method default_params (line 239) | def default_params():
  class LlamaContext (line 244) | class LlamaContext:
    method __init__ (line 248) | def __init__(
    method close (line 277) | def close(self):
    method __del__ (line 280) | def __del__(self):
    method n_ctx (line 283) | def n_ctx(self) -> int:
    method pooling_type (line 286) | def pooling_type(self) -> int:
    method kv_cache_clear (line 289) | def kv_cache_clear(self):
    method kv_cache_seq_rm (line 293) | def kv_cache_seq_rm(self, seq_id: int, p0: int, p1: int):
    method kv_cache_seq_cp (line 298) | def kv_cache_seq_cp(self, seq_id_src: int, seq_id_dst: int, p0: int, p...
    method kv_cache_seq_keep (line 302) | def kv_cache_seq_keep(self, seq_id: int):
    method kv_cache_seq_shift (line 306) | def kv_cache_seq_shift(self, seq_id: int, p0: int, p1: int, shift: int):
    method get_state_size (line 310) | def get_state_size(self) -> int:
    method decode (line 321) | def decode(self, batch: LlamaBatch):
    method encode (line 329) | def encode(self, batch: LlamaBatch):
    method set_n_threads (line 337) | def set_n_threads(self, n_threads: int, n_threads_batch: int):
    method get_logits (line 340) | def get_logits(self):
    method get_logits_ith (line 343) | def get_logits_ith(self, i: int):
    method get_embeddings (line 346) | def get_embeddings(self):
    method get_embeddings_ith (line 349) | def get_embeddings_ith(self, i: int):
    method get_embeddings_seq (line 352) | def get_embeddings_seq(self, seq_id: int):
    method set_rng_seed (line 357) | def set_rng_seed(self, seed: int):
    method sample_repetition_penalties (line 360) | def sample_repetition_penalties(
    method sample_softmax (line 371) | def sample_softmax(self, candidates: "_LlamaTokenDataArray"):
    method sample_top_k (line 374) | def sample_top_k(self, candidates: "_LlamaTokenDataArray", k: int, min...
    method sample_top_p (line 377) | def sample_top_p(self, candidates: "_LlamaTokenDataArray", p: float, m...
    method sample_min_p (line 380) | def sample_min_p(self, candidates: "_LlamaTokenDataArray", p: float, m...
    method sample_typical (line 383) | def sample_typical(
    method sample_temp (line 388) | def sample_temp(self, candidates: "_LlamaTokenDataArray", temp: float):
    method sample_grammar (line 391) | def sample_grammar(self, candidates: "_LlamaTokenDataArray", grammar: ...
    method sample_token_mirostat (line 394) | def sample_token_mirostat(
    method sample_token_mirostat_v2 (line 404) | def sample_token_mirostat_v2(
    method sample_token_greedy (line 413) | def sample_token_greedy(self, candidates: "_LlamaTokenDataArray") -> int:
    method sample_token (line 416) | def sample_token(self, candidates: "_LlamaTokenDataArray") -> int:
    method grammar_accept_token (line 420) | def grammar_accept_token(self, grammar: LlamaGrammar, token: int):
    method reset_timings (line 423) | def reset_timings(self):
    method print_timings (line 426) | def print_timings(self):
    method default_params (line 431) | def default_params():
  class LlamaBatch (line 436) | class LlamaBatch:
    method __init__ (line 437) | def __init__(
    method close (line 462) | def close(self):
    method __del__ (line 465) | def __del__(self):
    method n_tokens (line 468) | def n_tokens(self) -> int:
    method reset (line 471) | def reset(self):
    method set_batch (line 474) | def set_batch(self, batch: Sequence[int], n_past: int, logits_all: bool):
    method add_sequence (line 485) | def add_sequence(self, batch: Sequence[int], seq_id: int, logits_all: ...
  class LlamaTokenDataArray (line 499) | class LlamaTokenDataArray:
    method __init__ (line 500) | def __init__(self, *, n_vocab: int):
    method copy_logits (line 517) | def copy_logits(self, logits: npt.NDArray[np.single]):
  function normalize_embedding (line 528) | def normalize_embedding(embedding):
  class LlamaSamplingParams (line 539) | class LlamaSamplingParams:
  class LlamaSamplingContext (line 566) | class LlamaSamplingContext:
    method reset (line 574) | def reset(self):
    method cp (line 580) | def cp(self):
    method last (line 589) | def last(self) -> Optional[int]:
    method prev_str (line 595) | def prev_str(self, ctx_main: LlamaContext, n: int) -> str:
    method sample (line 598) | def sample(
    method accept (line 607) | def accept(self, ctx_main: LlamaContext, id: int, apply_grammar: bool):
  class CustomSampler (line 611) | class CustomSampler:
    method __init__ (line 612) | def __init__(
    method get_sampler (line 640) | def get_sampler(self) -> llama_cpp.llama_sampler_p:
  class LlamaSampler (line 644) | class LlamaSampler:
    method __init__ (line 645) | def __init__(self):
    method close (line 661) | def close(self):
    method __del__ (line 664) | def __del__(self):
    method add_greedy (line 667) | def add_greedy(self):
    method add_dist (line 671) | def add_dist(self, seed: int):
    method add_softmax (line 675) | def add_softmax(self):
    method add_top_k (line 679) | def add_top_k(self, k: int):
    method add_top_p (line 683) | def add_top_p(self, p: float, min_keep: int = 1):
    method add_min_p (line 687) | def add_min_p(self, p: float, min_keep: int = 1):
    method add_typical (line 691) | def add_typical(self, p: float, min_keep: int = 1):
    method add_temp (line 695) | def add_temp(self, temp: float):
    method add_temp_ext (line 699) | def add_temp_ext(self, t: float, delta: float, exponent: float):
    method add_xtc (line 703) | def add_xtc(self, p: float, t: float, min_keep: int, seed: int):
    method add_top_n_sigma (line 707) | def add_top_n_sigma(self, n: float):
    method add_mirostat (line 711) | def add_mirostat(self, n_vocab: int, seed: int, tau: float, eta: float...
    method add_mirostat_v2 (line 715) | def add_mirostat_v2(self, seed: int, tau: float, eta: float):
    method add_grammar (line 719) | def add_grammar(self, model: LlamaModel, grammar: LlamaGrammar):
    method add_grammar_lazy_patterns (line 725) | def add_grammar_lazy_patterns(
    method add_penalties (line 751) | def add_penalties(
    method add_dry (line 766) | def add_dry(
    method add_logit_bias (line 793) | def add_logit_bias(
    method add_infill (line 811) | def add_infill(self, model: LlamaModel):
    method add_custom (line 815) | def add_custom(
    method get_seed (line 826) | def get_seed(self) -> int:
    method sample (line 829) | def sample(self, ctx: LlamaContext, idx: int = -1) -> int:
    method accept (line 832) | def accept(self, token: int):
    method reset (line 835) | def reset(self):
    method clone (line 838) | def clone(self):

FILE: llama_cpp/_logger.py
  function llama_log_callback (line 30) | def llama_log_callback(
  function set_verbose (line 46) | def set_verbose(verbose: bool):

FILE: llama_cpp/_utils.py
  class suppress_stdout_stderr (line 14) | class suppress_stdout_stderr(object):
    method __init__ (line 20) | def __init__(self, disable: bool = True):
    method __enter__ (line 24) | def __enter__(self):
    method __exit__ (line 44) | def __exit__(self, *_):
  class MetaSingleton (line 59) | class MetaSingleton(type):
    method __call__ (line 66) | def __call__(cls, *args: Any, **kwargs: Any) -> Any:
  class Singleton (line 72) | class Singleton(object, metaclass=MetaSingleton):
    method __init__ (line 77) | def __init__(self):

FILE: llama_cpp/llama.py
  class Llama (line 55) | class Llama:
    method __init__ (line 60) | def __init__(
    method ctx (line 550) | def ctx(self) -> llama_cpp.llama_context_p:
    method model (line 554) | def model(self) -> llama_cpp.llama_model_p:
    method _input_ids (line 558) | def _input_ids(self) -> npt.NDArray[np.intc]:
    method _scores (line 562) | def _scores(self) -> npt.NDArray[np.single]:
    method eval_tokens (line 566) | def eval_tokens(self) -> Deque[int]:
    method eval_logits (line 570) | def eval_logits(self) -> Deque[List[float]]:
    method tokenize (line 576) | def tokenize(
    method detokenize (line 594) | def detokenize(
    method set_cache (line 614) | def set_cache(self, cache: Optional[BaseLlamaCache]):
    method set_seed (line 622) | def set_seed(self, seed: int):
    method reset (line 630) | def reset(self):
    method eval (line 634) | def eval(self, tokens: Sequence[int]):
    method _init_sampler (line 671) | def _init_sampler(
    method sample (line 760) | def sample(
    method generate (line 822) | def generate(
    method create_embedding (line 962) | def create_embedding(
    method embed (line 1002) | def embed(
    method _create_completion (line 1123) | def _create_completion(
    method create_completion (line 1743) | def create_completion(
    method __call__ (line 1840) | def __call__(
    method create_chat_completion (line 1932) | def create_chat_completion(
    method create_chat_completion_openai_v1 (line 2035) | def create_chat_completion_openai_v1(
    method __getstate__ (line 2068) | def __getstate__(self):
    method __setstate__ (line 2124) | def __setstate__(self, state):
    method save_state (line 2127) | def save_state(self) -> LlamaState:
    method load_state (line 2157) | def load_state(self, state: LlamaState) -> None:
    method n_ctx (line 2172) | def n_ctx(self) -> int:
    method n_embd (line 2176) | def n_embd(self) -> int:
    method n_vocab (line 2180) | def n_vocab(self) -> int:
    method tokenizer (line 2184) | def tokenizer(self) -> LlamaTokenizer:
    method token_eos (line 2188) | def token_eos(self) -> int:
    method token_bos (line 2192) | def token_bos(self) -> int:
    method token_nl (line 2196) | def token_nl(self) -> int:
    method pooling_type (line 2200) | def pooling_type(self) -> str:
    method close (line 2204) | def close(self) -> None:
    method __del__ (line 2208) | def __del__(self) -> None:
    method logits_to_logprobs (line 2212) | def logits_to_logprobs(
    method longest_token_prefix (line 2230) | def longest_token_prefix(a: Sequence[int], b: Sequence[int]):
    method from_pretrained (line 2240) | def from_pretrained(
  class LlamaState (line 2367) | class LlamaState:
    method __init__ (line 2368) | def __init__(
  class LogitsProcessorList (line 2390) | class LogitsProcessorList(List[LogitsProcessor]):
    method __call__ (line 2391) | def __call__(
  class StoppingCriteriaList (line 2402) | class StoppingCriteriaList(List[StoppingCriteria]):
    method __call__ (line 2403) | def __call__(
  class MinTokensLogitsProcessor (line 2409) | class MinTokensLogitsProcessor(LogitsProcessor):
    method __init__ (line 2410) | def __init__(self, min_tokens: int, token_eos: int):
    method __call__ (line 2415) | def __call__(

FILE: llama_cpp/llama_cache.py
  class BaseLlamaCache (line 17) | class BaseLlamaCache(ABC):
    method __init__ (line 20) | def __init__(self, capacity_bytes: int = (2 << 30)):
    method cache_size (line 25) | def cache_size(self) -> int:
    method _find_longest_prefix_key (line 28) | def _find_longest_prefix_key(
    method __getitem__ (line 35) | def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaSta...
    method __contains__ (line 39) | def __contains__(self, key: Sequence[int]) -> bool:
    method __setitem__ (line 43) | def __setitem__(
  class LlamaRAMCache (line 49) | class LlamaRAMCache(BaseLlamaCache):
    method __init__ (line 52) | def __init__(self, capacity_bytes: int = (2 << 30)):
    method cache_size (line 60) | def cache_size(self):
    method _find_longest_prefix_key (line 63) | def _find_longest_prefix_key(
    method __getitem__ (line 79) | def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaSta...
    method __contains__ (line 88) | def __contains__(self, key: Sequence[int]) -> bool:
    method __setitem__ (line 91) | def __setitem__(self, key: Sequence[int], value: "llama_cpp.llama.Llam...
  class LlamaDiskCache (line 104) | class LlamaDiskCache(BaseLlamaCache):
    method __init__ (line 107) | def __init__(
    method cache_size (line 114) | def cache_size(self):
    method _find_longest_prefix_key (line 117) | def _find_longest_prefix_key(
    method __getitem__ (line 130) | def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaSta...
    method __contains__ (line 141) | def __contains__(self, key: Sequence[int]) -> bool:
    method __setitem__ (line 144) | def __setitem__(self, key: Sequence[int], value: "llama_cpp.llama.Llam...

FILE: llama_cpp/llama_chat_format.py
  class LlamaChatCompletionHandler (line 61) | class LlamaChatCompletionHandler(Protocol):
    method __call__ (line 68) | def __call__(
  class LlamaChatCompletionHandlerNotFoundException (line 112) | class LlamaChatCompletionHandlerNotFoundException(Exception):
  class LlamaChatCompletionHandlerRegistry (line 116) | class LlamaChatCompletionHandlerRegistry(Singleton):
    method register_chat_completion_handler (line 119) | def register_chat_completion_handler(
    method unregister_chat_handler (line 131) | def unregister_chat_handler(self, name: str):
    method get_chat_completion_handler_by_name (line 137) | def get_chat_completion_handler_by_name(
  function get_chat_completion_handler (line 149) | def get_chat_completion_handler(name: str) -> LlamaChatCompletionHandler:
  function register_chat_completion_handler (line 155) | def register_chat_completion_handler(name: str):
  class ChatFormatterResponse (line 167) | class ChatFormatterResponse:
  class ChatFormatter (line 180) | class ChatFormatter(Protocol):
    method __call__ (line 186) | def __call__(
  class Jinja2ChatFormatter (line 194) | class Jinja2ChatFormatter(ChatFormatter):
    method __init__ (line 195) | def __init__(
    method strftime_now (line 219) | def strftime_now(f: str) -> str:
    method __call__ (line 222) | def __call__(
    method to_chat_handler (line 265) | def to_chat_handler(self) -> LlamaChatCompletionHandler:
  function _convert_text_completion_logprobs_to_chat (line 269) | def _convert_text_completion_logprobs_to_chat(
  function _convert_text_completion_to_chat (line 294) | def _convert_text_completion_to_chat(
  function _convert_text_completion_chunks_to_chat (line 318) | def _convert_text_completion_chunks_to_chat(
  function _convert_completion_to_chat (line 361) | def _convert_completion_to_chat(
  function _convert_completion_to_chat_function (line 378) | def _convert_completion_to_chat_function(
  function chat_formatter_to_chat_completion_handler (line 555) | def chat_formatter_to_chat_completion_handler(
  function hf_autotokenizer_to_chat_formatter (line 704) | def hf_autotokenizer_to_chat_formatter(
  function hf_autotokenizer_to_chat_completion_handler (line 729) | def hf_autotokenizer_to_chat_completion_handler(
  function hf_tokenizer_config_to_chat_formatter (line 736) | def hf_tokenizer_config_to_chat_formatter(
  function hf_tokenizer_config_to_chat_completion_handler (line 784) | def hf_tokenizer_config_to_chat_completion_handler(
  function guess_chat_format_from_gguf_metadata (line 794) | def guess_chat_format_from_gguf_metadata(metadata: Dict[str, str]) -> Op...
  function _get_system_message (line 817) | def _get_system_message(
  function _map_roles (line 827) | def _map_roles(
  function _format_llama2 (line 843) | def _format_llama2(
  function _format_add_colon_single (line 860) | def _format_add_colon_single(
  function _format_add_colon_two (line 873) | def _format_add_colon_two(
  function _format_no_colon_single (line 887) | def _format_no_colon_single(
  function _format_add_colon_space_single (line 900) | def _format_add_colon_space_single(
  function _format_chatml (line 913) | def _format_chatml(
  function _format_chatglm3 (line 926) | def _format_chatglm3(
  function _grammar_for_json (line 941) | def _grammar_for_json(verbose: bool = False):
  function _grammar_for_json_schema (line 947) | def _grammar_for_json_schema(
  function _grammar_for_response_format (line 959) | def _grammar_for_response_format(
  function register_chat_format (line 977) | def register_chat_format(name: str):
  function format_llama2 (line 991) | def format_llama2(
  function format_llama3 (line 1008) | def format_llama3(
  function format_alpaca (line 1025) | def format_alpaca(
  function format_qwen (line 1039) | def format_qwen(
  function format (line 1056) | def format(
  function format_oasst_llama (line 1072) | def format_oasst_llama(
  function format_baichuan2 (line 1088) | def format_baichuan2(
  function format_baichuan (line 1104) | def format_baichuan(
  function format_openbuddy (line 1120) | def format_openbuddy(
  function format_redpajama_incite (line 1142) | def format_redpajama_incite(
  function format_snoozy (line 1158) | def format_snoozy(
  function format_phind (line 1180) | def format_phind(
  function format_intel (line 1194) | def format_intel(
  function format_open_orca (line 1208) | def format_open_orca(
  function format_mistrallite (line 1235) | def format_mistrallite(
  function format_zephyr (line 1251) | def format_zephyr(
  function format_pygmalion (line 1268) | def format_pygmalion(
  function format_chatml (line 1284) | def format_chatml(
  function format_mistral_instruct (line 1301) | def format_mistral_instruct(
  function format_chatglm3 (line 1322) | def format_chatglm3(
  function format_openchat (line 1339) | def format_openchat(
  function format_saiga (line 1359) | def format_saiga(
  function format_gemma (line 1381) | def format_gemma(
  function functionary_chat_handler (line 1402) | def functionary_chat_handler(
  function functionary_v1_v2_chat_handler (line 1761) | def functionary_v1_v2_chat_handler(
  class Llava15ChatHandler (line 2659) | class Llava15ChatHandler:
    method __init__ (line 2699) | def __init__(self, clip_model_path: str, verbose: bool = True):
    method _init_mtmd_context (line 2711) | def _init_mtmd_context(self, llama_model: llama.Llama):
    method load_image (line 2746) | def load_image(self, image_url: str) -> bytes:
    method _create_bitmap_from_bytes (line 2749) | def _create_bitmap_from_bytes(self, image_bytes: bytes):
    method __call__ (line 2767) | def __call__(
    method _load_image (line 3031) | def _load_image(image_url: str) -> bytes:
    method get_image_urls (line 3044) | def get_image_urls(messages: List[llama_types.ChatCompletionRequestMes...
    method split_text_on_image_urls (line 3063) | def split_text_on_image_urls(text: str, image_urls: List[str]):
    method from_pretrained (line 3088) | def from_pretrained(
  class ObsidianChatHandler (line 3172) | class ObsidianChatHandler(Llava15ChatHandler):
  class MoondreamChatHandler (line 3228) | class MoondreamChatHandler(Llava15ChatHandler):
  class Llava16ChatHandler (line 3270) | class Llava16ChatHandler(Llava15ChatHandler):
  class NanoLlavaChatHandler (line 3318) | class NanoLlavaChatHandler(Llava15ChatHandler):
  class Llama3VisionAlphaChatHandler (line 3373) | class Llama3VisionAlphaChatHandler(Llava15ChatHandler):
  class MiniCPMv26ChatHandler (line 3426) | class MiniCPMv26ChatHandler(Llava15ChatHandler):
  class Qwen25VLChatHandler (line 3464) | class Qwen25VLChatHandler(Llava15ChatHandler):
    method __call__ (line 3497) | def __call__(self, **kwargs):
  function chatml_function_calling (line 3523) | def chatml_function_calling(

FILE: llama_cpp/llama_cpp.py
  class llama_token_data (line 481) | class llama_token_data(ctypes.Structure):
  class llama_token_data_array (line 512) | class llama_token_data_array(ctypes.Structure):
  class llama_batch (line 569) | class llama_batch(ctypes.Structure):
  class llama_model_kv_override_value (line 630) | class llama_model_kv_override_value(ctypes.Union):
  class llama_model_kv_override (line 645) | class llama_model_kv_override(ctypes.Structure):
  class llama_model_params (line 698) | class llama_model_params(ctypes.Structure):
  class llama_context_params (line 800) | class llama_context_params(ctypes.Structure):
  class llama_model_quantize_params (line 934) | class llama_model_quantize_params(ctypes.Structure):
  class llama_logit_bias (line 989) | class llama_logit_bias(ctypes.Structure):
  class llama_sampler_chain_params (line 1012) | class llama_sampler_chain_params(ctypes.Structure):
  class llama_chat_message (line 1031) | class llama_chat_message(ctypes.Structure):
  function llama_model_default_params (line 1051) | def llama_model_default_params() -> llama_model_params:
  function llama_context_default_params (line 1062) | def llama_context_default_params() -> llama_context_params:
  function llama_sampler_chain_default_params (line 1073) | def llama_sampler_chain_default_params() -> llama_sampler_chain_params:
  function llama_model_quantize_default_params (line 1084) | def llama_model_quantize_default_params() -> llama_model_quantize_params:
  function llama_backend_init (line 1098) | def llama_backend_init():
  function llama_backend_free (line 1128) | def llama_backend_free():
  function llama_numa_init (line 1140) | def llama_numa_init(numa: int, /):
  function llama_load_model_from_file (line 1165) | def llama_load_model_from_file(
  function llama_model_load_from_file (line 1182) | def llama_model_load_from_file(
  function llama_model_load_from_splits (line 1204) | def llama_model_load_from_splits(
  function llama_model_save_to_file (line 1221) | def llama_model_save_to_file(model: llama_model_p, path_model: bytes, /):
  function llama_free_model (line 1233) | def llama_free_model(model: llama_model_p, /):
  function llama_model_free (line 1243) | def llama_model_free(model: llama_model_p, /):
  function llama_init_from_model (line 1255) | def llama_init_from_model(
  function llama_new_context_with_model (line 1270) | def llama_new_context_with_model(
  function llama_free (line 1283) | def llama_free(ctx: llama_context_p, /):
  function llama_time_us (line 1294) | def llama_time_us() -> int:
  function llama_max_devices (line 1300) | def llama_max_devices() -> int:
  function llama_max_parallel_sequences (line 1306) | def llama_max_parallel_sequences() -> int:
  function llama_supports_mmap (line 1312) | def llama_supports_mmap() -> bool:
  function llama_supports_mlock (line 1318) | def llama_supports_mlock() -> bool:
  function llama_supports_gpu_offload (line 1324) | def llama_supports_gpu_offload() -> bool:
  function llama_supports_rpc (line 1330) | def llama_supports_rpc() -> bool:
  function llama_n_ctx (line 1336) | def llama_n_ctx(ctx: llama_context_p, /) -> int:
  function llama_n_batch (line 1342) | def llama_n_batch(ctx: llama_context_p, /) -> int:
  function llama_n_ubatch (line 1348) | def llama_n_ubatch(ctx: llama_context_p, /) -> int:
  function llama_n_seq_max (line 1354) | def llama_n_seq_max(ctx: llama_context_p, /) -> int:
  function llama_n_ctx_train (line 1360) | def llama_n_ctx_train(model: llama_model_p, /) -> int:
  function llama_n_embd (line 1366) | def llama_n_embd(model: llama_model_p, /) -> int:
  function llama_n_layer (line 1372) | def llama_n_layer(model: llama_model_p, /) -> int:
  function llama_n_head (line 1378) | def llama_n_head(model: llama_model_p, /) -> int:
  function llama_n_vocab (line 1384) | def llama_n_vocab(model: llama_vocab_p, /) -> int:
  function llama_get_model (line 1390) | def llama_get_model(ctx: llama_context_p, /) -> Optional[llama_model_p]:
  function llama_get_memory (line 1396) | def llama_get_memory(ctx: llama_context_p, /) -> Optional[llama_memory_t]:
  function llama_pooling_type (line 1403) | def llama_pooling_type(ctx: llama_context_p, /) -> int:
  function llama_get_kv_self (line 1413) | def llama_get_kv_self(ctx: llama_context_p, /) -> Optional[llama_kv_cach...
  function llama_model_get_vocab (line 1420) | def llama_model_get_vocab(model: llama_model_p, /) -> Optional[llama_voc...
  function llama_model_rope_type (line 1426) | def llama_model_rope_type(model: llama_model_p, /) -> int:
  function llama_model_n_ctx_train (line 1432) | def llama_model_n_ctx_train(model: llama_model_p, /) -> int:
  function llama_model_n_embd (line 1438) | def llama_model_n_embd(model: llama_model_p, /) -> int:
  function llama_model_n_layer (line 1444) | def llama_model_n_layer(model: llama_model_p, /) -> int:
  function llama_model_n_head (line 1450) | def llama_model_n_head(model: llama_model_p, /) -> int:
  function llama_model_n_head_kv (line 1456) | def llama_model_n_head_kv(model: llama_model_p, /) -> int:
  function llama_model_n_swa (line 1462) | def llama_model_n_swa(model: llama_model_p, /) -> int:
  function llama_model_rope_freq_scale_train (line 1469) | def llama_model_rope_freq_scale_train(model: llama_model_p, /) -> float:
  function llama_model_n_cls_out (line 1477) | def llama_model_n_cls_out(model: llama_model_p, /) -> int:
  function llama_model_cls_label (line 1485) | def llama_model_cls_label(model: llama_model_p, i: int, /) -> Optional[b...
  function llama_vocab_type (line 1492) | def llama_vocab_type(vocab: llama_vocab_p, /) -> int:
  function llama_vocab_n_tokens (line 1498) | def llama_vocab_n_tokens(vocab: llama_vocab_p, /) -> int:
  function llama_model_meta_val_str (line 1521) | def llama_model_meta_val_str(
  function llama_model_meta_count (line 1535) | def llama_model_meta_count(model: llama_model_p, /) -> int:
  function llama_model_meta_key_by_index (line 1552) | def llama_model_meta_key_by_index(
  function llama_model_meta_val_str_by_index (line 1575) | def llama_model_meta_val_str_by_index(
  function llama_model_desc (line 1593) | def llama_model_desc(
  function llama_model_size (line 1606) | def llama_model_size(model: llama_model_p, /) -> int:
  function llama_model_chat_template (line 1615) | def llama_model_chat_template(model: llama_model_p, name: Optional[bytes...
  function llama_model_n_params (line 1624) | def llama_model_n_params(model: llama_model_p, /) -> int:
  function llama_model_has_encoder (line 1632) | def llama_model_has_encoder(model: llama_model_p, /) -> bool:
  function llama_model_has_decoder (line 1640) | def llama_model_has_decoder(model: llama_model_p, /) -> bool:
  function llama_model_decoder_start_token (line 1651) | def llama_model_decoder_start_token(model: llama_model_p, /) -> int:
  function llama_model_is_recurrent (line 1661) | def llama_model_is_recurrent(model: llama_model_p, /) -> bool:
  function llama_model_is_diffusion (line 1669) | def llama_model_is_diffusion(model: llama_model_p, /) -> bool:
  function llama_model_quantize (line 1688) | def llama_model_quantize(
  function llama_adapter_lora_init (line 1711) | def llama_adapter_lora_init(
  function llama_adapter_lora_free (line 1725) | def llama_adapter_lora_free(adapter: llama_adapter_lora_p, /):
  function llama_set_adapter_lora (line 1743) | def llama_set_adapter_lora(
  function llama_rm_adapter_lora (line 1761) | def llama_rm_adapter_lora(
  function llama_clear_adapter_lora (line 1776) | def llama_clear_adapter_lora(ctx: llama_context_p, /):
  function llama_apply_adapter_cvec (line 1806) | def llama_apply_adapter_cvec(
  function llama_memory_clear (line 1838) | def llama_memory_clear(mem: llama_memory_t, data: bool, /):
  function llama_memory_seq_rm (line 1864) | def llama_memory_seq_rm(
  function llama_memory_seq_cp (line 1901) | def llama_memory_seq_cp(
  function llama_memory_seq_keep (line 1922) | def llama_memory_seq_keep(mem: llama_memory_t, seq_id: Union[llama_seq_i...
  function llama_memory_seq_add (line 1947) | def llama_memory_seq_add(
  function llama_memory_seq_div (line 1981) | def llama_memory_seq_div(
  function llama_memory_seq_pos_min (line 2005) | def llama_memory_seq_pos_min(
  function llama_memory_seq_pos_max (line 2023) | def llama_memory_seq_pos_max(
  function llama_memory_can_shift (line 2034) | def llama_memory_can_shift(mem: llama_memory_t, /) -> bool:
  function llama_kv_self_n_tokens (line 2050) | def llama_kv_self_n_tokens(ctx: llama_context_p, /) -> int:
  function llama_kv_self_used_cells (line 2061) | def llama_kv_self_used_cells(ctx: llama_context_p, /) -> int:
  function llama_kv_self_clear (line 2073) | def llama_kv_self_clear(ctx: llama_context_p, /):
  function llama_kv_self_seq_rm (line 2099) | def llama_kv_self_seq_rm(
  function llama_kv_self_seq_cp (line 2132) | def llama_kv_self_seq_cp(
  function llama_kv_self_seq_keep (line 2152) | def llama_kv_self_seq_keep(ctx: llama_context_p, seq_id: Union[llama_seq...
  function llama_kv_self_seq_add (line 2180) | def llama_kv_self_seq_add(
  function llama_kv_self_seq_div (line 2215) | def llama_kv_self_seq_div(
  function llama_kv_self_seq_pos_min (line 2238) | def llama_kv_self_seq_pos_min(
  function llama_kv_self_seq_pos_max (line 2255) | def llama_kv_self_seq_pos_max(
  function llama_kv_self_defrag (line 2268) | def llama_kv_self_defrag(ctx: llama_context_p, /):
  function llama_kv_self_can_shift (line 2277) | def llama_kv_self_can_shift(ctx: llama_context_p, /) -> bool:
  function llama_kv_self_update (line 2286) | def llama_kv_self_update(ctx: llama_context_p, /):
  function llama_state_get_size (line 2300) | def llama_state_get_size(ctx: llama_context_p, /) -> int:
  function llama_get_state_size (line 2308) | def llama_get_state_size(ctx: llama_context_p, /) -> int:
  function llama_state_get_data (line 2329) | def llama_state_get_data(
  function llama_copy_state_data (line 2353) | def llama_copy_state_data(
  function llama_state_set_data (line 2371) | def llama_state_set_data(
  function llama_set_state_data (line 2391) | def llama_set_state_data(
  function llama_state_load_file (line 2416) | def llama_state_load_file(
  function llama_load_session_file (line 2445) | def llama_load_session_file(
  function llama_state_save_file (line 2471) | def llama_state_save_file(
  function llama_save_session_file (line 2497) | def llama_save_session_file(
  function llama_state_seq_get_size (line 2516) | def llama_state_seq_get_size(ctx: llama_context_p, seq_id: llama_seq_id,...
  function llama_state_seq_get_data (line 2537) | def llama_state_seq_get_data(
  function llama_state_seq_set_data (line 2567) | def llama_state_seq_set_data(
  function llama_state_seq_save_file (line 2595) | def llama_state_seq_save_file(
  function llama_state_seq_load_file (line 2625) | def llama_state_seq_load_file(
  function llama_batch_get_one (line 2658) | def llama_batch_get_one(
  function llama_batch_init (line 2684) | def llama_batch_init(
  function llama_batch_free (line 2703) | def llama_batch_free(batch: llama_batch, /):
  function llama_encode (line 2718) | def llama_encode(ctx: llama_context_p, batch: llama_batch, /) -> int:
  function llama_decode (line 2741) | def llama_decode(ctx: llama_context_p, batch: llama_batch, /) -> int:
  function llama_set_n_threads (line 2764) | def llama_set_n_threads(
  function llama_n_threads (line 2780) | def llama_n_threads(ctx: llama_context_p, /) -> int:
  function llama_n_threads_batch (line 2788) | def llama_n_threads_batch(ctx: llama_context_p, /) -> int:
  function llama_set_embeddings (line 2797) | def llama_set_embeddings(ctx: llama_context_p, embeddings: bool, /):
  function llama_set_causal_attn (line 2806) | def llama_set_causal_attn(ctx: llama_context_p, causal_attn: bool, /):
  function llama_set_warmup (line 2816) | def llama_set_warmup(ctx: llama_context_p, warmup: bool, /):
  function llama_set_abort_callback (line 2829) | def llama_set_abort_callback(
  function llama_synchronize (line 2844) | def llama_synchronize(ctx: llama_context_p, /):
  function llama_get_logits (line 2861) | def llama_get_logits(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_fl...
  function llama_get_logits_ith (line 2883) | def llama_get_logits_ith(
  function llama_get_embeddings (line 2902) | def llama_get_embeddings(ctx: llama_context_p, /) -> CtypesArray[ctypes....
  function llama_get_embeddings_ith (line 2919) | def llama_get_embeddings_ith(
  function llama_get_embeddings_seq (line 2937) | def llama_get_embeddings_seq(
  function llama_vocab_get_text (line 2954) | def llama_vocab_get_text(
  function llama_vocab_get_score (line 2964) | def llama_vocab_get_score(
  function llama_vocab_get_attr (line 2974) | def llama_vocab_get_attr(
  function llama_vocab_is_eog (line 2985) | def llama_vocab_is_eog(vocab: llama_vocab_p, token: Union[llama_token, i...
  function llama_vocab_is_control (line 2995) | def llama_vocab_is_control(
  function llama_vocab_bos (line 3005) | def llama_vocab_bos(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_eos (line 3012) | def llama_vocab_eos(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_eot (line 3019) | def llama_vocab_eot(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_sep (line 3026) | def llama_vocab_sep(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_nl (line 3033) | def llama_vocab_nl(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_pad (line 3040) | def llama_vocab_pad(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_mask (line 3047) | def llama_vocab_mask(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_get_add_bos (line 3058) | def llama_vocab_get_add_bos(vocab: llama_vocab_p, /) -> bool:
  function llama_vocab_get_add_eos (line 3068) | def llama_vocab_get_add_eos(vocab: llama_vocab_p, /) -> bool:
  function llama_vocab_get_add_sep (line 3078) | def llama_vocab_get_add_sep(vocab: llama_vocab_p, /) -> bool:
  function llama_vocab_fim_pre (line 3088) | def llama_vocab_fim_pre(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_fim_suf (line 3098) | def llama_vocab_fim_suf(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_fim_mid (line 3108) | def llama_vocab_fim_mid(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_fim_pad (line 3118) | def llama_vocab_fim_pad(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_fim_rep (line 3128) | def llama_vocab_fim_rep(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_fim_sep (line 3138) | def llama_vocab_fim_sep(vocab: llama_vocab_p, /) -> llama_token:
  function llama_token_get_text (line 3149) | def llama_token_get_text(
  function llama_token_get_score (line 3161) | def llama_token_get_score(
  function llama_token_get_attr (line 3172) | def llama_token_get_attr(
  function llama_token_is_eog (line 3183) | def llama_token_is_eog(
  function llama_token_is_control (line 3194) | def llama_token_is_control(
  function llama_token_bos (line 3205) | def llama_token_bos(vocab: llama_vocab_p, /) -> int:
  function llama_token_eos (line 3214) | def llama_token_eos(vocab: llama_vocab_p, /) -> int:
  function llama_token_eot (line 3223) | def llama_token_eot(vocab: llama_vocab_p, /) -> int:
  function llama_token_cls (line 3232) | def llama_token_cls(vocab: llama_vocab_p, /) -> int:
  function llama_token_sep (line 3241) | def llama_token_sep(vocab: llama_vocab_p, /) -> int:
  function llama_token_nl (line 3251) | def llama_token_nl(vocab: llama_vocab_p, /) -> int:
  function llama_token_pad (line 3261) | def llama_token_pad(vocab: llama_vocab_p, /) -> int:
  function llama_add_bos_token (line 3271) | def llama_add_bos_token(vocab: llama_vocab_p, /) -> bool:
  function llama_add_eos_token (line 3280) | def llama_add_eos_token(vocab: llama_vocab_p, /) -> bool:
  function llama_token_fim_pre (line 3290) | def llama_token_fim_pre(vocab: llama_vocab_p, /) -> llama_token:
  function llama_token_fim_suf (line 3299) | def llama_token_fim_suf(vocab: llama_vocab_p, /) -> llama_token:
  function llama_token_fim_mid (line 3308) | def llama_token_fim_mid(vocab: llama_vocab_p, /) -> llama_token:
  function llama_token_fim_pad (line 3317) | def llama_token_fim_pad(vocab: llama_vocab_p, /) -> llama_token:
  function llama_token_fim_rep (line 3326) | def llama_token_fim_rep(vocab: llama_vocab_p, /) -> llama_token:
  function llama_token_fim_sep (line 3335) | def llama_token_fim_sep(vocab: llama_vocab_p, /) -> llama_token:
  function llama_vocab_cls (line 3346) | def llama_vocab_cls(vocab: llama_vocab_p, /) -> llama_token:
  function llama_tokenize (line 3385) | def llama_tokenize(
  function llama_token_to_piece (line 3437) | def llama_token_to_piece(
  function llama_detokenize (line 3488) | def llama_detokenize(
  function llama_chat_apply_template (line 3544) | def llama_chat_apply_template(
  function llama_chat_builtin_templates (line 3579) | def llama_chat_builtin_templates(
  class llama_sampler_i (line 3617) | class llama_sampler_i(ctypes.Structure):
  class llama_sampler (line 3625) | class llama_sampler(ctypes.Structure):
  function llama_sampler_init (line 3663) | def llama_sampler_init(
  function llama_sampler_name (line 3675) | def llama_sampler_name(smpl: llama_sampler_p, /) -> bytes:
  function llama_sampler_accept (line 3685) | def llama_sampler_accept(smpl: llama_sampler_p, token: Union[llama_token...
  function llama_sampler_apply (line 3695) | def llama_sampler_apply(
  function llama_sampler_reset (line 3707) | def llama_sampler_reset(smpl: llama_sampler_p, /):
  function llama_sampler_clone (line 3717) | def llama_sampler_clone(smpl: llama_sampler_p, /) -> llama_sampler_p:
  function llama_sampler_free (line 3728) | def llama_sampler_free(smpl: llama_sampler_p, /):
  function llama_sampler_chain_init (line 3741) | def llama_sampler_chain_init(params: llama_sampler_chain_params, /) -> l...
  function llama_sampler_chain_add (line 3752) | def llama_sampler_chain_add(chain: llama_sampler_p, smpl: llama_sampler_...
  function llama_sampler_chain_get (line 3762) | def llama_sampler_chain_get(
  function llama_sampler_chain_n (line 3774) | def llama_sampler_chain_n(chain: llama_sampler_p, /) -> int:
  function llama_sampler_chain_remove (line 3785) | def llama_sampler_chain_remove(
  function llama_sampler_init_greedy (line 3795) | def llama_sampler_init_greedy() -> llama_sampler_p:
  function llama_sampler_init_dist (line 3801) | def llama_sampler_init_dist(seed: int) -> llama_sampler_p:
  function llama_sampler_init_softmax (line 3810) | def llama_sampler_init_softmax() -> llama_sampler_p:
  function llama_sampler_init_top_k (line 3818) | def llama_sampler_init_top_k(k: int) -> llama_sampler_p:
  function llama_sampler_init_top_p (line 3829) | def llama_sampler_init_top_p(p: float, min_keep: int) -> llama_sampler_p:
  function llama_sampler_init_min_p (line 3840) | def llama_sampler_init_min_p(p: float, min_keep: int) -> llama_sampler_p:
  function llama_sampler_init_typical (line 3851) | def llama_sampler_init_typical(p: float, min_keep: int) -> llama_sampler_p:
  function llama_sampler_init_temp (line 3858) | def llama_sampler_init_temp(t: float) -> llama_sampler_p:
  function llama_sampler_init_temp_ext (line 3869) | def llama_sampler_init_temp_ext(
  function llama_sampler_init_xtc (line 3882) | def llama_sampler_init_xtc(
  function llama_sampler_init_top_n_sigma (line 3895) | def llama_sampler_init_top_n_sigma(n: float, /) -> llama_sampler_p:
  function llama_sampler_init_mirostat (line 3911) | def llama_sampler_init_mirostat(
  function llama_sampler_init_mirostat_v2 (line 3927) | def llama_sampler_init_mirostat_v2(
  function llama_sampler_init_grammar (line 3943) | def llama_sampler_init_grammar(
  function llama_sampler_init_grammar_lazy (line 3971) | def llama_sampler_init_grammar_lazy(
  function llama_sampler_init_grammar_lazy_patterns (line 4006) | def llama_sampler_init_grammar_lazy_patterns(
  function llama_sampler_init_penalties (line 4030) | def llama_sampler_init_penalties(
  function llama_sampler_init_dry (line 4064) | def llama_sampler_init_dry(
  function llama_sampler_init_logit_bias (line 4087) | def llama_sampler_init_logit_bias(
  function llama_sampler_init_infill (line 4100) | def llama_sampler_init_infill(vocab: llama_vocab_p, /) -> llama_sampler_p:
  function llama_sampler_get_seed (line 4111) | def llama_sampler_get_seed(smpl: llama_sampler_p, /) -> int:
  function llama_sampler_sample (line 4122) | def llama_sampler_sample(
  function llama_split_path (line 4139) | def llama_split_path(
  function llama_split_prefix (line 4158) | def llama_split_prefix(
  function llama_print_system_info (line 4173) | def llama_print_system_info() -> bytes:
  function llama_log_set (line 4185) | def llama_log_set(
  class llama_perf_context_data (line 4210) | class llama_perf_context_data(ctypes.Structure):
  class llama_perf_sampler_data (line 4227) | class llama_perf_sampler_data(ctypes.Structure):
  function llama_perf_context (line 4240) | def llama_perf_context(ctx: llama_context_p, /) -> llama_perf_context_data:
  function llama_perf_context_print (line 4250) | def llama_perf_context_print(ctx: llama_context_p, /):
  function llama_perf_context_reset (line 4260) | def llama_perf_context_reset(ctx: llama_context_p, /):
  function llama_perf_sampler (line 4271) | def llama_perf_sampler(chain: llama_sampler_p, /) -> llama_perf_sampler_...
  function llama_perf_sampler_print (line 4281) | def llama_perf_sampler_print(chain: llama_sampler_p, /):
  function llama_perf_sampler_reset (line 4291) | def llama_perf_sampler_reset(chain: llama_sampler_p, /):
  function llama_opt_param_filter_all (line 4310) | def llama_opt_param_filter_all(tensor: ctypes.c_void_p, userdata: ctypes...
  class llama_opt_params (line 4323) | class llama_opt_params(ctypes.Structure):
  function llama_opt_init (line 4339) | def llama_opt_init(lctx: llama_context_p, model: llama_model_p, lopt_par...
  function llama_opt_epoch (line 4364) | def llama_opt_epoch(

FILE: llama_cpp/llama_grammar.py
  class LlamaGrammar (line 19) | class LlamaGrammar:
    method __init__ (line 20) | def __init__(self, *args, _grammar: str, **kwargs):
    method from_string (line 25) | def from_string(cls, grammar: str, verbose: bool = True) -> "LlamaGram...
    method from_file (line 29) | def from_file(cls, file: Union[str, Path], verbose: bool = True) -> "L...
    method from_json_schema (line 46) | def from_json_schema(cls, json_schema: str, verbose: bool = True) -> "...
  function _build_repetition (line 254) | def _build_repetition(
  class BuiltinRule (line 310) | class BuiltinRule:
    method __init__ (line 311) | def __init__(self, content: str, deps: list = None):
  class SchemaConverter (line 380) | class SchemaConverter:
    method __init__ (line 381) | def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
    method _format_literal (line 392) | def _format_literal(self, literal):
    method not_literal (line 398) | def not_literal(
    method _add_rule (line 424) | def _add_rule(self, name, rule):
    method resolve_refs (line 439) | def resolve_refs(self, schema: dict, url: str):
    method _generate_union_rule (line 492) | def _generate_union_rule(self, name, alt_schemas):
    method _visit_pattern (line 500) | def _visit_pattern(self, pattern, name):
    method _resolve_ref (line 685) | def _resolve_ref(self, ref):
    method _generate_constant_rule (line 694) | def _generate_constant_rule(self, value):
    method visit (line 697) | def visit(self, schema, name):
    method _add_primitive (line 846) | def _add_primitive(self, name: str, rule: BuiltinRule):
    method _build_object_rule (line 856) | def _build_object_rule(
    method format_grammar (line 937) | def format_grammar(self):
  function json_schema_to_gbnf (line 944) | def json_schema_to_gbnf(schema: str, prop_order: Optional[List[str]] = N...

FILE: llama_cpp/llama_speculative.py
  class LlamaDraftModel (line 9) | class LlamaDraftModel(abc.ABC):
    method __call__ (line 11) | def __call__(
  class LlamaPromptLookupDecoding (line 17) | class LlamaPromptLookupDecoding(LlamaDraftModel):
    method __init__ (line 20) | def __init__(self, max_ngram_size: int = 2, num_pred_tokens: int = 10):
    method find_candidate_pred_tokens (line 25) | def find_candidate_pred_tokens(
    method __call__ (line 57) | def __call__(

FILE: llama_cpp/llama_tokenizer.py
  class BaseLlamaTokenizer (line 14) | class BaseLlamaTokenizer(abc.ABC):
    method tokenize (line 16) | def tokenize(
    method detokenize (line 29) | def detokenize(
  class LlamaTokenizer (line 45) | class LlamaTokenizer(BaseLlamaTokenizer):
    method __init__ (line 46) | def __init__(self, llama: llama_cpp.Llama):
    method tokenize (line 49) | def tokenize(
    method detokenize (line 54) | def detokenize(
    method encode (line 62) | def encode(
    method decode (line 69) | def decode(self, tokens: List[int]) -> str:
    method from_ggml_file (line 73) | def from_ggml_file(cls, path: str) -> "LlamaTokenizer":
  class LlamaHFTokenizer (line 77) | class LlamaHFTokenizer(BaseLlamaTokenizer):
    method __init__ (line 78) | def __init__(self, hf_tokenizer: Any):
    method tokenize (line 81) | def tokenize(
    method detokenize (line 88) | def detokenize(
    method from_pretrained (line 109) | def from_pretrained(cls, pretrained_model_name_or_path: str) -> "Llama...

FILE: llama_cpp/llama_types.py
  class EmbeddingUsage (line 20) | class EmbeddingUsage(TypedDict):
  class Embedding (line 25) | class Embedding(TypedDict):
  class CreateEmbeddingResponse (line 31) | class CreateEmbeddingResponse(TypedDict):
  class CompletionLogprobs (line 38) | class CompletionLogprobs(TypedDict):
  class CompletionChoice (line 45) | class CompletionChoice(TypedDict):
  class CompletionUsage (line 52) | class CompletionUsage(TypedDict):
  class CreateCompletionResponse (line 58) | class CreateCompletionResponse(TypedDict):
  class ChatCompletionResponseFunctionCall (line 67) | class ChatCompletionResponseFunctionCall(TypedDict):
  class ChatCompletionResponseMessage (line 72) | class ChatCompletionResponseMessage(TypedDict):
  class ChatCompletionFunction (line 79) | class ChatCompletionFunction(TypedDict):
  class ChatCompletionTopLogprobToken (line 85) | class ChatCompletionTopLogprobToken(TypedDict):
  class ChatCompletionLogprobToken (line 91) | class ChatCompletionLogprobToken(ChatCompletionTopLogprobToken):
  class ChatCompletionLogprobs (line 98) | class ChatCompletionLogprobs(TypedDict):
  class ChatCompletionResponseChoice (line 103) | class ChatCompletionResponseChoice(TypedDict):
  class CreateChatCompletionResponse (line 110) | class CreateChatCompletionResponse(TypedDict):
  class ChatCompletionMessageToolCallChunkFunction (line 119) | class ChatCompletionMessageToolCallChunkFunction(TypedDict):
  class ChatCompletionMessageToolCallChunk (line 124) | class ChatCompletionMessageToolCallChunk(TypedDict):
  class ChatCompletionStreamResponseDeltaEmpty (line 131) | class ChatCompletionStreamResponseDeltaEmpty(TypedDict):
  class ChatCompletionStreamResponseDeltaFunctionCall (line 135) | class ChatCompletionStreamResponseDeltaFunctionCall(TypedDict):
  class ChatCompletionStreamResponseDelta (line 140) | class ChatCompletionStreamResponseDelta(TypedDict):
  class ChatCompletionStreamResponseChoice (line 149) | class ChatCompletionStreamResponseChoice(TypedDict):
  class CreateChatCompletionStreamResponse (line 158) | class CreateChatCompletionStreamResponse(TypedDict):
  class ChatCompletionFunctions (line 166) | class ChatCompletionFunctions(TypedDict):
  class ChatCompletionFunctionCallOption (line 172) | class ChatCompletionFunctionCallOption(TypedDict):
  class ChatCompletionRequestResponseFormat (line 176) | class ChatCompletionRequestResponseFormat(TypedDict):
  class ChatCompletionRequestMessageContentPartText (line 183) | class ChatCompletionRequestMessageContentPartText(TypedDict):
  class ChatCompletionRequestMessageContentPartImageImageUrl (line 188) | class ChatCompletionRequestMessageContentPartImageImageUrl(TypedDict):
  class ChatCompletionRequestMessageContentPartImage (line 193) | class ChatCompletionRequestMessageContentPartImage(TypedDict):
  class ChatCompletionRequestSystemMessage (line 204) | class ChatCompletionRequestSystemMessage(TypedDict):
  class ChatCompletionRequestUserMessage (line 209) | class ChatCompletionRequestUserMessage(TypedDict):
  class ChatCompletionMessageToolCallFunction (line 214) | class ChatCompletionMessageToolCallFunction(TypedDict):
  class ChatCompletionMessageToolCall (line 219) | class ChatCompletionMessageToolCall(TypedDict):
  class ChatCompletionRequestAssistantMessageFunctionCall (line 228) | class ChatCompletionRequestAssistantMessageFunctionCall(TypedDict):
  class ChatCompletionRequestAssistantMessage (line 233) | class ChatCompletionRequestAssistantMessage(TypedDict):
  class ChatCompletionRequestToolMessage (line 242) | class ChatCompletionRequestToolMessage(TypedDict):
  class ChatCompletionRequestFunctionMessage (line 248) | class ChatCompletionRequestFunctionMessage(TypedDict):
  class ChatCompletionRequestFunctionCallOption (line 264) | class ChatCompletionRequestFunctionCallOption(TypedDict):
  class ChatCompletionToolFunction (line 275) | class ChatCompletionToolFunction(TypedDict):
  class ChatCompletionTool (line 281) | class ChatCompletionTool(TypedDict):
  class ChatCompletionNamedToolChoiceFunction (line 286) | class ChatCompletionNamedToolChoiceFunction(TypedDict):
  class ChatCompletionNamedToolChoice (line 290) | class ChatCompletionNamedToolChoice(TypedDict):

FILE: llama_cpp/llava_cpp.py
  class llava_image_embed (line 60) | class llava_image_embed(Structure):
  function llava_validate_embed_size (line 74) | def llava_validate_embed_size(
  function llava_image_embed_make_with_bytes (line 87) | def llava_image_embed_make_with_bytes(
  function llava_image_embed_make_with_filename (line 104) | def llava_image_embed_make_with_filename(
  function llava_image_embed_free (line 113) | def llava_image_embed_free(embed: "_Pointer[llava_image_embed]", /):
  function llava_eval_image_embed (line 129) | def llava_eval_image_embed(
  function clip_model_load (line 147) | def clip_model_load(
  function clip_free (line 156) | def clip_free(ctx: clip_ctx_p, /):

FILE: llama_cpp/mtmd_cpp.py
  class mtmd_context_params (line 75) | class mtmd_context_params(Structure):
  class mtmd_input_text (line 85) | class mtmd_input_text(Structure):
  function mtmd_default_marker (line 98) | def mtmd_default_marker() -> bytes:
  function mtmd_context_params_default (line 103) | def mtmd_context_params_default() -> mtmd_context_params:
  function mtmd_init_from_file (line 114) | def mtmd_init_from_file(
  function mtmd_free (line 124) | def mtmd_free(ctx: mtmd_context_p, /):
  function mtmd_support_vision (line 129) | def mtmd_support_vision(ctx: mtmd_context_p, /) -> bool:
  function mtmd_bitmap_init (line 138) | def mtmd_bitmap_init(
  function mtmd_bitmap_free (line 148) | def mtmd_bitmap_free(bitmap: mtmd_bitmap_p, /):
  function mtmd_input_chunks_init (line 153) | def mtmd_input_chunks_init() -> Optional[mtmd_input_chunks_p]:
  function mtmd_input_chunks_free (line 158) | def mtmd_input_chunks_free(chunks: mtmd_input_chunks_p, /):
  function mtmd_input_chunks_size (line 163) | def mtmd_input_chunks_size(chunks: mtmd_input_chunks_p, /) -> int:
  function mtmd_input_chunks_get (line 172) | def mtmd_input_chunks_get(
  function mtmd_tokenize (line 193) | def mtmd_tokenize(
  function mtmd_input_chunk_get_n_tokens (line 205) | def mtmd_input_chunk_get_n_tokens(chunk: mtmd_input_chunk_p, /) -> int:
  function mtmd_input_chunk_get_type (line 210) | def mtmd_input_chunk_get_type(chunk: mtmd_input_chunk_p, /) -> int:
  function mtmd_input_chunk_get_tokens_text (line 219) | def mtmd_input_chunk_get_tokens_text(
  function mtmd_helper_bitmap_init_from_buf (line 234) | def mtmd_helper_bitmap_init_from_buf(
  function mtmd_helper_get_n_tokens (line 244) | def mtmd_helper_get_n_tokens(chunks: mtmd_input_chunks_p, /) -> int:
  function mtmd_helper_eval_chunk_single (line 269) | def mtmd_helper_eval_chunk_single(

FILE: llama_cpp/server/__main__.py
  function main (line 43) | def main():

FILE: llama_cpp/server/app.py
  function set_server_settings (line 53) | def set_server_settings(server_settings: ServerSettings):
  function get_server_settings (line 58) | def get_server_settings():
  function set_llama_proxy (line 68) | def set_llama_proxy(model_settings: List[ModelSettings]):
  function get_llama_proxy (line 73) | async def get_llama_proxy():
  function set_ping_message_factory (line 95) | def set_ping_message_factory(factory: typing.Callable[[], bytes]):
  function create_app (line 100) | def create_app(
  function prepare_request_resources (line 158) | def prepare_request_resources(
  function get_event_publisher (line 191) | async def get_event_publisher(
  function _logit_bias_tokens_to_input_ids (line 225) | def _logit_bias_tokens_to_input_ids(
  function authenticate (line 241) | async def authenticate(
  function create_completion (line 303) | async def create_completion(
  function create_embedding (line 366) | async def create_embedding(
  function create_chat_completion (line 408) | async def create_chat_completion(
  function get_models (line 535) | async def get_models(
  function tokenize (line 561) | async def tokenize(
  function count_query_tokens (line 576) | async def count_query_tokens(
  function detokenize (line 591) | async def detokenize(

FILE: llama_cpp/server/cli.py
  function _get_base_type (line 10) | def _get_base_type(annotation: Type[Any]) -> Type[Any]:
  function _contains_list_type (line 30) | def _contains_list_type(annotation: Type[Any] | None) -> bool:
  function _parse_bool_arg (line 41) | def _parse_bool_arg(arg: str | bytes | bool) -> bool:
  function add_args_from_model (line 58) | def add_args_from_model(parser: argparse.ArgumentParser, model: Type[Bas...
  function parse_model_from_args (line 89) | def parse_model_from_args(model: T, args: argparse.Namespace) -> T:

FILE: llama_cpp/server/errors.py
  class ErrorResponse (line 26) | class ErrorResponse(TypedDict):
  class ErrorResponseFormatters (line 35) | class ErrorResponseFormatters:
    method context_length_exceeded (line 48) | def context_length_exceeded(
    method model_not_found (line 86) | def model_not_found(
  class RouteErrorHandler (line 102) | class RouteErrorHandler(APIRoute):
    method error_message_wrapper (line 125) | def error_message_wrapper(
    method get_route_handler (line 162) | def get_route_handler(

FILE: llama_cpp/server/model.py
  class LlamaProxy (line 14) | class LlamaProxy:
    method __init__ (line 15) | def __init__(self, models: List[ModelSettings]) -> None:
    method __call__ (line 36) | def __call__(self, model: Optional[str] = None) -> llama_cpp.Llama:
    method __getitem__ (line 56) | def __getitem__(self, model: str):
    method __setitem__ (line 59) | def __setitem__(self, model: str, settings: Union[ModelSettings, str, ...
    method __iter__ (line 64) | def __iter__(self):
    method free (line 68) | def free(self):
    method load_llama_from_model_settings (line 74) | def load_llama_from_model_settings(settings: ModelSettings) -> llama_c...

FILE: llama_cpp/server/settings.py
  class ModelSettings (line 17) | class ModelSettings(BaseSettings):
    method set_dynamic_defaults (line 191) | def set_dynamic_defaults(self) -> Self:
  class ServerSettings (line 202) | class ServerSettings(BaseSettings):
  class Settings (line 233) | class Settings(ServerSettings, ModelSettings):
  class ConfigFileSettings (line 237) | class ConfigFileSettings(ServerSettings):

FILE: llama_cpp/server/types.py
  class CreateCompletionRequest (line 109) | class CreateCompletionRequest(BaseModel):
  class CreateEmbeddingRequest (line 167) | class CreateEmbeddingRequest(BaseModel):
  class ChatCompletionRequestMessage (line 183) | class ChatCompletionRequestMessage(BaseModel):
  class CreateChatCompletionRequest (line 192) | class CreateChatCompletionRequest(BaseModel):
  class ModelData (line 271) | class ModelData(TypedDict):
  class ModelList (line 278) | class ModelList(TypedDict):
  class TokenizeInputRequest (line 283) | class TokenizeInputRequest(BaseModel):
  class TokenizeInputResponse (line 292) | class TokenizeInputResponse(BaseModel):
  class TokenizeInputCountResponse (line 298) | class TokenizeInputCountResponse(BaseModel):
  class DetokenizeInputRequest (line 304) | class DetokenizeInputRequest(BaseModel):
  class DetokenizeInputResponse (line 311) | class DetokenizeInputResponse(BaseModel):

FILE: tests/test_llama.py
  function test_llama_cpp_version (line 18) | def test_llama_cpp_version():
  function test_llama_cpp_tokenization (line 22) | def test_llama_cpp_tokenization():
  function llama_cpp_model_path (line 60) | def llama_cpp_model_path():
  function test_real_model (line 67) | def test_real_model(llama_cpp_model_path):
  function test_real_llama (line 117) | def test_real_llama(llama_cpp_model_path):
  function test_real_llama_embeddings (line 221) | def test_real_llama_embeddings(llama_cpp_model_path):

FILE: tests/test_llama_chat_format.py
  function test_mistral_instruct (line 13) | def test_mistral_instruct():
  function test_hf_tokenizer_config_str_to_chat_formatter (line 78) | def test_hf_tokenizer_config_str_to_chat_formatter():

FILE: tests/test_llama_grammar.py
  function test_grammar_from_string (line 11) | def test_grammar_from_string():
  function test_composed_pydantic_grammar (line 18) | def test_composed_pydantic_grammar():
  function test_grammar_anyof (line 55) | def test_grammar_anyof():

FILE: tests/test_llama_speculative.py
  function test_find_candidate_pred_tokens (line 5) | def test_find_candidate_pred_tokens():