SYMBOL INDEX (750 symbols across 38 files) FILE: docker/open_llama/hug_model.py function make_request (line 7) | def make_request(url, params=None): function check_magic_and_version (line 16) | def check_magic_and_version(filename): function download_file (line 29) | def download_file(url, destination): function get_user_choice (line 51) | def get_user_choice(model_list): function main (line 73) | def main(): FILE: examples/batch-processing/server.py function create_chat_completions (line 30) | def create_chat_completions(): FILE: examples/gradio_chat/local.py function predict (line 18) | def predict(message, history): FILE: examples/gradio_chat/server.py function predict (line 10) | def predict(message, history): FILE: examples/high_level_api/langchain_custom_llm.py class LlamaLLM (line 9) | class LlamaLLM(LLM): method _llm_type (line 14) | def _llm_type(self) -> str: method __init__ (line 17) | def __init__(self, model_path: str, **kwargs: Any): method _call (line 22) | def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str: method _identifying_params (line 27) | def _identifying_params(self) -> Mapping[str, Any]: FILE: examples/low_level_api/Chat.py function env_or_def (line 7) | def env_or_def(env, default): FILE: examples/low_level_api/Miku.py function env_or_def (line 7) | def env_or_def(env, default): FILE: examples/low_level_api/ReasonAct.py function env_or_def (line 7) | def env_or_def(env, default): FILE: examples/low_level_api/common.py class GptParams (line 12) | class GptParams: function gpt_params_parse (line 77) | def gpt_params_parse(argv=None): function gpt_random_prompt (line 389) | def gpt_random_prompt(rng): FILE: examples/low_level_api/low_level_api_chat_cpp.py class LLaMAInteract (line 25) | class LLaMAInteract: method __init__ (line 26) | def __init__(self, params: GptParams) -> None: method _tokenize (line 314) | def _tokenize(self, prompt, bos=True): method set_color (line 327) | def set_color(self, c): method use_antiprompt (line 331) | def use_antiprompt(self): method generate (line 335) | def generate(self): method __enter__ (line 625) | def __enter__(self): method __exit__ (line 628) | def __exit__(self, type, value, tb): method exit (line 631) | def exit(self): method token_to_str (line 635) | def token_to_str(self, token_id: int) -> bytes: method past (line 645) | def past(self): method input (line 650) | def input(self, prompt: str): method output (line 661) | def output(self): method read_input (line 689) | def read_input(self): method interact (line 696) | def interact(self): FILE: examples/low_level_api/quantize.py function main (line 6) | def main(args): FILE: examples/low_level_api/util.py class IterSearch (line 13) | class IterSearch: method __init__ (line 14) | def __init__(self, pattern): method __call__ (line 18) | def __call__(self, char): class Circle (line 31) | class Circle: method __init__ (line 32) | def __init__(self, size, default=0): method append (line 38) | def append(self, elem): method __getitem__ (line 46) | def __getitem__(self, val): FILE: examples/ray/llm.py class LlamaDeployment (line 9) | class LlamaDeployment: method __init__ (line 10) | def __init__(self, model_path: str): method __call__ (line 13) | async def __call__(self, http_request: Request) -> Dict: function llm_builder (line 20) | def llm_builder(args: Dict[str, str]) -> Application: FILE: llama_cpp/_ctypes_extensions.py function load_shared_library (line 23) | def load_shared_library(lib_base_name: str, base_path: pathlib.Path): class CtypesRef (line 93) | class CtypesRef(Generic[CtypesCData]): function ctypes_function_for_shared_library (line 105) | def ctypes_function_for_shared_library(lib: ctypes.CDLL): function _byref (line 126) | def _byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[... FILE: llama_cpp/_internals.py class LlamaModel (line 31) | class LlamaModel: method __init__ (line 35) | def __init__( method close (line 77) | def close(self): method __del__ (line 85) | def __del__(self): method vocab_type (line 88) | def vocab_type(self) -> int: method n_vocab (line 91) | def n_vocab(self) -> int: method n_ctx_train (line 94) | def n_ctx_train(self) -> int: method n_embd (line 97) | def n_embd(self) -> int: method rope_freq_scale_train (line 100) | def rope_freq_scale_train(self) -> float: method desc (line 103) | def desc(self) -> str: method size (line 108) | def size(self) -> int: method n_params (line 111) | def n_params(self) -> int: method get_tensor (line 114) | def get_tensor(self, name: str) -> ctypes.c_void_p: method token_get_text (line 119) | def token_get_text(self, token: int) -> str: method token_get_score (line 122) | def token_get_score(self, token: int) -> float: method token_get_attr (line 125) | def token_get_attr(self, token: int) -> int: method token_bos (line 130) | def token_bos(self) -> int: method token_eos (line 133) | def token_eos(self) -> int: method token_cls (line 136) | def token_cls(self) -> int: method token_sep (line 139) | def token_sep(self) -> int: method token_nl (line 142) | def token_nl(self) -> int: method token_prefix (line 145) | def token_prefix(self) -> int: method token_middle (line 148) | def token_middle(self) -> int: method token_suffix (line 151) | def token_suffix(self) -> int: method token_eot (line 154) | def token_eot(self) -> int: method add_bos_token (line 157) | def add_bos_token(self) -> bool: method add_eos_token (line 160) | def add_eos_token(self) -> bool: method tokenize (line 165) | def tokenize(self, text: bytes, add_bos: bool, special: bool): method token_to_piece (line 183) | def token_to_piece(self, token: int, special: bool = False) -> bytes: method detokenize (line 188) | def detokenize(self, tokens: List[int], special: bool = False) -> bytes: method metadata (line 207) | def metadata(self) -> Dict[str, str]: method default_params (line 239) | def default_params(): class LlamaContext (line 244) | class LlamaContext: method __init__ (line 248) | def __init__( method close (line 277) | def close(self): method __del__ (line 280) | def __del__(self): method n_ctx (line 283) | def n_ctx(self) -> int: method pooling_type (line 286) | def pooling_type(self) -> int: method kv_cache_clear (line 289) | def kv_cache_clear(self): method kv_cache_seq_rm (line 293) | def kv_cache_seq_rm(self, seq_id: int, p0: int, p1: int): method kv_cache_seq_cp (line 298) | def kv_cache_seq_cp(self, seq_id_src: int, seq_id_dst: int, p0: int, p... method kv_cache_seq_keep (line 302) | def kv_cache_seq_keep(self, seq_id: int): method kv_cache_seq_shift (line 306) | def kv_cache_seq_shift(self, seq_id: int, p0: int, p1: int, shift: int): method get_state_size (line 310) | def get_state_size(self) -> int: method decode (line 321) | def decode(self, batch: LlamaBatch): method encode (line 329) | def encode(self, batch: LlamaBatch): method set_n_threads (line 337) | def set_n_threads(self, n_threads: int, n_threads_batch: int): method get_logits (line 340) | def get_logits(self): method get_logits_ith (line 343) | def get_logits_ith(self, i: int): method get_embeddings (line 346) | def get_embeddings(self): method get_embeddings_ith (line 349) | def get_embeddings_ith(self, i: int): method get_embeddings_seq (line 352) | def get_embeddings_seq(self, seq_id: int): method set_rng_seed (line 357) | def set_rng_seed(self, seed: int): method sample_repetition_penalties (line 360) | def sample_repetition_penalties( method sample_softmax (line 371) | def sample_softmax(self, candidates: "_LlamaTokenDataArray"): method sample_top_k (line 374) | def sample_top_k(self, candidates: "_LlamaTokenDataArray", k: int, min... method sample_top_p (line 377) | def sample_top_p(self, candidates: "_LlamaTokenDataArray", p: float, m... method sample_min_p (line 380) | def sample_min_p(self, candidates: "_LlamaTokenDataArray", p: float, m... method sample_typical (line 383) | def sample_typical( method sample_temp (line 388) | def sample_temp(self, candidates: "_LlamaTokenDataArray", temp: float): method sample_grammar (line 391) | def sample_grammar(self, candidates: "_LlamaTokenDataArray", grammar: ... method sample_token_mirostat (line 394) | def sample_token_mirostat( method sample_token_mirostat_v2 (line 404) | def sample_token_mirostat_v2( method sample_token_greedy (line 413) | def sample_token_greedy(self, candidates: "_LlamaTokenDataArray") -> int: method sample_token (line 416) | def sample_token(self, candidates: "_LlamaTokenDataArray") -> int: method grammar_accept_token (line 420) | def grammar_accept_token(self, grammar: LlamaGrammar, token: int): method reset_timings (line 423) | def reset_timings(self): method print_timings (line 426) | def print_timings(self): method default_params (line 431) | def default_params(): class LlamaBatch (line 436) | class LlamaBatch: method __init__ (line 437) | def __init__( method close (line 462) | def close(self): method __del__ (line 465) | def __del__(self): method n_tokens (line 468) | def n_tokens(self) -> int: method reset (line 471) | def reset(self): method set_batch (line 474) | def set_batch(self, batch: Sequence[int], n_past: int, logits_all: bool): method add_sequence (line 485) | def add_sequence(self, batch: Sequence[int], seq_id: int, logits_all: ... class LlamaTokenDataArray (line 499) | class LlamaTokenDataArray: method __init__ (line 500) | def __init__(self, *, n_vocab: int): method copy_logits (line 517) | def copy_logits(self, logits: npt.NDArray[np.single]): function normalize_embedding (line 528) | def normalize_embedding(embedding): class LlamaSamplingParams (line 539) | class LlamaSamplingParams: class LlamaSamplingContext (line 566) | class LlamaSamplingContext: method reset (line 574) | def reset(self): method cp (line 580) | def cp(self): method last (line 589) | def last(self) -> Optional[int]: method prev_str (line 595) | def prev_str(self, ctx_main: LlamaContext, n: int) -> str: method sample (line 598) | def sample( method accept (line 607) | def accept(self, ctx_main: LlamaContext, id: int, apply_grammar: bool): class CustomSampler (line 611) | class CustomSampler: method __init__ (line 612) | def __init__( method get_sampler (line 640) | def get_sampler(self) -> llama_cpp.llama_sampler_p: class LlamaSampler (line 644) | class LlamaSampler: method __init__ (line 645) | def __init__(self): method close (line 661) | def close(self): method __del__ (line 664) | def __del__(self): method add_greedy (line 667) | def add_greedy(self): method add_dist (line 671) | def add_dist(self, seed: int): method add_softmax (line 675) | def add_softmax(self): method add_top_k (line 679) | def add_top_k(self, k: int): method add_top_p (line 683) | def add_top_p(self, p: float, min_keep: int = 1): method add_min_p (line 687) | def add_min_p(self, p: float, min_keep: int = 1): method add_typical (line 691) | def add_typical(self, p: float, min_keep: int = 1): method add_temp (line 695) | def add_temp(self, temp: float): method add_temp_ext (line 699) | def add_temp_ext(self, t: float, delta: float, exponent: float): method add_xtc (line 703) | def add_xtc(self, p: float, t: float, min_keep: int, seed: int): method add_top_n_sigma (line 707) | def add_top_n_sigma(self, n: float): method add_mirostat (line 711) | def add_mirostat(self, n_vocab: int, seed: int, tau: float, eta: float... method add_mirostat_v2 (line 715) | def add_mirostat_v2(self, seed: int, tau: float, eta: float): method add_grammar (line 719) | def add_grammar(self, model: LlamaModel, grammar: LlamaGrammar): method add_grammar_lazy_patterns (line 725) | def add_grammar_lazy_patterns( method add_penalties (line 751) | def add_penalties( method add_dry (line 766) | def add_dry( method add_logit_bias (line 793) | def add_logit_bias( method add_infill (line 811) | def add_infill(self, model: LlamaModel): method add_custom (line 815) | def add_custom( method get_seed (line 826) | def get_seed(self) -> int: method sample (line 829) | def sample(self, ctx: LlamaContext, idx: int = -1) -> int: method accept (line 832) | def accept(self, token: int): method reset (line 835) | def reset(self): method clone (line 838) | def clone(self): FILE: llama_cpp/_logger.py function llama_log_callback (line 30) | def llama_log_callback( function set_verbose (line 46) | def set_verbose(verbose: bool): FILE: llama_cpp/_utils.py class suppress_stdout_stderr (line 14) | class suppress_stdout_stderr(object): method __init__ (line 20) | def __init__(self, disable: bool = True): method __enter__ (line 24) | def __enter__(self): method __exit__ (line 44) | def __exit__(self, *_): class MetaSingleton (line 59) | class MetaSingleton(type): method __call__ (line 66) | def __call__(cls, *args: Any, **kwargs: Any) -> Any: class Singleton (line 72) | class Singleton(object, metaclass=MetaSingleton): method __init__ (line 77) | def __init__(self): FILE: llama_cpp/llama.py class Llama (line 55) | class Llama: method __init__ (line 60) | def __init__( method ctx (line 550) | def ctx(self) -> llama_cpp.llama_context_p: method model (line 554) | def model(self) -> llama_cpp.llama_model_p: method _input_ids (line 558) | def _input_ids(self) -> npt.NDArray[np.intc]: method _scores (line 562) | def _scores(self) -> npt.NDArray[np.single]: method eval_tokens (line 566) | def eval_tokens(self) -> Deque[int]: method eval_logits (line 570) | def eval_logits(self) -> Deque[List[float]]: method tokenize (line 576) | def tokenize( method detokenize (line 594) | def detokenize( method set_cache (line 614) | def set_cache(self, cache: Optional[BaseLlamaCache]): method set_seed (line 622) | def set_seed(self, seed: int): method reset (line 630) | def reset(self): method eval (line 634) | def eval(self, tokens: Sequence[int]): method _init_sampler (line 671) | def _init_sampler( method sample (line 760) | def sample( method generate (line 822) | def generate( method create_embedding (line 962) | def create_embedding( method embed (line 1002) | def embed( method _create_completion (line 1123) | def _create_completion( method create_completion (line 1743) | def create_completion( method __call__ (line 1840) | def __call__( method create_chat_completion (line 1932) | def create_chat_completion( method create_chat_completion_openai_v1 (line 2035) | def create_chat_completion_openai_v1( method __getstate__ (line 2068) | def __getstate__(self): method __setstate__ (line 2124) | def __setstate__(self, state): method save_state (line 2127) | def save_state(self) -> LlamaState: method load_state (line 2157) | def load_state(self, state: LlamaState) -> None: method n_ctx (line 2172) | def n_ctx(self) -> int: method n_embd (line 2176) | def n_embd(self) -> int: method n_vocab (line 2180) | def n_vocab(self) -> int: method tokenizer (line 2184) | def tokenizer(self) -> LlamaTokenizer: method token_eos (line 2188) | def token_eos(self) -> int: method token_bos (line 2192) | def token_bos(self) -> int: method token_nl (line 2196) | def token_nl(self) -> int: method pooling_type (line 2200) | def pooling_type(self) -> str: method close (line 2204) | def close(self) -> None: method __del__ (line 2208) | def __del__(self) -> None: method logits_to_logprobs (line 2212) | def logits_to_logprobs( method longest_token_prefix (line 2230) | def longest_token_prefix(a: Sequence[int], b: Sequence[int]): method from_pretrained (line 2240) | def from_pretrained( class LlamaState (line 2367) | class LlamaState: method __init__ (line 2368) | def __init__( class LogitsProcessorList (line 2390) | class LogitsProcessorList(List[LogitsProcessor]): method __call__ (line 2391) | def __call__( class StoppingCriteriaList (line 2402) | class StoppingCriteriaList(List[StoppingCriteria]): method __call__ (line 2403) | def __call__( class MinTokensLogitsProcessor (line 2409) | class MinTokensLogitsProcessor(LogitsProcessor): method __init__ (line 2410) | def __init__(self, min_tokens: int, token_eos: int): method __call__ (line 2415) | def __call__( FILE: llama_cpp/llama_cache.py class BaseLlamaCache (line 17) | class BaseLlamaCache(ABC): method __init__ (line 20) | def __init__(self, capacity_bytes: int = (2 << 30)): method cache_size (line 25) | def cache_size(self) -> int: method _find_longest_prefix_key (line 28) | def _find_longest_prefix_key( method __getitem__ (line 35) | def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaSta... method __contains__ (line 39) | def __contains__(self, key: Sequence[int]) -> bool: method __setitem__ (line 43) | def __setitem__( class LlamaRAMCache (line 49) | class LlamaRAMCache(BaseLlamaCache): method __init__ (line 52) | def __init__(self, capacity_bytes: int = (2 << 30)): method cache_size (line 60) | def cache_size(self): method _find_longest_prefix_key (line 63) | def _find_longest_prefix_key( method __getitem__ (line 79) | def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaSta... method __contains__ (line 88) | def __contains__(self, key: Sequence[int]) -> bool: method __setitem__ (line 91) | def __setitem__(self, key: Sequence[int], value: "llama_cpp.llama.Llam... class LlamaDiskCache (line 104) | class LlamaDiskCache(BaseLlamaCache): method __init__ (line 107) | def __init__( method cache_size (line 114) | def cache_size(self): method _find_longest_prefix_key (line 117) | def _find_longest_prefix_key( method __getitem__ (line 130) | def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaSta... method __contains__ (line 141) | def __contains__(self, key: Sequence[int]) -> bool: method __setitem__ (line 144) | def __setitem__(self, key: Sequence[int], value: "llama_cpp.llama.Llam... FILE: llama_cpp/llama_chat_format.py class LlamaChatCompletionHandler (line 61) | class LlamaChatCompletionHandler(Protocol): method __call__ (line 68) | def __call__( class LlamaChatCompletionHandlerNotFoundException (line 112) | class LlamaChatCompletionHandlerNotFoundException(Exception): class LlamaChatCompletionHandlerRegistry (line 116) | class LlamaChatCompletionHandlerRegistry(Singleton): method register_chat_completion_handler (line 119) | def register_chat_completion_handler( method unregister_chat_handler (line 131) | def unregister_chat_handler(self, name: str): method get_chat_completion_handler_by_name (line 137) | def get_chat_completion_handler_by_name( function get_chat_completion_handler (line 149) | def get_chat_completion_handler(name: str) -> LlamaChatCompletionHandler: function register_chat_completion_handler (line 155) | def register_chat_completion_handler(name: str): class ChatFormatterResponse (line 167) | class ChatFormatterResponse: class ChatFormatter (line 180) | class ChatFormatter(Protocol): method __call__ (line 186) | def __call__( class Jinja2ChatFormatter (line 194) | class Jinja2ChatFormatter(ChatFormatter): method __init__ (line 195) | def __init__( method strftime_now (line 219) | def strftime_now(f: str) -> str: method __call__ (line 222) | def __call__( method to_chat_handler (line 265) | def to_chat_handler(self) -> LlamaChatCompletionHandler: function _convert_text_completion_logprobs_to_chat (line 269) | def _convert_text_completion_logprobs_to_chat( function _convert_text_completion_to_chat (line 294) | def _convert_text_completion_to_chat( function _convert_text_completion_chunks_to_chat (line 318) | def _convert_text_completion_chunks_to_chat( function _convert_completion_to_chat (line 361) | def _convert_completion_to_chat( function _convert_completion_to_chat_function (line 378) | def _convert_completion_to_chat_function( function chat_formatter_to_chat_completion_handler (line 555) | def chat_formatter_to_chat_completion_handler( function hf_autotokenizer_to_chat_formatter (line 704) | def hf_autotokenizer_to_chat_formatter( function hf_autotokenizer_to_chat_completion_handler (line 729) | def hf_autotokenizer_to_chat_completion_handler( function hf_tokenizer_config_to_chat_formatter (line 736) | def hf_tokenizer_config_to_chat_formatter( function hf_tokenizer_config_to_chat_completion_handler (line 784) | def hf_tokenizer_config_to_chat_completion_handler( function guess_chat_format_from_gguf_metadata (line 794) | def guess_chat_format_from_gguf_metadata(metadata: Dict[str, str]) -> Op... function _get_system_message (line 817) | def _get_system_message( function _map_roles (line 827) | def _map_roles( function _format_llama2 (line 843) | def _format_llama2( function _format_add_colon_single (line 860) | def _format_add_colon_single( function _format_add_colon_two (line 873) | def _format_add_colon_two( function _format_no_colon_single (line 887) | def _format_no_colon_single( function _format_add_colon_space_single (line 900) | def _format_add_colon_space_single( function _format_chatml (line 913) | def _format_chatml( function _format_chatglm3 (line 926) | def _format_chatglm3( function _grammar_for_json (line 941) | def _grammar_for_json(verbose: bool = False): function _grammar_for_json_schema (line 947) | def _grammar_for_json_schema( function _grammar_for_response_format (line 959) | def _grammar_for_response_format( function register_chat_format (line 977) | def register_chat_format(name: str): function format_llama2 (line 991) | def format_llama2( function format_llama3 (line 1008) | def format_llama3( function format_alpaca (line 1025) | def format_alpaca( function format_qwen (line 1039) | def format_qwen( function format (line 1056) | def format( function format_oasst_llama (line 1072) | def format_oasst_llama( function format_baichuan2 (line 1088) | def format_baichuan2( function format_baichuan (line 1104) | def format_baichuan( function format_openbuddy (line 1120) | def format_openbuddy( function format_redpajama_incite (line 1142) | def format_redpajama_incite( function format_snoozy (line 1158) | def format_snoozy( function format_phind (line 1180) | def format_phind( function format_intel (line 1194) | def format_intel( function format_open_orca (line 1208) | def format_open_orca( function format_mistrallite (line 1235) | def format_mistrallite( function format_zephyr (line 1251) | def format_zephyr( function format_pygmalion (line 1268) | def format_pygmalion( function format_chatml (line 1284) | def format_chatml( function format_mistral_instruct (line 1301) | def format_mistral_instruct( function format_chatglm3 (line 1322) | def format_chatglm3( function format_openchat (line 1339) | def format_openchat( function format_saiga (line 1359) | def format_saiga( function format_gemma (line 1381) | def format_gemma( function functionary_chat_handler (line 1402) | def functionary_chat_handler( function functionary_v1_v2_chat_handler (line 1761) | def functionary_v1_v2_chat_handler( class Llava15ChatHandler (line 2659) | class Llava15ChatHandler: method __init__ (line 2699) | def __init__(self, clip_model_path: str, verbose: bool = True): method _init_mtmd_context (line 2711) | def _init_mtmd_context(self, llama_model: llama.Llama): method load_image (line 2746) | def load_image(self, image_url: str) -> bytes: method _create_bitmap_from_bytes (line 2749) | def _create_bitmap_from_bytes(self, image_bytes: bytes): method __call__ (line 2767) | def __call__( method _load_image (line 3031) | def _load_image(image_url: str) -> bytes: method get_image_urls (line 3044) | def get_image_urls(messages: List[llama_types.ChatCompletionRequestMes... method split_text_on_image_urls (line 3063) | def split_text_on_image_urls(text: str, image_urls: List[str]): method from_pretrained (line 3088) | def from_pretrained( class ObsidianChatHandler (line 3172) | class ObsidianChatHandler(Llava15ChatHandler): class MoondreamChatHandler (line 3228) | class MoondreamChatHandler(Llava15ChatHandler): class Llava16ChatHandler (line 3270) | class Llava16ChatHandler(Llava15ChatHandler): class NanoLlavaChatHandler (line 3318) | class NanoLlavaChatHandler(Llava15ChatHandler): class Llama3VisionAlphaChatHandler (line 3373) | class Llama3VisionAlphaChatHandler(Llava15ChatHandler): class MiniCPMv26ChatHandler (line 3426) | class MiniCPMv26ChatHandler(Llava15ChatHandler): class Qwen25VLChatHandler (line 3464) | class Qwen25VLChatHandler(Llava15ChatHandler): method __call__ (line 3497) | def __call__(self, **kwargs): function chatml_function_calling (line 3523) | def chatml_function_calling( FILE: llama_cpp/llama_cpp.py class llama_token_data (line 481) | class llama_token_data(ctypes.Structure): class llama_token_data_array (line 512) | class llama_token_data_array(ctypes.Structure): class llama_batch (line 569) | class llama_batch(ctypes.Structure): class llama_model_kv_override_value (line 630) | class llama_model_kv_override_value(ctypes.Union): class llama_model_kv_override (line 645) | class llama_model_kv_override(ctypes.Structure): class llama_model_params (line 698) | class llama_model_params(ctypes.Structure): class llama_context_params (line 800) | class llama_context_params(ctypes.Structure): class llama_model_quantize_params (line 934) | class llama_model_quantize_params(ctypes.Structure): class llama_logit_bias (line 989) | class llama_logit_bias(ctypes.Structure): class llama_sampler_chain_params (line 1012) | class llama_sampler_chain_params(ctypes.Structure): class llama_chat_message (line 1031) | class llama_chat_message(ctypes.Structure): function llama_model_default_params (line 1051) | def llama_model_default_params() -> llama_model_params: function llama_context_default_params (line 1062) | def llama_context_default_params() -> llama_context_params: function llama_sampler_chain_default_params (line 1073) | def llama_sampler_chain_default_params() -> llama_sampler_chain_params: function llama_model_quantize_default_params (line 1084) | def llama_model_quantize_default_params() -> llama_model_quantize_params: function llama_backend_init (line 1098) | def llama_backend_init(): function llama_backend_free (line 1128) | def llama_backend_free(): function llama_numa_init (line 1140) | def llama_numa_init(numa: int, /): function llama_load_model_from_file (line 1165) | def llama_load_model_from_file( function llama_model_load_from_file (line 1182) | def llama_model_load_from_file( function llama_model_load_from_splits (line 1204) | def llama_model_load_from_splits( function llama_model_save_to_file (line 1221) | def llama_model_save_to_file(model: llama_model_p, path_model: bytes, /): function llama_free_model (line 1233) | def llama_free_model(model: llama_model_p, /): function llama_model_free (line 1243) | def llama_model_free(model: llama_model_p, /): function llama_init_from_model (line 1255) | def llama_init_from_model( function llama_new_context_with_model (line 1270) | def llama_new_context_with_model( function llama_free (line 1283) | def llama_free(ctx: llama_context_p, /): function llama_time_us (line 1294) | def llama_time_us() -> int: function llama_max_devices (line 1300) | def llama_max_devices() -> int: function llama_max_parallel_sequences (line 1306) | def llama_max_parallel_sequences() -> int: function llama_supports_mmap (line 1312) | def llama_supports_mmap() -> bool: function llama_supports_mlock (line 1318) | def llama_supports_mlock() -> bool: function llama_supports_gpu_offload (line 1324) | def llama_supports_gpu_offload() -> bool: function llama_supports_rpc (line 1330) | def llama_supports_rpc() -> bool: function llama_n_ctx (line 1336) | def llama_n_ctx(ctx: llama_context_p, /) -> int: function llama_n_batch (line 1342) | def llama_n_batch(ctx: llama_context_p, /) -> int: function llama_n_ubatch (line 1348) | def llama_n_ubatch(ctx: llama_context_p, /) -> int: function llama_n_seq_max (line 1354) | def llama_n_seq_max(ctx: llama_context_p, /) -> int: function llama_n_ctx_train (line 1360) | def llama_n_ctx_train(model: llama_model_p, /) -> int: function llama_n_embd (line 1366) | def llama_n_embd(model: llama_model_p, /) -> int: function llama_n_layer (line 1372) | def llama_n_layer(model: llama_model_p, /) -> int: function llama_n_head (line 1378) | def llama_n_head(model: llama_model_p, /) -> int: function llama_n_vocab (line 1384) | def llama_n_vocab(model: llama_vocab_p, /) -> int: function llama_get_model (line 1390) | def llama_get_model(ctx: llama_context_p, /) -> Optional[llama_model_p]: function llama_get_memory (line 1396) | def llama_get_memory(ctx: llama_context_p, /) -> Optional[llama_memory_t]: function llama_pooling_type (line 1403) | def llama_pooling_type(ctx: llama_context_p, /) -> int: function llama_get_kv_self (line 1413) | def llama_get_kv_self(ctx: llama_context_p, /) -> Optional[llama_kv_cach... function llama_model_get_vocab (line 1420) | def llama_model_get_vocab(model: llama_model_p, /) -> Optional[llama_voc... function llama_model_rope_type (line 1426) | def llama_model_rope_type(model: llama_model_p, /) -> int: function llama_model_n_ctx_train (line 1432) | def llama_model_n_ctx_train(model: llama_model_p, /) -> int: function llama_model_n_embd (line 1438) | def llama_model_n_embd(model: llama_model_p, /) -> int: function llama_model_n_layer (line 1444) | def llama_model_n_layer(model: llama_model_p, /) -> int: function llama_model_n_head (line 1450) | def llama_model_n_head(model: llama_model_p, /) -> int: function llama_model_n_head_kv (line 1456) | def llama_model_n_head_kv(model: llama_model_p, /) -> int: function llama_model_n_swa (line 1462) | def llama_model_n_swa(model: llama_model_p, /) -> int: function llama_model_rope_freq_scale_train (line 1469) | def llama_model_rope_freq_scale_train(model: llama_model_p, /) -> float: function llama_model_n_cls_out (line 1477) | def llama_model_n_cls_out(model: llama_model_p, /) -> int: function llama_model_cls_label (line 1485) | def llama_model_cls_label(model: llama_model_p, i: int, /) -> Optional[b... function llama_vocab_type (line 1492) | def llama_vocab_type(vocab: llama_vocab_p, /) -> int: function llama_vocab_n_tokens (line 1498) | def llama_vocab_n_tokens(vocab: llama_vocab_p, /) -> int: function llama_model_meta_val_str (line 1521) | def llama_model_meta_val_str( function llama_model_meta_count (line 1535) | def llama_model_meta_count(model: llama_model_p, /) -> int: function llama_model_meta_key_by_index (line 1552) | def llama_model_meta_key_by_index( function llama_model_meta_val_str_by_index (line 1575) | def llama_model_meta_val_str_by_index( function llama_model_desc (line 1593) | def llama_model_desc( function llama_model_size (line 1606) | def llama_model_size(model: llama_model_p, /) -> int: function llama_model_chat_template (line 1615) | def llama_model_chat_template(model: llama_model_p, name: Optional[bytes... function llama_model_n_params (line 1624) | def llama_model_n_params(model: llama_model_p, /) -> int: function llama_model_has_encoder (line 1632) | def llama_model_has_encoder(model: llama_model_p, /) -> bool: function llama_model_has_decoder (line 1640) | def llama_model_has_decoder(model: llama_model_p, /) -> bool: function llama_model_decoder_start_token (line 1651) | def llama_model_decoder_start_token(model: llama_model_p, /) -> int: function llama_model_is_recurrent (line 1661) | def llama_model_is_recurrent(model: llama_model_p, /) -> bool: function llama_model_is_diffusion (line 1669) | def llama_model_is_diffusion(model: llama_model_p, /) -> bool: function llama_model_quantize (line 1688) | def llama_model_quantize( function llama_adapter_lora_init (line 1711) | def llama_adapter_lora_init( function llama_adapter_lora_free (line 1725) | def llama_adapter_lora_free(adapter: llama_adapter_lora_p, /): function llama_set_adapter_lora (line 1743) | def llama_set_adapter_lora( function llama_rm_adapter_lora (line 1761) | def llama_rm_adapter_lora( function llama_clear_adapter_lora (line 1776) | def llama_clear_adapter_lora(ctx: llama_context_p, /): function llama_apply_adapter_cvec (line 1806) | def llama_apply_adapter_cvec( function llama_memory_clear (line 1838) | def llama_memory_clear(mem: llama_memory_t, data: bool, /): function llama_memory_seq_rm (line 1864) | def llama_memory_seq_rm( function llama_memory_seq_cp (line 1901) | def llama_memory_seq_cp( function llama_memory_seq_keep (line 1922) | def llama_memory_seq_keep(mem: llama_memory_t, seq_id: Union[llama_seq_i... function llama_memory_seq_add (line 1947) | def llama_memory_seq_add( function llama_memory_seq_div (line 1981) | def llama_memory_seq_div( function llama_memory_seq_pos_min (line 2005) | def llama_memory_seq_pos_min( function llama_memory_seq_pos_max (line 2023) | def llama_memory_seq_pos_max( function llama_memory_can_shift (line 2034) | def llama_memory_can_shift(mem: llama_memory_t, /) -> bool: function llama_kv_self_n_tokens (line 2050) | def llama_kv_self_n_tokens(ctx: llama_context_p, /) -> int: function llama_kv_self_used_cells (line 2061) | def llama_kv_self_used_cells(ctx: llama_context_p, /) -> int: function llama_kv_self_clear (line 2073) | def llama_kv_self_clear(ctx: llama_context_p, /): function llama_kv_self_seq_rm (line 2099) | def llama_kv_self_seq_rm( function llama_kv_self_seq_cp (line 2132) | def llama_kv_self_seq_cp( function llama_kv_self_seq_keep (line 2152) | def llama_kv_self_seq_keep(ctx: llama_context_p, seq_id: Union[llama_seq... function llama_kv_self_seq_add (line 2180) | def llama_kv_self_seq_add( function llama_kv_self_seq_div (line 2215) | def llama_kv_self_seq_div( function llama_kv_self_seq_pos_min (line 2238) | def llama_kv_self_seq_pos_min( function llama_kv_self_seq_pos_max (line 2255) | def llama_kv_self_seq_pos_max( function llama_kv_self_defrag (line 2268) | def llama_kv_self_defrag(ctx: llama_context_p, /): function llama_kv_self_can_shift (line 2277) | def llama_kv_self_can_shift(ctx: llama_context_p, /) -> bool: function llama_kv_self_update (line 2286) | def llama_kv_self_update(ctx: llama_context_p, /): function llama_state_get_size (line 2300) | def llama_state_get_size(ctx: llama_context_p, /) -> int: function llama_get_state_size (line 2308) | def llama_get_state_size(ctx: llama_context_p, /) -> int: function llama_state_get_data (line 2329) | def llama_state_get_data( function llama_copy_state_data (line 2353) | def llama_copy_state_data( function llama_state_set_data (line 2371) | def llama_state_set_data( function llama_set_state_data (line 2391) | def llama_set_state_data( function llama_state_load_file (line 2416) | def llama_state_load_file( function llama_load_session_file (line 2445) | def llama_load_session_file( function llama_state_save_file (line 2471) | def llama_state_save_file( function llama_save_session_file (line 2497) | def llama_save_session_file( function llama_state_seq_get_size (line 2516) | def llama_state_seq_get_size(ctx: llama_context_p, seq_id: llama_seq_id,... function llama_state_seq_get_data (line 2537) | def llama_state_seq_get_data( function llama_state_seq_set_data (line 2567) | def llama_state_seq_set_data( function llama_state_seq_save_file (line 2595) | def llama_state_seq_save_file( function llama_state_seq_load_file (line 2625) | def llama_state_seq_load_file( function llama_batch_get_one (line 2658) | def llama_batch_get_one( function llama_batch_init (line 2684) | def llama_batch_init( function llama_batch_free (line 2703) | def llama_batch_free(batch: llama_batch, /): function llama_encode (line 2718) | def llama_encode(ctx: llama_context_p, batch: llama_batch, /) -> int: function llama_decode (line 2741) | def llama_decode(ctx: llama_context_p, batch: llama_batch, /) -> int: function llama_set_n_threads (line 2764) | def llama_set_n_threads( function llama_n_threads (line 2780) | def llama_n_threads(ctx: llama_context_p, /) -> int: function llama_n_threads_batch (line 2788) | def llama_n_threads_batch(ctx: llama_context_p, /) -> int: function llama_set_embeddings (line 2797) | def llama_set_embeddings(ctx: llama_context_p, embeddings: bool, /): function llama_set_causal_attn (line 2806) | def llama_set_causal_attn(ctx: llama_context_p, causal_attn: bool, /): function llama_set_warmup (line 2816) | def llama_set_warmup(ctx: llama_context_p, warmup: bool, /): function llama_set_abort_callback (line 2829) | def llama_set_abort_callback( function llama_synchronize (line 2844) | def llama_synchronize(ctx: llama_context_p, /): function llama_get_logits (line 2861) | def llama_get_logits(ctx: llama_context_p, /) -> CtypesArray[ctypes.c_fl... function llama_get_logits_ith (line 2883) | def llama_get_logits_ith( function llama_get_embeddings (line 2902) | def llama_get_embeddings(ctx: llama_context_p, /) -> CtypesArray[ctypes.... function llama_get_embeddings_ith (line 2919) | def llama_get_embeddings_ith( function llama_get_embeddings_seq (line 2937) | def llama_get_embeddings_seq( function llama_vocab_get_text (line 2954) | def llama_vocab_get_text( function llama_vocab_get_score (line 2964) | def llama_vocab_get_score( function llama_vocab_get_attr (line 2974) | def llama_vocab_get_attr( function llama_vocab_is_eog (line 2985) | def llama_vocab_is_eog(vocab: llama_vocab_p, token: Union[llama_token, i... function llama_vocab_is_control (line 2995) | def llama_vocab_is_control( function llama_vocab_bos (line 3005) | def llama_vocab_bos(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_eos (line 3012) | def llama_vocab_eos(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_eot (line 3019) | def llama_vocab_eot(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_sep (line 3026) | def llama_vocab_sep(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_nl (line 3033) | def llama_vocab_nl(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_pad (line 3040) | def llama_vocab_pad(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_mask (line 3047) | def llama_vocab_mask(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_get_add_bos (line 3058) | def llama_vocab_get_add_bos(vocab: llama_vocab_p, /) -> bool: function llama_vocab_get_add_eos (line 3068) | def llama_vocab_get_add_eos(vocab: llama_vocab_p, /) -> bool: function llama_vocab_get_add_sep (line 3078) | def llama_vocab_get_add_sep(vocab: llama_vocab_p, /) -> bool: function llama_vocab_fim_pre (line 3088) | def llama_vocab_fim_pre(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_fim_suf (line 3098) | def llama_vocab_fim_suf(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_fim_mid (line 3108) | def llama_vocab_fim_mid(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_fim_pad (line 3118) | def llama_vocab_fim_pad(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_fim_rep (line 3128) | def llama_vocab_fim_rep(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_fim_sep (line 3138) | def llama_vocab_fim_sep(vocab: llama_vocab_p, /) -> llama_token: function llama_token_get_text (line 3149) | def llama_token_get_text( function llama_token_get_score (line 3161) | def llama_token_get_score( function llama_token_get_attr (line 3172) | def llama_token_get_attr( function llama_token_is_eog (line 3183) | def llama_token_is_eog( function llama_token_is_control (line 3194) | def llama_token_is_control( function llama_token_bos (line 3205) | def llama_token_bos(vocab: llama_vocab_p, /) -> int: function llama_token_eos (line 3214) | def llama_token_eos(vocab: llama_vocab_p, /) -> int: function llama_token_eot (line 3223) | def llama_token_eot(vocab: llama_vocab_p, /) -> int: function llama_token_cls (line 3232) | def llama_token_cls(vocab: llama_vocab_p, /) -> int: function llama_token_sep (line 3241) | def llama_token_sep(vocab: llama_vocab_p, /) -> int: function llama_token_nl (line 3251) | def llama_token_nl(vocab: llama_vocab_p, /) -> int: function llama_token_pad (line 3261) | def llama_token_pad(vocab: llama_vocab_p, /) -> int: function llama_add_bos_token (line 3271) | def llama_add_bos_token(vocab: llama_vocab_p, /) -> bool: function llama_add_eos_token (line 3280) | def llama_add_eos_token(vocab: llama_vocab_p, /) -> bool: function llama_token_fim_pre (line 3290) | def llama_token_fim_pre(vocab: llama_vocab_p, /) -> llama_token: function llama_token_fim_suf (line 3299) | def llama_token_fim_suf(vocab: llama_vocab_p, /) -> llama_token: function llama_token_fim_mid (line 3308) | def llama_token_fim_mid(vocab: llama_vocab_p, /) -> llama_token: function llama_token_fim_pad (line 3317) | def llama_token_fim_pad(vocab: llama_vocab_p, /) -> llama_token: function llama_token_fim_rep (line 3326) | def llama_token_fim_rep(vocab: llama_vocab_p, /) -> llama_token: function llama_token_fim_sep (line 3335) | def llama_token_fim_sep(vocab: llama_vocab_p, /) -> llama_token: function llama_vocab_cls (line 3346) | def llama_vocab_cls(vocab: llama_vocab_p, /) -> llama_token: function llama_tokenize (line 3385) | def llama_tokenize( function llama_token_to_piece (line 3437) | def llama_token_to_piece( function llama_detokenize (line 3488) | def llama_detokenize( function llama_chat_apply_template (line 3544) | def llama_chat_apply_template( function llama_chat_builtin_templates (line 3579) | def llama_chat_builtin_templates( class llama_sampler_i (line 3617) | class llama_sampler_i(ctypes.Structure): class llama_sampler (line 3625) | class llama_sampler(ctypes.Structure): function llama_sampler_init (line 3663) | def llama_sampler_init( function llama_sampler_name (line 3675) | def llama_sampler_name(smpl: llama_sampler_p, /) -> bytes: function llama_sampler_accept (line 3685) | def llama_sampler_accept(smpl: llama_sampler_p, token: Union[llama_token... function llama_sampler_apply (line 3695) | def llama_sampler_apply( function llama_sampler_reset (line 3707) | def llama_sampler_reset(smpl: llama_sampler_p, /): function llama_sampler_clone (line 3717) | def llama_sampler_clone(smpl: llama_sampler_p, /) -> llama_sampler_p: function llama_sampler_free (line 3728) | def llama_sampler_free(smpl: llama_sampler_p, /): function llama_sampler_chain_init (line 3741) | def llama_sampler_chain_init(params: llama_sampler_chain_params, /) -> l... function llama_sampler_chain_add (line 3752) | def llama_sampler_chain_add(chain: llama_sampler_p, smpl: llama_sampler_... function llama_sampler_chain_get (line 3762) | def llama_sampler_chain_get( function llama_sampler_chain_n (line 3774) | def llama_sampler_chain_n(chain: llama_sampler_p, /) -> int: function llama_sampler_chain_remove (line 3785) | def llama_sampler_chain_remove( function llama_sampler_init_greedy (line 3795) | def llama_sampler_init_greedy() -> llama_sampler_p: function llama_sampler_init_dist (line 3801) | def llama_sampler_init_dist(seed: int) -> llama_sampler_p: function llama_sampler_init_softmax (line 3810) | def llama_sampler_init_softmax() -> llama_sampler_p: function llama_sampler_init_top_k (line 3818) | def llama_sampler_init_top_k(k: int) -> llama_sampler_p: function llama_sampler_init_top_p (line 3829) | def llama_sampler_init_top_p(p: float, min_keep: int) -> llama_sampler_p: function llama_sampler_init_min_p (line 3840) | def llama_sampler_init_min_p(p: float, min_keep: int) -> llama_sampler_p: function llama_sampler_init_typical (line 3851) | def llama_sampler_init_typical(p: float, min_keep: int) -> llama_sampler_p: function llama_sampler_init_temp (line 3858) | def llama_sampler_init_temp(t: float) -> llama_sampler_p: function llama_sampler_init_temp_ext (line 3869) | def llama_sampler_init_temp_ext( function llama_sampler_init_xtc (line 3882) | def llama_sampler_init_xtc( function llama_sampler_init_top_n_sigma (line 3895) | def llama_sampler_init_top_n_sigma(n: float, /) -> llama_sampler_p: function llama_sampler_init_mirostat (line 3911) | def llama_sampler_init_mirostat( function llama_sampler_init_mirostat_v2 (line 3927) | def llama_sampler_init_mirostat_v2( function llama_sampler_init_grammar (line 3943) | def llama_sampler_init_grammar( function llama_sampler_init_grammar_lazy (line 3971) | def llama_sampler_init_grammar_lazy( function llama_sampler_init_grammar_lazy_patterns (line 4006) | def llama_sampler_init_grammar_lazy_patterns( function llama_sampler_init_penalties (line 4030) | def llama_sampler_init_penalties( function llama_sampler_init_dry (line 4064) | def llama_sampler_init_dry( function llama_sampler_init_logit_bias (line 4087) | def llama_sampler_init_logit_bias( function llama_sampler_init_infill (line 4100) | def llama_sampler_init_infill(vocab: llama_vocab_p, /) -> llama_sampler_p: function llama_sampler_get_seed (line 4111) | def llama_sampler_get_seed(smpl: llama_sampler_p, /) -> int: function llama_sampler_sample (line 4122) | def llama_sampler_sample( function llama_split_path (line 4139) | def llama_split_path( function llama_split_prefix (line 4158) | def llama_split_prefix( function llama_print_system_info (line 4173) | def llama_print_system_info() -> bytes: function llama_log_set (line 4185) | def llama_log_set( class llama_perf_context_data (line 4210) | class llama_perf_context_data(ctypes.Structure): class llama_perf_sampler_data (line 4227) | class llama_perf_sampler_data(ctypes.Structure): function llama_perf_context (line 4240) | def llama_perf_context(ctx: llama_context_p, /) -> llama_perf_context_data: function llama_perf_context_print (line 4250) | def llama_perf_context_print(ctx: llama_context_p, /): function llama_perf_context_reset (line 4260) | def llama_perf_context_reset(ctx: llama_context_p, /): function llama_perf_sampler (line 4271) | def llama_perf_sampler(chain: llama_sampler_p, /) -> llama_perf_sampler_... function llama_perf_sampler_print (line 4281) | def llama_perf_sampler_print(chain: llama_sampler_p, /): function llama_perf_sampler_reset (line 4291) | def llama_perf_sampler_reset(chain: llama_sampler_p, /): function llama_opt_param_filter_all (line 4310) | def llama_opt_param_filter_all(tensor: ctypes.c_void_p, userdata: ctypes... class llama_opt_params (line 4323) | class llama_opt_params(ctypes.Structure): function llama_opt_init (line 4339) | def llama_opt_init(lctx: llama_context_p, model: llama_model_p, lopt_par... function llama_opt_epoch (line 4364) | def llama_opt_epoch( FILE: llama_cpp/llama_grammar.py class LlamaGrammar (line 19) | class LlamaGrammar: method __init__ (line 20) | def __init__(self, *args, _grammar: str, **kwargs): method from_string (line 25) | def from_string(cls, grammar: str, verbose: bool = True) -> "LlamaGram... method from_file (line 29) | def from_file(cls, file: Union[str, Path], verbose: bool = True) -> "L... method from_json_schema (line 46) | def from_json_schema(cls, json_schema: str, verbose: bool = True) -> "... function _build_repetition (line 254) | def _build_repetition( class BuiltinRule (line 310) | class BuiltinRule: method __init__ (line 311) | def __init__(self, content: str, deps: list = None): class SchemaConverter (line 380) | class SchemaConverter: method __init__ (line 381) | def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern): method _format_literal (line 392) | def _format_literal(self, literal): method not_literal (line 398) | def not_literal( method _add_rule (line 424) | def _add_rule(self, name, rule): method resolve_refs (line 439) | def resolve_refs(self, schema: dict, url: str): method _generate_union_rule (line 492) | def _generate_union_rule(self, name, alt_schemas): method _visit_pattern (line 500) | def _visit_pattern(self, pattern, name): method _resolve_ref (line 685) | def _resolve_ref(self, ref): method _generate_constant_rule (line 694) | def _generate_constant_rule(self, value): method visit (line 697) | def visit(self, schema, name): method _add_primitive (line 846) | def _add_primitive(self, name: str, rule: BuiltinRule): method _build_object_rule (line 856) | def _build_object_rule( method format_grammar (line 937) | def format_grammar(self): function json_schema_to_gbnf (line 944) | def json_schema_to_gbnf(schema: str, prop_order: Optional[List[str]] = N... FILE: llama_cpp/llama_speculative.py class LlamaDraftModel (line 9) | class LlamaDraftModel(abc.ABC): method __call__ (line 11) | def __call__( class LlamaPromptLookupDecoding (line 17) | class LlamaPromptLookupDecoding(LlamaDraftModel): method __init__ (line 20) | def __init__(self, max_ngram_size: int = 2, num_pred_tokens: int = 10): method find_candidate_pred_tokens (line 25) | def find_candidate_pred_tokens( method __call__ (line 57) | def __call__( FILE: llama_cpp/llama_tokenizer.py class BaseLlamaTokenizer (line 14) | class BaseLlamaTokenizer(abc.ABC): method tokenize (line 16) | def tokenize( method detokenize (line 29) | def detokenize( class LlamaTokenizer (line 45) | class LlamaTokenizer(BaseLlamaTokenizer): method __init__ (line 46) | def __init__(self, llama: llama_cpp.Llama): method tokenize (line 49) | def tokenize( method detokenize (line 54) | def detokenize( method encode (line 62) | def encode( method decode (line 69) | def decode(self, tokens: List[int]) -> str: method from_ggml_file (line 73) | def from_ggml_file(cls, path: str) -> "LlamaTokenizer": class LlamaHFTokenizer (line 77) | class LlamaHFTokenizer(BaseLlamaTokenizer): method __init__ (line 78) | def __init__(self, hf_tokenizer: Any): method tokenize (line 81) | def tokenize( method detokenize (line 88) | def detokenize( method from_pretrained (line 109) | def from_pretrained(cls, pretrained_model_name_or_path: str) -> "Llama... FILE: llama_cpp/llama_types.py class EmbeddingUsage (line 20) | class EmbeddingUsage(TypedDict): class Embedding (line 25) | class Embedding(TypedDict): class CreateEmbeddingResponse (line 31) | class CreateEmbeddingResponse(TypedDict): class CompletionLogprobs (line 38) | class CompletionLogprobs(TypedDict): class CompletionChoice (line 45) | class CompletionChoice(TypedDict): class CompletionUsage (line 52) | class CompletionUsage(TypedDict): class CreateCompletionResponse (line 58) | class CreateCompletionResponse(TypedDict): class ChatCompletionResponseFunctionCall (line 67) | class ChatCompletionResponseFunctionCall(TypedDict): class ChatCompletionResponseMessage (line 72) | class ChatCompletionResponseMessage(TypedDict): class ChatCompletionFunction (line 79) | class ChatCompletionFunction(TypedDict): class ChatCompletionTopLogprobToken (line 85) | class ChatCompletionTopLogprobToken(TypedDict): class ChatCompletionLogprobToken (line 91) | class ChatCompletionLogprobToken(ChatCompletionTopLogprobToken): class ChatCompletionLogprobs (line 98) | class ChatCompletionLogprobs(TypedDict): class ChatCompletionResponseChoice (line 103) | class ChatCompletionResponseChoice(TypedDict): class CreateChatCompletionResponse (line 110) | class CreateChatCompletionResponse(TypedDict): class ChatCompletionMessageToolCallChunkFunction (line 119) | class ChatCompletionMessageToolCallChunkFunction(TypedDict): class ChatCompletionMessageToolCallChunk (line 124) | class ChatCompletionMessageToolCallChunk(TypedDict): class ChatCompletionStreamResponseDeltaEmpty (line 131) | class ChatCompletionStreamResponseDeltaEmpty(TypedDict): class ChatCompletionStreamResponseDeltaFunctionCall (line 135) | class ChatCompletionStreamResponseDeltaFunctionCall(TypedDict): class ChatCompletionStreamResponseDelta (line 140) | class ChatCompletionStreamResponseDelta(TypedDict): class ChatCompletionStreamResponseChoice (line 149) | class ChatCompletionStreamResponseChoice(TypedDict): class CreateChatCompletionStreamResponse (line 158) | class CreateChatCompletionStreamResponse(TypedDict): class ChatCompletionFunctions (line 166) | class ChatCompletionFunctions(TypedDict): class ChatCompletionFunctionCallOption (line 172) | class ChatCompletionFunctionCallOption(TypedDict): class ChatCompletionRequestResponseFormat (line 176) | class ChatCompletionRequestResponseFormat(TypedDict): class ChatCompletionRequestMessageContentPartText (line 183) | class ChatCompletionRequestMessageContentPartText(TypedDict): class ChatCompletionRequestMessageContentPartImageImageUrl (line 188) | class ChatCompletionRequestMessageContentPartImageImageUrl(TypedDict): class ChatCompletionRequestMessageContentPartImage (line 193) | class ChatCompletionRequestMessageContentPartImage(TypedDict): class ChatCompletionRequestSystemMessage (line 204) | class ChatCompletionRequestSystemMessage(TypedDict): class ChatCompletionRequestUserMessage (line 209) | class ChatCompletionRequestUserMessage(TypedDict): class ChatCompletionMessageToolCallFunction (line 214) | class ChatCompletionMessageToolCallFunction(TypedDict): class ChatCompletionMessageToolCall (line 219) | class ChatCompletionMessageToolCall(TypedDict): class ChatCompletionRequestAssistantMessageFunctionCall (line 228) | class ChatCompletionRequestAssistantMessageFunctionCall(TypedDict): class ChatCompletionRequestAssistantMessage (line 233) | class ChatCompletionRequestAssistantMessage(TypedDict): class ChatCompletionRequestToolMessage (line 242) | class ChatCompletionRequestToolMessage(TypedDict): class ChatCompletionRequestFunctionMessage (line 248) | class ChatCompletionRequestFunctionMessage(TypedDict): class ChatCompletionRequestFunctionCallOption (line 264) | class ChatCompletionRequestFunctionCallOption(TypedDict): class ChatCompletionToolFunction (line 275) | class ChatCompletionToolFunction(TypedDict): class ChatCompletionTool (line 281) | class ChatCompletionTool(TypedDict): class ChatCompletionNamedToolChoiceFunction (line 286) | class ChatCompletionNamedToolChoiceFunction(TypedDict): class ChatCompletionNamedToolChoice (line 290) | class ChatCompletionNamedToolChoice(TypedDict): FILE: llama_cpp/llava_cpp.py class llava_image_embed (line 60) | class llava_image_embed(Structure): function llava_validate_embed_size (line 74) | def llava_validate_embed_size( function llava_image_embed_make_with_bytes (line 87) | def llava_image_embed_make_with_bytes( function llava_image_embed_make_with_filename (line 104) | def llava_image_embed_make_with_filename( function llava_image_embed_free (line 113) | def llava_image_embed_free(embed: "_Pointer[llava_image_embed]", /): function llava_eval_image_embed (line 129) | def llava_eval_image_embed( function clip_model_load (line 147) | def clip_model_load( function clip_free (line 156) | def clip_free(ctx: clip_ctx_p, /): FILE: llama_cpp/mtmd_cpp.py class mtmd_context_params (line 75) | class mtmd_context_params(Structure): class mtmd_input_text (line 85) | class mtmd_input_text(Structure): function mtmd_default_marker (line 98) | def mtmd_default_marker() -> bytes: function mtmd_context_params_default (line 103) | def mtmd_context_params_default() -> mtmd_context_params: function mtmd_init_from_file (line 114) | def mtmd_init_from_file( function mtmd_free (line 124) | def mtmd_free(ctx: mtmd_context_p, /): function mtmd_support_vision (line 129) | def mtmd_support_vision(ctx: mtmd_context_p, /) -> bool: function mtmd_bitmap_init (line 138) | def mtmd_bitmap_init( function mtmd_bitmap_free (line 148) | def mtmd_bitmap_free(bitmap: mtmd_bitmap_p, /): function mtmd_input_chunks_init (line 153) | def mtmd_input_chunks_init() -> Optional[mtmd_input_chunks_p]: function mtmd_input_chunks_free (line 158) | def mtmd_input_chunks_free(chunks: mtmd_input_chunks_p, /): function mtmd_input_chunks_size (line 163) | def mtmd_input_chunks_size(chunks: mtmd_input_chunks_p, /) -> int: function mtmd_input_chunks_get (line 172) | def mtmd_input_chunks_get( function mtmd_tokenize (line 193) | def mtmd_tokenize( function mtmd_input_chunk_get_n_tokens (line 205) | def mtmd_input_chunk_get_n_tokens(chunk: mtmd_input_chunk_p, /) -> int: function mtmd_input_chunk_get_type (line 210) | def mtmd_input_chunk_get_type(chunk: mtmd_input_chunk_p, /) -> int: function mtmd_input_chunk_get_tokens_text (line 219) | def mtmd_input_chunk_get_tokens_text( function mtmd_helper_bitmap_init_from_buf (line 234) | def mtmd_helper_bitmap_init_from_buf( function mtmd_helper_get_n_tokens (line 244) | def mtmd_helper_get_n_tokens(chunks: mtmd_input_chunks_p, /) -> int: function mtmd_helper_eval_chunk_single (line 269) | def mtmd_helper_eval_chunk_single( FILE: llama_cpp/server/__main__.py function main (line 43) | def main(): FILE: llama_cpp/server/app.py function set_server_settings (line 53) | def set_server_settings(server_settings: ServerSettings): function get_server_settings (line 58) | def get_server_settings(): function set_llama_proxy (line 68) | def set_llama_proxy(model_settings: List[ModelSettings]): function get_llama_proxy (line 73) | async def get_llama_proxy(): function set_ping_message_factory (line 95) | def set_ping_message_factory(factory: typing.Callable[[], bytes]): function create_app (line 100) | def create_app( function prepare_request_resources (line 158) | def prepare_request_resources( function get_event_publisher (line 191) | async def get_event_publisher( function _logit_bias_tokens_to_input_ids (line 225) | def _logit_bias_tokens_to_input_ids( function authenticate (line 241) | async def authenticate( function create_completion (line 303) | async def create_completion( function create_embedding (line 366) | async def create_embedding( function create_chat_completion (line 408) | async def create_chat_completion( function get_models (line 535) | async def get_models( function tokenize (line 561) | async def tokenize( function count_query_tokens (line 576) | async def count_query_tokens( function detokenize (line 591) | async def detokenize( FILE: llama_cpp/server/cli.py function _get_base_type (line 10) | def _get_base_type(annotation: Type[Any]) -> Type[Any]: function _contains_list_type (line 30) | def _contains_list_type(annotation: Type[Any] | None) -> bool: function _parse_bool_arg (line 41) | def _parse_bool_arg(arg: str | bytes | bool) -> bool: function add_args_from_model (line 58) | def add_args_from_model(parser: argparse.ArgumentParser, model: Type[Bas... function parse_model_from_args (line 89) | def parse_model_from_args(model: T, args: argparse.Namespace) -> T: FILE: llama_cpp/server/errors.py class ErrorResponse (line 26) | class ErrorResponse(TypedDict): class ErrorResponseFormatters (line 35) | class ErrorResponseFormatters: method context_length_exceeded (line 48) | def context_length_exceeded( method model_not_found (line 86) | def model_not_found( class RouteErrorHandler (line 102) | class RouteErrorHandler(APIRoute): method error_message_wrapper (line 125) | def error_message_wrapper( method get_route_handler (line 162) | def get_route_handler( FILE: llama_cpp/server/model.py class LlamaProxy (line 14) | class LlamaProxy: method __init__ (line 15) | def __init__(self, models: List[ModelSettings]) -> None: method __call__ (line 36) | def __call__(self, model: Optional[str] = None) -> llama_cpp.Llama: method __getitem__ (line 56) | def __getitem__(self, model: str): method __setitem__ (line 59) | def __setitem__(self, model: str, settings: Union[ModelSettings, str, ... method __iter__ (line 64) | def __iter__(self): method free (line 68) | def free(self): method load_llama_from_model_settings (line 74) | def load_llama_from_model_settings(settings: ModelSettings) -> llama_c... FILE: llama_cpp/server/settings.py class ModelSettings (line 17) | class ModelSettings(BaseSettings): method set_dynamic_defaults (line 191) | def set_dynamic_defaults(self) -> Self: class ServerSettings (line 202) | class ServerSettings(BaseSettings): class Settings (line 233) | class Settings(ServerSettings, ModelSettings): class ConfigFileSettings (line 237) | class ConfigFileSettings(ServerSettings): FILE: llama_cpp/server/types.py class CreateCompletionRequest (line 109) | class CreateCompletionRequest(BaseModel): class CreateEmbeddingRequest (line 167) | class CreateEmbeddingRequest(BaseModel): class ChatCompletionRequestMessage (line 183) | class ChatCompletionRequestMessage(BaseModel): class CreateChatCompletionRequest (line 192) | class CreateChatCompletionRequest(BaseModel): class ModelData (line 271) | class ModelData(TypedDict): class ModelList (line 278) | class ModelList(TypedDict): class TokenizeInputRequest (line 283) | class TokenizeInputRequest(BaseModel): class TokenizeInputResponse (line 292) | class TokenizeInputResponse(BaseModel): class TokenizeInputCountResponse (line 298) | class TokenizeInputCountResponse(BaseModel): class DetokenizeInputRequest (line 304) | class DetokenizeInputRequest(BaseModel): class DetokenizeInputResponse (line 311) | class DetokenizeInputResponse(BaseModel): FILE: tests/test_llama.py function test_llama_cpp_version (line 18) | def test_llama_cpp_version(): function test_llama_cpp_tokenization (line 22) | def test_llama_cpp_tokenization(): function llama_cpp_model_path (line 60) | def llama_cpp_model_path(): function test_real_model (line 67) | def test_real_model(llama_cpp_model_path): function test_real_llama (line 117) | def test_real_llama(llama_cpp_model_path): function test_real_llama_embeddings (line 221) | def test_real_llama_embeddings(llama_cpp_model_path): FILE: tests/test_llama_chat_format.py function test_mistral_instruct (line 13) | def test_mistral_instruct(): function test_hf_tokenizer_config_str_to_chat_formatter (line 78) | def test_hf_tokenizer_config_str_to_chat_formatter(): FILE: tests/test_llama_grammar.py function test_grammar_from_string (line 11) | def test_grammar_from_string(): function test_composed_pydantic_grammar (line 18) | def test_composed_pydantic_grammar(): function test_grammar_anyof (line 55) | def test_grammar_anyof(): FILE: tests/test_llama_speculative.py function test_find_candidate_pred_tokens (line 5) | def test_find_candidate_pred_tokens():