SYMBOL INDEX (289 symbols across 48 files)

FILE: examples/alpaca/process_data.py
  class Preprocessor (line 18) | class Preprocessor:
    method __init__ (line 21) | def __init__(self, tokenizer):
    method batch_tokenize (line 25) | def batch_tokenize(self, texts):
    method make_prompt (line 37) | def make_prompt(self, input_row):
    method make_short_prompt (line 42) | def make_short_prompt(self, input_row):
    method construct_dataset (line 47) | def construct_dataset(self, input_data):

FILE: llama_recipes/configs/datasets.py
  class samsum_dataset (line 8) | class samsum_dataset:
  class grammar_dataset (line 16) | class grammar_dataset:
  class alpaca_dataset (line 24) | class alpaca_dataset:
  class completion (line 32) | class completion:

FILE: llama_recipes/configs/fsdp.py
  class fsdp_config (line 10) | class fsdp_config:

FILE: llama_recipes/configs/peft.py
  class lora_config (line 10) | class lora_config:
  class llama_adapter_config (line 21) | class llama_adapter_config:
  class prefix_config (line 28) | class prefix_config:
  class bitsandbytes_config (line 34) | class bitsandbytes_config:
  class qlora_config (line 42) | class qlora_config:

FILE: llama_recipes/configs/training.py
  class train_config (line 7) | class train_config:

FILE: llama_recipes/ft_datasets/alpaca_dataset.py
  class InstructionDataset (line 26) | class InstructionDataset(Dataset):
    method __init__ (line 27) | def __init__(self, dataset_config, tokenizer, partition="train", max_w...
    method __len__ (line 39) | def __len__(self):
    method __getitem__ (line 42) | def __getitem__(self, index):

FILE: llama_recipes/ft_datasets/completion_dataset.py
  function load_data (line 6) | def load_data(
  function format_data (line 54) | def format_data(dataset, tokenizer, config=None):
  function tokenize_data (line 81) | def tokenize_data(dataset, tokenizer, config=None):
  function get_completion_dataset (line 107) | def get_completion_dataset(config: str, tokenizer, split: str = "train"):

FILE: llama_recipes/ft_datasets/grammar_dataset/grammar_dataset.py
  class grammar (line 17) | class grammar(Dataset):
    method __init__ (line 18) | def __init__(
    method __len__ (line 41) | def __len__(self):
    method convert_to_features (line 44) | def convert_to_features(self, example_batch):
    method __getitem__ (line 60) | def __getitem__(self, index):
  function get_dataset (line 73) | def get_dataset(dataset_config, tokenizer, csv_name=None):

FILE: llama_recipes/ft_datasets/samsum_dataset.py
  function get_preprocessed_samsum (line 10) | def get_preprocessed_samsum(dataset_config, tokenizer, split):

FILE: llama_recipes/ft_datasets/utils.py
  class Concatenator (line 9) | class Concatenator(object):
    method __init__ (line 10) | def __init__(self, chunk_size=2048, wrap_packed_sequences=False):
    method _wrap_concat (line 15) | def _wrap_concat(self, batch):
    method _concat (line 50) | def _concat(self, batch):
    method __call__ (line 98) | def __call__(self, batch):
  class ConcatDataset (line 105) | class ConcatDataset(Dataset):
    method __init__ (line 106) | def __init__(self, dataset, chunk_size=4096):
    method __getitem__ (line 127) | def __getitem__(self, idx):
    method __len__ (line 130) | def __len__(self):

FILE: llama_recipes/llama_finetuning.py
  function main (line 54) | def main(**kwargs):

FILE: llama_recipes/model_checkpointing/checkpoint_handler.py
  function get_date_of_run (line 28) | def get_date_of_run():
  function load_model_sharded (line 41) | def load_model_sharded(model, rank, cfg):
  function save_model_and_optimizer_sharded (line 80) | def save_model_and_optimizer_sharded(model, rank, cfg, optim=None):
  function save_model_checkpoint (line 117) | def save_model_checkpoint(
  function load_model_checkpoint (line 154) | def load_model_checkpoint(model, rank, cfg):
  function save_optimizer_checkpoint (line 179) | def save_optimizer_checkpoint(model, optimizer, rank, cfg, epoch=1):
  function load_optimizer_checkpoint (line 211) | def load_optimizer_checkpoint(model, optimizer_checkpoint_path, rank):
  function load_sharded_model_single_gpu (line 233) | def load_sharded_model_single_gpu(model, model_path):

FILE: llama_recipes/policies/activation_checkpointing_functions.py
  function apply_fsdp_checkpointing (line 21) | def apply_fsdp_checkpointing(model):

FILE: llama_recipes/policies/anyprecision_optimizer.py
  class AnyPrecisionAdamW (line 16) | class AnyPrecisionAdamW(Optimizer):
    method __init__ (line 17) | def __init__(
    method step (line 73) | def step(self, closure=None):

FILE: llama_recipes/policies/wrapping.py
  function get_size_policy (line 15) | def get_size_policy(min_params=1e8):
  function get_llama_wrapper (line 22) | def get_llama_wrapper():

FILE: llama_recipes/utils/config_utils.py
  function update_config (line 26) | def update_config(config, **kwargs):
  function generate_peft_config (line 47) | def generate_peft_config(peft_method, kwargs):
  function generate_dataset_config (line 98) | def generate_dataset_config(train_config, kwargs):

FILE: llama_recipes/utils/dataset_utils.py
  function get_preprocessed_dataset (line 25) | def get_preprocessed_dataset(

FILE: llama_recipes/utils/fsdp_utils.py
  function fsdp_auto_wrap_policy (line 5) | def fsdp_auto_wrap_policy(model, transformer_layer_name):

FILE: llama_recipes/utils/memory_utils.py
  function byte2gb (line 10) | def byte2gb(x):
  class MemoryTrace (line 15) | class MemoryTrace:
    method __enter__ (line 16) | def __enter__(self):
    method cpu_mem_used (line 29) | def cpu_mem_used(self):
    method peak_monitor_func (line 33) | def peak_monitor_func(self):
    method __exit__ (line 45) | def __exit__(self, *exc):

FILE: llama_recipes/utils/train_utils.py
  function set_tokenizer_params (line 30) | def set_tokenizer_params(tokenizer: LlamaTokenizer):
  function byte2mb (line 36) | def byte2mb(x):
  function train (line 40) | def train(
  function evaluation (line 276) | def evaluation(
  function freeze_transformer_layers (line 364) | def freeze_transformer_layers(model, num_layer):
  function check_frozen_layers_peft_model (line 371) | def check_frozen_layers_peft_model(model):
  function setup (line 377) | def setup():
  function setup_environ_flags (line 382) | def setup_environ_flags(rank):
  function cleanup (line 394) | def cleanup():
  function clear_gpu_cache (line 399) | def clear_gpu_cache(rank=None):
  function get_parameter_dtypes (line 406) | def get_parameter_dtypes(model):
  function print_model_size (line 414) | def print_model_size(model, config, rank: int = 0) -> None:
  function get_policies (line 431) | def get_policies(cfg, rank):
  function save_train_params (line 463) | def save_train_params(train_config, fsdp_config, rank):

FILE: predict.py
  class Predictor (line 36) | class Predictor(BasePredictor):
    method setup (line 37) | def setup(self, weights: Optional[Path] = None):
    method get_lora (line 56) | def get_lora(self, replicate_weights: str) -> Any:
    method initialize_peft (line 77) | def initialize_peft(self, replicate_weights: str) -> None:
    method delete_lora (line 86) | def delete_lora(self):
    method predict (line 94) | def predict(
    method remove (line 234) | def remove(f: Callable, defaults: dict[str, Any]) -> Callable:

FILE: scripts/benchmark_token_latency.py
  class AbstractInferenceModel (line 12) | class AbstractInferenceModel(ABC):
    method __init__ (line 14) | def __init__(self, model_name_or_path, tokenizer_name_or_path):
    method _load_model (line 21) | def _load_model(self):
    method _load_tokenizer (line 25) | def _load_tokenizer(self):
    method generate_tokens (line 29) | def generate_tokens(self, input_ids, prompt_length, output_length):
  class LlamaBnB4Bit (line 33) | class LlamaBnB4Bit(AbstractInferenceModel):
    method __init__ (line 34) | def __init__(self, model_name_or_path, tokenizer_name_or_path, some_ot...
    method _load_model (line 37) | def _load_model(self):
    method _load_tokenizer (line 49) | def _load_tokenizer(self):
    method generate_tokens (line 68) | def generate_tokens(self, input_ids, prompt_length, output_length):
  function measure_latency (line 75) | def measure_latency(inference_model, prompt_length, output_length):
  function benchmark_model (line 112) | def benchmark_model(model_name, inference_model, prompt_lengths, output_...

FILE: scripts/test_fast_llama.py
  class Engine (line 16) | class Engine(Enum):
  class LoraAdapter (line 22) | class LoraAdapter:
  class SpeedyReplicateGonzalez (line 27) | class SpeedyReplicateGonzalez:
    method __init__ (line 28) | def __init__(self):
    method replicate_model_name (line 63) | def replicate_model_name(self):
    method replicate_model_name (line 67) | def replicate_model_name(self, model_name):
    method get_lora (line 71) | def get_lora(self, lora_path):
    method generate_replicate (line 84) | def generate_replicate(self, prompt, lora):
    method generate_vllm (line 99) | def generate_vllm(self, prompt, lora):
    method set_engine (line 111) | def set_engine(self, engine):
    method timing_decorator (line 123) | def timing_decorator(self, prompt, lora):
    method enable_timing (line 134) | def enable_timing(self, verbose: bool = False):
    method disable_timing (line 138) | def disable_timing(self):
    method run_long_generation (line 141) | def run_long_generation(self):
    method run_base (line 147) | def run_base(self):
    method run_sql (line 160) | def run_sql(self):
    method run_summary (line 187) | def run_summary(self):

FILE: scripts/test_load_unload_lora.py
  class vLLMLoraTest (line 11) | class vLLMLoraTest:
    method __init__ (line 12) | def __init__(self):
    method get_lora (line 35) | def get_lora(self, lora_path):
    method generate_replicate (line 47) | def generate_replicate(self, prompt, lora_path):
    method generate (line 57) | def generate(self, prompt, lora):
    method run_base (line 68) | def run_base(self):
    method run_sql (line 81) | def run_sql(self):
    method run_summary (line 108) | def run_summary(self):

FILE: src/config_utils.py
  class Weights (line 9) | class Weights(BaseModel):
  function get_fp16_file_list (line 15) | def get_fp16_file_list(n_shards: int):
  function get_gptq_file_list (line 35) | def get_gptq_file_list(base_model_name: str):
  function get_mlc_file_list (line 52) | def get_mlc_file_list(model_name: str, n_shards: int):
  function exllama_kwargs (line 70) | def exllama_kwargs(weights: Weights, config_overrides: Optional[dict] = ...
  function vllm_kwargs (line 77) | def vllm_kwargs(weights: Weights, config_overrides: Optional[dict] = None):
  function mlc_kwargs (line 87) | def mlc_kwargs(

FILE: src/download.py
  class SeekableMmap (line 28) | class SeekableMmap(mmap.mmap):
    method seekable (line 29) | def seekable(self) -> bool:
  class Downloader (line 33) | class Downloader:
    method __init__ (line 34) | def __init__(self, concurrency: int | None = None) -> None:
    method session (line 50) | def session(self) -> aiohttp.ClientSession:
    method threadpool (line 61) | def threadpool(self) -> ThreadPoolExecutor:
    method get_remote_file_size (line 66) | async def get_remote_file_size(self, url: str | URL) -> "tuple[URL, in...
    method download_chunk (line 101) | async def download_chunk(
    method download_file (line 120) | async def download_file(self, url: str | URL) -> mmap.mmap:
    method download_file_to_disk (line 154) | async def download_file_to_disk(self, url: str, path: str) -> None:
    method maybe_download_files_to_disk (line 163) | async def maybe_download_files_to_disk(
    method sync (line 186) | def sync(f: t.Callable) -> t.Callable:

FILE: src/inference_engines/engine.py
  class Engine (line 9) | class Engine(ABC):
    method load_weights (line 14) | def load_weights(self, weights: Weights):
    method load_lora (line 23) | def load_lora(self, lora_data: dict):
    method set_lora (line 31) | def set_lora(self, lora: Any):
    method is_lora_active (line 38) | def is_lora_active(self) -> bool:
    method delete_lora (line 45) | def delete_lora(self):
    method __call__ (line 52) | def __call__(self, prompt, **kwargs):

FILE: src/inference_engines/exllama.py
  function next_logits (line 27) | def next_logits(
  function begin (line 36) | def begin(generator):
  function timer (line 44) | def timer(name, func):
  class ExllamaEngine (line 52) | class ExllamaEngine(Engine):
    method __init__ (line 53) | def __init__(self, weights: Weights, fused_attn=True):
    method delete_lora (line 90) | def delete_lora(self):
    method is_lora_active (line 94) | def is_lora_active(self) -> bool:
    method load_lora (line 97) | def load_lora(self, data_ref: dict) -> ExLlamaLora:
    method set_lora (line 104) | def set_lora(self, lora: ExLlamaLora | None) -> None:
    method __call__ (line 107) | def __call__(

FILE: src/inference_engines/mlc_engine.py
  class MLCEngine (line 11) | class MLCEngine(Engine):
    method __init__ (line 16) | def __init__(
    method load_weights (line 50) | def load_weights(self, weights: Weights) -> str:
    method get_logits (line 69) | def get_logits(self):
    method load_lora (line 75) | def load_lora(self):
    method is_lora_active (line 82) | def is_lora_active(self):
    method set_lora (line 88) | def set_lora(self):
    method delete_lora (line 94) | def delete_lora(self):
    method __call__ (line 97) | def __call__(

FILE: src/inference_engines/mlc_vllm_engine.py
  class MLCvLLMEngine (line 8) | class MLCvLLMEngine(Engine):
    method __init__ (line 13) | def __init__(self, mlc_args: dict, vllm_args: dict) -> None:
    method load_lora (line 24) | def load_lora(self, lora_data: dict) -> Any:
    method is_lora_active (line 43) | def is_lora_active(self) -> bool:
    method set_lora (line 51) | def set_lora(self, lora: Any) -> None:
    method delete_lora (line 61) | def delete_lora(self) -> None:
    method __call__ (line 64) | def __call__(

FILE: src/inference_engines/transformers_engine.py
  class ExtraStopSequence (line 23) | class ExtraStopSequence(StoppingCriteria):
    method __init__ (line 29) | def __init__(self, stop_sequence: torch.Tensor, device: str):
    method __call__ (line 32) | def __call__(
  class TransformersEngine (line 40) | class TransformersEngine(Engine):
    method __init__ (line 46) | def __init__(self, weights: Weights, tokenizer_func=None, device="cuda"):
    method load_lora (line 55) | def load_lora(self, lora_weights: dict) -> Tuple[LoraConfig, Any]:
    method is_lora_active (line 78) | def is_lora_active(self) -> bool:
    method delete_lora (line 81) | def delete_lora(self):
    method set_lora (line 90) | def set_lora(self, lora):
    method get_logits (line 114) | def get_logits(self, prompt):
    method __call__ (line 129) | def __call__(

FILE: src/inference_engines/vllm_engine.py
  class LoRA (line 20) | class LoRA:
    method __init__ (line 21) | def __init__(
    method load_from_path (line 28) | def load_from_path(
    method load_from_bytes (line 40) | def load_from_bytes(
  class vLLMEngine (line 48) | class vLLMEngine(Engine):
    method __init__ (line 53) | def __init__(self, weights: Weights, dtype: str) -> None:
    method load_lora (line 63) | def load_lora(
    method is_lora_active (line 122) | def is_lora_active(self) -> bool:
    method set_lora (line 128) | def set_lora(self, lora: LoRA) -> None:
    method delete_lora (line 138) | def delete_lora(self) -> None:
    method generate_stream (line 141) | async def generate_stream(
    method __call__ (line 148) | def __call__(
  function run_generation (line 241) | def run_generation():

FILE: src/inference_engines/vllm_exllama_engine.py
  class ExllamaVllmEngine (line 12) | class ExllamaVllmEngine(Engine):
    method __init__ (line 17) | def __init__(self, vllm_args: dict, exllama_args: dict) -> None:
    method load_lora (line 27) | def load_lora(self, lora_data: dict) -> Any:
    method is_lora_active (line 45) | def is_lora_active(self) -> bool:
    method set_lora (line 53) | def set_lora(self, lora: Any) -> None:
    method delete_lora (line 63) | def delete_lora(self) -> None:
    method __call__ (line 66) | def __call__(

FILE: src/inference_engines/vllm_transformers.py
  class vLLMTransformersEngine (line 11) | class vLLMTransformersEngine(Engine):
    method __init__ (line 16) | def __init__(
    method load_lora (line 23) | def load_lora(self, lora_data: dict) -> Any:
    method is_lora_active (line 43) | def is_lora_active(self) -> bool:
    method set_lora (line 51) | def set_lora(self, lora: Any) -> None:
    method delete_lora (line 61) | def delete_lora(self) -> None:
    method __call__ (line 64) | def __call__(

FILE: src/more_utils.py
  function log_memory_stuff (line 10) | def log_memory_stuff(prompt=None):
  function load_tokenizer (line 20) | def load_tokenizer(tokenizer_path):

FILE: src/utils.py
  function seed_all (line 11) | def seed_all(seed: int):
  function get_env_var_or_default (line 23) | def get_env_var_or_default(var_name, default_value):
  class Logger (line 42) | class Logger:
    method __init__ (line 43) | def __init__(self, marker: str = "predict-timings"):
    method log (line 48) | def log(self, *args):
  function get_loop (line 60) | def get_loop() -> asyncio.AbstractEventLoop:
  function download_file (line 67) | def download_file(file, local_filename):
  function check_files_exist (line 79) | def check_files_exist(remote_files: list[str], local_path: str) -> list[...
  function download_file_with_pget (line 89) | async def download_file_with_pget(remote_path, dest_path, pget_concurren...
  function download_files_with_pget (line 116) | async def download_files_with_pget(
  function maybe_download_with_pget (line 126) | def maybe_download_with_pget(
  class StreamingTextStopSequenceHandler (line 166) | class StreamingTextStopSequenceHandler:
    method __init__ (line 167) | def __init__(self, stop_sequences: tp.List[str] = None, eos_token: str...
    method get_match_length (line 176) | def get_match_length(self, text: str, stop_sequence: str):
    method process (line 191) | def process(self, token):
    method __call__ (line 245) | def __call__(self, token):
    method finalize (line 252) | def finalize(self):
  function delay_prints (line 259) | def delay_prints(REALLY_EAT_MY_PRINT_STATEMENTS: bool = False) -> tp.Ite...

FILE: tests/conftest.py
  function pytest_addoption (line 1) | def pytest_addoption(parser):

FILE: tests/test_e2e.py
  function wait_for_server_to_be_ready (line 14) | def wait_for_server_to_be_ready(url, timeout=300):
  function server (line 46) | def server():
  function test_health_check (line 73) | def test_health_check():
  function test_prediction (line 80) | def test_prediction():

FILE: tests/test_predict.py
  function server (line 21) | def server():
  function test_health_check (line 52) | def test_health_check(server):
  function test_simple_prediction (line 59) | def test_simple_prediction(server):
  function test_input_too_long (line 77) | def test_input_too_long(server):

FILE: tests/test_remote_predict.py
  function model_name (line 6) | def model_name(request):
  function model (line 11) | def model(model_name):
  function version (line 16) | def version(model):
  function prediction_tests (line 22) | def prediction_tests():
  function test_initial_predictions (line 28) | def test_initial_predictions(version, prediction_tests):

FILE: tests/test_remote_train.py
  function model_name (line 7) | def model_name(request):
  function model (line 12) | def model(model_name):
  function version (line 17) | def version(model):
  function training (line 23) | def training(model_name, version):
  function prediction_tests (line 36) | def prediction_tests():
  function test_training (line 62) | def test_training(training):
  function trained_model_and_version (line 70) | def trained_model_and_version(training):
  function test_post_training_predictions (line 75) | def test_post_training_predictions(trained_model_and_version, prediction...

FILE: tests/test_train.py
  function test_train (line 42) | def test_train():

FILE: tests/test_train_predict.py
  function server (line 22) | def server():
  function test_health_check (line 58) | def test_health_check(server):
  function test_prediction (line 65) | def test_prediction(server):
  function test_input_too_long (line 83) | def test_input_too_long(server):

FILE: tests/test_utils.py
  function get_image_name (line 12) | def get_image_name():
  function process_log_line (line 21) | def process_log_line(line):
  function capture_output (line 37) | def capture_output(pipe, print_lock, logs=None, error_detected=None):
  function wait_for_server_to_be_ready (line 49) | def wait_for_server_to_be_ready(url, timeout=300):
  function run_training_subprocess (line 79) | def run_training_subprocess(command):

FILE: tests/timing.py
  function run (line 17) | def run(v):

FILE: tests/unit_tests/test_completion_dataset.py
  function dataset_config (line 17) | def dataset_config():
  function tokenizer (line 35) | def tokenizer():
  function test__load_data_train (line 51) | def test__load_data_train(dataset_config):
  function test__load_data_train_with_val_split (line 59) | def test__load_data_train_with_val_split(dataset_config):
  function dataset (line 73) | def dataset(dataset_config):
  function test_format_data (line 79) | def test_format_data(dataset, tokenizer):
  function formatted_dataset (line 87) | def formatted_dataset(dataset, tokenizer):
  function test_tokenize_data_with_wrapped_packing (line 91) | def test_tokenize_data_with_wrapped_packing(
  function test_tokenize_data_without_wrapped_packing_small_chunk (line 121) | def test_tokenize_data_without_wrapped_packing_small_chunk(
  function test_tokenize_data_without_wrapped_packing_large_chunk (line 155) | def test_tokenize_data_without_wrapped_packing_large_chunk(
  function test_tokenize_data_without_packing (line 189) | def test_tokenize_data_without_packing(formatted_dataset, tokenizer, dat...

FILE: tests/unit_tests/test_utils.py
  function tokenizer (line 11) | def tokenizer():
  function get_decoded_prompt_tokens (line 27) | def get_decoded_prompt_tokens(tokenizer, prompt):
  function test_no_stop_sequences (line 33) | def test_no_stop_sequences(tokenizer):
  function test_single_stop_sequence_1 (line 70) | def test_single_stop_sequence_1(tokenizer):
  function test_single_stop_sequence_2 (line 107) | def test_single_stop_sequence_2(tokenizer):
  function test_multiple_stop_sequence (line 144) | def test_multiple_stop_sequence(tokenizer):
  function test_adjacent_stop_sequences (line 181) | def test_adjacent_stop_sequences(tokenizer):
  function test_substring_stop_sequence (line 218) | def test_substring_stop_sequence(tokenizer):

FILE: train.py
  class TrainingOutput (line 29) | class TrainingOutput(BaseModel):
  function train (line 33) | def train(