SYMBOL INDEX (289 symbols across 48 files) FILE: examples/alpaca/process_data.py class Preprocessor (line 18) | class Preprocessor: method __init__ (line 21) | def __init__(self, tokenizer): method batch_tokenize (line 25) | def batch_tokenize(self, texts): method make_prompt (line 37) | def make_prompt(self, input_row): method make_short_prompt (line 42) | def make_short_prompt(self, input_row): method construct_dataset (line 47) | def construct_dataset(self, input_data): FILE: llama_recipes/configs/datasets.py class samsum_dataset (line 8) | class samsum_dataset: class grammar_dataset (line 16) | class grammar_dataset: class alpaca_dataset (line 24) | class alpaca_dataset: class completion (line 32) | class completion: FILE: llama_recipes/configs/fsdp.py class fsdp_config (line 10) | class fsdp_config: FILE: llama_recipes/configs/peft.py class lora_config (line 10) | class lora_config: class llama_adapter_config (line 21) | class llama_adapter_config: class prefix_config (line 28) | class prefix_config: class bitsandbytes_config (line 34) | class bitsandbytes_config: class qlora_config (line 42) | class qlora_config: FILE: llama_recipes/configs/training.py class train_config (line 7) | class train_config: FILE: llama_recipes/ft_datasets/alpaca_dataset.py class InstructionDataset (line 26) | class InstructionDataset(Dataset): method __init__ (line 27) | def __init__(self, dataset_config, tokenizer, partition="train", max_w... method __len__ (line 39) | def __len__(self): method __getitem__ (line 42) | def __getitem__(self, index): FILE: llama_recipes/ft_datasets/completion_dataset.py function load_data (line 6) | def load_data( function format_data (line 54) | def format_data(dataset, tokenizer, config=None): function tokenize_data (line 81) | def tokenize_data(dataset, tokenizer, config=None): function get_completion_dataset (line 107) | def get_completion_dataset(config: str, tokenizer, split: str = "train"): FILE: llama_recipes/ft_datasets/grammar_dataset/grammar_dataset.py class grammar (line 17) | class grammar(Dataset): method __init__ (line 18) | def __init__( method __len__ (line 41) | def __len__(self): method convert_to_features (line 44) | def convert_to_features(self, example_batch): method __getitem__ (line 60) | def __getitem__(self, index): function get_dataset (line 73) | def get_dataset(dataset_config, tokenizer, csv_name=None): FILE: llama_recipes/ft_datasets/samsum_dataset.py function get_preprocessed_samsum (line 10) | def get_preprocessed_samsum(dataset_config, tokenizer, split): FILE: llama_recipes/ft_datasets/utils.py class Concatenator (line 9) | class Concatenator(object): method __init__ (line 10) | def __init__(self, chunk_size=2048, wrap_packed_sequences=False): method _wrap_concat (line 15) | def _wrap_concat(self, batch): method _concat (line 50) | def _concat(self, batch): method __call__ (line 98) | def __call__(self, batch): class ConcatDataset (line 105) | class ConcatDataset(Dataset): method __init__ (line 106) | def __init__(self, dataset, chunk_size=4096): method __getitem__ (line 127) | def __getitem__(self, idx): method __len__ (line 130) | def __len__(self): FILE: llama_recipes/llama_finetuning.py function main (line 54) | def main(**kwargs): FILE: llama_recipes/model_checkpointing/checkpoint_handler.py function get_date_of_run (line 28) | def get_date_of_run(): function load_model_sharded (line 41) | def load_model_sharded(model, rank, cfg): function save_model_and_optimizer_sharded (line 80) | def save_model_and_optimizer_sharded(model, rank, cfg, optim=None): function save_model_checkpoint (line 117) | def save_model_checkpoint( function load_model_checkpoint (line 154) | def load_model_checkpoint(model, rank, cfg): function save_optimizer_checkpoint (line 179) | def save_optimizer_checkpoint(model, optimizer, rank, cfg, epoch=1): function load_optimizer_checkpoint (line 211) | def load_optimizer_checkpoint(model, optimizer_checkpoint_path, rank): function load_sharded_model_single_gpu (line 233) | def load_sharded_model_single_gpu(model, model_path): FILE: llama_recipes/policies/activation_checkpointing_functions.py function apply_fsdp_checkpointing (line 21) | def apply_fsdp_checkpointing(model): FILE: llama_recipes/policies/anyprecision_optimizer.py class AnyPrecisionAdamW (line 16) | class AnyPrecisionAdamW(Optimizer): method __init__ (line 17) | def __init__( method step (line 73) | def step(self, closure=None): FILE: llama_recipes/policies/wrapping.py function get_size_policy (line 15) | def get_size_policy(min_params=1e8): function get_llama_wrapper (line 22) | def get_llama_wrapper(): FILE: llama_recipes/utils/config_utils.py function update_config (line 26) | def update_config(config, **kwargs): function generate_peft_config (line 47) | def generate_peft_config(peft_method, kwargs): function generate_dataset_config (line 98) | def generate_dataset_config(train_config, kwargs): FILE: llama_recipes/utils/dataset_utils.py function get_preprocessed_dataset (line 25) | def get_preprocessed_dataset( FILE: llama_recipes/utils/fsdp_utils.py function fsdp_auto_wrap_policy (line 5) | def fsdp_auto_wrap_policy(model, transformer_layer_name): FILE: llama_recipes/utils/memory_utils.py function byte2gb (line 10) | def byte2gb(x): class MemoryTrace (line 15) | class MemoryTrace: method __enter__ (line 16) | def __enter__(self): method cpu_mem_used (line 29) | def cpu_mem_used(self): method peak_monitor_func (line 33) | def peak_monitor_func(self): method __exit__ (line 45) | def __exit__(self, *exc): FILE: llama_recipes/utils/train_utils.py function set_tokenizer_params (line 30) | def set_tokenizer_params(tokenizer: LlamaTokenizer): function byte2mb (line 36) | def byte2mb(x): function train (line 40) | def train( function evaluation (line 276) | def evaluation( function freeze_transformer_layers (line 364) | def freeze_transformer_layers(model, num_layer): function check_frozen_layers_peft_model (line 371) | def check_frozen_layers_peft_model(model): function setup (line 377) | def setup(): function setup_environ_flags (line 382) | def setup_environ_flags(rank): function cleanup (line 394) | def cleanup(): function clear_gpu_cache (line 399) | def clear_gpu_cache(rank=None): function get_parameter_dtypes (line 406) | def get_parameter_dtypes(model): function print_model_size (line 414) | def print_model_size(model, config, rank: int = 0) -> None: function get_policies (line 431) | def get_policies(cfg, rank): function save_train_params (line 463) | def save_train_params(train_config, fsdp_config, rank): FILE: predict.py class Predictor (line 36) | class Predictor(BasePredictor): method setup (line 37) | def setup(self, weights: Optional[Path] = None): method get_lora (line 56) | def get_lora(self, replicate_weights: str) -> Any: method initialize_peft (line 77) | def initialize_peft(self, replicate_weights: str) -> None: method delete_lora (line 86) | def delete_lora(self): method predict (line 94) | def predict( method remove (line 234) | def remove(f: Callable, defaults: dict[str, Any]) -> Callable: FILE: scripts/benchmark_token_latency.py class AbstractInferenceModel (line 12) | class AbstractInferenceModel(ABC): method __init__ (line 14) | def __init__(self, model_name_or_path, tokenizer_name_or_path): method _load_model (line 21) | def _load_model(self): method _load_tokenizer (line 25) | def _load_tokenizer(self): method generate_tokens (line 29) | def generate_tokens(self, input_ids, prompt_length, output_length): class LlamaBnB4Bit (line 33) | class LlamaBnB4Bit(AbstractInferenceModel): method __init__ (line 34) | def __init__(self, model_name_or_path, tokenizer_name_or_path, some_ot... method _load_model (line 37) | def _load_model(self): method _load_tokenizer (line 49) | def _load_tokenizer(self): method generate_tokens (line 68) | def generate_tokens(self, input_ids, prompt_length, output_length): function measure_latency (line 75) | def measure_latency(inference_model, prompt_length, output_length): function benchmark_model (line 112) | def benchmark_model(model_name, inference_model, prompt_lengths, output_... FILE: scripts/test_fast_llama.py class Engine (line 16) | class Engine(Enum): class LoraAdapter (line 22) | class LoraAdapter: class SpeedyReplicateGonzalez (line 27) | class SpeedyReplicateGonzalez: method __init__ (line 28) | def __init__(self): method replicate_model_name (line 63) | def replicate_model_name(self): method replicate_model_name (line 67) | def replicate_model_name(self, model_name): method get_lora (line 71) | def get_lora(self, lora_path): method generate_replicate (line 84) | def generate_replicate(self, prompt, lora): method generate_vllm (line 99) | def generate_vllm(self, prompt, lora): method set_engine (line 111) | def set_engine(self, engine): method timing_decorator (line 123) | def timing_decorator(self, prompt, lora): method enable_timing (line 134) | def enable_timing(self, verbose: bool = False): method disable_timing (line 138) | def disable_timing(self): method run_long_generation (line 141) | def run_long_generation(self): method run_base (line 147) | def run_base(self): method run_sql (line 160) | def run_sql(self): method run_summary (line 187) | def run_summary(self): FILE: scripts/test_load_unload_lora.py class vLLMLoraTest (line 11) | class vLLMLoraTest: method __init__ (line 12) | def __init__(self): method get_lora (line 35) | def get_lora(self, lora_path): method generate_replicate (line 47) | def generate_replicate(self, prompt, lora_path): method generate (line 57) | def generate(self, prompt, lora): method run_base (line 68) | def run_base(self): method run_sql (line 81) | def run_sql(self): method run_summary (line 108) | def run_summary(self): FILE: src/config_utils.py class Weights (line 9) | class Weights(BaseModel): function get_fp16_file_list (line 15) | def get_fp16_file_list(n_shards: int): function get_gptq_file_list (line 35) | def get_gptq_file_list(base_model_name: str): function get_mlc_file_list (line 52) | def get_mlc_file_list(model_name: str, n_shards: int): function exllama_kwargs (line 70) | def exllama_kwargs(weights: Weights, config_overrides: Optional[dict] = ... function vllm_kwargs (line 77) | def vllm_kwargs(weights: Weights, config_overrides: Optional[dict] = None): function mlc_kwargs (line 87) | def mlc_kwargs( FILE: src/download.py class SeekableMmap (line 28) | class SeekableMmap(mmap.mmap): method seekable (line 29) | def seekable(self) -> bool: class Downloader (line 33) | class Downloader: method __init__ (line 34) | def __init__(self, concurrency: int | None = None) -> None: method session (line 50) | def session(self) -> aiohttp.ClientSession: method threadpool (line 61) | def threadpool(self) -> ThreadPoolExecutor: method get_remote_file_size (line 66) | async def get_remote_file_size(self, url: str | URL) -> "tuple[URL, in... method download_chunk (line 101) | async def download_chunk( method download_file (line 120) | async def download_file(self, url: str | URL) -> mmap.mmap: method download_file_to_disk (line 154) | async def download_file_to_disk(self, url: str, path: str) -> None: method maybe_download_files_to_disk (line 163) | async def maybe_download_files_to_disk( method sync (line 186) | def sync(f: t.Callable) -> t.Callable: FILE: src/inference_engines/engine.py class Engine (line 9) | class Engine(ABC): method load_weights (line 14) | def load_weights(self, weights: Weights): method load_lora (line 23) | def load_lora(self, lora_data: dict): method set_lora (line 31) | def set_lora(self, lora: Any): method is_lora_active (line 38) | def is_lora_active(self) -> bool: method delete_lora (line 45) | def delete_lora(self): method __call__ (line 52) | def __call__(self, prompt, **kwargs): FILE: src/inference_engines/exllama.py function next_logits (line 27) | def next_logits( function begin (line 36) | def begin(generator): function timer (line 44) | def timer(name, func): class ExllamaEngine (line 52) | class ExllamaEngine(Engine): method __init__ (line 53) | def __init__(self, weights: Weights, fused_attn=True): method delete_lora (line 90) | def delete_lora(self): method is_lora_active (line 94) | def is_lora_active(self) -> bool: method load_lora (line 97) | def load_lora(self, data_ref: dict) -> ExLlamaLora: method set_lora (line 104) | def set_lora(self, lora: ExLlamaLora | None) -> None: method __call__ (line 107) | def __call__( FILE: src/inference_engines/mlc_engine.py class MLCEngine (line 11) | class MLCEngine(Engine): method __init__ (line 16) | def __init__( method load_weights (line 50) | def load_weights(self, weights: Weights) -> str: method get_logits (line 69) | def get_logits(self): method load_lora (line 75) | def load_lora(self): method is_lora_active (line 82) | def is_lora_active(self): method set_lora (line 88) | def set_lora(self): method delete_lora (line 94) | def delete_lora(self): method __call__ (line 97) | def __call__( FILE: src/inference_engines/mlc_vllm_engine.py class MLCvLLMEngine (line 8) | class MLCvLLMEngine(Engine): method __init__ (line 13) | def __init__(self, mlc_args: dict, vllm_args: dict) -> None: method load_lora (line 24) | def load_lora(self, lora_data: dict) -> Any: method is_lora_active (line 43) | def is_lora_active(self) -> bool: method set_lora (line 51) | def set_lora(self, lora: Any) -> None: method delete_lora (line 61) | def delete_lora(self) -> None: method __call__ (line 64) | def __call__( FILE: src/inference_engines/transformers_engine.py class ExtraStopSequence (line 23) | class ExtraStopSequence(StoppingCriteria): method __init__ (line 29) | def __init__(self, stop_sequence: torch.Tensor, device: str): method __call__ (line 32) | def __call__( class TransformersEngine (line 40) | class TransformersEngine(Engine): method __init__ (line 46) | def __init__(self, weights: Weights, tokenizer_func=None, device="cuda"): method load_lora (line 55) | def load_lora(self, lora_weights: dict) -> Tuple[LoraConfig, Any]: method is_lora_active (line 78) | def is_lora_active(self) -> bool: method delete_lora (line 81) | def delete_lora(self): method set_lora (line 90) | def set_lora(self, lora): method get_logits (line 114) | def get_logits(self, prompt): method __call__ (line 129) | def __call__( FILE: src/inference_engines/vllm_engine.py class LoRA (line 20) | class LoRA: method __init__ (line 21) | def __init__( method load_from_path (line 28) | def load_from_path( method load_from_bytes (line 40) | def load_from_bytes( class vLLMEngine (line 48) | class vLLMEngine(Engine): method __init__ (line 53) | def __init__(self, weights: Weights, dtype: str) -> None: method load_lora (line 63) | def load_lora( method is_lora_active (line 122) | def is_lora_active(self) -> bool: method set_lora (line 128) | def set_lora(self, lora: LoRA) -> None: method delete_lora (line 138) | def delete_lora(self) -> None: method generate_stream (line 141) | async def generate_stream( method __call__ (line 148) | def __call__( function run_generation (line 241) | def run_generation(): FILE: src/inference_engines/vllm_exllama_engine.py class ExllamaVllmEngine (line 12) | class ExllamaVllmEngine(Engine): method __init__ (line 17) | def __init__(self, vllm_args: dict, exllama_args: dict) -> None: method load_lora (line 27) | def load_lora(self, lora_data: dict) -> Any: method is_lora_active (line 45) | def is_lora_active(self) -> bool: method set_lora (line 53) | def set_lora(self, lora: Any) -> None: method delete_lora (line 63) | def delete_lora(self) -> None: method __call__ (line 66) | def __call__( FILE: src/inference_engines/vllm_transformers.py class vLLMTransformersEngine (line 11) | class vLLMTransformersEngine(Engine): method __init__ (line 16) | def __init__( method load_lora (line 23) | def load_lora(self, lora_data: dict) -> Any: method is_lora_active (line 43) | def is_lora_active(self) -> bool: method set_lora (line 51) | def set_lora(self, lora: Any) -> None: method delete_lora (line 61) | def delete_lora(self) -> None: method __call__ (line 64) | def __call__( FILE: src/more_utils.py function log_memory_stuff (line 10) | def log_memory_stuff(prompt=None): function load_tokenizer (line 20) | def load_tokenizer(tokenizer_path): FILE: src/utils.py function seed_all (line 11) | def seed_all(seed: int): function get_env_var_or_default (line 23) | def get_env_var_or_default(var_name, default_value): class Logger (line 42) | class Logger: method __init__ (line 43) | def __init__(self, marker: str = "predict-timings"): method log (line 48) | def log(self, *args): function get_loop (line 60) | def get_loop() -> asyncio.AbstractEventLoop: function download_file (line 67) | def download_file(file, local_filename): function check_files_exist (line 79) | def check_files_exist(remote_files: list[str], local_path: str) -> list[... function download_file_with_pget (line 89) | async def download_file_with_pget(remote_path, dest_path, pget_concurren... function download_files_with_pget (line 116) | async def download_files_with_pget( function maybe_download_with_pget (line 126) | def maybe_download_with_pget( class StreamingTextStopSequenceHandler (line 166) | class StreamingTextStopSequenceHandler: method __init__ (line 167) | def __init__(self, stop_sequences: tp.List[str] = None, eos_token: str... method get_match_length (line 176) | def get_match_length(self, text: str, stop_sequence: str): method process (line 191) | def process(self, token): method __call__ (line 245) | def __call__(self, token): method finalize (line 252) | def finalize(self): function delay_prints (line 259) | def delay_prints(REALLY_EAT_MY_PRINT_STATEMENTS: bool = False) -> tp.Ite... FILE: tests/conftest.py function pytest_addoption (line 1) | def pytest_addoption(parser): FILE: tests/test_e2e.py function wait_for_server_to_be_ready (line 14) | def wait_for_server_to_be_ready(url, timeout=300): function server (line 46) | def server(): function test_health_check (line 73) | def test_health_check(): function test_prediction (line 80) | def test_prediction(): FILE: tests/test_predict.py function server (line 21) | def server(): function test_health_check (line 52) | def test_health_check(server): function test_simple_prediction (line 59) | def test_simple_prediction(server): function test_input_too_long (line 77) | def test_input_too_long(server): FILE: tests/test_remote_predict.py function model_name (line 6) | def model_name(request): function model (line 11) | def model(model_name): function version (line 16) | def version(model): function prediction_tests (line 22) | def prediction_tests(): function test_initial_predictions (line 28) | def test_initial_predictions(version, prediction_tests): FILE: tests/test_remote_train.py function model_name (line 7) | def model_name(request): function model (line 12) | def model(model_name): function version (line 17) | def version(model): function training (line 23) | def training(model_name, version): function prediction_tests (line 36) | def prediction_tests(): function test_training (line 62) | def test_training(training): function trained_model_and_version (line 70) | def trained_model_and_version(training): function test_post_training_predictions (line 75) | def test_post_training_predictions(trained_model_and_version, prediction... FILE: tests/test_train.py function test_train (line 42) | def test_train(): FILE: tests/test_train_predict.py function server (line 22) | def server(): function test_health_check (line 58) | def test_health_check(server): function test_prediction (line 65) | def test_prediction(server): function test_input_too_long (line 83) | def test_input_too_long(server): FILE: tests/test_utils.py function get_image_name (line 12) | def get_image_name(): function process_log_line (line 21) | def process_log_line(line): function capture_output (line 37) | def capture_output(pipe, print_lock, logs=None, error_detected=None): function wait_for_server_to_be_ready (line 49) | def wait_for_server_to_be_ready(url, timeout=300): function run_training_subprocess (line 79) | def run_training_subprocess(command): FILE: tests/timing.py function run (line 17) | def run(v): FILE: tests/unit_tests/test_completion_dataset.py function dataset_config (line 17) | def dataset_config(): function tokenizer (line 35) | def tokenizer(): function test__load_data_train (line 51) | def test__load_data_train(dataset_config): function test__load_data_train_with_val_split (line 59) | def test__load_data_train_with_val_split(dataset_config): function dataset (line 73) | def dataset(dataset_config): function test_format_data (line 79) | def test_format_data(dataset, tokenizer): function formatted_dataset (line 87) | def formatted_dataset(dataset, tokenizer): function test_tokenize_data_with_wrapped_packing (line 91) | def test_tokenize_data_with_wrapped_packing( function test_tokenize_data_without_wrapped_packing_small_chunk (line 121) | def test_tokenize_data_without_wrapped_packing_small_chunk( function test_tokenize_data_without_wrapped_packing_large_chunk (line 155) | def test_tokenize_data_without_wrapped_packing_large_chunk( function test_tokenize_data_without_packing (line 189) | def test_tokenize_data_without_packing(formatted_dataset, tokenizer, dat... FILE: tests/unit_tests/test_utils.py function tokenizer (line 11) | def tokenizer(): function get_decoded_prompt_tokens (line 27) | def get_decoded_prompt_tokens(tokenizer, prompt): function test_no_stop_sequences (line 33) | def test_no_stop_sequences(tokenizer): function test_single_stop_sequence_1 (line 70) | def test_single_stop_sequence_1(tokenizer): function test_single_stop_sequence_2 (line 107) | def test_single_stop_sequence_2(tokenizer): function test_multiple_stop_sequence (line 144) | def test_multiple_stop_sequence(tokenizer): function test_adjacent_stop_sequences (line 181) | def test_adjacent_stop_sequences(tokenizer): function test_substring_stop_sequence (line 218) | def test_substring_stop_sequence(tokenizer): FILE: train.py class TrainingOutput (line 29) | class TrainingOutput(BaseModel): function train (line 33) | def train(