SYMBOL INDEX (222 symbols across 17 files) FILE: kernels/cross_entropy_loss.py function _cross_entropy_forward (line 28) | def _cross_entropy_forward( function _chunked_cross_entropy_forward (line 99) | def _chunked_cross_entropy_forward( function _cross_entropy_backward (line 180) | def _cross_entropy_backward( class Fast_CrossEntropyLoss (line 245) | class Fast_CrossEntropyLoss(torch.autograd.Function): method forward (line 247) | def forward(ctx, logits, labels, logit_scale=1.0): method backward (line 314) | def backward(ctx, dlosses): function fast_cross_entropy_loss (line 352) | def fast_cross_entropy_loss(logits, labels, logit_scale=1.0): FILE: kernels/utils.py function device_warp_size (line 26) | def device_warp_size(): function calculate_settings (line 33) | def calculate_settings(n): function QUANT_STATE (line 62) | def QUANT_STATE(W): function get_lora_parameters (line 69) | def get_lora_parameters(proj): function fast_dequantize (line 88) | def fast_dequantize(W, quant_state=None, out=None): function fast_gemv (line 144) | def fast_gemv(X, W, quant_state, out=None): function fast_linear_forward (line 222) | def fast_linear_forward(proj, X, temp_lora=None, out=None): function matmul_lora (line 265) | def matmul_lora(X, W, W_quant, A, B, s, out=None): FILE: models/layers.py function move_data_to_device (line 12) | def move_data_to_device(module, device): function set_data (line 27) | def set_data(module, data): function move_experts_to_device (line 38) | def move_experts_to_device(experts, device, num_experts_to_offload): function set_experts_data (line 48) | def set_experts_data(experts, orig_data): function entropy_fn (line 55) | def entropy_fn(logits): function top_k_accuracy (line 65) | def top_k_accuracy(logits, labels, k_list, ignore_index=-100): class LayerSpec (line 77) | class LayerSpec(ds_pipe_module.LayerSpec): method __init__ (line 78) | def __init__(self, typename, *module_args, **module_kwargs): method build (line 81) | def build(self): method estimated_size (line 86) | def estimated_size(self): class OutputLayer (line 92) | class OutputLayer(nn.Module): method __init__ (line 93) | def __init__( method forward (line 121) | def forward(self, inputs): function load_balancing_loss_func (line 238) | def load_balancing_loss_func(gate_logits: torch.Tensor, num_experts: tor... class MixtralOutputLayer (line 255) | class MixtralOutputLayer(OutputLayer): method __init__ (line 256) | def __init__( method forward (line 271) | def forward(self, inputs): class InputLayer (line 288) | class InputLayer(nn.Module): method __init__ (line 289) | def __init__(self, model): method model (line 298) | def model(self): method forward (line 301) | def forward(self, inputs): class LlamaRMSNormPipe (line 351) | class LlamaRMSNormPipe(nn.Module): method __init__ (line 352) | def __init__(self, loader_util, orig): method forward (line 357) | def forward(self, inputs): class LlamaDecoderLayerPipe (line 362) | class LlamaDecoderLayerPipe(nn.Module): method __init__ (line 363) | def __init__(self, pipeline_model, loader_util, orig): method forward (line 377) | def forward(self, inputs): method move_mlp_to_cpu (line 405) | def move_mlp_to_cpu(self): method move_mlp_to_device (line 418) | def move_mlp_to_device(self, device): class Phi3DecoderLayerPipe (line 424) | class Phi3DecoderLayerPipe(LlamaDecoderLayerPipe): method __init__ (line 425) | def __init__(self, *args, **kwargs): method move_mlp_to_cpu (line 428) | def move_mlp_to_cpu(self): method move_mlp_to_device (line 438) | def move_mlp_to_device(self, device): class MixtralDecoderLayerPipe (line 443) | class MixtralDecoderLayerPipe(LlamaDecoderLayerPipe): method __init__ (line 444) | def __init__(self, *args, **kwargs): method forward (line 448) | def forward(self, inputs): method move_mlp_to_cpu (line 478) | def move_mlp_to_cpu(self): method move_mlp_to_device (line 486) | def move_mlp_to_device(self, device): class Gemma3InputLayer (line 492) | class Gemma3InputLayer(nn.Module): method __init__ (line 493) | def __init__(self, model): method model (line 503) | def model(self): method forward (line 506) | def forward(self, inputs): class Gemma3DecoderLayerPipe (line 557) | class Gemma3DecoderLayerPipe(nn.Module): method __init__ (line 558) | def __init__(self, pipeline_model, loader_util, orig): method forward (line 566) | def forward(self, inputs): method move_mlp_to_cpu (line 602) | def move_mlp_to_cpu(self): method move_mlp_to_device (line 615) | def move_mlp_to_device(self, device): class Gemma3RMSNormPipe (line 621) | class Gemma3RMSNormPipe(nn.Module): method __init__ (line 622) | def __init__(self, loader_util, orig): method forward (line 627) | def forward(self, inputs): FILE: models/models.py class LlamaForCausalLMPipe (line 27) | class LlamaForCausalLMPipe(PipelineModel, transformers.LlamaForCausalLM): method __init__ (line 28) | def __init__(self, config, quantization_config): method to_layer_specs (line 37) | def to_layer_specs(self): class Qwen2ForCausalLMPipe (line 59) | class Qwen2ForCausalLMPipe(PipelineModel, transformers.Qwen2ForCausalLM): method __init__ (line 60) | def __init__(self, config, quantization_config): method to_layer_specs (line 69) | def to_layer_specs(self): class CohereForCausalLMPipe (line 88) | class CohereForCausalLMPipe(PipelineModel, transformers.CohereForCausalLM): method __init__ (line 89) | def __init__(self, config, quantization_config): method to_layer_specs (line 98) | def to_layer_specs(self): class Phi3ForCausalLMPipe (line 122) | class Phi3ForCausalLMPipe(PipelineModel, transformers.Phi3ForCausalLM): method __init__ (line 123) | def __init__(self, config, quantization_config): method to_layer_specs (line 132) | def to_layer_specs(self): class Gemma2ForCausalLMPipe (line 150) | class Gemma2ForCausalLMPipe(PipelineModel, transformers.Gemma2ForCausalLM): method __init__ (line 151) | def __init__(self, config, quantization_config): method to_layer_specs (line 160) | def to_layer_specs(self): class MistralForCausalLMPipe (line 185) | class MistralForCausalLMPipe(PipelineModel, transformers.MistralForCausa... method __init__ (line 186) | def __init__(self, config, quantization_config): method to_layer_specs (line 195) | def to_layer_specs(self): class MixtralForCausalLMPipe (line 213) | class MixtralForCausalLMPipe(PipelineModel, transformers.MixtralForCausa... method __init__ (line 214) | def __init__(self, config, quantization_config): method to_layer_specs (line 227) | def to_layer_specs(self): class Gemma3ForCausalLMPipe (line 247) | class Gemma3ForCausalLMPipe(PipelineModel, transformers.Gemma3ForCausalLM): method __init__ (line 248) | def __init__(self, config, quantization_config): method to_layer_specs (line 257) | def to_layer_specs(self): class Cohere2ForCausalLMPipe (line 282) | class Cohere2ForCausalLMPipe(PipelineModel, transformers.Cohere2ForCausa... method __init__ (line 283) | def __init__(self, config, quantization_config): method to_layer_specs (line 292) | def to_layer_specs(self): FILE: models/pipeline_model.py class PipelineModel (line 22) | class PipelineModel(nn.Module): method __init__ (line 23) | def __init__(self, config, quantization_config, model_config): method to_layer_specs (line 43) | def to_layer_specs(self): method set_dpo_reference_mode (line 46) | def set_dpo_reference_mode(self, dpo_reference_mode): method set_sampling_mode (line 49) | def set_sampling_mode(self, sampling_mode): method set_cache (line 66) | def set_cache(self, micro_batch_id): function _partial_module_name_match (line 70) | def _partial_module_name_match(full_name, list_to_match): function _replace_with_quantized_linear (line 74) | def _replace_with_quantized_linear(parent_modules_map, name, full_name, ... function _replace_with_bnb_linear (line 83) | def _replace_with_bnb_linear(parent_modules_map, name, full_name, quanti... function _replace_with_hqq_linear (line 126) | def _replace_with_hqq_linear(parent_modules_map, name, full_name, quanti... function _recursively_replace_with_quantized_linear (line 151) | def _recursively_replace_with_quantized_linear( class LoaderUtil (line 191) | class LoaderUtil: method __init__ (line 192) | def __init__(self, model_path, quantization_config, modules_to_not_qua... method get_partial_state_dict (line 210) | def get_partial_state_dict(self, leaf_file): method maybe_quantize (line 218) | def maybe_quantize(self, module): method load_state_dict_into_module (line 231) | def load_state_dict_into_module(self, module): FILE: tools/convert_dpo_dataset_to_chat_format.py function convert (line 15) | def convert(x): FILE: tools/convert_ds_checkpoint_to_lora.py function convert_ds_checkpoint_to_lora (line 12) | def convert_ds_checkpoint_to_lora(ds_checkpoint_dir, lora_output_dir): FILE: tools/merge_lora.py function find_lora_weights (line 45) | def find_lora_weights(key): FILE: tools/test_sampling.py function bnb_cuda_hijack (line 70) | def bnb_cuda_hijack(self, device): FILE: train.py function print_model_info (line 47) | def print_model_info(model): function set_config_defaults (line 61) | def set_config_defaults(config): function get_most_recent_run_dir (line 66) | def get_most_recent_run_dir(output_dir): function write_metrics (line 70) | def write_metrics(tb_writer, prefix, metrics, step): function evaluate_single (line 144) | def evaluate_single(model_engine, name, eval_dataloader, tb_writer, step... function evaluate (line 168) | def evaluate(model_engine, eval_dataloaders, tb_writer, step, eval_gradi... function apply_max_norm_regularization (line 185) | def apply_max_norm_regularization(model, config): function parse_layers_to_transform (line 232) | def parse_layers_to_transform(spec): function one_at_a_time (line 242) | def one_at_a_time(): function load_pipeline_model_with_lora (line 249) | def load_pipeline_model_with_lora(config, model_type): function bnb_cuda_hijack (line 469) | def bnb_cuda_hijack(self, device): function get_optimizer (line 485) | def get_optimizer(model_parameters): FILE: utils/dataloader.py function split_batch (line 25) | def split_batch(example, pieces): function combine_piecewise (line 41) | def combine_piecewise(a, b, pieces): function flatten_examples (line 54) | def flatten_examples(examples): function example_to_tuple (line 67) | def example_to_tuple(example): function shuffle_list (line 71) | def shuffle_list(l, seed): function batch_size_tokens_after_padding (line 79) | def batch_size_tokens_after_padding(batch): class DistributedBatchSamper (line 84) | class DistributedBatchSamper(torch.utils.data.Sampler): method __init__ (line 85) | def __init__( method should_emit_current_batch (line 160) | def should_emit_current_batch(self, current_batch, slice): method __iter__ (line 174) | def __iter__(self): method __len__ (line 177) | def __len__(self): class PipelineDataLoader (line 181) | class PipelineDataLoader: method __init__ (line 182) | def __init__( method reset (line 248) | def reset(self): method __iter__ (line 254) | def __iter__(self): method __len__ (line 257) | def __len__(self): method __next__ (line 260) | def __next__(self): method _pull_batches_from_dataloader (line 276) | def _pull_batches_from_dataloader(self): method _create_dataloader (line 288) | def _create_dataloader(self): method state_dict (line 296) | def state_dict(self): method load_state_dict (line 302) | def load_state_dict(self, state_dict): method sync_epoch (line 317) | def sync_epoch(self): FILE: utils/dataset_utils.py function yield_sequences_from_token_batch (line 21) | def yield_sequences_from_token_batch(tokenizer, token_batch, sequence_len): function slice_into_chunks (line 50) | def slice_into_chunks(x, sequence_len, overlap=0): function load_raw_dataset (line 58) | def load_raw_dataset(dataset_path, tokenizer, sequence_len, eval_size, o... function load_axolotl_dataset (line 101) | def load_axolotl_dataset(dataset_path, tokenizer, sequence_len, eval_size): function load_pretokenized_dataset (line 120) | def load_pretokenized_dataset(dataset_path, tokenizer, sequence_len, eva... function load_single_dataset (line 136) | def load_single_dataset(dataset_config, tokenizer): function combine_datasets (line 190) | def combine_datasets(dataset_list, config, sample_weights): function process_dataset_for_rejected_sampling (line 219) | def process_dataset_for_rejected_sampling(dataset): function load_datasets (line 250) | def load_datasets(config, tokenizer): FILE: utils/engine.py function initialize (line 47) | def initialize( function unpack_accepted_rejected (line 72) | def unpack_accepted_rejected(example): class LoadMicroBatchMultipleBuffers (line 82) | class LoadMicroBatchMultipleBuffers(PipeInstruction): method __init__ (line 83) | def __init__(self, *buffer_ids, **kwargs): class ReferenceLogitsForwardPass (line 87) | class ReferenceLogitsForwardPass(BufferOpInstruction): class CustomPipelineEngine (line 91) | class CustomPipelineEngine(PipelineEngine): method __init__ (line 92) | def __init__( method set_dataloader (line 137) | def set_dataloader(self, loader): method train_batch (line 144) | def train_batch(self): method eval_batch (line 225) | def eval_batch(self, data_iter): method sample_batch (line 275) | def sample_batch(self, prompts): method _sample_from_iterator (line 305) | def _sample_from_iterator(self, data_iter, collate_fn): method _aggregate_total_losses (line 335) | def _aggregate_total_losses(self): method _exec_forward_pass (line 389) | def _exec_forward_pass(self, buffer_id): method _exec_load_micro_batch_multiple_buffers (line 459) | def _exec_load_micro_batch_multiple_buffers(self, buffer_ids): method _exec_reference_logits_forward_pass (line 512) | def _exec_reference_logits_forward_pass(self, buffer_id): method _exec_send_micro_batch_id (line 570) | def _exec_send_micro_batch_id(self, send_micro_batch_id): method _exec_load_micro_batch_for_sampling (line 593) | def _exec_load_micro_batch_for_sampling(self, buffer_id, inputs): method _exec_sampling_forward_pass (line 603) | def _exec_sampling_forward_pass(self, buffer_id): method _sample_from_logits (line 657) | def _sample_from_logits(self, buffer_id): method _valid_stage (line 666) | def _valid_stage(self, stage_id): method _valid_micro_batch (line 669) | def _valid_micro_batch(self, micro_batch_id): method _exec_sampling_schedule (line 672) | def _exec_sampling_schedule(self, examples, feature_prefix='', max_tot... class ColumnMajorParallelTopology (line 825) | class ColumnMajorParallelTopology(ProcessTopology): method __init__ (line 832) | def __init__(self, num_pp, num_dp): class CustomPipelineModule (line 837) | class CustomPipelineModule(PipelineModule): method __init__ (line 838) | def __init__(self, layers, use_column_major_topology, model=None, **kw... method model (line 854) | def model(self): method set_dpo_reference_mode (line 857) | def set_dpo_reference_mode(self, dpo_reference_mode): method set_sampling_mode (line 860) | def set_sampling_mode(self, sampling_mode): method _partition_layers (line 863) | def _partition_layers(self, method='uniform'): class DPOTrainSchedule (line 926) | class DPOTrainSchedule(PipeSchedule): method steps (line 929) | def steps(self): method num_pipe_buffers (line 986) | def num_pipe_buffers(self): method _step_to_micro_batch (line 993) | def _step_to_micro_batch(self, step_id): method _even_step_forward_id (line 1015) | def _even_step_forward_id(self, step_id): method _odd_step_forward_id (line 1020) | def _odd_step_forward_id(self, step_id): method _even_step_backward_id (line 1025) | def _even_step_backward_id(self, step_id): method _odd_step_backward_id (line 1030) | def _odd_step_backward_id(self, step_id): method _buffer_idx (line 1036) | def _buffer_idx(self, micro_batch_id): class DPOInferenceSchedule (line 1041) | class DPOInferenceSchedule(PipeSchedule): method steps (line 1042) | def steps(self): method num_pipe_buffers (line 1089) | def num_pipe_buffers(self): FILE: utils/hqq_utils.py function _maybe_include_all_linear_layers (line 15) | def _maybe_include_all_linear_layers(peft_config: peft.PeftConfig, model... class CustomHQQConfig (line 55) | class CustomHQQConfig: method __post_init__ (line 64) | def __post_init__(self): method use_aten (line 67) | def use_aten(self): method get_dict (line 70) | def get_dict(self, full_name): FILE: utils/saver.py function need_to_checkpoint (line 20) | def need_to_checkpoint(config): function convert_state_dict_dtype (line 38) | def convert_state_dict_dtype(state_dict, dtype): class Saver (line 43) | class Saver: method __init__ (line 44) | def __init__(self, model_engine, pipeline_model, train_dataloader, lor... method save_lora (line 69) | def save_lora(self, name): method save_full_model (line 105) | def save_full_model(self, name, max_shard_size='5GB'): method will_save (line 153) | def will_save(self, type, name): method save_model (line 169) | def save_model(self, name): method save_checkpoint (line 175) | def save_checkpoint(self, step): method process_epoch (line 187) | def process_epoch(self, epoch, step): method process_step (line 202) | def process_step(self, step): method append_eval_results (line 247) | def append_eval_results(self, loss, save_best=True): method safe_rmtree (line 265) | def safe_rmtree(self, dir_path, max_retries=5, initial_wait_seconds=1): FILE: utils/unsloth_utils.py class Unsloth_Offloaded_Gradient_Checkpointer (line 23) | class Unsloth_Offloaded_Gradient_Checkpointer(torch.autograd.Function): method forward (line 32) | def forward(ctx, forward_function, hidden_states, *args): method backward (line 45) | def backward(ctx, *grads): function unsloth_checkpoint (line 70) | def unsloth_checkpoint(function, *args): FILE: utils/utils.py function log (line 16) | def log(msg): function eta_str (line 20) | def eta_str(eta):