SYMBOL INDEX (222 symbols across 17 files)

FILE: kernels/cross_entropy_loss.py
  function _cross_entropy_forward (line 28) | def _cross_entropy_forward(
  function _chunked_cross_entropy_forward (line 99) | def _chunked_cross_entropy_forward(
  function _cross_entropy_backward (line 180) | def _cross_entropy_backward(
  class Fast_CrossEntropyLoss (line 245) | class Fast_CrossEntropyLoss(torch.autograd.Function):
    method forward (line 247) | def forward(ctx, logits, labels, logit_scale=1.0):
    method backward (line 314) | def backward(ctx, dlosses):
  function fast_cross_entropy_loss (line 352) | def fast_cross_entropy_loss(logits, labels, logit_scale=1.0):

FILE: kernels/utils.py
  function device_warp_size (line 26) | def device_warp_size():
  function calculate_settings (line 33) | def calculate_settings(n):
  function QUANT_STATE (line 62) | def QUANT_STATE(W):
  function get_lora_parameters (line 69) | def get_lora_parameters(proj):
  function fast_dequantize (line 88) | def fast_dequantize(W, quant_state=None, out=None):
  function fast_gemv (line 144) | def fast_gemv(X, W, quant_state, out=None):
  function fast_linear_forward (line 222) | def fast_linear_forward(proj, X, temp_lora=None, out=None):
  function matmul_lora (line 265) | def matmul_lora(X, W, W_quant, A, B, s, out=None):

FILE: models/layers.py
  function move_data_to_device (line 12) | def move_data_to_device(module, device):
  function set_data (line 27) | def set_data(module, data):
  function move_experts_to_device (line 38) | def move_experts_to_device(experts, device, num_experts_to_offload):
  function set_experts_data (line 48) | def set_experts_data(experts, orig_data):
  function entropy_fn (line 55) | def entropy_fn(logits):
  function top_k_accuracy (line 65) | def top_k_accuracy(logits, labels, k_list, ignore_index=-100):
  class LayerSpec (line 77) | class LayerSpec(ds_pipe_module.LayerSpec):
    method __init__ (line 78) | def __init__(self, typename, *module_args, **module_kwargs):
    method build (line 81) | def build(self):
    method estimated_size (line 86) | def estimated_size(self):
  class OutputLayer (line 92) | class OutputLayer(nn.Module):
    method __init__ (line 93) | def __init__(
    method forward (line 121) | def forward(self, inputs):
  function load_balancing_loss_func (line 238) | def load_balancing_loss_func(gate_logits: torch.Tensor, num_experts: tor...
  class MixtralOutputLayer (line 255) | class MixtralOutputLayer(OutputLayer):
    method __init__ (line 256) | def __init__(
    method forward (line 271) | def forward(self, inputs):
  class InputLayer (line 288) | class InputLayer(nn.Module):
    method __init__ (line 289) | def __init__(self, model):
    method model (line 298) | def model(self):
    method forward (line 301) | def forward(self, inputs):
  class LlamaRMSNormPipe (line 351) | class LlamaRMSNormPipe(nn.Module):
    method __init__ (line 352) | def __init__(self, loader_util, orig):
    method forward (line 357) | def forward(self, inputs):
  class LlamaDecoderLayerPipe (line 362) | class LlamaDecoderLayerPipe(nn.Module):
    method __init__ (line 363) | def __init__(self, pipeline_model, loader_util, orig):
    method forward (line 377) | def forward(self, inputs):
    method move_mlp_to_cpu (line 405) | def move_mlp_to_cpu(self):
    method move_mlp_to_device (line 418) | def move_mlp_to_device(self, device):
  class Phi3DecoderLayerPipe (line 424) | class Phi3DecoderLayerPipe(LlamaDecoderLayerPipe):
    method __init__ (line 425) | def __init__(self, *args, **kwargs):
    method move_mlp_to_cpu (line 428) | def move_mlp_to_cpu(self):
    method move_mlp_to_device (line 438) | def move_mlp_to_device(self, device):
  class MixtralDecoderLayerPipe (line 443) | class MixtralDecoderLayerPipe(LlamaDecoderLayerPipe):
    method __init__ (line 444) | def __init__(self, *args, **kwargs):
    method forward (line 448) | def forward(self, inputs):
    method move_mlp_to_cpu (line 478) | def move_mlp_to_cpu(self):
    method move_mlp_to_device (line 486) | def move_mlp_to_device(self, device):
  class Gemma3InputLayer (line 492) | class Gemma3InputLayer(nn.Module):
    method __init__ (line 493) | def __init__(self, model):
    method model (line 503) | def model(self):
    method forward (line 506) | def forward(self, inputs):
  class Gemma3DecoderLayerPipe (line 557) | class Gemma3DecoderLayerPipe(nn.Module):
    method __init__ (line 558) | def __init__(self, pipeline_model, loader_util, orig):
    method forward (line 566) | def forward(self, inputs):
    method move_mlp_to_cpu (line 602) | def move_mlp_to_cpu(self):
    method move_mlp_to_device (line 615) | def move_mlp_to_device(self, device):
  class Gemma3RMSNormPipe (line 621) | class Gemma3RMSNormPipe(nn.Module):
    method __init__ (line 622) | def __init__(self, loader_util, orig):
    method forward (line 627) | def forward(self, inputs):

FILE: models/models.py
  class LlamaForCausalLMPipe (line 27) | class LlamaForCausalLMPipe(PipelineModel, transformers.LlamaForCausalLM):
    method __init__ (line 28) | def __init__(self, config, quantization_config):
    method to_layer_specs (line 37) | def to_layer_specs(self):
  class Qwen2ForCausalLMPipe (line 59) | class Qwen2ForCausalLMPipe(PipelineModel, transformers.Qwen2ForCausalLM):
    method __init__ (line 60) | def __init__(self, config, quantization_config):
    method to_layer_specs (line 69) | def to_layer_specs(self):
  class CohereForCausalLMPipe (line 88) | class CohereForCausalLMPipe(PipelineModel, transformers.CohereForCausalLM):
    method __init__ (line 89) | def __init__(self, config, quantization_config):
    method to_layer_specs (line 98) | def to_layer_specs(self):
  class Phi3ForCausalLMPipe (line 122) | class Phi3ForCausalLMPipe(PipelineModel, transformers.Phi3ForCausalLM):
    method __init__ (line 123) | def __init__(self, config, quantization_config):
    method to_layer_specs (line 132) | def to_layer_specs(self):
  class Gemma2ForCausalLMPipe (line 150) | class Gemma2ForCausalLMPipe(PipelineModel, transformers.Gemma2ForCausalLM):
    method __init__ (line 151) | def __init__(self, config, quantization_config):
    method to_layer_specs (line 160) | def to_layer_specs(self):
  class MistralForCausalLMPipe (line 185) | class MistralForCausalLMPipe(PipelineModel, transformers.MistralForCausa...
    method __init__ (line 186) | def __init__(self, config, quantization_config):
    method to_layer_specs (line 195) | def to_layer_specs(self):
  class MixtralForCausalLMPipe (line 213) | class MixtralForCausalLMPipe(PipelineModel, transformers.MixtralForCausa...
    method __init__ (line 214) | def __init__(self, config, quantization_config):
    method to_layer_specs (line 227) | def to_layer_specs(self):
  class Gemma3ForCausalLMPipe (line 247) | class Gemma3ForCausalLMPipe(PipelineModel, transformers.Gemma3ForCausalLM):
    method __init__ (line 248) | def __init__(self, config, quantization_config):
    method to_layer_specs (line 257) | def to_layer_specs(self):
  class Cohere2ForCausalLMPipe (line 282) | class Cohere2ForCausalLMPipe(PipelineModel, transformers.Cohere2ForCausa...
    method __init__ (line 283) | def __init__(self, config, quantization_config):
    method to_layer_specs (line 292) | def to_layer_specs(self):

FILE: models/pipeline_model.py
  class PipelineModel (line 22) | class PipelineModel(nn.Module):
    method __init__ (line 23) | def __init__(self, config, quantization_config, model_config):
    method to_layer_specs (line 43) | def to_layer_specs(self):
    method set_dpo_reference_mode (line 46) | def set_dpo_reference_mode(self, dpo_reference_mode):
    method set_sampling_mode (line 49) | def set_sampling_mode(self, sampling_mode):
    method set_cache (line 66) | def set_cache(self, micro_batch_id):
  function _partial_module_name_match (line 70) | def _partial_module_name_match(full_name, list_to_match):
  function _replace_with_quantized_linear (line 74) | def _replace_with_quantized_linear(parent_modules_map, name, full_name, ...
  function _replace_with_bnb_linear (line 83) | def _replace_with_bnb_linear(parent_modules_map, name, full_name, quanti...
  function _replace_with_hqq_linear (line 126) | def _replace_with_hqq_linear(parent_modules_map, name, full_name, quanti...
  function _recursively_replace_with_quantized_linear (line 151) | def _recursively_replace_with_quantized_linear(
  class LoaderUtil (line 191) | class LoaderUtil:
    method __init__ (line 192) | def __init__(self, model_path, quantization_config, modules_to_not_qua...
    method get_partial_state_dict (line 210) | def get_partial_state_dict(self, leaf_file):
    method maybe_quantize (line 218) | def maybe_quantize(self, module):
    method load_state_dict_into_module (line 231) | def load_state_dict_into_module(self, module):

FILE: tools/convert_dpo_dataset_to_chat_format.py
  function convert (line 15) | def convert(x):

FILE: tools/convert_ds_checkpoint_to_lora.py
  function convert_ds_checkpoint_to_lora (line 12) | def convert_ds_checkpoint_to_lora(ds_checkpoint_dir, lora_output_dir):

FILE: tools/merge_lora.py
  function find_lora_weights (line 45) | def find_lora_weights(key):

FILE: tools/test_sampling.py
  function bnb_cuda_hijack (line 70) | def bnb_cuda_hijack(self, device):

FILE: train.py
  function print_model_info (line 47) | def print_model_info(model):
  function set_config_defaults (line 61) | def set_config_defaults(config):
  function get_most_recent_run_dir (line 66) | def get_most_recent_run_dir(output_dir):
  function write_metrics (line 70) | def write_metrics(tb_writer, prefix, metrics, step):
  function evaluate_single (line 144) | def evaluate_single(model_engine, name, eval_dataloader, tb_writer, step...
  function evaluate (line 168) | def evaluate(model_engine, eval_dataloaders, tb_writer, step, eval_gradi...
  function apply_max_norm_regularization (line 185) | def apply_max_norm_regularization(model, config):
  function parse_layers_to_transform (line 232) | def parse_layers_to_transform(spec):
  function one_at_a_time (line 242) | def one_at_a_time():
  function load_pipeline_model_with_lora (line 249) | def load_pipeline_model_with_lora(config, model_type):
  function bnb_cuda_hijack (line 469) | def bnb_cuda_hijack(self, device):
  function get_optimizer (line 485) | def get_optimizer(model_parameters):

FILE: utils/dataloader.py
  function split_batch (line 25) | def split_batch(example, pieces):
  function combine_piecewise (line 41) | def combine_piecewise(a, b, pieces):
  function flatten_examples (line 54) | def flatten_examples(examples):
  function example_to_tuple (line 67) | def example_to_tuple(example):
  function shuffle_list (line 71) | def shuffle_list(l, seed):
  function batch_size_tokens_after_padding (line 79) | def batch_size_tokens_after_padding(batch):
  class DistributedBatchSamper (line 84) | class DistributedBatchSamper(torch.utils.data.Sampler):
    method __init__ (line 85) | def __init__(
    method should_emit_current_batch (line 160) | def should_emit_current_batch(self, current_batch, slice):
    method __iter__ (line 174) | def __iter__(self):
    method __len__ (line 177) | def __len__(self):
  class PipelineDataLoader (line 181) | class PipelineDataLoader:
    method __init__ (line 182) | def __init__(
    method reset (line 248) | def reset(self):
    method __iter__ (line 254) | def __iter__(self):
    method __len__ (line 257) | def __len__(self):
    method __next__ (line 260) | def __next__(self):
    method _pull_batches_from_dataloader (line 276) | def _pull_batches_from_dataloader(self):
    method _create_dataloader (line 288) | def _create_dataloader(self):
    method state_dict (line 296) | def state_dict(self):
    method load_state_dict (line 302) | def load_state_dict(self, state_dict):
    method sync_epoch (line 317) | def sync_epoch(self):

FILE: utils/dataset_utils.py
  function yield_sequences_from_token_batch (line 21) | def yield_sequences_from_token_batch(tokenizer, token_batch, sequence_len):
  function slice_into_chunks (line 50) | def slice_into_chunks(x, sequence_len, overlap=0):
  function load_raw_dataset (line 58) | def load_raw_dataset(dataset_path, tokenizer, sequence_len, eval_size, o...
  function load_axolotl_dataset (line 101) | def load_axolotl_dataset(dataset_path, tokenizer, sequence_len, eval_size):
  function load_pretokenized_dataset (line 120) | def load_pretokenized_dataset(dataset_path, tokenizer, sequence_len, eva...
  function load_single_dataset (line 136) | def load_single_dataset(dataset_config, tokenizer):
  function combine_datasets (line 190) | def combine_datasets(dataset_list, config, sample_weights):
  function process_dataset_for_rejected_sampling (line 219) | def process_dataset_for_rejected_sampling(dataset):
  function load_datasets (line 250) | def load_datasets(config, tokenizer):

FILE: utils/engine.py
  function initialize (line 47) | def initialize(
  function unpack_accepted_rejected (line 72) | def unpack_accepted_rejected(example):
  class LoadMicroBatchMultipleBuffers (line 82) | class LoadMicroBatchMultipleBuffers(PipeInstruction):
    method __init__ (line 83) | def __init__(self, *buffer_ids, **kwargs):
  class ReferenceLogitsForwardPass (line 87) | class ReferenceLogitsForwardPass(BufferOpInstruction):
  class CustomPipelineEngine (line 91) | class CustomPipelineEngine(PipelineEngine):
    method __init__ (line 92) | def __init__(
    method set_dataloader (line 137) | def set_dataloader(self, loader):
    method train_batch (line 144) | def train_batch(self):
    method eval_batch (line 225) | def eval_batch(self, data_iter):
    method sample_batch (line 275) | def sample_batch(self, prompts):
    method _sample_from_iterator (line 305) | def _sample_from_iterator(self, data_iter, collate_fn):
    method _aggregate_total_losses (line 335) | def _aggregate_total_losses(self):
    method _exec_forward_pass (line 389) | def _exec_forward_pass(self, buffer_id):
    method _exec_load_micro_batch_multiple_buffers (line 459) | def _exec_load_micro_batch_multiple_buffers(self, buffer_ids):
    method _exec_reference_logits_forward_pass (line 512) | def _exec_reference_logits_forward_pass(self, buffer_id):
    method _exec_send_micro_batch_id (line 570) | def _exec_send_micro_batch_id(self, send_micro_batch_id):
    method _exec_load_micro_batch_for_sampling (line 593) | def _exec_load_micro_batch_for_sampling(self, buffer_id, inputs):
    method _exec_sampling_forward_pass (line 603) | def _exec_sampling_forward_pass(self, buffer_id):
    method _sample_from_logits (line 657) | def _sample_from_logits(self, buffer_id):
    method _valid_stage (line 666) | def _valid_stage(self, stage_id):
    method _valid_micro_batch (line 669) | def _valid_micro_batch(self, micro_batch_id):
    method _exec_sampling_schedule (line 672) | def _exec_sampling_schedule(self, examples, feature_prefix='', max_tot...
  class ColumnMajorParallelTopology (line 825) | class ColumnMajorParallelTopology(ProcessTopology):
    method __init__ (line 832) | def __init__(self, num_pp, num_dp):
  class CustomPipelineModule (line 837) | class CustomPipelineModule(PipelineModule):
    method __init__ (line 838) | def __init__(self, layers, use_column_major_topology, model=None, **kw...
    method model (line 854) | def model(self):
    method set_dpo_reference_mode (line 857) | def set_dpo_reference_mode(self, dpo_reference_mode):
    method set_sampling_mode (line 860) | def set_sampling_mode(self, sampling_mode):
    method _partition_layers (line 863) | def _partition_layers(self, method='uniform'):
  class DPOTrainSchedule (line 926) | class DPOTrainSchedule(PipeSchedule):
    method steps (line 929) | def steps(self):
    method num_pipe_buffers (line 986) | def num_pipe_buffers(self):
    method _step_to_micro_batch (line 993) | def _step_to_micro_batch(self, step_id):
    method _even_step_forward_id (line 1015) | def _even_step_forward_id(self, step_id):
    method _odd_step_forward_id (line 1020) | def _odd_step_forward_id(self, step_id):
    method _even_step_backward_id (line 1025) | def _even_step_backward_id(self, step_id):
    method _odd_step_backward_id (line 1030) | def _odd_step_backward_id(self, step_id):
    method _buffer_idx (line 1036) | def _buffer_idx(self, micro_batch_id):
  class DPOInferenceSchedule (line 1041) | class DPOInferenceSchedule(PipeSchedule):
    method steps (line 1042) | def steps(self):
    method num_pipe_buffers (line 1089) | def num_pipe_buffers(self):

FILE: utils/hqq_utils.py
  function _maybe_include_all_linear_layers (line 15) | def _maybe_include_all_linear_layers(peft_config: peft.PeftConfig, model...
  class CustomHQQConfig (line 55) | class CustomHQQConfig:
    method __post_init__ (line 64) | def __post_init__(self):
    method use_aten (line 67) | def use_aten(self):
    method get_dict (line 70) | def get_dict(self, full_name):

FILE: utils/saver.py
  function need_to_checkpoint (line 20) | def need_to_checkpoint(config):
  function convert_state_dict_dtype (line 38) | def convert_state_dict_dtype(state_dict, dtype):
  class Saver (line 43) | class Saver:
    method __init__ (line 44) | def __init__(self, model_engine, pipeline_model, train_dataloader, lor...
    method save_lora (line 69) | def save_lora(self, name):
    method save_full_model (line 105) | def save_full_model(self, name, max_shard_size='5GB'):
    method will_save (line 153) | def will_save(self, type, name):
    method save_model (line 169) | def save_model(self, name):
    method save_checkpoint (line 175) | def save_checkpoint(self, step):
    method process_epoch (line 187) | def process_epoch(self, epoch, step):
    method process_step (line 202) | def process_step(self, step):
    method append_eval_results (line 247) | def append_eval_results(self, loss, save_best=True):
    method safe_rmtree (line 265) | def safe_rmtree(self, dir_path, max_retries=5, initial_wait_seconds=1):

FILE: utils/unsloth_utils.py
  class Unsloth_Offloaded_Gradient_Checkpointer (line 23) | class Unsloth_Offloaded_Gradient_Checkpointer(torch.autograd.Function):
    method forward (line 32) | def forward(ctx, forward_function, hidden_states, *args):
    method backward (line 45) | def backward(ctx, *grads):
  function unsloth_checkpoint (line 70) | def unsloth_checkpoint(function, *args):

FILE: utils/utils.py
  function log (line 16) | def log(msg):
  function eta_str (line 20) | def eta_str(eta):