SYMBOL INDEX (830 symbols across 67 files)

FILE: .github/scripts/auditwheel_show.py
  function main (line 5) | def main():

FILE: .github/scripts/set_platform_tag.py
  function get_platform_tag (line 6) | def get_platform_tag(architecture):
  function main (line 21) | def main():

FILE: agents/fetch_issues.py
  function gh_graphql (line 69) | def gh_graphql(query: str, variables: dict) -> dict:
  function transform_reactions (line 84) | def transform_reactions(reaction_groups: list) -> dict:
  function transform_timeline_event (line 94) | def transform_timeline_event(event: dict) -> dict | None:
  function transform_issue (line 122) | def transform_issue(raw: dict) -> dict:
  function fetch_all_issues (line 161) | def fetch_all_issues(owner: str, repo: str, states: list[str] | None = N...
  function main (line 223) | def main():

FILE: agents/query_issues.py
  function load_data (line 162) | def load_data(path: str) -> dict:
  function all_issues (line 167) | def all_issues(data: dict) -> list[dict]:
  function format_compact (line 171) | def format_compact(issue: dict) -> str:
  function format_list_line (line 182) | def format_list_line(issue: dict) -> str:
  function format_detail (line 201) | def format_detail(issue: dict, brief: bool = False) -> str:
  function tokenize (line 274) | def tokenize(text: str) -> set[str]:
  function extract_signatures (line 285) | def extract_signatures(text: str) -> set[str]:
  function find_related (line 314) | def find_related(target: dict, issues: list[dict], state_filter: str | N...
  function format_related_result (line 345) | def format_related_result(score, issue, sig_ol, tok_ol, verbose=False):
  function cmd_list (line 367) | def cmd_list(args, data):
  function cmd_search (line 403) | def cmd_search(args, data):
  function cmd_related (line 436) | def cmd_related(args, data):
  function cmd_batch_related (line 458) | def cmd_batch_related(args, data):
  function cmd_show (line 486) | def cmd_show(args, data):
  function cmd_top (line 502) | def cmd_top(args, data):
  function cmd_stats (line 515) | def cmd_stats(args, data):
  function main (line 537) | def main():

FILE: benchmarking/inference_benchmark.py
  function parse_args (line 83) | def parse_args():
  function run_benchmark (line 120) | def run_benchmark(args, config, batch_size):

FILE: benchmarking/int8/training_benchmark.py
  function test_bench_8bit_training (line 28) | def test_bench_8bit_training(batch, seq, model, hidden):

FILE: benchmarking/matmul_benchmark.py
  function test_bench_matmul (line 30) | def test_bench_matmul(batch, seq, model, hidden):

FILE: benchmarking/optimizer_benchmark.py
  function test_stream_optimizer_bench (line 23) | def test_stream_optimizer_bench(dim1, gtype, optim_name, mode):

FILE: benchmarking/xpu/inference_benchmark.py
  function get_inputs (line 34) | def get_inputs(tokenizer):
  function get_streamer (line 45) | def get_streamer(tokenizer):
  class Streamer (line 51) | class Streamer:
    method __init__ (line 52) | def __init__(self, tokenizer, print_median=False):
    method put (line 57) | def put(self, t):
    method print_report (line 68) | def print_report(self):
    method end (line 78) | def end(self, *args):
  function parse_arguments (line 82) | def parse_arguments():

FILE: bitsandbytes/__init__.py
  function _import_backends (line 52) | def _import_backends():

FILE: bitsandbytes/_ops.py
  function _ (line 26) | def _(
  function _ (line 53) | def _(
  function _ (line 72) | def _(A: torch.Tensor, B: torch.Tensor):
  function _ (line 88) | def _(A: torch.Tensor, B: torch.Tensor, out: torch.Tensor):
  function _ (line 105) | def _(A: torch.Tensor, threshold=0.0):
  function _ (line 121) | def _(A: torch.Tensor, stats: torch.Tensor) -> torch.Tensor:
  function _ (line 128) | def _(A: torch.Tensor, stats: torch.Tensor):
  function _ (line 140) | def _(
  function _ (line 158) | def _(
  function _ (line 178) | def _(
  function _ (line 197) | def _(
  function _ (line 219) | def _(
  function _ (line 238) | def _(A: torch.Tensor, absmax: torch.Tensor, code: torch.Tensor, blocksi...
  function _ (line 251) | def _(
  function _ (line 265) | def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torc...
  function _ (line 281) | def _(
  function _ (line 305) | def _(
  function _ (line 339) | def _(
  function _ (line 382) | def _(

FILE: bitsandbytes/autograd/_functions.py
  class GlobalOutlierPooler (line 25) | class GlobalOutlierPooler:
    method __init__ (line 28) | def __init__(self):
    method initialize (line 31) | def initialize(self):
    method get_instance (line 36) | def get_instance(cls):
    method add_outliers (line 42) | def add_outliers(self, outlier_idx, feature_dim):
    method get_current_outlier_idx (line 50) | def get_current_outlier_idx(self):
  class MatmulLtState (line 58) | class MatmulLtState:
    method __getattr__ (line 82) | def __getattr__(self, name):
    method reset_grads (line 92) | def reset_grads(self):
  class MatMul8bitLt (line 101) | class MatMul8bitLt(torch.autograd.Function):
    method forward (line 103) | def forward(
    method backward (line 202) | def backward(ctx: torch.autograd.function.FunctionCtx, grad_output: to...
  class MatMul8bitFp (line 245) | class MatMul8bitFp(torch.autograd.Function):
    method forward (line 252) | def forward(ctx, A, B, out=None, bias=None, state=MatmulLtState):
    method backward (line 274) | def backward(ctx, grad_output):
  class MatMul4Bit (line 300) | class MatMul4Bit(torch.autograd.Function):
    method forward (line 304) | def forward(ctx, A, B, out=None, bias=None, quant_state: Optional[F.Qu...
    method backward (line 337) | def backward(ctx, grad_output):
  function matmul (line 359) | def matmul(
  function matmul_4bit (line 377) | def matmul_4bit(

FILE: bitsandbytes/backends/cpu/ops.py
  function _ (line 25) | def _(A: torch.Tensor, B: torch.Tensor):
  function _ (line 35) | def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torc...
  function _ (line 77) | def _(
  function _ (line 124) | def _(
  function _ (line 243) | def _(

FILE: bitsandbytes/backends/cuda/ops.py
  function _ (line 15) | def _(A: torch.Tensor, B: torch.Tensor):
  function _ (line 21) | def _(A: torch.Tensor, B: torch.Tensor, out: torch.Tensor):
  function _int8_linear_matmul_impl (line 25) | def _int8_linear_matmul_impl(A: torch.Tensor, B: torch.Tensor, out: torc...
  function _ (line 89) | def _(
  function _ (line 128) | def _(A: torch.Tensor, threshold=0.0):
  function _ (line 170) | def _(
  function _get_col_absmax (line 189) | def _get_col_absmax(
  function _ (line 211) | def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torc...
  function _ (line 247) | def _(A: torch.Tensor, absmax: torch.Tensor, code: torch.Tensor, blocksi...
  function _ (line 254) | def _(
  function _dequantize_blockwise_impl (line 267) | def _dequantize_blockwise_impl(
  function _ (line 299) | def _(
  function _ (line 346) | def _(
  function _ (line 360) | def _(
  function _dequantize_4bit_impl (line 374) | def _dequantize_4bit_impl(
  function _ (line 420) | def _(
  function _ (line 430) | def _(
  function _gemv_4bit_impl (line 447) | def _gemv_4bit_impl(
  function _optimizer_update_32bit_impl (line 609) | def _optimizer_update_32bit_impl(
  function _optimizer_update_8bit_blockwise_impl (line 668) | def _optimizer_update_8bit_blockwise_impl(

FILE: bitsandbytes/backends/default/ops.py
  function _try_torch_compile (line 12) | def _try_torch_compile(func=None, **compile_kwargs):
  function _ (line 39) | def _(
  function _ (line 62) | def _(
  function _ (line 101) | def _(
  function _ (line 120) | def _(A: torch.Tensor, B: torch.Tensor):
  function _ (line 125) | def _(A: torch.Tensor, B: torch.Tensor, out: torch.Tensor):
  function _int8_linear_matmul_impl (line 130) | def _int8_linear_matmul_impl(A: torch.Tensor, B: torch.Tensor, out: Opti...
  function _ (line 139) | def _(A: torch.Tensor, threshold=0.0):
  function _ (line 177) | def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torc...
  function _ (line 203) | def _(A: torch.Tensor, absmax: torch.Tensor, code: torch.Tensor, blocksi...
  function _ (line 220) | def _(
  function _dequantize_4bit_impl (line 265) | def _dequantize_4bit_impl(
  function _ (line 312) | def _(
  function _ (line 331) | def _(
  function _optimizer_precondition_32bit (line 369) | def _optimizer_precondition_32bit(
  function _optimizer_update_32bit (line 430) | def _optimizer_update_32bit(
  function _ (line 543) | def _(

FILE: bitsandbytes/backends/hpu/ops.py
  function _reverse_4bit_compress_format (line 12) | def _reverse_4bit_compress_format(weight: torch.Tensor):
  function _ (line 20) | def _(

FILE: bitsandbytes/backends/mps/ops.py
  function _get_kernel (line 21) | def _get_kernel():
  function _ (line 36) | def _(
  function _dequantize_4bit_impl (line 56) | def _dequantize_4bit_impl(
  function _ (line 74) | def _(
  function _ (line 88) | def _(
  function _gemv_4bit_impl (line 104) | def _gemv_4bit_impl(
  function _ (line 123) | def _(
  function _ (line 135) | def _(

FILE: bitsandbytes/backends/triton/kernels_4bit.py
  function quantize_fp4_blockwise_kernel (line 20) | def quantize_fp4_blockwise_kernel(
  function quantize_nf4_blockwise_kernel (line 87) | def quantize_nf4_blockwise_kernel(
  function quantize_4bit_blockwise_triton (line 157) | def quantize_4bit_blockwise_triton(A, blocksize, quant_type, blocks, abs...
  function dequant_4bit_body_util (line 183) | def dequant_4bit_body_util(a, offsets, quant_ptr, absmax_ptr, n_elems, Q...
  function dequantize_fp4_tree (line 205) | def dequantize_fp4_tree(val, absmax):
  function dequant_fp4_body_util (line 229) | def dequant_fp4_body_util(a, offsets, absmax_ptr, n_elems, QUANT_BLOCK: ...
  function dequantize_nf4_tree (line 245) | def dequantize_nf4_tree(val):
  function dequant_nf4_body_util (line 285) | def dequant_nf4_body_util(a, offsets, absmax_ptr, n_elems, QUANT_BLOCK: ...
  function dequant_4bit_kernel (line 334) | def dequant_4bit_kernel(
  function dequant_fp4_kernel (line 378) | def dequant_fp4_kernel(
  function dequant_nf4_kernel (line 420) | def dequant_nf4_kernel(
  function dequantize_4bit_impl (line 450) | def dequantize_4bit_impl(
  function dequantize_4bit_impl_passing_code (line 475) | def dequantize_4bit_impl_passing_code(
  function quantize_4bit_blockwise_kernel (line 519) | def quantize_4bit_blockwise_kernel(

FILE: bitsandbytes/backends/triton/kernels_8bit_quant.py
  function dequant_8bit_kernel (line 28) | def dequant_8bit_kernel(
  function dequant_8bit_blockwise (line 45) | def dequant_8bit_blockwise(
  function quantize_8bit_blockwise_kernel (line 84) | def quantize_8bit_blockwise_kernel(
  function quantize_blockwise_triton (line 107) | def quantize_blockwise_triton(A, code, blocksize, absmax=None, out=None):
  function quantize_8bit_blockwise_kernel_util (line 137) | def quantize_8bit_blockwise_kernel_util(
  function dequant_8bit_blockwise_kernel_util (line 180) | def dequant_8bit_blockwise_kernel_util(

FILE: bitsandbytes/backends/triton/kernels_optim.py
  function _optimizer_precondition_2state_32bit (line 36) | def _optimizer_precondition_2state_32bit(
  function _optimizer_precondition_1state_32bit (line 91) | def _optimizer_precondition_1state_32bit(
  function _optimizer_update_2state_32bit_triton_kernel (line 149) | def _optimizer_update_2state_32bit_triton_kernel(
  function _optimizer_update_1state_32bit_triton_kernel (line 234) | def _optimizer_update_1state_32bit_triton_kernel(
  function optimizer_update_32bit_impl (line 339) | def optimizer_update_32bit_impl(
  function _dequantize_blockwise_pytorch (line 488) | def _dequantize_blockwise_pytorch(
  function _quantize_blockwise_pytorch (line 523) | def _quantize_blockwise_pytorch(
  function optimizer_update_8bit_blockwise_pytorch (line 562) | def optimizer_update_8bit_blockwise_pytorch(
  function optimizer_update_8bit_blockwise_triton_quant (line 709) | def optimizer_update_8bit_blockwise_triton_quant(
  function _optimizer_update_1state_8bit_blockwise_triton_kernel (line 856) | def _optimizer_update_1state_8bit_blockwise_triton_kernel(
  function _optimizer_update_2state_8bit_blockwise_triton_kernel (line 939) | def _optimizer_update_2state_8bit_blockwise_triton_kernel(
  function optimizer_update_8bit_blockwise_impl (line 1076) | def optimizer_update_8bit_blockwise_impl(

FILE: bitsandbytes/backends/triton/ops.py
  function quantize_blockwise (line 17) | def quantize_blockwise(A: torch.Tensor, code: torch.Tensor, blocksize: i...
  function dequantize_blockwise (line 25) | def dequantize_blockwise(
  function dequantize_blockwise_inplace (line 42) | def dequantize_blockwise_inplace(
  function quantize_4bit (line 67) | def quantize_4bit(
  function dequantize_4bit (line 104) | def dequantize_4bit(
  function dequantize_4bit_inplace (line 133) | def dequantize_4bit_inplace(
  function gemv_4bit (line 148) | def gemv_4bit(
  function optimizer_update_8bit_blockwise (line 185) | def optimizer_update_8bit_blockwise(
  function optimizer_update_32bit (line 264) | def optimizer_update_32bit(

FILE: bitsandbytes/backends/utils.py
  function get_gaudi_sw_version (line 66) | def get_gaudi_sw_version():

FILE: bitsandbytes/backends/xpu/ops.py
  function _ (line 20) | def _(A: torch.Tensor, B: torch.Tensor):
  function _dequantize_4bit_impl (line 27) | def _dequantize_4bit_impl(
  function _dequantize_blockwise_impl (line 61) | def _dequantize_blockwise_impl(
  function _gemv_4bit_impl (line 81) | def _gemv_4bit_impl(
  function _ (line 165) | def _(
  function _ (line 178) | def _(
  function _ (line 186) | def _(
  function _ (line 199) | def _(
  function _ (line 213) | def _(

FILE: bitsandbytes/cextension.py
  function get_cuda_bnb_library_path (line 22) | def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path:
  class BNBNativeLibrary (line 60) | class BNBNativeLibrary:
    method __init__ (line 64) | def __init__(self, lib: ct.CDLL):
    method __getattr__ (line 68) | def __getattr__(self, name):
    method __getitem__ (line 82) | def __getitem__(self, item):
  class CudaBNBNativeLibrary (line 86) | class CudaBNBNativeLibrary(BNBNativeLibrary):
    method __init__ (line 89) | def __init__(self, lib: ct.CDLL):
  class XpuBNBNativeLibrary (line 95) | class XpuBNBNativeLibrary(BNBNativeLibrary):
    method __init__ (line 98) | def __init__(self, lib: ct.CDLL):
  function get_available_cuda_binary_versions (line 104) | def get_available_cuda_binary_versions() -> list[str]:
  function parse_cuda_version (line 119) | def parse_cuda_version(version_str: str) -> str:
  class ErrorHandlerMockBNBNativeLibrary (line 126) | class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary):
    method __init__ (line 147) | def __init__(self, error_msg: str):
    method _format_lib_error_message (line 175) | def _format_lib_error_message(
    method _format_dependency_error (line 258) | def _format_dependency_error(self) -> str:
    method __getattr__ (line 286) | def __getattr__(self, name):
    method __getitem__ (line 294) | def __getitem__(self, name):
  function get_native_library (line 298) | def get_native_library() -> BNBNativeLibrary:

FILE: bitsandbytes/cuda_specs.py
  class CUDASpecs (line 13) | class CUDASpecs:
    method has_imma (line 19) | def has_imma(self) -> bool:
  function get_compute_capabilities (line 23) | def get_compute_capabilities() -> list[tuple[int, int]]:
  function get_cuda_version_tuple (line 28) | def get_cuda_version_tuple() -> Optional[tuple[int, int]]:
  function get_cuda_version_string (line 46) | def get_cuda_version_string() -> Optional[str]:
  function get_cuda_specs (line 55) | def get_cuda_specs() -> Optional[CUDASpecs]:
  function get_rocm_gpu_arch (line 82) | def get_rocm_gpu_arch() -> str:
  function get_rocm_warpsize (line 114) | def get_rocm_warpsize() -> int:

FILE: bitsandbytes/diagnostics/cuda.py
  function find_cuda_libraries_in_path_list (line 47) | def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Itera...
  function is_relevant_candidate_env_var (line 69) | def is_relevant_candidate_env_var(env_var: str, value: str) -> bool:
  function get_potentially_lib_path_containing_env_vars (line 82) | def get_potentially_lib_path_containing_env_vars() -> dict[str, str]:
  function find_cudart_libraries (line 86) | def find_cudart_libraries() -> Iterator[Path]:
  function _print_cuda_diagnostics (line 110) | def _print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None:
  function _print_hip_diagnostics (line 135) | def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None:
  function print_diagnostics (line 164) | def print_diagnostics(cuda_specs: CUDASpecs) -> None:
  function _print_cuda_runtime_diagnostics (line 171) | def _print_cuda_runtime_diagnostics() -> None:
  function _print_hip_runtime_diagnostics (line 198) | def _print_hip_runtime_diagnostics() -> None:
  function print_runtime_diagnostics (line 225) | def print_runtime_diagnostics() -> None:

FILE: bitsandbytes/diagnostics/main.py
  function sanity_check (line 30) | def sanity_check():
  function get_package_version (line 45) | def get_package_version(name: str) -> str:
  function show_environment (line 53) | def show_environment():
  function main (line 73) | def main():

FILE: bitsandbytes/diagnostics/utils.py
  function print_header (line 6) | def print_header(txt: str, width: int = HEADER_WIDTH, filler: str = "=")...
  function print_dedented (line 11) | def print_dedented(text):

FILE: bitsandbytes/functional.py
  class GlobalPageManager (line 24) | class GlobalPageManager:
    method __init__ (line 27) | def __init__(self):
    method initialize (line 30) | def initialize(self):
    method get_instance (line 34) | def get_instance(cls):
    method prefetch_all (line 40) | def prefetch_all(self, to_cpu=False):
  class CUBLAS_Context (line 48) | class CUBLAS_Context:
    method __init__ (line 51) | def __init__(self):
    method initialize (line 54) | def initialize(self):
    method get_instance (line 58) | def get_instance(cls):
    method get_context (line 64) | def get_context(self, device):
  function _cuda_device_of (line 81) | def _cuda_device_of(a: torch.Tensor):
  function _cuda_device_of (line 86) | def _cuda_device_of(a: torch.Tensor):
  function get_paged (line 90) | def get_paged(*shape, dtype=torch.float32, device=FIRST_CUDA_DEVICE):
  function prefetch_tensor (line 101) | def prefetch_tensor(A: torch.Tensor, to_cpu=False):
  function elementwise_func (line 111) | def elementwise_func(func_name, A, B, value, prefetch=True):
  function fill (line 141) | def fill(A, value, device=None, prefetch=True):
  function _mul (line 145) | def _mul(A, B, device=None):
  function create_linear_map (line 149) | def create_linear_map(signed=True, total_bits=8, add_zero=True):
  function create_normal_map (line 168) | def create_normal_map(offset=0.9677083, use_extra_value=True):
  function create_fp8_map (line 226) | def create_fp8_map(signed=True, exponent_bits=5, precision_bits=2, total...
  function create_dynamic_map (line 295) | def create_dynamic_map(signed=True, max_exponent_bits=7, total_bits=8):
  function is_on_gpu (line 350) | def is_on_gpu(tensors: Iterable[Optional[torch.Tensor]]):
  function _get_tensor_stream (line 386) | def _get_tensor_stream(tensor: Tensor) -> ct.c_void_p:
  function get_ptr (line 398) | def get_ptr(A: Optional[Tensor]) -> Optional[ct.c_void_p]:
  class QuantState (line 413) | class QuantState:
    method __init__ (line 433) | def __init__(
    method __getattr__ (line 454) | def __getattr__(self, name):
    method __getitem__ (line 466) | def __getitem__(self, idx):
    method from_dict (line 487) | def from_dict(cls, qs_dict: dict[str, Any], device: torch.device) -> "...
    method as_dict (line 538) | def as_dict(self, packed: bool = False) -> dict[str, Any]:
    method to (line 573) | def to(self, device):
    method __eq__ (line 582) | def __eq__(self, other):
  function quantize_blockwise (line 606) | def quantize_blockwise(
  function dequantize_blockwise (line 677) | def dequantize_blockwise(
  function get_4bit_type (line 754) | def get_4bit_type(typename, device=None, blocksize=64):
  function quantize_fp4 (line 844) | def quantize_fp4(
  function quantize_nf4 (line 855) | def quantize_nf4(
  function quantize_4bit (line 866) | def quantize_4bit(
  function dequantize_fp4 (line 947) | def dequantize_fp4(
  function dequantize_nf4 (line 957) | def dequantize_nf4(
  function dequantize_4bit (line 967) | def dequantize_4bit(
  function optimizer_update_32bit (line 1044) | def optimizer_update_32bit(
  function optimizer_update_8bit_blockwise (line 1133) | def optimizer_update_8bit_blockwise(
  function check_matmul (line 1179) | def check_matmul(A, B, out, transposed_A, transposed_B, expected_type=to...
  function gemv_4bit (line 1263) | def gemv_4bit(
  function igemm (line 1300) | def igemm(
  function batched_igemm (line 1401) | def batched_igemm(
  function int8_linear_matmul (line 1497) | def int8_linear_matmul(A: torch.Tensor, B: torch.Tensor, out: Optional[t...
  function int8_mm_dequant (line 1523) | def int8_mm_dequant(
  function int8_double_quant (line 1551) | def int8_double_quant(
  function int8_vectorwise_dequant (line 1602) | def int8_vectorwise_dequant(A: torch.Tensor, stats: torch.Tensor):
  function int8_vectorwise_quant (line 1616) | def int8_vectorwise_quant(A: torch.Tensor, threshold=0.0):
  function _convert_weight_packed_for_cpu (line 1637) | def _convert_weight_packed_for_cpu(qweight: torch.Tensor, quant_state: Q...
  function _convert_weight_packed_for_cpu_inverse (line 1691) | def _convert_weight_packed_for_cpu_inverse(
  function has_avx512bf16 (line 1759) | def has_avx512bf16():

FILE: bitsandbytes/nn/modules.py
  class StableEmbedding (line 28) | class StableEmbedding(torch.nn.Embedding):
    method __init__ (line 54) | def __init__(
    method reset_parameters (line 101) | def reset_parameters(self) -> None:
    method _fill_padding_idx_with_zero (line 112) | def _fill_padding_idx_with_zero(self) -> None:
    method forward (line 117) | def forward(self, input: Tensor) -> Tensor:
  class Embedding (line 134) | class Embedding(torch.nn.Embedding):
    method __init__ (line 139) | def __init__(
    method reset_parameters (line 183) | def reset_parameters(self) -> None:
    method _fill_padding_idx_with_zero (line 194) | def _fill_padding_idx_with_zero(self) -> None:
    method forward (line 199) | def forward(self, input: Tensor) -> Tensor:
  class Params4bit (line 213) | class Params4bit(torch.nn.Parameter):
    method __new__ (line 214) | def __new__(
    method __getstate__ (line 243) | def __getstate__(self):
    method __setstate__ (line 249) | def __setstate__(self, state):
    method __getattr__ (line 284) | def __getattr__(self, name):
    method __deepcopy__ (line 297) | def __deepcopy__(self, memo):
    method __copy__ (line 305) | def __copy__(self):
    method from_prequantized (line 312) | def from_prequantized(
    method _quantize (line 337) | def _quantize(self, device):
    method cpu (line 353) | def cpu(self):
    method cuda (line 356) | def cuda(self, device: Optional[int | device | str] = None, non_blocki...
    method xpu (line 361) | def xpu(self, device: Optional[int | device | str] = None, non_blockin...
    method to (line 367) | def to(
    method to (line 375) | def to(self: T, dtype: dtype | str, non_blocking: bool = ...) -> T: ...
    method to (line 378) | def to(self: T, tensor: Tensor, non_blocking: bool = ...) -> T: ...
    method to (line 380) | def to(self, *args, **kwargs):
    method __torch_function__ (line 403) | def __torch_function__(cls, func, types, args=(), kwargs=None):
  function fix_4bit_weight_quant_state_from_module (line 443) | def fix_4bit_weight_quant_state_from_module(module: Union["Embedding4bit...
  class Linear4bit (line 460) | class Linear4bit(nn.Linear):
    method __init__ (line 493) | def __init__(
    method set_compute_type (line 531) | def set_compute_type(self, x):
    method _save_to_state_dict (line 549) | def _save_to_state_dict(self, destination, prefix, keep_vars):
    method forward (line 565) | def forward(self, x: torch.Tensor):
  class LinearFP4 (line 596) | class LinearFP4(Linear4bit):
    method __init__ (line 601) | def __init__(
  class LinearNF4 (line 632) | class LinearNF4(Linear4bit):
    method __init__ (line 644) | def __init__(
  class Int8Params (line 675) | class Int8Params(torch.nn.Parameter):
    method __new__ (line 676) | def __new__(
    method _quantize (line 692) | def _quantize(self, device):
    method cpu (line 705) | def cpu(self):
    method cuda (line 708) | def cuda(self, device: Optional[int | device | str] = None, non_blocki...
    method xpu (line 711) | def xpu(self, device: Optional[int | device | str] = None, non_blockin...
    method __deepcopy__ (line 714) | def __deepcopy__(self, memo):
    method to (line 727) | def to(
    method to (line 735) | def to(self: T, dtype: dtype | str, non_blocking: bool = ...) -> T: ...
    method to (line 738) | def to(self: T, tensor: Tensor, non_blocking: bool = ...) -> T: ...
    method to (line 740) | def to(self, *args, **kwargs):
  function maybe_rearrange_weight (line 767) | def maybe_rearrange_weight(state_dict, prefix, local_metadata, strict, m...
  class Embedding8bit (line 788) | class Embedding8bit(nn.Embedding):
    method __init__ (line 808) | def __init__(self, num_embeddings, embedding_dim, device=None, dtype=N...
    method _save_to_state_dict (line 814) | def _save_to_state_dict(self, destination, prefix, keep_vars):
    method forward (line 817) | def forward(self, input: Tensor) -> Tensor:
  class Embedding4bit (line 835) | class Embedding4bit(nn.Embedding):
    method __init__ (line 856) | def __init__(
    method _forward_with_partial_dequantize (line 885) | def _forward_with_partial_dequantize(self, input: Tensor):
    method _save_to_state_dict (line 918) | def _save_to_state_dict(self, destination, prefix, keep_vars):
    method forward (line 921) | def forward(self, input: Tensor) -> Tensor:
  class EmbeddingFP4 (line 935) | class EmbeddingFP4(Embedding4bit):
    method __init__ (line 936) | def __init__(
  class EmbeddingNF4 (line 954) | class EmbeddingNF4(Embedding4bit):
    method __init__ (line 955) | def __init__(
  class Linear8bitLt (line 973) | class Linear8bitLt(nn.Linear):
    method __init__ (line 1005) | def __init__(
    method _save_to_state_dict (line 1050) | def _save_to_state_dict(self, destination, prefix, keep_vars):
    method _load_from_state_dict (line 1074) | def _load_from_state_dict(
    method init_8bit_state (line 1113) | def init_8bit_state(self):
    method to (line 1119) | def to(self, *args, **kwargs):
    method forward (line 1134) | def forward(self, x: torch.Tensor):
  class OutlierAwareLinear (line 1151) | class OutlierAwareLinear(nn.Linear):
    method __init__ (line 1152) | def __init__(self, input_features, output_features, bias=True, device=...
    method forward_with_outliers (line 1157) | def forward_with_outliers(self, x, outlier_idx):
    method quantize_weight (line 1160) | def quantize_weight(self, w, outlier_idx):
    method forward (line 1163) | def forward(self, x):

FILE: bitsandbytes/nn/parametrize.py
  class Bnb4bitParametrization (line 11) | class Bnb4bitParametrization(nn.Module):
    method __init__ (line 24) | def __init__(self, quant_state: F.QuantState):
    method forward (line 29) | def forward(self, quantized_param: torch.Tensor) -> torch.Tensor:
  function replace_parameter_4bit_prequantized (line 42) | def replace_parameter_4bit_prequantized(
  function replace_parameter_4bit (line 62) | def replace_parameter_4bit(
  function _disable_parametrization_cache (line 129) | def _disable_parametrization_cache(module: nn.Module, inputs: tuple[Any,...
  function _enable_parametrization_cache (line 135) | def _enable_parametrization_cache(module: nn.Module, inputs: tuple[Any, ...
  function _register_parametrization_hooks (line 139) | def _register_parametrization_hooks(module: nn.Module, param_name: str):
  function _parametrized_state_dict_post_hook (line 156) | def _parametrized_state_dict_post_hook(

FILE: bitsandbytes/optim/adagrad.py
  class Adagrad (line 8) | class Adagrad(Optimizer1State):
    method __init__ (line 9) | def __init__(
  class Adagrad8bit (line 67) | class Adagrad8bit(Optimizer1State):
    method __init__ (line 68) | def __init__(
  class Adagrad32bit (line 126) | class Adagrad32bit(Optimizer1State):
    method __init__ (line 127) | def __init__(

FILE: bitsandbytes/optim/adam.py
  class Adam (line 9) | class Adam(Optimizer2State):
    method __init__ (line 10) | def __init__(
  class Adam8bit (line 62) | class Adam8bit(Optimizer2State):
    method __init__ (line 63) | def __init__(
  class Adam32bit (line 126) | class Adam32bit(Optimizer2State):
    method __init__ (line 127) | def __init__(
  class PagedAdam (line 179) | class PagedAdam(Optimizer2State):
    method __init__ (line 180) | def __init__(
  class PagedAdam8bit (line 232) | class PagedAdam8bit(Optimizer2State):
    method __init__ (line 233) | def __init__(
  class PagedAdam32bit (line 296) | class PagedAdam32bit(Optimizer2State):
    method __init__ (line 297) | def __init__(

FILE: bitsandbytes/optim/adamw.py
  class AdamW (line 9) | class AdamW(Optimizer2State):
    method __init__ (line 10) | def __init__(
  class AdamW8bit (line 62) | class AdamW8bit(Optimizer2State):
    method __init__ (line 63) | def __init__(
  class AdamW32bit (line 126) | class AdamW32bit(Optimizer2State):
    method __init__ (line 127) | def __init__(
  class PagedAdamW (line 179) | class PagedAdamW(Optimizer2State):
    method __init__ (line 180) | def __init__(
  class PagedAdamW8bit (line 229) | class PagedAdamW8bit(Optimizer2State):
    method __init__ (line 230) | def __init__(
  class PagedAdamW32bit (line 290) | class PagedAdamW32bit(Optimizer2State):
    method __init__ (line 291) | def __init__(

FILE: bitsandbytes/optim/ademamix.py
  class _ReferenceAdEMAMix (line 11) | class _ReferenceAdEMAMix(torch.optim.Optimizer):
    method __init__ (line 16) | def __init__(
    method step (line 34) | def step(self, closure=None):
  class AdEMAMix (line 107) | class AdEMAMix(Optimizer2State):
    method __init__ (line 108) | def __init__(
    method init_state (line 139) | def init_state(self, group, p, gindex, pindex):
    method update_step (line 176) | def update_step(self, group, p, gindex, pindex):
    method _get_state_double_buffer (line 260) | def _get_state_double_buffer(self, p, dtype=torch.float32):
  class AdEMAMix8bit (line 270) | class AdEMAMix8bit(AdEMAMix):
    method __init__ (line 271) | def __init__(
  class PagedAdEMAMix8bit (line 299) | class PagedAdEMAMix8bit(AdEMAMix8bit):
    method __init__ (line 300) | def __init__(
  class PagedAdEMAMix (line 326) | class PagedAdEMAMix(AdEMAMix):
    method __init__ (line 327) | def __init__(
  class AdEMAMix32bit (line 355) | class AdEMAMix32bit(Optimizer2State):
    method __init__ (line 356) | def __init__(
  class PagedAdEMAMix32bit (line 386) | class PagedAdEMAMix32bit(AdEMAMix32bit):
    method __init__ (line 387) | def __init__(

FILE: bitsandbytes/optim/lamb.py
  class LAMB (line 8) | class LAMB(Optimizer2State):
    method __init__ (line 9) | def __init__(
  class LAMB8bit (line 67) | class LAMB8bit(Optimizer2State):
    method __init__ (line 68) | def __init__(
  class LAMB32bit (line 123) | class LAMB32bit(Optimizer2State):
    method __init__ (line 124) | def __init__(

FILE: bitsandbytes/optim/lars.py
  class LARS (line 11) | class LARS(Optimizer1State):
    method __init__ (line 12) | def __init__(
  class LARS8bit (line 66) | class LARS8bit(Optimizer1State):
    method __init__ (line 67) | def __init__(
  class LARS32bit (line 118) | class LARS32bit(Optimizer1State):
    method __init__ (line 119) | def __init__(
  class PytorchLARS (line 170) | class PytorchLARS(Optimizer):
    method __init__ (line 171) | def __init__(
    method __setstate__ (line 200) | def __setstate__(self, state):
    method step (line 206) | def step(self, closure=None):

FILE: bitsandbytes/optim/lion.py
  class Lion (line 8) | class Lion(Optimizer1State):
    method __init__ (line 9) | def __init__(
  class Lion8bit (line 55) | class Lion8bit(Optimizer1State):
    method __init__ (line 56) | def __init__(
  class Lion32bit (line 99) | class Lion32bit(Optimizer1State):
    method __init__ (line 100) | def __init__(
  class PagedLion (line 143) | class PagedLion(Optimizer1State):
    method __init__ (line 144) | def __init__(
  class PagedLion8bit (line 187) | class PagedLion8bit(Optimizer1State):
    method __init__ (line 188) | def __init__(
  class PagedLion32bit (line 228) | class PagedLion32bit(Optimizer1State):
    method __init__ (line 229) | def __init__(

FILE: bitsandbytes/optim/optimizer.py
  class MockArgs (line 16) | class MockArgs:
    method __init__ (line 17) | def __init__(self, initial_data):
  class GlobalOptimManager (line 22) | class GlobalOptimManager:
    method __init__ (line 29) | def __init__(self):
    method initialize (line 32) | def initialize(self):
    method get_instance (line 40) | def get_instance(cls):
    method register_parameters (line 46) | def register_parameters(self, params):
    method override_config (line 56) | def override_config(self, parameters, key=None, value=None, key_value_...
    method register_module_override (line 109) | def register_module_override(self, module, param_name, config):
  class Optimizer8bit (line 113) | class Optimizer8bit(torch.optim.Optimizer):
    method __init__ (line 116) | def __init__(self, params, defaults, optim_bits=32, is_paged=False):
    method fill_qmap (line 153) | def fill_qmap(self):
    method state_dict (line 157) | def state_dict(self):
    method __setstate__ (line 185) | def __setstate__(self, state):
    method load_state_dict (line 188) | def load_state_dict(self, state_dict, move_to_device=True):
    method to_gpu (line 269) | def to_gpu(self):
    method check_overrides (line 280) | def check_overrides(self):
    method step (line 300) | def step(self, closure=None):
    method get_config (line 337) | def get_config(self, gindex, pindex, group):
    method init_state (line 362) | def init_state(self, group, p, gindex, pindex):
    method update_step (line 365) | def update_step(self, group, p, gindex, pindex):
    method get_state_buffer (line 368) | def get_state_buffer(self, p, dtype=torch.float32):
    method prefetch_state (line 378) | def prefetch_state(self, p):
  class Optimizer2State (line 389) | class Optimizer2State(Optimizer8bit):
    method __init__ (line 390) | def __init__(
    method init_state (line 478) | def init_state(self, group, p, gindex, pindex):
    method update_step (line 521) | def update_step(self, group, p, gindex, pindex):
  class Optimizer1State (line 579) | class Optimizer1State(Optimizer8bit):
    method __init__ (line 580) | def __init__(
    method init_state (line 650) | def init_state(self, group, p, gindex, pindex):
    method update_step (line 687) | def update_step(self, group, p, gindex, pindex):

FILE: bitsandbytes/optim/rmsprop.py
  class RMSprop (line 8) | class RMSprop(Optimizer1State):
    method __init__ (line 9) | def __init__(
  class RMSprop8bit (line 64) | class RMSprop8bit(Optimizer1State):
    method __init__ (line 65) | def __init__(
  class RMSprop32bit (line 117) | class RMSprop32bit(Optimizer1State):
    method __init__ (line 118) | def __init__(

FILE: bitsandbytes/optim/sgd.py
  class SGD (line 8) | class SGD(Optimizer1State):
    method __init__ (line 9) | def __init__(
  class SGD8bit (line 59) | class SGD8bit(Optimizer1State):
    method __init__ (line 60) | def __init__(
  class SGD32bit (line 107) | class SGD32bit(Optimizer1State):
    method __init__ (line 108) | def __init__(

FILE: bitsandbytes/utils.py
  function outlier_hook (line 11) | def outlier_hook(module, input):
  class OutlierTracer (line 44) | class OutlierTracer:
    method __init__ (line 47) | def __init__(self):
    method initialize (line 50) | def initialize(self, model):
    method is_initialized (line 63) | def is_initialized(self):
    method get_hvalue (line 66) | def get_hvalue(self, weight):
    method get_outliers (line 69) | def get_outliers(self, weight):
    method get_instance (line 80) | def get_instance(cls):
  function find_outlier_dims (line 86) | def find_outlier_dims(weight, reduction_dim=0, zscore=4.0, topk=None, rd...
  function execute_and_return (line 104) | def execute_and_return(command_string: str) -> tuple[str, str]:
  function replace_linear (line 121) | def replace_linear(
  function pack_dict_to_tensor (line 166) | def pack_dict_to_tensor(source_dict):
  function unpack_tensor_to_dict (line 183) | def unpack_tensor_to_dict(tensor_data):
  function sync_gpu (line 204) | def sync_gpu(t: torch.Tensor):

FILE: csrc/common.h
  type DataType_t (line 3) | typedef enum DataType_t {

FILE: csrc/cpu_ops.cpp
  function lookup_code_index (line 19) | inline unsigned char lookup_code_index(const float* codebook, float valu...
  function __m256i (line 42) | inline __m256i cvt_fp32_to_fp16(const __m512 src) {
  function __m256i (line 46) | inline __m256i cvt_fp32_to_bf16(const __m512 src) {
  function __m512 (line 70) | static inline __m512 set_nf4_lut() {
  function __m512 (line 78) | static inline __m512 set_fp4_lut() {
  function dequantizeBlockwise4bitCpu (line 89) | void dequantizeBlockwise4bitCpu(
  function dequantizeBlockwise8bitCpu (line 183) | void dequantizeBlockwise8bitCpu(
  function quantize_cpu (line 207) | void quantize_cpu(float* code, float* A, float* absmax, unsigned char* o...
  type tinygemm_kernel_nn (line 267) | struct tinygemm_kernel_nn {
    method apply (line 268) | static inline void apply(
  type tinygemm_kernel_nn<bf16_t, BLOCK_M, BLOCK_N, DATA_TYPE> (line 276) | struct tinygemm_kernel_nn<bf16_t, BLOCK_M, BLOCK_N, DATA_TYPE> {
    method apply (line 277) | static inline void apply(
  function tinygemm_kernel (line 389) | void tinygemm_kernel(
  function gemv_4bit_inference (line 446) | void gemv_4bit_inference(

FILE: csrc/cpu_ops.h
  function block_size_m (line 24) | constexpr int block_size_m() { return 2 * TILE_M; }
  function block_size_n (line 26) | constexpr int block_size_n() { return 2 * TILE_N; }
  function get_cache_blocks (line 28) | int get_cache_blocks(int chunk_size) {
  function const (line 42) | void operator()(const Func& f, Args... args) const {
  type Unroll (line 48) | struct Unroll
  function const (line 49) | void operator()(const Func& f, Args... args) const {
  function get_max_threads (line 58) | inline int get_max_threads() {
  function adjust_num_threads (line 67) | inline int adjust_num_threads(int m) {
  function parallel_2d (line 74) | void parallel_2d(int m, int n, const func_t& f) {
  type fp16_t (line 124) | struct fp16_t {
  type bf16_t (line 128) | struct bf16_t {
  function bf16_to_float (line 139) | static float bf16_to_float(uint16_t bf16) {
  function fp16_t (line 146) | static inline fp16_t float_to_fp16(float x) {
  function dDequantizeFP4 (line 188) | inline float dDequantizeFP4(unsigned char val) {
  function dDequantizeNF4 (line 230) | inline float dDequantizeNF4(unsigned char val) {
  function has_avx512f (line 292) | static inline bool has_avx512f() {
  function has_avx512bf16 (line 302) | static inline bool has_avx512bf16() {
  function has_avx512f (line 312) | static inline bool has_avx512f() {
  function has_avx512bf16 (line 318) | static inline bool has_avx512bf16() {

FILE: csrc/pythonInterface.cpp
  function gemm_4bit_inference_naive_fp16 (line 43) | void gemm_4bit_inference_naive_fp16(
  function gemm_4bit_inference_naive_bf16 (line 50) | void gemm_4bit_inference_naive_bf16(
  function gemm_4bit_inference_naive_fp32 (line 59) | void gemm_4bit_inference_naive_fp32(
  function quantizeBlockwise_fp16 (line 133) | void quantizeBlockwise_fp16(float* code, half* A, float* absmax, unsigne...
  function quantizeBlockwise_fp16_fp4 (line 137) | void quantizeBlockwise_fp16_fp4(float* code, half* A, float* absmax, uns...
  function quantizeBlockwise_fp16_nf4 (line 141) | void quantizeBlockwise_fp16_nf4(float* code, half* A, float* absmax, uns...
  function quantizeBlockwise_bf16 (line 145) | void quantizeBlockwise_bf16(
  function quantizeBlockwise_bf16_fp4 (line 151) | void quantizeBlockwise_bf16_fp4(
  function quantizeBlockwise_bf16_nf4 (line 157) | void quantizeBlockwise_bf16_nf4(
  function quantizeBlockwise_fp32 (line 163) | void quantizeBlockwise_fp32(float* code, float* A, float* absmax, unsign...
  function quantizeBlockwise_fp32_fp4 (line 167) | void quantizeBlockwise_fp32_fp4(float* code, float* A, float* absmax, un...
  function quantizeBlockwise_fp32_nf4 (line 171) | void quantizeBlockwise_fp32_nf4(float* code, float* A, float* absmax, un...
  function dequantizeBlockwise_fp16 (line 175) | void dequantizeBlockwise_fp16(
  function dequantizeBlockwise_fp16_fp4 (line 181) | void dequantizeBlockwise_fp16_fp4(
  function dequantizeBlockwise_fp16_nf4 (line 187) | void dequantizeBlockwise_fp16_nf4(
  function dequantizeBlockwise_fp32 (line 193) | void dequantizeBlockwise_fp32(
  function dequantizeBlockwise_fp32_fp4 (line 199) | void dequantizeBlockwise_fp32_fp4(
  function dequantizeBlockwise_fp32_nf4 (line 205) | void dequantizeBlockwise_fp32_nf4(
  function dequantizeBlockwise_bf16 (line 211) | void dequantizeBlockwise_bf16(
  function dequantizeBlockwise_bf16_fp4 (line 217) | void dequantizeBlockwise_bf16_fp4(
  function dequantizeBlockwise_bf16_nf4 (line 223) | void dequantizeBlockwise_bf16_nf4(
  function igemmlt_32 (line 229) | int igemmlt_32(
  function igemmlt_8 (line 236) | int igemmlt_8(
  function igemmlt_8_rowscale (line 243) | int igemmlt_8_rowscale(
  function dequantizeBlockwise_fp16 (line 254) | void dequantizeBlockwise_fp16(
  function dequantizeBlockwise_fp16_fp4 (line 260) | void dequantizeBlockwise_fp16_fp4(
  function dequantizeBlockwise_fp16_nf4 (line 266) | void dequantizeBlockwise_fp16_nf4(
  function dequantizeBlockwise_fp32 (line 272) | void dequantizeBlockwise_fp32(
  function dequantizeBlockwise_fp32_fp4 (line 278) | void dequantizeBlockwise_fp32_fp4(
  function dequantizeBlockwise_fp32_nf4 (line 284) | void dequantizeBlockwise_fp32_nf4(
  function dequantizeBlockwise_bf16 (line 290) | void dequantizeBlockwise_bf16(
  function dequantizeBlockwise_bf16_fp4 (line 297) | void dequantizeBlockwise_bf16_fp4(
  function dequantizeBlockwise_bf16_nf4 (line 304) | void dequantizeBlockwise_bf16_nf4(
  function gemv_4bit_inference_fp16 (line 311) | void gemv_4bit_inference_fp16(
  function gemv_4bit_inference_bf16 (line 318) | void gemv_4bit_inference_bf16(
  function gemv_4bit_inference_fp32 (line 327) | void gemv_4bit_inference_fp32(
  function cdequantize_blockwise_fp16_fp4 (line 352) | void cdequantize_blockwise_fp16_fp4(
  function cdequantize_blockwise_fp16 (line 358) | void cdequantize_blockwise_fp16(
  function cdequantize_blockwise_fp16_nf4 (line 364) | void cdequantize_blockwise_fp16_nf4(
  function cquantize_blockwise_fp16 (line 370) | void cquantize_blockwise_fp16(float* code, half* A, float* absmax, unsig...
  function cquantize_blockwise_fp16_fp4 (line 374) | void cquantize_blockwise_fp16_fp4(float* code, half* A, float* absmax, u...
  function cquantize_blockwise_fp16_nf4 (line 378) | void cquantize_blockwise_fp16_nf4(float* code, half* A, float* absmax, u...
  function cquantize_blockwise_fp32 (line 382) | void cquantize_blockwise_fp32(float* code, float* A, float* absmax, unsi...
  function cquantize_blockwise_fp32_fp4 (line 386) | void cquantize_blockwise_fp32_fp4(
  function cquantize_blockwise_fp32_nf4 (line 392) | void cquantize_blockwise_fp32_nf4(
  function cdequantize_blockwise_fp32 (line 398) | void cdequantize_blockwise_fp32(
  function cdequantize_blockwise_fp32_fp4 (line 404) | void cdequantize_blockwise_fp32_fp4(
  function cdequantize_blockwise_fp32_nf4 (line 410) | void cdequantize_blockwise_fp32_nf4(
  function cquantize_blockwise_bf16 (line 416) | void cquantize_blockwise_bf16(
  function cquantize_blockwise_bf16_fp4 (line 422) | void cquantize_blockwise_bf16_fp4(
  function cquantize_blockwise_bf16_nf4 (line 428) | void cquantize_blockwise_bf16_nf4(
  function cdequantize_blockwise_bf16 (line 434) | void cdequantize_blockwise_bf16(
  function cdequantize_blockwise_bf16_fp4 (line 440) | void cdequantize_blockwise_bf16_fp4(
  function cdequantize_blockwise_bf16_nf4 (line 446) | void cdequantize_blockwise_bf16_nf4(
  function cigemm (line 512) | cigemm(
  function cbatched_igemm (line 519) | void cbatched_igemm(
  function Context (line 528) | Context* get_context() { return new Context(); }
  function cigemmlt_32 (line 530) | int cigemmlt_32(
  function cigemmlt_8 (line 537) | int cigemmlt_8(
  function cigemmlt_8_rowscale (line 544) | int cigemmlt_8_rowscale(
  function cdequant_mm_int32_fp16 (line 551) | void cdequant_mm_int32_fp16(
  function cint8_vector_quant (line 557) | void cint8_vector_quant(
  function cprefetch (line 571) | void cprefetch(void* ptr, size_t bytes, int device) {
  function cgemm_4bit_inference_naive_fp16 (line 600) | void cgemm_4bit_inference_naive_fp16(
  function cgemm_4bit_inference_naive_bf16 (line 607) | void cgemm_4bit_inference_naive_bf16(
  function cgemm_4bit_inference_naive_fp32 (line 614) | void cgemm_4bit_inference_naive_fp32(
  function cdequantize_blockwise_fp16_fp4 (line 625) | void cdequantize_blockwise_fp16_fp4(
  function cdequantize_blockwise_fp16 (line 631) | void cdequantize_blockwise_fp16(
  function cdequantize_blockwise_fp16_nf4 (line 637) | void cdequantize_blockwise_fp16_nf4(
  function cdequantize_blockwise_fp32 (line 643) | void cdequantize_blockwise_fp32(
  function cdequantize_blockwise_fp32_fp4 (line 649) | void cdequantize_blockwise_fp32_fp4(
  function cdequantize_blockwise_fp32_nf4 (line 655) | void cdequantize_blockwise_fp32_nf4(
  function cdequantize_blockwise_bf16 (line 661) | void cdequantize_blockwise_bf16(
  function cdequantize_blockwise_bf16_fp4 (line 668) | void cdequantize_blockwise_bf16_fp4(
  function cdequantize_blockwise_bf16_nf4 (line 675) | void cdequantize_blockwise_bf16_nf4(
  function cgemv_4bit_inference_fp16 (line 682) | void cgemv_4bit_inference_fp16(
  function cgemv_4bit_inference_bf16 (line 689) | void cgemv_4bit_inference_bf16(
  function cgemv_4bit_inference_fp32 (line 696) | void cgemv_4bit_inference_fp32(
  function cprefetch (line 723) | void cprefetch(void* ptr, size_t bytes, int device) {
  function cfill_fp32 (line 736) | void cfill_fp32(float* A, float* B, float value, long n) {
  function cfill_uint8 (line 745) | void cfill_uint8(unsigned char* A, unsigned char* B, unsigned char value...
  function cquantize_blockwise_cpu_fp32 (line 754) | void cquantize_blockwise_cpu_fp32(
  function cdequantize_blockwise_cpu_fp32 (line 760) | void cdequantize_blockwise_cpu_fp32(
  function cdequantize_blockwise_cpu_bf16 (line 766) | void cdequantize_blockwise_cpu_bf16(
  function cdequantize_blockwise_cpu_fp16 (line 772) | void cdequantize_blockwise_cpu_fp16(
  function cdequantize_blockwise_cpu_fp4_fp32 (line 778) | void cdequantize_blockwise_cpu_fp4_fp32(
  function cdequantize_blockwise_cpu_fp4_bf16 (line 784) | void cdequantize_blockwise_cpu_fp4_bf16(
  function cdequantize_blockwise_cpu_fp4_fp16 (line 790) | void cdequantize_blockwise_cpu_fp4_fp16(
  function cdequantize_blockwise_cpu_nf4_fp32 (line 796) | void cdequantize_blockwise_cpu_nf4_fp32(
  function cdequantize_blockwise_cpu_nf4_bf16 (line 802) | void cdequantize_blockwise_cpu_nf4_bf16(
  function cdequantize_blockwise_cpu_nf4_fp16 (line 808) | void cdequantize_blockwise_cpu_nf4_fp16(
  function gemv_4bit_inference_cpu_fp4_bf16 (line 815) | void gemv_4bit_inference_cpu_fp4_bf16(
  function gemv_4bit_inference_cpu_nf4_bf16 (line 822) | void gemv_4bit_inference_cpu_nf4_bf16(
  function has_avx512f_cpu (line 830) | bool has_avx512f_cpu() { return has_avx512f(); }
  function has_avx512bf16_cpu (line 832) | bool has_avx512bf16_cpu() { return has_avx512bf16(); }

FILE: csrc/xpu_kernels.cpp
  function dDequantizeFP4 (line 8) | inline float dDequantizeFP4(unsigned char val) {
  function dDequantizeNF4 (line 50) | inline float dDequantizeNF4(unsigned char val) {
  function SYCL_EXTERNAL (line 97) | SYCL_EXTERNAL void kDequantizeBlockwise<T, TILE_SIZE, NUM_PER_TH, DATA_T...
  function SYCL_EXTERNAL (line 175) | SYCL_EXTERNAL void
  class kDequantizeBlockwise<sycl::half, 512, 4, FP4> (line 268) | class kDequantizeBlockwise<sycl::half, 512, 4, FP4>
  class kDequantizeBlockwise<sycl::half, 512, 4, General8bit> (line 269) | class kDequantizeBlockwise<sycl::half, 512, 4, General8bit>
  class kDequantizeBlockwise<sycl::half, 512, 4, NF4> (line 270) | class kDequantizeBlockwise<sycl::half, 512, 4, NF4>
  class kDequantizeBlockwise<float, 512, 4, FP4> (line 272) | class kDequantizeBlockwise<float, 512, 4, FP4>
  class kDequantizeBlockwise<float, 512, 4, General8bit> (line 273) | class kDequantizeBlockwise<float, 512, 4, General8bit>
  class kDequantizeBlockwise<float, 512, 4, NF4> (line 274) | class kDequantizeBlockwise<float, 512, 4, NF4>
  class kDequantizeBlockwise<sycl::ext::oneapi::bfloat16, 512, 4, FP4> (line 276) | class kDequantizeBlockwise<sycl::ext::oneapi::bfloat16, 512, 4, FP4>
  class kDequantizeBlockwise<sycl::ext::oneapi::bfloat16, 512, 4, General8bit> (line 277) | class kDequantizeBlockwise<sycl::ext::oneapi::bfloat16, 512, 4, General8...
  class kDequantizeBlockwise<sycl::ext::oneapi::bfloat16, 512, 4, NF4> (line 278) | class kDequantizeBlockwise<sycl::ext::oneapi::bfloat16, 512, 4, NF4>
  class kgemv_4bit_inference<sycl::half, 128, 4, 32, 16> (line 280) | class kgemv_4bit_inference<sycl::half, 128, 4, 32, 16>
  class kgemv_4bit_inference<sycl::ext::oneapi::bfloat16, 128, 4, 32, 16> (line 281) | class kgemv_4bit_inference<sycl::ext::oneapi::bfloat16, 128, 4, 32, 16>
  class kgemv_4bit_inference<float, 128, 4, 32, 32> (line 282) | class kgemv_4bit_inference<float, 128, 4, 32, 32>

FILE: csrc/xpu_ops.cpp
  function dequantizeBlockwise (line 5) | void dequantizeBlockwise(
  function gemv_4bit_inference (line 34) | void gemv_4bit_inference(

FILE: csrc/xpu_ops.h
  function sycl_kernel_submit (line 16) | inline void sycl_kernel_submit(sycl::nd_range<dim> range, sycl::queue q,...
  function sycl_comp_kernel_submit (line 23) | inline void sycl_comp_kernel_submit(sycl::nd_range<dim> range, sycl::que...

FILE: examples/xpu/benchmark_paged_memory.py
  function get_args (line 22) | def get_args():
  function get_torch_dtype (line 37) | def get_torch_dtype(name):
  function get_accelerator (line 41) | def get_accelerator(device_type):
  function count_params (line 48) | def count_params(model):
  function create_model (line 52) | def create_model(args):
  function make_batch (line 67) | def make_batch(args):
  function cleanup (line 74) | def cleanup(device_type):
  function measure_training (line 82) | def measure_training(args, optimizer_name, OptClass):
  function fmt_mb (line 142) | def fmt_mb(nbytes):
  function fmt_gb (line 146) | def fmt_gb(nbytes):
  function main (line 150) | def main():

FILE: examples/xpu/paged_xpu_training.py
  function get_args (line 21) | def get_args():
  function format_alpaca (line 59) | def format_alpaca(example):
  function prepare_data (line 65) | def prepare_data(tokenizer, dataset_name, max_length, num_samples=200):
  function collate_fn (line 80) | def collate_fn(batch):
  function create_optimizer (line 84) | def create_optimizer(model, name, lr):
  function train_loop (line 107) | def train_loop(model, optimizer, dataloader, steps, log_interval, device):
  function get_torch_dtype (line 142) | def get_torch_dtype(name):
  function run_single (line 146) | def run_single(args):
  function run_with_trainer (line 184) | def run_with_trainer(args):
  function run_compare (line 250) | def run_compare(args):
  function main (line 290) | def main():

FILE: install_cuda.py
  function install_cuda (line 18) | def install_cuda(version, base_path, download_path):
  function main (line 67) | def main():

FILE: scripts/stale.py
  function main (line 30) | def main():

FILE: setup.py
  class BinaryDistribution (line 15) | class BinaryDistribution(Distribution):
    method has_ext_modules (line 16) | def has_ext_modules(self):
  class ExtBuildPy (line 20) | class ExtBuildPy(build_py):
    method run (line 21) | def run(self):

FILE: tests/conftest.py
  function _set_seed (line 9) | def _set_seed():
  function pytest_runtest_call (line 17) | def pytest_runtest_call(item):
  function pytest_runtest_teardown (line 36) | def pytest_runtest_teardown(item, nextitem):
  function requires_cuda (line 48) | def requires_cuda() -> bool:

FILE: tests/fsdp_state_dict_save.py
  class SimpleQLoRAModel (line 23) | class SimpleQLoRAModel(nn.Module):
    method __init__ (line 26) | def __init__(self, quant_type="nf4"):
    method forward (line 31) | def forward(self, x):
  function main (line 35) | def main():

FILE: tests/helpers.py
  function get_available_devices (line 21) | def get_available_devices(no_cpu=False):
  function torch_save_to_buffer (line 53) | def torch_save_to_buffer(obj):
  function torch_load_from_buffer (line 60) | def torch_load_from_buffer(buffer):
  function get_test_dims (line 67) | def get_test_dims(min: int, max: int, *, n: int) -> list[int]:
  function format_with_label (line 71) | def format_with_label(label: str, value: Any) -> str:
  function id_formatter (line 83) | def id_formatter(label: str):
  function describe_dtype (line 102) | def describe_dtype(dtype: torch.dtype) -> str:
  function is_supported_on_hpu (line 106) | def is_supported_on_hpu(

FILE: tests/test_autograd.py
  function test_matmullt (line 33) | def test_matmullt(
  function test_matmul_4bit (line 166) | def test_matmul_4bit(

FILE: tests/test_cuda_setup_evaluator.py
  function cuda120_spec (line 8) | def cuda120_spec() -> CUDASpecs:
  function test_get_cuda_bnb_library_path (line 17) | def test_get_cuda_bnb_library_path(monkeypatch, cuda120_spec):
  function test_get_cuda_bnb_library_path_override (line 23) | def test_get_cuda_bnb_library_path_override(monkeypatch, cuda120_spec, c...
  function rocm70_spec (line 31) | def rocm70_spec() -> CUDASpecs:
  function test_get_rocm_bnb_library_path (line 40) | def test_get_rocm_bnb_library_path(monkeypatch, rocm70_spec):
  function test_get_rocm_bnb_library_path_override (line 48) | def test_get_rocm_bnb_library_path_override(monkeypatch, rocm70_spec, ca...
  function test_get_rocm_bnb_library_path_rejects_cuda_override (line 57) | def test_get_rocm_bnb_library_path_rejects_cuda_override(monkeypatch, ro...
  function test_get_rocm_bnb_library_path_rocm_override_takes_priority (line 66) | def test_get_rocm_bnb_library_path_rocm_override_takes_priority(monkeypa...

FILE: tests/test_functional.py
  function assert_all_approx_close (line 27) | def assert_all_approx_close(a, b, rtol=1e-3, atol=1e-3, count=0, throw=T...
  class FFN (line 38) | class FFN(torch.nn.Module):
    method __init__ (line 39) | def __init__(self, input_features, hidden_size, bias=True):
    method forward (line 48) | def forward(self, x):
  class Timer (line 54) | class Timer:
    method __init__ (line 55) | def __init__(self):
    method tick (line 60) | def tick(self, name="default"):
    method tock (line 68) | def tock(self, name="default", evict=True, print_ms=True):
    method reset (line 85) | def reset(self):
  class Test8BitBlockwiseQuantizeFunctional (line 92) | class Test8BitBlockwiseQuantizeFunctional:
    method test_dynamic_blockwise_quantization (line 101) | def test_dynamic_blockwise_quantization(self, device, dtype, nested, b...
    method test_dynamic_blockwise_quantization_large (line 163) | def test_dynamic_blockwise_quantization_large(self, device, dtype, blo...
    method test_blockwise_cpu_large (line 190) | def test_blockwise_cpu_large(self, hidden, blocksize):
    method test_few_bit_quant (line 213) | def test_few_bit_quant(self, device, bits, method):
    method test_fp8_quant (line 265) | def test_fp8_quant(self, device):
    method test_bench_dequantization (line 320) | def test_bench_dequantization(self):
  function test_stable_embedding (line 337) | def test_stable_embedding():
  function quant (line 342) | def quant(x):
  function dequant (line 348) | def dequant(c, maxC):
  function mm_dequant (line 352) | def mm_dequant(maxA, maxB, C):
  function quant_multi (line 356) | def quant_multi(x, dim):
  function quant_multi_chunk (line 363) | def quant_multi_chunk(x, dim, chunk_size=32):
  function mean (line 379) | def mean(xx):
  class TestIGEMMFunctional (line 396) | class TestIGEMMFunctional:
    method test_approx_igemm (line 401) | def test_approx_igemm(self, dim1, dim2, quant_methods, batched):
    method test_igemm (line 440) | def test_igemm(self, hidden_dim, batch_dim, transpose, seq_dim):
    method test_dim3_igemm (line 494) | def test_dim3_igemm(self, seq_dim, hidden_dim, batch_dim):
    method test_minmax_igemm (line 511) | def test_minmax_igemm(self, seq_dim, hidden_dim, batch_dim, transpose):
    method test_ibmm (line 588) | def test_ibmm(self, dim1, dim2, dim3, dim4, transpose):
  class TestLLMInt8Functional (line 616) | class TestLLMInt8Functional:
    method vectorwise_mm_dequant (line 618) | def vectorwise_mm_dequant(xq, S1, S2, dtype=torch.half):
    method vectorwise_quant (line 635) | def vectorwise_quant(x, dim=1):
    method test_int8_linear_matmul (line 648) | def test_int8_linear_matmul(self, device, dim1, dim2, dim3, dim4, dims...
    method test_int8_linear_matmul_half (line 666) | def test_int8_linear_matmul_half(self, device, dim1, dim2, dim3, dim4,...
    method test_dequant_mm (line 689) | def test_dequant_mm(self, device, dim1, dim4, dims, has_bias):
    method test_int8_double_quant (line 728) | def test_int8_double_quant(self, dim1, dim2):
    method test_integrated_int8_linear_matmul (line 772) | def test_integrated_int8_linear_matmul(self, device, dim1, dim4, inner):
    method test_coo_double_quant (line 805) | def test_coo_double_quant(self, device, dim1, dim2):
    method test_coo_int8_vectorwise_quant (line 825) | def test_coo_int8_vectorwise_quant(self, device, dim1, dim2):
  class TestQuantize4BitFunctional (line 839) | class TestQuantize4BitFunctional:
    method test_4bit_quant (line 847) | def test_4bit_quant(self, device, dtype, quant_type, blocksize):
    method test_4bit_compressed_stats (line 930) | def test_4bit_compressed_stats(self, device, quant_type, blocksize, dt...
    method test_4bit_quant_large (line 966) | def test_4bit_quant_large(self, device, dtype, quant_type, blocksize):
    method test_bench_4bit_dequant (line 996) | def test_bench_4bit_dequant(self, quant_type):
    method test_gemv_4bit (line 1031) | def test_gemv_4bit(self, device, dim, dtype, storage_type, double_quan...
    method test_gemv_eye_4bit (line 1179) | def test_gemv_eye_4bit(self, device, storage_type, dtype):
  function test_normal_map_tree (line 1211) | def test_normal_map_tree():

FILE: tests/test_generation.py
  function get_4bit_config (line 12) | def get_4bit_config():
  function get_model_and_tokenizer (line 24) | def get_model_and_tokenizer(config):
  function get_prompt_for_generation_eval (line 44) | def get_prompt_for_generation_eval(text, add_roles=True):
  function generate (line 56) | def generate(model, tokenizer, text, generation_config, prompt_func=get_...
  function model_and_tokenizer (line 68) | def model_and_tokenizer(request):
  function test_pi (line 78) | def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ, dt...

FILE: tests/test_linear4bit.py
  function test_linear_serialization (line 39) | def test_linear_serialization(
  function test_copy_param (line 199) | def test_copy_param(device, quant_type, blocksize, compress_statistics):
  function test_params4bit_torch_chunk_split (line 219) | def test_params4bit_torch_chunk_split(device, quant_type):
  function test_quant_storage_shard_roundtrip (line 259) | def test_quant_storage_shard_roundtrip(device, quant_type, quant_storage):
  function test_deepcopy_param (line 290) | def test_deepcopy_param(device, quant_type, blocksize, compress_statisti...
  function test_params4bit_real_serialization (line 319) | def test_params4bit_real_serialization(device, quant_type, blocksize, co...
  function test_linear4bit_torch_compile (line 362) | def test_linear4bit_torch_compile(device, quant_type, compute_dtype, com...
  function test_params4bit_quant_state_attr_access (line 440) | def test_params4bit_quant_state_attr_access(device, quant_type, compress...
  function test_fsdp_state_dict_save_4bit (line 508) | def test_fsdp_state_dict_save_4bit():

FILE: tests/test_linear8bitlt.py
  function test_linear_no_igemmlt (line 26) | def test_linear_no_igemmlt(device):
  function test_linear_serialization (line 74) | def test_linear_serialization(
  function linear8bit (line 176) | def linear8bit(requires_cuda):
  function test_linear8bit_copy_param (line 195) | def test_linear8bit_copy_param(linear8bit):
  function test_linear8bit_deepcopy_param (line 202) | def test_linear8bit_deepcopy_param(linear8bit):
  function test_linear8bit_serialization (line 217) | def test_linear8bit_serialization(linear8bit):
  function test_linear8bitlt_torch_compile (line 240) | def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, ...
  function test_linear8bitlt_device_movement (line 305) | def test_linear8bitlt_device_movement(device):

FILE: tests/test_modules.py
  function caplog_at_level (line 14) | def caplog_at_level(caplog, level, logger_name):
  class MockArgs (line 19) | class MockArgs:
    method __init__ (line 20) | def __init__(self, initial_data):
  class MLP8bit (line 25) | class MLP8bit(torch.nn.Module):
    method __init__ (line 26) | def __init__(self, dim1, dim2, has_fp16_weights=True, threshold=0.0):
    method forward (line 41) | def forward(self, x):
  function get_args (line 47) | def get_args():
  function assert_all_approx_close (line 55) | def assert_all_approx_close(a, b, atol=1e-8, rtol=1e-5, count=10):
  function test_linear8bitlt_inference (line 65) | def test_linear8bitlt_inference(device, threshold):
  function test_linear8bitlt_accumulated_gradient (line 80) | def test_linear8bitlt_accumulated_gradient(device):
  function test_linear8bitlt_no_fp16_weights (line 127) | def test_linear8bitlt_no_fp16_weights(device, threshold):
  function test_linear_kbit_fp32_bias (line 252) | def test_linear_kbit_fp32_bias(device, module):
  function test_kbit_backprop (line 291) | def test_kbit_backprop(device, module, dtype):
  function test_embedding_lossless (line 373) | def test_embedding_lossless(device, embedding_class, input_shape, embedd...
  function test_embedding_error (line 424) | def test_embedding_error(device, embedding_class, input_shape, embedding...
  function test_4bit_linear_warnings (line 464) | def test_4bit_linear_warnings(device, caplog):
  function test_4bit_embedding_warnings (line 484) | def test_4bit_embedding_warnings(device, caplog):
  function test_4bit_embedding_weight_fsdp_fix (line 498) | def test_4bit_embedding_weight_fsdp_fix(requires_cuda):
  function test_4bit_linear_weight_fsdp_fix (line 515) | def test_4bit_linear_weight_fsdp_fix(requires_cuda):
  function test_embedding_not_implemented_error (line 532) | def test_embedding_not_implemented_error():

FILE: tests/test_ops.py
  class TestLLMInt8Ops (line 17) | class TestLLMInt8Ops:
    method test_int8_linear_matmul (line 19) | def test_int8_linear_matmul(self, device):
    method test_int8_linear_matmul_out (line 31) | def test_int8_linear_matmul_out(self, device):
    method test_int8_vectorwise_quant (line 46) | def test_int8_vectorwise_quant(self, threshold, device):
    method test_int8_mm_dequant (line 71) | def test_int8_mm_dequant(self, device):
    method test_int8_scaled_mm (line 86) | def test_int8_scaled_mm(self, device, dtype, has_bias):
  class TestInt8BlockwiseQuantOps (line 101) | class TestInt8BlockwiseQuantOps:
    method test_quantize_blockwise (line 105) | def test_quantize_blockwise(self, device, dtype, blocksize):
    method test_dequantize_blockwise (line 129) | def test_dequantize_blockwise(self, device, dtype, blocksize):
  class Test4bitBlockwiseQuantOps (line 149) | class Test4bitBlockwiseQuantOps:
    method test_quantize_4bit (line 155) | def test_quantize_4bit(self, device, dtype, storage_dtype, quant_type,...
    method test_quantize_4bit_not_divisible_by_blocksize (line 178) | def test_quantize_4bit_not_divisible_by_blocksize(self, device, dtype,...
    method test_dequantize_4bit (line 205) | def test_dequantize_4bit(self, device, dtype, storage_dtype, quant_typ...
    method test_gemv_4bit (line 239) | def test_gemv_4bit(self, device, dtype, storage_dtype, quant_type, blo...
  class TestNonContiguousInputs (line 275) | class TestNonContiguousInputs:
    method test_quantize_blockwise_non_contiguous (line 281) | def test_quantize_blockwise_non_contiguous(self, device, dtype, blocks...
    method test_dequantize_blockwise_non_contiguous (line 303) | def test_dequantize_blockwise_non_contiguous(self, device, dtype, bloc...
    method test_quantize_4bit_non_contiguous (line 334) | def test_quantize_4bit_non_contiguous(self, device, dtype, quant_type,...
    method test_quantize_4bit_roundtrip_non_contiguous (line 356) | def test_quantize_4bit_roundtrip_non_contiguous(self, device, dtype, q...

FILE: tests/test_optim.py
  function assert_most_approx_close (line 22) | def assert_most_approx_close(a, b, rtol=1e-3, atol=1e-3, max_error_count...
  function get_temp_dir (line 30) | def get_temp_dir():
  function rm_path (line 36) | def rm_path(path):
  function test_optimizer32bit (line 181) | def test_optimizer32bit(dim1, dim2, gtype, optim_name, device):
  function test_global_config (line 265) | def test_global_config(dim1, dim2, gtype, device):
  function test_override_config_after_register (line 311) | def test_override_config_after_register(device):
  function test_optimizer8bit (line 358) | def test_optimizer8bit(dim1, dim2, gtype, optim_name, device):
  function test_benchmark_blockwise (line 520) | def test_benchmark_blockwise(dim1, dim2, gtype, optim_name, device):
  function test_ademamix_state_dict_no_nan (line 561) | def test_ademamix_state_dict_no_nan(optim_name, optim_factory, device):

FILE: tests/test_parametrize.py
  class ParametrizeTestModule (line 20) | class ParametrizeTestModule(nn.Module):
    method __init__ (line 23) | def __init__(self, device="cpu", dtype=torch.float32):
  function test_replace_parameter_4bit (line 40) | def test_replace_parameter_4bit(device, dtype, quant_type, compress_stat...
  function test_moe_parameter_shape (line 97) | def test_moe_parameter_shape(device, dtype):
  function test_prequantized_replacement (line 143) | def test_prequantized_replacement(device, dtype, quant_type):
  function test_state_dict_functionality (line 174) | def test_state_dict_functionality(device, dtype, quant_type, compress_st...
  function test_moe_realistic_forward (line 206) | def test_moe_realistic_forward(device, dtype):
  function test_error_conditions (line 249) | def test_error_conditions():
  function test_quant_state_preservation (line 272) | def test_quant_state_preservation(device, dtype):
  function test_multiple_parameters (line 306) | def test_multiple_parameters(device, dtype):
  function test_different_blocksizes (line 340) | def test_different_blocksizes(device, dtype, blocksize):
  function test_parametrization_forward_method (line 376) | def test_parametrization_forward_method():
  function test_gradient_behavior (line 415) | def test_gradient_behavior(device, dtype):