SYMBOL INDEX (830 symbols across 67 files) FILE: .github/scripts/auditwheel_show.py function main (line 5) | def main(): FILE: .github/scripts/set_platform_tag.py function get_platform_tag (line 6) | def get_platform_tag(architecture): function main (line 21) | def main(): FILE: agents/fetch_issues.py function gh_graphql (line 69) | def gh_graphql(query: str, variables: dict) -> dict: function transform_reactions (line 84) | def transform_reactions(reaction_groups: list) -> dict: function transform_timeline_event (line 94) | def transform_timeline_event(event: dict) -> dict | None: function transform_issue (line 122) | def transform_issue(raw: dict) -> dict: function fetch_all_issues (line 161) | def fetch_all_issues(owner: str, repo: str, states: list[str] | None = N... function main (line 223) | def main(): FILE: agents/query_issues.py function load_data (line 162) | def load_data(path: str) -> dict: function all_issues (line 167) | def all_issues(data: dict) -> list[dict]: function format_compact (line 171) | def format_compact(issue: dict) -> str: function format_list_line (line 182) | def format_list_line(issue: dict) -> str: function format_detail (line 201) | def format_detail(issue: dict, brief: bool = False) -> str: function tokenize (line 274) | def tokenize(text: str) -> set[str]: function extract_signatures (line 285) | def extract_signatures(text: str) -> set[str]: function find_related (line 314) | def find_related(target: dict, issues: list[dict], state_filter: str | N... function format_related_result (line 345) | def format_related_result(score, issue, sig_ol, tok_ol, verbose=False): function cmd_list (line 367) | def cmd_list(args, data): function cmd_search (line 403) | def cmd_search(args, data): function cmd_related (line 436) | def cmd_related(args, data): function cmd_batch_related (line 458) | def cmd_batch_related(args, data): function cmd_show (line 486) | def cmd_show(args, data): function cmd_top (line 502) | def cmd_top(args, data): function cmd_stats (line 515) | def cmd_stats(args, data): function main (line 537) | def main(): FILE: benchmarking/inference_benchmark.py function parse_args (line 83) | def parse_args(): function run_benchmark (line 120) | def run_benchmark(args, config, batch_size): FILE: benchmarking/int8/training_benchmark.py function test_bench_8bit_training (line 28) | def test_bench_8bit_training(batch, seq, model, hidden): FILE: benchmarking/matmul_benchmark.py function test_bench_matmul (line 30) | def test_bench_matmul(batch, seq, model, hidden): FILE: benchmarking/optimizer_benchmark.py function test_stream_optimizer_bench (line 23) | def test_stream_optimizer_bench(dim1, gtype, optim_name, mode): FILE: benchmarking/xpu/inference_benchmark.py function get_inputs (line 34) | def get_inputs(tokenizer): function get_streamer (line 45) | def get_streamer(tokenizer): class Streamer (line 51) | class Streamer: method __init__ (line 52) | def __init__(self, tokenizer, print_median=False): method put (line 57) | def put(self, t): method print_report (line 68) | def print_report(self): method end (line 78) | def end(self, *args): function parse_arguments (line 82) | def parse_arguments(): FILE: bitsandbytes/__init__.py function _import_backends (line 52) | def _import_backends(): FILE: bitsandbytes/_ops.py function _ (line 26) | def _( function _ (line 53) | def _( function _ (line 72) | def _(A: torch.Tensor, B: torch.Tensor): function _ (line 88) | def _(A: torch.Tensor, B: torch.Tensor, out: torch.Tensor): function _ (line 105) | def _(A: torch.Tensor, threshold=0.0): function _ (line 121) | def _(A: torch.Tensor, stats: torch.Tensor) -> torch.Tensor: function _ (line 128) | def _(A: torch.Tensor, stats: torch.Tensor): function _ (line 140) | def _( function _ (line 158) | def _( function _ (line 178) | def _( function _ (line 197) | def _( function _ (line 219) | def _( function _ (line 238) | def _(A: torch.Tensor, absmax: torch.Tensor, code: torch.Tensor, blocksi... function _ (line 251) | def _( function _ (line 265) | def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torc... function _ (line 281) | def _( function _ (line 305) | def _( function _ (line 339) | def _( function _ (line 382) | def _( FILE: bitsandbytes/autograd/_functions.py class GlobalOutlierPooler (line 25) | class GlobalOutlierPooler: method __init__ (line 28) | def __init__(self): method initialize (line 31) | def initialize(self): method get_instance (line 36) | def get_instance(cls): method add_outliers (line 42) | def add_outliers(self, outlier_idx, feature_dim): method get_current_outlier_idx (line 50) | def get_current_outlier_idx(self): class MatmulLtState (line 58) | class MatmulLtState: method __getattr__ (line 82) | def __getattr__(self, name): method reset_grads (line 92) | def reset_grads(self): class MatMul8bitLt (line 101) | class MatMul8bitLt(torch.autograd.Function): method forward (line 103) | def forward( method backward (line 202) | def backward(ctx: torch.autograd.function.FunctionCtx, grad_output: to... class MatMul8bitFp (line 245) | class MatMul8bitFp(torch.autograd.Function): method forward (line 252) | def forward(ctx, A, B, out=None, bias=None, state=MatmulLtState): method backward (line 274) | def backward(ctx, grad_output): class MatMul4Bit (line 300) | class MatMul4Bit(torch.autograd.Function): method forward (line 304) | def forward(ctx, A, B, out=None, bias=None, quant_state: Optional[F.Qu... method backward (line 337) | def backward(ctx, grad_output): function matmul (line 359) | def matmul( function matmul_4bit (line 377) | def matmul_4bit( FILE: bitsandbytes/backends/cpu/ops.py function _ (line 25) | def _(A: torch.Tensor, B: torch.Tensor): function _ (line 35) | def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torc... function _ (line 77) | def _( function _ (line 124) | def _( function _ (line 243) | def _( FILE: bitsandbytes/backends/cuda/ops.py function _ (line 15) | def _(A: torch.Tensor, B: torch.Tensor): function _ (line 21) | def _(A: torch.Tensor, B: torch.Tensor, out: torch.Tensor): function _int8_linear_matmul_impl (line 25) | def _int8_linear_matmul_impl(A: torch.Tensor, B: torch.Tensor, out: torc... function _ (line 89) | def _( function _ (line 128) | def _(A: torch.Tensor, threshold=0.0): function _ (line 170) | def _( function _get_col_absmax (line 189) | def _get_col_absmax( function _ (line 211) | def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torc... function _ (line 247) | def _(A: torch.Tensor, absmax: torch.Tensor, code: torch.Tensor, blocksi... function _ (line 254) | def _( function _dequantize_blockwise_impl (line 267) | def _dequantize_blockwise_impl( function _ (line 299) | def _( function _ (line 346) | def _( function _ (line 360) | def _( function _dequantize_4bit_impl (line 374) | def _dequantize_4bit_impl( function _ (line 420) | def _( function _ (line 430) | def _( function _gemv_4bit_impl (line 447) | def _gemv_4bit_impl( function _optimizer_update_32bit_impl (line 609) | def _optimizer_update_32bit_impl( function _optimizer_update_8bit_blockwise_impl (line 668) | def _optimizer_update_8bit_blockwise_impl( FILE: bitsandbytes/backends/default/ops.py function _try_torch_compile (line 12) | def _try_torch_compile(func=None, **compile_kwargs): function _ (line 39) | def _( function _ (line 62) | def _( function _ (line 101) | def _( function _ (line 120) | def _(A: torch.Tensor, B: torch.Tensor): function _ (line 125) | def _(A: torch.Tensor, B: torch.Tensor, out: torch.Tensor): function _int8_linear_matmul_impl (line 130) | def _int8_linear_matmul_impl(A: torch.Tensor, B: torch.Tensor, out: Opti... function _ (line 139) | def _(A: torch.Tensor, threshold=0.0): function _ (line 177) | def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torc... function _ (line 203) | def _(A: torch.Tensor, absmax: torch.Tensor, code: torch.Tensor, blocksi... function _ (line 220) | def _( function _dequantize_4bit_impl (line 265) | def _dequantize_4bit_impl( function _ (line 312) | def _( function _ (line 331) | def _( function _optimizer_precondition_32bit (line 369) | def _optimizer_precondition_32bit( function _optimizer_update_32bit (line 430) | def _optimizer_update_32bit( function _ (line 543) | def _( FILE: bitsandbytes/backends/hpu/ops.py function _reverse_4bit_compress_format (line 12) | def _reverse_4bit_compress_format(weight: torch.Tensor): function _ (line 20) | def _( FILE: bitsandbytes/backends/mps/ops.py function _get_kernel (line 21) | def _get_kernel(): function _ (line 36) | def _( function _dequantize_4bit_impl (line 56) | def _dequantize_4bit_impl( function _ (line 74) | def _( function _ (line 88) | def _( function _gemv_4bit_impl (line 104) | def _gemv_4bit_impl( function _ (line 123) | def _( function _ (line 135) | def _( FILE: bitsandbytes/backends/triton/kernels_4bit.py function quantize_fp4_blockwise_kernel (line 20) | def quantize_fp4_blockwise_kernel( function quantize_nf4_blockwise_kernel (line 87) | def quantize_nf4_blockwise_kernel( function quantize_4bit_blockwise_triton (line 157) | def quantize_4bit_blockwise_triton(A, blocksize, quant_type, blocks, abs... function dequant_4bit_body_util (line 183) | def dequant_4bit_body_util(a, offsets, quant_ptr, absmax_ptr, n_elems, Q... function dequantize_fp4_tree (line 205) | def dequantize_fp4_tree(val, absmax): function dequant_fp4_body_util (line 229) | def dequant_fp4_body_util(a, offsets, absmax_ptr, n_elems, QUANT_BLOCK: ... function dequantize_nf4_tree (line 245) | def dequantize_nf4_tree(val): function dequant_nf4_body_util (line 285) | def dequant_nf4_body_util(a, offsets, absmax_ptr, n_elems, QUANT_BLOCK: ... function dequant_4bit_kernel (line 334) | def dequant_4bit_kernel( function dequant_fp4_kernel (line 378) | def dequant_fp4_kernel( function dequant_nf4_kernel (line 420) | def dequant_nf4_kernel( function dequantize_4bit_impl (line 450) | def dequantize_4bit_impl( function dequantize_4bit_impl_passing_code (line 475) | def dequantize_4bit_impl_passing_code( function quantize_4bit_blockwise_kernel (line 519) | def quantize_4bit_blockwise_kernel( FILE: bitsandbytes/backends/triton/kernels_8bit_quant.py function dequant_8bit_kernel (line 28) | def dequant_8bit_kernel( function dequant_8bit_blockwise (line 45) | def dequant_8bit_blockwise( function quantize_8bit_blockwise_kernel (line 84) | def quantize_8bit_blockwise_kernel( function quantize_blockwise_triton (line 107) | def quantize_blockwise_triton(A, code, blocksize, absmax=None, out=None): function quantize_8bit_blockwise_kernel_util (line 137) | def quantize_8bit_blockwise_kernel_util( function dequant_8bit_blockwise_kernel_util (line 180) | def dequant_8bit_blockwise_kernel_util( FILE: bitsandbytes/backends/triton/kernels_optim.py function _optimizer_precondition_2state_32bit (line 36) | def _optimizer_precondition_2state_32bit( function _optimizer_precondition_1state_32bit (line 91) | def _optimizer_precondition_1state_32bit( function _optimizer_update_2state_32bit_triton_kernel (line 149) | def _optimizer_update_2state_32bit_triton_kernel( function _optimizer_update_1state_32bit_triton_kernel (line 234) | def _optimizer_update_1state_32bit_triton_kernel( function optimizer_update_32bit_impl (line 339) | def optimizer_update_32bit_impl( function _dequantize_blockwise_pytorch (line 488) | def _dequantize_blockwise_pytorch( function _quantize_blockwise_pytorch (line 523) | def _quantize_blockwise_pytorch( function optimizer_update_8bit_blockwise_pytorch (line 562) | def optimizer_update_8bit_blockwise_pytorch( function optimizer_update_8bit_blockwise_triton_quant (line 709) | def optimizer_update_8bit_blockwise_triton_quant( function _optimizer_update_1state_8bit_blockwise_triton_kernel (line 856) | def _optimizer_update_1state_8bit_blockwise_triton_kernel( function _optimizer_update_2state_8bit_blockwise_triton_kernel (line 939) | def _optimizer_update_2state_8bit_blockwise_triton_kernel( function optimizer_update_8bit_blockwise_impl (line 1076) | def optimizer_update_8bit_blockwise_impl( FILE: bitsandbytes/backends/triton/ops.py function quantize_blockwise (line 17) | def quantize_blockwise(A: torch.Tensor, code: torch.Tensor, blocksize: i... function dequantize_blockwise (line 25) | def dequantize_blockwise( function dequantize_blockwise_inplace (line 42) | def dequantize_blockwise_inplace( function quantize_4bit (line 67) | def quantize_4bit( function dequantize_4bit (line 104) | def dequantize_4bit( function dequantize_4bit_inplace (line 133) | def dequantize_4bit_inplace( function gemv_4bit (line 148) | def gemv_4bit( function optimizer_update_8bit_blockwise (line 185) | def optimizer_update_8bit_blockwise( function optimizer_update_32bit (line 264) | def optimizer_update_32bit( FILE: bitsandbytes/backends/utils.py function get_gaudi_sw_version (line 66) | def get_gaudi_sw_version(): FILE: bitsandbytes/backends/xpu/ops.py function _ (line 20) | def _(A: torch.Tensor, B: torch.Tensor): function _dequantize_4bit_impl (line 27) | def _dequantize_4bit_impl( function _dequantize_blockwise_impl (line 61) | def _dequantize_blockwise_impl( function _gemv_4bit_impl (line 81) | def _gemv_4bit_impl( function _ (line 165) | def _( function _ (line 178) | def _( function _ (line 186) | def _( function _ (line 199) | def _( function _ (line 213) | def _( FILE: bitsandbytes/cextension.py function get_cuda_bnb_library_path (line 22) | def get_cuda_bnb_library_path(cuda_specs: CUDASpecs) -> Path: class BNBNativeLibrary (line 60) | class BNBNativeLibrary: method __init__ (line 64) | def __init__(self, lib: ct.CDLL): method __getattr__ (line 68) | def __getattr__(self, name): method __getitem__ (line 82) | def __getitem__(self, item): class CudaBNBNativeLibrary (line 86) | class CudaBNBNativeLibrary(BNBNativeLibrary): method __init__ (line 89) | def __init__(self, lib: ct.CDLL): class XpuBNBNativeLibrary (line 95) | class XpuBNBNativeLibrary(BNBNativeLibrary): method __init__ (line 98) | def __init__(self, lib: ct.CDLL): function get_available_cuda_binary_versions (line 104) | def get_available_cuda_binary_versions() -> list[str]: function parse_cuda_version (line 119) | def parse_cuda_version(version_str: str) -> str: class ErrorHandlerMockBNBNativeLibrary (line 126) | class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary): method __init__ (line 147) | def __init__(self, error_msg: str): method _format_lib_error_message (line 175) | def _format_lib_error_message( method _format_dependency_error (line 258) | def _format_dependency_error(self) -> str: method __getattr__ (line 286) | def __getattr__(self, name): method __getitem__ (line 294) | def __getitem__(self, name): function get_native_library (line 298) | def get_native_library() -> BNBNativeLibrary: FILE: bitsandbytes/cuda_specs.py class CUDASpecs (line 13) | class CUDASpecs: method has_imma (line 19) | def has_imma(self) -> bool: function get_compute_capabilities (line 23) | def get_compute_capabilities() -> list[tuple[int, int]]: function get_cuda_version_tuple (line 28) | def get_cuda_version_tuple() -> Optional[tuple[int, int]]: function get_cuda_version_string (line 46) | def get_cuda_version_string() -> Optional[str]: function get_cuda_specs (line 55) | def get_cuda_specs() -> Optional[CUDASpecs]: function get_rocm_gpu_arch (line 82) | def get_rocm_gpu_arch() -> str: function get_rocm_warpsize (line 114) | def get_rocm_warpsize() -> int: FILE: bitsandbytes/diagnostics/cuda.py function find_cuda_libraries_in_path_list (line 47) | def find_cuda_libraries_in_path_list(paths_list_candidate: str) -> Itera... function is_relevant_candidate_env_var (line 69) | def is_relevant_candidate_env_var(env_var: str, value: str) -> bool: function get_potentially_lib_path_containing_env_vars (line 82) | def get_potentially_lib_path_containing_env_vars() -> dict[str, str]: function find_cudart_libraries (line 86) | def find_cudart_libraries() -> Iterator[Path]: function _print_cuda_diagnostics (line 110) | def _print_cuda_diagnostics(cuda_specs: CUDASpecs) -> None: function _print_hip_diagnostics (line 135) | def _print_hip_diagnostics(cuda_specs: CUDASpecs) -> None: function print_diagnostics (line 164) | def print_diagnostics(cuda_specs: CUDASpecs) -> None: function _print_cuda_runtime_diagnostics (line 171) | def _print_cuda_runtime_diagnostics() -> None: function _print_hip_runtime_diagnostics (line 198) | def _print_hip_runtime_diagnostics() -> None: function print_runtime_diagnostics (line 225) | def print_runtime_diagnostics() -> None: FILE: bitsandbytes/diagnostics/main.py function sanity_check (line 30) | def sanity_check(): function get_package_version (line 45) | def get_package_version(name: str) -> str: function show_environment (line 53) | def show_environment(): function main (line 73) | def main(): FILE: bitsandbytes/diagnostics/utils.py function print_header (line 6) | def print_header(txt: str, width: int = HEADER_WIDTH, filler: str = "=")... function print_dedented (line 11) | def print_dedented(text): FILE: bitsandbytes/functional.py class GlobalPageManager (line 24) | class GlobalPageManager: method __init__ (line 27) | def __init__(self): method initialize (line 30) | def initialize(self): method get_instance (line 34) | def get_instance(cls): method prefetch_all (line 40) | def prefetch_all(self, to_cpu=False): class CUBLAS_Context (line 48) | class CUBLAS_Context: method __init__ (line 51) | def __init__(self): method initialize (line 54) | def initialize(self): method get_instance (line 58) | def get_instance(cls): method get_context (line 64) | def get_context(self, device): function _cuda_device_of (line 81) | def _cuda_device_of(a: torch.Tensor): function _cuda_device_of (line 86) | def _cuda_device_of(a: torch.Tensor): function get_paged (line 90) | def get_paged(*shape, dtype=torch.float32, device=FIRST_CUDA_DEVICE): function prefetch_tensor (line 101) | def prefetch_tensor(A: torch.Tensor, to_cpu=False): function elementwise_func (line 111) | def elementwise_func(func_name, A, B, value, prefetch=True): function fill (line 141) | def fill(A, value, device=None, prefetch=True): function _mul (line 145) | def _mul(A, B, device=None): function create_linear_map (line 149) | def create_linear_map(signed=True, total_bits=8, add_zero=True): function create_normal_map (line 168) | def create_normal_map(offset=0.9677083, use_extra_value=True): function create_fp8_map (line 226) | def create_fp8_map(signed=True, exponent_bits=5, precision_bits=2, total... function create_dynamic_map (line 295) | def create_dynamic_map(signed=True, max_exponent_bits=7, total_bits=8): function is_on_gpu (line 350) | def is_on_gpu(tensors: Iterable[Optional[torch.Tensor]]): function _get_tensor_stream (line 386) | def _get_tensor_stream(tensor: Tensor) -> ct.c_void_p: function get_ptr (line 398) | def get_ptr(A: Optional[Tensor]) -> Optional[ct.c_void_p]: class QuantState (line 413) | class QuantState: method __init__ (line 433) | def __init__( method __getattr__ (line 454) | def __getattr__(self, name): method __getitem__ (line 466) | def __getitem__(self, idx): method from_dict (line 487) | def from_dict(cls, qs_dict: dict[str, Any], device: torch.device) -> "... method as_dict (line 538) | def as_dict(self, packed: bool = False) -> dict[str, Any]: method to (line 573) | def to(self, device): method __eq__ (line 582) | def __eq__(self, other): function quantize_blockwise (line 606) | def quantize_blockwise( function dequantize_blockwise (line 677) | def dequantize_blockwise( function get_4bit_type (line 754) | def get_4bit_type(typename, device=None, blocksize=64): function quantize_fp4 (line 844) | def quantize_fp4( function quantize_nf4 (line 855) | def quantize_nf4( function quantize_4bit (line 866) | def quantize_4bit( function dequantize_fp4 (line 947) | def dequantize_fp4( function dequantize_nf4 (line 957) | def dequantize_nf4( function dequantize_4bit (line 967) | def dequantize_4bit( function optimizer_update_32bit (line 1044) | def optimizer_update_32bit( function optimizer_update_8bit_blockwise (line 1133) | def optimizer_update_8bit_blockwise( function check_matmul (line 1179) | def check_matmul(A, B, out, transposed_A, transposed_B, expected_type=to... function gemv_4bit (line 1263) | def gemv_4bit( function igemm (line 1300) | def igemm( function batched_igemm (line 1401) | def batched_igemm( function int8_linear_matmul (line 1497) | def int8_linear_matmul(A: torch.Tensor, B: torch.Tensor, out: Optional[t... function int8_mm_dequant (line 1523) | def int8_mm_dequant( function int8_double_quant (line 1551) | def int8_double_quant( function int8_vectorwise_dequant (line 1602) | def int8_vectorwise_dequant(A: torch.Tensor, stats: torch.Tensor): function int8_vectorwise_quant (line 1616) | def int8_vectorwise_quant(A: torch.Tensor, threshold=0.0): function _convert_weight_packed_for_cpu (line 1637) | def _convert_weight_packed_for_cpu(qweight: torch.Tensor, quant_state: Q... function _convert_weight_packed_for_cpu_inverse (line 1691) | def _convert_weight_packed_for_cpu_inverse( function has_avx512bf16 (line 1759) | def has_avx512bf16(): FILE: bitsandbytes/nn/modules.py class StableEmbedding (line 28) | class StableEmbedding(torch.nn.Embedding): method __init__ (line 54) | def __init__( method reset_parameters (line 101) | def reset_parameters(self) -> None: method _fill_padding_idx_with_zero (line 112) | def _fill_padding_idx_with_zero(self) -> None: method forward (line 117) | def forward(self, input: Tensor) -> Tensor: class Embedding (line 134) | class Embedding(torch.nn.Embedding): method __init__ (line 139) | def __init__( method reset_parameters (line 183) | def reset_parameters(self) -> None: method _fill_padding_idx_with_zero (line 194) | def _fill_padding_idx_with_zero(self) -> None: method forward (line 199) | def forward(self, input: Tensor) -> Tensor: class Params4bit (line 213) | class Params4bit(torch.nn.Parameter): method __new__ (line 214) | def __new__( method __getstate__ (line 243) | def __getstate__(self): method __setstate__ (line 249) | def __setstate__(self, state): method __getattr__ (line 284) | def __getattr__(self, name): method __deepcopy__ (line 297) | def __deepcopy__(self, memo): method __copy__ (line 305) | def __copy__(self): method from_prequantized (line 312) | def from_prequantized( method _quantize (line 337) | def _quantize(self, device): method cpu (line 353) | def cpu(self): method cuda (line 356) | def cuda(self, device: Optional[int | device | str] = None, non_blocki... method xpu (line 361) | def xpu(self, device: Optional[int | device | str] = None, non_blockin... method to (line 367) | def to( method to (line 375) | def to(self: T, dtype: dtype | str, non_blocking: bool = ...) -> T: ... method to (line 378) | def to(self: T, tensor: Tensor, non_blocking: bool = ...) -> T: ... method to (line 380) | def to(self, *args, **kwargs): method __torch_function__ (line 403) | def __torch_function__(cls, func, types, args=(), kwargs=None): function fix_4bit_weight_quant_state_from_module (line 443) | def fix_4bit_weight_quant_state_from_module(module: Union["Embedding4bit... class Linear4bit (line 460) | class Linear4bit(nn.Linear): method __init__ (line 493) | def __init__( method set_compute_type (line 531) | def set_compute_type(self, x): method _save_to_state_dict (line 549) | def _save_to_state_dict(self, destination, prefix, keep_vars): method forward (line 565) | def forward(self, x: torch.Tensor): class LinearFP4 (line 596) | class LinearFP4(Linear4bit): method __init__ (line 601) | def __init__( class LinearNF4 (line 632) | class LinearNF4(Linear4bit): method __init__ (line 644) | def __init__( class Int8Params (line 675) | class Int8Params(torch.nn.Parameter): method __new__ (line 676) | def __new__( method _quantize (line 692) | def _quantize(self, device): method cpu (line 705) | def cpu(self): method cuda (line 708) | def cuda(self, device: Optional[int | device | str] = None, non_blocki... method xpu (line 711) | def xpu(self, device: Optional[int | device | str] = None, non_blockin... method __deepcopy__ (line 714) | def __deepcopy__(self, memo): method to (line 727) | def to( method to (line 735) | def to(self: T, dtype: dtype | str, non_blocking: bool = ...) -> T: ... method to (line 738) | def to(self: T, tensor: Tensor, non_blocking: bool = ...) -> T: ... method to (line 740) | def to(self, *args, **kwargs): function maybe_rearrange_weight (line 767) | def maybe_rearrange_weight(state_dict, prefix, local_metadata, strict, m... class Embedding8bit (line 788) | class Embedding8bit(nn.Embedding): method __init__ (line 808) | def __init__(self, num_embeddings, embedding_dim, device=None, dtype=N... method _save_to_state_dict (line 814) | def _save_to_state_dict(self, destination, prefix, keep_vars): method forward (line 817) | def forward(self, input: Tensor) -> Tensor: class Embedding4bit (line 835) | class Embedding4bit(nn.Embedding): method __init__ (line 856) | def __init__( method _forward_with_partial_dequantize (line 885) | def _forward_with_partial_dequantize(self, input: Tensor): method _save_to_state_dict (line 918) | def _save_to_state_dict(self, destination, prefix, keep_vars): method forward (line 921) | def forward(self, input: Tensor) -> Tensor: class EmbeddingFP4 (line 935) | class EmbeddingFP4(Embedding4bit): method __init__ (line 936) | def __init__( class EmbeddingNF4 (line 954) | class EmbeddingNF4(Embedding4bit): method __init__ (line 955) | def __init__( class Linear8bitLt (line 973) | class Linear8bitLt(nn.Linear): method __init__ (line 1005) | def __init__( method _save_to_state_dict (line 1050) | def _save_to_state_dict(self, destination, prefix, keep_vars): method _load_from_state_dict (line 1074) | def _load_from_state_dict( method init_8bit_state (line 1113) | def init_8bit_state(self): method to (line 1119) | def to(self, *args, **kwargs): method forward (line 1134) | def forward(self, x: torch.Tensor): class OutlierAwareLinear (line 1151) | class OutlierAwareLinear(nn.Linear): method __init__ (line 1152) | def __init__(self, input_features, output_features, bias=True, device=... method forward_with_outliers (line 1157) | def forward_with_outliers(self, x, outlier_idx): method quantize_weight (line 1160) | def quantize_weight(self, w, outlier_idx): method forward (line 1163) | def forward(self, x): FILE: bitsandbytes/nn/parametrize.py class Bnb4bitParametrization (line 11) | class Bnb4bitParametrization(nn.Module): method __init__ (line 24) | def __init__(self, quant_state: F.QuantState): method forward (line 29) | def forward(self, quantized_param: torch.Tensor) -> torch.Tensor: function replace_parameter_4bit_prequantized (line 42) | def replace_parameter_4bit_prequantized( function replace_parameter_4bit (line 62) | def replace_parameter_4bit( function _disable_parametrization_cache (line 129) | def _disable_parametrization_cache(module: nn.Module, inputs: tuple[Any,... function _enable_parametrization_cache (line 135) | def _enable_parametrization_cache(module: nn.Module, inputs: tuple[Any, ... function _register_parametrization_hooks (line 139) | def _register_parametrization_hooks(module: nn.Module, param_name: str): function _parametrized_state_dict_post_hook (line 156) | def _parametrized_state_dict_post_hook( FILE: bitsandbytes/optim/adagrad.py class Adagrad (line 8) | class Adagrad(Optimizer1State): method __init__ (line 9) | def __init__( class Adagrad8bit (line 67) | class Adagrad8bit(Optimizer1State): method __init__ (line 68) | def __init__( class Adagrad32bit (line 126) | class Adagrad32bit(Optimizer1State): method __init__ (line 127) | def __init__( FILE: bitsandbytes/optim/adam.py class Adam (line 9) | class Adam(Optimizer2State): method __init__ (line 10) | def __init__( class Adam8bit (line 62) | class Adam8bit(Optimizer2State): method __init__ (line 63) | def __init__( class Adam32bit (line 126) | class Adam32bit(Optimizer2State): method __init__ (line 127) | def __init__( class PagedAdam (line 179) | class PagedAdam(Optimizer2State): method __init__ (line 180) | def __init__( class PagedAdam8bit (line 232) | class PagedAdam8bit(Optimizer2State): method __init__ (line 233) | def __init__( class PagedAdam32bit (line 296) | class PagedAdam32bit(Optimizer2State): method __init__ (line 297) | def __init__( FILE: bitsandbytes/optim/adamw.py class AdamW (line 9) | class AdamW(Optimizer2State): method __init__ (line 10) | def __init__( class AdamW8bit (line 62) | class AdamW8bit(Optimizer2State): method __init__ (line 63) | def __init__( class AdamW32bit (line 126) | class AdamW32bit(Optimizer2State): method __init__ (line 127) | def __init__( class PagedAdamW (line 179) | class PagedAdamW(Optimizer2State): method __init__ (line 180) | def __init__( class PagedAdamW8bit (line 229) | class PagedAdamW8bit(Optimizer2State): method __init__ (line 230) | def __init__( class PagedAdamW32bit (line 290) | class PagedAdamW32bit(Optimizer2State): method __init__ (line 291) | def __init__( FILE: bitsandbytes/optim/ademamix.py class _ReferenceAdEMAMix (line 11) | class _ReferenceAdEMAMix(torch.optim.Optimizer): method __init__ (line 16) | def __init__( method step (line 34) | def step(self, closure=None): class AdEMAMix (line 107) | class AdEMAMix(Optimizer2State): method __init__ (line 108) | def __init__( method init_state (line 139) | def init_state(self, group, p, gindex, pindex): method update_step (line 176) | def update_step(self, group, p, gindex, pindex): method _get_state_double_buffer (line 260) | def _get_state_double_buffer(self, p, dtype=torch.float32): class AdEMAMix8bit (line 270) | class AdEMAMix8bit(AdEMAMix): method __init__ (line 271) | def __init__( class PagedAdEMAMix8bit (line 299) | class PagedAdEMAMix8bit(AdEMAMix8bit): method __init__ (line 300) | def __init__( class PagedAdEMAMix (line 326) | class PagedAdEMAMix(AdEMAMix): method __init__ (line 327) | def __init__( class AdEMAMix32bit (line 355) | class AdEMAMix32bit(Optimizer2State): method __init__ (line 356) | def __init__( class PagedAdEMAMix32bit (line 386) | class PagedAdEMAMix32bit(AdEMAMix32bit): method __init__ (line 387) | def __init__( FILE: bitsandbytes/optim/lamb.py class LAMB (line 8) | class LAMB(Optimizer2State): method __init__ (line 9) | def __init__( class LAMB8bit (line 67) | class LAMB8bit(Optimizer2State): method __init__ (line 68) | def __init__( class LAMB32bit (line 123) | class LAMB32bit(Optimizer2State): method __init__ (line 124) | def __init__( FILE: bitsandbytes/optim/lars.py class LARS (line 11) | class LARS(Optimizer1State): method __init__ (line 12) | def __init__( class LARS8bit (line 66) | class LARS8bit(Optimizer1State): method __init__ (line 67) | def __init__( class LARS32bit (line 118) | class LARS32bit(Optimizer1State): method __init__ (line 119) | def __init__( class PytorchLARS (line 170) | class PytorchLARS(Optimizer): method __init__ (line 171) | def __init__( method __setstate__ (line 200) | def __setstate__(self, state): method step (line 206) | def step(self, closure=None): FILE: bitsandbytes/optim/lion.py class Lion (line 8) | class Lion(Optimizer1State): method __init__ (line 9) | def __init__( class Lion8bit (line 55) | class Lion8bit(Optimizer1State): method __init__ (line 56) | def __init__( class Lion32bit (line 99) | class Lion32bit(Optimizer1State): method __init__ (line 100) | def __init__( class PagedLion (line 143) | class PagedLion(Optimizer1State): method __init__ (line 144) | def __init__( class PagedLion8bit (line 187) | class PagedLion8bit(Optimizer1State): method __init__ (line 188) | def __init__( class PagedLion32bit (line 228) | class PagedLion32bit(Optimizer1State): method __init__ (line 229) | def __init__( FILE: bitsandbytes/optim/optimizer.py class MockArgs (line 16) | class MockArgs: method __init__ (line 17) | def __init__(self, initial_data): class GlobalOptimManager (line 22) | class GlobalOptimManager: method __init__ (line 29) | def __init__(self): method initialize (line 32) | def initialize(self): method get_instance (line 40) | def get_instance(cls): method register_parameters (line 46) | def register_parameters(self, params): method override_config (line 56) | def override_config(self, parameters, key=None, value=None, key_value_... method register_module_override (line 109) | def register_module_override(self, module, param_name, config): class Optimizer8bit (line 113) | class Optimizer8bit(torch.optim.Optimizer): method __init__ (line 116) | def __init__(self, params, defaults, optim_bits=32, is_paged=False): method fill_qmap (line 153) | def fill_qmap(self): method state_dict (line 157) | def state_dict(self): method __setstate__ (line 185) | def __setstate__(self, state): method load_state_dict (line 188) | def load_state_dict(self, state_dict, move_to_device=True): method to_gpu (line 269) | def to_gpu(self): method check_overrides (line 280) | def check_overrides(self): method step (line 300) | def step(self, closure=None): method get_config (line 337) | def get_config(self, gindex, pindex, group): method init_state (line 362) | def init_state(self, group, p, gindex, pindex): method update_step (line 365) | def update_step(self, group, p, gindex, pindex): method get_state_buffer (line 368) | def get_state_buffer(self, p, dtype=torch.float32): method prefetch_state (line 378) | def prefetch_state(self, p): class Optimizer2State (line 389) | class Optimizer2State(Optimizer8bit): method __init__ (line 390) | def __init__( method init_state (line 478) | def init_state(self, group, p, gindex, pindex): method update_step (line 521) | def update_step(self, group, p, gindex, pindex): class Optimizer1State (line 579) | class Optimizer1State(Optimizer8bit): method __init__ (line 580) | def __init__( method init_state (line 650) | def init_state(self, group, p, gindex, pindex): method update_step (line 687) | def update_step(self, group, p, gindex, pindex): FILE: bitsandbytes/optim/rmsprop.py class RMSprop (line 8) | class RMSprop(Optimizer1State): method __init__ (line 9) | def __init__( class RMSprop8bit (line 64) | class RMSprop8bit(Optimizer1State): method __init__ (line 65) | def __init__( class RMSprop32bit (line 117) | class RMSprop32bit(Optimizer1State): method __init__ (line 118) | def __init__( FILE: bitsandbytes/optim/sgd.py class SGD (line 8) | class SGD(Optimizer1State): method __init__ (line 9) | def __init__( class SGD8bit (line 59) | class SGD8bit(Optimizer1State): method __init__ (line 60) | def __init__( class SGD32bit (line 107) | class SGD32bit(Optimizer1State): method __init__ (line 108) | def __init__( FILE: bitsandbytes/utils.py function outlier_hook (line 11) | def outlier_hook(module, input): class OutlierTracer (line 44) | class OutlierTracer: method __init__ (line 47) | def __init__(self): method initialize (line 50) | def initialize(self, model): method is_initialized (line 63) | def is_initialized(self): method get_hvalue (line 66) | def get_hvalue(self, weight): method get_outliers (line 69) | def get_outliers(self, weight): method get_instance (line 80) | def get_instance(cls): function find_outlier_dims (line 86) | def find_outlier_dims(weight, reduction_dim=0, zscore=4.0, topk=None, rd... function execute_and_return (line 104) | def execute_and_return(command_string: str) -> tuple[str, str]: function replace_linear (line 121) | def replace_linear( function pack_dict_to_tensor (line 166) | def pack_dict_to_tensor(source_dict): function unpack_tensor_to_dict (line 183) | def unpack_tensor_to_dict(tensor_data): function sync_gpu (line 204) | def sync_gpu(t: torch.Tensor): FILE: csrc/common.h type DataType_t (line 3) | typedef enum DataType_t { FILE: csrc/cpu_ops.cpp function lookup_code_index (line 19) | inline unsigned char lookup_code_index(const float* codebook, float valu... function __m256i (line 42) | inline __m256i cvt_fp32_to_fp16(const __m512 src) { function __m256i (line 46) | inline __m256i cvt_fp32_to_bf16(const __m512 src) { function __m512 (line 70) | static inline __m512 set_nf4_lut() { function __m512 (line 78) | static inline __m512 set_fp4_lut() { function dequantizeBlockwise4bitCpu (line 89) | void dequantizeBlockwise4bitCpu( function dequantizeBlockwise8bitCpu (line 183) | void dequantizeBlockwise8bitCpu( function quantize_cpu (line 207) | void quantize_cpu(float* code, float* A, float* absmax, unsigned char* o... type tinygemm_kernel_nn (line 267) | struct tinygemm_kernel_nn { method apply (line 268) | static inline void apply( type tinygemm_kernel_nn (line 276) | struct tinygemm_kernel_nn { method apply (line 277) | static inline void apply( function tinygemm_kernel (line 389) | void tinygemm_kernel( function gemv_4bit_inference (line 446) | void gemv_4bit_inference( FILE: csrc/cpu_ops.h function block_size_m (line 24) | constexpr int block_size_m() { return 2 * TILE_M; } function block_size_n (line 26) | constexpr int block_size_n() { return 2 * TILE_N; } function get_cache_blocks (line 28) | int get_cache_blocks(int chunk_size) { function const (line 42) | void operator()(const Func& f, Args... args) const { type Unroll (line 48) | struct Unroll function const (line 49) | void operator()(const Func& f, Args... args) const { function get_max_threads (line 58) | inline int get_max_threads() { function adjust_num_threads (line 67) | inline int adjust_num_threads(int m) { function parallel_2d (line 74) | void parallel_2d(int m, int n, const func_t& f) { type fp16_t (line 124) | struct fp16_t { type bf16_t (line 128) | struct bf16_t { function bf16_to_float (line 139) | static float bf16_to_float(uint16_t bf16) { function fp16_t (line 146) | static inline fp16_t float_to_fp16(float x) { function dDequantizeFP4 (line 188) | inline float dDequantizeFP4(unsigned char val) { function dDequantizeNF4 (line 230) | inline float dDequantizeNF4(unsigned char val) { function has_avx512f (line 292) | static inline bool has_avx512f() { function has_avx512bf16 (line 302) | static inline bool has_avx512bf16() { function has_avx512f (line 312) | static inline bool has_avx512f() { function has_avx512bf16 (line 318) | static inline bool has_avx512bf16() { FILE: csrc/pythonInterface.cpp function gemm_4bit_inference_naive_fp16 (line 43) | void gemm_4bit_inference_naive_fp16( function gemm_4bit_inference_naive_bf16 (line 50) | void gemm_4bit_inference_naive_bf16( function gemm_4bit_inference_naive_fp32 (line 59) | void gemm_4bit_inference_naive_fp32( function quantizeBlockwise_fp16 (line 133) | void quantizeBlockwise_fp16(float* code, half* A, float* absmax, unsigne... function quantizeBlockwise_fp16_fp4 (line 137) | void quantizeBlockwise_fp16_fp4(float* code, half* A, float* absmax, uns... function quantizeBlockwise_fp16_nf4 (line 141) | void quantizeBlockwise_fp16_nf4(float* code, half* A, float* absmax, uns... function quantizeBlockwise_bf16 (line 145) | void quantizeBlockwise_bf16( function quantizeBlockwise_bf16_fp4 (line 151) | void quantizeBlockwise_bf16_fp4( function quantizeBlockwise_bf16_nf4 (line 157) | void quantizeBlockwise_bf16_nf4( function quantizeBlockwise_fp32 (line 163) | void quantizeBlockwise_fp32(float* code, float* A, float* absmax, unsign... function quantizeBlockwise_fp32_fp4 (line 167) | void quantizeBlockwise_fp32_fp4(float* code, float* A, float* absmax, un... function quantizeBlockwise_fp32_nf4 (line 171) | void quantizeBlockwise_fp32_nf4(float* code, float* A, float* absmax, un... function dequantizeBlockwise_fp16 (line 175) | void dequantizeBlockwise_fp16( function dequantizeBlockwise_fp16_fp4 (line 181) | void dequantizeBlockwise_fp16_fp4( function dequantizeBlockwise_fp16_nf4 (line 187) | void dequantizeBlockwise_fp16_nf4( function dequantizeBlockwise_fp32 (line 193) | void dequantizeBlockwise_fp32( function dequantizeBlockwise_fp32_fp4 (line 199) | void dequantizeBlockwise_fp32_fp4( function dequantizeBlockwise_fp32_nf4 (line 205) | void dequantizeBlockwise_fp32_nf4( function dequantizeBlockwise_bf16 (line 211) | void dequantizeBlockwise_bf16( function dequantizeBlockwise_bf16_fp4 (line 217) | void dequantizeBlockwise_bf16_fp4( function dequantizeBlockwise_bf16_nf4 (line 223) | void dequantizeBlockwise_bf16_nf4( function igemmlt_32 (line 229) | int igemmlt_32( function igemmlt_8 (line 236) | int igemmlt_8( function igemmlt_8_rowscale (line 243) | int igemmlt_8_rowscale( function dequantizeBlockwise_fp16 (line 254) | void dequantizeBlockwise_fp16( function dequantizeBlockwise_fp16_fp4 (line 260) | void dequantizeBlockwise_fp16_fp4( function dequantizeBlockwise_fp16_nf4 (line 266) | void dequantizeBlockwise_fp16_nf4( function dequantizeBlockwise_fp32 (line 272) | void dequantizeBlockwise_fp32( function dequantizeBlockwise_fp32_fp4 (line 278) | void dequantizeBlockwise_fp32_fp4( function dequantizeBlockwise_fp32_nf4 (line 284) | void dequantizeBlockwise_fp32_nf4( function dequantizeBlockwise_bf16 (line 290) | void dequantizeBlockwise_bf16( function dequantizeBlockwise_bf16_fp4 (line 297) | void dequantizeBlockwise_bf16_fp4( function dequantizeBlockwise_bf16_nf4 (line 304) | void dequantizeBlockwise_bf16_nf4( function gemv_4bit_inference_fp16 (line 311) | void gemv_4bit_inference_fp16( function gemv_4bit_inference_bf16 (line 318) | void gemv_4bit_inference_bf16( function gemv_4bit_inference_fp32 (line 327) | void gemv_4bit_inference_fp32( function cdequantize_blockwise_fp16_fp4 (line 352) | void cdequantize_blockwise_fp16_fp4( function cdequantize_blockwise_fp16 (line 358) | void cdequantize_blockwise_fp16( function cdequantize_blockwise_fp16_nf4 (line 364) | void cdequantize_blockwise_fp16_nf4( function cquantize_blockwise_fp16 (line 370) | void cquantize_blockwise_fp16(float* code, half* A, float* absmax, unsig... function cquantize_blockwise_fp16_fp4 (line 374) | void cquantize_blockwise_fp16_fp4(float* code, half* A, float* absmax, u... function cquantize_blockwise_fp16_nf4 (line 378) | void cquantize_blockwise_fp16_nf4(float* code, half* A, float* absmax, u... function cquantize_blockwise_fp32 (line 382) | void cquantize_blockwise_fp32(float* code, float* A, float* absmax, unsi... function cquantize_blockwise_fp32_fp4 (line 386) | void cquantize_blockwise_fp32_fp4( function cquantize_blockwise_fp32_nf4 (line 392) | void cquantize_blockwise_fp32_nf4( function cdequantize_blockwise_fp32 (line 398) | void cdequantize_blockwise_fp32( function cdequantize_blockwise_fp32_fp4 (line 404) | void cdequantize_blockwise_fp32_fp4( function cdequantize_blockwise_fp32_nf4 (line 410) | void cdequantize_blockwise_fp32_nf4( function cquantize_blockwise_bf16 (line 416) | void cquantize_blockwise_bf16( function cquantize_blockwise_bf16_fp4 (line 422) | void cquantize_blockwise_bf16_fp4( function cquantize_blockwise_bf16_nf4 (line 428) | void cquantize_blockwise_bf16_nf4( function cdequantize_blockwise_bf16 (line 434) | void cdequantize_blockwise_bf16( function cdequantize_blockwise_bf16_fp4 (line 440) | void cdequantize_blockwise_bf16_fp4( function cdequantize_blockwise_bf16_nf4 (line 446) | void cdequantize_blockwise_bf16_nf4( function cigemm (line 512) | cigemm( function cbatched_igemm (line 519) | void cbatched_igemm( function Context (line 528) | Context* get_context() { return new Context(); } function cigemmlt_32 (line 530) | int cigemmlt_32( function cigemmlt_8 (line 537) | int cigemmlt_8( function cigemmlt_8_rowscale (line 544) | int cigemmlt_8_rowscale( function cdequant_mm_int32_fp16 (line 551) | void cdequant_mm_int32_fp16( function cint8_vector_quant (line 557) | void cint8_vector_quant( function cprefetch (line 571) | void cprefetch(void* ptr, size_t bytes, int device) { function cgemm_4bit_inference_naive_fp16 (line 600) | void cgemm_4bit_inference_naive_fp16( function cgemm_4bit_inference_naive_bf16 (line 607) | void cgemm_4bit_inference_naive_bf16( function cgemm_4bit_inference_naive_fp32 (line 614) | void cgemm_4bit_inference_naive_fp32( function cdequantize_blockwise_fp16_fp4 (line 625) | void cdequantize_blockwise_fp16_fp4( function cdequantize_blockwise_fp16 (line 631) | void cdequantize_blockwise_fp16( function cdequantize_blockwise_fp16_nf4 (line 637) | void cdequantize_blockwise_fp16_nf4( function cdequantize_blockwise_fp32 (line 643) | void cdequantize_blockwise_fp32( function cdequantize_blockwise_fp32_fp4 (line 649) | void cdequantize_blockwise_fp32_fp4( function cdequantize_blockwise_fp32_nf4 (line 655) | void cdequantize_blockwise_fp32_nf4( function cdequantize_blockwise_bf16 (line 661) | void cdequantize_blockwise_bf16( function cdequantize_blockwise_bf16_fp4 (line 668) | void cdequantize_blockwise_bf16_fp4( function cdequantize_blockwise_bf16_nf4 (line 675) | void cdequantize_blockwise_bf16_nf4( function cgemv_4bit_inference_fp16 (line 682) | void cgemv_4bit_inference_fp16( function cgemv_4bit_inference_bf16 (line 689) | void cgemv_4bit_inference_bf16( function cgemv_4bit_inference_fp32 (line 696) | void cgemv_4bit_inference_fp32( function cprefetch (line 723) | void cprefetch(void* ptr, size_t bytes, int device) { function cfill_fp32 (line 736) | void cfill_fp32(float* A, float* B, float value, long n) { function cfill_uint8 (line 745) | void cfill_uint8(unsigned char* A, unsigned char* B, unsigned char value... function cquantize_blockwise_cpu_fp32 (line 754) | void cquantize_blockwise_cpu_fp32( function cdequantize_blockwise_cpu_fp32 (line 760) | void cdequantize_blockwise_cpu_fp32( function cdequantize_blockwise_cpu_bf16 (line 766) | void cdequantize_blockwise_cpu_bf16( function cdequantize_blockwise_cpu_fp16 (line 772) | void cdequantize_blockwise_cpu_fp16( function cdequantize_blockwise_cpu_fp4_fp32 (line 778) | void cdequantize_blockwise_cpu_fp4_fp32( function cdequantize_blockwise_cpu_fp4_bf16 (line 784) | void cdequantize_blockwise_cpu_fp4_bf16( function cdequantize_blockwise_cpu_fp4_fp16 (line 790) | void cdequantize_blockwise_cpu_fp4_fp16( function cdequantize_blockwise_cpu_nf4_fp32 (line 796) | void cdequantize_blockwise_cpu_nf4_fp32( function cdequantize_blockwise_cpu_nf4_bf16 (line 802) | void cdequantize_blockwise_cpu_nf4_bf16( function cdequantize_blockwise_cpu_nf4_fp16 (line 808) | void cdequantize_blockwise_cpu_nf4_fp16( function gemv_4bit_inference_cpu_fp4_bf16 (line 815) | void gemv_4bit_inference_cpu_fp4_bf16( function gemv_4bit_inference_cpu_nf4_bf16 (line 822) | void gemv_4bit_inference_cpu_nf4_bf16( function has_avx512f_cpu (line 830) | bool has_avx512f_cpu() { return has_avx512f(); } function has_avx512bf16_cpu (line 832) | bool has_avx512bf16_cpu() { return has_avx512bf16(); } FILE: csrc/xpu_kernels.cpp function dDequantizeFP4 (line 8) | inline float dDequantizeFP4(unsigned char val) { function dDequantizeNF4 (line 50) | inline float dDequantizeNF4(unsigned char val) { function SYCL_EXTERNAL (line 97) | SYCL_EXTERNAL void kDequantizeBlockwise (line 268) | class kDequantizeBlockwise class kDequantizeBlockwise (line 269) | class kDequantizeBlockwise class kDequantizeBlockwise (line 270) | class kDequantizeBlockwise class kDequantizeBlockwise (line 272) | class kDequantizeBlockwise class kDequantizeBlockwise (line 273) | class kDequantizeBlockwise class kDequantizeBlockwise (line 274) | class kDequantizeBlockwise class kDequantizeBlockwise (line 276) | class kDequantizeBlockwise class kDequantizeBlockwise (line 277) | class kDequantizeBlockwise (line 278) | class kDequantizeBlockwise class kgemv_4bit_inference (line 280) | class kgemv_4bit_inference class kgemv_4bit_inference (line 281) | class kgemv_4bit_inference class kgemv_4bit_inference (line 282) | class kgemv_4bit_inference FILE: csrc/xpu_ops.cpp function dequantizeBlockwise (line 5) | void dequantizeBlockwise( function gemv_4bit_inference (line 34) | void gemv_4bit_inference( FILE: csrc/xpu_ops.h function sycl_kernel_submit (line 16) | inline void sycl_kernel_submit(sycl::nd_range range, sycl::queue q,... function sycl_comp_kernel_submit (line 23) | inline void sycl_comp_kernel_submit(sycl::nd_range range, sycl::que... FILE: examples/xpu/benchmark_paged_memory.py function get_args (line 22) | def get_args(): function get_torch_dtype (line 37) | def get_torch_dtype(name): function get_accelerator (line 41) | def get_accelerator(device_type): function count_params (line 48) | def count_params(model): function create_model (line 52) | def create_model(args): function make_batch (line 67) | def make_batch(args): function cleanup (line 74) | def cleanup(device_type): function measure_training (line 82) | def measure_training(args, optimizer_name, OptClass): function fmt_mb (line 142) | def fmt_mb(nbytes): function fmt_gb (line 146) | def fmt_gb(nbytes): function main (line 150) | def main(): FILE: examples/xpu/paged_xpu_training.py function get_args (line 21) | def get_args(): function format_alpaca (line 59) | def format_alpaca(example): function prepare_data (line 65) | def prepare_data(tokenizer, dataset_name, max_length, num_samples=200): function collate_fn (line 80) | def collate_fn(batch): function create_optimizer (line 84) | def create_optimizer(model, name, lr): function train_loop (line 107) | def train_loop(model, optimizer, dataloader, steps, log_interval, device): function get_torch_dtype (line 142) | def get_torch_dtype(name): function run_single (line 146) | def run_single(args): function run_with_trainer (line 184) | def run_with_trainer(args): function run_compare (line 250) | def run_compare(args): function main (line 290) | def main(): FILE: install_cuda.py function install_cuda (line 18) | def install_cuda(version, base_path, download_path): function main (line 67) | def main(): FILE: scripts/stale.py function main (line 30) | def main(): FILE: setup.py class BinaryDistribution (line 15) | class BinaryDistribution(Distribution): method has_ext_modules (line 16) | def has_ext_modules(self): class ExtBuildPy (line 20) | class ExtBuildPy(build_py): method run (line 21) | def run(self): FILE: tests/conftest.py function _set_seed (line 9) | def _set_seed(): function pytest_runtest_call (line 17) | def pytest_runtest_call(item): function pytest_runtest_teardown (line 36) | def pytest_runtest_teardown(item, nextitem): function requires_cuda (line 48) | def requires_cuda() -> bool: FILE: tests/fsdp_state_dict_save.py class SimpleQLoRAModel (line 23) | class SimpleQLoRAModel(nn.Module): method __init__ (line 26) | def __init__(self, quant_type="nf4"): method forward (line 31) | def forward(self, x): function main (line 35) | def main(): FILE: tests/helpers.py function get_available_devices (line 21) | def get_available_devices(no_cpu=False): function torch_save_to_buffer (line 53) | def torch_save_to_buffer(obj): function torch_load_from_buffer (line 60) | def torch_load_from_buffer(buffer): function get_test_dims (line 67) | def get_test_dims(min: int, max: int, *, n: int) -> list[int]: function format_with_label (line 71) | def format_with_label(label: str, value: Any) -> str: function id_formatter (line 83) | def id_formatter(label: str): function describe_dtype (line 102) | def describe_dtype(dtype: torch.dtype) -> str: function is_supported_on_hpu (line 106) | def is_supported_on_hpu( FILE: tests/test_autograd.py function test_matmullt (line 33) | def test_matmullt( function test_matmul_4bit (line 166) | def test_matmul_4bit( FILE: tests/test_cuda_setup_evaluator.py function cuda120_spec (line 8) | def cuda120_spec() -> CUDASpecs: function test_get_cuda_bnb_library_path (line 17) | def test_get_cuda_bnb_library_path(monkeypatch, cuda120_spec): function test_get_cuda_bnb_library_path_override (line 23) | def test_get_cuda_bnb_library_path_override(monkeypatch, cuda120_spec, c... function rocm70_spec (line 31) | def rocm70_spec() -> CUDASpecs: function test_get_rocm_bnb_library_path (line 40) | def test_get_rocm_bnb_library_path(monkeypatch, rocm70_spec): function test_get_rocm_bnb_library_path_override (line 48) | def test_get_rocm_bnb_library_path_override(monkeypatch, rocm70_spec, ca... function test_get_rocm_bnb_library_path_rejects_cuda_override (line 57) | def test_get_rocm_bnb_library_path_rejects_cuda_override(monkeypatch, ro... function test_get_rocm_bnb_library_path_rocm_override_takes_priority (line 66) | def test_get_rocm_bnb_library_path_rocm_override_takes_priority(monkeypa... FILE: tests/test_functional.py function assert_all_approx_close (line 27) | def assert_all_approx_close(a, b, rtol=1e-3, atol=1e-3, count=0, throw=T... class FFN (line 38) | class FFN(torch.nn.Module): method __init__ (line 39) | def __init__(self, input_features, hidden_size, bias=True): method forward (line 48) | def forward(self, x): class Timer (line 54) | class Timer: method __init__ (line 55) | def __init__(self): method tick (line 60) | def tick(self, name="default"): method tock (line 68) | def tock(self, name="default", evict=True, print_ms=True): method reset (line 85) | def reset(self): class Test8BitBlockwiseQuantizeFunctional (line 92) | class Test8BitBlockwiseQuantizeFunctional: method test_dynamic_blockwise_quantization (line 101) | def test_dynamic_blockwise_quantization(self, device, dtype, nested, b... method test_dynamic_blockwise_quantization_large (line 163) | def test_dynamic_blockwise_quantization_large(self, device, dtype, blo... method test_blockwise_cpu_large (line 190) | def test_blockwise_cpu_large(self, hidden, blocksize): method test_few_bit_quant (line 213) | def test_few_bit_quant(self, device, bits, method): method test_fp8_quant (line 265) | def test_fp8_quant(self, device): method test_bench_dequantization (line 320) | def test_bench_dequantization(self): function test_stable_embedding (line 337) | def test_stable_embedding(): function quant (line 342) | def quant(x): function dequant (line 348) | def dequant(c, maxC): function mm_dequant (line 352) | def mm_dequant(maxA, maxB, C): function quant_multi (line 356) | def quant_multi(x, dim): function quant_multi_chunk (line 363) | def quant_multi_chunk(x, dim, chunk_size=32): function mean (line 379) | def mean(xx): class TestIGEMMFunctional (line 396) | class TestIGEMMFunctional: method test_approx_igemm (line 401) | def test_approx_igemm(self, dim1, dim2, quant_methods, batched): method test_igemm (line 440) | def test_igemm(self, hidden_dim, batch_dim, transpose, seq_dim): method test_dim3_igemm (line 494) | def test_dim3_igemm(self, seq_dim, hidden_dim, batch_dim): method test_minmax_igemm (line 511) | def test_minmax_igemm(self, seq_dim, hidden_dim, batch_dim, transpose): method test_ibmm (line 588) | def test_ibmm(self, dim1, dim2, dim3, dim4, transpose): class TestLLMInt8Functional (line 616) | class TestLLMInt8Functional: method vectorwise_mm_dequant (line 618) | def vectorwise_mm_dequant(xq, S1, S2, dtype=torch.half): method vectorwise_quant (line 635) | def vectorwise_quant(x, dim=1): method test_int8_linear_matmul (line 648) | def test_int8_linear_matmul(self, device, dim1, dim2, dim3, dim4, dims... method test_int8_linear_matmul_half (line 666) | def test_int8_linear_matmul_half(self, device, dim1, dim2, dim3, dim4,... method test_dequant_mm (line 689) | def test_dequant_mm(self, device, dim1, dim4, dims, has_bias): method test_int8_double_quant (line 728) | def test_int8_double_quant(self, dim1, dim2): method test_integrated_int8_linear_matmul (line 772) | def test_integrated_int8_linear_matmul(self, device, dim1, dim4, inner): method test_coo_double_quant (line 805) | def test_coo_double_quant(self, device, dim1, dim2): method test_coo_int8_vectorwise_quant (line 825) | def test_coo_int8_vectorwise_quant(self, device, dim1, dim2): class TestQuantize4BitFunctional (line 839) | class TestQuantize4BitFunctional: method test_4bit_quant (line 847) | def test_4bit_quant(self, device, dtype, quant_type, blocksize): method test_4bit_compressed_stats (line 930) | def test_4bit_compressed_stats(self, device, quant_type, blocksize, dt... method test_4bit_quant_large (line 966) | def test_4bit_quant_large(self, device, dtype, quant_type, blocksize): method test_bench_4bit_dequant (line 996) | def test_bench_4bit_dequant(self, quant_type): method test_gemv_4bit (line 1031) | def test_gemv_4bit(self, device, dim, dtype, storage_type, double_quan... method test_gemv_eye_4bit (line 1179) | def test_gemv_eye_4bit(self, device, storage_type, dtype): function test_normal_map_tree (line 1211) | def test_normal_map_tree(): FILE: tests/test_generation.py function get_4bit_config (line 12) | def get_4bit_config(): function get_model_and_tokenizer (line 24) | def get_model_and_tokenizer(config): function get_prompt_for_generation_eval (line 44) | def get_prompt_for_generation_eval(text, add_roles=True): function generate (line 56) | def generate(model, tokenizer, text, generation_config, prompt_func=get_... function model_and_tokenizer (line 68) | def model_and_tokenizer(request): function test_pi (line 78) | def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ, dt... FILE: tests/test_linear4bit.py function test_linear_serialization (line 39) | def test_linear_serialization( function test_copy_param (line 199) | def test_copy_param(device, quant_type, blocksize, compress_statistics): function test_params4bit_torch_chunk_split (line 219) | def test_params4bit_torch_chunk_split(device, quant_type): function test_quant_storage_shard_roundtrip (line 259) | def test_quant_storage_shard_roundtrip(device, quant_type, quant_storage): function test_deepcopy_param (line 290) | def test_deepcopy_param(device, quant_type, blocksize, compress_statisti... function test_params4bit_real_serialization (line 319) | def test_params4bit_real_serialization(device, quant_type, blocksize, co... function test_linear4bit_torch_compile (line 362) | def test_linear4bit_torch_compile(device, quant_type, compute_dtype, com... function test_params4bit_quant_state_attr_access (line 440) | def test_params4bit_quant_state_attr_access(device, quant_type, compress... function test_fsdp_state_dict_save_4bit (line 508) | def test_fsdp_state_dict_save_4bit(): FILE: tests/test_linear8bitlt.py function test_linear_no_igemmlt (line 26) | def test_linear_no_igemmlt(device): function test_linear_serialization (line 74) | def test_linear_serialization( function linear8bit (line 176) | def linear8bit(requires_cuda): function test_linear8bit_copy_param (line 195) | def test_linear8bit_copy_param(linear8bit): function test_linear8bit_deepcopy_param (line 202) | def test_linear8bit_deepcopy_param(linear8bit): function test_linear8bit_serialization (line 217) | def test_linear8bit_serialization(linear8bit): function test_linear8bitlt_torch_compile (line 240) | def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, ... function test_linear8bitlt_device_movement (line 305) | def test_linear8bitlt_device_movement(device): FILE: tests/test_modules.py function caplog_at_level (line 14) | def caplog_at_level(caplog, level, logger_name): class MockArgs (line 19) | class MockArgs: method __init__ (line 20) | def __init__(self, initial_data): class MLP8bit (line 25) | class MLP8bit(torch.nn.Module): method __init__ (line 26) | def __init__(self, dim1, dim2, has_fp16_weights=True, threshold=0.0): method forward (line 41) | def forward(self, x): function get_args (line 47) | def get_args(): function assert_all_approx_close (line 55) | def assert_all_approx_close(a, b, atol=1e-8, rtol=1e-5, count=10): function test_linear8bitlt_inference (line 65) | def test_linear8bitlt_inference(device, threshold): function test_linear8bitlt_accumulated_gradient (line 80) | def test_linear8bitlt_accumulated_gradient(device): function test_linear8bitlt_no_fp16_weights (line 127) | def test_linear8bitlt_no_fp16_weights(device, threshold): function test_linear_kbit_fp32_bias (line 252) | def test_linear_kbit_fp32_bias(device, module): function test_kbit_backprop (line 291) | def test_kbit_backprop(device, module, dtype): function test_embedding_lossless (line 373) | def test_embedding_lossless(device, embedding_class, input_shape, embedd... function test_embedding_error (line 424) | def test_embedding_error(device, embedding_class, input_shape, embedding... function test_4bit_linear_warnings (line 464) | def test_4bit_linear_warnings(device, caplog): function test_4bit_embedding_warnings (line 484) | def test_4bit_embedding_warnings(device, caplog): function test_4bit_embedding_weight_fsdp_fix (line 498) | def test_4bit_embedding_weight_fsdp_fix(requires_cuda): function test_4bit_linear_weight_fsdp_fix (line 515) | def test_4bit_linear_weight_fsdp_fix(requires_cuda): function test_embedding_not_implemented_error (line 532) | def test_embedding_not_implemented_error(): FILE: tests/test_ops.py class TestLLMInt8Ops (line 17) | class TestLLMInt8Ops: method test_int8_linear_matmul (line 19) | def test_int8_linear_matmul(self, device): method test_int8_linear_matmul_out (line 31) | def test_int8_linear_matmul_out(self, device): method test_int8_vectorwise_quant (line 46) | def test_int8_vectorwise_quant(self, threshold, device): method test_int8_mm_dequant (line 71) | def test_int8_mm_dequant(self, device): method test_int8_scaled_mm (line 86) | def test_int8_scaled_mm(self, device, dtype, has_bias): class TestInt8BlockwiseQuantOps (line 101) | class TestInt8BlockwiseQuantOps: method test_quantize_blockwise (line 105) | def test_quantize_blockwise(self, device, dtype, blocksize): method test_dequantize_blockwise (line 129) | def test_dequantize_blockwise(self, device, dtype, blocksize): class Test4bitBlockwiseQuantOps (line 149) | class Test4bitBlockwiseQuantOps: method test_quantize_4bit (line 155) | def test_quantize_4bit(self, device, dtype, storage_dtype, quant_type,... method test_quantize_4bit_not_divisible_by_blocksize (line 178) | def test_quantize_4bit_not_divisible_by_blocksize(self, device, dtype,... method test_dequantize_4bit (line 205) | def test_dequantize_4bit(self, device, dtype, storage_dtype, quant_typ... method test_gemv_4bit (line 239) | def test_gemv_4bit(self, device, dtype, storage_dtype, quant_type, blo... class TestNonContiguousInputs (line 275) | class TestNonContiguousInputs: method test_quantize_blockwise_non_contiguous (line 281) | def test_quantize_blockwise_non_contiguous(self, device, dtype, blocks... method test_dequantize_blockwise_non_contiguous (line 303) | def test_dequantize_blockwise_non_contiguous(self, device, dtype, bloc... method test_quantize_4bit_non_contiguous (line 334) | def test_quantize_4bit_non_contiguous(self, device, dtype, quant_type,... method test_quantize_4bit_roundtrip_non_contiguous (line 356) | def test_quantize_4bit_roundtrip_non_contiguous(self, device, dtype, q... FILE: tests/test_optim.py function assert_most_approx_close (line 22) | def assert_most_approx_close(a, b, rtol=1e-3, atol=1e-3, max_error_count... function get_temp_dir (line 30) | def get_temp_dir(): function rm_path (line 36) | def rm_path(path): function test_optimizer32bit (line 181) | def test_optimizer32bit(dim1, dim2, gtype, optim_name, device): function test_global_config (line 265) | def test_global_config(dim1, dim2, gtype, device): function test_override_config_after_register (line 311) | def test_override_config_after_register(device): function test_optimizer8bit (line 358) | def test_optimizer8bit(dim1, dim2, gtype, optim_name, device): function test_benchmark_blockwise (line 520) | def test_benchmark_blockwise(dim1, dim2, gtype, optim_name, device): function test_ademamix_state_dict_no_nan (line 561) | def test_ademamix_state_dict_no_nan(optim_name, optim_factory, device): FILE: tests/test_parametrize.py class ParametrizeTestModule (line 20) | class ParametrizeTestModule(nn.Module): method __init__ (line 23) | def __init__(self, device="cpu", dtype=torch.float32): function test_replace_parameter_4bit (line 40) | def test_replace_parameter_4bit(device, dtype, quant_type, compress_stat... function test_moe_parameter_shape (line 97) | def test_moe_parameter_shape(device, dtype): function test_prequantized_replacement (line 143) | def test_prequantized_replacement(device, dtype, quant_type): function test_state_dict_functionality (line 174) | def test_state_dict_functionality(device, dtype, quant_type, compress_st... function test_moe_realistic_forward (line 206) | def test_moe_realistic_forward(device, dtype): function test_error_conditions (line 249) | def test_error_conditions(): function test_quant_state_preservation (line 272) | def test_quant_state_preservation(device, dtype): function test_multiple_parameters (line 306) | def test_multiple_parameters(device, dtype): function test_different_blocksizes (line 340) | def test_different_blocksizes(device, dtype, blocksize): function test_parametrization_forward_method (line 376) | def test_parametrization_forward_method(): function test_gradient_behavior (line 415) | def test_gradient_behavior(device, dtype):