SYMBOL INDEX (641 symbols across 138 files) FILE: bench/generation/evaluate_configurations.py function evaluate_model_configurations (line 26) | def evaluate_model_configurations( function main (line 64) | def main(): FILE: bench/generation/evaluate_model.py function calibrate (line 36) | def calibrate(model, tokenizer, batch_size, batches): function evaluate (line 51) | def evaluate( function main (line 86) | def main(): FILE: bench/generation/gen_barchart.py function save_bar_chart (line 23) | def save_bar_chart(title, labels, ylabel, series, save_path): function gen_barchart (line 50) | def gen_barchart(model_id, title, label, results, dtype): function main (line 76) | def main(): FILE: bench/generation/metrics/latency.py function latency (line 24) | def latency(model, tokenizer, device, batch_size=1, prompt_length=512, n... function get_device_memory (line 108) | def get_device_memory(device): FILE: bench/generation/metrics/perplexity.py class Perplexity (line 23) | class Perplexity: method __init__ (line 28) | def __init__(self, model, tokenizer, dataset_path="wikitext", dataset_... method _prepare_data (line 55) | def _prepare_data(self): method softmax (line 74) | def softmax(logits): method calculate_perplexity (line 91) | def calculate_perplexity(self, n_ctx=512, n_batch=512): method _process_batch (line 128) | def _process_batch(self, i, n_ctx, n_batch, tokens, nll, count): method _compute_batch_logits (line 197) | def _compute_batch_logits(self, tokens, batch_start, batch_size): function perplexity (line 221) | def perplexity( FILE: bench/generation/metrics/prediction.py function prediction_accuracy (line 22) | def prediction_accuracy(model, tokenizer, batch_size, samples=None): FILE: bench/generation/setup/awq.py function prepare_inputs_for_generation (line 19) | def prepare_inputs_for_generation(input_ids, past_key_values=None, atten... function setup (line 69) | def setup(model_id: str, weights: str, activations: str, group_size: int... FILE: bench/generation/setup/bnb.py function setup (line 19) | def setup( FILE: bench/generation/setup/hqq.py function setup (line 21) | def setup(model_id: str, weights: str, activations: str, device: torch.d... FILE: bench/generation/setup/quanto.py function calibrate (line 25) | def calibrate(model, tokenizer, batch_size, batches): function setup (line 40) | def setup( function keyword_to_qtype (line 71) | def keyword_to_qtype(k): FILE: bench/kernels/benchmark.py function get_unpack_bench (line 26) | def get_unpack_bench(bits, device): function timing (line 36) | def timing(get_bench_func, device, iterations=10): function main (line 95) | def main(): FILE: bench/kernels/benchmark_marlin_fp8.py function run_benchmark (line 28) | def run_benchmark( function shape_generator (line 132) | def shape_generator(): function shape_generator (line 137) | def shape_generator(): FILE: bench/kernels/benchmark_w4a16.py function benchmark (line 12) | def benchmark(f, warmup=1, iter=10): function get_problem (line 28) | def get_problem(m, n, k, groupsize=128): function benchmark_dense (line 44) | def benchmark_dense(A, B, m, n, k): function benchmark_awq (line 53) | def benchmark_awq(A, B, s, sz, m, n, k): function benchmark_marlin (line 64) | def benchmark_marlin(A, B, s, sz, m, n, k): function run_benchmark (line 87) | def run_benchmark(model, tokens=None): function main (line 130) | def main(): FILE: bench/torch_kernels/test_int_mm.py function main (line 21) | def main(): FILE: bench/torch_kernels/test_int_mm_inductor.py function mm (line 20) | def mm(a, b): FILE: bench/torch_kernels/test_weight_int4pack_mm.py function _group_quantize_tensor (line 21) | def _group_quantize_tensor(w, n_bit=4, q_group_size=16): function main (line 64) | def main(): FILE: bench/torch_kernels/test_weight_int8pack_mm.py function main (line 21) | def main(): FILE: examples/nlp/text-classification/sst2/quantize_sst2_model.py function evaluate_model (line 28) | def evaluate_model(model, tokenizer, dataset, device, batch_size): function keyword_to_itype (line 38) | def keyword_to_itype(k): function main (line 42) | def main(): FILE: examples/nlp/text-generation/quantize_causal_lm_model.py function generate (line 26) | def generate(model, tokenizer, device, prompt, max_new_tokens): function calibrate (line 43) | def calibrate(model, tokenizer, dataset, device, batch_size, samples=None): function keyword_to_itype (line 56) | def keyword_to_itype(k): function main (line 65) | def main(): FILE: examples/speech/speech_recognition/quantize_asr_model.py function map_to_feats (line 31) | def map_to_feats(batch, processor): function transcribe_batch (line 42) | def transcribe_batch(batch, model, processor): function evaluate_model (line 51) | def evaluate_model(model, processor, dataset, metric: evaluate.Evaluatio... function keyword_to_itype (line 61) | def keyword_to_itype(k): function main (line 65) | def main(): FILE: examples/vision/StableDiffusion/quantize_StableDiffusion.py function load_pipeline (line 25) | def load_pipeline(torch_dtype, unet_dtype=None, device="cpu"): function run_inference (line 36) | def run_inference(pipe, batch_size=1): function benchmark_fn (line 45) | def benchmark_fn(f, *args, **kwargs): function bytes_to_giga_bytes (line 50) | def bytes_to_giga_bytes(bytes): function get_device_memory (line 54) | def get_device_memory(device): FILE: examples/vision/image-classification/mnist/quantize_mnist_model.py function test (line 39) | def test(model, device, test_loader): function train (line 65) | def train(log_interval, model, device, train_loader, optimizer, epoch): function keyword_to_itype (line 89) | def keyword_to_itype(k): function main (line 93) | def main(): FILE: examples/vision/image-classification/pets/quantize_vit_model.py function test (line 29) | def test(model, device, test_loader): function keyword_to_itype (line 56) | def keyword_to_itype(k): function main (line 60) | def main(): FILE: examples/vision/object-detection/quantize_owl_model.py function detect (line 14) | def detect(model, processor, image, texts): function get_device_memory (line 52) | def get_device_memory(device): function keyword_to_qtype (line 66) | def keyword_to_qtype(k): function main (line 70) | def main(): FILE: examples/vision/text-to-image/quantize_pixart_sigma.py function load_pipeline (line 21) | def load_pipeline(model_id, torch_dtype, qtype=None, device="cpu"): function get_device_memory (line 34) | def get_device_memory(device): FILE: external/awq/conftest.py function device (line 27) | def device(request): function pytest_configure (line 31) | def pytest_configure(config): function pytest_runtest_call (line 36) | def pytest_runtest_call(item): FILE: external/awq/pack_intweight.py function pack_intweight (line 25) | def pack_intweight(unpacked_qweight, interleave, kstride): FILE: external/awq/packing_utils.py function pack_awq (line 8) | def pack_awq(intweight: torch.Tensor, reorder=False): function unpack_awq (line 23) | def unpack_awq(qweight: torch.Tensor, bits: int): function reverse_awq_order (line 35) | def reverse_awq_order(iweights: torch.Tensor, bits: int): function pack_exllama (line 50) | def pack_exllama(iweights: torch.Tensor, izeros: torch.Tensor, bits: int): function unpack_reorder_pack (line 72) | def unpack_reorder_pack(qweight, qzeros, bits): function dequantize_gemm (line 91) | def dequantize_gemm(qweight, qzeros, scales, bits, group_size): FILE: external/awq/test_awq_kernels.py function assert_similar (line 21) | def assert_similar(a, b, atol=None, rtol=None): function test_standalone_kernel (line 41) | def test_standalone_kernel(in_features, out_features, kernel): function test_integrated_kernel (line 103) | def test_integrated_kernel(in_features, out_features, kernel): FILE: external/awq/test_awq_packing.py function test_awq_pack (line 28) | def test_awq_pack(in_features, out_features, reorder, random): function test_awq_pack_v2 (line 64) | def test_awq_pack_v2(in_features, out_features, random): FILE: external/awq/test_awq_quantize.py function awq_quantize (line 7) | def awq_quantize(base, scales, zeros, group_size): function test_awq_quantize (line 24) | def test_awq_quantize(in_features, out_features): FILE: external/smoothquant/smoothquant.py function get_act_scales (line 16) | def get_act_scales(model, tokenizer, dataset, num_samples=512, seq_len=5... function smooth_ln_fcs (line 53) | def smooth_ln_fcs(ln, fcs, act_scales, alpha=0.5): function smooth_lm (line 77) | def smooth_lm(model, scales, alpha=0.5): function main (line 111) | def main(): FILE: optimum/quanto/calibrate.py function _updated_scale (line 31) | def _updated_scale(scale, new_scale, momentum): function absmax_scale (line 37) | def absmax_scale(base: torch.Tensor, qtype: qtype = qint8, axis: Optiona... class Calibration (line 64) | class Calibration(TorchFunctionMode): method __init__ (line 81) | def __init__(self, *args, momentum: float = 0.9, streamline=True, debu... method __torch_function__ (line 90) | def __torch_function__(self, func, types, args=(), kwargs=None): method __enter__ (line 107) | def __enter__(self): method __exit__ (line 112) | def __exit__(self, exc_type, exc_val, exc_tb): method calibrate_input (line 120) | def calibrate_input(self, module: torch.nn.Module, input, momentum: fl... method calibrate_output (line 139) | def calibrate_output( method tag_outputs (line 176) | def tag_outputs( FILE: optimum/quanto/library/extensions/__init__.py function _is_xpu_available (line 34) | def _is_xpu_available(): FILE: optimum/quanto/library/extensions/cpp/__init__.py function unpack_cpp (line 35) | def unpack_cpp(t: torch.Tensor, bits: int): FILE: optimum/quanto/library/extensions/cpp/pybind_module.cpp function PYBIND11_MODULE (line 24) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: optimum/quanto/library/extensions/cpp/unpack.cpp function unpack_4bit (line 19) | static torch::Tensor unpack_4bit(torch::Tensor &t) { function unpack_2bit (line 27) | static torch::Tensor unpack_2bit(torch::Tensor &t) { function unpack (line 37) | torch::Tensor unpack(torch::Tensor &t, int bits) { FILE: optimum/quanto/library/extensions/cuda/__init__.py function get_max_cuda_arch (line 25) | def get_max_cuda_arch(): function unpack_cuda (line 78) | def unpack_cuda(t: torch.Tensor, bits: int): function gemm_f16i4_awq (line 98) | def gemm_f16i4_awq( function fp8_marlin_gemm (line 139) | def fp8_marlin_gemm( function gptq_marlin_repack (line 162) | def gptq_marlin_repack( function gemm_f16i4_marlin (line 177) | def gemm_f16i4_marlin( FILE: optimum/quanto/library/extensions/cuda/awq/v2/semaphore.h function class (line 44) | class Semaphore FILE: optimum/quanto/library/extensions/cuda/marlin/marlin_cuda.cpp function mul (line 28) | void mul( FILE: optimum/quanto/library/extensions/cuda/pybind_module.cpp function PYBIND11_MODULE (line 30) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: optimum/quanto/library/extensions/extension.py class Extension (line 13) | class Extension(object): method __init__ (line 14) | def __init__( method lib (line 30) | def lib(self): function register_extension (line 60) | def register_extension(extension: Extension): function get_extension (line 65) | def get_extension(extension_type: str): function is_extension_available (line 77) | def is_extension_available(extension_type: str): FILE: optimum/quanto/library/extensions/hip/__init__.py function unpack_hip (line 35) | def unpack_hip(t: torch.Tensor, bits: int): FILE: optimum/quanto/library/extensions/hip/pybind_module.cpp function PYBIND11_MODULE (line 19) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: optimum/quanto/library/extensions/mps/__init__.py function unpack_mps (line 35) | def unpack_mps(t: torch.Tensor, bits: int): FILE: optimum/quanto/library/extensions/mps/pybind_module.cpp function PYBIND11_MODULE (line 19) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: optimum/quanto/library/extensions/xpu/__init__.py function unpack_xpu (line 41) | def unpack_xpu(t: torch.Tensor, bits: int): function gemm_f16i4_awq (line 61) | def gemm_f16i4_awq( FILE: optimum/quanto/library/extensions/xpu/pybind_module.cpp function PYBIND11_MODULE (line 25) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: optimum/quanto/library/qbytes_mm.py function qbytes_mm (line 25) | def qbytes_mm(activations: torch.Tensor, weights: torch.Tensor, output_s... function qbytes_int_mm (line 36) | def qbytes_int_mm(activations: torch.Tensor, weights: torch.Tensor, outp... function qbytes_int8pack_mm (line 53) | def qbytes_int8pack_mm(activations: torch.Tensor, weights: torch.Tensor,... function qbytes_mm_impl_default (line 67) | def qbytes_mm_impl_default( function qbytes_mm_impl_cuda (line 74) | def qbytes_mm_impl_cuda(activations: torch.Tensor, weights: torch.Tensor... function qbytes_mm_impl_cpu (line 92) | def qbytes_mm_impl_cpu(activations: torch.Tensor, weights: torch.Tensor,... function qbytes_mm_impl_mps (line 109) | def qbytes_mm_impl_mps(activations: torch.Tensor, weights: torch.Tensor,... FILE: optimum/quanto/library/quantize.py function quantize_symmetric (line 28) | def quantize_symmetric( function quantize_affine (line 65) | def quantize_affine( FILE: optimum/quanto/library/unpack.py function unpack (line 22) | def unpack(packed: torch.Tensor, bits: int) -> torch.Tensor: FILE: optimum/quanto/models/__init__.py function is_transformers_available (line 21) | def is_transformers_available() -> bool: function is_diffusers_available (line 25) | def is_diffusers_available() -> bool: FILE: optimum/quanto/models/diffusers_models.py class QuantizedDiffusersModel (line 44) | class QuantizedDiffusersModel(ModelHubMixin): method __init__ (line 48) | def __init__(self, model: ModelMixin): method __getattr__ (line 53) | def __getattr__(self, name: str) -> Any: method forward (line 61) | def forward(self, *args, **kwargs): method __call__ (line 64) | def __call__(self, *args, **kwargs): method _qmap_name (line 68) | def _qmap_name(): method quantize (line 72) | def quantize( method from_pretrained (line 119) | def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.... method _save_pretrained (line 180) | def _save_pretrained(self, save_directory: Path) -> None: class QuantizedPixArtTransformer2DModel (line 189) | class QuantizedPixArtTransformer2DModel(QuantizedDiffusersModel): FILE: optimum/quanto/models/shared_dict.py class ShardedStateDict (line 22) | class ShardedStateDict(Mapping): method __init__ (line 30) | def __init__(self, base_dir: str, tensor_index: Dict[str, str]): method __iter__ (line 35) | def __iter__(self): method __len__ (line 38) | def __len__(self): method __getitem__ (line 41) | def __getitem__(self, key: Any) -> Any: method __contains__ (line 49) | def __contains__(self, key: object) -> bool: method keys (line 52) | def keys(self): FILE: optimum/quanto/models/transformers_models.py class QuantizedTransformersModel (line 38) | class QuantizedTransformersModel(ModelHubMixin): method __init__ (line 42) | def __init__(self, model: PreTrainedModel): method __getattr__ (line 47) | def __getattr__(self, name: str) -> Any: method forward (line 55) | def forward(self, *args, **kwargs): method __call__ (line 58) | def __call__(self, *args, **kwargs): method __repr__ (line 61) | def __repr__(self): method _qmap_name (line 65) | def _qmap_name(): method quantize (line 69) | def quantize( method from_pretrained (line 115) | def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.... method _save_pretrained (line 165) | def _save_pretrained(self, save_directory: Path) -> None: class QuantizedModelForCausalLM (line 182) | class QuantizedModelForCausalLM(QuantizedTransformersModel): FILE: optimum/quanto/nn/qconv2d.py class QConv2d (line 27) | class QConv2d(QModuleMixin, torch.nn.Conv2d): method qcreate (line 29) | def qcreate( method forward (line 54) | def forward(self, input: torch.Tensor) -> torch.Tensor: FILE: optimum/quanto/nn/qlayernorm.py class QLayerNorm (line 27) | class QLayerNorm(QModuleMixin, torch.nn.LayerNorm): method qcreate (line 29) | def qcreate( method forward (line 52) | def forward(self, input: torch.Tensor) -> torch.Tensor: FILE: optimum/quanto/nn/qlinear.py class QLinear (line 27) | class QLinear(QModuleMixin, torch.nn.Linear): method qcreate (line 29) | def qcreate( method forward (line 49) | def forward(self, input: torch.Tensor) -> torch.Tensor: FILE: optimum/quanto/nn/qmodule.py function register_qmodule (line 44) | def register_qmodule(module_cls): function quantize_module (line 81) | def quantize_module( class QModuleMixin (line 94) | class QModuleMixin(ABC): method __init__ (line 95) | def __init__( method disable_output_quantization (line 143) | def disable_output_quantization(self): method _save_to_state_dict (line 147) | def _save_to_state_dict(self, destination, prefix, keep_vars): method _load_from_state_dict (line 161) | def _load_from_state_dict( method from_module (line 210) | def from_module( method qcreate (line 235) | def qcreate( method qweight (line 246) | def qweight(self): method qforward (line 281) | def qforward(self, input: torch.Tensor) -> torch.Tensor: method quantize_input (line 284) | def quantize_input(self, module: torch.nn.Module, input: torch.Tensor)... method quantize_output (line 296) | def quantize_output( method freeze (line 304) | def freeze(self): method frozen (line 311) | def frozen(self): FILE: optimum/quanto/quantize.py function set_module_by_name (line 27) | def set_module_by_name(parent_module, name, child_module): function _quantize_submodule (line 37) | def _quantize_submodule( function quantize (line 55) | def quantize( function requantize (line 101) | def requantize( function freeze (line 143) | def freeze(model): function quantization_map (line 149) | def quantization_map(model: torch.nn.Module) -> Dict[str, Dict[str, str]]: FILE: optimum/quanto/subpackage/commands/base.py class QuantoCommand (line 25) | class QuantoCommand(BaseOptimumCLICommand): FILE: optimum/quanto/subpackage/commands/quantize.py function parse_quantize_args (line 32) | def parse_quantize_args(parser: "ArgumentParser"): class QuantizeCommand (line 95) | class QuantizeCommand(BaseOptimumCLICommand): method parse_args (line 97) | def parse_args(parser: "ArgumentParser"): method run (line 100) | def run(self): FILE: optimum/quanto/tensor/activations/qbytes.py class ActivationQBytesQuantizer (line 28) | class ActivationQBytesQuantizer(Function): method forward (line 30) | def forward(ctx, base: torch.Tensor, qtype: qtype, scale: torch.Tensor... method backward (line 41) | def backward(ctx, gO): class ActivationQBytesTensor (line 46) | class ActivationQBytesTensor(QBytesTensor): method __new__ (line 48) | def __new__(cls, qtype, size, stride, data, scale, requires_grad=False): method __init__ (line 54) | def __init__(self, qtype, size, stride, data, scale, requires_grad=Fal... method quantize (line 58) | def quantize(cls, base: torch.Tensor, qtype: qtype, scale: torch.Tenso... method __tensor_flatten__ (line 61) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 71) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_dispatch__ (line 82) | def __torch_dispatch__(cls, op, types, args, kwargs=None): FILE: optimum/quanto/tensor/activations/qbytes_ops.py function register_qbytestensor_op (line 34) | def register_qbytestensor_op(aten_ops: List[Callable]): function get_qbytestensor_op_dispatch (line 52) | def get_qbytestensor_op_dispatch(aten_op): function is_scalar (line 56) | def is_scalar(t): function _to_copy (line 61) | def _to_copy(op, t, dtype=None, **kwargs): function detach (line 70) | def detach(op, t): function cat (line 78) | def cat(op, inputs, dim=0): function lt (line 97) | def lt(op, input, other): function clone (line 109) | def clone(op, t, memory_format=torch.preserve_format): function copy_ (line 121) | def copy_(op, dest, src): function div (line 129) | def div(op, input, other): function neg (line 137) | def neg(op, input, *args, **kwargs): function unary_type_agnostic_op (line 154) | def unary_type_agnostic_op(op, input, *args, **kwargs): function is_same_size (line 164) | def is_same_size(op, input, other): function cannot_mm (line 170) | def cannot_mm(t: QTensor): function bmm (line 176) | def bmm(op, input, other): function mul (line 190) | def mul(op, input, other): function relu (line 200) | def relu(op, input): function _softmax (line 209) | def _softmax(op, input, dim, half_to_float): function stack (line 219) | def stack(op, inputs, dim=0): function split (line 237) | def split(op, input, *args, **kwargs): function transpose (line 248) | def transpose(op, input, *args): function transpose2d (line 257) | def transpose2d(op, input): function view (line 268) | def view(op, input, *shape): function where (line 277) | def where(op, condition, input, other): FILE: optimum/quanto/tensor/activations/quantization.py function quantize_activation (line 24) | def quantize_activation(t: torch.Tensor, qtype: qtype, scale: torch.Tens... FILE: optimum/quanto/tensor/core.py function dtype_info (line 22) | def dtype_info(dtype): function axis_to_dim (line 27) | def axis_to_dim(t, axis): FILE: optimum/quanto/tensor/function.py class QuantizedLinearFunction (line 21) | class QuantizedLinearFunction(torch.autograd.Function): method forward (line 42) | def forward(ctx, input, other, bias=None): method backward (line 49) | def backward(ctx, gO): FILE: optimum/quanto/tensor/grouped.py function grouped_shape (line 10) | def grouped_shape(shape: List, axis: int, group_size: int) -> List: function group (line 17) | def group(base: torch.Tensor, axis: int, group_size: int): function ungroup (line 39) | def ungroup(grouped: torch.Tensor, axis: int, orig_shape: torch.Size): FILE: optimum/quanto/tensor/optimizers/absmax_optimizer.py class AbsmaxOptimizer (line 26) | class AbsmaxOptimizer(SymmetricOptimizer): method optimize (line 27) | def optimize( FILE: optimum/quanto/tensor/optimizers/affine_optimizer.py class AffineOptimizer (line 27) | class AffineOptimizer(Optimizer): method __call__ (line 28) | def __call__( method optimize (line 63) | def optimize(self, base: torch.Tensor, qtype: qtype, axis: int) -> Tup... FILE: optimum/quanto/tensor/optimizers/hqq_optimizer.py function shrink_lp_op (line 28) | def shrink_lp_op(x: torch.Tensor, beta: float, lp_norm: float) -> torch.... class HqqOptimizer (line 37) | class HqqOptimizer(MaxOptimizer): method __init__ (line 46) | def __init__( method optimize (line 60) | def optimize( FILE: optimum/quanto/tensor/optimizers/max_optimizer.py class MaxOptimizer (line 26) | class MaxOptimizer(AffineOptimizer): method optimize (line 27) | def optimize( FILE: optimum/quanto/tensor/optimizers/optimizer.py class Optimizer (line 24) | class Optimizer(ABC): method __call__ (line 25) | def __call__( FILE: optimum/quanto/tensor/optimizers/symmetric_optimizer.py class SymmetricOptimizer (line 26) | class SymmetricOptimizer(Optimizer): method __call__ (line 27) | def __call__(self, base: torch.Tensor, qtype: qtype, axis: Optional[in... method optimize (line 37) | def optimize(self, base: torch.Tensor, qmax: float, axis: Optional[int... FILE: optimum/quanto/tensor/packed.py function pack_weights (line 24) | def pack_weights(intweights: torch.Tensor, bits: int) -> torch.Tensor: class PackedTensor (line 72) | class PackedTensor(torch.Tensor): method __new__ (line 74) | def __new__(cls, data, bits, size, stride, requires_grad=False): method __init__ (line 82) | def __init__(self, data, bits, size, stride, requires_grad=False): method __repr__ (line 86) | def __repr__(self): method pack (line 93) | def pack(cls, t, bits=4): method unpack (line 101) | def unpack(self): method bits (line 107) | def bits(self): method dtype (line 111) | def dtype(self): method load_from_state_dict (line 115) | def load_from_state_dict(state_dict, prefix, bits, size, stride, missi... method __tensor_flatten__ (line 125) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 132) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_dispatch__ (line 145) | def __torch_dispatch__(cls, op, types, args, kwargs=None): method numpy (line 162) | def numpy(self): FILE: optimum/quanto/tensor/qbits.py class QBitsDequantizer (line 27) | class QBitsDequantizer(Function): method forward (line 29) | def forward(ctx, t): method backward (line 52) | def backward(ctx, gO): class QBitsTensor (line 56) | class QBitsTensor(QTensor): method __init__ (line 57) | def __init__(self, qtype, axis, group_size, size, stride, data, scale,... method __repr__ (line 64) | def __repr__(self): method dequantize (line 67) | def dequantize(self): FILE: optimum/quanto/tensor/qbytes.py class QBytesDequantizer (line 23) | class QBytesDequantizer(Function): method forward (line 25) | def forward(ctx, t): method backward (line 34) | def backward(ctx, gO): class QBytesTensor (line 39) | class QBytesTensor(QTensor): method __init__ (line 40) | def __init__(self, qtype, axis, size, stride, data, scale, requires_gr... method __repr__ (line 45) | def __repr__(self): method dequantize (line 48) | def dequantize(self): FILE: optimum/quanto/tensor/qtensor.py function qfallback (line 21) | def qfallback(callable, *args, **kwargs): class QTensor (line 32) | class QTensor(torch.Tensor): method __init__ (line 33) | def __init__(self, qtype, axis): method dequantize (line 37) | def dequantize(self): method save_to_state_dict (line 40) | def save_to_state_dict(self, destination, prefix, keep_vars): method axis (line 56) | def axis(self): method qtype (line 60) | def qtype(self): method numpy (line 63) | def numpy(self): method equal (line 66) | def equal(self, other): FILE: optimum/quanto/tensor/qtype.py class qtype (line 21) | class qtype: method __str__ (line 32) | def __str__(self): method __hash__ (line 35) | def __hash__(self): function qint (line 42) | def qint(bits): function qfloat (line 55) | def qfloat(dtype: torch.dtype): FILE: optimum/quanto/tensor/weights/awq/packed.py function pack (line 33) | def pack(unpacked: torch.Tensor, reorder=False): function reverse_awq_order (line 64) | def reverse_awq_order(t: torch.Tensor): function unpack (line 80) | def unpack(packed: torch.Tensor, reorder=False): function pack_v2 (line 100) | def pack_v2(unpacked: torch.Tensor) -> torch.Tensor: function unpack_v2 (line 156) | def unpack_v2(packed): class AWQPacking (line 204) | class AWQPacking(Enum): class AWQPackedTensor (line 209) | class AWQPackedTensor(torch.Tensor): method __new__ (line 211) | def __new__(cls, data, packing, reorder, size, stride, requires_grad=F... method __init__ (line 220) | def __init__(self, data, packing, reorder, size, stride, requires_grad... method __repr__ (line 225) | def __repr__(self): method pack (line 229) | def pack(cls, t, packing=AWQPacking.V1, reorder=False): method unpack (line 237) | def unpack(self): method dtype (line 243) | def dtype(self): method __tensor_flatten__ (line 246) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 258) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_dispatch__ (line 272) | def __torch_dispatch__(cls, op, types, args, kwargs=None): method numpy (line 293) | def numpy(self): FILE: optimum/quanto/tensor/weights/awq/qbits.py class AWQWeightQBitsDequantizer (line 30) | class AWQWeightQBitsDequantizer(Function): method forward (line 32) | def forward(ctx, t): method backward (line 49) | def backward(ctx, gO): class AWQWeightQBitsLinearFunction (line 53) | class AWQWeightQBitsLinearFunction(QuantizedLinearFunction): method forward (line 55) | def forward(ctx, input, other, bias): class AWQWeightQBitsTensor (line 77) | class AWQWeightQBitsTensor(WeightQBitsTensor): method __new__ (line 79) | def __new__(cls, qtype, axis, group_size, size, stride, data, scale, s... method __init__ (line 87) | def __init__(self, qtype, axis, group_size, size, stride, data, scale,... method dequantize (line 106) | def dequantize(self): method weight_qbits_tensor (line 109) | def weight_qbits_tensor(self): method __tensor_flatten__ (line 123) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 136) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_function__ (line 149) | def __torch_function__(cls, func, types, args=(), kwargs=None): FILE: optimum/quanto/tensor/weights/marlin/fp8/packed.py function pack_fp8_as_int32 (line 22) | def pack_fp8_as_int32(fp8_tensor: torch.Tensor) -> torch.Tensor: function unpack_int32_to_fp8 (line 51) | def unpack_int32_to_fp8(int32_tensor: torch.Tensor) -> torch.Tensor: function get_scale_perms (line 71) | def get_scale_perms() -> torch.Tensor: function get_row_permutation (line 78) | def get_row_permutation(n_rows: int) -> torch.Tensor: function get_column_permutation (line 116) | def get_column_permutation(n_col: int) -> torch.Tensor: class MarlinF8PackedTensor (line 160) | class MarlinF8PackedTensor(torch.Tensor): method __new__ (line 161) | def __new__(cls, data, size, stride, requires_grad=False): method __init__ (line 169) | def __init__(self, data, size, stride, requires_grad=False): method __repr__ (line 172) | def __repr__(self): method pack (line 176) | def pack(cls, tensor: torch.Tensor): method unpack (line 189) | def unpack(self) -> torch.Tensor: method dtype (line 220) | def dtype(self): method __tensor_flatten__ (line 223) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 233) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_dispatch__ (line 245) | def __torch_dispatch__(cls, op, types, args, kwargs=None): FILE: optimum/quanto/tensor/weights/marlin/fp8/qbits.py class MarlinF8QBytesLinearFunction (line 28) | class MarlinF8QBytesLinearFunction(QuantizedLinearFunction): method forward (line 30) | def forward(ctx, input, other, bias=None): class MarlinF8QBytesTensor (line 54) | class MarlinF8QBytesTensor(WeightQBytesTensor): method __new__ (line 56) | def __new__(cls, qtype, axis, size, stride, data, scale, requires_grad... method __init__ (line 63) | def __init__(self, qtype, axis, size, stride, data, scale, requires_gr... method dequantize (line 88) | def dequantize(self): method __repr__ (line 102) | def __repr__(self): method weight_qbytes_tensor (line 105) | def weight_qbytes_tensor(self): method __tensor_flatten__ (line 119) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 130) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_function__ (line 142) | def __torch_function__(cls, func, types, args=(), kwargs=None): FILE: optimum/quanto/tensor/weights/marlin/int4/packed.py function _get_perm (line 19) | def _get_perm(): function pack (line 59) | def pack(unpacked: torch.Tensor): function unpack (line 78) | def unpack(packed, orig_shape): class MarlinInt4PackedTensor (line 91) | class MarlinInt4PackedTensor(torch.Tensor): method __new__ (line 93) | def __new__(cls, data, size, stride, requires_grad=False): method __init__ (line 101) | def __init__(self, data, size, stride, requires_grad=False): method __repr__ (line 104) | def __repr__(self): method pack (line 108) | def pack(cls, t): method unpack (line 112) | def unpack(self): method dtype (line 116) | def dtype(self): method __tensor_flatten__ (line 119) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 128) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_dispatch__ (line 139) | def __torch_dispatch__(cls, op, types, args, kwargs=None): method numpy (line 159) | def numpy(self): FILE: optimum/quanto/tensor/weights/marlin/int4/qbits.py class MarlinQBitsDequantizer (line 31) | class MarlinQBitsDequantizer(Function): method forward (line 33) | def forward(ctx, t): method backward (line 49) | def backward(ctx, gO): class MarlinQBitsLinearFunction (line 53) | class MarlinQBitsLinearFunction(QuantizedLinearFunction): method forward (line 55) | def forward(ctx, input, other, bias): class MarlinInt4WeightQBitsTensor (line 72) | class MarlinInt4WeightQBitsTensor(WeightQBitsTensor): method __new__ (line 74) | def __new__(cls, qtype, axis, group_size, size, stride, data, scale, s... method __init__ (line 82) | def __init__(self, qtype, axis, group_size, size, stride, data, scale,... method dequantize (line 103) | def dequantize(self): method weight_qbits_tensor (line 106) | def weight_qbits_tensor(self): method __tensor_flatten__ (line 121) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 134) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_function__ (line 147) | def __torch_function__(cls, func, types, args=(), kwargs=None): FILE: optimum/quanto/tensor/weights/marlin/permutations.py function _get_perms (line 28) | def _get_perms() -> Tuple[List[int], List[int]]: function _get_inverted_perms (line 39) | def _get_inverted_perms() -> Tuple[List[int], List[int]]: function marlin_permute (line 44) | def marlin_permute(t: torch.Tensor, reverse=False): FILE: optimum/quanto/tensor/weights/packing.py function unpack_int32_to_uint8 (line 18) | def unpack_int32_to_uint8(packed: torch.Tensor, bits: int): FILE: optimum/quanto/tensor/weights/qbits.py class WeightsQBitsQuantizer (line 34) | class WeightsQBitsQuantizer(Function): method forward (line 36) | def forward( method backward (line 60) | def backward(ctx, gO): class WeightQBitsTensor (line 65) | class WeightQBitsTensor(QBitsTensor): method create (line 67) | def create(qtype, axis, group_size, size, stride, data, scale, shift, ... method __new__ (line 141) | def __new__(cls, qtype, axis, group_size, size, stride, data, scale, s... method __init__ (line 148) | def __init__(self, qtype, axis, group_size, size, stride, data, scale,... method quantize (line 154) | def quantize( method load_from_state_dict (line 167) | def load_from_state_dict(state_dict, prefix, qtype, axis, group_size, ... method optimize (line 201) | def optimize(self): method save_to_state_dict (line 223) | def save_to_state_dict(self, destination, prefix, keep_vars): method weight_qbits_tensor (line 230) | def weight_qbits_tensor(self): method __tensor_flatten__ (line 237) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 250) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_function__ (line 263) | def __torch_function__(cls, func, types, args=(), kwargs=None): method __torch_dispatch__ (line 290) | def __torch_dispatch__(cls, op, types, args, kwargs=None): FILE: optimum/quanto/tensor/weights/qbytes.py class WeightQBytesQuantizer (line 31) | class WeightQBytesQuantizer(Function): method forward (line 33) | def forward( method backward (line 63) | def backward(ctx, gO): class WeightQBytesLinearFunction (line 68) | class WeightQBytesLinearFunction(QuantizedLinearFunction): method forward (line 70) | def forward(ctx, input, other, bias=None): class WeightQBytesTensor (line 85) | class WeightQBytesTensor(QBytesTensor): method create (line 87) | def create( method __new__ (line 146) | def __new__(cls, qtype, axis, size, stride, data, scale, activation_qt... method __init__ (line 152) | def __init__(self, qtype, axis, size, stride, data, scale, activation_... method quantize (line 157) | def quantize( method load_from_state_dict (line 169) | def load_from_state_dict(state_dict, prefix, qtype, axis, size, stride... method optimize (line 191) | def optimize(self): method save_to_state_dict (line 211) | def save_to_state_dict(self, destination, prefix, keep_vars): method weight_qbytes_tensor (line 218) | def weight_qbytes_tensor(self): method __tensor_flatten__ (line 225) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 237) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_function__ (line 250) | def __torch_function__(cls, func, types, args=(), kwargs=None): method __torch_dispatch__ (line 277) | def __torch_dispatch__(cls, op, types, args, kwargs=None): FILE: optimum/quanto/tensor/weights/quantization.py function quantize_weight (line 27) | def quantize_weight( FILE: optimum/quanto/tensor/weights/reordering.py function reorder (line 23) | def reorder(t: torch.Tensor, permutation: Union[torch.Tensor, List[int]]): function reverse (line 38) | def reverse(permutation: Union[torch.Tensor, List[int]]): FILE: optimum/quanto/tensor/weights/tinygemm/packed.py class TinyGemmPackedTensor (line 25) | class TinyGemmPackedTensor(torch.Tensor): method __new__ (line 27) | def __new__(cls, data, size, stride, requires_grad=False): method __init__ (line 34) | def __init__(self, data, size, stride, requires_grad=False): method __repr__ (line 37) | def __repr__(self): method pack (line 41) | def pack(cls, t): method unpack (line 66) | def unpack(self): method dtype (line 98) | def dtype(self): method __tensor_flatten__ (line 101) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 111) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_dispatch__ (line 123) | def __torch_dispatch__(cls, op, types, args, kwargs=None): method numpy (line 147) | def numpy(self): FILE: optimum/quanto/tensor/weights/tinygemm/qbits.py class TinyGemmQBitsDequantizer (line 30) | class TinyGemmQBitsDequantizer(Function): method forward (line 32) | def forward(ctx, t): method backward (line 38) | def backward(ctx, gO): class TinyGemmQBitsLinearFunction (line 42) | class TinyGemmQBitsLinearFunction(QuantizedLinearFunction): method forward (line 44) | def forward(ctx, input, other, bias): class TinyGemmWeightQBitsTensor (line 65) | class TinyGemmWeightQBitsTensor(WeightQBitsTensor): method __new__ (line 67) | def __new__(cls, qtype, axis, group_size, size, stride, data, scale_sh... method __init__ (line 82) | def __init__(self, qtype, axis, group_size, size, stride, data, scale_... method dequantize (line 111) | def dequantize(self): method weight_qbits_tensor (line 114) | def weight_qbits_tensor(self): method __tensor_flatten__ (line 130) | def __tensor_flatten__(self): method __tensor_unflatten__ (line 143) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride): method __torch_function__ (line 156) | def __torch_function__(cls, func, types, args=(), kwargs=None): FILE: tests/cli/test_quantize_cli.py function test_export_decoder_cli (line 26) | def test_export_decoder_cli(weights): FILE: tests/conftest.py function device (line 29) | def device(request): function pytest_configure (line 33) | def pytest_configure(config): function pytest_runtest_call (line 38) | def pytest_runtest_call(item): FILE: tests/helpers.py function torch_min_version (line 33) | def torch_min_version(v): function device_eq (line 46) | def device_eq(a, b): function random_tensor (line 54) | def random_tensor(shape, dtype=torch.float32, device="cpu"): function random_qactivation (line 65) | def random_qactivation(shape, qtype=qint8, dtype=torch.float32, device="... function random_qweight (line 71) | def random_qweight(shape, qtype, dtype=torch.float32, axis=0, group_size... function assert_similar (line 85) | def assert_similar(a, b, atol=None, rtol=None): function get_device_memory (line 102) | def get_device_memory(device): FILE: tests/library/test_extensions.py function _is_xpu_available (line 10) | def _is_xpu_available(): function test_extension_available (line 32) | def test_extension_available(extension_name): function test_extension_compilation (line 37) | def test_extension_compilation(extension_name): FILE: tests/library/test_mm.py function test_qbytes_mm (line 35) | def test_qbytes_mm(batch_size, input_features, input_dtype, weight_dtype... function test_gemm_fp16_int4 (line 59) | def test_gemm_fp16_int4(batch_size, tokens, in_features, out_features): function test_fp8_marlin (line 112) | def test_fp8_marlin(tokens, in_features, out_features, dtype): function test_gemm_marlin_fp16_int4 (line 155) | def test_gemm_marlin_fp16_int4(batch_size, tokens, in_features, out_feat... FILE: tests/library/test_quantize.py function test_symmetric_quantize_int (line 41) | def test_symmetric_quantize_int(input_shape, dtype, qtype, axis, device): function test_symmetric_quantize_float8 (line 63) | def test_symmetric_quantize_float8(input_shape, dtype, qtype, axis, devi... function test_affine_quantize (line 78) | def test_affine_quantize(input_shape, dtype, qtype, axis, group_size, sh... function test_affine_quantize_integer_tensor (line 107) | def test_affine_quantize_integer_tensor(dtype, qtype, device): FILE: tests/library/test_unpack.py function test_unpack (line 24) | def test_unpack(bits, shape, device): FILE: tests/models/conftest.py function staging (line 6) | def staging(): function skip_if_staging (line 25) | def skip_if_staging(request): FILE: tests/models/test_quantized_model_for_causal_lm.py function quantized_model_for_causal_lm (line 11) | def quantized_model_for_causal_lm(model_id, qtype, exclude, from_config=... function compare_models (line 49) | def compare_models(a_model, b_model): function test_quantized_model_for_causal_lm_base (line 79) | def test_quantized_model_for_causal_lm_base(model_id, qtype, exclude_lm_... function test_quantized_model_for_causal_lm_sharded (line 92) | def test_quantized_model_for_causal_lm_sharded(): function test_causal_lm_base_push_to_hub (line 107) | def test_causal_lm_base_push_to_hub(staging, in_org): function test_quantized_model_load_state_dict_non_strict (line 134) | def test_quantized_model_load_state_dict_non_strict(model_id, qtype): FILE: tests/models/test_quantized_model_for_pixart.py function quantized_model_for_pixart (line 11) | def quantized_model_for_pixart(qtype, exclude): function compare_models (line 40) | def compare_models(a_model, b_model): function test_quantized_model_for_pixart (line 80) | def test_quantized_model_for_pixart(qtype, exclude_proj_out): function test_push_to_hub (line 94) | def test_push_to_hub(staging, in_org): FILE: tests/nn/test_calibrate.py function _test_calibrate_qlinear (line 23) | def _test_calibrate_qlinear(batch_size, tokens, embeddings, use_bias, ac... function test_calibrate_qlinear_activations_int8 (line 45) | def test_calibrate_qlinear_activations_int8(batch_size, tokens, embeddin... function test_calibrate_qlinear_activations_float8 (line 58) | def test_calibrate_qlinear_activations_float8(batch_size, tokens, embedd... function _test_calibrate_custom_module (line 62) | def _test_calibrate_custom_module(activations, device): function test_calibrate_custom_module_activations_int8 (line 88) | def test_calibrate_custom_module_activations_int8(device): function test_calibrate_custom_module_activations_float8 (line 98) | def test_calibrate_custom_module_activations_float8(activations, device): FILE: tests/nn/test_qattention.py class RotaryEmbedding (line 27) | class RotaryEmbedding(nn.Module): method __init__ (line 28) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi... method _set_cos_sin_cache (line 42) | def _set_cos_sin_cache(self, seq_len, device, dtype): method forward (line 52) | def forward(self, x, seq_len=None): function rotate_half (line 63) | def rotate_half(x): function apply_rotary_pos_emb (line 70) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): function repeat_kv (line 98) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: class Attention (line 110) | class Attention(nn.Module): method __init__ (line 113) | def __init__(self, hidden_size=128, num_heads=4, max_position_embeddin... method _shape (line 130) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): method forward (line 133) | def forward( function _test_quantize_attention (line 174) | def _test_quantize_attention(device, dtype=torch.float32, weights=qint8,... function test_quantize_attention_weights_only (line 193) | def test_quantize_attention_weights_only(weights, device): function test_quantize_attention_weights_only_float8 (line 198) | def test_quantize_attention_weights_only_float8(device): function test_quantize_attention_activations_int8 (line 203) | def test_quantize_attention_activations_int8(weights, device): function test_quantize_attention_activations_float8 (line 214) | def test_quantize_attention_activations_float8(weights, activations, dev... FILE: tests/nn/test_qconv2d.py function _test_quantize_conv2d (line 31) | def _test_quantize_conv2d(batch_size, img_shape, out_channels, use_bias,... function test_quantize_conv2d_float16_activations_int8 (line 59) | def test_quantize_conv2d_float16_activations_int8(batch_size, img_shape,... function test_quantize_conv2d_float32_activations_int8 (line 68) | def test_quantize_conv2d_float32_activations_int8(batch_size, img_shape,... function test_quantize_conv2d_float16_activations_float8 (line 83) | def test_quantize_conv2d_float16_activations_float8( function test_quantize_conv2d_float32_activations_float8 (line 100) | def test_quantize_conv2d_float32_activations_float8( function test_quantize_conv2d_float16_weight_only (line 111) | def test_quantize_conv2d_float16_weight_only(batch_size, img_shape, out_... function test_quantize_conv2d_float32_weight_only (line 120) | def test_quantize_conv2d_float32_weight_only(batch_size, img_shape, out_... function test_qconv2d_gradient (line 128) | def test_qconv2d_gradient(img_shape, out_channels, activations, weights,... FILE: tests/nn/test_qlayernorm.py function _test_quantize_layernorm (line 23) | def _test_quantize_layernorm(batch_size, tokens, embeddings, affine, dty... function test_quantize_layernorm_float16_activations_int8 (line 47) | def test_quantize_layernorm_float16_activations_int8(batch_size, tokens,... function test_quantize_layernorm_float32_activations_int8 (line 54) | def test_quantize_layernorm_float32_activations_int8(batch_size, tokens,... function test_quantize_layernorm_float16_activations_float8 (line 67) | def test_quantize_layernorm_float16_activations_float8(batch_size, token... function test_quantize_layernorm_float32_activations_float8 (line 80) | def test_quantize_layernorm_float32_activations_float8(batch_size, token... function test_quantize_layernom_no_activation (line 84) | def test_quantize_layernom_no_activation(): FILE: tests/nn/test_qlinear.py function _test_quantize_linear (line 37) | def _test_quantize_linear(batch_size, tokens, embeddings, use_bias, weig... function test_quantize_linear_float16_activations_int8 (line 65) | def test_quantize_linear_float16_activations_int8(batch_size, tokens, em... function test_quantize_linear_float32_activations_int8 (line 73) | def test_quantize_linear_float32_activations_int8(batch_size, tokens, em... function test_quantize_linear_float16_activations_float8 (line 90) | def test_quantize_linear_float16_activations_float8( function test_quantize_linear_float32_activations_float8 (line 107) | def test_quantize_linear_float32_activations_float8( function test_quantize_linear_float16_weight_only (line 120) | def test_quantize_linear_float16_weight_only(batch_size, tokens, embeddi... function test_quantize_linear_float32_weight_only (line 134) | def test_quantize_linear_float32_weight_only(batch_size, tokens, embeddi... function test_qlinear_gradient (line 141) | def test_qlinear_gradient(tokens, embeddings, activations, weights, devi... function test_move_qlinear (line 182) | def test_move_qlinear(dtype, use_bias, weights, device): function test_qlinear_serialization (line 200) | def test_qlinear_serialization(features, use_bias, activations, weights,... FILE: tests/nn/test_qmodule.py function test_qmodule_freeze (line 26) | def test_qmodule_freeze(in_features, out_features, use_bias, dtype): function test_qmodule_qtype_as_string (line 50) | def test_qmodule_qtype_as_string(weights, activations): FILE: tests/quantize/test_quantize_mlp.py class MLP (line 40) | class MLP(torch.nn.Module): method __init__ (line 41) | def __init__(self, input_size, output_size, hidden_size): method forward (line 47) | def forward(self, inputs): function check_mlp (line 53) | def check_mlp(model, frozen): function _test_quantize_mlp (line 63) | def _test_quantize_mlp(weights, activations, optimizer, frozen, device, ... function test_quantize_mlp_weights_only (line 85) | def test_quantize_mlp_weights_only(weights, frozen, device): function test_quantize_mlp_weights_only_float8 (line 92) | def test_quantize_mlp_weights_only_float8(weights, frozen, device): function test_quantize_mlp_int8_activations (line 99) | def test_quantize_mlp_int8_activations(weights, frozen, device): function test_quantize_mlp_float8_activations (line 111) | def test_quantize_mlp_float8_activations(weights, activations, frozen, d... function test_quantized_mlp_device_memory (line 120) | def test_quantized_mlp_device_memory(weights, dtype, weights_only, device): function test_quantize_mlp_weights_only_optimizers (line 140) | def test_quantize_mlp_weights_only_optimizers(weights, optimizer, frozen... function test_quantize_mlp_wrong_optimizer (line 148) | def test_quantize_mlp_wrong_optimizer(weights, optimizer, device): FILE: tests/quantize/test_quantize_patterns.py class MLP (line 25) | class MLP(torch.nn.Module): method __init__ (line 26) | def __init__(self, input_size, output_size, hidden_size): method forward (line 32) | def forward(self, inputs): class ClassificationModel (line 38) | class ClassificationModel(torch.nn.Module): method __init__ (line 39) | def __init__(self, input_size, output_size, hidden_size, classes): method forward (line 44) | def forward(self, inputs): function has_children (line 49) | def has_children(module: torch.nn.Module): function leaf_module_names (line 53) | def leaf_module_names(module: torch.nn.Module): function parent_module_names (line 57) | def parent_module_names(module: torch.nn.Module): function test_quantize_mlp_include_explicit_layers (line 61) | def test_quantize_mlp_include_explicit_layers(): function test_quantize_mlp_exclude_explicit_layers (line 74) | def test_quantize_mlp_exclude_explicit_layers(): function test_quantize_mlp_include_layer_patterns (line 87) | def test_quantize_mlp_include_layer_patterns(): function test_quantize_mlp_exclude_layer_patterns (line 100) | def test_quantize_mlp_exclude_layer_patterns(): FILE: tests/quantize/test_requantize.py function save_and_reload_state_dict (line 28) | def save_and_reload_state_dict(state_dict, serialization): function test_requantize_serialized_model (line 50) | def test_requantize_serialized_model( function test_requantized_model_device_memory (line 78) | def test_requantized_model_device_memory(weights, dtype, serialization, ... FILE: tests/tensor/activations/test_activations_compile.py function compile_for_device (line 22) | def compile_for_device(f, device): function test_compile_quantize_tensor (line 34) | def test_compile_quantize_tensor(input_shape, qtype, dtype, device): function test_compile_qtensor_to (line 51) | def test_compile_qtensor_to(device): FILE: tests/tensor/activations/test_activations_dispatch.py function test_qactivation_mul_scalar (line 24) | def test_qactivation_mul_scalar(input_shape, scalar, device): function test_qactivation_relu (line 40) | def test_qactivation_relu(batch_size, tokens, embeddings, device): function test_qactivation_softmax (line 49) | def test_qactivation_softmax(batch_size, tokens, embeddings, device): function test_qactivation_view (line 58) | def test_qactivation_view(input_shape, device): function test_qactivation_cat (line 65) | def test_qactivation_cat(input_shape, device): function test_qactivation_transpose_2d (line 75) | def test_qactivation_transpose_2d(device): function test_qactivation_transpose (line 84) | def test_qactivation_transpose(device): FILE: tests/tensor/activations/test_activations_quantize.py function test_symmetric_quantize_int (line 33) | def test_symmetric_quantize_int(input_shape, dtype, qtype, device): function test_symmetric_quantize_float8 (line 52) | def test_symmetric_quantize_float8(input_shape, dtype, qtype, device): FILE: tests/tensor/ops/test_linear_dispatch.py function test_qactivation_qweight_linear (line 28) | def test_qactivation_qweight_linear( function test_linear_fp16_int4 (line 48) | def test_linear_fp16_int4(batch_size, tokens, embeddings, use_bias, devi... function test_linear_bf16_int4 (line 63) | def test_linear_bf16_int4(batch_size, tokens, embeddings, use_bias, devi... FILE: tests/tensor/ops/test_mm_dispatch.py function test_qactivation_qweight_matmul (line 26) | def test_qactivation_qweight_matmul(dtype, in_features, hidden, out_feat... function test_qactivation_qactivation_bmm (line 38) | def test_qactivation_qactivation_bmm(dtype, batch_size, a_shape, b_shape... FILE: tests/tensor/optimizers/test_hqq_optimizer.py function compare_quantized_tensor (line 28) | def compare_quantized_tensor(a, qtype, axis, group_size, scale, shift): function test_hqq_optimizer (line 42) | def test_hqq_optimizer(input_shape, dtype, qtype, axis, group_size, devi... FILE: tests/tensor/test_absmax.py function test_absmax_scale (line 26) | def test_absmax_scale(input_shape, axis, dtype, qtype, device): FILE: tests/tensor/test_packed_tensor.py function test_pack_tensor (line 26) | def test_pack_tensor(shape, bits, device): function test_packed_tensor_serialization (line 39) | def test_packed_tensor_serialization(bits, device): FILE: tests/tensor/weights/optimized/test_awq_packed_tensor.py function test_pack_awq_tensor (line 30) | def test_pack_awq_tensor(in_features, out_features, random, packing, reo... function test_move_awq_tensor (line 51) | def test_move_awq_tensor(packing, reorder, device): FILE: tests/tensor/weights/optimized/test_awq_weight_qbits_tensor.py function test_awq_weight_qbits_tensor_from_qbits_tensor (line 30) | def test_awq_weight_qbits_tensor_from_qbits_tensor(in_features, out_feat... function test_awq_weight_qbits_tensor_move (line 66) | def test_awq_weight_qbits_tensor_move(device): function _test_awq_weight_qbits_tensor_linear (line 94) | def _test_awq_weight_qbits_tensor_linear( function test_awq_weight_qbits_tensor_linear (line 124) | def test_awq_weight_qbits_tensor_linear(batch_size, tokens, in_features,... FILE: tests/tensor/weights/optimized/test_marlin_fp8_packed_tensor.py function get_fp8_tensor (line 25) | def get_fp8_tensor(shape, device, random=False): function test_pack_marlin_fp8_tensor (line 44) | def test_pack_marlin_fp8_tensor(in_features, out_features, random): function test_move_marlin_fp8_tensor (line 55) | def test_move_marlin_fp8_tensor(): FILE: tests/tensor/weights/optimized/test_marlin_int4_packed_tensor.py function get_uint4_tensor (line 24) | def get_uint4_tensor(shape, device, random=False): function test_pack_marlin_int4_tensor (line 39) | def test_pack_marlin_int4_tensor(in_features, out_features, random): function test_move_marlin_int4_packed_tensor (line 50) | def test_move_marlin_int4_packed_tensor(device): FILE: tests/tensor/weights/optimized/test_marlin_int4_weight_qbits_tensor.py function test_marlin_int4_weight_qbits_tensor_from_qbits_tensor (line 31) | def test_marlin_int4_weight_qbits_tensor_from_qbits_tensor(in_features, ... function test_marlin_int4_weight_qbits_tensor_move (line 67) | def test_marlin_int4_weight_qbits_tensor_move(device): function _test_marlin_int4_weight_qbits_tensor_linear (line 96) | def _test_marlin_int4_weight_qbits_tensor_linear( function test_marlin_int4_weight_qbits_tensor_linear (line 125) | def test_marlin_int4_weight_qbits_tensor_linear(batch_size, tokens, in_f... function test_marlin_int4_weight_qbits_tensor_linear_failing (line 144) | def test_marlin_int4_weight_qbits_tensor_linear_failing(batch_size, toke... FILE: tests/tensor/weights/optimized/test_marlin_qbytes_tensor.py function test_pack_unpack (line 29) | def test_pack_unpack(in_features: int, out_features: int): FILE: tests/tensor/weights/optimized/test_tinygemm_packed_tensor.py function test_pack_tinygemm_tensor (line 29) | def test_pack_tinygemm_tensor(in_features, out_features, random, device): function test_move_tinygemm_packed_tensor (line 53) | def test_move_tinygemm_packed_tensor(device): FILE: tests/tensor/weights/optimized/test_tinygemm_weight_qbits_tensor.py function test_tinygemm_weight_qbits_tensor_from_qbits_tensor (line 28) | def test_tinygemm_weight_qbits_tensor_from_qbits_tensor(in_features, out... function test_tinygemm_weight_qbits_tensor_move (line 71) | def test_tinygemm_weight_qbits_tensor_move(device): function test_tinygemm_weight_qbits_tensor_linear (line 101) | def test_tinygemm_weight_qbits_tensor_linear(batch_size, tokens, embeddi... FILE: tests/tensor/weights/test_weight_qbits_tensor.py function test_weight_qbits_tensor_serialization (line 26) | def test_weight_qbits_tensor_serialization(qtype, axis): function test_weight_qbits_tensor_requires_grad (line 43) | def test_weight_qbits_tensor_requires_grad(qtype, axis, group_size, devi... function test_weight_qbits_tensor_backward (line 54) | def test_weight_qbits_tensor_backward(qtype, axis, group_size, device): FILE: tests/tensor/weights/test_weight_qbits_tensor_dispatch.py function test_qbitstensor_to_device (line 25) | def test_qbitstensor_to_device(dtype, group_size, device): function test_qbitstensor_detach (line 45) | def test_qbitstensor_detach(): function test_qbitstensor_equal (line 54) | def test_qbitstensor_equal(dtype, qtype, axis, device): function test_weight_qbits_tensor_linear (line 68) | def test_weight_qbits_tensor_linear(dtype, batch_size, tokens, in_featur... function test_weight_qbits_tensor_linear_gpu (line 82) | def test_weight_qbits_tensor_linear_gpu(dtype, batch_size, tokens, in_fe... FILE: tests/tensor/weights/test_weight_qbits_tensor_instantiate.py function random_data_scale_shift (line 23) | def random_data_scale_shift(input_shape, dtype, qtype, axis, group_size): function test_weight_qbits_tensor_instantiate (line 40) | def test_weight_qbits_tensor_instantiate(input_shape, dtype, qtype, axis... function test_weight_qbits_tensor_equal (line 56) | def test_weight_qbits_tensor_equal(input_shape, dtype, qtype, axis, grou... FILE: tests/tensor/weights/test_weight_qbits_tensor_quantize.py function test_weight_qbits_tensor_quantize (line 33) | def test_weight_qbits_tensor_quantize(input_shape, dtype, qtype, axis, g... function test_weight_qbits_tensor_quantize_integer_tensor (line 58) | def test_weight_qbits_tensor_quantize_integer_tensor(dtype, qtype, device): FILE: tests/tensor/weights/test_weight_qbytes_tensor_backward.py function test_weight_qbytes_tensor_requires_grad (line 22) | def test_weight_qbytes_tensor_requires_grad(device): function test_weight_qbytes_tensor_backward (line 30) | def test_weight_qbytes_tensor_backward(device): function test_weight_qbytes_tensor_chained_backward (line 41) | def test_weight_qbytes_tensor_chained_backward(device): FILE: tests/tensor/weights/test_weight_qbytes_tensor_dispatch.py function test_weight_qytes_tensor_to_device (line 8) | def test_weight_qytes_tensor_to_device(device): function test_weight_qbytes_tensor_equal (line 20) | def test_weight_qbytes_tensor_equal(dtype, qtype, axis, device): function test_weight_qbytes_tensor_transpose_contiguous (line 30) | def test_weight_qbytes_tensor_transpose_contiguous(axis, qtype, device): function test_weight_qbytes_tensor_transposed_stride (line 43) | def test_weight_qbytes_tensor_transposed_stride(axis, qtype, device): FILE: tests/tensor/weights/test_weight_qbytes_tensor_instantiate.py function random_data_scale (line 22) | def random_data_scale(input_shape, dtype, qtype): function test_qbytestensor_instantiate (line 37) | def test_qbytestensor_instantiate(input_shape, dtype, qtype, device): function test_qbytestensor_equal (line 53) | def test_qbytestensor_equal(input_shape, dtype, qtype, device): FILE: tests/tensor/weights/test_weight_qbytes_tensor_quantize.py function test_symmetric_quantize_int (line 38) | def test_symmetric_quantize_int(input_shape, dtype, qtype, axis, device): function test_symmetric_quantize_float8 (line 62) | def test_symmetric_quantize_float8(input_shape, dtype, qtype, axis, devi... function test_quantize_weight_axis_dim_1 (line 74) | def test_quantize_weight_axis_dim_1(axis, device): FILE: tests/tensor/weights/test_weight_qbytes_tensor_serialization.py function test_weights_qbytes_tensor_serialization (line 28) | def test_weights_qbytes_tensor_serialization(input_shape, qtype, dtype, ... FILE: tests/tensor/weights/weight_helpers.py function check_weight_qtensor_linear (line 19) | def check_weight_qtensor_linear(qweight, batch_size, tokens, use_bias, r...