SYMBOL INDEX (641 symbols across 138 files)

FILE: bench/generation/evaluate_configurations.py
  function evaluate_model_configurations (line 26) | def evaluate_model_configurations(
  function main (line 64) | def main():

FILE: bench/generation/evaluate_model.py
  function calibrate (line 36) | def calibrate(model, tokenizer, batch_size, batches):
  function evaluate (line 51) | def evaluate(
  function main (line 86) | def main():

FILE: bench/generation/gen_barchart.py
  function save_bar_chart (line 23) | def save_bar_chart(title, labels, ylabel, series, save_path):
  function gen_barchart (line 50) | def gen_barchart(model_id, title, label, results, dtype):
  function main (line 76) | def main():

FILE: bench/generation/metrics/latency.py
  function latency (line 24) | def latency(model, tokenizer, device, batch_size=1, prompt_length=512, n...
  function get_device_memory (line 108) | def get_device_memory(device):

FILE: bench/generation/metrics/perplexity.py
  class Perplexity (line 23) | class Perplexity:
    method __init__ (line 28) | def __init__(self, model, tokenizer, dataset_path="wikitext", dataset_...
    method _prepare_data (line 55) | def _prepare_data(self):
    method softmax (line 74) | def softmax(logits):
    method calculate_perplexity (line 91) | def calculate_perplexity(self, n_ctx=512, n_batch=512):
    method _process_batch (line 128) | def _process_batch(self, i, n_ctx, n_batch, tokens, nll, count):
    method _compute_batch_logits (line 197) | def _compute_batch_logits(self, tokens, batch_start, batch_size):
  function perplexity (line 221) | def perplexity(

FILE: bench/generation/metrics/prediction.py
  function prediction_accuracy (line 22) | def prediction_accuracy(model, tokenizer, batch_size, samples=None):

FILE: bench/generation/setup/awq.py
  function prepare_inputs_for_generation (line 19) | def prepare_inputs_for_generation(input_ids, past_key_values=None, atten...
  function setup (line 69) | def setup(model_id: str, weights: str, activations: str, group_size: int...

FILE: bench/generation/setup/bnb.py
  function setup (line 19) | def setup(

FILE: bench/generation/setup/hqq.py
  function setup (line 21) | def setup(model_id: str, weights: str, activations: str, device: torch.d...

FILE: bench/generation/setup/quanto.py
  function calibrate (line 25) | def calibrate(model, tokenizer, batch_size, batches):
  function setup (line 40) | def setup(
  function keyword_to_qtype (line 71) | def keyword_to_qtype(k):

FILE: bench/kernels/benchmark.py
  function get_unpack_bench (line 26) | def get_unpack_bench(bits, device):
  function timing (line 36) | def timing(get_bench_func, device, iterations=10):
  function main (line 95) | def main():

FILE: bench/kernels/benchmark_marlin_fp8.py
  function run_benchmark (line 28) | def run_benchmark(
  function shape_generator (line 132) | def shape_generator():
  function shape_generator (line 137) | def shape_generator():

FILE: bench/kernels/benchmark_w4a16.py
  function benchmark (line 12) | def benchmark(f, warmup=1, iter=10):
  function get_problem (line 28) | def get_problem(m, n, k, groupsize=128):
  function benchmark_dense (line 44) | def benchmark_dense(A, B, m, n, k):
  function benchmark_awq (line 53) | def benchmark_awq(A, B, s, sz, m, n, k):
  function benchmark_marlin (line 64) | def benchmark_marlin(A, B, s, sz, m, n, k):
  function run_benchmark (line 87) | def run_benchmark(model, tokens=None):
  function main (line 130) | def main():

FILE: bench/torch_kernels/test_int_mm.py
  function main (line 21) | def main():

FILE: bench/torch_kernels/test_int_mm_inductor.py
  function mm (line 20) | def mm(a, b):

FILE: bench/torch_kernels/test_weight_int4pack_mm.py
  function _group_quantize_tensor (line 21) | def _group_quantize_tensor(w, n_bit=4, q_group_size=16):
  function main (line 64) | def main():

FILE: bench/torch_kernels/test_weight_int8pack_mm.py
  function main (line 21) | def main():

FILE: examples/nlp/text-classification/sst2/quantize_sst2_model.py
  function evaluate_model (line 28) | def evaluate_model(model, tokenizer, dataset, device, batch_size):
  function keyword_to_itype (line 38) | def keyword_to_itype(k):
  function main (line 42) | def main():

FILE: examples/nlp/text-generation/quantize_causal_lm_model.py
  function generate (line 26) | def generate(model, tokenizer, device, prompt, max_new_tokens):
  function calibrate (line 43) | def calibrate(model, tokenizer, dataset, device, batch_size, samples=None):
  function keyword_to_itype (line 56) | def keyword_to_itype(k):
  function main (line 65) | def main():

FILE: examples/speech/speech_recognition/quantize_asr_model.py
  function map_to_feats (line 31) | def map_to_feats(batch, processor):
  function transcribe_batch (line 42) | def transcribe_batch(batch, model, processor):
  function evaluate_model (line 51) | def evaluate_model(model, processor, dataset, metric: evaluate.Evaluatio...
  function keyword_to_itype (line 61) | def keyword_to_itype(k):
  function main (line 65) | def main():

FILE: examples/vision/StableDiffusion/quantize_StableDiffusion.py
  function load_pipeline (line 25) | def load_pipeline(torch_dtype, unet_dtype=None, device="cpu"):
  function run_inference (line 36) | def run_inference(pipe, batch_size=1):
  function benchmark_fn (line 45) | def benchmark_fn(f, *args, **kwargs):
  function bytes_to_giga_bytes (line 50) | def bytes_to_giga_bytes(bytes):
  function get_device_memory (line 54) | def get_device_memory(device):

FILE: examples/vision/image-classification/mnist/quantize_mnist_model.py
  function test (line 39) | def test(model, device, test_loader):
  function train (line 65) | def train(log_interval, model, device, train_loader, optimizer, epoch):
  function keyword_to_itype (line 89) | def keyword_to_itype(k):
  function main (line 93) | def main():

FILE: examples/vision/image-classification/pets/quantize_vit_model.py
  function test (line 29) | def test(model, device, test_loader):
  function keyword_to_itype (line 56) | def keyword_to_itype(k):
  function main (line 60) | def main():

FILE: examples/vision/object-detection/quantize_owl_model.py
  function detect (line 14) | def detect(model, processor, image, texts):
  function get_device_memory (line 52) | def get_device_memory(device):
  function keyword_to_qtype (line 66) | def keyword_to_qtype(k):
  function main (line 70) | def main():

FILE: examples/vision/text-to-image/quantize_pixart_sigma.py
  function load_pipeline (line 21) | def load_pipeline(model_id, torch_dtype, qtype=None, device="cpu"):
  function get_device_memory (line 34) | def get_device_memory(device):

FILE: external/awq/conftest.py
  function device (line 27) | def device(request):
  function pytest_configure (line 31) | def pytest_configure(config):
  function pytest_runtest_call (line 36) | def pytest_runtest_call(item):

FILE: external/awq/pack_intweight.py
  function pack_intweight (line 25) | def pack_intweight(unpacked_qweight, interleave, kstride):

FILE: external/awq/packing_utils.py
  function pack_awq (line 8) | def pack_awq(intweight: torch.Tensor, reorder=False):
  function unpack_awq (line 23) | def unpack_awq(qweight: torch.Tensor, bits: int):
  function reverse_awq_order (line 35) | def reverse_awq_order(iweights: torch.Tensor, bits: int):
  function pack_exllama (line 50) | def pack_exllama(iweights: torch.Tensor, izeros: torch.Tensor, bits: int):
  function unpack_reorder_pack (line 72) | def unpack_reorder_pack(qweight, qzeros, bits):
  function dequantize_gemm (line 91) | def dequantize_gemm(qweight, qzeros, scales, bits, group_size):

FILE: external/awq/test_awq_kernels.py
  function assert_similar (line 21) | def assert_similar(a, b, atol=None, rtol=None):
  function test_standalone_kernel (line 41) | def test_standalone_kernel(in_features, out_features, kernel):
  function test_integrated_kernel (line 103) | def test_integrated_kernel(in_features, out_features, kernel):

FILE: external/awq/test_awq_packing.py
  function test_awq_pack (line 28) | def test_awq_pack(in_features, out_features, reorder, random):
  function test_awq_pack_v2 (line 64) | def test_awq_pack_v2(in_features, out_features, random):

FILE: external/awq/test_awq_quantize.py
  function awq_quantize (line 7) | def awq_quantize(base, scales, zeros, group_size):
  function test_awq_quantize (line 24) | def test_awq_quantize(in_features, out_features):

FILE: external/smoothquant/smoothquant.py
  function get_act_scales (line 16) | def get_act_scales(model, tokenizer, dataset, num_samples=512, seq_len=5...
  function smooth_ln_fcs (line 53) | def smooth_ln_fcs(ln, fcs, act_scales, alpha=0.5):
  function smooth_lm (line 77) | def smooth_lm(model, scales, alpha=0.5):
  function main (line 111) | def main():

FILE: optimum/quanto/calibrate.py
  function _updated_scale (line 31) | def _updated_scale(scale, new_scale, momentum):
  function absmax_scale (line 37) | def absmax_scale(base: torch.Tensor, qtype: qtype = qint8, axis: Optiona...
  class Calibration (line 64) | class Calibration(TorchFunctionMode):
    method __init__ (line 81) | def __init__(self, *args, momentum: float = 0.9, streamline=True, debu...
    method __torch_function__ (line 90) | def __torch_function__(self, func, types, args=(), kwargs=None):
    method __enter__ (line 107) | def __enter__(self):
    method __exit__ (line 112) | def __exit__(self, exc_type, exc_val, exc_tb):
    method calibrate_input (line 120) | def calibrate_input(self, module: torch.nn.Module, input, momentum: fl...
    method calibrate_output (line 139) | def calibrate_output(
    method tag_outputs (line 176) | def tag_outputs(

FILE: optimum/quanto/library/extensions/__init__.py
  function _is_xpu_available (line 34) | def _is_xpu_available():

FILE: optimum/quanto/library/extensions/cpp/__init__.py
  function unpack_cpp (line 35) | def unpack_cpp(t: torch.Tensor, bits: int):

FILE: optimum/quanto/library/extensions/cpp/pybind_module.cpp
  function PYBIND11_MODULE (line 24) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: optimum/quanto/library/extensions/cpp/unpack.cpp
  function unpack_4bit (line 19) | static torch::Tensor unpack_4bit(torch::Tensor &t) {
  function unpack_2bit (line 27) | static torch::Tensor unpack_2bit(torch::Tensor &t) {
  function unpack (line 37) | torch::Tensor unpack(torch::Tensor &t, int bits) {

FILE: optimum/quanto/library/extensions/cuda/__init__.py
  function get_max_cuda_arch (line 25) | def get_max_cuda_arch():
  function unpack_cuda (line 78) | def unpack_cuda(t: torch.Tensor, bits: int):
  function gemm_f16i4_awq (line 98) | def gemm_f16i4_awq(
  function fp8_marlin_gemm (line 139) | def fp8_marlin_gemm(
  function gptq_marlin_repack (line 162) | def gptq_marlin_repack(
  function gemm_f16i4_marlin (line 177) | def gemm_f16i4_marlin(

FILE: optimum/quanto/library/extensions/cuda/awq/v2/semaphore.h
  function class (line 44) | class Semaphore

FILE: optimum/quanto/library/extensions/cuda/marlin/marlin_cuda.cpp
  function mul (line 28) | void mul(

FILE: optimum/quanto/library/extensions/cuda/pybind_module.cpp
  function PYBIND11_MODULE (line 30) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: optimum/quanto/library/extensions/extension.py
  class Extension (line 13) | class Extension(object):
    method __init__ (line 14) | def __init__(
    method lib (line 30) | def lib(self):
  function register_extension (line 60) | def register_extension(extension: Extension):
  function get_extension (line 65) | def get_extension(extension_type: str):
  function is_extension_available (line 77) | def is_extension_available(extension_type: str):

FILE: optimum/quanto/library/extensions/hip/__init__.py
  function unpack_hip (line 35) | def unpack_hip(t: torch.Tensor, bits: int):

FILE: optimum/quanto/library/extensions/hip/pybind_module.cpp
  function PYBIND11_MODULE (line 19) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: optimum/quanto/library/extensions/mps/__init__.py
  function unpack_mps (line 35) | def unpack_mps(t: torch.Tensor, bits: int):

FILE: optimum/quanto/library/extensions/mps/pybind_module.cpp
  function PYBIND11_MODULE (line 19) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: optimum/quanto/library/extensions/xpu/__init__.py
  function unpack_xpu (line 41) | def unpack_xpu(t: torch.Tensor, bits: int):
  function gemm_f16i4_awq (line 61) | def gemm_f16i4_awq(

FILE: optimum/quanto/library/extensions/xpu/pybind_module.cpp
  function PYBIND11_MODULE (line 25) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: optimum/quanto/library/qbytes_mm.py
  function qbytes_mm (line 25) | def qbytes_mm(activations: torch.Tensor, weights: torch.Tensor, output_s...
  function qbytes_int_mm (line 36) | def qbytes_int_mm(activations: torch.Tensor, weights: torch.Tensor, outp...
  function qbytes_int8pack_mm (line 53) | def qbytes_int8pack_mm(activations: torch.Tensor, weights: torch.Tensor,...
  function qbytes_mm_impl_default (line 67) | def qbytes_mm_impl_default(
  function qbytes_mm_impl_cuda (line 74) | def qbytes_mm_impl_cuda(activations: torch.Tensor, weights: torch.Tensor...
  function qbytes_mm_impl_cpu (line 92) | def qbytes_mm_impl_cpu(activations: torch.Tensor, weights: torch.Tensor,...
  function qbytes_mm_impl_mps (line 109) | def qbytes_mm_impl_mps(activations: torch.Tensor, weights: torch.Tensor,...

FILE: optimum/quanto/library/quantize.py
  function quantize_symmetric (line 28) | def quantize_symmetric(
  function quantize_affine (line 65) | def quantize_affine(

FILE: optimum/quanto/library/unpack.py
  function unpack (line 22) | def unpack(packed: torch.Tensor, bits: int) -> torch.Tensor:

FILE: optimum/quanto/models/__init__.py
  function is_transformers_available (line 21) | def is_transformers_available() -> bool:
  function is_diffusers_available (line 25) | def is_diffusers_available() -> bool:

FILE: optimum/quanto/models/diffusers_models.py
  class QuantizedDiffusersModel (line 44) | class QuantizedDiffusersModel(ModelHubMixin):
    method __init__ (line 48) | def __init__(self, model: ModelMixin):
    method __getattr__ (line 53) | def __getattr__(self, name: str) -> Any:
    method forward (line 61) | def forward(self, *args, **kwargs):
    method __call__ (line 64) | def __call__(self, *args, **kwargs):
    method _qmap_name (line 68) | def _qmap_name():
    method quantize (line 72) | def quantize(
    method from_pretrained (line 119) | def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os....
    method _save_pretrained (line 180) | def _save_pretrained(self, save_directory: Path) -> None:
  class QuantizedPixArtTransformer2DModel (line 189) | class QuantizedPixArtTransformer2DModel(QuantizedDiffusersModel):

FILE: optimum/quanto/models/shared_dict.py
  class ShardedStateDict (line 22) | class ShardedStateDict(Mapping):
    method __init__ (line 30) | def __init__(self, base_dir: str, tensor_index: Dict[str, str]):
    method __iter__ (line 35) | def __iter__(self):
    method __len__ (line 38) | def __len__(self):
    method __getitem__ (line 41) | def __getitem__(self, key: Any) -> Any:
    method __contains__ (line 49) | def __contains__(self, key: object) -> bool:
    method keys (line 52) | def keys(self):

FILE: optimum/quanto/models/transformers_models.py
  class QuantizedTransformersModel (line 38) | class QuantizedTransformersModel(ModelHubMixin):
    method __init__ (line 42) | def __init__(self, model: PreTrainedModel):
    method __getattr__ (line 47) | def __getattr__(self, name: str) -> Any:
    method forward (line 55) | def forward(self, *args, **kwargs):
    method __call__ (line 58) | def __call__(self, *args, **kwargs):
    method __repr__ (line 61) | def __repr__(self):
    method _qmap_name (line 65) | def _qmap_name():
    method quantize (line 69) | def quantize(
    method from_pretrained (line 115) | def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os....
    method _save_pretrained (line 165) | def _save_pretrained(self, save_directory: Path) -> None:
  class QuantizedModelForCausalLM (line 182) | class QuantizedModelForCausalLM(QuantizedTransformersModel):

FILE: optimum/quanto/nn/qconv2d.py
  class QConv2d (line 27) | class QConv2d(QModuleMixin, torch.nn.Conv2d):
    method qcreate (line 29) | def qcreate(
    method forward (line 54) | def forward(self, input: torch.Tensor) -> torch.Tensor:

FILE: optimum/quanto/nn/qlayernorm.py
  class QLayerNorm (line 27) | class QLayerNorm(QModuleMixin, torch.nn.LayerNorm):
    method qcreate (line 29) | def qcreate(
    method forward (line 52) | def forward(self, input: torch.Tensor) -> torch.Tensor:

FILE: optimum/quanto/nn/qlinear.py
  class QLinear (line 27) | class QLinear(QModuleMixin, torch.nn.Linear):
    method qcreate (line 29) | def qcreate(
    method forward (line 49) | def forward(self, input: torch.Tensor) -> torch.Tensor:

FILE: optimum/quanto/nn/qmodule.py
  function register_qmodule (line 44) | def register_qmodule(module_cls):
  function quantize_module (line 81) | def quantize_module(
  class QModuleMixin (line 94) | class QModuleMixin(ABC):
    method __init__ (line 95) | def __init__(
    method disable_output_quantization (line 143) | def disable_output_quantization(self):
    method _save_to_state_dict (line 147) | def _save_to_state_dict(self, destination, prefix, keep_vars):
    method _load_from_state_dict (line 161) | def _load_from_state_dict(
    method from_module (line 210) | def from_module(
    method qcreate (line 235) | def qcreate(
    method qweight (line 246) | def qweight(self):
    method qforward (line 281) | def qforward(self, input: torch.Tensor) -> torch.Tensor:
    method quantize_input (line 284) | def quantize_input(self, module: torch.nn.Module, input: torch.Tensor)...
    method quantize_output (line 296) | def quantize_output(
    method freeze (line 304) | def freeze(self):
    method frozen (line 311) | def frozen(self):

FILE: optimum/quanto/quantize.py
  function set_module_by_name (line 27) | def set_module_by_name(parent_module, name, child_module):
  function _quantize_submodule (line 37) | def _quantize_submodule(
  function quantize (line 55) | def quantize(
  function requantize (line 101) | def requantize(
  function freeze (line 143) | def freeze(model):
  function quantization_map (line 149) | def quantization_map(model: torch.nn.Module) -> Dict[str, Dict[str, str]]:

FILE: optimum/quanto/subpackage/commands/base.py
  class QuantoCommand (line 25) | class QuantoCommand(BaseOptimumCLICommand):

FILE: optimum/quanto/subpackage/commands/quantize.py
  function parse_quantize_args (line 32) | def parse_quantize_args(parser: "ArgumentParser"):
  class QuantizeCommand (line 95) | class QuantizeCommand(BaseOptimumCLICommand):
    method parse_args (line 97) | def parse_args(parser: "ArgumentParser"):
    method run (line 100) | def run(self):

FILE: optimum/quanto/tensor/activations/qbytes.py
  class ActivationQBytesQuantizer (line 28) | class ActivationQBytesQuantizer(Function):
    method forward (line 30) | def forward(ctx, base: torch.Tensor, qtype: qtype, scale: torch.Tensor...
    method backward (line 41) | def backward(ctx, gO):
  class ActivationQBytesTensor (line 46) | class ActivationQBytesTensor(QBytesTensor):
    method __new__ (line 48) | def __new__(cls, qtype, size, stride, data, scale, requires_grad=False):
    method __init__ (line 54) | def __init__(self, qtype, size, stride, data, scale, requires_grad=Fal...
    method quantize (line 58) | def quantize(cls, base: torch.Tensor, qtype: qtype, scale: torch.Tenso...
    method __tensor_flatten__ (line 61) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 71) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_dispatch__ (line 82) | def __torch_dispatch__(cls, op, types, args, kwargs=None):

FILE: optimum/quanto/tensor/activations/qbytes_ops.py
  function register_qbytestensor_op (line 34) | def register_qbytestensor_op(aten_ops: List[Callable]):
  function get_qbytestensor_op_dispatch (line 52) | def get_qbytestensor_op_dispatch(aten_op):
  function is_scalar (line 56) | def is_scalar(t):
  function _to_copy (line 61) | def _to_copy(op, t, dtype=None, **kwargs):
  function detach (line 70) | def detach(op, t):
  function cat (line 78) | def cat(op, inputs, dim=0):
  function lt (line 97) | def lt(op, input, other):
  function clone (line 109) | def clone(op, t, memory_format=torch.preserve_format):
  function copy_ (line 121) | def copy_(op, dest, src):
  function div (line 129) | def div(op, input, other):
  function neg (line 137) | def neg(op, input, *args, **kwargs):
  function unary_type_agnostic_op (line 154) | def unary_type_agnostic_op(op, input, *args, **kwargs):
  function is_same_size (line 164) | def is_same_size(op, input, other):
  function cannot_mm (line 170) | def cannot_mm(t: QTensor):
  function bmm (line 176) | def bmm(op, input, other):
  function mul (line 190) | def mul(op, input, other):
  function relu (line 200) | def relu(op, input):
  function _softmax (line 209) | def _softmax(op, input, dim, half_to_float):
  function stack (line 219) | def stack(op, inputs, dim=0):
  function split (line 237) | def split(op, input, *args, **kwargs):
  function transpose (line 248) | def transpose(op, input, *args):
  function transpose2d (line 257) | def transpose2d(op, input):
  function view (line 268) | def view(op, input, *shape):
  function where (line 277) | def where(op, condition, input, other):

FILE: optimum/quanto/tensor/activations/quantization.py
  function quantize_activation (line 24) | def quantize_activation(t: torch.Tensor, qtype: qtype, scale: torch.Tens...

FILE: optimum/quanto/tensor/core.py
  function dtype_info (line 22) | def dtype_info(dtype):
  function axis_to_dim (line 27) | def axis_to_dim(t, axis):

FILE: optimum/quanto/tensor/function.py
  class QuantizedLinearFunction (line 21) | class QuantizedLinearFunction(torch.autograd.Function):
    method forward (line 42) | def forward(ctx, input, other, bias=None):
    method backward (line 49) | def backward(ctx, gO):

FILE: optimum/quanto/tensor/grouped.py
  function grouped_shape (line 10) | def grouped_shape(shape: List, axis: int, group_size: int) -> List:
  function group (line 17) | def group(base: torch.Tensor, axis: int, group_size: int):
  function ungroup (line 39) | def ungroup(grouped: torch.Tensor, axis: int, orig_shape: torch.Size):

FILE: optimum/quanto/tensor/optimizers/absmax_optimizer.py
  class AbsmaxOptimizer (line 26) | class AbsmaxOptimizer(SymmetricOptimizer):
    method optimize (line 27) | def optimize(

FILE: optimum/quanto/tensor/optimizers/affine_optimizer.py
  class AffineOptimizer (line 27) | class AffineOptimizer(Optimizer):
    method __call__ (line 28) | def __call__(
    method optimize (line 63) | def optimize(self, base: torch.Tensor, qtype: qtype, axis: int) -> Tup...

FILE: optimum/quanto/tensor/optimizers/hqq_optimizer.py
  function shrink_lp_op (line 28) | def shrink_lp_op(x: torch.Tensor, beta: float, lp_norm: float) -> torch....
  class HqqOptimizer (line 37) | class HqqOptimizer(MaxOptimizer):
    method __init__ (line 46) | def __init__(
    method optimize (line 60) | def optimize(

FILE: optimum/quanto/tensor/optimizers/max_optimizer.py
  class MaxOptimizer (line 26) | class MaxOptimizer(AffineOptimizer):
    method optimize (line 27) | def optimize(

FILE: optimum/quanto/tensor/optimizers/optimizer.py
  class Optimizer (line 24) | class Optimizer(ABC):
    method __call__ (line 25) | def __call__(

FILE: optimum/quanto/tensor/optimizers/symmetric_optimizer.py
  class SymmetricOptimizer (line 26) | class SymmetricOptimizer(Optimizer):
    method __call__ (line 27) | def __call__(self, base: torch.Tensor, qtype: qtype, axis: Optional[in...
    method optimize (line 37) | def optimize(self, base: torch.Tensor, qmax: float, axis: Optional[int...

FILE: optimum/quanto/tensor/packed.py
  function pack_weights (line 24) | def pack_weights(intweights: torch.Tensor, bits: int) -> torch.Tensor:
  class PackedTensor (line 72) | class PackedTensor(torch.Tensor):
    method __new__ (line 74) | def __new__(cls, data, bits, size, stride, requires_grad=False):
    method __init__ (line 82) | def __init__(self, data, bits, size, stride, requires_grad=False):
    method __repr__ (line 86) | def __repr__(self):
    method pack (line 93) | def pack(cls, t, bits=4):
    method unpack (line 101) | def unpack(self):
    method bits (line 107) | def bits(self):
    method dtype (line 111) | def dtype(self):
    method load_from_state_dict (line 115) | def load_from_state_dict(state_dict, prefix, bits, size, stride, missi...
    method __tensor_flatten__ (line 125) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 132) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_dispatch__ (line 145) | def __torch_dispatch__(cls, op, types, args, kwargs=None):
    method numpy (line 162) | def numpy(self):

FILE: optimum/quanto/tensor/qbits.py
  class QBitsDequantizer (line 27) | class QBitsDequantizer(Function):
    method forward (line 29) | def forward(ctx, t):
    method backward (line 52) | def backward(ctx, gO):
  class QBitsTensor (line 56) | class QBitsTensor(QTensor):
    method __init__ (line 57) | def __init__(self, qtype, axis, group_size, size, stride, data, scale,...
    method __repr__ (line 64) | def __repr__(self):
    method dequantize (line 67) | def dequantize(self):

FILE: optimum/quanto/tensor/qbytes.py
  class QBytesDequantizer (line 23) | class QBytesDequantizer(Function):
    method forward (line 25) | def forward(ctx, t):
    method backward (line 34) | def backward(ctx, gO):
  class QBytesTensor (line 39) | class QBytesTensor(QTensor):
    method __init__ (line 40) | def __init__(self, qtype, axis, size, stride, data, scale, requires_gr...
    method __repr__ (line 45) | def __repr__(self):
    method dequantize (line 48) | def dequantize(self):

FILE: optimum/quanto/tensor/qtensor.py
  function qfallback (line 21) | def qfallback(callable, *args, **kwargs):
  class QTensor (line 32) | class QTensor(torch.Tensor):
    method __init__ (line 33) | def __init__(self, qtype, axis):
    method dequantize (line 37) | def dequantize(self):
    method save_to_state_dict (line 40) | def save_to_state_dict(self, destination, prefix, keep_vars):
    method axis (line 56) | def axis(self):
    method qtype (line 60) | def qtype(self):
    method numpy (line 63) | def numpy(self):
    method equal (line 66) | def equal(self, other):

FILE: optimum/quanto/tensor/qtype.py
  class qtype (line 21) | class qtype:
    method __str__ (line 32) | def __str__(self):
    method __hash__ (line 35) | def __hash__(self):
  function qint (line 42) | def qint(bits):
  function qfloat (line 55) | def qfloat(dtype: torch.dtype):

FILE: optimum/quanto/tensor/weights/awq/packed.py
  function pack (line 33) | def pack(unpacked: torch.Tensor, reorder=False):
  function reverse_awq_order (line 64) | def reverse_awq_order(t: torch.Tensor):
  function unpack (line 80) | def unpack(packed: torch.Tensor, reorder=False):
  function pack_v2 (line 100) | def pack_v2(unpacked: torch.Tensor) -> torch.Tensor:
  function unpack_v2 (line 156) | def unpack_v2(packed):
  class AWQPacking (line 204) | class AWQPacking(Enum):
  class AWQPackedTensor (line 209) | class AWQPackedTensor(torch.Tensor):
    method __new__ (line 211) | def __new__(cls, data, packing, reorder, size, stride, requires_grad=F...
    method __init__ (line 220) | def __init__(self, data, packing, reorder, size, stride, requires_grad...
    method __repr__ (line 225) | def __repr__(self):
    method pack (line 229) | def pack(cls, t, packing=AWQPacking.V1, reorder=False):
    method unpack (line 237) | def unpack(self):
    method dtype (line 243) | def dtype(self):
    method __tensor_flatten__ (line 246) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 258) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_dispatch__ (line 272) | def __torch_dispatch__(cls, op, types, args, kwargs=None):
    method numpy (line 293) | def numpy(self):

FILE: optimum/quanto/tensor/weights/awq/qbits.py
  class AWQWeightQBitsDequantizer (line 30) | class AWQWeightQBitsDequantizer(Function):
    method forward (line 32) | def forward(ctx, t):
    method backward (line 49) | def backward(ctx, gO):
  class AWQWeightQBitsLinearFunction (line 53) | class AWQWeightQBitsLinearFunction(QuantizedLinearFunction):
    method forward (line 55) | def forward(ctx, input, other, bias):
  class AWQWeightQBitsTensor (line 77) | class AWQWeightQBitsTensor(WeightQBitsTensor):
    method __new__ (line 79) | def __new__(cls, qtype, axis, group_size, size, stride, data, scale, s...
    method __init__ (line 87) | def __init__(self, qtype, axis, group_size, size, stride, data, scale,...
    method dequantize (line 106) | def dequantize(self):
    method weight_qbits_tensor (line 109) | def weight_qbits_tensor(self):
    method __tensor_flatten__ (line 123) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 136) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_function__ (line 149) | def __torch_function__(cls, func, types, args=(), kwargs=None):

FILE: optimum/quanto/tensor/weights/marlin/fp8/packed.py
  function pack_fp8_as_int32 (line 22) | def pack_fp8_as_int32(fp8_tensor: torch.Tensor) -> torch.Tensor:
  function unpack_int32_to_fp8 (line 51) | def unpack_int32_to_fp8(int32_tensor: torch.Tensor) -> torch.Tensor:
  function get_scale_perms (line 71) | def get_scale_perms() -> torch.Tensor:
  function get_row_permutation (line 78) | def get_row_permutation(n_rows: int) -> torch.Tensor:
  function get_column_permutation (line 116) | def get_column_permutation(n_col: int) -> torch.Tensor:
  class MarlinF8PackedTensor (line 160) | class MarlinF8PackedTensor(torch.Tensor):
    method __new__ (line 161) | def __new__(cls, data, size, stride, requires_grad=False):
    method __init__ (line 169) | def __init__(self, data, size, stride, requires_grad=False):
    method __repr__ (line 172) | def __repr__(self):
    method pack (line 176) | def pack(cls, tensor: torch.Tensor):
    method unpack (line 189) | def unpack(self) -> torch.Tensor:
    method dtype (line 220) | def dtype(self):
    method __tensor_flatten__ (line 223) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 233) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_dispatch__ (line 245) | def __torch_dispatch__(cls, op, types, args, kwargs=None):

FILE: optimum/quanto/tensor/weights/marlin/fp8/qbits.py
  class MarlinF8QBytesLinearFunction (line 28) | class MarlinF8QBytesLinearFunction(QuantizedLinearFunction):
    method forward (line 30) | def forward(ctx, input, other, bias=None):
  class MarlinF8QBytesTensor (line 54) | class MarlinF8QBytesTensor(WeightQBytesTensor):
    method __new__ (line 56) | def __new__(cls, qtype, axis, size, stride, data, scale, requires_grad...
    method __init__ (line 63) | def __init__(self, qtype, axis, size, stride, data, scale, requires_gr...
    method dequantize (line 88) | def dequantize(self):
    method __repr__ (line 102) | def __repr__(self):
    method weight_qbytes_tensor (line 105) | def weight_qbytes_tensor(self):
    method __tensor_flatten__ (line 119) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 130) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_function__ (line 142) | def __torch_function__(cls, func, types, args=(), kwargs=None):

FILE: optimum/quanto/tensor/weights/marlin/int4/packed.py
  function _get_perm (line 19) | def _get_perm():
  function pack (line 59) | def pack(unpacked: torch.Tensor):
  function unpack (line 78) | def unpack(packed, orig_shape):
  class MarlinInt4PackedTensor (line 91) | class MarlinInt4PackedTensor(torch.Tensor):
    method __new__ (line 93) | def __new__(cls, data, size, stride, requires_grad=False):
    method __init__ (line 101) | def __init__(self, data, size, stride, requires_grad=False):
    method __repr__ (line 104) | def __repr__(self):
    method pack (line 108) | def pack(cls, t):
    method unpack (line 112) | def unpack(self):
    method dtype (line 116) | def dtype(self):
    method __tensor_flatten__ (line 119) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 128) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_dispatch__ (line 139) | def __torch_dispatch__(cls, op, types, args, kwargs=None):
    method numpy (line 159) | def numpy(self):

FILE: optimum/quanto/tensor/weights/marlin/int4/qbits.py
  class MarlinQBitsDequantizer (line 31) | class MarlinQBitsDequantizer(Function):
    method forward (line 33) | def forward(ctx, t):
    method backward (line 49) | def backward(ctx, gO):
  class MarlinQBitsLinearFunction (line 53) | class MarlinQBitsLinearFunction(QuantizedLinearFunction):
    method forward (line 55) | def forward(ctx, input, other, bias):
  class MarlinInt4WeightQBitsTensor (line 72) | class MarlinInt4WeightQBitsTensor(WeightQBitsTensor):
    method __new__ (line 74) | def __new__(cls, qtype, axis, group_size, size, stride, data, scale, s...
    method __init__ (line 82) | def __init__(self, qtype, axis, group_size, size, stride, data, scale,...
    method dequantize (line 103) | def dequantize(self):
    method weight_qbits_tensor (line 106) | def weight_qbits_tensor(self):
    method __tensor_flatten__ (line 121) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 134) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_function__ (line 147) | def __torch_function__(cls, func, types, args=(), kwargs=None):

FILE: optimum/quanto/tensor/weights/marlin/permutations.py
  function _get_perms (line 28) | def _get_perms() -> Tuple[List[int], List[int]]:
  function _get_inverted_perms (line 39) | def _get_inverted_perms() -> Tuple[List[int], List[int]]:
  function marlin_permute (line 44) | def marlin_permute(t: torch.Tensor, reverse=False):

FILE: optimum/quanto/tensor/weights/packing.py
  function unpack_int32_to_uint8 (line 18) | def unpack_int32_to_uint8(packed: torch.Tensor, bits: int):

FILE: optimum/quanto/tensor/weights/qbits.py
  class WeightsQBitsQuantizer (line 34) | class WeightsQBitsQuantizer(Function):
    method forward (line 36) | def forward(
    method backward (line 60) | def backward(ctx, gO):
  class WeightQBitsTensor (line 65) | class WeightQBitsTensor(QBitsTensor):
    method create (line 67) | def create(qtype, axis, group_size, size, stride, data, scale, shift, ...
    method __new__ (line 141) | def __new__(cls, qtype, axis, group_size, size, stride, data, scale, s...
    method __init__ (line 148) | def __init__(self, qtype, axis, group_size, size, stride, data, scale,...
    method quantize (line 154) | def quantize(
    method load_from_state_dict (line 167) | def load_from_state_dict(state_dict, prefix, qtype, axis, group_size, ...
    method optimize (line 201) | def optimize(self):
    method save_to_state_dict (line 223) | def save_to_state_dict(self, destination, prefix, keep_vars):
    method weight_qbits_tensor (line 230) | def weight_qbits_tensor(self):
    method __tensor_flatten__ (line 237) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 250) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_function__ (line 263) | def __torch_function__(cls, func, types, args=(), kwargs=None):
    method __torch_dispatch__ (line 290) | def __torch_dispatch__(cls, op, types, args, kwargs=None):

FILE: optimum/quanto/tensor/weights/qbytes.py
  class WeightQBytesQuantizer (line 31) | class WeightQBytesQuantizer(Function):
    method forward (line 33) | def forward(
    method backward (line 63) | def backward(ctx, gO):
  class WeightQBytesLinearFunction (line 68) | class WeightQBytesLinearFunction(QuantizedLinearFunction):
    method forward (line 70) | def forward(ctx, input, other, bias=None):
  class WeightQBytesTensor (line 85) | class WeightQBytesTensor(QBytesTensor):
    method create (line 87) | def create(
    method __new__ (line 146) | def __new__(cls, qtype, axis, size, stride, data, scale, activation_qt...
    method __init__ (line 152) | def __init__(self, qtype, axis, size, stride, data, scale, activation_...
    method quantize (line 157) | def quantize(
    method load_from_state_dict (line 169) | def load_from_state_dict(state_dict, prefix, qtype, axis, size, stride...
    method optimize (line 191) | def optimize(self):
    method save_to_state_dict (line 211) | def save_to_state_dict(self, destination, prefix, keep_vars):
    method weight_qbytes_tensor (line 218) | def weight_qbytes_tensor(self):
    method __tensor_flatten__ (line 225) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 237) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_function__ (line 250) | def __torch_function__(cls, func, types, args=(), kwargs=None):
    method __torch_dispatch__ (line 277) | def __torch_dispatch__(cls, op, types, args, kwargs=None):

FILE: optimum/quanto/tensor/weights/quantization.py
  function quantize_weight (line 27) | def quantize_weight(

FILE: optimum/quanto/tensor/weights/reordering.py
  function reorder (line 23) | def reorder(t: torch.Tensor, permutation: Union[torch.Tensor, List[int]]):
  function reverse (line 38) | def reverse(permutation: Union[torch.Tensor, List[int]]):

FILE: optimum/quanto/tensor/weights/tinygemm/packed.py
  class TinyGemmPackedTensor (line 25) | class TinyGemmPackedTensor(torch.Tensor):
    method __new__ (line 27) | def __new__(cls, data, size, stride, requires_grad=False):
    method __init__ (line 34) | def __init__(self, data, size, stride, requires_grad=False):
    method __repr__ (line 37) | def __repr__(self):
    method pack (line 41) | def pack(cls, t):
    method unpack (line 66) | def unpack(self):
    method dtype (line 98) | def dtype(self):
    method __tensor_flatten__ (line 101) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 111) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_dispatch__ (line 123) | def __torch_dispatch__(cls, op, types, args, kwargs=None):
    method numpy (line 147) | def numpy(self):

FILE: optimum/quanto/tensor/weights/tinygemm/qbits.py
  class TinyGemmQBitsDequantizer (line 30) | class TinyGemmQBitsDequantizer(Function):
    method forward (line 32) | def forward(ctx, t):
    method backward (line 38) | def backward(ctx, gO):
  class TinyGemmQBitsLinearFunction (line 42) | class TinyGemmQBitsLinearFunction(QuantizedLinearFunction):
    method forward (line 44) | def forward(ctx, input, other, bias):
  class TinyGemmWeightQBitsTensor (line 65) | class TinyGemmWeightQBitsTensor(WeightQBitsTensor):
    method __new__ (line 67) | def __new__(cls, qtype, axis, group_size, size, stride, data, scale_sh...
    method __init__ (line 82) | def __init__(self, qtype, axis, group_size, size, stride, data, scale_...
    method dequantize (line 111) | def dequantize(self):
    method weight_qbits_tensor (line 114) | def weight_qbits_tensor(self):
    method __tensor_flatten__ (line 130) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 143) | def __tensor_unflatten__(inner_tensors, meta, outer_size, outer_stride):
    method __torch_function__ (line 156) | def __torch_function__(cls, func, types, args=(), kwargs=None):

FILE: tests/cli/test_quantize_cli.py
  function test_export_decoder_cli (line 26) | def test_export_decoder_cli(weights):

FILE: tests/conftest.py
  function device (line 29) | def device(request):
  function pytest_configure (line 33) | def pytest_configure(config):
  function pytest_runtest_call (line 38) | def pytest_runtest_call(item):

FILE: tests/helpers.py
  function torch_min_version (line 33) | def torch_min_version(v):
  function device_eq (line 46) | def device_eq(a, b):
  function random_tensor (line 54) | def random_tensor(shape, dtype=torch.float32, device="cpu"):
  function random_qactivation (line 65) | def random_qactivation(shape, qtype=qint8, dtype=torch.float32, device="...
  function random_qweight (line 71) | def random_qweight(shape, qtype, dtype=torch.float32, axis=0, group_size...
  function assert_similar (line 85) | def assert_similar(a, b, atol=None, rtol=None):
  function get_device_memory (line 102) | def get_device_memory(device):

FILE: tests/library/test_extensions.py
  function _is_xpu_available (line 10) | def _is_xpu_available():
  function test_extension_available (line 32) | def test_extension_available(extension_name):
  function test_extension_compilation (line 37) | def test_extension_compilation(extension_name):

FILE: tests/library/test_mm.py
  function test_qbytes_mm (line 35) | def test_qbytes_mm(batch_size, input_features, input_dtype, weight_dtype...
  function test_gemm_fp16_int4 (line 59) | def test_gemm_fp16_int4(batch_size, tokens, in_features, out_features):
  function test_fp8_marlin (line 112) | def test_fp8_marlin(tokens, in_features, out_features, dtype):
  function test_gemm_marlin_fp16_int4 (line 155) | def test_gemm_marlin_fp16_int4(batch_size, tokens, in_features, out_feat...

FILE: tests/library/test_quantize.py
  function test_symmetric_quantize_int (line 41) | def test_symmetric_quantize_int(input_shape, dtype, qtype, axis, device):
  function test_symmetric_quantize_float8 (line 63) | def test_symmetric_quantize_float8(input_shape, dtype, qtype, axis, devi...
  function test_affine_quantize (line 78) | def test_affine_quantize(input_shape, dtype, qtype, axis, group_size, sh...
  function test_affine_quantize_integer_tensor (line 107) | def test_affine_quantize_integer_tensor(dtype, qtype, device):

FILE: tests/library/test_unpack.py
  function test_unpack (line 24) | def test_unpack(bits, shape, device):

FILE: tests/models/conftest.py
  function staging (line 6) | def staging():
  function skip_if_staging (line 25) | def skip_if_staging(request):

FILE: tests/models/test_quantized_model_for_causal_lm.py
  function quantized_model_for_causal_lm (line 11) | def quantized_model_for_causal_lm(model_id, qtype, exclude, from_config=...
  function compare_models (line 49) | def compare_models(a_model, b_model):
  function test_quantized_model_for_causal_lm_base (line 79) | def test_quantized_model_for_causal_lm_base(model_id, qtype, exclude_lm_...
  function test_quantized_model_for_causal_lm_sharded (line 92) | def test_quantized_model_for_causal_lm_sharded():
  function test_causal_lm_base_push_to_hub (line 107) | def test_causal_lm_base_push_to_hub(staging, in_org):
  function test_quantized_model_load_state_dict_non_strict (line 134) | def test_quantized_model_load_state_dict_non_strict(model_id, qtype):

FILE: tests/models/test_quantized_model_for_pixart.py
  function quantized_model_for_pixart (line 11) | def quantized_model_for_pixart(qtype, exclude):
  function compare_models (line 40) | def compare_models(a_model, b_model):
  function test_quantized_model_for_pixart (line 80) | def test_quantized_model_for_pixart(qtype, exclude_proj_out):
  function test_push_to_hub (line 94) | def test_push_to_hub(staging, in_org):

FILE: tests/nn/test_calibrate.py
  function _test_calibrate_qlinear (line 23) | def _test_calibrate_qlinear(batch_size, tokens, embeddings, use_bias, ac...
  function test_calibrate_qlinear_activations_int8 (line 45) | def test_calibrate_qlinear_activations_int8(batch_size, tokens, embeddin...
  function test_calibrate_qlinear_activations_float8 (line 58) | def test_calibrate_qlinear_activations_float8(batch_size, tokens, embedd...
  function _test_calibrate_custom_module (line 62) | def _test_calibrate_custom_module(activations, device):
  function test_calibrate_custom_module_activations_int8 (line 88) | def test_calibrate_custom_module_activations_int8(device):
  function test_calibrate_custom_module_activations_float8 (line 98) | def test_calibrate_custom_module_activations_float8(activations, device):

FILE: tests/nn/test_qattention.py
  class RotaryEmbedding (line 27) | class RotaryEmbedding(nn.Module):
    method __init__ (line 28) | def __init__(self, dim, max_position_embeddings=2048, base=10000, devi...
    method _set_cos_sin_cache (line 42) | def _set_cos_sin_cache(self, seq_len, device, dtype):
    method forward (line 52) | def forward(self, x, seq_len=None):
  function rotate_half (line 63) | def rotate_half(x):
  function apply_rotary_pos_emb (line 70) | def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1):
  function repeat_kv (line 98) | def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
  class Attention (line 110) | class Attention(nn.Module):
    method __init__ (line 113) | def __init__(self, hidden_size=128, num_heads=4, max_position_embeddin...
    method _shape (line 130) | def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
    method forward (line 133) | def forward(
  function _test_quantize_attention (line 174) | def _test_quantize_attention(device, dtype=torch.float32, weights=qint8,...
  function test_quantize_attention_weights_only (line 193) | def test_quantize_attention_weights_only(weights, device):
  function test_quantize_attention_weights_only_float8 (line 198) | def test_quantize_attention_weights_only_float8(device):
  function test_quantize_attention_activations_int8 (line 203) | def test_quantize_attention_activations_int8(weights, device):
  function test_quantize_attention_activations_float8 (line 214) | def test_quantize_attention_activations_float8(weights, activations, dev...

FILE: tests/nn/test_qconv2d.py
  function _test_quantize_conv2d (line 31) | def _test_quantize_conv2d(batch_size, img_shape, out_channels, use_bias,...
  function test_quantize_conv2d_float16_activations_int8 (line 59) | def test_quantize_conv2d_float16_activations_int8(batch_size, img_shape,...
  function test_quantize_conv2d_float32_activations_int8 (line 68) | def test_quantize_conv2d_float32_activations_int8(batch_size, img_shape,...
  function test_quantize_conv2d_float16_activations_float8 (line 83) | def test_quantize_conv2d_float16_activations_float8(
  function test_quantize_conv2d_float32_activations_float8 (line 100) | def test_quantize_conv2d_float32_activations_float8(
  function test_quantize_conv2d_float16_weight_only (line 111) | def test_quantize_conv2d_float16_weight_only(batch_size, img_shape, out_...
  function test_quantize_conv2d_float32_weight_only (line 120) | def test_quantize_conv2d_float32_weight_only(batch_size, img_shape, out_...
  function test_qconv2d_gradient (line 128) | def test_qconv2d_gradient(img_shape, out_channels, activations, weights,...

FILE: tests/nn/test_qlayernorm.py
  function _test_quantize_layernorm (line 23) | def _test_quantize_layernorm(batch_size, tokens, embeddings, affine, dty...
  function test_quantize_layernorm_float16_activations_int8 (line 47) | def test_quantize_layernorm_float16_activations_int8(batch_size, tokens,...
  function test_quantize_layernorm_float32_activations_int8 (line 54) | def test_quantize_layernorm_float32_activations_int8(batch_size, tokens,...
  function test_quantize_layernorm_float16_activations_float8 (line 67) | def test_quantize_layernorm_float16_activations_float8(batch_size, token...
  function test_quantize_layernorm_float32_activations_float8 (line 80) | def test_quantize_layernorm_float32_activations_float8(batch_size, token...
  function test_quantize_layernom_no_activation (line 84) | def test_quantize_layernom_no_activation():

FILE: tests/nn/test_qlinear.py
  function _test_quantize_linear (line 37) | def _test_quantize_linear(batch_size, tokens, embeddings, use_bias, weig...
  function test_quantize_linear_float16_activations_int8 (line 65) | def test_quantize_linear_float16_activations_int8(batch_size, tokens, em...
  function test_quantize_linear_float32_activations_int8 (line 73) | def test_quantize_linear_float32_activations_int8(batch_size, tokens, em...
  function test_quantize_linear_float16_activations_float8 (line 90) | def test_quantize_linear_float16_activations_float8(
  function test_quantize_linear_float32_activations_float8 (line 107) | def test_quantize_linear_float32_activations_float8(
  function test_quantize_linear_float16_weight_only (line 120) | def test_quantize_linear_float16_weight_only(batch_size, tokens, embeddi...
  function test_quantize_linear_float32_weight_only (line 134) | def test_quantize_linear_float32_weight_only(batch_size, tokens, embeddi...
  function test_qlinear_gradient (line 141) | def test_qlinear_gradient(tokens, embeddings, activations, weights, devi...
  function test_move_qlinear (line 182) | def test_move_qlinear(dtype, use_bias, weights, device):
  function test_qlinear_serialization (line 200) | def test_qlinear_serialization(features, use_bias, activations, weights,...

FILE: tests/nn/test_qmodule.py
  function test_qmodule_freeze (line 26) | def test_qmodule_freeze(in_features, out_features, use_bias, dtype):
  function test_qmodule_qtype_as_string (line 50) | def test_qmodule_qtype_as_string(weights, activations):

FILE: tests/quantize/test_quantize_mlp.py
  class MLP (line 40) | class MLP(torch.nn.Module):
    method __init__ (line 41) | def __init__(self, input_size, output_size, hidden_size):
    method forward (line 47) | def forward(self, inputs):
  function check_mlp (line 53) | def check_mlp(model, frozen):
  function _test_quantize_mlp (line 63) | def _test_quantize_mlp(weights, activations, optimizer, frozen, device, ...
  function test_quantize_mlp_weights_only (line 85) | def test_quantize_mlp_weights_only(weights, frozen, device):
  function test_quantize_mlp_weights_only_float8 (line 92) | def test_quantize_mlp_weights_only_float8(weights, frozen, device):
  function test_quantize_mlp_int8_activations (line 99) | def test_quantize_mlp_int8_activations(weights, frozen, device):
  function test_quantize_mlp_float8_activations (line 111) | def test_quantize_mlp_float8_activations(weights, activations, frozen, d...
  function test_quantized_mlp_device_memory (line 120) | def test_quantized_mlp_device_memory(weights, dtype, weights_only, device):
  function test_quantize_mlp_weights_only_optimizers (line 140) | def test_quantize_mlp_weights_only_optimizers(weights, optimizer, frozen...
  function test_quantize_mlp_wrong_optimizer (line 148) | def test_quantize_mlp_wrong_optimizer(weights, optimizer, device):

FILE: tests/quantize/test_quantize_patterns.py
  class MLP (line 25) | class MLP(torch.nn.Module):
    method __init__ (line 26) | def __init__(self, input_size, output_size, hidden_size):
    method forward (line 32) | def forward(self, inputs):
  class ClassificationModel (line 38) | class ClassificationModel(torch.nn.Module):
    method __init__ (line 39) | def __init__(self, input_size, output_size, hidden_size, classes):
    method forward (line 44) | def forward(self, inputs):
  function has_children (line 49) | def has_children(module: torch.nn.Module):
  function leaf_module_names (line 53) | def leaf_module_names(module: torch.nn.Module):
  function parent_module_names (line 57) | def parent_module_names(module: torch.nn.Module):
  function test_quantize_mlp_include_explicit_layers (line 61) | def test_quantize_mlp_include_explicit_layers():
  function test_quantize_mlp_exclude_explicit_layers (line 74) | def test_quantize_mlp_exclude_explicit_layers():
  function test_quantize_mlp_include_layer_patterns (line 87) | def test_quantize_mlp_include_layer_patterns():
  function test_quantize_mlp_exclude_layer_patterns (line 100) | def test_quantize_mlp_exclude_layer_patterns():

FILE: tests/quantize/test_requantize.py
  function save_and_reload_state_dict (line 28) | def save_and_reload_state_dict(state_dict, serialization):
  function test_requantize_serialized_model (line 50) | def test_requantize_serialized_model(
  function test_requantized_model_device_memory (line 78) | def test_requantized_model_device_memory(weights, dtype, serialization, ...

FILE: tests/tensor/activations/test_activations_compile.py
  function compile_for_device (line 22) | def compile_for_device(f, device):
  function test_compile_quantize_tensor (line 34) | def test_compile_quantize_tensor(input_shape, qtype, dtype, device):
  function test_compile_qtensor_to (line 51) | def test_compile_qtensor_to(device):

FILE: tests/tensor/activations/test_activations_dispatch.py
  function test_qactivation_mul_scalar (line 24) | def test_qactivation_mul_scalar(input_shape, scalar, device):
  function test_qactivation_relu (line 40) | def test_qactivation_relu(batch_size, tokens, embeddings, device):
  function test_qactivation_softmax (line 49) | def test_qactivation_softmax(batch_size, tokens, embeddings, device):
  function test_qactivation_view (line 58) | def test_qactivation_view(input_shape, device):
  function test_qactivation_cat (line 65) | def test_qactivation_cat(input_shape, device):
  function test_qactivation_transpose_2d (line 75) | def test_qactivation_transpose_2d(device):
  function test_qactivation_transpose (line 84) | def test_qactivation_transpose(device):

FILE: tests/tensor/activations/test_activations_quantize.py
  function test_symmetric_quantize_int (line 33) | def test_symmetric_quantize_int(input_shape, dtype, qtype, device):
  function test_symmetric_quantize_float8 (line 52) | def test_symmetric_quantize_float8(input_shape, dtype, qtype, device):

FILE: tests/tensor/ops/test_linear_dispatch.py
  function test_qactivation_qweight_linear (line 28) | def test_qactivation_qweight_linear(
  function test_linear_fp16_int4 (line 48) | def test_linear_fp16_int4(batch_size, tokens, embeddings, use_bias, devi...
  function test_linear_bf16_int4 (line 63) | def test_linear_bf16_int4(batch_size, tokens, embeddings, use_bias, devi...

FILE: tests/tensor/ops/test_mm_dispatch.py
  function test_qactivation_qweight_matmul (line 26) | def test_qactivation_qweight_matmul(dtype, in_features, hidden, out_feat...
  function test_qactivation_qactivation_bmm (line 38) | def test_qactivation_qactivation_bmm(dtype, batch_size, a_shape, b_shape...

FILE: tests/tensor/optimizers/test_hqq_optimizer.py
  function compare_quantized_tensor (line 28) | def compare_quantized_tensor(a, qtype, axis, group_size, scale, shift):
  function test_hqq_optimizer (line 42) | def test_hqq_optimizer(input_shape, dtype, qtype, axis, group_size, devi...

FILE: tests/tensor/test_absmax.py
  function test_absmax_scale (line 26) | def test_absmax_scale(input_shape, axis, dtype, qtype, device):

FILE: tests/tensor/test_packed_tensor.py
  function test_pack_tensor (line 26) | def test_pack_tensor(shape, bits, device):
  function test_packed_tensor_serialization (line 39) | def test_packed_tensor_serialization(bits, device):

FILE: tests/tensor/weights/optimized/test_awq_packed_tensor.py
  function test_pack_awq_tensor (line 30) | def test_pack_awq_tensor(in_features, out_features, random, packing, reo...
  function test_move_awq_tensor (line 51) | def test_move_awq_tensor(packing, reorder, device):

FILE: tests/tensor/weights/optimized/test_awq_weight_qbits_tensor.py
  function test_awq_weight_qbits_tensor_from_qbits_tensor (line 30) | def test_awq_weight_qbits_tensor_from_qbits_tensor(in_features, out_feat...
  function test_awq_weight_qbits_tensor_move (line 66) | def test_awq_weight_qbits_tensor_move(device):
  function _test_awq_weight_qbits_tensor_linear (line 94) | def _test_awq_weight_qbits_tensor_linear(
  function test_awq_weight_qbits_tensor_linear (line 124) | def test_awq_weight_qbits_tensor_linear(batch_size, tokens, in_features,...

FILE: tests/tensor/weights/optimized/test_marlin_fp8_packed_tensor.py
  function get_fp8_tensor (line 25) | def get_fp8_tensor(shape, device, random=False):
  function test_pack_marlin_fp8_tensor (line 44) | def test_pack_marlin_fp8_tensor(in_features, out_features, random):
  function test_move_marlin_fp8_tensor (line 55) | def test_move_marlin_fp8_tensor():

FILE: tests/tensor/weights/optimized/test_marlin_int4_packed_tensor.py
  function get_uint4_tensor (line 24) | def get_uint4_tensor(shape, device, random=False):
  function test_pack_marlin_int4_tensor (line 39) | def test_pack_marlin_int4_tensor(in_features, out_features, random):
  function test_move_marlin_int4_packed_tensor (line 50) | def test_move_marlin_int4_packed_tensor(device):

FILE: tests/tensor/weights/optimized/test_marlin_int4_weight_qbits_tensor.py
  function test_marlin_int4_weight_qbits_tensor_from_qbits_tensor (line 31) | def test_marlin_int4_weight_qbits_tensor_from_qbits_tensor(in_features, ...
  function test_marlin_int4_weight_qbits_tensor_move (line 67) | def test_marlin_int4_weight_qbits_tensor_move(device):
  function _test_marlin_int4_weight_qbits_tensor_linear (line 96) | def _test_marlin_int4_weight_qbits_tensor_linear(
  function test_marlin_int4_weight_qbits_tensor_linear (line 125) | def test_marlin_int4_weight_qbits_tensor_linear(batch_size, tokens, in_f...
  function test_marlin_int4_weight_qbits_tensor_linear_failing (line 144) | def test_marlin_int4_weight_qbits_tensor_linear_failing(batch_size, toke...

FILE: tests/tensor/weights/optimized/test_marlin_qbytes_tensor.py
  function test_pack_unpack (line 29) | def test_pack_unpack(in_features: int, out_features: int):

FILE: tests/tensor/weights/optimized/test_tinygemm_packed_tensor.py
  function test_pack_tinygemm_tensor (line 29) | def test_pack_tinygemm_tensor(in_features, out_features, random, device):
  function test_move_tinygemm_packed_tensor (line 53) | def test_move_tinygemm_packed_tensor(device):

FILE: tests/tensor/weights/optimized/test_tinygemm_weight_qbits_tensor.py
  function test_tinygemm_weight_qbits_tensor_from_qbits_tensor (line 28) | def test_tinygemm_weight_qbits_tensor_from_qbits_tensor(in_features, out...
  function test_tinygemm_weight_qbits_tensor_move (line 71) | def test_tinygemm_weight_qbits_tensor_move(device):
  function test_tinygemm_weight_qbits_tensor_linear (line 101) | def test_tinygemm_weight_qbits_tensor_linear(batch_size, tokens, embeddi...

FILE: tests/tensor/weights/test_weight_qbits_tensor.py
  function test_weight_qbits_tensor_serialization (line 26) | def test_weight_qbits_tensor_serialization(qtype, axis):
  function test_weight_qbits_tensor_requires_grad (line 43) | def test_weight_qbits_tensor_requires_grad(qtype, axis, group_size, devi...
  function test_weight_qbits_tensor_backward (line 54) | def test_weight_qbits_tensor_backward(qtype, axis, group_size, device):

FILE: tests/tensor/weights/test_weight_qbits_tensor_dispatch.py
  function test_qbitstensor_to_device (line 25) | def test_qbitstensor_to_device(dtype, group_size, device):
  function test_qbitstensor_detach (line 45) | def test_qbitstensor_detach():
  function test_qbitstensor_equal (line 54) | def test_qbitstensor_equal(dtype, qtype, axis, device):
  function test_weight_qbits_tensor_linear (line 68) | def test_weight_qbits_tensor_linear(dtype, batch_size, tokens, in_featur...
  function test_weight_qbits_tensor_linear_gpu (line 82) | def test_weight_qbits_tensor_linear_gpu(dtype, batch_size, tokens, in_fe...

FILE: tests/tensor/weights/test_weight_qbits_tensor_instantiate.py
  function random_data_scale_shift (line 23) | def random_data_scale_shift(input_shape, dtype, qtype, axis, group_size):
  function test_weight_qbits_tensor_instantiate (line 40) | def test_weight_qbits_tensor_instantiate(input_shape, dtype, qtype, axis...
  function test_weight_qbits_tensor_equal (line 56) | def test_weight_qbits_tensor_equal(input_shape, dtype, qtype, axis, grou...

FILE: tests/tensor/weights/test_weight_qbits_tensor_quantize.py
  function test_weight_qbits_tensor_quantize (line 33) | def test_weight_qbits_tensor_quantize(input_shape, dtype, qtype, axis, g...
  function test_weight_qbits_tensor_quantize_integer_tensor (line 58) | def test_weight_qbits_tensor_quantize_integer_tensor(dtype, qtype, device):

FILE: tests/tensor/weights/test_weight_qbytes_tensor_backward.py
  function test_weight_qbytes_tensor_requires_grad (line 22) | def test_weight_qbytes_tensor_requires_grad(device):
  function test_weight_qbytes_tensor_backward (line 30) | def test_weight_qbytes_tensor_backward(device):
  function test_weight_qbytes_tensor_chained_backward (line 41) | def test_weight_qbytes_tensor_chained_backward(device):

FILE: tests/tensor/weights/test_weight_qbytes_tensor_dispatch.py
  function test_weight_qytes_tensor_to_device (line 8) | def test_weight_qytes_tensor_to_device(device):
  function test_weight_qbytes_tensor_equal (line 20) | def test_weight_qbytes_tensor_equal(dtype, qtype, axis, device):
  function test_weight_qbytes_tensor_transpose_contiguous (line 30) | def test_weight_qbytes_tensor_transpose_contiguous(axis, qtype, device):
  function test_weight_qbytes_tensor_transposed_stride (line 43) | def test_weight_qbytes_tensor_transposed_stride(axis, qtype, device):

FILE: tests/tensor/weights/test_weight_qbytes_tensor_instantiate.py
  function random_data_scale (line 22) | def random_data_scale(input_shape, dtype, qtype):
  function test_qbytestensor_instantiate (line 37) | def test_qbytestensor_instantiate(input_shape, dtype, qtype, device):
  function test_qbytestensor_equal (line 53) | def test_qbytestensor_equal(input_shape, dtype, qtype, device):

FILE: tests/tensor/weights/test_weight_qbytes_tensor_quantize.py
  function test_symmetric_quantize_int (line 38) | def test_symmetric_quantize_int(input_shape, dtype, qtype, axis, device):
  function test_symmetric_quantize_float8 (line 62) | def test_symmetric_quantize_float8(input_shape, dtype, qtype, axis, devi...
  function test_quantize_weight_axis_dim_1 (line 74) | def test_quantize_weight_axis_dim_1(axis, device):

FILE: tests/tensor/weights/test_weight_qbytes_tensor_serialization.py
  function test_weights_qbytes_tensor_serialization (line 28) | def test_weights_qbytes_tensor_serialization(input_shape, qtype, dtype, ...

FILE: tests/tensor/weights/weight_helpers.py
  function check_weight_qtensor_linear (line 19) | def check_weight_qtensor_linear(qweight, batch_size, tokens, use_bias, r...