gitextract_e7pf933s/

├── .github/
│   ├── CODEOWNERS
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows/
│       ├── check-commits.yml
│       ├── linux-cpu-tests.yml
│       ├── linux-cuda-tests.yml
│       ├── linux-examples.yml
│       ├── python-quality.yml
│       ├── security.yml
│       └── stale.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── bench/
│   ├── generation/
│   │   ├── README.md
│   │   ├── evaluate_configurations.py
│   │   ├── evaluate_many_models.sh
│   │   ├── evaluate_model.py
│   │   ├── gen_barchart.py
│   │   ├── metrics/
│   │   │   ├── __init__.py
│   │   │   ├── latency.py
│   │   │   ├── perplexity.py
│   │   │   └── prediction.py
│   │   └── setup/
│   │       ├── __init__.py
│   │       ├── awq.py
│   │       ├── bnb.py
│   │       ├── hqq.py
│   │       └── quanto.py
│   ├── kernels/
│   │   ├── benchmark.py
│   │   ├── benchmark_marlin_fp8.py
│   │   └── benchmark_w4a16.py
│   └── torch_kernels/
│       ├── README.md
│       ├── test_int_mm.py
│       ├── test_int_mm_inductor.py
│       ├── test_weight_int4pack_mm.py
│       └── test_weight_int8pack_mm.py
├── examples/
│   ├── nlp/
│   │   ├── text-classification/
│   │   │   └── sst2/
│   │   │       └── quantize_sst2_model.py
│   │   └── text-generation/
│   │       └── quantize_causal_lm_model.py
│   ├── speech/
│   │   └── speech_recognition/
│   │       ├── quantize_asr_model.py
│   │       └── requirements.txt
│   └── vision/
│       ├── StableDiffusion/
│       │   ├── README.md
│       │   ├── quantize_StableDiffusion.py
│       │   └── requirements.txt
│       ├── image-classification/
│       │   ├── mnist/
│       │   │   └── quantize_mnist_model.py
│       │   └── pets/
│       │       └── quantize_vit_model.py
│       ├── object-detection/
│       │   └── quantize_owl_model.py
│       └── text-to-image/
│           └── quantize_pixart_sigma.py
├── external/
│   ├── awq/
│   │   ├── conftest.py
│   │   ├── pack_intweight.py
│   │   ├── packing_utils.py
│   │   ├── test_awq_kernels.py
│   │   ├── test_awq_packing.py
│   │   └── test_awq_quantize.py
│   └── smoothquant/
│       ├── README.md
│       └── smoothquant.py
├── optimum/
│   └── quanto/
│       ├── __init__.py
│       ├── calibrate.py
│       ├── library/
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── extensions/
│       │   │   ├── README.md
│       │   │   ├── __init__.py
│       │   │   ├── cpp/
│       │   │   │   ├── README.md
│       │   │   │   ├── __init__.py
│       │   │   │   ├── pybind_module.cpp
│       │   │   │   ├── unpack.cpp
│       │   │   │   └── unpack.h
│       │   │   ├── cuda/
│       │   │   │   ├── README.md
│       │   │   │   ├── __init__.py
│       │   │   │   ├── awq/
│       │   │   │   │   ├── dequantize.cuh
│       │   │   │   │   └── v2/
│       │   │   │   │       ├── gemm_cuda.cu
│       │   │   │   │       ├── gemm_cuda.h
│       │   │   │   │       ├── gemv_cuda.cu
│       │   │   │   │       ├── gemv_cuda.h
│       │   │   │   │       └── semaphore.h
│       │   │   │   ├── marlin/
│       │   │   │   │   ├── COPYRIGHT
│       │   │   │   │   ├── fp8_marlin.cu
│       │   │   │   │   ├── fp8_marlin.cuh
│       │   │   │   │   ├── gptq_marlin.cuh
│       │   │   │   │   ├── gptq_marlin_dtypes.cuh
│       │   │   │   │   ├── gptq_marlin_repack.cu
│       │   │   │   │   ├── gptq_marlin_repack.cuh
│       │   │   │   │   ├── marlin_cuda.cpp
│       │   │   │   │   ├── marlin_cuda.h
│       │   │   │   │   ├── marlin_cuda_kernel.cu
│       │   │   │   │   └── marlin_cuda_kernel.cuh
│       │   │   │   ├── pybind_module.cpp
│       │   │   │   ├── unpack.cu
│       │   │   │   └── unpack.h
│       │   │   ├── extension.py
│       │   │   ├── hip/
│       │   │   │   ├── __init__.py
│       │   │   │   ├── pybind_module.cpp
│       │   │   │   ├── unpack.cu
│       │   │   │   └── unpack.h
│       │   │   ├── mps/
│       │   │   │   ├── README.md
│       │   │   │   ├── __init__.py
│       │   │   │   ├── pybind_module.cpp
│       │   │   │   ├── unpack.h
│       │   │   │   └── unpack.mm
│       │   │   └── xpu/
│       │   │       ├── __init__.py
│       │   │       ├── pybind_module.cpp
│       │   │       ├── unpack.h
│       │   │       └── unpack.sycl
│       │   ├── qbytes_mm.py
│       │   ├── quantize.py
│       │   └── unpack.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── diffusers_models.py
│       │   ├── shared_dict.py
│       │   └── transformers_models.py
│       ├── nn/
│       │   ├── __init__.py
│       │   ├── qconv2d.py
│       │   ├── qlayernorm.py
│       │   ├── qlinear.py
│       │   └── qmodule.py
│       ├── quantize.py
│       ├── subpackage/
│       │   ├── __init__.py
│       │   └── commands/
│       │       ├── __init__.py
│       │       ├── base.py
│       │       └── quantize.py
│       └── tensor/
│           ├── __init__.py
│           ├── activations/
│           │   ├── __init__.py
│           │   ├── qbytes.py
│           │   ├── qbytes_ops.py
│           │   └── quantization.py
│           ├── core.py
│           ├── function.py
│           ├── grouped.py
│           ├── optimizers/
│           │   ├── __init__.py
│           │   ├── absmax_optimizer.py
│           │   ├── affine_optimizer.py
│           │   ├── hqq_optimizer.py
│           │   ├── max_optimizer.py
│           │   ├── optimizer.py
│           │   └── symmetric_optimizer.py
│           ├── packed.py
│           ├── qbits.py
│           ├── qbytes.py
│           ├── qtensor.py
│           ├── qtype.py
│           └── weights/
│               ├── __init__.py
│               ├── awq/
│               │   ├── __init__.py
│               │   ├── packed.py
│               │   └── qbits.py
│               ├── marlin/
│               │   ├── __init__.py
│               │   ├── fp8/
│               │   │   ├── __init__.py
│               │   │   ├── packed.py
│               │   │   └── qbits.py
│               │   ├── int4/
│               │   │   ├── __init__.py
│               │   │   ├── packed.py
│               │   │   └── qbits.py
│               │   └── permutations.py
│               ├── packing.py
│               ├── qbits.py
│               ├── qbytes.py
│               ├── quantization.py
│               ├── reordering.py
│               └── tinygemm/
│                   ├── __init__.py
│                   ├── packed.py
│                   └── qbits.py
├── pyproject.toml
├── setup.sh
└── tests/
    ├── cli/
    │   ├── cli_helpers.py
    │   └── test_quantize_cli.py
    ├── conftest.py
    ├── helpers.py
    ├── library/
    │   ├── test_extensions.py
    │   ├── test_mm.py
    │   ├── test_quantize.py
    │   └── test_unpack.py
    ├── models/
    │   ├── conftest.py
    │   ├── test_quantized_model_for_causal_lm.py
    │   └── test_quantized_model_for_pixart.py
    ├── nn/
    │   ├── test_calibrate.py
    │   ├── test_qattention.py
    │   ├── test_qconv2d.py
    │   ├── test_qlayernorm.py
    │   ├── test_qlinear.py
    │   └── test_qmodule.py
    ├── quantize/
    │   ├── test_quantize_mlp.py
    │   ├── test_quantize_patterns.py
    │   └── test_requantize.py
    └── tensor/
        ├── activations/
        │   ├── test_activations_compile.py
        │   ├── test_activations_dispatch.py
        │   └── test_activations_quantize.py
        ├── ops/
        │   ├── test_linear_dispatch.py
        │   └── test_mm_dispatch.py
        ├── optimizers/
        │   └── test_hqq_optimizer.py
        ├── test_absmax.py
        ├── test_packed_tensor.py
        └── weights/
            ├── optimized/
            │   ├── test_awq_packed_tensor.py
            │   ├── test_awq_weight_qbits_tensor.py
            │   ├── test_marlin_fp8_packed_tensor.py
            │   ├── test_marlin_int4_packed_tensor.py
            │   ├── test_marlin_int4_weight_qbits_tensor.py
            │   ├── test_marlin_qbytes_tensor.py
            │   ├── test_tinygemm_packed_tensor.py
            │   └── test_tinygemm_weight_qbits_tensor.py
            ├── test_weight_qbits_tensor.py
            ├── test_weight_qbits_tensor_dispatch.py
            ├── test_weight_qbits_tensor_instantiate.py
            ├── test_weight_qbits_tensor_quantize.py
            ├── test_weight_qbytes_tensor_backward.py
            ├── test_weight_qbytes_tensor_dispatch.py
            ├── test_weight_qbytes_tensor_instantiate.py
            ├── test_weight_qbytes_tensor_quantize.py
            ├── test_weight_qbytes_tensor_serialization.py
            └── weight_helpers.py