Full Code of facebookresearch/xformers for AI

main ce4f89d54286 cached

905 files

3.3 MB

939.6k tokens

2093 symbols

1 request

Download .txt

Showing preview only (3,741K chars total). Download the full file or copy to clipboard to get everything.

Repository: facebookresearch/xformers
Branch: main
Commit: ce4f89d54286
Files: 905
Total size: 3.3 MB

Directory structure:
gitextract_wkps4m_l/

├── .clang-format
├── .coveragerc
├── .editorconfig
├── .flake8
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.md
│   │   ├── feature-request.md
│   │   └── questions-help-support.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── actions/
│   │   ├── setup-build-cuda/
│   │   │   └── action.yml
│   │   └── setup-env-build/
│   │       └── action.yml
│   ├── compute_wheel_version.py
│   ├── gpu_benchmark_diff.py
│   ├── run-clang-format.py
│   ├── run_benchmark_wrapper.py
│   ├── selective_ci/
│   │   ├── requirements.txt
│   │   └── selective_ci.py
│   └── workflows/
│       ├── gh-pages.yml
│       ├── gpu_test_gh.yml
│       ├── linters.yml
│       ├── linters_reusable.yml
│       ├── rocm_build.yml
│       ├── rocm_ci.yml
│       ├── rocm_docker.yml
│       ├── wheels.yml
│       ├── wheels_build.yml
│       ├── wheels_upload_pip.yml
│       ├── wheels_upload_s3.yml
│       └── win-build.yml
├── .gitignore
├── .gitmodules
├── .isort.cfg
├── .markdownlint.json
├── .pre-commit-config.yaml
├── .pyre_configuration
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs/
│   ├── Makefile
│   ├── requirements.txt
│   └── source/
│       ├── 2d_attention_patterns.ipynb
│       ├── _static/
│       │   └── css/
│       │       └── customize.css
│       ├── _templates/
│       │   ├── layout.html
│       │   └── theme_variables.jinja
│       ├── components/
│       │   ├── index.rst
│       │   └── ops.rst
│       ├── conf.py
│       ├── index.rst
│       ├── swin_transformer.ipynb
│       └── what_is_xformers.rst
├── examples/
│   └── llama_inference/
│       ├── README.md
│       ├── generate.py
│       ├── model.py
│       ├── mp_utils.py
│       ├── requirements.txt
│       ├── sample_utils.py
│       ├── stats.py
│       └── tokenizer.py
├── pyproject.toml
├── requirements-benchmark.txt
├── requirements-test.txt
├── requirements.txt
├── setup.cfg
├── setup.py
├── stubs/
│   ├── fvcore/
│   │   └── nn.pyi
│   ├── matplotlib/
│   │   └── pyplot.pyi
│   ├── numpy/
│   │   └── __init__.pyi
│   ├── pandas.pyi
│   ├── recommonmark/
│   │   └── transform.pyi
│   ├── seaborn.pyi
│   ├── sklearn/
│   │   └── model_selection.pyi
│   ├── submitit.pyi
│   ├── tensorflow.pyi
│   ├── torch/
│   │   ├── __init__.pyi
│   │   ├── autograd/
│   │   │   ├── __init__.pyi
│   │   │   └── profiler.pyi
│   │   ├── cuda/
│   │   │   └── __init__.pyi
│   │   ├── fft/
│   │   │   └── __init__.pyi
│   │   ├── hub.pyi
│   │   ├── linalg/
│   │   │   └── __init__.pyi
│   │   ├── nn/
│   │   │   ├── __init__.pyi
│   │   │   ├── functional/
│   │   │   │   └── __init__.pyi
│   │   │   ├── functional.pyi
│   │   │   ├── init.pyi
│   │   │   └── utils/
│   │   │       └── __init__.pyi
│   │   ├── onnx.pyi
│   │   ├── ops.pyi
│   │   ├── optim/
│   │   │   └── __init__.pyi
│   │   ├── profiler/
│   │   │   └── __init__.pyi
│   │   ├── random/
│   │   │   └── __init__.pyi
│   │   ├── sparse/
│   │   │   └── __init__.pyi
│   │   └── utils/
│   │       ├── data.pyi
│   │       └── model_zoo.pyi
│   ├── torch_stub_tests.py
│   ├── tqdm.pyi
│   └── triton/
│       ├── __init__.pyi
│       ├── language.pyi
│       └── ops/
│           └── blocksparse.pyi
├── tests/
│   ├── __init__.py
│   ├── multiprocessing_utils.py
│   ├── readme_test_on_rocm.txt
│   ├── test_attention_patterns.py
│   ├── test_checkpoint.py
│   ├── test_fmha_flop_formula.py
│   ├── test_fmha_merge_attentions.py
│   ├── test_fwbw_overlap.py
│   ├── test_indexing.py
│   ├── test_mem_eff_attention.py
│   ├── test_multiprocessing_utils.py
│   ├── test_profiler.py
│   ├── test_rmsnorm.py
│   ├── test_rope_padded.py
│   ├── test_seqpar.py
│   ├── test_sequence_parallel_fused_ops.py
│   ├── test_sparse_tensors.py
│   ├── test_sparsity24.py
│   ├── test_splitk_reference.py
│   ├── test_tiled_matmul.py
│   ├── test_tree_attention.py
│   ├── test_triton_varargs.py
│   ├── test_unbind.py
│   └── utils.py
├── version.txt
└── xformers/
    ├── __init__.py
    ├── _cpp_lib.py
    ├── _deprecation_warning.py
    ├── attn_bias_utils.py
    ├── benchmarks/
    │   ├── __init__.py
    │   ├── benchmark_attn_decoding.py
    │   ├── benchmark_indexing.py
    │   ├── benchmark_mem_eff_attention.py
    │   ├── benchmark_merge_attentions.py
    │   ├── benchmark_sequence_parallel_fused.py
    │   ├── benchmark_sp24.py
    │   ├── benchmark_tiled_matmul.py
    │   ├── readme_benchmark_on_rocm.txt
    │   └── utils.py
    ├── checkpoint.py
    ├── components/
    │   └── attention/
    │       └── attention_patterns.py
    ├── csrc/
    │   ├── attention/
    │   │   ├── attention.cpp
    │   │   ├── hip_decoder/
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── attention_forward_splitk.cpp
    │   │   │   ├── ck_tile_attention_forward_decoder_splitk.h
    │   │   │   └── ck_tile_attention_inner_product.h
    │   │   └── hip_fmha/
    │   │       ├── GENERATE_INSTANCES.md
    │   │       ├── attention_backward_generic_ck_tiled.cpp
    │   │       ├── attention_ck_rand_uniform.cpp
    │   │       ├── attention_forward_generic_ck_tiled.cpp
    │   │       ├── ck_fmha_test.cpp
    │   │       ├── ck_fmha_util.h
    │   │       ├── ck_tiled_bool_switch.h
    │   │       ├── ck_tiled_fmha_batched_backward.h
    │   │       ├── ck_tiled_fmha_batched_backward_bf16.cpp
    │   │       ├── ck_tiled_fmha_batched_backward_fp16.cpp
    │   │       ├── ck_tiled_fmha_batched_forward.h
    │   │       ├── ck_tiled_fmha_batched_forward_bf16.cpp
    │   │       ├── ck_tiled_fmha_batched_forward_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_forward_fp16.cpp
    │   │       ├── ck_tiled_fmha_batched_forward_splitkv_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_forward_splitkv_smallq_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_infer.h
    │   │       ├── ck_tiled_fmha_batched_infer_bf16.cpp
    │   │       ├── ck_tiled_fmha_batched_infer_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_infer_fp16.cpp
    │   │       ├── ck_tiled_fmha_batched_infer_splitkv_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_infer_splitkv_smallq_dispatch.h
    │   │       ├── ck_tiled_fmha_bwd_setting.h
    │   │       ├── ck_tiled_fmha_fwd_setting.h
    │   │       ├── ck_tiled_fmha_fwd_splitkv_selector.h
    │   │       ├── ck_tiled_fmha_fwd_splitkv_setting.h
    │   │       ├── ck_tiled_fmha_fwd_splitkv_smallq_selector.h
    │   │       ├── ck_tiled_fmha_fwd_splitkv_smallq_setting.h
    │   │       ├── ck_tiled_fmha_fwd_type_config.h
    │   │       ├── ck_tiled_fmha_grouped_backward.h
    │   │       ├── ck_tiled_fmha_grouped_backward_bf16.cpp
    │   │       ├── ck_tiled_fmha_grouped_backward_fp16.cpp
    │   │       ├── ck_tiled_fmha_grouped_forward.h
    │   │       ├── ck_tiled_fmha_grouped_forward_bf16.cpp
    │   │       ├── ck_tiled_fmha_grouped_forward_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_forward_fp16.cpp
    │   │       ├── ck_tiled_fmha_grouped_forward_splitkv_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_forward_splitkv_smallq_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_infer.h
    │   │       ├── ck_tiled_fmha_grouped_infer_bf16.cpp
    │   │       ├── ck_tiled_fmha_grouped_infer_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_infer_fp16.cpp
    │   │       ├── ck_tiled_fmha_grouped_infer_splitkv_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_infer_splitkv_smallq_dispatch.h
    │   │       ├── ck_tiled_fmha_num_kv_split_switch.h
    │   │       ├── ck_tiled_fmha_params.h
    │   │       ├── ck_tiled_fmha_seqlen_q_switch.h
    │   │       ├── ck_tiled_headdim_switch.h
    │   │       ├── ck_tiled_rand_uniform_kernel.h
    │   │       ├── generate_instances.py
    │   │       └── instances/
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_instances_ref.h
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_instances_ref.h
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_instances_ref.h
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_instances_ref.h
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_instances_ref.h
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_instances_ref.h
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_instances_ref.h
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_instances_ref.h
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_instances_ref.h
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_instances_ref.h
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_instances_ref.h
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_instances_ref.h
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           └── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   ├── nvcc_info.cu
    │   ├── pt_stable_utils.cu
    │   ├── pt_stable_utils.h
    │   └── sparse24/
    │       ├── compute_sparse_tile.h
    │       ├── gemm.cu
    │       ├── meta_utils.cu
    │       ├── sparse24.cpp
    │       ├── sparse24_apply.cu
    │       ├── sparse24_apply_dense_output.cu
    │       ├── sparse24_gemm_sm90.cu
    │       ├── sparse24_largest_mask_2d.cu
    │       ├── sparse24_metadata.h
    │       ├── sparse24_pack.cu
    │       ├── sparse24_pack.h
    │       ├── sparse24_pack_test.cu
    │       ├── sparseNM_dense.cu
    │       ├── static_sort.h
    │       └── warp_tensor.h
    ├── flash_attn_3/
    │   └── __init__.py
    ├── fwbw_overlap.py
    ├── info.py
    ├── ops/
    │   ├── __init__.py
    │   ├── _triton/
    │   │   ├── __init__.py
    │   │   ├── k_index_select_cat.py
    │   │   ├── k_scaled_index_add.py
    │   │   ├── matmul_perf_model.py
    │   │   ├── rmsnorm_kernels.py
    │   │   ├── rope_padded_kernels.py
    │   │   └── tiled_matmul_kernels.py
    │   ├── common.py
    │   ├── differentiable_collectives.py
    │   ├── fmha/
    │   │   ├── __init__.py
    │   │   ├── _triton/
    │   │   │   ├── __init__.py
    │   │   │   └── splitk_kernels.py
    │   │   ├── attn_bias.py
    │   │   ├── ck.py
    │   │   ├── ck_splitk.py
    │   │   ├── common.py
    │   │   ├── cutlass.py
    │   │   ├── cutlass_blackwell.py
    │   │   ├── dispatch.py
    │   │   ├── flash.py
    │   │   ├── flash3.py
    │   │   ├── merge_training.py
    │   │   ├── torch_attention_compat.py
    │   │   └── triton_splitk.py
    │   ├── indexing.py
    │   ├── modpar_layers.py
    │   ├── rmsnorm.py
    │   ├── rope_padded.py
    │   ├── seqpar.py
    │   ├── sequence_parallel_fused_ops.py
    │   ├── sp24.py
    │   ├── swiglu_op.py
    │   ├── tiled_matmul.py
    │   ├── tree_attention.py
    │   └── unbind.py
    ├── profiler/
    │   ├── __init__.py
    │   ├── api.py
    │   ├── device_limits.py
    │   ├── find_slowest.py
    │   ├── profile_analyzer.py
    │   ├── profiler.py
    │   ├── profiler_dcgm.py
    │   └── profiler_dcgm_impl.py
    ├── sparse/
    │   ├── __init__.py
    │   ├── blocksparse_tensor.py
    │   └── utils.py
    ├── test.py
    ├── triton/
    │   ├── __init__.py
    │   ├── importing.py
    │   └── vararg_kernel.py
    └── utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .clang-format
================================================
---
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands:   false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
  AfterClass:      false
  AfterControlStatement: false
  AfterEnum:       false
  AfterFunction:   false
  AfterNamespace:  false
  AfterObjCDeclaration: false
  AfterStruct:     false
  AfterUnion:      false
  BeforeCatch:     false
  BeforeElse:      false
  IndentBraces:    false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit:     80
CommentPragmas:  '^ IWYU pragma:'
#CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat:   false
ForEachMacros:   [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
  - Regex:           '^<.*\.h(pp)?>'
    Priority:        1
  - Regex:           '^<.*'
    Priority:        2
  - Regex:           '.*'
    Priority:        3
IndentCaseLabels: true
IndentWidth:     2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd:   ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 2000000
PointerAlignment: Left
ReflowComments:  true
SortIncludes:    true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles:  false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard:        Cpp11
TabWidth:        8
UseTab:          Never
...


================================================
FILE: .coveragerc
================================================
[run]
omit =
    docs/*
    tests/*
    setup.py
    xformers/benchmarks/*
    xformers/triton/k_*
    stubs/*
    third_party/*


================================================
FILE: .editorconfig
================================================
root = true

[*.py]
charset = utf-8
trim_trailing_whitespace = true
end_of_line = lf
insert_final_newline = true
indent_style = space
indent_size = 4

[*.md]
trim_trailing_whitespace = false


================================================
FILE: .flake8
================================================
[flake8]
exclude =
    .git
    ,.github/run-clang-format.py
    ,third_party
max-line-length = 140
copyright-check = True
select = E,F,W,C
copyright-regexp=Copyright \(c\) Facebook, Inc. and its affiliates. All Rights Reserved
ignore=W503,E203,E704


================================================
FILE: .github/ISSUE_TEMPLATE/bug-report.md
================================================
---
name: "\U0001F41B Bug Report"
about: Submit a bug report to help us improve xFormers

---

# 🐛 Bug

<!-- A clear and concise description of what the bug is. -->

## Command

## To Reproduce

Steps to reproduce the behavior:

<!-- If you were running a command, post the exact command that you were running -->

1.
2.
3.

<!-- If you have a code sample, error messages, stack traces, please provide it here as well -->

## Expected behavior

<!-- A clear and concise description of what you expected to happen. -->

## Environment

Please copy and paste the output from the
environment collection script from PyTorch
(or fill out the checklist below manually).

You can run the script with:

```bash
# For security purposes, please check the contents of collect_env.py before running it.
python -m torch.utils.collect_env
```

- PyTorch Version (e.g., 1.0):
- OS (e.g., Linux):
- How you installed PyTorch (`conda`, `pip`, source):
- Build command you used (if compiling from source):
- Python version:
- CUDA/cuDNN version:
- GPU models and configuration:
- Any other relevant information:

## Additional context

<!-- Add any other context about the problem here. -->


================================================
FILE: .github/ISSUE_TEMPLATE/feature-request.md
================================================
---
name: "\U0001F680Feature Request"
about: Submit a proposal/request for a new xFormers feature

---

# 🚀 Feature

<!-- A clear and concise description of the feature proposal -->

## Motivation

<!-- Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too -->

## Pitch

<!-- A clear and concise description of what you want to happen. -->

## Alternatives

<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->

## Additional context

<!-- Add any other context or screenshots about the feature request here. -->


================================================
FILE: .github/ISSUE_TEMPLATE/questions-help-support.md
================================================
---
name: "❓Questions/Help/Support"
about: Do you need support?

---

# ❓ Questions and Help


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
## What does this PR do?
Fixes # (issue).

## Before submitting

- [ ] Did you have fun?
  - Make sure you had fun coding 🙃
- [ ] Did you read the [contributor guideline](https://github.com/facebookresearch/xformers/blob/master/CONTRIBUTING.md)?
- [ ] Was this discussed/approved via a GitHub issue? (not needed for typos or doc improvements)
  - [ ] N/A
- [ ] Did you make sure to update the docs?
  - [ ] N/A
- [ ] Did you write any new necessary tests?
  - [ ] N/A
- [ ] Did you update the [changelog](https://github.com/facebookresearch/xformers/blob/master/CHANGELOG.md)? (if needed)
  - [ ] N/A


## PR review
Anyone in the community is free to review the PR once the tests have passed.
If we didn't discuss your PR in GitHub issues, there's a high chance it will not be merged.


================================================
FILE: .github/actions/setup-build-cuda/action.yml
================================================
name: Set up Runner for build

inputs:
  toolkit_type:
    description: cuda or rocm
    type: string
  toolkit_short_version:
    required: true
    type: string
    description: "Example: 117 for 11.7"
  python:
    description: Python version to install
    type: string
    default: "3.10"

runs:
  using: composite
  steps:
    # Resolves the short toolkit version given as input into a full version
    # string and an installer URL, and publishes them as step outputs:
    #   CUDA_VERSION, CUDA_VERSION_SUFFIX, CUDA_INSTALL_SCRIPT
    - id: cuda_info
      shell: python3 "{0}"
      run: |
        import os
        import sys
        print(sys.version)
        cushort = "${{ inputs.toolkit_short_version }}"
        # Version uploaded to pypi (rather than PyTorch s3)
        TORCH_CUDA_DEFAULT = "128"  # since pytorch 2.9.0
        # Maps the short version to (full version, installer URL).
        # An unknown short version raises KeyError and fails the step.
        # https://github.com/Jimver/cuda-toolkit/blob/master/src/links/linux-links.ts
        full_version, install_script = {
          "130": ("13.0.1", "https://developer.download.nvidia.com/compute/cuda/13.0.1/local_installers/cuda_13.0.1_580.82.07_linux.run"),
          "129": ("12.9.1", "https://developer.download.nvidia.com/compute/cuda/12.9.1/local_installers/cuda_12.9.1_575.57.08_linux.run"),
          "128": ("12.8.1", "https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda_12.8.1_570.124.06_linux.run"),
          # (Build with nvcc 12.8 on linux even when building for 12.6 to avoid seg fault in Flash3 build)
          "126": ("12.8.1", "https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda_12.8.1_570.124.06_linux.run"),
          "118": ("11.8.0", "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"),
          "6.0": ("6.0.2", "https://repo.radeon.com/amdgpu-install/6.0.2/rhel/8.9/amdgpu-install-6.0.60002-1.el8.noarch.rpm"),
          "6.1": ("6.1.3", "https://repo.radeon.com/amdgpu-install/6.1.3/rhel/8.9/amdgpu-install-6.1.60103-1.el8.noarch.rpm"),
          "6.2.4": ("6.2.4", "https://repo.radeon.com/amdgpu-install/6.2.4/rhel/8.9/amdgpu-install-6.2.60204-1.el8.noarch.rpm"),
          "6.3": ("6.3.1", "https://repo.radeon.com/amdgpu-install/6.3.1/rhel/8.9/amdgpu-install-6.3.60301-1.el8.noarch.rpm"),
          "6.4": ("6.4.2", "https://repo.radeon.com/amdgpu-install/6.4.2/rhel/8.9/amdgpu-install-6.4.60402-1.el8.noarch.rpm"),
          "7.0": ("7.0.3", "https://repo.radeon.com/amdgpu-install/7.0.3/rhel/8/amdgpu-install-7.0.3.70003-1.el8.noarch.rpm"),
          "7.1": ("7.1.0", "https://repo.radeon.com/amdgpu-install/7.1/rhel/8/amdgpu-install-7.1.70100-1.el8.noarch.rpm"),
        }[cushort]
        # Open in append mode: "r+" would write from offset 0 (clobbering any
        # content already in the file) and would fail if the file were missing.
        with open(os.environ['GITHUB_OUTPUT'], "a") as fp:
          fp.write("CUDA_VERSION=" + full_version + "\n")
          # The default toolkit version gets an empty suffix; every other one
          # gets "+cu<short>" or "+rocm<short>" depending on the toolkit type.
          if cushort == TORCH_CUDA_DEFAULT:
            fp.write("CUDA_VERSION_SUFFIX=\n")
          else:
            fp.write("CUDA_VERSION_SUFFIX=+" + ("cu" if "cuda" == "${{ inputs.toolkit_type }}" else "rocm") + cushort + "\n")
          fp.write("CUDA_INSTALL_SCRIPT=" + install_script + "\n")
    - run: echo "CUDA_VERSION_SUFFIX=${{ steps.cuda_info.outputs.CUDA_VERSION_SUFFIX }}" >> ${GITHUB_ENV}
      shell: bash

    # WINDOWS STEPS
    - name: Install cuda
      if: runner.os == 'Windows' && inputs.toolkit_type == 'cuda'
      id: cuda-toolkit
      # Using N-Storm fork until https://github.com/Jimver/cuda-toolkit/issues/395 is resolved
      uses: N-Storm/cuda-toolkit@v0.2.28
      with:
        cuda: ${{ steps.cuda_info.outputs.CUDA_VERSION }}
        method: network
    - if: runner.os == 'Windows' && inputs.toolkit_type == 'cuda'
      shell: bash
      run: |
        echo "Installed cuda version is: ${{ steps.cuda-toolkit.outputs.cuda }}"
        echo "Cuda install location: ${{ steps.cuda-toolkit.outputs.CUDA_PATH }}"
        echo "CUDA_HOME=${{ steps.cuda-toolkit.outputs.CUDA_PATH }}" >> ${GITHUB_ENV}
        cat ${GITHUB_ENV}

    - name: Install python
      if: runner.os == 'Windows'
      uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python }}

    - name: Setup MSVC
      if: runner.os == 'Windows'
      uses: ilammy/msvc-dev-cmd@v1

    # really unfortunate: https://github.com/ilammy/msvc-dev-cmd#name-conflicts-with-shell-bash
    - name: Remove link.exe
      if: runner.os == 'Windows'
      shell: bash
      run: rm /usr/bin/link

    # LINUX STEPS
    - if: ${{ runner.os == 'Linux' && !(contains(inputs.toolkit_type, 'cuda') && fromJSON(inputs.toolkit_short_version) > 124) }}
      shell: bash
      run: |
        # Use GCC11 for ROCM / cu118 / cu124
        yum list installed
        yum install gcc-toolset-11-gcc gcc-toolset-11-gcc-c++ gcc-toolset-11-libstdc++-devel wget git -y
        echo "source /opt/rh/gcc-toolset-11/enable" >> ~/.profile

    - if: ${{ runner.os == 'Linux' && contains(inputs.toolkit_type, 'cuda') && fromJSON(inputs.toolkit_short_version) > 124 }}
      shell: bash
      run: |
        # Use GCC13 for cu126+
        yum list installed
        yum install gcc-toolset-13-gcc gcc-toolset-13-gcc-c++ gcc-toolset-13-libstdc++-devel wget git -y
        echo "source /opt/rh/gcc-toolset-13/enable" >> ~/.profile

    - if: runner.os == 'Linux'
      shell: bash -l {0}
      run: |
        yum list installed
        yum install wget git -y
        which g++
        g++ --version

    - if: runner.os == 'Linux' && contains(inputs.toolkit_type, 'cuda')
      name: (Linux) install cuda
      shell: bash -l {0}
      run: |
        wget -q "${{ steps.cuda_info.outputs.CUDA_INSTALL_SCRIPT }}" -O cuda.run && \
        sh ./cuda.run --silent --toolkit && \
        rm ./cuda.run
        echo "CUDA_HOME=/usr/local/cuda" >> ${GITHUB_ENV}

    - if: runner.os == 'Linux' && contains(inputs.toolkit_type, 'cuda')
      name: (Linux) print cuda setup info
      shell: bash -l {0}
      run: |
        echo "CUDA_HOME=$CUDA_HOME"
        echo "###############################"
        echo "############ NVCC  ############"
        echo "###############################"
        $CUDA_HOME/bin/nvcc --version
        md5sum $CUDA_HOME/bin/nvcc
        echo "###############################"
        echo "############ PTXAS ############"
        echo "###############################"
        $CUDA_HOME/bin/ptxas --version
        md5sum $CUDA_HOME/bin/ptxas

    - if: runner.os == 'Linux' && contains(inputs.toolkit_type, 'rocm')
      name: (Linux) install rocm
      shell: bash
      run: |
        yum install -y libzstd
        yum install -y ${{ steps.cuda_info.outputs.CUDA_INSTALL_SCRIPT }}
        amdgpu-install -y --usecase=rocm --no-dkms
        echo "ROCM_PATH=/opt/rocm" >> ${GITHUB_ENV}
        echo "PATH=$PATH:/opt/rocm/bin" >> ${GITHUB_ENV}
        echo "MAX_JOBS=16" >> ${GITHUB_ENV}

    # host compiler is too new for cuda 12.1 :(
    - run: echo "NVCC_FLAGS=-allow-unsupported-compiler" >> $GITHUB_ENV
      shell: bash


================================================
FILE: .github/actions/setup-env-build/action.yml
================================================
name: Install env + build
inputs:
  arch:
    description: 'GPU architecture'
    required: true
  python:
    description: 'Python version'
    required: false
    default: "3.11"

runs:
  using: composite
  steps:
    - name: Cleanup
      shell: bash
      run: rm -f ~/.profile ~/.bashrc
    # Computes the paths and cache key of the conda environment used for the
    # build, exports them through GITHUB_ENV, and reports via the ENV_CACHED
    # step output whether a previously built environment can be reused.
    - id: prepare_conda_env_paths
      shell: python
      run: |
        import os
        import subprocess
        import hashlib
        import glob
        import datetime
        from pathlib import Path

        CONDA_INSTALL_CMD = "micromamba create python=${{ inputs.python }} zlib pip ninja ccache=4.8 -c conda-forge -q -y"

        # The cache key covers the install command, a manual version marker
        # and the full text of every requirement*.txt file.
        conda_env_key = CONDA_INSTALL_CMD + "[cu130][v2]"
        for file in sorted(glob.glob("requirement*.txt")):
          conda_env_key += f"\n########## {file}\n"
          conda_env_key += Path(file).read_text()
        # utf-8 yields the same bytes (hence the same hash) as ascii for pure
        # ASCII input, but does not raise on a non-ASCII character in a
        # requirements file.
        env_name_key = hashlib.sha224(conda_env_key.encode("utf-8")).hexdigest()[:8]
        env_name_key += "-${{ inputs.arch }}"
        # Nightly or Test, update every week
        env_name_key += "-"+datetime.date.today().strftime("%Y-week%W")
        shared_dir = os.environ.get("GHRUNNER_SHARED_DIR", os.getcwd())
        env_path = os.path.join(shared_dir, "tmp", "${{ inputs.arch }}", os.environ["GITHUB_RUN_ID"])
        final_env = Path(shared_dir) / f"env_{env_name_key}.txt"
        pkg_dir = Path(shared_dir) / "pkgs-sm${{ inputs.arch }}"
        (Path(shared_dir) / f"env_{env_name_key}_content.txt").write_text(conda_env_key)
        CONDA_INSTALL_CMD += " -p " + env_path
        env_already_built = False
        # If environment is already built
        if final_env.is_file():
          # The marker file contains the prefix of the environment that was built.
          final_env_link = final_env.read_text().strip()
          if (Path(final_env_link) / "bin" / "python").is_file():
            print("Found valid env - skipping env setup")
            # Replace the install command with a no-op.
            CONDA_INSTALL_CMD = "true"
            env_already_built = True
            env_path = final_env_link
          else:
            print("Invalid env")
        # Open in append mode: "r+" would write from offset 0 (clobbering any
        # content already in the file) and would fail if the file were missing.
        with open(os.environ['GITHUB_ENV'], "a") as fp:
          fp.write("CONDA_ENV_LINK=" + str(final_env) + "\n")
          fp.write("CONDA_PREFIX=" + env_path + "\n")
          fp.write("CONDA_PKGS_DIRS=" + str(pkg_dir) + "\n")
          fp.write("CONDA_INSTALL_CMD=" + CONDA_INSTALL_CMD + "\n")
          fp.write("CONDA_ENV_HASH=" + env_name_key + "\n")
          fp.write("PY=" + os.path.join(env_path, "bin", "python") + "\n")
          fp.write("PIP=" + os.path.join(env_path, "bin", "pip") + "\n")
        with open(os.environ['GITHUB_OUTPUT'], "a") as fp:
          fp.write(f"ENV_CACHED={int(env_already_built)}\n")
    - name: Print conda commands
      shell: bash -l {0}
      run: |
        echo "CONDA_PREFIX=$CONDA_PREFIX"
        echo "CONDA_INSTALL_CMD=$CONDA_INSTALL_CMD"
        echo "CONDA_ENV_HASH=$CONDA_ENV_HASH"
        echo "PY=$PY"
    - name: Install micromamba
      shell: bash -l {0}
      run: |
        set -ex
        curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest -o micromamba.tar.bz2
        tar --extract --verbose --bzip2 --file=micromamba.tar.bz2 bin/micromamba
        echo "eval \"\$($(pwd)/bin/micromamba shell hook --shell bash)\"" >> ~/.profile
    - name: Conda/pip setup
      shell: bash -l {0}
      if: steps.prepare_conda_env_paths.outputs.ENV_CACHED == 0
      run: |
        set -ex
        micromamba config set channel_priority strict
        # Retry if failed after removing downloaded packages cache
        $CONDA_INSTALL_CMD || (rm -rf $CONDA_PKGS_DIRS && rm -rf $CONDA_PREFIX && $CONDA_INSTALL_CMD)
        $PY -m pip install cmake
        $PY -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128
        $PY -m pip install -r requirements-benchmark.txt --progress-bar off
    - name: Activate environment
      shell: bash -l {0}
      run: |
        echo "micromamba activate $CONDA_PREFIX" >> ~/.profile
        echo "==== .profile ====="
        cat ~/.profile
    - run: which python
      shell: bash -l {0}
    - name: Setup ccache nvcc
      shell: bash -l {0}
      if: steps.prepare_conda_env_paths.outputs.ENV_CACHED == 0
      run: |
        echo "#!/bin/bash" > $CONDA_PREFIX/bin/nvcc-ccache
        echo "ccache nvcc \"\$@\"" >> $CONDA_PREFIX/bin/nvcc-ccache
        cat $CONDA_PREFIX/bin/nvcc-ccache
        chmod +x $CONDA_PREFIX/bin/nvcc-ccache
        which nvcc
        ccache --version

    - name: Setup ccache g++
      shell: bash -l {0}
      if: steps.prepare_conda_env_paths.outputs.ENV_CACHED == 0
      run: |
        echo "#!/bin/bash" > $CONDA_PREFIX/bin/g++-ccache
        echo "ccache g++ \"\$@\"" >> $CONDA_PREFIX/bin/g++-ccache
        cat $CONDA_PREFIX/bin/g++-ccache
        chmod +x $CONDA_PREFIX/bin/g++-ccache
        which g++-ccache

    - name: Patch for https://github.com/pytorch/pytorch/issues/114962
      shell: bash -l {0}
      run: |
        CPP_EXTENSIONS_PY=$(python -c "import torch.utils.cpp_extension; print(torch.utils.cpp_extension.__file__)")
        echo "Patching $CPP_EXTENSIONS_PY"
        sed -i "/generate-dependencies-with-compile/d" $CPP_EXTENSIONS_PY
    - name: Check NVIDIA libs
      shell: bash -l {0}
      run: |
        ldconfig -p | grep libcuda.so
        ls /.singularity.d/libs/
    - name: Mark env as ready
      shell: bash -l {0}
      if: steps.prepare_conda_env_paths.outputs.ENV_CACHED == 0
      run: echo $CONDA_PREFIX > $CONDA_ENV_LINK
    - name: Setup ccache
      shell: bash -l {0}
      run: |
        export CCACHE_DIR=$GHRUNNER_SHARED_DIR/ccache
        echo "CCACHE_DIR=$CCACHE_DIR" >> ${GITHUB_ENV}
        mkdir -p $CCACHE_DIR
        ccache -s
    - name: Build
      shell: bash -l {0}
      run: |
        PYTORCH_NVCC="$CONDA_PREFIX/bin/nvcc-ccache" CXX="g++-ccache" TORCH_CUDA_ARCH_LIST=${{ inputs.arch }} python -m pip install -v --no-build-isolation -e .
    - name: Check for PyTorch stable symbols
      shell: bash -l {0}
      run: |
        bad_symbols=$(nm --dynamic --undefined-only --demangle xformers/_C.so | grep --extended-regexp "(torch|at|c10|c10d)::" || true)
        if [[ $bad_symbols != "" ]]; then echo "These non-stable PyTorch symbols made it into the xFormers shared library:"; echo $bad_symbols; exit 1; fi
    - name: Build info
      run: |
        printenv
        python -m xformers.info
        python xformers/_triton_version_fairinternal.py
        ccache -s
      shell: bash -l {0}


================================================
FILE: .github/compute_wheel_version.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import subprocess
from pathlib import Path
from typing import Optional

# TODO: consolidate with the code in build_conda.py
THIS_PATH = Path(__file__).resolve()
version_from_file = (THIS_PATH.parents[1] / "version.txt").read_text().strip()


def get_tagged_version() -> Optional[str]:
    """
    Return the version named by a git tag pointing exactly at HEAD.

    Runs ``git describe --tags --exact-match HEAD``. The result is the tag
    without its leading ``v``. ``None`` is returned when git exits with an
    error (no exact tag) or when the tag does not start with ``v``.
    """
    describe_cmd = ["git", "describe", "--tags", "--exact-match", "HEAD"]
    try:
        raw_output = subprocess.check_output(
            describe_cmd,
            text=True,
            stderr=subprocess.DEVNULL,  # keep git's "no tag" message quiet
        )
    except subprocess.CalledProcessError:
        # git exits non-zero when HEAD is not exactly on a tag
        return None

    tag_name = raw_output.strip()
    return tag_name[1:] if tag_name.startswith("v") else None


def get_dev_version() -> str:
    """
    Build a development version string of the form ``<version>.dev<N>``.

    ``<version>`` is the module-level ``version_from_file`` and ``<N>`` is the
    commit count printed by ``git rev-list --count HEAD``.
    """
    # The base version must not already carry a ".dev" suffix.
    assert ".dev" not in version_from_file
    count_cmd = ["git", "rev-list", "--count", "HEAD"]
    commit_count = subprocess.check_output(count_cmd, text=True).strip()
    return version_from_file + ".dev" + commit_count


if __name__ == "__main__":
    # Prints the wheel version on stdout without a trailing newline.
    # --source selects where the version may come from:
    #   "tag"     -> only an exact git tag (error if there is none)
    #   "dev"     -> always the development version
    #   "tag,dev" -> the tag when there is one, else the development version
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--source", choices=["tag", "dev", "tag,dev"], required=False, default="tag,dev"
    )
    args = parser.parse_args()

    tagged_version = get_tagged_version() if "tag" in args.source else None
    if args.source == "tag" and tagged_version is None:
        raise ValueError("No tag found")

    # get_dev_version() is only evaluated when no tagged version is available.
    print(
        tagged_version if tagged_version is not None else get_dev_version(),
        end="",
    )


================================================
FILE: .github/gpu_benchmark_diff.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.

import glob
import os
import subprocess

import xformers.benchmarks.utils as utils


class NamedObject:
    """Bare object whose only state is a ``__name__`` attribute."""

    def __init__(self, name) -> None:
        # Stored under the dunder name so the instance exposes ``__name__``
        # the way a function object does.
        setattr(self, "__name__", name)


def git_file_at(filename: str, ref: str) -> str:
    """
    Return the stripped output of ``git show <ref>:<filename>``.

    An empty string is returned when git exits with an error, e.g. because
    the file does not exist in that revision.
    """
    blob_spec = f"{ref}:{filename}"
    try:
        contents = subprocess.check_output(["git", "show", blob_spec], text=True)
    except subprocess.CalledProcessError:
        return ""  # File does not exist in that revision
    return contents.strip()


# Commit hash of the base branch, read from the GITHUB_BASE_REF env variable.
GITHUB_BASE_REF = subprocess.check_output(
    ["git", "rev-parse", "origin/" + os.environ["GITHUB_BASE_REF"]], text=True
).strip()
# Directory containing one sub-directory of CSV files per benchmark.
XFORMERS_BENCHMARKS_CACHE = os.environ["XFORMERS_BENCHMARKS_CACHE"]
# Commit hash of the current checkout.
GITHUB_CURRENT_REF = subprocess.check_output(
    ["git", "rev-parse", "HEAD"], text=True
).strip()

# For every CSV whose content differs between the two commits, write both
# versions next to it and run the comparison on them.
_csv_pattern = os.path.join(XFORMERS_BENCHMARKS_CACHE, "*", "*.csv")
for csv_path in glob.glob(_csv_pattern):
    old_contents = git_file_at(csv_path, ref=GITHUB_BASE_REF)
    new_contents = git_file_at(csv_path, ref=GITHUB_CURRENT_REF)
    # Skip files that are absent at the base commit or that did not change.
    if old_contents in ("", new_contents):
        continue
    # The benchmark is named after the directory that holds the CSV.
    bench_dir_name = os.path.basename(os.path.dirname(csv_path))

    banner = "#" * 100
    print(banner)
    print(f"# UPDATED: {csv_path}")
    print(banner)

    before_path = csv_path.replace("reference", "before")
    now_path = csv_path.replace("reference", "now")
    for target_path, payload in ((before_path, old_contents), (now_path, new_contents)):
        with open(target_path, "w+") as fd:
            fd.write(payload)

    # The comparison is given the file names without their ".csv" suffix.
    suffix_len = len(".csv")
    utils.benchmark_run_and_compare(
        benchmark_fn=NamedObject(bench_dir_name),
        cases=[],
        compare=[
            os.path.basename(before_path)[:-suffix_len],
            os.path.basename(now_path)[:-suffix_len],
        ],
    )


================================================
FILE: .github/run-clang-format.py
================================================
#!/usr/bin/env python3
"""
MIT License
Copyright (c) 2017 Guillaume Papin
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

"""A wrapper script around clang-format, suitable for linting multiple files
and to use for continuous integration.
This is an alternative API for the clang-format command line.
It runs over multiple files and directories in parallel.
A diff output is produced and a sensible exit code is returned.
"""

import argparse  # noqa: E402
import difflib  # noqa: E402
import fnmatch  # noqa: E402
import io  # noqa: E402
import multiprocessing  # noqa: E402
import os  # noqa: E402
import signal  # noqa: E402
import subprocess  # noqa: E402
import sys  # noqa: E402
import traceback  # noqa: E402
from functools import partial  # noqa: E402
from subprocess import DEVNULL  # noqa: E402

DEFAULT_EXTENSIONS = "c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx,cu"


class ExitStatus:
    """Exit codes returned by ``main``: clean run, diff found, or failure."""

    SUCCESS, DIFF, TROUBLE = range(3)


def list_files(files, recursive=False, extensions=None, exclude=None):
    """Expand ``files`` into the list of paths to check.

    Entries that are not walked (``recursive`` is false, or the entry is not
    a directory) are returned unchanged. Directories walked recursively
    contribute every contained file whose extension (without the dot) is in
    ``extensions`` and whose path matches none of the ``exclude`` glob
    patterns; sub-directories matching an exclude pattern are not descended
    into.
    """
    wanted_exts = [] if extensions is None else extensions
    skip_patterns = [] if exclude is None else exclude

    def is_excluded(path):
        return any(fnmatch.fnmatch(path, pat) for pat in skip_patterns)

    collected = []
    for entry in files:
        if not (recursive and os.path.isdir(entry)):
            collected.append(entry)
            continue
        for root, subdirs, names in os.walk(entry):
            # Pruning the directory list in place stops os.walk() from
            # listing excluded directories at all.
            subdirs[:] = [
                d for d in subdirs if not is_excluded(os.path.join(root, d))
            ]
            for name in names:
                candidate = os.path.join(root, name)
                if is_excluded(candidate):
                    continue
                if os.path.splitext(candidate)[1][1:] in wanted_exts:
                    collected.append(candidate)
    return collected


def make_diff(file, original, reformatted):
    """Return the unified diff (3 lines of context) between two line lists.

    The result is a list of diff lines; it is empty when ``original`` and
    ``reformatted`` are equal.
    """
    from_label = "a/{}\t(original)".format(file)
    to_label = "b/{}\t(reformatted)".format(file)
    delta = difflib.unified_diff(
        original, reformatted, fromfile=from_label, tofile=to_label, n=3
    )
    return [line for line in delta]


class DiffError(Exception):
    """Failure while diffing one file; ``errs`` holds any stderr lines."""

    def __init__(self, message, errs=None):
        super().__init__(message)
        self.errs = errs if errs else []


class UnexpectedError(Exception):
    """Wraps an unforeseen exception together with its formatted traceback."""

    def __init__(self, message, exc=None):
        super().__init__(message)
        self.exc = exc
        # Captured here so the traceback text is available on the instance
        # after the exception has left the frame that raised it.
        self.formatted_traceback = traceback.format_exc()


def run_clang_format_diff_wrapper(args, file):
    """Run ``run_clang_format_diff`` and normalize its failures.

    ``DiffError`` propagates unchanged; any other exception is re-raised as
    an ``UnexpectedError`` whose message names the file and exception class.
    """
    try:
        return run_clang_format_diff(args, file)
    except DiffError:
        raise
    except Exception as err:
        description = "{}: {}: {}".format(file, err.__class__.__name__, err)
        raise UnexpectedError(description, err)


def run_clang_format_diff(args, file):
    """Run clang-format on ``file`` and diff its output against the file.

    Args:
        args: parsed command-line namespace; only ``clang_format_executable``
            is read here.
        file: path of the file to check.

    Returns:
        A ``(diff_lines, stderr_lines)`` tuple, where ``diff_lines`` is the
        result of ``make_diff`` (empty when the file is already formatted).

    Raises:
        DiffError: if the file cannot be read, clang-format cannot be
            started, or it exits with a non-zero status (the captured stderr
            lines are attached as ``errs`` in that last case).
    """
    try:
        with io.open(file, "r", encoding="utf-8") as f:
            original = f.readlines()
    except IOError as exc:
        raise DiffError(str(exc))
    invocation = [args.clang_format_executable, file]

    # Use of utf-8 to decode the process output.
    #
    # Hopefully, this is the correct thing to do.
    #
    # It's done due to the following assumptions (which may be incorrect):
    # - clang-format will return the bytes read from the files as-is,
    #   without conversion, and it is already assumed that the files use utf-8.
    # - if the diagnostics were internationalized, they would use utf-8:
    #   > Adding Translations to Clang
    #   >
    #   > Not possible yet!
    #   > Diagnostic strings should be written in UTF-8,
    #   > the client can translate to the relevant code page if needed.
    #   > Each translation completely replaces the format string
    #   > for the diagnostic.
    #   > -- http://clang.llvm.org/docs/InternalsManual.html#internals-diag-translation

    try:
        proc = subprocess.Popen(
            invocation,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
            encoding="utf-8",
        )
    except OSError as exc:
        raise DiffError(
            "Command '{}' failed to start: {}".format(
                subprocess.list2cmdline(invocation), exc
            )
        )

    # communicate() drains stdout and stderr concurrently and then waits for
    # the process. Reading stdout to EOF before touching stderr can deadlock:
    # once the stderr pipe buffer fills, the child blocks on its write while
    # we are still blocked waiting for its stdout to close.
    stdout_text, stderr_text = proc.communicate()
    # StringIO.readlines() splits on "\n" only, exactly like reading the text
    # pipes line by line (str.splitlines() would also split on form feeds and
    # other separators and could produce spurious diffs).
    outs = io.StringIO(stdout_text).readlines()
    errs = io.StringIO(stderr_text).readlines()
    if proc.returncode:
        raise DiffError(
            "Command '{}' returned non-zero exit status {}".format(
                subprocess.list2cmdline(invocation), proc.returncode
            ),
            errs,
        )
    return make_diff(file, original, outs), errs


def bold_red(s):
    """Wrap ``s`` in the ANSI escapes for bold red text, then reset."""
    return "".join(("\x1b[1m\x1b[31m", s, "\x1b[0m"))


def colorize(diff_lines):
    """Yield each unified-diff line wrapped in an ANSI color for its role.

    File headers become bold, hunk headers cyan, additions green and
    removals red; every other line is yielded unchanged.
    """

    def paint(code, text):
        return code + text + "\x1b[0m"

    for line in diff_lines:
        # File headers must be tested first: they also start with "+"/"-".
        if line[:4] in ("--- ", "+++ "):
            colored = paint("\x1b[1m", line)
        elif line.startswith("@@ "):
            colored = paint("\x1b[36m", line)
        elif line.startswith("+"):
            colored = paint("\x1b[32m", line)
        elif line.startswith("-"):
            colored = paint("\x1b[31m", line)
        else:
            colored = line
        yield colored


def print_diff(diff_lines, use_color):
    """Write the diff lines to stdout, colorized when ``use_color`` is true."""
    rendered = colorize(diff_lines) if use_color else diff_lines
    sys.stdout.writelines(rendered)


def print_trouble(prog, message, use_colors):
    """Print ``<prog>: error: <message>`` to stderr, with the ``error:`` tag
    in bold red when ``use_colors`` is true."""
    label = bold_red("error:") if use_colors else "error:"
    print(f"{prog}: {label} {message}", file=sys.stderr)


def main():
    """Command-line entry point: run clang-format over the requested files.

    Parses the command line, checks that the clang-format executable can be
    run, expands the file arguments with ``list_files`` and diffs every file
    against its clang-format output, either inline or in a process pool.

    Returns:
        ``ExitStatus.SUCCESS`` if no file needs reformatting (or there is
        nothing to check), ``ExitStatus.DIFF`` if at least one diff was
        produced, ``ExitStatus.TROUBLE`` if clang-format could not be run or
        any file failed.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--clang-format-executable",
        metavar="EXECUTABLE",
        help="path to the clang-format executable",
        default="clang-format",
    )
    parser.add_argument(
        "--extensions",
        help="comma separated list of file extensions (default: {})".format(
            DEFAULT_EXTENSIONS
        ),
        default=DEFAULT_EXTENSIONS,
    )
    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="run recursively over directories",
    )
    parser.add_argument("files", metavar="file", nargs="+")
    parser.add_argument("-q", "--quiet", action="store_true")
    parser.add_argument(
        "-j",
        metavar="N",
        type=int,
        default=0,
        help="run N clang-format jobs in parallel" " (default number of cpus + 1)",
    )
    parser.add_argument(
        "--color",
        default="auto",
        choices=["auto", "always", "never"],
        help="show colored diff (default: auto)",
    )
    parser.add_argument(
        "-e",
        "--exclude",
        metavar="PATTERN",
        action="append",
        default=[],
        help="exclude paths matching the given glob-like pattern(s)"
        " from recursive search",
    )

    args = parser.parse_args()

    # use default signal handling, like diff return SIGINT value on ^C
    # https://bugs.python.org/issue14229#msg156446
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    try:
        signal.SIGPIPE
    except AttributeError:
        # compatibility, SIGPIPE does not exist on Windows
        pass
    else:
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)

    colored_stdout = False
    colored_stderr = False
    if args.color == "always":
        colored_stdout = True
        colored_stderr = True
    elif args.color == "auto":
        colored_stdout = sys.stdout.isatty()
        colored_stderr = sys.stderr.isatty()

    # Fail early, with a single message, if clang-format cannot be run at all.
    version_invocation = [args.clang_format_executable, str("--version")]
    try:
        subprocess.check_call(version_invocation, stdout=DEVNULL)
    except subprocess.CalledProcessError as e:
        print_trouble(parser.prog, str(e), use_colors=colored_stderr)
        return ExitStatus.TROUBLE
    except OSError as e:
        print_trouble(
            parser.prog,
            "Command '{}' failed to start: {}".format(
                subprocess.list2cmdline(version_invocation), e
            ),
            use_colors=colored_stderr,
        )
        return ExitStatus.TROUBLE

    retcode = ExitStatus.SUCCESS
    files = list_files(
        args.files,
        recursive=args.recursive,
        exclude=args.exclude,
        extensions=args.extensions.split(","),
    )

    if not files:
        # Nothing to check is a success; say so explicitly instead of
        # relying on sys.exit(None) meaning exit status 0.
        return ExitStatus.SUCCESS

    njobs = args.j
    if njobs == 0:
        njobs = multiprocessing.cpu_count() + 1
    njobs = min(len(files), njobs)

    worker = partial(run_clang_format_diff_wrapper, args)
    if njobs == 1:
        # execute directly instead of in a pool,
        # less overhead, simpler stacktraces
        #
        # map() is used rather than a generator expression on purpose: a
        # generator is finalized by the first exception raised inside it, so
        # one DiffError would silently skip every remaining file. A map
        # iterator simply moves on to the next file, which matches the
        # per-item error behavior of Pool.imap_unordered below.
        it = map(worker, files)
        pool = None
    else:
        pool = multiprocessing.Pool(njobs)
        it = pool.imap_unordered(worker, files)
    while True:
        try:
            outs, errs = next(it)
        except StopIteration:
            break
        except DiffError as e:
            print_trouble(parser.prog, str(e), use_colors=colored_stderr)
            retcode = ExitStatus.TROUBLE
            sys.stderr.writelines(e.errs)
        except UnexpectedError as e:
            print_trouble(parser.prog, str(e), use_colors=colored_stderr)
            sys.stderr.write(e.formatted_traceback)
            retcode = ExitStatus.TROUBLE
            # stop at the first unexpected error,
            # something could be very wrong,
            # don't process all files unnecessarily
            if pool:
                pool.terminate()
            break
        else:
            sys.stderr.writelines(errs)
            if not outs:
                continue
            if not args.quiet:
                print_diff(outs, use_color=colored_stdout)
            if retcode == ExitStatus.SUCCESS:
                retcode = ExitStatus.DIFF
    if pool:
        # Release the worker processes instead of leaving them to be reaped
        # at interpreter shutdown; close() is a no-op after terminate().
        pool.close()
        pool.join()
    return retcode


# Script entry point: the value returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())


================================================
FILE: .github/run_benchmark_wrapper.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.

import glob
import os
import shlex
import subprocess
import sys

import torch

import xformers

# Usage: run_benchmark_wrapper.py <benchmark_script> <benchmark_fn>
#
# Runs xformers/benchmarks/<benchmark_script> for <benchmark_fn>, labelled with
# the current git commit, comparing against the cached reference results for
# the same GPU (if any) and failing on regression. Afterwards the cached
# reference files are replaced by the results of this run.

# Build failed - return early
if not xformers._has_cpp_library:
    print("xFormers wasn't built correctly - can't run benchmarks")
    sys.exit(0)

benchmark_script = os.path.join("xformers", "benchmarks", sys.argv[1])
benchmark_fn = sys.argv[2]
# The first 8 characters of the HEAD commit hash label this run's result files
label = subprocess.check_output(["git", "rev-parse", "HEAD"], text=True).strip()[:8]
cmd = [
    sys.executable,
    benchmark_script,
    "--label",
    label,
    "--fn",
    benchmark_fn,
    "--fail_if_regression",
    "--quiet",
]
# GPU name with " ", "-" and "." replaced by "_"; used below to select the
# reference files recorded on the same hardware.
env = (
    torch.cuda.get_device_name(torch.cuda.current_device())
    .replace(" ", "_")
    .replace("-", "_")
    .replace(".", "_")
)

# Figure out the name of the baseline
pattern = os.path.join(os.environ["XFORMERS_BENCHMARKS_CACHE"], benchmark_fn, "*.csv")
ref_names = glob.glob(pattern)
baseline_names = set(
    os.path.basename(s)[: -len(".csv")]
    for s in ref_names
    # Only compare to benchmark data on same hardware
    if env in os.path.basename(s)
)
if baseline_names:
    if len(baseline_names) > 1:
        # Sorted so the message is deterministic (set order is arbitrary)
        raise RuntimeError(
            f"Supplied more than one reference for this benchmark: {','.join(sorted(baseline_names))}"
        )
    cmd += ["--compare", ",".join(baseline_names)]

print("EXEC:", shlex.join(cmd))

# Remember a failing exit status but keep going, so the reference files are
# still rotated below before exiting with that status.
retcode = 0
try:
    subprocess.check_call(cmd)
except subprocess.CalledProcessError as e:
    retcode = e.returncode

# Remove original benchmark files
for f in ref_names:
    os.remove(f)
# Rename new ones as 'ref'
for f in glob.glob(pattern):
    # Only rewrite the file name: replacing on the full path would also alter
    # any directory component that happens to contain the label.
    directory, filename = os.path.split(f)
    os.rename(f, os.path.join(directory, filename.replace(label, "reference")))

sys.exit(retcode)


================================================
FILE: .github/selective_ci/requirements.txt
================================================
GitPython


================================================
FILE: .github/selective_ci/selective_ci.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import fnmatch
import os
from dataclasses import dataclass, field
from pathlib import Path

import git


@dataclass
class ComponentInfo:
    """
    A component is deemed to have changed if any of its
    files or dependencies have changed.
    If it has not changed, its files will be removed.

    All entries of `files` and `dependencies` are `fnmatch` patterns
    matched against repository-relative paths.
    """

    # Label printed next to the SKIP/TEST verdict
    name: str
    # These files will be deleted if the component is not enabled
    files: list[str]
    # A modified file matching any of these patterns also enables the component;
    # unlike `files`, matching paths are never deleted
    dependencies: list[str]
    # Variables appended to the `GITHUB_ENV` file (or printed when `GITHUB_ENV`
    # is not set) when the component is skipped
    disable_set_env: dict[str, str] = field(default_factory=dict)


# fnmatch patterns that are not owned by a single component
COMMON_PATTERNS = [
    # All components will be tested if something in there changes
    "setup.py",
]

# Components that can be skipped independently by the selective CI.
# See `ComponentInfo` for the meaning of each field.
COMPONENTS = [
    ComponentInfo(
        name="attention",
        files=[
            "tests/test_mem_eff_attention.py",
            "tests/test_find_sparse_locations*.py",
            "tests/test_block_sparse_mem_eff_attention*.py",
            "tests/test_attention_patterns.py",
            "tests/test_rope_padded.py",
            "tests/test_tree_attention*.py",
            "tests/test_fmha*.py",
        ],
        dependencies=[
            "xformers/ops/fmha/*",
            "third_party/cutlass",
            "third_party/composable_kernel_tiled",
            "xformers/csrc/attention/*",
            "xformers/triton/*",
        ],
        # Exported when the attention component is skipped
        disable_set_env={
            "XFORMERS_DISABLE_FLASH_ATTN": "1",
        },
    ),
    ComponentInfo(
        name="sp24",
        # Note: the C++ sources are listed under `files`, so they are deleted
        # (not just left untested) when this component is skipped
        files=[
            "tests/test_sparsity24.py",
            "xformers/csrc/sparse24/*",
        ],
        dependencies=[
            "xformers/ops/sp24.py",
        ],
    ),
    ComponentInfo(
        name="sequence_parallel_fused",
        files=[
            "tests/test_seqpar.py",
            "tests/test_sequence_parallel_fused_ops.py",
            "tests/test_tiled_matmul.py",
        ],
        dependencies=[
            "tests/multiprocessing_utils.py",
            "xformers/ops/sequence_parallel_fused_ops.py",
        ],
    ),
]

# This script lives in `.github/selective_ci/`, so the repository root is
# three levels above this file.
repo_root_path = Path(__file__).parent.parent.parent.resolve().absolute()
repo = git.Repo(repo_root_path)


def list_files_in_commit(commit: git.Commit):
    """Return the path, relative to the repository root, of every blob
    reachable from ``commit``'s tree.

    NOTE(review): each blob path is made absolute against the current working
    directory before being re-expressed relative to ``repo_root_path``, so
    this presumably expects to run from the repository root - confirm.
    """
    paths = []
    pending = [commit.tree]
    while pending:
        current = pending.pop()
        # files directly inside this tree
        paths.extend(
            str(Path(blob.path).absolute().relative_to(repo_root_path))
            for blob in current.blobs
        )
        # sub-directories are visited on later iterations
        pending.extend(current.trees)
    return paths


def check_patterns_are_valid(patterns):
    """Check that every pattern matches at least one entry of ``all_files``.

    The check only runs when the ``GITHUB_REPOSITORY`` environment variable
    is ``fairinternal/xformers``; otherwise the function returns immediately.

    Raises:
        AssertionError: for the first pattern (in the given order) that
            matches no file.
    """
    # Only check patterns in `fairinternal` repo
    if os.environ.get("GITHUB_REPOSITORY", "") != "fairinternal/xformers":
        return
    for pattern in patterns:
        if not any(fnmatch.fnmatch(f, pattern) for f in all_files):
            # Raised explicitly rather than via `assert False`, which is
            # stripped under `python -O` and would silently disable the check.
            raise AssertionError(f"Pattern does not match any file: `{pattern}`")


parser = argparse.ArgumentParser("xFormers selective CI")
parser.add_argument("--base_commit", default="origin/main")
args = parser.parse_args()

base_commit = repo.rev_parse(args.base_commit)
all_files = list_files_in_commit(repo.head.commit) + [sm.path for sm in repo.submodules]

# Every path touched by the diff between HEAD and the base commit,
# on either side of the change.
all_modified_files = set()
for change in repo.head.commit.diff(base_commit):
    all_modified_files.update(
        path for path in (change.a_path, change.b_path) if path is not None
    )

check_patterns_are_valid(COMMON_PATTERNS)
for component in COMPONENTS:
    # Sanity check that all files exist
    check_patterns_are_valid(component.files + component.dependencies)

    # The component is tested as soon as one modified file matches one of
    # the common patterns, its own files or its dependencies.
    trigger_patterns = COMMON_PATTERNS + component.files + component.dependencies
    skip_module = not any(
        fnmatch.fnmatch(modified, trigger)
        for trigger in trigger_patterns
        for modified in all_modified_files
    )
    print(component.name, "SKIP" if skip_module else "TEST")
    if not skip_module:
        continue

    # Skipped component: delete the files it owns
    for candidate in all_files:
        owned = any(fnmatch.fnmatch(candidate, pat) for pat in component.files)
        if owned and Path(candidate).exists():
            Path(candidate).unlink()

    # Skipped component: export its variables to later workflow steps,
    # or just print them when not running under GitHub Actions
    for env_k, env_v in component.disable_set_env.items():
        if "GITHUB_ENV" in os.environ:
            with open(os.environ["GITHUB_ENV"], "a") as fd:
                fd.write(f"{env_k}={env_v}\n")
        else:
            print(f"{env_k}={env_v}")


================================================
FILE: .github/workflows/gh-pages.yml
================================================
# Builds the Sphinx documentation on every pull request and on pushes to
# `main`; the result is only published (to GitHub Pages) for non-PR events.
name: Build & deploy documentation

on:
  push:
    branches:
      - main
  pull_request:

jobs:
  deploy:
    runs-on: ubuntu-24.04
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}

    steps:
      - uses: actions/checkout@v2

      - name: Setup Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.9'

      - name: Upgrade pip
        run: |
          # install pip=>20.1 to use "pip cache dir"
          python3 -m pip install --upgrade pip

      # Exposes the pip cache location as `steps.pip-cache.outputs.dir`.
      # Written to $GITHUB_OUTPUT: the `::set-output` workflow command is
      # deprecated.
      - name: Get pip cache dir
        id: pip-cache
        run: echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"

      - name: Cache dependencies
        uses: actions/cache@v4
        with:
          path: ${{ steps.pip-cache.outputs.dir }}
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Build docs
        run: |
          cd docs
          pip install --progress-bar off -r requirements.txt
          make help
          make html

      # Publishing is skipped for pull requests: they only validate the build
      - name: Deploy
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: docs/build/html
        if: github.event_name != 'pull_request'


================================================
FILE: .github/workflows/gpu_test_gh.yml
================================================
# Builds xFormers from source and runs the unit tests on GPU runners.
# Triggered manually, on pushes to `main`, and on pull requests touching
# the paths listed below.
name: gpu_test_gh

on:
  workflow_dispatch: {}
  pull_request:
    paths:
      - "xformers/**"
      - "!xformers/benchmarks/**"
      - "!xformers/version.txt"
      - ".github/workflows/gpu_test_gh*"
      - "tests/**"
      - "setup.py"
      - "requirements*.txt"
      - "third_party/**"
  push:
    branches:
      - main

env:
  XFORMERS_BUILD_TYPE: "Release"
  CI: "1"
  TORCHINDUCTOR_COMPILE_THREADS: "1"

jobs:
  gpu_test_gh:
    strategy:
      fail-fast: false
      matrix:
        # One entry per GPU runner; `sm` is passed as TORCH_CUDA_ARCH_LIST
        # when building
        gpu:
          - runner: "h100-256GB"
            sm: "9.0a"
          - runner: "4-core-ubuntu-gpu-t4"
            sm: "7.5"
        python: [3.11]

    name: test_sm${{ matrix.gpu.sm }}
    runs-on: ${{ matrix.gpu.runner }}

    timeout-minutes: 360
    defaults:
      run:
        # Login shell: the steps below append the micromamba hook and the
        # environment activation to ~/.profile
        shell: bash -l {0}
    steps:
      - name: Recursive checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
          path: "."
          fetch-depth: 0 # We need commits history as well
      - run: nvidia-smi
      - name: Install micromamba
        run: |
          set -ex
          curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
          echo "eval \"\$($(pwd)/bin/micromamba shell hook --shell bash)\"" >> ~/.profile
          cat ~/.profile
      - name: Create environment
        run: |
          set -ex
          micromamba config set channel_priority strict
          micromamba create -n env python=${{ matrix.python }} \
            zlib pip ninja ccache=4.8 cuda-toolkit \
            -c "nvidia/label/cuda-12.6" -c conda-forge -q -y
      - name: Activate environment
        shell: bash -l {0}
        run: |
          echo "micromamba activate env" >> ~/.profile
          echo "==== .profile ====="
          cat ~/.profile
      # Pull requests only: runs the selective CI script against the PR base
      # commit, before xFormers is built
      - name: Selective build/tests
        if: github.event_name == 'pull_request'
        run: |
          pip install -r .github/selective_ci/requirements.txt
          python .github/selective_ci/selective_ci.py --base_commit ${{ github.event.pull_request.base.sha }}
      - name: Setup test requirements
        run: |
          which python
          which nvcc
          pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126
          pip install --pre flash_attn_3 --index-url https://download.pytorch.org/whl/cu126
          pip install -r requirements-test.txt --progress-bar off
      # Editable build of xFormers for the GPU architecture of this runner
      - run: TORCH_CUDA_ARCH_LIST=${{ matrix.gpu.sm }} python -m pip install -v --no-build-isolation -e .
        env:
          TORCH_DONT_CHECK_COMPILER_ABI: 1
      - run: python -m xformers.info
      - name: xFormers import should not init cuda context
        run: |
          # NOTE: we check GPU version by default to determine if triton should be used
          # and this initializes CUDA context, unless we set `XFORMERS_ENABLE_TRITON`
          XFORMERS_ENABLE_TRITON=1 python -c "import xformers; import xformers.ops; import torch; assert not torch.cuda.is_initialized()"
      # Fails if the built extension has undefined symbols from the
      # torch/at/c10/c10d C++ namespaces
      - name: Check for PyTorch stable symbols
        run: |
          bad_symbols=$(nm --dynamic --undefined-only --demangle xformers/_C.so | grep --extended-regexp "(torch|at|c10|c10d)::" || true)
          if [[ $bad_symbols != "" ]]; then echo "These non-stable PyTorch symbols made it into the xFormers shared library:"; echo $bad_symbols; exit 1; fi
      - name: Unit tests
        run: |
          python -m pytest --verbose --random-order-bucket=global --maxfail=20 --junitxml=test-results/junit.xml --cov-report=xml --cov=./ tests
      - name: Publish Test Report
        uses: mikepenz/action-junit-report@v3
        if: success() || failure() # always run even if the previous step fails
        with:
          report_paths: 'test-results/*.xml'


================================================
FILE: .github/workflows/linters.yml
================================================
# Runs the reusable lint workflow on every pull request and on pushes to `main`.
on:
  pull_request: {}
  push:
    branches:
      - main

jobs:
  repo:
    # All lint steps live in the reusable workflow; no inputs are passed,
    # so its optional `pre-script` step is skipped.
    uses: ./.github/workflows/linters_reusable.yml


================================================
FILE: .github/workflows/linters_reusable.yml
================================================
# Reusable lint workflow (ufmt, mypy, flake8, clang-format and a check for
# non-stable PyTorch includes). Each linter step runs even when a previous
# one failed, so a single run reports every problem.
name: lint

on:
  workflow_call:
    inputs:
      # Optional shell snippet executed before the dependencies are installed
      pre-script:
        type: string

jobs:
  linters:
    runs-on: ubuntu-22.04

    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Cleanup host
        run: |
          # Github's ubuntu-latest comes with a ton of stuff;
          # https://carlosbecker.com/posts/github-actions-disk-space suggests
          # this hotfix:
          df -h
          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
          # --force: there is nobody to answer the confirmation prompt in CI
          sudo docker builder prune --all --force
          df -h
      - name: Run pre-script
        if: ${{ inputs.pre-script }}
        run: ${{ inputs.pre-script }}
      # Triton is too slow to install, and beside it's not needed
      - run: sed -i '/triton/d' requirements-test.txt
      - name: Install deps
        run: pip install -r requirements-test.txt
      - name: ufmt
        if: success() || failure()
        run: ufmt check
      - name: mypy
        if: success() || failure()
        run: |
          python -m mypy --version
          python -m mypy --ignore-missing-imports --scripts-are-modules --pretty --exclude "(build|stubs|third_party|docs|examples|setup.py)" .
      - name: flake8
        if: success() || failure()
        run: python -m flake8 --config .flake8 --show-source --statistics
      - name: clang-format
        if: success() || failure()
        run: |
          pip install clang-format
          clang-format --version

          # apply to our files - excluding autogenerated files
          ./.github/run-clang-format.py -e "*fmha/autogen" -r xformers/csrc
      # Fails on any torch/ATen/c10 include outside torch/headeronly and
      # torch/csrc/stable (files under xformers/csrc/attention/hip_* are exempt)
      - name: PyTorch stable API includes
        if: success() || failure()
        run: |
          bad_files=$(git grep --extended-regex -e "#\s*include\s*<.*(torch|ATen|c10)" --and --not -e "#\s*include\s*<torch/(headeronly|csrc/stable)/" --files-with-matches -- ':(exclude)xformers/csrc/attention/hip_*' || true)
          if [[ $bad_files != "" ]]; then echo "These files contain non-stable PyTorch includes:"; echo $bad_files; exit 1; fi


================================================
FILE: .github/workflows/rocm_build.yml
================================================
# Builds ROCm wheels through the reusable `wheels_build.yml` workflow.
# Triggered manually, on pushes to `develop`, and on pull requests touching
# the paths listed below.
name: rocm-build

on:
  push:
    branches:
      - develop
  pull_request:
    paths:
      - ".github/compute_wheel_version.py"
      - ".github/workflows/rocm_build.yml"
      - ".github/workflows/wheels_build.yml"
      - "setup.py"
      - "requirements*.txt"
      - "xformers/csrc/attention/hip_fmha/**"
      - "third_party/composable_kernel_tiled/**"
  workflow_dispatch:

jobs:
  build:
    strategy:
      fail-fast: false
      # One wheel build per ROCm toolkit version
      matrix:
        os: ['ubuntu-alola']
        python: ['3.11']
        torch_version: ['2.10.0']
        toolkit_type: ['rocm']
        toolkit_short_version: ['7.0', '7.1']

    uses: ./.github/workflows/wheels_build.yml
    # Only runs in the rocm/xformers repository
    if: github.repository == 'rocm/xformers'
    with:
      os: ${{ matrix.os }}
      python: ${{ matrix.python }}
      torch_version: ${{ matrix.torch_version }}
      toolkit_type: ${{ matrix.toolkit_type }}
      toolkit_short_version: ${{ matrix.toolkit_short_version }}
      artifact_tag: ${{ github.run_id }}

  # Runs after `build` unless it was skipped
  clean:
    runs-on: 'ubuntu-alola'
    if: ${{ needs.build.result != 'skipped' }}
    needs: [build]
    steps:
      - name: Remove dangling Docker images
        run: |
          docker images -q -f dangling=true | xargs --no-run-if-empty docker rmi


================================================
FILE: .github/workflows/rocm_ci.yml
================================================
# Builds xFormers inside a ROCm PyTorch container on a self-hosted runner and
# runs tests/test_mem_eff_attention.py, archiving the HTML test report.
name: rocm-ci

on:
  pull_request:
    types: [labeled, synchronize, reopened]
  workflow_dispatch: {}
  push:
    branches:
      - main
      - develop

jobs:
  build:
    # Only runs in the rocm/xformers repository
    if: github.repository == 'rocm/xformers'
    runs-on: self-hosted-rocm-ci
    container:
      image: 'rocm/pytorch-nightly:latest'
      options: ' --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 8G --memory 32G '
    steps:
    # The repository is checked out into the `_xformers` sub-directory
    - uses: actions/checkout@v4
      with:
        path: '_xformers'
        submodules: 'recursive'
        set-safe-directory: true
        fetch-depth: 0
    - name: Get CPU info on Ubuntu
      if: contains(runner.os, 'linux')
      run: |
        cat /proc/cpuinfo
    # Diagnostic output only: the variables exported here do not persist
    # to later steps
    - name: Get env vars
      run: |
        echo GITHUB_WORKFLOW   = $GITHUB_WORKFLOW
        echo HOME              = $HOME
        echo PWD               = $PWD
        echo GITHUB_ACTION     = $GITHUB_ACTION
        echo GITHUB_ACTIONS    = $GITHUB_ACTIONS
        echo GITHUB_REPOSITORY = $GITHUB_REPOSITORY
        echo GITHUB_EVENT_NAME = $GITHUB_EVENT_NAME
        echo GITHUB_EVENT_PATH = $GITHUB_EVENT_PATH
        echo GITHUB_WORKSPACE  = $GITHUB_WORKSPACE
        echo GITHUB_SHA        = $GITHUB_SHA
        echo GITHUB_REF        = $GITHUB_REF

        export GIT_BRANCH=${GITHUB_BASE_REF:-${GITHUB_REF#refs/heads/}}
        echo GIT_BRANCH        = $GIT_BRANCH

        export ROCM_PATH=/opt/rocm
        echo ROCM_PATH         = $ROCM_PATH

        hipcc --version
        rocm-smi
        rocminfo | grep "gfx"

    # Creates the `xformers` conda environment; the later steps select it by
    # prepending its bin directory to PATH
    - name: Setup build env
      run: |
        conda create -n xformers python=3.11
        export PATH=/opt/conda/envs/xformers/bin:$PATH
        python -VV

        python -m pip install -U torch==2.10.0 --index-url=https://download.pytorch.org/whl/rocm7.1
        python -c "import torch; print(f'PyTorch version {torch.__version__}')"

        python -m pip install ninja scipy pytest pytest-html

    - name: Pre-build clean
      run: |
        cd _xformers
        git clean -ffdx
        cd ..

    - name: Build xformers
      run: |
        export PATH=/opt/conda/envs/xformers/bin:$PATH
        export MAX_JOBS=20

        python -m pip install -e ./_xformers --verbose
        python -m xformers.info

    - name: Run python tests
      run: |
        export PATH=/opt/conda/envs/xformers/bin:$PATH

        python -m pytest --html=test_mem_eff_attention.html --self-contained-html -rpfs ./_xformers/tests/test_mem_eff_attention.py

    # The report is uploaded and the checkout cleaned unless the run was
    # cancelled, i.e. also when the tests failed
    - name: Archive logs
      if: '!cancelled()'
      uses: actions/upload-artifact@v4
      with:
        name: test results
        path: test_mem_eff_attention.html

    - name: Post-build clean
      if: '!cancelled()'
      run: |
        cd _xformers
        git clean -ffdx
        cd ..

  # Runs after `build` unless it was skipped
  clean:
    runs-on: self-hosted-rocm-ci
    if: ${{ needs.build.result != 'skipped' }}
    needs: [build]
    steps:
      - name: Remove dangling Docker images
        run: |
          docker images -q -f dangling=true | xargs --no-run-if-empty docker rmi


================================================
FILE: .github/workflows/rocm_docker.yml
================================================
# Builds the image described by `Dockerfile.rocm` and pushes it to Docker Hub
# as `rocm/xformers:latest` on every push to `develop`.
name: Build and Publish ROCm Docker Image

on:
  push:
    branches:
      - develop

jobs:
  build-and-push:
    runs-on: rocm
    # Only runs in the rocm/xformers repository
    if: github.repository == 'rocm/xformers'
    steps:
      # NOTE(review): there is no checkout step; presumably the build action
      # fetches the repository itself as its build context - confirm.
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ vars.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          push: true
          tags: rocm/xformers:latest
          file: Dockerfile.rocm


================================================
FILE: .github/workflows/wheels.yml
================================================
name: wheels

on:
  pull_request:
    paths:
      - ".github/compute_wheel_version.py"
      - ".github/workflows/wheel*"
      - ".github/actions/setup-build-cuda/action.yml"
      - "setup.py"
      - "requirements*.txt"
  push:
    branches:
      - main
    tags:
      - "v[0-9]+*"

jobs:
  # Computes the build matrix (one entry per OS / torch / toolkit combination)
  # and exposes it as the `matrix` job output, as a JSON string.
  target_determinator:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
    - id: set-matrix
      shell: python
      run: |
        import json
        import os

        # All builds are python-version agnostic,
        # and built with python 3.10
        PYTHON_VERSION = "3.10"
        # NOTE: Don't forget to update `upload_pt`'s matrix
        # when changing the CUDA/ROCM versions below!
        CU_VERSIONS = ['126', '128', '130']
        ROCM_VERSIONS = ['7.1']

        include = []
        # `runner_os` rather than `os`, so the `os` module is not shadowed
        for runner_os in ['8-core-ubuntu', 'windows-8-core']:
          for torch_version in ['2.10.0']:
            # CUDA builds
            for cuda_short_version in CU_VERSIONS:
              # Compared as integers: string comparison is lexicographic
              # (e.g. "90" < "124" is False)
              if int(cuda_short_version) < 124 and "windows" in runner_os:
                print("Windows builder no longer compatible with cu<124")
                continue
              include.append(dict(
                os=runner_os,
                python=PYTHON_VERSION,
                torch_version=torch_version,
                toolkit_type="cuda",
                toolkit_short_version=cuda_short_version,
              ))
              print(include[-1])
            # ROCM builds
            for rocm_short_version in ROCM_VERSIONS:
              if runner_os == 'windows-8-core':
                continue
              include.append(dict(
                os="16-core-ubuntu",  # use for ROCm wheels only to avoid CI timeouts
                python=PYTHON_VERSION,
                torch_version=torch_version,
                toolkit_type="rocm",
                toolkit_short_version=rocm_short_version,
              ))
              print(include[-1])
        matrix = {'include': include}
        print(json.dumps(matrix))
        # One `name=value` line per output; terminate it so anything appended
        # to the file afterwards starts on its own line.
        with open(os.environ["GITHUB_OUTPUT"], "a") as fd:
          fd.write("matrix=" + json.dumps(matrix) + "\n")
  build:
    # Calls the reusable build workflow once per entry of the generated matrix.
    needs: target_determinator
    # Forks only build on pull requests; the upstream repository always builds.
    if: github.event_name == 'pull_request' || github.repository == 'facebookresearch/xformers'
    strategy:
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
      matrix: ${{ fromJson(needs.target_determinator.outputs.matrix) }}
    uses: ./.github/workflows/wheels_build.yml
    with:
      toolkit_type: ${{ matrix.toolkit_type }}
      toolkit_short_version: ${{ matrix.toolkit_short_version }}
      torch_version: ${{ matrix.torch_version }}
      python: ${{ matrix.python }}
      os: ${{ matrix.os }}

  upload_pip:
    # Hands the artifacts matching `filter` to the PyPI upload workflow.
    # `execute` is false on pull requests and outside the upstream repository.
    needs: build
    uses: ./.github/workflows/wheels_upload_pip.yml
    secrets:
      twine_password: ${{ secrets.PYPI_TOKEN }}
    with:
      filter: "*torch2.10.0+cu128*"
      twine_username: __token__
      execute: ${{ github.event_name != 'pull_request' && github.repository == 'facebookresearch/xformers' }}

  upload_pt:
    # Hands each toolkit flavour to the S3 upload workflow, one invocation per
    # suffix. `execute` is only true for tags on the upstream repository.
    needs: build
    strategy:
      fail-fast: false
      matrix:
        # Keep in sync with CU_VERSIONS / ROCM_VERSIONS in `target_determinator`.
        suffix: [cu126, cu128, cu130, rocm7.1]
    uses: ./.github/workflows/wheels_upload_s3.yml
    with:
      filter: "*torch2.10.0+${{ matrix.suffix }}*"
      s3_path: s3://pytorch/whl/${{ matrix.suffix }}/
      aws_s3_cp_extra_args: --acl public-read
      aws_role: "arn:aws:iam::749337293305:role/pytorch_bot_uploader_role"
      execute: ${{ github.ref_type == 'tag' && github.repository == 'facebookresearch/xformers' }}


================================================
FILE: .github/workflows/wheels_build.yml
================================================
# Reusable workflow: builds one wheel for a given runner / Python / PyTorch / toolkit.
name: wheels_build

on:
  workflow_call:
    inputs:
      # Runner label; the job's `runs-on` is set to this value.
      os:
        type: string
        required: true
      # Python version; used to build the interpreter name on ubuntu runners.
      python:
        type: string
        required: true
      torch_version:
        description: "Example: 1.13.1"
        type: string
        required: true
      toolkit_type:
        description: "Example: cuda for cuda, rocm for rocm"
        type: string
        required: true
      toolkit_short_version:
        description: "Example: 117 for 11.7"
        type: string
        required: true
      # Appended to the name of the uploaded artifact.
      artifact_tag:
        type: string
        default: "facebookresearch"

# this yaml file can be cleaned up using yaml anchors, but they're not supported in github actions yet
# https://github.com/actions/runner/issues/1182

# Workflow-wide environment, visible to every step of every job below.
env:
  # you need at least cuda 5.0 for some of the stuff compiled here.
  # NOTE(review): "cuda 5.0" presumably means compute capability 5.0; the
  # baseline list below actually starts at 7.5 — confirm and update the comment.
  # Baseline architectures for CUDA builds (empty for other toolkits); the
  # first steps of the `build` job append more depending on the CUDA version.
  TORCH_CUDA_ARCH_LIST: ${{ contains(inputs.toolkit_type, 'cuda') && '7.5 8.0+PTX' || '' }}
  # Set only for ROCm builds (empty otherwise).
  HIP_ARCHITECTURES: ${{ contains(inputs.toolkit_type, 'rocm') && 'gfx90a gfx942' || '' }}
  # 2 on ubuntu runners, 3 everywhere else.
  MAX_JOBS: ${{ contains(inputs.os, 'ubuntu') && '2' || '3' }} # (FA3 is memory hungry!)
  DISTUTILS_USE_SDK: 1 # otherwise distutils will complain on windows about multiple versions of msvc
  XFORMERS_BUILD_TYPE: "Release"
  TWINE_USERNAME: __token__
  # Derived from the branch or tag name that triggered the run.
  XFORMERS_PACKAGE_FROM: "wheel-${{ github.ref_name }}"

jobs:
  # Builds a single wheel for the requested runner / Python / PyTorch / toolkit
  # combination and uploads it as a workflow artifact.
  build:
    name: ${{ contains(inputs.os, 'ubuntu') && 'ubuntu' || 'win' }}-py${{ inputs.python }}-pt${{ inputs.torch_version }}+${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}
    runs-on: ${{ inputs.os }}
    env:
      # alias for the current python version
      # windows does not have per version binary, it is just 'python3'
      PY: python${{ contains(inputs.os, 'ubuntu') && inputs.python || '3' }}

    # Ubuntu runners build inside the manylinux_2_28 image; other runners use no container.
    container: ${{ contains(inputs.os, 'ubuntu') && 'quay.io/pypa/manylinux_2_28_x86_64' || null }}
    timeout-minutes: 360
    defaults:
      run:
        shell: bash
    steps:
      # CUDA toolkits with 120 <= short version < 130: extend the architecture list.
      - if: contains(inputs.toolkit_type, 'cuda') && fromJSON(inputs.toolkit_short_version) >= 120 && fromJSON(inputs.toolkit_short_version) < 130
        run: |
          echo "TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST 8.0 9.0a" >> ${GITHUB_ENV}

      # CUDA toolkits with short version >= 130: extend the list with further architectures.
      - if: contains(inputs.toolkit_type, 'cuda') && fromJSON(inputs.toolkit_short_version) >= 130
        run: |
          echo "TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST 8.0 9.0a 10.0a 10.3a 11.0a 12.0a 12.1a" >> ${GITHUB_ENV}

      # Enable long path support in git before checking out on Windows.
      - if: runner.os == 'Windows'
        run: git config --system core.longpaths true
      - name: Recursive checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: "."
          fetch-depth: 0 # for tags

      # Replaces the offending cutlass header with an empty file (Windows only).
      - name: HACKFIX for cutlass compiler bug
        if: runner.os == 'Windows'
        run: |
          # See https://github.com/NVIDIA/cutlass/issues/1732
          rm -f third_party/cutlass/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp
          touch third_party/cutlass/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp
      # Local composite action; receives the toolkit type/version and the python version.
      - name: Setup Runner
        uses: ./.github/actions/setup-build-cuda
        with:
          toolkit_type: ${{ inputs.toolkit_type }}
          toolkit_short_version: ${{ inputs.toolkit_short_version }}
          python: ${{ inputs.python }}
      # Dump the environment to the log for debugging.
      - if: runner.os == 'Linux'
        run: printenv

      # Creates a virtualenv and persists it for the following steps by
      # overriding PY and PATH through $GITHUB_ENV.
      - if: runner.os != 'Windows'
        name: (Linux) Setup venv for linux
        shell: bash -l {0}
        run: |
          $PY -m venv venv
          . ./venv/bin/activate
          which pip
          echo "PY=$(which python)" >> ${GITHUB_ENV}
          echo "PATH=$PATH" >> ${GITHUB_ENV}
          git config --global --add safe.directory "*"
          pip install packaging ninja wheel setuptools twine

      # Computes the package version, stores it in version.txt, and exports it
      # as BUILD_VERSION (both as an environment variable and as a step output).
      - name: Define version
        id: xformers_version
        env:
          # 'tag' when the run was triggered by a tag, 'dev' otherwise.
          VERSION_SOURCE: ${{ github.ref_type == 'tag' && 'tag' || 'dev'  }}
        run: |
          set -Eeuo pipefail
          git config --global --add safe.directory "*"
          version=`python .github/compute_wheel_version.py --source $VERSION_SOURCE`
          echo $version > version.txt
          # NOTE(review): no step in this job declares `id: cuda_info`, so the
          # suffix below presumably expands to an empty string — confirm this is intended.
          echo "BUILD_VERSION=$version${{ steps.cuda_info.outputs.CUDA_VERSION_SUFFIX }}" >> ${GITHUB_ENV}
          echo "BUILD_VERSION=$version${{ steps.cuda_info.outputs.CUDA_VERSION_SUFFIX }}" >> ${GITHUB_OUTPUT}
          which ninja
          ninja --version
          cat ${GITHUB_ENV}
      - run: echo "xformers-${BUILD_VERSION}"
      # Informational only: logs a message when the version has no '.dev' component.
      - run: echo "release version (will upload to PyTorch)"
        if: ${{ !contains(steps.xformers_version.outputs.BUILD_VERSION, '.dev') }}

      # Installs requirements.txt, with the PyTorch index for the selected
      # toolkit (cuXXX or rocmX.Y) added as an extra package index.
      - name: Install corresponding PyTorch
        run: |
          PYTORCH_INDEX_URL="https://download.pytorch.org/whl/${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}"
          $PY -m pip install wheel -r requirements.txt --extra-index-url $PYTORCH_INDEX_URL

      # Builds the wheel into dist/; on ubuntu the manylinux platform tag is forced.
      - name: Build wheel
        shell: bash -l {0}
        run: |
          $PY setup.py bdist_wheel -d dist/ -k $PLAT_ARG
        env:
          PLAT_ARG: ${{ contains(inputs.os, 'ubuntu') && '--plat-name manylinux_2_28_x86_64' || '' }}

      # Log the size of the produced files, then publish them as an artifact
      # named after the runner, python, torch and toolkit versions plus `artifact_tag`.
      - run: du -h dist/*
      - uses: actions/upload-artifact@v4
        with:
          name: ${{ inputs.os }}-py${{ inputs.python }}-torch${{ inputs.torch_version }}+${{ contains(inputs.toolkit_type, 'cuda') && 'cu' || 'rocm' }}${{ inputs.toolkit_short_version }}_${{ inputs.artifact_tag }}
          path: dist/*.whl
# Note: it might be helpful to have additional steps that test if the built wheels actually work


================================================
FILE: .github/workflows/wheels_upload_pip.yml
================================================
# Reusable workflow: uploads the selected wheels (and a source distribution) with twine.
name: wheels_upload_pip

on:
  workflow_call:
    inputs:
      twine_username:
        type: string
        required: true
      # Optional content for ~/.pypirc; the file is only written when this is set.
      pypirc:
        type: string
        required: false
      filter:
        description: Filter which runs to upload. Example '*+cu121*'
        type: string
        required: true
      execute:
        description: Actually upload the wheels. Dry-run if false
        type: boolean
        required: true
      # Suffix of the artifact directories to pick up.
      artifact_tag:
        type: string
        default: "facebookresearch"
    secrets:
      twine_password:
        required: true

# Default username; the upload step overrides it with `inputs.twine_username`.
env:
  TWINE_USERNAME: __token__

jobs:
  # Downloads every build artifact of the run, keeps the wheels whose artifact
  # name matches `inputs.filter`, builds an sdist, and uploads everything with
  # twine when `inputs.execute` is true (otherwise this is a dry run).
  wheels_upload_pip:
    name: wheels_upload_pip
    runs-on: ubuntu-24.04

    timeout-minutes: 360
    defaults:
      run:
        shell: bash
    steps:
      - name: Recursive checkout
        # Same major version as the checkout used by wheels_build.yml.
        uses: actions/checkout@v4
        with:
          submodules: recursive
          path: "."
          fetch-depth: 0 # for tags

      # inspired by https://github.com/jlumbroso/free-disk-space/blob/main/action.yml
      - name: Free disk space
        run: |
          sudo rm -rf /usr/local/lib/android || true
          sudo rm -rf /usr/share/dotnet || true

      - name: Setup twine config
        if: inputs.pypirc
        env:
          # Passed through the environment instead of being interpolated into
          # the script: the content can then neither inject shell commands nor
          # be mangled when it contains quotes, `$` or backticks.
          PYPIRC_CONTENT: ${{ inputs.pypirc }}
        run: |
          printf '%s\n' "$PYPIRC_CONTENT" > ~/.pypirc
          # NOTE(review): this prints the file to the job log — callers must
          # not put credentials in `pypirc`.
          cat ~/.pypirc

      # Downloads all artifacts of the run, one sub-directory per artifact.
      - uses: actions/download-artifact@v4
        with:
          path: dist

      # Filter builds (eg vN+cu118 for instance)
      - run: ls -R dist/
      - name: Extract builds to upload
        run: |
          set -ex
          mv dist all-dist
          mkdir dist
          # The filter is a shell glob matched against artifact directory names.
          for f in all-dist/${{ inputs.filter }}_${{ inputs.artifact_tag }}/*.whl; do
            cp "$f" dist/
          done;
      - run: ls -R dist/

      # Persist the virtualenv for the next steps through PY and PATH.
      - name: Setup venv
        run: |
          python3 -m venv venv
          . ./venv/bin/activate
          which pip
          # (we need pytorch to create a source distr...)
          pip install torch packaging twine
          echo "PY=$(which python)" >> ${GITHUB_ENV}
          echo "PATH=$PATH" >> ${GITHUB_ENV}

      - name: Create source distribution
        env:
          # 'tag' when the run was triggered by a tag, 'dev' otherwise.
          VERSION_SOURCE: ${{ github.ref_type == 'tag' && 'tag' || 'dev'  }}
        run: |
          version=`$PY .github/compute_wheel_version.py --source $VERSION_SOURCE`
          echo $version > version.txt
          cat version.txt

          BUILD_VERSION=$version $PY setup.py sdist -d sdist/

      - run: ls -R sdist/
      # Skipped entirely when `inputs.execute` is false.
      - name: Upload wheel to PyPi
        if: inputs.execute
        run: $PY -m twine upload --skip-existing dist/*.whl sdist/*
        env:
          TWINE_USERNAME: ${{ inputs.twine_username }}
          TWINE_PASSWORD: ${{ secrets.twine_password }}


================================================
FILE: .github/workflows/wheels_upload_s3.yml
================================================
# Reusable workflow: copies the selected wheels to an S3 location.
name: wheels_upload_s3

on:
  workflow_call:
    inputs:
      # IAM role assumed before uploading.
      aws_role:
        type: string
        required: true
      s3_path:
        description: Example 's3://bucket/path/xformers/'
        type: string
        required: true
      aws_s3_cp_extra_args:
        description: Example '--acl public-read'
        type: string
        required: false
        default: ''
      filter:
        description: Filter which runs to upload. Example '*+cu121*'
        type: string
        required: true
      execute:
        description: Actually upload the wheels. Dry-run if false
        type: boolean
        required: true
      # Suffix of the artifact directories to pick up.
      artifact_tag:
        type: string
        default: "facebookresearch"

jobs:
  # Downloads every build artifact of the run, keeps the wheels whose artifact
  # name matches `inputs.filter`, and copies them to `inputs.s3_path`.
  # All AWS-related steps are skipped when `inputs.execute` is false.
  wheels_upload_s3:
    permissions:
      id-token: write # Needed to assume AWS role
      pull-requests: read
      contents: read
    name: ${{ inputs.s3_path }}
    runs-on: ubuntu-24.04

    timeout-minutes: 360
    defaults:
      run:
        shell: bash
    steps:
      - name: Recursive checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
          path: "."
          fetch-depth: 0 # for tags

      # inspired by https://github.com/jlumbroso/free-disk-space/blob/main/action.yml
      - name: Free disk space
        run: |
          sudo rm -rf /usr/local/lib/android || true
          sudo rm -rf /usr/share/dotnet || true

      # Downloads all artifacts of the run into dist/.
      - uses: actions/download-artifact@v4
        with:
          path: dist
      # Filter builds (eg vN+cu118 for instance)
      - run: ls -R dist/
      - name: Extract builds to upload
        run: |
          set -ex
          mv dist all-dist
          mkdir dist
          # The filter is a shell glob matched against artifact directory names.
          for f in all-dist/${{ inputs.filter }}_${{ inputs.artifact_tag }}/*.whl; do
            cp $f dist/
          done;
      - run: ls -R dist/

      # Assume the role given by the caller.
      - name: configure aws credentials
        if: inputs.execute
        uses: aws-actions/configure-aws-credentials@v1.7.0
        with:
          role-to-assume: ${{ inputs.aws_role }}
          role-session-name: GitHub_CI
          aws-region: "us-east-1"

      # Logs the identity that the following upload will use.
      - name: Sts GetCallerIdentity
        if: inputs.execute
        run: |
          aws sts get-caller-identity

      # Copies the wheels one by one, then lists the destination.
      - name: Upload wheels to ${{ inputs.s3_path }}
        if: inputs.execute
        run: |
          set -ex
          for f in dist/*.whl; do
            echo $f;
            aws s3 cp $f ${{ inputs.s3_path }} ${{ inputs.aws_s3_cp_extra_args }}
          done;
          aws s3 ls ${{ inputs.s3_path }}


================================================
FILE: .github/workflows/win-build.yml
================================================
# Checks that xFormers still builds from an sdist on Windows.
name: win-build

on:
  # Only pull requests touching the native sources, the vendored third-party
  # code, or the build configuration trigger this workflow.
  pull_request:
    paths:
      - "third_party/**"
      - "xformers/csrc/**"
      - ".github/workflows/win-build.yml"
      - ".github/actions/setup-build-cuda/action.yml"
      - "setup.py"
      - "requirements*.txt"

# Workflow-wide environment for the build.
env:
  XFORMERS_BUILD_TYPE: "Release"
  FORCE_CUDA: 1
  MAX_JOBS: 6
  DISTUTILS_USE_SDK: 1 # otherwise distutils will complain on windows about multiple versions of msvc
  # Short relative temp directory; it is created by a step of the job below.
  TMPDIR: "./x"

jobs:
  # Builds an sdist, installs xFormers from it on a Windows runner, and prints
  # the output of `xformers.info`.
  win_build:
    strategy:
      fail-fast: false
      matrix:
        # CUDA architectures to compile for (exported as TORCH_CUDA_ARCH_LIST below).
        arch:
          - "8.0"
    name: win-build-${{ matrix.arch }}
    runs-on: windows-8-core
    env:
      PY: python3
      TORCH_CUDA_ARCH_LIST: ${{ matrix.arch }}

    timeout-minutes: 360
    defaults:
      run:
        shell: bash
    steps:
      # Must run before the checkout so that long paths can be checked out.
      - name: Workarounds for longpaths - git-config
        run: |
          git config --system core.longpaths true
      - name: Recursive checkout
        uses: actions/checkout@v3
        with:
          submodules: recursive
          path: "."

      # Creates the directory referenced by TMPDIR ("./x") and verifies that
      # Python's temp directory path is shorter than 30 characters.
      - name: Workarounds for longpaths - TMPDIR
        run: |
          mkdir x
          python -c "import tempfile; print(tempfile.gettempdir())"
          python -c "import tempfile; assert(len(tempfile.gettempdir()) < 30)"

      # Replaces the offending cutlass header with an empty file.
      - name: HACKFIX for cutlass compiler bug
        if: runner.os == 'Windows'
        run: |
          # See https://github.com/NVIDIA/cutlass/issues/1732
          rm -f third_party/cutlass/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp
          touch third_party/cutlass/include/cutlass/gemm/kernel/sm90_gemm_tma_warpspecialized_pingpong.hpp

      # Local composite action, configured here for CUDA 13.0 and Python 3.10.
      - name: Setup Runner
        uses: ./.github/actions/setup-build-cuda
        with:
          toolkit_type: "cuda"
          toolkit_short_version: "130"
          python: "3.10"

      # Runs .github/sync.fairinternal/ossify.sh; `touch` creates an empty
      # script first when the file does not exist, making the call a no-op.
      - name: Remove internal code
        run: |
          mkdir -p .github/sync.fairinternal/
          touch .github/sync.fairinternal/ossify.sh
          chmod +x .github/sync.fairinternal/ossify.sh
          .github/sync.fairinternal/ossify.sh

      # Installs the build tools and requirements.txt (with the cu130 PyTorch
      # index as an extra index), then logs the torch, CUDA and ninja versions.
      - name: Install build dependencies
        run: |
          $PY -m pip install wheel setuptools ninja -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu130
          git config --global --add safe.directory "*"
          $PY -c "import torch; print('torch', torch.__version__)"
          $PY -c "import torch; print('torch.cuda', torch.version.cuda)"
          ninja --version

      - name: Create sdist
        run: $PY setup.py sdist

      # Installs the sdist produced by the previous step, reusing the packages
      # installed above instead of an isolated build environment.
      - name: Build from sdist
        shell: bash -l {0}
        run: |
          $PY -m pip install -v --no-build-isolation dist/*

      - name: Info
        run: |
          cd ../../  # So we don't have a folder named `xformers`
          XFORMERS_MORE_DETAILS=1 $PY -m xformers.info

      # - name: Open an SSH session on failure to debug
      #   if: ${{ failure() }}
      #   uses: mxschmitt/action-tmate@v3


================================================
FILE: .gitignore
================================================
*~
*.swp

*.pyc
*.pyo
*.so

.mypy_cache/
*.egg-info/

build/
dist/

# for autocomplete
compile_commands.json

# Pytest verbose output
test-results/

# Coverage reports
.coverage
.coverage.*

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.vscode/*
xformers/benchmarks/LRA/datasets
xformers/benchmarks/LRA/logs

my_runs.md

# Triton cache
.cache

# JetBrains PyCharm IDE
.idea/

# Pyre cache
.pyre/

# Watchman config files
.watchmanconfig

# examples demo files
examples/input.txt
examples/lightning_logs
examples/data

# Hydra default output dir
multirun
outputs

.benchmarks
xformers/version.py
xformers/cpp_lib.json

## temporary files
xformers/csrc/attention/hip_fmha/*.cu
xformers/csrc/attention/hip_fmha/*.hip
xformers/csrc/attention/hip_fmha/*_hip.h
xformers/csrc/attention/hip_fmha/instances/*.cu
xformers/csrc/attention/hip_fmha/instances/*.hip
xformers/csrc/attention/hip_fmha/instances/*_hip.h
xformers/csrc/attention/hip_decoder/*.cu
xformers/csrc/attention/hip_decoder/*.hip
xformers/csrc/attention/hip_decoder/*_hip.h


================================================
FILE: .gitmodules
================================================
[submodule "third_party/cutlass"]
	path = third_party/cutlass
	url = https://github.com/NVIDIA/cutlass.git
[submodule "third_party/composable_kernel_tiled"]
	path = third_party/composable_kernel_tiled
	url = https://github.com/ROCm/composable_kernel.git
	branch = develop


================================================
FILE: .isort.cfg
================================================
[settings]
known_third_party =fvcore,hydra,input_pipeline,matplotlib,numpy,omegaconf,pandas,pl_bolts,pyre_extensions,pytest,pytorch_lightning,ragged_inference,recommonmark,seaborn,setuptools,sklearn,submitit,tensorflow,timm,torch,torchmetrics,torchvision,tqdm,triton,typing_extensions
skip_glob=third_party/*


================================================
FILE: .markdownlint.json
================================================
{
    "MD013": false,
    "MD033": false
}


================================================
FILE: .pre-commit-config.yaml
================================================
# Regular expression of paths that no hook should look at.
exclude: 'build|stubs'

default_language_version:
  python: python3

repos:
  # Generic repository hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v3.4.0
    hooks:
      - id: trailing-whitespace
      - id: check-ast
      - id: check-merge-conflict
      - id: no-commit-to-branch
        # NOTE(review): only `master` is listed here — confirm whether `main`
        # should be protected as well.
        args:
          - '--branch=master'
      - id: check-added-large-files
        args:
          - '--maxkb=500'
      - id: end-of-file-fixer

  # Code formatting, with pinned black and usort versions.
  - repo: https://github.com/omnilib/ufmt
    rev: v2.8.0
    hooks:
      - id: ufmt
        additional_dependencies:
          - black == 26.3.1
          - usort == 1.0.8.post1

  # Linting, including the flake8-copyright plugin.
  - repo: https://github.com/pycqa/flake8
    rev: 6.1.0
    hooks:
      - id: flake8
        additional_dependencies:
          - flake8-copyright

  # Static type checking.
  - repo: https://github.com/pre-commit/mirrors-mypy
    rev: 'v1.10.0'
    hooks:
      - id: mypy


================================================
FILE: .pyre_configuration
================================================
{
  "ignore_all_errors": ["xformers/benchmarks/"],
  "python_version": "3.9",
  "source_directories": [
    "stubs",
    {"import_root": ".", "source": "xformers"}
  ]
}


================================================
FILE: CHANGELOG.md
================================================
# Changelog
All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.0.36] - 2026-??-??


## [0.0.35] - 2026-02-20
Pre-built binary wheels are available for PyTorch 2.10.0 (and later).

### Improved
- Supported free-threading Python.

### Removed
- Stopped bundling pre-built versions of Flash-Attention 3, and instead started relying on the wheels provided by the PyTorch indices.


## [0.0.34] - 2026-01-23
Pre-built binary wheels are available for PyTorch 2.10.0 (and later).

### Improved
- Migrated xFormers to the PyTorch stable API/ABI, which means that binary builds targeting PyTorch 2.10+ will be compatible with any later version

### Removed
- Removed optimized fast-path of SwiGLU (which was only available for A100 GPUs)
- Removed most legacy components


## [0.0.33.post2] - 2025-12-03
Pre-built binary wheels are available for PyTorch 2.9.1.


## [0.0.33.post1] - 2025-11-13
Fixed issues with wheel upload to PyPI


## [0.0.33] - 2025-11-12
Pre-built binary wheels are available for PyTorch 2.9.0.

### Added
- cutlass fmha Op for Blackwell GPUs
- Support flash-attention package up to 2.8.3
- expose FA3 deterministic mode
- FW+BW pass overlap for DeepSeek-like comms/compute overlap

### Improved
- merge_attentions support for irregular head dimension


## [0.0.32] - 2025-08-13
Pre-built binary wheels are available for PyTorch 2.8.0.

### Added
- Support flash-attention package up to 2.8.2
- Speed improvements to `python -m xformers.profiler.find_slowest`

### Removed
- Removed autograd backward pass for merge_attentions as it is easy to use incorrectly.
- Attention biases are no longer `torch.Tensor` subclasses. This is no longer
  necessary for torch.compile to work, and was adding more complexity.


## [0.0.31] - 2025-06-25
Pre-built binary wheels are available for PyTorch 2.7.1.
### Added
- xFormers wheels are now python-version agnostic: this means that the same wheel can be used for python 3.9, 3.10, ... 3.13
- Added support for Flash-Attention 3 on Ampere GPUs
### Removed
- We will no longer support V100 or older GPUs, following PyTorch (pytorch/pytorch#147607)
- Deprecated support for building Flash-Attention 2 as part of xFormers. For Ampere GPUs, we now use Flash-Attention 3 on windows, and Flash-Attention 2 can still be used through PyTorch on linux.

## [0.0.30] - 2025-04-28
Pre-built binary wheels are available for PyTorch 2.7.0. Following PyTorch, we build wheels for CUDA 11.8, 12.6, and 12.8 only (we no longer build for CUDA 12.4).
xFormers now requires PyTorch >= 2.7
### Added
- [fMHA] Added support for local attention on the Flash3 backend (H100)
- [fMHA] Added a new paged gappy attention bias
### Improved
- [fMHA] The FlashAttention3 backend now ships with more head dimensions to support MLA, and with a FLOPs formula in order to be compatible with PyTorch's partitioner-based automatic activation checkpointing
- The fused operators for sequence parallelism were migrated to PyTorch's SymmetricMemory
- The profiler prepends the traces' filenames with the rank of the process when doing distributed training
### Removed
- Removed documentation for legacy unmaintained components

## [0.0.29.post2] - 2025-01-31
Pre-built binary wheels are available for PyTorch 2.6.0. Following PyTorch, we build wheels for CUDA 11.8, 12.4, and 12.6 only (we no longer build for CUDA 12.1).
xFormers now requires PyTorch >= 2.6


## [0.0.29] - 2024-12-27
### Improved
- [fMHA] Creating a `LowerTriangularMask` no longer creates a CUDA tensor
- [fMHA] Updated Flash-Attention to `v2.7.2.post1`
- [fMHA] Flash-Attention v3 will now be used by `memory_efficient_attention` by default when available, unless the operator is enforced with the `op` keyword-argument. Switching from Flash2 to Flash3 can make transformer trainings ~10% faster end-to-end on H100s
- [fMHA] Fixed a performance regression with the `cutlass` backend for the backward pass (facebookresearch/xformers#1176) - mostly used on older GPUs (eg V100)
- Fixed swiglu operator compatibility with torch-compile with PyTorch 2.6
- Fix activation checkpointing of SwiGLU when AMP is enabled (facebookresearch/xformers#1152)
### Removed
- Following PyTorch, xFormers no longer builds binaries for conda. Pip is now the only recommended way to get xFormers
- Removed unmaintained/deprecated components in `xformers.components.*` (see facebookresearch/xformers#848)

## [0.0.28.post3] - 2024-10-30
Pre-built binary wheels require PyTorch 2.5.1

## [0.0.28.post2] - 2024-10-18
Pre-built binary wheels require PyTorch 2.5.0

## [0.0.28.post1] - 2024-09-13
Properly upload wheels for cuda 12.4

## [0.0.28] - 2024-09-12
Pre-built binary wheels require PyTorch 2.4.1
### Added
- Added wheels for cuda 12.4
- Added conda builds for python 3.11
- Added wheels for rocm 6.1
### Improved
- Profiler: Fix computation of FLOPS for the attention when using xFormers
- Profiler: Fix MFU/HFU calculation when multiple dtypes are used
- Profiler: Trace analysis to compute MFU & HFU is now much faster
- fMHA/splitK: Fixed `nan` in the output when using a `torch.Tensor` bias where a lot of consecutive keys are masked with `-inf`
- Update Flash-Attention version to `v2.6.3` *when building from scratch*
- When using the most recent version of Flash-Attention, it is no longer possible to mix it with the cutlass backend. In other words, it is no longer possible to use the cutlass Fw with the flash Bw.
### Removed
- fMHA: Removed `decoder` and `small_k` backends
- profiler: Removed `DetectSlowOpsProfiler` profiler
- Removed compatibility with PyTorch < 2.4
- Removed conda builds for python 3.11
- Removed windows pip wheels for cuda 12.1 and 11.8

## [0.0.27.post2] - 2024-07-26
Pre-built binary wheels require PyTorch 2.4.0

## [0.0.27.post1] - 2024-07-25
Pre-built binary wheels require PyTorch 2.4.0

## [0.0.27] - 2024-07-10
Pre-built binary wheels require PyTorch 2.3.1
### Added
- fMHA: `PagedBlockDiagonalGappyKeysMask`
- fMHA: heterogeneous queries in `triton_splitk`
- fMHA: support for paged attention in flash
- fMHA: Added backwards pass for `merge_attentions`
- fMHA: Added `torch.compile` support for 3 biases (`LowerTriangularMask`, `LowerTriangularMaskWithTensorBias` and `BlockDiagonalMask`) - some might require PyTorch 2.4
- fMHA: Added `torch.compile` support in `memory_efficient_attention` when passing the flash operator explicitly (eg `memory_efficient_attention(..., op=(flash.FwOp, flash.BwOp))`)
- fMHA: `memory_efficient_attention` now expects its `attn_bias` argument to be on the same device as the other input tensor. Previously, it would convert the bias to the right device.
- fMHA: `AttentionBias` subclasses are now constructed by default on the `cuda` device if available - they used to be created on the CPU device
- 2:4 sparsity: Added `xformers.ops.sp24.sparsify24_ste` for Straight Through Estimator (STE) with options to rescale the gradient differently for masked out/kept values
### Improved
- fMHA: Fixed out-of-bounds reading for Split-K triton implementation
- Profiler: fix bug with modules that take a single tuple as argument
- Profiler: Added manual trigger for a profiling step, by creating a `trigger` file in the profiling directory
### Removed
- Removed support for PyTorch versions older than 2.2

## [0.0.26] - 2024-04-29
Pre-built binary wheels require PyTorch 2.3.0
### Added
- [2:4 sparsity] Added support for Straight-Through Estimator for `sparsify24` gradient (`GRADIENT_STE`)
- [2:4 sparsity] `sparsify24_like` now supports the cuSparseLt backend, and the STE gradient
- Basic support for `torch.compile` for the `memory_efficient_attention` operator. Currently only supports Flash-Attention, and without any bias provided. We want to expand this coverage progressively.
### Improved
- merge_attentions no longer needs inputs to be stacked.
- fMHA: triton_splitk now supports additive bias
- fMHA: benchmark cleanup

## [0.0.25.post1] - 2024-03-29
Pre-built binary wheels require PyTorch 2.2.2

## [0.0.25] - 2024-03-14
Pre-built binary wheels require PyTorch 2.2.1
### Added
- New `merge_attentions` function
- fMHA: New gappy attention biases.
### Improved
- fMHA: Updated Flash-Attention to v2.5.6: this has a performance improvement for multiquery.
- fMHA: triton_splitk changed and expanded. Now amalgamates using LSE. Can autotune, supports causal with a small number of queries - not just 1. Experimental support for paged attention.
- `rope_padded`: Fixed CUDA error with many queries (more than 65k)
- `rmsnorm`: Fixed CUDA error with large inputs (enables 512k+ sequence length on Llama2 70B)
### Removed
- fMHA: Removed triton operator (`fmha.triton.*`, `xformers.ops.MemoryEfficientAttentionTritonFwdFlashBwOp`, `xformers.ops.TritonFlashAttentionOp`), as it has correctness issues under some conditions, and is slower than other implementations.

## [0.0.24] - 2024-01-31
Pre-built binary wheels require PyTorch 2.2.0
### Added
- Added components for model/sequence parallelism, as near-drop-in replacements for FairScale/Megatron Column&RowParallelLinear modules. They support fusing communication and computation for sequence parallelism, thus making the communication effectively free. [Read more](https://twitter.com/d_haziza/status/1753030654118211593)
- Added kernels for training models with 2:4-sparsity. We introduced a very fast kernel for converting a matrix A into 2:4-sparse format, which can be used during training to dynamically sparsify weights, activations, etc. xFormers also provides an API that is compatible with torch-compile, see `xformers.ops.sparsify24`.
### Improved
- Make selective activation checkpointing be compatible with torch.compile.
### Removed
- Triton kernels now require a GPU with compute capability 8.0 at least (A100 or newer). This is due to newer versions of triton not supporting older GPUs correctly
- Removed support for PyTorch versions older than 2.1.0

## [0.0.23] - 2023-12-05
Pre-built binary wheels require PyTorch 2.1.1 (xFormers `0.0.23`) or PyTorch 2.1.2 (xFormers `0.0.23.post1`).
### Fixed
- fMHA: Fixed a bug in cutlass backend forward pass where the logsumexp was not correctly calculated, resulting in wrong results in the BW pass. This would happen with MQA when one sequence has a query with `length%64 == 1`
- fMHA: Updated Flash-Attention to v2.3.6 - this fixes a performance regression in causal backward passes, and now supports `BlockDiagonalCausalWithOffsetPaddedKeysMask`
### Added
- fMHA: Added `LocalAttentionFromBottomRightMask` (local)
- fMHA: Added `LowerTriangularFromBottomRightMask` (causal)
- fMHA: Added `LowerTriangularFromBottomRightLocalAttentionMask` (local + causal)
### Removed
- Removed `xformers.triton.sum_strided`

## [0.0.22] - 2023-09-27
### Fixed
- fMHA: Backward pass now works in PyTorch deterministic mode (although slower)
### Added
- fMHA: Added experimental support for Multi-Query Attention and Grouped-Query Attention. This is handled by passing 5-dimensional inputs to `memory_efficient_attention`, see the documentation for more details
- fMHA: Added experimental support for Local Attention biases to `memory_efficient_attention`
- Added an example of efficient [LLaMa decoding](https://github.com/facebookresearch/xformers/tree/main/examples/llama_inference) using xformers operators
- Added Flash-Decoding for faster attention during Large Language Model (LLM) decoding - up to 50x faster for long sequences (token decoding up to 8x faster end-to-end)
- Added an efficient rope implementation in triton, to be used in LLM decoding
- Added selective activation checkpointing, which gives fine-grained control of which activations to keep and which activations to recompute
- `xformers.info` now indicates the Flash-Attention version used
### Removed
- fMHA: Removed `smallK` backend support for CPU. `memory_efficient_attention` only works for CUDA/GPU tensors now
- **DEPRECATION**: Many classes in `xformers.factory`, `xformers.triton` and `xformers.components` have been or will be deprecated soon (see tracking issue facebookresearch/xformers#848)

## [0.0.21] - 2023-08-18
### Improved
- fMHA: Updated [flash-attention](https://github.com/Dao-AILab/flash-attention) to v2, with massive performance improvements for both the forward pass and backward pass. This implementation is now used by default when it's available
### Bug fixes
- fMHA/cutlass: Fix potential race condition in the FW/BW passes
- fMHA/cutlass: Fix `attn_bias` stride overflow for very long sequences (>32k)
- `LowerTriangularMask` is now backward compatible with older xformers versions
### Breaking changes
- `memory_efficient_attention` now expects the `attn_bias` argument to have a head dimension
- `memory_efficient_attention` no longer broadcasts the batch/head dimensions of `attn_bias`. Please use `.expand` if you need to broadcast the bias
- Remove `causal_diagonal` argument from `BlockDiagonalCausalWithOffsetPaddedKeysMask`
### Added
- Binary wheels on pypi/conda now contain H100 kernels
- fMHA: Added backend specialized for decoding that does not use TensorCores - useful when not using multiquery

**NOTE**: Binary wheels are now provided only for PyTorch 2 with cuda 11.8. It is still possible to use xFormers with older versions of PyTorch by building from source or using conda.


## [0.0.20] - 2023-05-23
### Improved
- fMHA/cutlass (backward): Massive performance improvements when `batch_size * num_heads` is low (10x+)
- fMHA/cutlass: Further performance improvements for both the forward & backward kernels
- fMHA (backward): Now dispatching to cutlass when `embed_dim>64`
- fMHA: Updated Flash-Attention to `v1.0.5`
### Added
- fMHA now runs on H100 (support is experimental)

## [0.0.19] - 2023-04-28
### Added
- Display `nvcc` version used to compile `xformers` in `python -m xformers.info`

### Fixed
- Fixed performance regression with `nvcc>11.6` (facebookresearch/xformers#712)
- fMHA/cutlass: Fixed `nan` in the output when using a `torch.Tensor` with `-inf` prefixes as `attn_bias` (facebookresearch/xformers#722)
- fMHA/cutlass: Fixed `nan` in the output when the sequence length is larger than `2 ** 15` (facebookresearch/xformers#719)
- fMHA/cutlass: Significant performance improvements (up to 2x) for both the forward pass and backward pass
- fMHA/cutlass: The kernels are now deterministic
- fMHA/cutlass: Fixed backward pass correctness when using dropout (facebookresearch/xformers#724)

## [0.0.18] - 2023-03-31
### Added
- Added `xformers.ops.index_select_cat` and `xformers.ops.scaled_index_add` - those are experimental functions that only work with a few shapes, and can be used to write efficient stochastic depth in transformer architectures for instance

### Fixed
- fMHA: `memory_efficient_attention` now accepts `torch.Tensor` as attention bias for any seqlen, although there are still requirements on the alignment of the bias tensor (see facebookresearch/xformers#683)

## [0.0.17] - 2023-03-28
### Fixed
- fMHA: Fixed BW pass on Sm86/Sm89 GPUs when `K > 64` (RTX 3090, RTX 4090, A6000, ..) [facebookresearch/xformers#631]

### Added
- fMHA/CUTLASS: Added tensor attn bias support [facebookresearch/xformers#587] - contribution from [@jfc4050](https://github.com/jfc4050)
- fMHA/CUTLASS: Added tensor attn bias grad support [facebookresearch/xformers#587] - contribution from [@jfc4050](https://github.com/jfc4050)
- fMHA/CUTLASS: Added dropout support [facebookresearch/xformers#587] - contribution from [@jfc4050](https://github.com/jfc4050)
- fMHA: Added support for varying sequence lengths [facebookresearch/xformers#500]


## [0.0.16] - 2023-01-31
### Fixed
- Updated triton dependency [facebookresearch/xformers#418]
- Strip lineinfo from binaries, reducing the binary size [facebookresearch/xformers#549]
- Added support for pip wheels [facebookresearch/xformers#588, facebookresearch/xformers#573, facebookresearch/xformers#534, facebookresearch/xformers#523, ...] big thanks to [@AbdBarho](https://github.com/AbdBarho)!
- Fixed compatibility with Python 3.7 [facebookresearch/xformers#541] - thanks to [@susumuota](https://github.com/susumuota)
- fMHA: Fixed strides for QKV gradients for cutlass attention [facebookresearch/xformers#535]
- fMHA: Stricter inputs validation to avoid CUDA errors for unsupported inputs [facebookresearch/xformers#592]
- fMHA/Flash-Attention: Updated to https://github.com/HazyResearch/flash-attention/commit/a1f49a2b92b6fa022379bbebafed9d7f5e96a675 with multiple changes from [@TriDao](https://github.com/tridao) that make the operator up to 20% faster
- fMHA/Flash-Attention: Fixed backward pass wrapper, where non-contiguous gradients could give the wrong result [facebookresearch/xformers#548]
- fMHA: Separate each operator into forward and backward operators. It's now possible to use any combination of forward+backward (for instance Triton forward and Flash-Attention backward) [facebookresearch/xformers#560]

### Added
- fMHA: Added Triton operator for forward pass from [Flash-Attention](https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attn_triton.py) authored by [@TriDao](https://github.com/tridao), will be automatically used on A100 when compatible
- fMHA: Added [`xformers.ops.memory_efficient_attention_forward`](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.memory_efficient_attention_forward), [`xformers.ops.memory_efficient_attention_forward_requires_grad`](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.memory_efficient_attention_forward_requires_grad), [`xformers.ops.memory_efficient_attention_backward`](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.memory_efficient_attention_backward) for power-users who write custom autograd functions [facebookresearch/xformers#560]
- fMHA: Support for custom scaling for the CUTLASS-based kernel [facebookresearch/xformers#530] - contribution from [@comaniac](https://github.com/comaniac)

## [0.0.15] - Skipped

## [0.0.14] - 2022-11-10
### Fixed
- fMHA/CUTLASS: The current CUDA stream is now used by the kernel [facebookresearch/xformers#491]
- fMHA/CUTLASS: Improve overall performance

### Added
- SwiGLU: Added `xformers.ops.SwiGLU` and its functional counterpart (`xformers.ops.swiglu`) [facebookresearch/xformers#490]
- fMHA: Possible to combine CUTLASS's forward with flash-attention's backward pass [facebookresearch/xformers#469] - improves performance on A100 for K = 128
- fMHA: Add custom `xformers.ops.unbind` operator to avoid a cat in the attention block [facebookresearch/xformers#458]

## [0.0.13] - 2022-09-26
### Added
- fMHA: Added CUTLASS-based kernel for `xformers.ops.memory_efficient_attention`. This kernel is automatically used depending on the inputs, and works on any GPU after P100 [facebookresearch/xformers#362]

## [0.0.12] - 2022-08-08
### Fixed
- Removed duplicated biases in the FusedMLP layers [facebookresearch/xformers#317]
- Rotary embeddings respecting input types [facebookresearch/xformers#326]
- Poolformer style instantiating useless projection layers [facebookresearch/xformers#349]
- Fix layer position not being properly tracked, causing extra layernorms for programmatic xformers [facebookresearch/xformers#348]
- Pass use_triton flag to LayerNorm module [facebookresearch/xformers#336]

### Added
- Four blocksparsity layouts from DeepSpeed [facebookresearch/xformers#320]
- Support several initialization options [facebookresearch/xformers#312]
- Conv2DFeedforward feedforward part [facebookresearch/xformers#321]
- VisualAttention [facebookresearch/xformers#329]
- Automatic blocksparse for causal attention [facebookresearch/xformers#334]
- Better hierarchical transformer generation [facebookresearch/xformers#345]
- Fused operations with AOTAutograd/NVFuser, integration into MLP [facebookresearch/xformers#357]
- Refactor LRA code to use Pytorch Lightning [facebookresearch/xformers#343]

## [0.0.11] - 2022-05-30
### Fixed
- Fix some torchscriptability [facebookresearch/xformers#246]
- Fix FourierMix being compatible with AMP [facebookresearch/xformers#258]
- Better asserts on QKV dimensions [facebookresearch/xformers#264]
- Better perfs for FusedMLP and FusedLinearLayer [facebookresearch/xformers#283]
- Deepnorm init missing self-attention [facebookresearch/xformers#284]

### Added
- Simplicial Embeddings [facebookresearch/xformers#259]
- Mem efficient attention, FW pass [facebookresearch/xformers#267]
- MHA benchmark
- MLP benchmark
- Move all triton kernels to triton v2 [facebookresearch/xformers#272]
- Mem efficient attention, BW pass [facebookresearch/xformers#281]
- Metaformer support [facebookresearch/xformers#294]

## [0.0.10] - 2022-03-14
### Fixed
- Expose bias flag for feedforwards, same default as Timm [facebookresearch/xformers#220]
- Update eps value for layernorm, same default as torch [facebookresearch/xformers#221]
- PreNorm bugfix, only one input was normalized [facebookresearch/xformers#233]
- Fix bug where embedding dimensions that did not match model dim would lead to a crash [facebookresearch/xformers#244]

### Added
- Add DeepNet (DeepNorm) residual path and init [facebookresearch/xformers#227]

## [0.0.9] - 2022-02-09
### Added
- Compositional Attention [facebookresearch/xformers#41]
- Experimental Ragged attention [facebookresearch/xformers#189]
- Mixture of Experts [facebookresearch/xformers#181]
- BlockSparseTensor [facebookresearch/xformers#202]
- Nd-tensor support for triton softmax [facebookresearch/xformers#210]

### Fixed
- Bugfix Favor, single feature map [facebookresearch/xformers#183]
- Sanity check blocksparse settings [facebookresearch/xformers#207]
- Fixed some picklability [facebookresearch/xformers#204]

## [0.0.8] - 2022-01-07
### Fixed
- Much faster fused dropout [facebookresearch/xformers#164]
- Fused dropout repeatability [facebookresearch/xformers#173]

### Added
- Embedding weight tying option [facebookresearch/xformers#172]

## [0.0.7] - 2021-11-30
### Fixed
- Dropout setting not properly passed in many attentions [facebookresearch/xformers#123]

## [0.0.6] - 2021-11-24
### Fixed
- Fix self attention optimization not being triggered, broken residual path [facebookresearch/xformers#119]
- Improve speed by not using contiguous Tensors when not needed [facebookresearch/xformers#119]

### Added
- Attention mask wrapper [facebookresearch/xformers#113]
- ViT comparison benchmark [facebookresearch/xformers#117]

## [0.0.4] - 2021-11-16
### Fixed
- Homogenizing the masks, additive or bool [facebookresearch/xformers#79][facebookresearch/xformers#85][facebookresearch/xformers#86]
- Fix causality flag not being respected [facebookresearch/xformers#103]
- Enabling FusedLayerNorm by default in the factory if Triton is available
- Fixing Favor with fp16
- Fixing Favor trainability

### Added
- Fused dropout/bias/activation layer [facebookresearch/xformers#58]
- Fused layernorm used by default in the factory [facebookresearch/xformers#92]


## [0.0.3] - 2021-11-01
### Fixed
- Nystrom causal attention [facebookresearch/xformers#75]


## [0.0.2] - 2021-11-01
### Fixed
- More robust blocksparse [facebookresearch/xformers#24]

### Added
- Rotary embeddings [facebookresearch/xformers#32]
- More flexible layernorm [facebookresearch/xformers#50]


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.

This Code of Conduct also applies outside the project spaces when there is a
reasonable belief that an individual's behavior may have a negative impact on
the project or its community.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <opensource-conduct@fb.com>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to the xFormers repo

We want to make contributing to this project as easy and transparent as
possible.

## Our Development Process

Minor changes and improvements will be released on an ongoing basis. Larger
changes (e.g., changesets implementing a new paper) will be released on a
more periodic basis.

## Pull Requests

We actively welcome your pull requests.

1. Fork the repo and create your branch from `main`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").

## Contributor License Agreement ("CLA")

In order to accept your pull request, we need you to submit a CLA. You only need
to do this once to work on any of Facebook's open source projects.

Complete your CLA here: <https://code.facebook.com/cla>

## Issues

We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.

Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe
disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

## Environment setup

```bash
~$ python3 -m venv venv2
~$ source venv2/bin/activate
(venv2) ~$ cd git/template/
(venv2) ~/git/template $ pip3 install -r requirements-test.txt
```

## Coding Style

In your editor, install the [editorconfig](https://editorconfig.org/) extension
which should ensure that you are following the same standards as us.

Two options to make sure that the code is formatted and linted properly:
* either you run ufmt, flake8 and mypy before opening up your PR.

```bash
ufmt format
flake8 --config .flake8
mypy --ignore-missing-imports --scripts-are-modules --pretty --exclude build/ --exclude stubs/ .
```

* or you can just install [pre-commit](https://pre-commit.com/), which will make sure that all of the above is run automatically anytime you commit.
In that case, you would need to run
```bash
pip install pre-commit
```
then (in the xformers repository, just once)
```bash
pre-commit install
```

After these steps, each of your commits will run the same linting and formatting routines as the xformers continuous integration, which greatly helps in getting your PRs all green!

_Read the [editorconfig](.editorconfig) file to understand the exact coding style preferences._

## Testing

### Static analysis

```bash
mypy --ignore-missing-imports --scripts-are-modules --pretty --exclude stubs/ .
```

### Unit tests

```bash
pytest
```

or

``` bash
python -m pytest
```

### Check test coverage

``` bash
python -m pytest --cov-report term --cov=template  tests
```

### CircleCI status

From your PR page, you can expand on the CircleCI results. For GPU tests, you should see
what CI has run, like:

``` bash
...
----- generated xml file: /home/circleci/template/test-results/junit.xml ------
================== 217 passed, 2 xfailed in 218.74s (0:03:38) ==================
CircleCI received exit code 0
```

The number of passed and failed tests should give you an idea of whether your local
test run matched the CI run or not.

## Commit Guidelines

We follow the same guidelines as AngularJS. Each commit message consists of a **header**,
a **body** and a **footer**.  The header has a special format that includes a **type**,
and a **subject**:

```bash
[<type>] <subject>
<BLANK LINE>
<body>
<BLANK LINE>
<footer>
```

No line of the commit message can be longer than 100 characters! This allows the message to be easier
to read on github as well as in various git tools.

### Type

Must be one of the following:

* **feat**: A new feature
* **fix**: A bug fix
* **cleanup**: Changes that do not affect the meaning of the code (white-space, formatting, missing
  semi-colons, dead code removal etc.)
* **refactor**: A code change that neither fixes a bug nor adds a feature
* **perf**: A code change that improves performance
* **test**: Adding missing tests or fixing them
* **chore**: Changes to the build process or auxiliary tools and libraries such as documentation
generation
* **docs**: Documentation only changes

## License

By contributing to *xFormers*, you agree that your contributions will be licensed
under the LICENSE file in the root directory of this source tree.


================================================
FILE: LICENSE
================================================
From xFormers:

Copyright (c) Facebook, Inc. and its affiliates


===

BSD 3-Clause License

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
   and IDIAP Research Institute nor the names of its contributors may be
   used to endorse or promote products derived from this software without
   specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: MANIFEST.in
================================================
include LICENSE
include requirements.txt
include version.txt

recursive-include xformers/csrc *
recursive-include third_party/cutlass/include *
recursive-include third_party/cutlass/tools/util/include *
recursive-include third_party/cutlass/examples *


================================================
FILE: README.md
================================================
<img src="./docs/assets/logo.png" width=800>

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/facebookresearch/xformers/blob/main/docs/source/xformers_mingpt.ipynb)
<br/><!--
![PyPI](https://img.shields.io/pypi/v/xformers)
![PyPI - License](https://img.shields.io/pypi/l/xformers)
[![Documentation Status](https://github.com/facebookresearch/xformers/actions/workflows/gh-pages.yml/badge.svg)](https://github.com/facebookresearch/xformers/actions/workflows/gh-pages.yml/badge.svg)
-->
[![CircleCI](https://circleci.com/gh/facebookresearch/xformers.svg?style=shield)](https://app.circleci.com/pipelines/github/facebookresearch/xformers/)
[![Codecov](https://codecov.io/gh/facebookresearch/xformers/branch/main/graph/badge.svg?token=PKGKDR4JQM)](https://codecov.io/gh/facebookresearch/xformers)
[![black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
<br/>
[![PRs welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](CONTRIBUTING.md)
<!--
[![Downloads](https://pepy.tech/badge/xformers)](https://pepy.tech/project/xformers)
-->
--------------------------------------------------------------------------------

## xFormers - Toolbox to Accelerate Research on Transformers

xFormers is:
- **Customizable building blocks**: Independent/customizable building blocks that can be used without boilerplate code. The components are domain-agnostic and xFormers is used by researchers in vision, NLP and more.
- **Research first**: xFormers contains bleeding-edge components, that are not yet available in mainstream libraries like PyTorch.
- **Built with efficiency in mind**: Because speed of iteration matters, components are as fast and memory-efficient as possible. xFormers contains its own CUDA kernels, but dispatches to other libraries when relevant.

## Installing xFormers

* **(RECOMMENDED, linux & win) Install latest stable with pip**: Requires [PyTorch 2.10.0](https://pytorch.org/get-started/locally/)

```bash
# [linux & win] cuda 12.6 version
pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu126
# [linux & win] cuda 12.8 version
pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu128
# [linux & win] cuda 13.0 version
pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu130
# [linux only] (EXPERIMENTAL) rocm 7.1 version
pip3 install -U xformers --index-url https://download.pytorch.org/whl/rocm7.1
```

* **Development binaries**:

```bash
# Same requirements as for the stable version above
pip install --pre -U xformers
```

* **Install from source**: If you want to use xFormers with another version of PyTorch, for instance (including nightly releases)

```bash
# (Optional) Makes the build much faster
pip install ninja
# Set TORCH_CUDA_ARCH_LIST if running and building on different GPU types
# NOTE: pytorch must already be installed!
pip install -v --no-build-isolation -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
# (this can take dozens of minutes)
```


## Benchmarks

**Memory-efficient MHA**
![Benchmarks for ViTS](./docs/plots/mha/mha_vit.png)
*Setup: A100 on f16, measured total time for a forward+backward pass*

Note that this is exact attention, not an approximation, just by calling [`xformers.ops.memory_efficient_attention`](https://facebookresearch.github.io/xformers/components/ops.html#xformers.ops.memory_efficient_attention)

**More benchmarks**

xFormers provides many components, and more benchmarks are available in [BENCHMARKS.md](BENCHMARKS.md).

### (Optional) Testing the installation

This command will provide information on an xFormers installation, and what kernels are built/available:

```python
python -m xformers.info
```

## Using xFormers

### Key Features

1. Optimized building blocks, beyond PyTorch primitives
   1. Memory-efficient exact attention - up to 10x faster
   2. sparse attention
   3. block-sparse attention
   4. fused softmax
   5. fused linear layer
   6. fused layer norm
   7. fused dropout(activation(x+bias))
   8. fused SwiGLU

### Install troubleshooting


If building from source fails, check the following:

* NVCC and the current CUDA runtime match. Depending on your setup, you may be able to change the CUDA runtime with `module unload cuda; module load cuda/xx.x`, possibly also `nvcc`
* the version of GCC that you're using matches the current NVCC capabilities
* the `TORCH_CUDA_ARCH_LIST` env variable is set to the architectures that you want to support. A suggested setup (slow to build but comprehensive) is `export TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.2;7.5;8.0;8.6"`
* If the build from source OOMs, it's possible to reduce the parallelism of ninja with `MAX_JOBS` (eg `MAX_JOBS=2`)
* If getting error message `Filename longer than 260 characters` on Windows, make sure long paths are enabled at OS level, and also execute the command `git config --global core.longpaths true`


### License

xFormers has a BSD-style license, as found in the [LICENSE](LICENSE) file.
It includes code from the [triton-lang/kernels](https://github.com/triton-lang/kernels) repo.

## Citing xFormers

If you use xFormers in your publication, please cite it by using the following BibTeX entry.

``` bibtex
@Misc{xFormers2022,
  author =       {Benjamin Lefaudeux and Francisco Massa and Diana Liskovich and Wenhan Xiong and Vittorio Caggiano and Sean Naren and Min Xu and Jieru Hu and Marta Tintore and Susan Zhang and Patrick Labatut and Daniel Haziza and Luca Wehrstedt and Jeremy Reizenstein and Grigory Sizov},
  title =        {xFormers: A modular and hackable Transformer modelling library},
  howpublished = {\url{https://github.com/facebookresearch/xformers}},
  year =         {2022}
}
```

## Credits

The following repositories are used in xFormers, either in close to original form or as an inspiration:

* [Sputnik](https://github.com/google-research/sputnik)
* [GE-SpMM](https://github.com/hgyhungry/ge-spmm)
* [Triton](https://github.com/openai/triton)
* [LucidRain Reformer](https://github.com/lucidrains/reformer-pytorch)
* [RevTorch](https://github.com/RobinBruegger/RevTorch)
* [Nystromformer](https://github.com/mlpen/Nystromformer)
* [FairScale](https://github.com/facebookresearch/fairscale/)
* [Pytorch Image Models](https://github.com/rwightman/pytorch-image-models)
* [CUTLASS](https://github.com/nvidia/cutlass)
* [Flash-Attention](https://github.com/HazyResearch/flash-attention)


================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

setup:
	pip install -r requirements.txt

.PHONY: help Makefile setup

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/requirements.txt
================================================
recommonmark==0.5.0
docutils==0.17.1
sphinx==5.0.0
git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
torch>=1.6.0
numpy>=1.19.5
pyre-extensions==0.0.29
jinja2==3.1.6
einops


================================================
FILE: docs/source/2d_attention_patterns.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creating complex sparsity patterns with xformers\n",
    "\n",
    "`xformers` contains optimized GPU implementations for sparse transformers, which are specially useful when dealing with large sequences like images.\n",
    "\n",
    "In this notebook, we illustrate how one can leverage some helper functions from `xformers` to construct complex sparsity patterns via a particular structure of the `attn_mask`, which are enough to re-implement axial attention, local 2d attention and many more.\n",
    "\n",
    "Let's start with some imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import torch\n",
    "import xformers.components.attention.attention_patterns as AP\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's assume our sequence length is `H * W`, and let's select the middle point in the `(H * W) ** 2` for visualization purposes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "H, W = 20, 30\n",
    "middle_point = H * W // 2 + W // 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Axial attention\n",
    "\n",
    "Different instantiations of the [axial attention](https://arxiv.org/abs/1912.12180) for images can be obtained via the following helper functions.\n",
    "\n",
    "They create distance matrices that can be used to generate the axial attention at a given distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0QAAAN9CAYAAACzbLlLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAADUoUlEQVR4nOz9e5xkeV3Y/7/en3NO3au7uvpS3dPdwy6wLMvCsmSXAReiJKhJQDTBr8YrGs2Db/LToKLfgOg3X2LEEGPQ5JtEQ9QIJnhJYqLxBsJPQESCgFEi4zLLzuzuzOzM7Mz0THfX/ZzP+/vHqZrp6emqPrPsbNdOvZ+PR+9sV3/q1Oddt/N+n9tbVBVjjDHGGGOMmUbuoCdgjDHGGGOMMQfFCiJjjDHGGGPM1LKCyBhjjDHGGDO1rCAyxhhjjDHGTC0riIwxxhhjjDFTywoiY4wxxhhjzNSygsgYY54hRORnRURF5F1f5HJURN7+JO73CyJy4ot57B3Luk1E3i4iz97jb28Xkb/6VDzOPnP4myLy5j1uf9XgOXrVzZ6DMcaYg2cFkTHGPAOISBH4usGv3ywi4RexuC8BfvaLn9UX5Tbg/wGuK4gGt9/0ggj4m8B1BRHwGdLn6DNPwxyMMcYcMCuIjDHmmeFvATPAbwNLwF9/sgtS1U+o6smnamK3GlXdHDxHmwc9F2OMMTefFUTGGPPM8G3ABvDtQBt4w84/ikhZRP5CRD4pItGO279SRLyIfNeO2645ZE5Enisivygix0WkLSIPi8hPi8jck5moiHy3iPyRiFwUkUsi8gkRee2Ov78K+P3Br783mI8OD1Ub3P5DO27fOdcvE5EPiciWiDRF5P0i8sJdj/9hEfmYiHy5iHxGRFoi8r9F5G/uGPMLpM/p6o7HOTGc3+5D5iT1fSLyoIj0RORxEfnXIjKz67FVRH5URN40eD63ROQjInL3k3kujTHG3HxWEBljzIQTkUPAlwO/oqpPAP8d+OqdBYuqNoFvBF4M/JPB/ZaA9wK/qar/ZsxDHAJOAt8L/DXgR4BXk+6NejJuIz0k7+uAvw18CvhNEfkbg79/BhgWaG8iPTxteIjalwxu/4Udt//sIJ7XAh8CtoFvAb4JqAJ/ICLru+bwHOBfAu8CXg88DvwXEXnu4O//ZBDfEzse52+Niekdg2X9HvA64MdJi9PfEpHd69JvAV4LfA/wd4DDwK9/kYc5GmOMuUnsy9kYYybft5JuwHrv4Pf3kBY/fxv4meEgVf0TEXkr8C9E5IPADwAJ8B3jFq6qHwU+OvxdRD4OPERaaLxEVf/kRiarqj+wY1mOtIh5HvD3gN9R1U0R+dxgyFFV/cSOu39CRABO7bod0gLnI6r6NTuW//vAw8D3kxZ0QwvAl6rqscG4z5AWRV8P/JiqfkFEngB6ezzONUSkTnqu0XtU9bsHN79/cP9fBL4K+I0dd+kDX6Wq/cH9Af4zcAT4+LjHMsYY8/SzPUTGGDP53gAcU9U/Gvz+QeA0uw6bG/gp4HeB3wS+EniDqp4ft3ARyYnI2waH3LVJE/o/GPz5zhudrIjcJyK/KSJngXiwvK94Msvascw7SPf6/CcRCYc/QAv4I+BLd93l2LAYAlDVc8A50r01N+rlQB74j7tu/2XS+L5s1+2/NyyGBj47+PfJPLYxxpibzAoiY4yZYCLyUuAFwK+JSE1EaqSHif0a8CUi8ryd41VVSfda5IE/VdUPZXiYfwq8nTThfy3pnozXD/5WuMH5rpPuEaoD/wB4AHgpaZF2Q8vaZWnw78+RFlg7f74KmN81/uIey+g+yTnUB/8+vvNGVY2BCzv+Puqxu4N/v5j4jTHG3CR2yJwxxky2bxv8+5bBz25vAH54+IuILJPuJfoM8BIR+R5V/Zf7PMY3AO9V1R/dsZzKk5zvXwdmga/feSU7ESk9yeUNXRj8+4Oke8h2632Ryx9nWOAsA38+vHGwh2p+x9yM
McY8A1lBZIwxE0pEcqTFyv8E3rrHkJ8EvlVE/m9VVUlPVnkPaXHwFaSF0j8Tkd9X1T8b81Al0j0tO/2dJzntYeFzZXmDvVivIL1ww9Bwr0lxj2X09rj9QeAEcLeqvvNJzm237ojH3+0Tg7HfQLr3a+hvk65HP/IUzccYY8wBsILIGGMm1/BQsO9X1Q/v/qOI/Dvgp4FXkV7G+s2kV6P7q6p6cXCBhVcBvyQi96tqe8Tj/C7wbSLyWdKLKbye9FC3J+ODpOfVvFdE/gWwAvxj4FGuPUz784Nx3yEiF0kLjgdVdQv4HPBaEfld0kuNn1bV04NLh//6oFD8VeA80BjM9VFVfdcNzvVzQF1E/j7plfA6qvrZ3YMGz+W7gB8UkSbp1enuAn4U+BjwWzf4uMYYYyaInUNkjDGT69uALdIrlO3ll0h7En2biLwE+DHgn6rqRwBUtUd6NbrbSC8ZPco/IL1K2juAXyE9R+kbn8yEVfXPgW8GnjVY5j8k3bv10V3jLgDfTXqZ8I8AfwzcN/jzdwNN4H8Mbn/j4D6/TXrxhDLppbjfT3r562XSCyvcqJ8lvTDCjwGfHDzeKD9EWnD+DdILVryV9Kp/r1VV/yQe2xhjzISQ9PxbY4wxxhhjjJk+tofIGGOMMcYYM7WsIDLGGGOMMcZMLSuIjDHGGGOMMVPLCiJjjDHGGGPM1LKCyBhjjDHGGDO1rCAyxhhjjDHGTC0riIwxxhhjjDFTywoiY4wxxhhjzNSygsgYY4wxxhgztawgMsYYY4wxxkwtK4iMMcYYY4wxU8sKImOMMcYYY8zUsoLIGGOMMcYYM7WsIDLGGGOMMcZMLSuIjDHGGGOMMVPLCiJjjDHGGGPM1LKCyBhjjDHGGDO1rCAyxhhjjDHGTC0riIwxxhhjjDFTywoiY4wxxhhjzNSygsgYY4wxxhgztawgMsYYY4wxxkwtK4iMMcYYY4wxU8sKImOMMcYYY8zUsoLIGGOMMcYYM7WsIDLGGGOMMcZMLSuIjDHGGGOMMVPLCiJjjDHGGGPM1LKCyBhjjDHGGDO1rCAyxhhjjDHGTC0riIwxxhhjjDFTywoiY4wxxhhjzNSygsgYY4wxxhgztawgMsYYY4wxxkwtK4iMMcYYY4wxU8sKImOMMcYYY8zUsoLIGGOMMcYYM7WsIDLGGGOMMcZMLSuIjDHGGGOMMVPLCiJjjDHGGGPM1LKCyBhjjDHGGDO1rCAyxhhjjDHGTC0riIwxxhhjjDFTywoiY4wxxhhjzNSygsgYY4wxxhgztawgMsYYY4wxxkwtK4iMMcYYY4wxU8sKImOMMcYYY8zUsoLIGGOMMcYYM7WsIDLGGGOMMcZMLSuIjDHGGGOMMVPLCiJjjDHGGGPM1LKCyBhjjDHGGDO1rCAyxhhjjDHGTC0riIwxxhhjjDFTywoiY4wxxhhjzNSygsgYY4wxxhgztawgMsYYY4wxxkwtK4iMMcYYY4wxU8sKImOMMcYYY8zUsoLIGGOMMcYYM7WsIDLGGGOMMcZMLSuIjDHGGGOMMVPLCiJjjDHGGGPM1LKCyBhjjDHGGDO1rCAyxhhjjDHGTC0riIwxxhhjjDFTywoiY4wxxhhjzNSygsgYY4wxxhgztawgMsYYY4wxxkwtK4iMMcYYY4wxU8sKImOMMcYYY8zUsoLoaSAit4mIikg4+P3DIvJ3x4x/gYh86umbYTYi8tUi8stj/v61IvJ/DeN8GueVF5HPicjy0/m4owzm8xcisjTi788RkbeLyAtuwmP/goj86OD//7KIPPhUP8atTET+XEReddDzMGaa2Dryps/L1pFXl23ryC/CrbyOtILoBonICRFpi8j2jp9DT/HD/BPgJ3Y95pfvmse3i8jH9pnr20XktjF/f1BEvn7H768YrJR237YtIqGq/gbwQhG5Z49l/W3gZ4FvBn5eRGTX339CRI6JyNbgi/AN4+Z+g94IfFRVzzyZOw9Wxm8f8/cfFJHf3nXbsRG3
fYOqdoGfB96yx7KWgQ8AfwX4gIgc3vX314rIx0TkkoicEZF/LyLVJxOXqv6Bqt6537jB++Q/PpnHeKbYuRIcR1XvVtUPPw1TMuaWZOtIW0cObrN15DOIrSOtIHqyXqeqlR0/p5+qBYvICukXwX//IpbxNhH5y4NfQxH5IRF5+R5DPwp82Y7fvxT4iz1u+7iqxoPff4n0y3Xn43058FPAVwzGPxv48V2P1QReB8wC3wb8SxF54AZDG+X/BH7xRu8kIi8XkR8Chlslv1RE3rbH0I8CrxCRYDBuGYiAv7TrtucOxgK8D/g2EcnveLwZ4HeA96nqlwE/CfyuiMzveKxZ4EeBQ8BdwBrwz280NnNj5GneYmvMLc7Wkdc+nq0jbR35jDYV60hVtZ8b+AFOAF++3+3A24H/OPj/2wAFwsHvHwb+7ojlvwH44H6PCXw78LERyygD7yD94v4g8LdGjPtW4LM7fv/twXJ33/bDO35/BXB8x+/3A18A7tn1+B8CfmDM8/gbwPeP+NtbgE/seL7+PvDnQGGPsYeB9o6xOeB/Af9g8HsA/CHwj0Y81tcAvzd4rv4pUNljTA5oAfcNfv964D8AH9l120O77ncM+LLB/+eB3wd+cNeY/x/wcaA8Yn6v3/l67PH3lwCfAbaAXwF+GfjRwd9eBZzc9byeGox9EHg18NeBHtAHtoE/HYz9O8DRwdiHgf9zx3JeBZwEvh84BzwO/J0dfy8C/wJ4BLgMfAwoDv728kG8l4A/BV61z2ft/wL+jDRZ+DmgQbrC3CJ9b8/tGP+fgTODx/wocPfg9jcO4usNYvwfO5b/lsHyu6Qr/RMMPmuk7/1/sWP5vwL8/FP1XWI/9nMr/mDrSFtH2jryVdg68hn3Y3uIJs+LSD+IXyzd8W8yYsxHgLtFpC4ijvSL+1eA2o7bHuDqFh1IvwBuG2zJQVU/parPUdU/u/LAqk1VfbWq/gR7EJEi8FLSL/C9/HPSD+YPi8gdwI8B36KqnT3Gvgh4WAdb51S1B3wL8CMichfwVtIv/HeMeCzd8f/Jrt+H8fSA/0m6ZY/Bv39A+iW287aP7rrrUeDFg2V0VfWvqOo/3bXsf6uqD6hqc8T8vpQRz5OI5Ei3kv4iUCf9svvaEWPvBL4beKmqVoG/BpxQ1d8lfX5/RdMtuS8e3OUc8FXADOkX/0+KyF/aschl0i11q8B3Av9GROYGf/sJ4D7S904d+IeAF5FV4LdIt+7VgR8A/quILI6InUE8XwE8j3Tr6e8AbwMWSPdwv2nH2N8B7gCWSFeA/wlAVd89+P8fH8T4uh33+UbgtUBNr27hHfoO4FtF5K+KyDeTvme/Z8xcjTE3n60jbR25k60jbR35lLCC6Mn574PjVy+JyH9/ipddI63sxz3mJeDfjlnG95Aeg/vLpFuO7tnrcABVfRR4FPjLpF9Kx1S1Tbq1aHhbgfSLbmg4t1r2kK7zM6RbPt6/1x9V1ZNuBXwT6VayH1fVPxmxrBq7ni9V/d+kXyj/jfQL5VtV9boV3uA5uYf0Ofpl0uds1If5I1z9Yv/LpF/2f7Drto/sus8WX8TzJCJfQXroxD8aMeTlpIcl/JSq9lX1vwB/PGJsQroF7gUiEqnqCVX9wqjHVtXfUtUvaOojpM/NX94xpA/8yOBxf5t0q9KdgwThO4DvUdVTqpqo6sc1PWb8W4DfVtXfVlWvqr8HfAp4zZin4f9V1bOqeor0+f6fqvong+X9N9Ktf8M5/7yqbg3+9nbgxSIyO2bZAP9KVR8bvO93PwdngL8HvAf4l8AbVHWvz6Yx5lq2jrR1pK0jbR35jGIF0ZPzN1W1Nvj5m0/xsjeAvU4Q3PmYNdLdyHtS1R9T1eGWmFhVf1RVPzFi+EdJv7CGW3Tg6ladLyX9cHV3jB/O7VKWYHYTkX8OvBD4elW9bkvTjhhOkO4+vw34N2MWOer5es/gvr+tqsdGPMYnVPVHgeGWs4+q6o+NeJyPAq8cbOFZHCzz
48ADg9teyPVbv6o8+efp5aTHWP8fqvr5EcMOAad2PY+P7DVQVR8Cvpf0S/CciPyyjDnRWUT+hoh8QkQuDpKL15BucRq6sGtrUQuoDMYUSA8R2e1ZwNftSlpeCayMmgdwdsf/t/f4vTKYbyAi7xSRL4jIJulufXbNeS+P7fP33yTdevqgqo49QdsYc4WtI20daetIW0c+o1hB9NRpAqUdvz/Zy1v+Gemuzy+aqr598KU5zvDLfrhFB65u1fnLXP8FdhfpbuTNG52PiPxj4G8AX7nf/UXkNcCXkB5nPe6EyT8Dnr3HCX//lvSD+tdE5JXjHmuwJejt+0z/j0h3f7+RdOsggxhOD247rarHd93nLtKtfDdERF5CutXvO1T1Q2OGPg6silxztaLDowar6vtU9ZWkX7oK/LPhn3Y9fh74r6S79RuD5OK3gWuuijTCeaADPGePvz0G/OLOpEVVy6r6zgzL3c83kR7r/uWkr9Ntg9uHcx6VWIxMOAbeQXpYx4qIfOMXOUdjppmtI/dh68hsbB35pNg6ch9WED11/hfwDSISicj9wP/xJJfze6RXZik8ZTMb76Oku1S/jMGXGPBZ4HbSK/ns/rL/MtLjUG+IiPwg6QfyK1T1wj5jF0hPDvy7pLvDXzf48r+Oqp4kPTHzyI77fyvp8bnfTnpIwXtEpHKjc971OG3SXddv5upKEdIthW9m1/M0OBa4Tnria2Yi8kLgd0lPeP0f+wz/I9Itd28SkVBEXs+O52HXcu8cHOebJ/0ybnP1uPmzpMe8D78PcqSHDjwBxCLyN4CvzDL/waEcPw+8S0QODbZKfcngcf8j6Wv51wa3F0TkVSKylmXZ+6iSnvR5gTTp2r0V8yzplZ0yE5EvJT02/A2Dn/938LoaY27c/8LWkSPZOjIbW0c+abaO3IcVRE+d/5u04t8A/jHprtwbpqpngf8/aSV/0w12NZ8DHlfVS4PbPPBJ0pMFP77rLt8I/Lsn8VA/Rrpl5phc7U2x1+U7Ad4N/PrgONoLpCck/qxce+nNnf4d6dWAkLRnwU+RHsu6rarvI/2S/sknMefdPkJ6MuLO3cJ/MLht90rxm4D37DqUIovvBxaBn9vxPO15wqimJ7K+nnSltgH8beDXRiw3D7yTdOvUmcGch8//fx78e0FEPjM4BvhNwK8OlvtNpFvjsvoB0oThj4GLpFvZnKo+Rvq+fhvpiuQx0ivkPBXfQ+8lPRTiFPA5rl/J/hzpseGZzmmQ9ITo9wLfPTjO+2ODZfyHXVsbjTHZ2DpyPFtHZmPryCfH1pH7kDGHqJoDIml35vcAR8YdQ/x0E5HXkZ58+fX7Dn4aDbas/AnwalV9fELm86fAl6rquYOejzHG3EpsHXljbB1pzP6sIDLGGGOMMcZMLTtk7ikgIn9dRB4UkYdE5K0HPR9jjDFmEtj60RjzTGB7iL5IIhIAnydtjHWS9JjQb1TVzx3oxIwxxpgDZOtHY8wzhe0h+uIdAR5S1YcHJ/D9Mk/TyZ7GGGPMBLP1ozHmGcEKoi/eKtc2rjo5uM0YY4yZZrZ+NMY8I+xu1GVu3F6XF7zuOEQReSNpczICgvtKzGRbeOAg6xUM3Q2MFUFdxnpYQF3W5YIG2a+4qDdQkqsDzbpoufFlZxbcwFin2Vq1ASKKSLZDWEWU4AbGhs5nmwQQSZL5bRRJjNu3b1sqFE9wpa3DeIEoIdnm7IBIsr0oJx7rc/5i8oy8JKgxz0BPYv0Y3lfOzaO5AB85fAQ+Ag0VF3qKUZ9y0KXsupx6dBHX6qG56PqxkScfxpSDHpWgw6NPLJI730FzERo5kkgGY0EiTy6MKYV9qq7No806hUd7EIX4XIAfjPUhEHmiMKEU9qkEHTbjIv1jAmGAz4W7xiphmFAM0zk7US4eq4Bi8Vl8ExdfxXX53Gf751V18WZ9IUwyK4i+eCeB9R2/r5F2Zr6Gqr6btHcAM1LXl8mrMy08qMxAlO1lklIJzUeZxpLP4Uu5
TEN96IjLGd8qTuhVg8xFQL/k8BkLjKQASS7bgn0EST7bcjWAuJTxXDqBpOwzx6elGAmyLTvKx0RRtoIhH/Wp5nvZlhskzBeamcY6lJXC5cwF1EruEgXpZxq7GG5RC7LNo+o6LLps7SkKAithtp6CR/7aY/sPMsY8VW54/TibX9aXF16Dm5+jtz5P81Ce5rKj3VD6izG1pS3uWjjLS2Yf5dff/uXMfOAoUq0Qry/QXC3SbAS0G9BtxJQWm9yxcJ4jcyf4T7/0ap71038OUQ6/tkR7rcz2SkB7Seg0EqKlNrctXOS++qP80p8c4a63PAJxjK426KxV2V4JaS0LnUWPNLqsLW5wT/0UD15uEH476OVNWF6ku16juRLRXHZ0lpRkqcfS0mXurp+hnmvy2TfejTz4iMVn8U1cfC+bfZi///yPPnLzvxYmkxVEX7w/Bu4QkdtJG159A2mTLmOMMWaa3fD6UXMBbn4Of2GDdJPdPGnPTAeEXKLKUSBWR7fqYHkRPfME4WNQZgEoku7GD2lR5thguT4CXVtBTp3FnTxHkSWgPBgb0KHICerp4L7g15ZwJ88hp85SAKBKmjI5OuQ5yRxeha1OnsX1PJH36JknSLfD1YBoMDbHOWYBqBdbdJaKlM9bfBbf5MWX2t0/d3pYQfRFUtVYRL4beD/pO/fnVXXPrsnGGGPMtHgy60cfOXrr8+RgbNL5oArMCN31GnnYN+n0kdJZq1KA/ZPORGivlSkyPuk8TQ2A8qEcZb9A+BhXk06tcSXp1DTp3J7JU1oMiSw+i28C4/OZz0m4NVlB9BRQ1d8Gfvug52GMMcZMkhtdP/oImofywPikc5MKxSo0VyJg/6QzCmF7JQT2TzrFC9srAuyfdPqSp9kIQIuU2ZV0So2dW+KbfYdbEMKOxWfxTV58T2i2c9tvVVYQGWOMMWYi+Aiay440uRyfdPYrOhi7f9IZl5TWspCmPeOTzl7N016Swe+jk05RR2deaC+RnozKHknnzsOTNKRbVyS2+Cy+CYyPbOeV36qsIDLGGGPMRNBQaTeUNKkcn3Ru3hnTkWAwdnzSufk8pbOoiKbJ6rikM66kJ7IPE9BxSWeSEzor8WCZ45NODRzN22P0ypwtPotvcuKTG7rc7q3HCiJjjDHGTAQXevqLwwRuTNIpebbv75OUksGW7UHSqXsnnZdfAG6uR5s8KuOTzu31gGipTefKFvzRSWd3LkIWW7SlxH5JZ5zPEd/XphsULD6Lb+LiS4u26WUFkTHGGGMmQjHqU1va4tKOcyH2TDrdPKVKl3K+xzlmr00699gSr2GBQwuXOE2Nzo7DmvZKOt1LZjm8cJET1PdNOnNrcxxeuMBxUVqUGZd0lirzzNYvczHft/gsvomLL73QwvSygsgYY4wxE6EcdLlr4SxHYXzS6RwLFeXZ1QsA+yadRIe4d/4kwJWkc+fhSUVVOH0Od/IcLp7lvvqjABzX8UlnfrPGfXOPEornGIxNOov1Ms+ZPcelYtHis/gmLr68ZuzHeIuygsgY0j6rz6SvAr2By2M+mUtpZrmPkxt/xpIpP0bZGDNe2XV5yeyjxOp4UIVNKlxJOiUPbp6cc+jmFo2S8tKZ43iExDvOa/VK0qmSQ12NQuDQM+eR0HN/5TixBngVHtfZq4cnSYjKDMVAkNNPIAkcqTxMXwM8wiNAR68mnerKlIIGwcknCFueI+UvAGnvmYdVaOvg8CQJQIqU3CLRY47gcpu7y6doFXMWn8U3cfHpExdu3gf7GUB0yivCgzAbLuiXVL4m01jf7qBJkmlsUClDsZBprORyaCnbWMKAeCbjWIF+NQLJloT3Kw4fZBsbFwSf8RDXJCckGaesAcTFbGMRiEtK1rzelzwaZPyM5ROCnM80NAgTCvl+prFhkFArdjIVMA5lsbiNk2zzWClsEkm292cj2qQatDONrboOi+FmprEF6bMedDONfc1rzvOnf9af7mYLxkywam1NX/SV30u36ujN
CP0q9CtKPJPgqn1KlS4LlSaN0haf/8U7KZ/1dGcc/arQq0J/RokrHqoxhUqXeqXFocpl/uxDd7L0mZhe1Q3GQ7+qxBXFV2Py1S61Spvl8hZ/+uBhDv+G0BvMoVcdzKPq8ZWEqNpjttpmsbzN45szzPz7GeKSozsj6Zwr6bKTakJQ7VOttFmqbBM4z+WfPox4tfgsvomL71D5Mu992X/4tKref9DfAwfB9hAdBBGIsj31up2Az5ZwapIgcZxtDs5BxkILwMXZEmQVcLGiWfYeOEGSNAnPNIcE1GXLZV2i+CR73psxpwcBUSDr+ASEbPPQRDLPWcSR+Ox7WxLvMk1ZROn5ACfZlt31YXoEQAYdDYkyVrSRJPQ1yLZgoKPZCqJs72JjzEFxrR4zHzgKy4t012s0VyKay46OBCSlhHK+x7OrF3jpzHFOnb2Dygc/x8z8HL31eZqH8jSXHe2Go19w5KOYw9UNjtSO8+edO6n8/l8g1Qrx+gLN1SLNRkC7IXQLjiDwLJe3ODJ3gj+N1ij/4cOUwxC/tkR7rcz2SkB7ydGJFJlV6sUW99ZOAmvIpzbQbg9dbdBZq7K9EtJuCO3AQRWqhS53zp5lLmrxyQdnkeOnLD6Lb+Lie6D6EO892I//gbKCyBhjjDETQXMRUq2M7JNyjlkAPEJ3xjEzPzeyD8wlqhwdLjcElhfHNsc8NpxE36GrjbHNMU9QB+Biu8T8Wg73yNnrLnmskjbHPE0NJ0q90KTbKFM8b/FZfJMXn1cH/DHTygoiY4wxxkwEHzni9fHNI88xS+IdcVXorY9vjjlMOn2kdNfGN8e8knQmQmdtfHPMYdKpXiiu5ikle/eBSeec5yRzXK4UyC+GRBafxTeB8T2Z841vJVYQGWOMMWYi+Aiaq+P6pDg6kuO8VilVoXlofHPMYdIZhdA8NL455jDplL6wvRIwrjnmMOnUvKfZCBA/ujnmMOm81HPMLDjCjsVn8U1efKfsstvGGGOMMQfPR9Bs7Nc80tHRHP2q0lwe0xxzR9KpIYOx+yedccXTbgh7XfJ4d9LZrQvtJWFcc8xh0tn1Ed06uL7FZ/FNXnydQRTTygoiY4wxxkwEDZV2A8Y3j0yTzuazEto4RjbH3JF0bt/m6SzClT4wMjrp3M4L7SWfXvKYEJihILLnlngfOTpLCeOaY+5MOturO8dafBbf5MQnU94WwwoiY4wxxkwEF3m6jZiRzSMBpIZKjuYLY/qFYVI4Punceq6SzPXHNsccJp2tQw5d7w62mF9tjrnX4Um9mRB/Z5uOjG6OOUw6fS6k96Iu3TBn8Vl8Exef6HSXBNMdvTHGGGMmRj6MKS02aVFmXNKpUmOr0iUfxVzasQV/VNKpUURj6TLnmN036bzwwgqNhUucpkaH/GDL+d5JZ+7QDMuLG2lzTMYnnf3yLMX6Jhv5osVn8U1cfOmeoullBZExxhhjJkI56HHHwnmOwdiksxA46pWEw9UNjsL+SWe4xN31MwD7Jp2uX+He+ZM4UU4yR5v8jsOTrk06c8+vcu/cSRzKCamPTTqLtQJrtSfYKJYsPotv8uLTjE3kb1FWEBljjDFmIlSCDkfmTgDsk3Se51DFcaR2HGDfpFPCRR6YfejK41yXdOrVpFMSeOXM5wHwKmO3xOe2PK+oHiMadPc+weikM3pimxdXH6NVylt8Ft/ExcepM0wz0SmvCA/CbL6hDyx/U6axenkTTZJsC04Ssr6eLp9HyqVsyw1DdKacaaiKkMzkQbJdz75fjdAg2zTioiOJsi03yQlJIdtyfSDEGZ8KBOIyZL1cf1JUNMj2mviCopHPtuDIE+azvS9c4CkXu4jsPw8nUC+2Mo0FWCpukXfZ5lHPNakGnUxjK0GHxXAr09iC9FmPLmQa+3de9zhH/6w73c0WjJlgxZV1ve073oyP0t4sGoIPQSNFw/SHyCOhJ3eiQNBJm1r6SNNx4fB+ikYKoUdCJTibI3dJ0CC9kp2G6fhrxw7G
b4WUTgf4ADQCPxirw8eI/GCsQiyUH45Qx645Kz4CQkXDwXinFB/KI373WIvP4jv4+Fwu4cS3/NCnVfX+g/0WOBi2h+ggiKD5KNvYYgGJ40xD/XYT7XazjQVclPHl9x662eYrzuF6YeaCyPUCsl7YJHA3lsdqxvESKD5joYWA62UviHwE+KzzgHRr0P4USDIWWqpCL872Woso7SjCZSyItvt5ekG2gijKWDgNFaSfaVzfBVxKSgSyfzGZTHnjOWMmXe58h2f9zFF0tUFnrcr2SkhrWegsKm6ux9riBvfUT3F/5Tg/+2uvp/L7fwHLi3TXazRXIprLjs4SJHN9GkuXubt+hgdmH+Jd7309h//1Z3Hzc/TW52keytNcdrQb0F9MqC1tcdfCWY7UjvOv/vjVHP7ho0i1Qry+QHO1SLMR0G4I3UZ6jtMdC+c5MneCT27cRu9tMYjDry3RXiuzvRLQXnJ0GgnRQpvbFi5yX/1RZsM2H/mJvwSnz1l8Ft/ExfeK6uf56oP+AjhAVhAZY4wxZiJoLoIw3POSwG3ynGQOr0KsAb2qQ6qVPS953NEc55i9utwA3Pzc2OaYR4dj+w6WF8c2xzw2GHumWWXuUIg8du3VvUTTQ5I6FDmu9fQRC016S2Vy5yw+i2/y4ktwwP9mWllBZIwxxpiJoFG6JXuvPikqafPI09TwKnRnHPH69Zc8vpJ0cjXp9BH01sc3x7ySdMZCd318c8xh0pkkjsJqgaK//pLHKleTzhNSZ6NSJFzKEVp8Ft8ExjftrCAyxhhjzERIIqG9Nr45Zoc8j+ssxSo0V/foA0ON3UlnFCrNQ+ObYw6TTomF5sroPjA7k04iT3MlRHTvPjDDk9c7FLnQC5hZcEQti8/im7z4hnuKppUVRMYYY4yZCD6C7ZVxfVKuJp39qtJs7N0HZnfS6UNoLo9vjjlMOpOCDsbun3T2Zz2thqRny49ojjlMOrtJns48uL7FZ/FNXnwdLTLNrCAyxhhjzETQENpLMjjHYZ+ks+FpN4RRzTF3Jp2tQ552A8Y1xxwmna1D0FlU9rrk8e6kUwOhs+jHNsccJp3qHL26R7yz+Cy+iYsv/ZleVhAZY4wxZiJI5Ok0EiAYnOMwIumUkNZzY7qFYaI3PulsHlb6iwljm2MOks7OktBf7Y1tjjlMOvvVgPi2zt7NMU8+fk3S6aOQ3h1dOkHO4rP4Ji4+ydoD5RZlBZExxhhjJkIujImW2iObRw6TTpUqW9UuQeDHNMe8mnRqFFBb2hrbHHOYdG7cVWRp6fL1zTH3SDqbjQq1hcuckVk6Ow5r2mtLfL9UJapvczlXtPgsvomLD83Wb/JWZQWRMcYYYyZCKexz28JFjmt9bNJZdFCrxCyXtzgG+yedYZ27Fs5yFPZNOqW/xt31MwDXJ51ybdKZf26Je+ZP40Q5TW1s0lmq5VmoXWCjULL4LL6Jiy/dUzS9rCAyxhhjzESoujb31R8F4ISMSTpPP8FyOeTI3AmAfZNOwhpHascB9k06XbLGA7MPXZnTuC3xuS3Py6sPEUqCE+UkcyOTztzZbe6pnmK7lLf4LL6Ji889doZpJqrZOtJPCxH5eeCrgHOq+sLBbXXgV4DbgBPA16vqxuBvPwh8J5AAb1LV9+/3GLPFFf2S535ntvlsNiGOM43VZgvf7WZbrggE2Y4XlSBAZmcyjcUJvlqGQDINTyp5NMw2Ni6GJPlsY30kxIWMYwMhLmUaijohLoNmWzRxCdRl+4z5PPhctrEaKZpPsk0iUHKlfqahIkq11EUk2zzqxRa5INs8arkWlbCXaWw17LAQbWUaW5CYQ9FGprFv/Vt/wRc+28z46hljdno61o/5Z6/q8j96E/QFEkH6DtcHFwvSB9cXXB/EQ/M5fYg89N1grOD6g3/jHWMTaK0mMBOnTS1jQeKr44djpQ8uge6ckix3B2PdNWNl13J9HrrP7qRj+w4ZzOPqHLhyPwRad3TTFYjFZ/FNWHzi4eg7
3/xpVb3/qf/2mHy2h+h6vwD8a+C9O257K/AhVX2niLx18PtbROQFwDcAdwOHgA+KyPNUdWyGqM7hS7lMk3FxAkm2hFMAF2V7SbXTxTebmcbiAoKMy0UEFwSoy5BziuCcg9BlWzYgcbaxPu9It4BkGBuQbb4AomggmQsidTewbBR8trGagM94RRgNlH6QrcARUVqBRzJOOQoSIuezDQZ6Ptv7qJtk/2qKJCGSbBsNYs3+XjPGXOcXuMnrx8KjPe56yyP4tSXaa2W2V4T2ktBpJERLbQ4vXOS++qMcqTzMT37fN1H+w4fR1QadtSrbKwGt5fSqWrrepbFwiXvnT/LKmc/zIz//zRz+4aOwvEh3vUZzJaK57OgsKv3VHktLl7m7foYHZh/iHZ98DXd+9zHc/By99Xmah/I0lx3thtJfjKktbXHXwlmO1I7z8Y3nsP06j5SKxOsLNFeLNBuOdgO6jZjSYpM7Fs5zZO4EpaDLB15/P1y8bPFZfBMX3ysrD/JX3nlTvjeeEawg2kVVPyoit+26+WuAVw3+/z3Ah4G3DG7/ZVXtAsdF5CHgCPBHT8tkjTHGmKfJ07J+jEKI45HNI0+QNo/sa0Cv6iiH4cg+MKep4QZ7u30AUq2MbY55Jc6+w83PjW2OeXQw9vT2LDPLAXrq7MjmmMcGY+fzTfqLFaJzFyw+i2/i4vMIcPVQvGljBVE2DVV9HEBVHxeR4Zlnq8Andow7ObjNGGOMmQZP6frR5wJ0tTG6eeQg6fQIvRnBr41vjnmSObwKGkG8PqY5puxIOmNHb318c8xh0tnth+RW8xQSP7I55jDpPF8uQyNPeMnis/gmL760IJpeVhB9cfZ69+x5bJKIvBF4I0AhN7vXEGOMMeZW8aTWj7nyHJ21KkVVOH1uZNL5iEK+KrTX9mmOOdgSH4RKc3V8c8zhlniJheah8c0xh0mnRJ7mSg7xo5tjDpPOXjdiZsERNS0+i2/y4vvCXh/OKWIFUTZnRWRlsPVrBTg3uP0ksL5j3Bpweq8FqOq7gXcDzJRX7UoWxhhjbgVP6fqxuLKu2yshMENBZPSWeC3iqrC9sncfmN1Jpw+h2RhxyWOtcSXp1Bw+pzSXxzfHHCadcdXTagii11/yeHfS2YuFzrzgehafxTd58bWsD5HJ4DeAbwPeOfj313fc/j4ReRfpSaN3AJ88kBkaY4wxT7+ndP3oQ2gtC6P7pFxNOrt1T3vJMao55jDpFHV0Fj3thjCyD4zUuJJ0LkG7oYxrjjlMOtuSnvg+rjnm1aQznTM6rg+MxWfxHUx8otNdEkx39HsQkV8iPUF0QUROAv8P6Rf9r4rIdwKPAl8HoKp/LiK/CnwOiIHv2u8KOsYYY8wz0dOyfow8nUWPZEg624cTOpEyqjnmzqSztZJeVWtsc8zB4UndutBfHI4dn3TGJUfred2RfWB2Jp0+Cuge7tNxkcVn8U1cfGS8cu2tygqiXVT1G0f86dUjxr8DeMfNm5Exxhhz8J6O9WMUJkijS5s8KqOTTnVltqo9ZFZHN8fckXSef6lQWmyObY45TDov3ZGntrQ1tjnmMOlsLxYpLW5xQar7Jp1xoYSrN9nKFS0+i2/i4hMt7v2hnBJWEBljjDFmIpTCPmuLG5xkjs6OcyF2J52loMFstUe92OIE9f2TzrDKHQvnOQb7Jp2u3+CuhbMchX2TzujZq9y98DhHneccs9cmnXpt0lmaOcRybYNLxY7FZ/FNXnx+fv8P6C3MCiJjjDHGTIRK0OGe+im8Cqep0SG/5+FJwcknWCwH3Fs7CbBv0klY4f65RwD2TTolaXCkdhxg36Qzv7nCy2Yexg0uoHdd0rljS3z+bJMXzp5mMy5YfBbfxMUXPpa9yfqtyAqigyDgQ5dpqAtv4JjOMASf7Q0tQQwu27IlCKAfZ5uDE/Ae0WzXs5ck2fs6rHuNjRXJ+HRIkv5k4VBcImSasgyWnfFy/XID3y/iBfEZn41EIMk2VhA0yTph
IUlcpvhElH6S/f3ZSwLCjC9gzjm6PuPXk4OORpmGTnufBWMm3WZc5MHLDbY6eRDFlzydeSHJCb1aRLQ2R36zRtRMuLy5hZNVLrZLqBe04OnWBR85ejMhuZUZcs+vkt9MIIbPXFrnbKtCkjiIPP1ZjwZCvxrQWiqTe06J/KbH5+HjG8/hTHOGXhwgkSeueto44pKjvVgkd/sq+c1lWkuOj248j8dbM7R6ERJ44mpCRwOSvKM9nyf3rCXymwuog89srJN4Z/FZfBMXX9jyI64DOR2sIDoA6oS4nPWpL+DibFm1CwPoZksMJZcjiDLOoR+TbG5mGytCIC4tjDJwgGYs+sT7weUu9+dzAZIxodaMcwUGRZPLXDyB4LMWcR58mG0uPgf4bEW1BuAzftRVlJ5Ee3cQ2Wu8CkGQ7f3ZTwI6Ubb3ZyvO0cv4+uVcxmId6E/5VXSMmXT9Y0L47bC4nqd8KE+zEdBegs5KjNzTYn3hAvfNPcqR8hf48e99A/qpDebXchRXh2OFzlKCv7PN8uIG986d5BXVY/zwz72Bzts8tUMR+dUCzZWQVkPoLHri2zrUFi5zz/xpXl59iLd/8qvZfp2nshwSrhZoruQGY5XW87qUFrd4wcLjvGzmYT68cSeXvsZRmhHC9QLNlTzNhqOzpLRv6xMtbnP7wjleWjtByfX4ra99OeF22+Kz+CYuvgdKx/jS2w/6G+DgWHZgDp7epLZMWfe23EBBdEOs29TTxqvgxJ5wY57xwgC9vEnkPWW/AFpMt+oQ0pYSx0UJB7ve45JDuz3cI2cpJUuI39EHRoo8QnoEQCRJeqVkcchjZyl6EK2Cppc8bpPnjMziRAklQfsOKRXRU2cpJB7RWtrHZdAc84JUOeo8DuVsq0ppRvBPXCDnPfh50PTcEpWQTSocG8y5nmsR10qEp85afBbfxMUXSQI8fNM+2pPOCiJjjDHGTASfC2F5cXTzSMocA2J1dGcEXW2Mbo5JkRNST5cbgV/b+5LHKmlzzNPU0g0rfUe8vvclj4dJ5zlmAWh2cyyv58l5P7I55iWqHAXq5Rb9pTyVCxafxTd58U37IeVWEBljjDFmIvhI6K6P65OSJp0PqxDOCJ218c0xOxQ5QR0ipb02ug9MmkjmOckckgjN1dF9YHYmnRJ4mit58KObYw6TzvZsRGUhILL4LL4JjO9oxnO/b1VWEBljjDFmIvgImivXXxJ4d9LZ1hKFCmyvjG+OOUw6wxC2V0b3gdmZdIqDZsMxrjnmMOmMq0k6Vkc3xxwmnd2kSLgghF2Lz+KbvPg2qTDNrCAyxhhjzETwITSXRzeP3Jl09qtKuyGMao65M+ns1TztJYfo/klnt660GzCuOeaVpFMDOkvKuOaYw6RTNKA7p0hi8Vl8kxjfdJcE0x29McYYYyZHpHQWhwncmKRTAi69KKEdOFR29YE5+fh1SWdnETqNBAhQGZ909meg24gZ1xxzmHQmBUf7WX1URjfHHCadGjq2nt+nc2WsxWfxTU58aT+l6WUFkTHGGGMmQhgmJI3etc0jZY+kU4psPdCHKnR2HPYzakv8xouFaKE9tjnmMOncuj2ktNgc2xwzD6A12vN5osXtweFG45POuFDAz7doRQWLz+KbuPgYlFLTygoiY4wxxkyEYthnaeky55i9NunctSW+5BapVrpUC11OU9s36dSwxG0LFzmu9X2TTtevc8fCeY7Bvkln7llL3L5wjmOiXNpxWNNeSWep2mBhboOLhZLFZ/FNXHxoPdNn9FZlBZExxhhjJkI56HJ3/QzA2KQzesyxVIE7Z8/iRDnJ3Nikk/BZ3Fd/FIATMj7plLjOkbkTAPsnnVsLvLR2glA8R2Fs0lk4W+WumTNcKpYsPotv4uJL9xRNLyuIDoKQvRmoQNYrIaoI4jIeA+oEJOOCb2SsOFAPPuM8VJGMDVQ10exNXFXJ2qdTVTM3URUAJfvV+pXM85AbHXsDc8gen6BeEJfxNbmhl0TwGd/M
NzLWq8s81prlGjPZnCj1XJN6scX2TJ5m39HREA0ccT5HqTJPsV4muNwmcE3mohb1QpPLlQKXeo6uT08m97mQfnmWYq1A9MQ2OKUSdJkvNNmoFLnQC+gmedQ5fBTSL1Up1fLkzm6DQCnoMp9vcr5cpteN6MUCBPgoIC6UKM0cIn+2iTqh5HrUcy3q5Rbt2YhuUkQ0QENHXChQqjYonK3icyF5F1t8Ft9Exqeb7YP74E8AK4gOgAZCrxpkGyvg4mxZXBA6XC/bSyqdCBdkmwNJQiBZCxxPsrmdFkUZBOohzDZnlyRoL9s2DMlFSJxtDho6RDMWnoPCMPvl+h1Zz1OURPBRtrG+DxJnLC4CyD4JSAjRLJWZQE8FCbI9z0ni6ETZXut2FNHz2d6fOZdkLoh6mvE9b4w5EBePVfjsG++ms1SktBjiFoRuXWneHhPf12a2fpnnzJ7j7vIp3vf21/LJB2fpNsrkF0NmFhzdeaW9mtB7UZdifZO12hO8uPoYJ973Gj72E/fSWyoTLuWYWXB05qFX9/Tu6BLVt1moXeCe6imOfvqVfOD199NfrEAjPxgrdOue7uE+rt5kubbBC2dP86mLh/mtr305ca2UNu1cCAgXhO6csvX8Pn6+xcLcBnfNnCHvYj71XS/B9RKLz+KbuPheXH6U99950N8AB8cKooOSNaEWyZacDsZm3pMTCJpxL5Woy75Hyw/2EGXdbeBvYBeDV/DZkm+5oT1E6bIzRei4oT1E4rMXT+KzVmXc0F6fGxo7GC9ZIhy+dDew10cz7/Uh+1hk6jtsG3PLUJAHH6F8fo5ofZ6wk0dih0pANyhwMd/nUrFIq5hDvCLHT1E8XyFaXyDsFHH9ADSgG+bYyBfZKJZolfKIB06dIXcuR7i2RNQq4/oB4h2dIMflXJGNQontUj79Trt4mejcBcJLDaJmNd3YqI6Oi9jKFblU7LAZF1AVZLtNeOoslQuLROs1wm6EJI6OC2lFBS4WSlwqlqjnmrheYvFZfBMZX0en+6A5K4iMMcYYMxE0F+Dm50Y2j7xElaNArI5u1cHy4sg+MC3KHBss10egaytjm2OeYHBSeV/wa+ObY55kDq/CVifP4nqeyPuRzTHPMQtAvdiis1SkfN7is/gmL77UR5lWVhAZY4wxZiL4yNFbH908cph0PqgCM0J3fXxzzGHS6SOlsza+OeaVpDMR2mvjm2N2yHOaGgDlQznK/vpLHl9tjpkmndszeUqLIZHFZ/FNYHyZz8W9RVlBZIwxxpiJ4CNoHhrXJyVNOjepUKxCc2VMc8wdSWcUwvbK6D4wO5NO8cL2ijCuOeYw6fQlT7MRgO7RB0Zq7NwS3+w73IIQdiw+i2/y4ntCZ5hmVhAZY4wxZiL4CJrL+zWPTJPOfkUHY/dPOuOS0loWxjXHHCadvZqnvSSMa44JIaLpyeztJQZXr9kj6dx5eJKGdOuKxBafxTeB8U35hbetIDLGGGPMRNBQaTeU0X1Sriadm3fGdCRgVHPMnUnn5vOUzqKmFwnaJ+mMK0KnkTCuOeYw6UxyQmclZlxzzGHSqYGjeXuMXpmzxWfxTU58kvVqtLcoK4iMMcYYMxFc6OkvDhO4MUmn5Nm+v09SSq5tjql7J52XXwBurkebPCrjk87t9YBoqT22OeYw6ezORchii7aU2C/pjPM54vvadIOCxWfxTVx8adE2vawgMsYYY8xEKEZ9aktbXNpxLsSeSaebp1TpUs73OMfstUnnHlviNSxwaOESp6nR2XFY015Jp3vJLIcXLnKC+r5JZ25tjsMLFzguSosy45LOUmWe2frl9NLMFp/FN2HxpRdamF5WEBljjDFmIpSDLnctnOUojE86nWOhojy7egFg36ST6BD3zp8EuJJ07jw8qagKp8/hTp7DxbPcV38UgOM6PunMb9a4b+5RQvEcg7FJZ7Fe5jmz57hULFp8Ft/ExZfP2hPyFmUF0TNB1qaoNyprE1fzjCfcWG/Wg5a1KSsw
9ZcKNeZWUnZdXjL7KLE6HlRhkwpXkk7Jg5sn5xy6uUWjpLx05jgeIfGO81q9knSq5FBXoxA49Mx5JPTcXzlOrAFehcd19urhSRKiMkMxEOT0E0gCRyoP09cAj/AI0NGrSae6MqWgQXDyCcKW50j5C0Dae+ZhFdo6ODxJApAiJbdI9JgjuNzm7vIpWsWcxWfxTVx8+sSFm/fBfgYQnfKK8CCUF9b1BV/1fZnGRi2PJNmWG7Y9ruczjQ26Ca7VzzRWkgS31c42CVX00mXw2d5Xvt1Bk2wBBpUyFAuZxkouh5ayjSUMiGcyjhXoV6PMxWS/4vBBtrFxQfAZD+FNckKSccoaQFzMNhaBuKRkPbfSlzwaZPwOyScEuYzvzzChkM/2/gyDhFqxg5P95/HHf+8/sfngWaugjJlQ1dqavugrv5du1dGbEfpV6FeUeCbBVfuUKl0WKk0apS0+/4t3Uj7r6c44+lWhV4X+jBJXPFRjCpUu9UqLQ5XL/NmH7mTpMzG9qhuMh35ViSuKr8bkq11qlTbL5S3+9MHDHP4NoTeYQ686mEfV4ysJUbXHbLXNYnmbxzdnmPn3M8QlR3dG0jlX0mUn1YSg2qdaabNU2SZwnss/fRjxavFZfBMX36HyZd77sv/waVW9/6C/Bw6C7SE6ID7IOk5wGbfta0DmRFZDgTDbYAU0DCBD8SxeIQwzjQXQ7QR8toJIkwSJ40xjcQ4yFloALs6WqKuAixXNkHzjBEnI/Pq5BDTj3kCXKD7JntdnLaoREAWyjk9AyDYPTSTznEUcic9+xZvEu0xTvpE9T8aYp59r9Zj5wFFYXqS7XqO5EtFcdnQkICkllPM9nl29wEtnjnPq7B1UPvg5Zubn6K3P0zyUp7nsaDcc/YIjH8Ucrm5wpHacP+/cSeX3/wKpVojXF2iuFmk2AtoNoVtwBIFnubzFkbkT/Gm0RvkPH6Ychvi1JdprZbZXAtpLjk6kyKxSL7a4t3YSWEM+tYF2e+hqg85ale2VkHZDaAcOqlAtdLlz9ixzUYtPPjiLHD9l8Vl8ExffA9WHeO/BfvwPlBVExhhjjJkImouQamVkn5RzzALgEbozjpn5uZF9YC5R5ehwuSGwvDi2Oeax4ST6Dl1tjG2OeYI6ABfbJebXcrhHzl53yWOVtDnmaWo4UeqFJt1GmeJ5i8/im7z4vDrgj5lWVhAZY4wxZiL4yBGvj28eeY5ZEu+Iq0JvfXxzzGHS6SOluza+OeaVpDMROmvjm2MOk071QnE1TynZuw9MOuc8J5njcqVAfjEksvgsvgmMb9rPx7WCaAcRWQfeCywDHni3qv5LEakDvwLcBpwAvl5VNwb3+UHgO0kPMnqTqr7/AKZujDHG3FRPxzrSR9BcHdcnxdGRHOe1SqkKzUPjm2MOk84ohOah8c0xh0mn9IXtlYBxzTGHSafmPc1GgPjRzTGHSeelnmNmwRF2LD6Lb/LiO2WX3TY7xMD3q+pnRKQKfFpEfg/4duBDqvpOEXkr8FbgLSLyAuAbgLuBQ8AHReR5qpr95BVjjDHmmeGmryN9BM3Gfs0jHR3N0a8qzeUxzTF3JJ0aMhi7f9IZVzzthrDXJY93J53dutBeEsY1xxwmnV0f0a2D61t8Ft/kxdcZRDGtrCDaQVUfBx4f/P+WiBwFVoGvAV41GPYe4MPAWwa3/7KqdoHjIvIQcAT4o6d35sYYY8zN9XSsIzVU2g0Y3zwyTTqbz0po4xjZHHNH0rl9m6ezCFf6wMjopHM7L7SXfHrJY0JghoLInlvifeToLCWMa465M+lsr+4ca/FZfJMTn2S9KtctygqiEUTkNuAlwP8EGoMVAar6uIgsDYatAp/YcbeTg9uMMcaYW9bNWke6yNNtxIxsHgkgNVRyNF8Y0y8Mk8LxSefWc5Vkrj+2OeYw6Wwdcuh6d7DF/GpzzL0OT+rNhPg723RkdHPMYdLpcyG9F3XphjmLz+KbuPhEp7skmO7oRxCRCvBfge9V
1U0Z3XNmrz/seY1lEXkj8EaAXHnuqZimMcYY87R7qteRO9ePxUaF0mKTFmXGJZ0qNbYqXfJRzKUdW/BHJZ0aRTSWLnOO2X2TzgsvrNBYuMRpanTID7ac75105g7NsLy4kTbHZHzS2S/PUqxvspEvWnwW38TFl+4pml5WEO0iIhHpF/1/UtVfG9x8VkRWBlu+VoBzg9tPAus77r4GnN5ruar6buDdkDZmvSmTN8YYY26im7GO3Ll+XL27pncsnOcYjE06C4GjXkk4XN3gKOyfdIZL3F0/A7Bv0un6Fe6dP4kT5SRztMnvODzp2qQz9/wq986dxKGckPrYpLNYK7BWe4KNYsnis/gmL76M/SNvVVYQ7SDpZq6fA46q6rt2/Ok3gG8D3jn499d33P4+EXkX6QmjdwCffPpmbIwxxjw9no51ZCXocGTuBMA+Sed5DlUcR2rHAfZNOiVc5IHZh648znVJp15NOiWBV858HgCvMnZLfG7L84rqMaJB9+sTjE46oye2eXH1MVqlvMVn8U1cfJw6wzQTnfKKcCcReSXwB8BnSS8pCvA20mOkfxU4DDwKfJ2qXhzc54eA7yC9+s73qurv7Pc4paV1fd7XfV+mOYUtcEm21yjsKEE321jXV8J2nGmsxEqw3c00lkRxW03w2eahlzfRJONF+ZKErO9Xl88j5VK25YYhOlPONFRFSGbyMPoQkWv0qxEaZJtGXHQkUbblJjkhKWRbrg+EOONTgUBchqztCJKiokG218QXFI38/gMBIk+Yz/a+cIGnXOwisv88Hvzen6d17PHpbrZgzJP0dKwjiyvrett3vBkfpb1ZNAQfgkaKhukPkUdCT+5EgaCTNrX0kabjwuH9FI0UQo+ESnA2R+6SoEF6JTsN0/HXjh2M3wopnQ7wAWgEfjBWh48R+cFYhVgoPxyhjl1zVnwEhIqGg/FOKT6UR/zusRafxXfw8blcwolv+aFPq+r9T/23x+SzPUQ7qOrH2PuYZ4BXj7jPO4B33NDjSJrQZuH6irpsY+UGLvatDiTOdkURCcD1Mr5VVNFeDnzGxLdYQOJshZnfbqLdbIWZB1yUcc7eQzfKNFScS5+LjAWR6wVkvXBLkPF1Hsr8vggUn7HQQsD1shdEPgJ81nlAurVrfwokGQstVaEXZ3utdcobzxnzxXg61pG58x2e9TNH0dUGnbUq2yshrWWhs6i4uR5rixvcUz/F/ZXj/OyvvZ7K7/8FLC/SXa/RXIloLjs6S5DM9WksXebu+hkemH2Id7339Rz+15/Fzc/RW5+neShPc9nRbkB/MaG2tMVdC2c5UjvOv/rjV3P4h48i1Qrx+gLN1SLNRkC7IXQbMaXFJncsnOfI3Ak+uXEbvbfFIA6/tkR7rcz2SkB7ydFpJEQLbW5buMh99UeZDdt85Cf+Epw+Z/FZfBMX3yuqn+ern8T3wq3CCiJjjDHGTATNRRCGe14SuE2ek8zhVYg1oFd1SLWy5yWPO5rjHLNXlxuAm58b2xzz6HBs38Hy4tjmmMcGY880q8wdCpHHrr26l2h6SFKHIse1nj5ioUlvqUzunMVn8U1efAkO+N9MKyuIjDHGGDMRNEq3ZO/VJ0UlbR55mhpehe6MI16//pLHV5JOriadPoLe+vjmmFeSzljoro9vjjlMOpPEUVgtUPTXX/JY5WrSeULqbFSKhEs5QovP4pvA+KadFUTGGGOMmQhJJLTXxjfH7JDncZ2lWIXm6h59YKixO+mMQqV5aHxzzGHSKbHQXBndB2Zn0knkaa6EiO7dB2Z48nqHIhd6ATMLjqhl8Vl8kxffcE/RtLKCyBhjjDETwUewvTKuT8rVpLNfVZqNvfvA7E46fQjN5fHNMYdJZ1LQwdj9k87+rKfVkPRs+RHNMYdJZzfJ05kH17f4LL7Ji6+jRaaZFUTGGGOMmQgaQntJBuc47JN0NjzthjCqOebOpLN1yNNuwLjmmMOks3UIOovKXpc83p10aiB0Fv3Y5pjD
pFOdo1f3iHcWn8U3cfGlP9PLCiJjjDHGTASJPJ1GAgSDcxxGJJ0S0npuTLcwTPTGJ53Nw0p/MWFsc8xB0tlZEvqrvbHNMYdJZ78aEN/W2bs55snHr0k6fRTSu6NLJ8hZfBbfxMUnWXuE3KKsIDLGGGPMRMiFMdFSe2TzyGHSqVJlq9olCPyY5phXk06NAmpLW2ObYw6Tzo27iiwtXb6+OeYeSWezUaG2cJkzMktnx2FNe22J75eqRPVtLueKFp/FN3Hxodn6Md6qrCAyxhhjzEQohX1uW7jIca2PTTqLDmqVmOXyFsdg/6QzrHPXwlmOwr5Jp/TXuLt+BuD6pFOuTTrzzy1xz/xpnCinqY1NOku1PAu1C2wUShafxTdx8aV7iqaXFUTGGGOMmQhV1+a++qMAnJAxSefpJ1guhxyZOwGwb9JJWONI7TjAvkmnS9Z4YPahK3MatyU+t+V5efUhQklwopxkbmTSmTu7zT3VU2yX8hafxTdx8bnHzjDNRDVbN3jz1Ckur+uz3/DmTGPDFrgk22sUdCDoZRzbV8K2zzRWEoi2+pnGokqw2UUyvq9kswlxnG3RzRa+2822XBEIsh0PK0GAzM5kGosTfLUMwahm7ddKKnk0zDY2LoYk+WxjfSTEhYxjAyEuZRqKOiEug2ZbNHEJ1GV7rX0efC7bWI0UzSfZJhEouVK29+djb/0ZOl84lTE6Y8zTLf/sVV3+R2+CvkAiSN/h+uBiQfrg+oLrg3hoPqcPkYe+G4wVXH/wb7xjbAKt1QRm4rSpZSxIfHX8cKz0wSXQnVOS5e5grLtmrOxars9D99mddGzfIYN5XJ0DV+6HQOuObvoFa/FZfBMWn3g4+s43f1pV7z/gr4EDYXuIDoA6SPLZxooHn2TN3xR12cam5865bHNIQHwIfv9kVhQk9kjGOtvFCSTZEl8BXJTtLaudLr7ZzDiJgCDjchHBBUG251kE5xyE2Z5nAImzjfV5R9bXzwdkfl8gigaSuSBSdwPLRsFnfH8m4DNe8UYDpR9kLLSyBmaMORCFR3vc9ZZH8GtLtNfKbK8I7SWh00iIltocXrjIffVHOVJ5mJ/8vm+i/IcPo6sNOmtVtlcCWsvpVbV0vUtj4RL3zp/klTOf50d+/ps5/MNHYXmR7nqN5kpEc9nRWVT6qz2Wli5zd/0MD8w+xDs++Rru/O5juPk5euvzNA/laS472g2lvxhTW9riroWzHKkd5+Mbz2H7dR4pFYnXF2iuFmk2HO0GdBsxpcUmdyyc58jcCUpBlw+8/n64eNnis/gmLr5XVh7kr7zzoL8BDo4VRMYYY4yZDFEIcTyyeeQJ0uaRfQ3oVR3lMBzZB+Y0Ndxg65wPQKqVsc0xh7TvcPNzY5tjHh2MPb09y8xygJ46O7I55rHB2Pl8k/5ihejcBYvP4pu4+DwCXD0Ub9pYQWSMMcaYieBzAbraGN08cpB0eoTejODXxjfHPMkcXgWNIF4f0xxTdiSdsaO3Pr455jDp7PZDcqt5Cokf2RxzmHSeL5ehkSe8ZPFZfJMXX1oQTS8riIwxxhgzEXwkdNaqFFXh9LmRSecjCvmq0F7bpznmYEt8ECrN1fHNMYdb4iUWmofGN8ccJp0SeZorOcSPbo45TDp73YiZBUfUtPgsvsmL7wujP5ZTwQoiY4wxxkwEH8H2SgjMUBAZvSVei7gqbK+MaY65I+n0ITQbIy55rDWuJJ2aw+eU5vL45pjDpDOueloNQfT6Sx7vTjp7sdCZF1zP4rP4Ji++lvUhMsYYY4w5eD6E1rIwuk/K1aSzW/e0lxyjmmMOk05RR2fR024II/vASI0rSecStBvKuOaYw6SzLemJ7+OaY15NOtM5o+P6wFh8Ft/BxCc63SXBdEdvjDHGmMkReTqLHsmQdLYPJ3QiZVRzzJ1JZ2slvarW2OaYg8OTunWhvzgcOz7pjEuO1vO6I/vA7Ew6fRTQPdyn4yKLz+KbuPjIeGXXW5UVRMYYY4yZ
CFGYII0ubfKojE461ZXZqvaQWR3dHHNH0nn+pUJpsTm2OeYw6bx0R57a0tbY5pjDpLO9WKS0uMUFqe6bdMaFEq7eZCtXtPgsvomLT7S494dySlhBZIwxxpiJUAr7rC1ucJI5OjvOhdiddJaCBrPVHvViixPU9086wyp3LJznGOybdLp+g7sWznIU9k06o2evcvfC4xx1nnPMXpt06rVJZ2nmEMu1DS4VOxafxTd58fn5/T+gtzAriIwxxhgzESpBh3vqp/AqnKZGh/yehycFJ59gsRxwb+0kwL5JJ2GF++ceAdg36ZSkwZHacYB9k8785govm3kYR9pP5rqkc8eW+PzZJi+cPc1mXLD4LL6Jiy98zDPNrCA6IJrxUE112ZfpA0ECzTbWCz7jHByKOgG3/zXqVRUNHZptGrjwBo5ZDUPw2T6wEsTgsi1bggD6cbY5OAHvEc12vX5JEjI+FUisSManQ5L0JwuH4hIh05RlsOyM7QjkBr4/xQviMz4biUCSbawgaJJxwllfDGPMgdiMizx4ucFWJw+i+JKnMy8kOaFXi4jW5shv1oiaCZc3t3CyysV2CfWCFjzduuAjR28mJLcyQ+75VfKbCcTwmUvrnG1VSBIHkac/69FA6FcDWktlcs8pkd/0+Dx8fOM5nGnO0IsDJPLEVU8bR1xytBeL5G5fJb+5TGvJ8dGN5/F4a4ZWL0ICT1xN6GhAkne05/PknrVEfnMBdfCZjXUS7yw+i2/i4gtbHk4f9DfAwbGC6ACog7i0f2Ymg/9mTXzVpT0csgh6gyInA5c12QRQEAWyJr4UcHG2rNqFAXSjTGMllyOIMr69+zHJ5ma2sSIE4jIVh5Bug9GMRZ94P7ic5/58LkB8trFZX2dgUDS5zMUTZC+sxYMPs83F5wCfbWuABuCzfpX56W48Z8yk6x8Twm+HxfU85UN5mo2A9hJ0VmLknhbrCxe4b+5RjpS/wI9/7xvQT20wv5ajuDocK3SWEvydbZYXN7h37iSvqB7jh3/uDXTe5qkdisivFmiuhLQaQmfRE9/WobZwmXvmT/Py6kO8/ZNfzfbrPJXlkHC1QHMlNxirtJ7XpbS4xQsWHudlMw/z4Y07ufQ1jtKMEK4XaK7kaTYcnSWlfVufaHGb2xfO8dLaCUqux2997csJt9sWn8U3cfE9UDrGl95+0N8AB8cKogn2TN2YrcKU9zveJevushuVtei8gYLohjxT36DGmMkVBujlTSLvKfsF0OLgkIqQtpQ4Lko42DUdlxza7eEeOUspWUL8jj4wUuQR0j3kkSTp0RbikMfOUvQgWgVNL3ncJs8ZmcWJEkqC9h1SKqKnzlJIPKK1tI/LoDnmBaly1HkcytlWldKM4J+4QM578POg6bklKiGbVDg2mHM91yKulQhPnbX4LL6Jiy+SBHj4pn20J50VRMYYY4yZCD4XwvLi6OaRlDkGxOrozgi62hjdHJMiJ6SeLjcCv7b3JY9V0uaYp6nhRKHviNf3vuTxMOk8xywAzW6O5fU8Oe9HNse8RJWjQL3cor+Up3LB4rP4Ji8+P+Wbsq0gMsYYY8xE8JHQXR/XJyVNOh9WIZwROmvjm2N2KHKCOkRKe210H5g0kcxzkjkkEZqro/vA7Ew6JfA0V/LgRzfHHCad7dmIykJAZPFZfBMY39GM50bfqqwgMsYYY8xE8BE0V66/JPDupLOtJQoV2F4Z3xxzmHSGIWyvjG+OOUw6xUGz4RjXHHOYdMbVJB2ro5tjDpPOblIkXBDCrsVn8U1efJtUmGZWEBljjDFmIvgQmsujm0fuTDr7VaXdEEY1x9yZdPZqnvaSQ3T/pLNbV9oNGNcc80rSqQGdJWVcc8xh0ika0J1TJLH4LL5JjG+6S4Lpjt4YY4wxkyNSOovDBG5M0ikBl16U0A4cKrv6wJx8/Lqks7MInUYCBKiMTzr7M9BtxIxrjjlMOpOCo/2sPiqjm2MOk04NHVvP79O5Mtbis/gmJz65kT4v
tyAriHYQkQLwUdJ3TAj8F1X9f0SkDvwKcBtwAvh6Vd0Y3OcHge8EEuBNqvr+A5i6McYYc1M9HevIMExIGr1rm0fKHkmnFNl6oA9V6Ow47GfUlviNFwvRQntsc8xh0rl1e0hpsTm2OWYeQGu05/NEi9uDw43GJ51xoYCfb9GKChafxTdx8TEopaaVFUTX6gJ/VVW3RSQCPiYivwO8HviQqr5TRN4KvBV4i4i8APgG4G7gEPBBEXmeqmbsHGSMMcY8Y9z0dWQx7LO0dJlzzF6bdO7aEl9yi1QrXaqFLqep7Zt0aljitoWLHNf6vkmn69e5Y+E8x2DfpDP3rCVuXzjHMVEu7Tisaa+ks1RtsDC3wcVCyeKz+CYuPrQ+6mM5Fawg2kFVFdge/BoNfhT4GuBVg9vfA3wYeMvg9l9W1S5wXEQeAo4Af/T0zdoYY4y5+Z6OdWQ56HJ3/QzA2KQzesyxVIE7Z8/iRDnJ3Nikk/BZ3Fd/FIATMj7plLjOkbkTAPsnnVsLvLR2glA8R2Fs0lk4W+WumTNcKpYsPotv4uJL9xRNLyuIdhGRAPg08Fzg36jq/xSRhqo+DqCqj4vI0mD4KvCJHXc/ObgtwwNlndDNGatOQLJ11lSXNlvNOgVE0s9axjtkXbaKIC7jgp2k83iqx4oD9eAzzkMVydhAVRPN3sRVNevLh6pmbqIqAHoDjXU189sIudGxNzAHaxJrzNPjZq8jnSj1XJN6scX2TJ5m39HREA0ccT5HqTJPsV4muNwmcE3mohb1QpPLlQKXeo6uT08m97mQfnmWYq1A9MQ2OKUSdJkvNNmoFLnQC+gmedQ5fBTSL1Up1fLkzm6DQCnoMp9vcr5cpteN6MUCBPgoIC6UKM0cIn+2iTqh5HrUcy3q5Rbt2YhuUkQ0QENHXChQqjYonK3icyF5F1t8Ft9Exqeb7afwm+KZxwqiXQa78u8VkRrw30TkhWOG75Wz7ZmaicgbgTcCBPUaSdlnnJHLnviGgutlXGoMGmRLOSVJ55FtErv+3W+4gIuzDQ5Ch+tle8tKJ8IFQbZJJAmBZI3Pk2xup0VRBoF6CLPN2SUJ2su2jUZyERJnm4OG6XsoU+E5KAyztyNwZD0PUxLBR9nG+j5InG0SGkDmSdjBrMZ8UW7GOnLn+rEQzvDZN95NZ6lIaTHELQjdutK8PSa+r81s/TLPmT3H3eVTvO/tr+WTD87SbZTJL4bMLDi680p7NaH3oi7F+iZrtSd4cfUxTrzvNXzsJ+6lt1QmXMoxs+DozEOv7und0SWqb7NQu8A91VMc/fQr+cDr76e/WIFGfjBW6NY93cN9XL3Jcm2DF86e5lMXD/NbX/ty4lopbdq5EBAuCN05Zev5ffx8i4W5De6aOUPexXzqu16C6yUWn8U3cfG9uPwo77/zqfiWeGaygmgEVb0kIh8G/jpwVkRWBlu+VoBzg2EngfUdd1sDTo9Y3ruBdwPkn7Wu2ffkkDmJ0xva23IDe31uZOxg2ZnzaRE0826DG9iTE0i6FyzLYtWle4my8IM9RFn35Pgb2OvjFXy2IkduaA9RuuxMETpuaA+R+Bt4b/isVRk3ttfH9hAZ87R7KteRO9ePs/lllQcfoXx+jmh9nrCTR2KHSkA3KHAx3+dSsUirmEO8IsdPUTxfIVpfIOwUcf0ANKAb5tjIF9kolmiV8ogHTp0hdy5HuLZE1Crj+gHiHZ0gx+VckY1Cie1SPv2euniZ6NwFwksNomY13Rinjo6L2MoVuVTssBkXUBVku0146iyVC4tE6zXCboQkjo4LaUUFLhZKXCqWqOeauF6CxWfxTWJ8HZ3ug+asINpBRBaB/uCLvgh8OfDPgN8Avg145+DfXx/c5TeA94nIu0hPGL0D+OTTPnFjjDHmJns61pGaC3DzcyObR16iylEgVke36mB5cWQfmBZljg2W6yPQtZWxzTFPMDipvC/4tfHNMU8yh1dhq5NncT1P5P3I5pjnmAWgXmzRWSpS
Pm/xWXyTF1/qo0wrK4iutQK8Z3CMtAN+VVV/U0T+CPhVEflO4FHg6wBU9c9F5FeBzwEx8F12hTljjDG3qJu+jvSRo7c+unnkMOl8UAVmhO76+OaYw6TTR0pnbXxzzCtJZyK018Y3x+yQ5zQ1AMqHcpT99Zc8vtocM006t2fylBZDIovP4pvA+Hz2Y+VvSVYQ7aCqfwa8ZI/bLwCvHnGfdwDvuMlTM8YYYw7U07GO9BE0D43rk5ImnZtUKFahuTKmOeaOpDMKYXtldB+YnUmneGF7RRjXHHOYdPqSp9kIQPfoAyM1dm6Jb/YdbkEIOxafxTd58T2hM0wzK4iMMcYYMxF8BM3l/ZpHpklnv6KDsfsnnXFJaS0L45pjDpPOXs3TXhLGNceEENH0ZPb2EoOru+yRdO48PElDunVFYovP4pvA+Kb8wttWEBljjDFmImiotBvK6D4pV5POzTtjOleO3hufdG4+T+ksanoRnX2SzrgidBoJ45pjDpPOJCd0VmLGNcccJp0aOJq3x+iVOVt8Ft/kxCdZr9Z6i7KCyBhjjDETwYWe/uIwgRuTdEqe7fv7JKXk2uaYunfSefkF4OZ6tMmjMj7p3F4PiJbaY5tjDpPO7lyELLZoS4n9ks44nyO+r003KFh8Ft/ExZcWbdPLCiJjjDHGTIRi1Ke2tMWlHedC7Jl0unlKlS7lfI9zzF6bdO6xJV7DAocWLnGaGp0dhzXtlXS6l8xyeOEiJ6jvm3Tm1uY4vHCB46K0KDMu6SxV5pmtX04vzWzxWXwTFl96oYXpZQWRMcYYYyZCOehy18JZjsL4pNM5FirKs6sXAPZNOokOce/8SYArSefOw5OKqnD6HO7kOVw8y331RwE4ruOTzvxmjfvmHiUUzzEYm3QW62WeM3uOS8WixWfxTVx8+aw9E29RVhCZbISb1/wya1PUG5W1iat5xruZb09jzNOn7Lq8ZPZRYnU8qMImFa4knZIHN0/OOXRzi0ZJeenMcTxC4h3ntXol6VTJoa5GIXDomfNI6Lm/cpxYA7wKj+vs1cOTJERlhmIgyOknkASOVB6mrwEe4RGgo1eTTnVlSkGD4OQThC3PkfIXgLT3zMMqtHVweJIEIEVKbpHoMUdwuc3d5VO0ijmLz+KbuPj0iQs374P9DCA65RXhQcjfvqbLb//uTGOlFULGzkZBxyFxtrGuLwSdbGPFQ9TMNhaFqKlp1+UMopZHMsYXtj2ul23BQTfBtfqZxkqS4Lba2Sahil66DD7b58a3O2iSLcCgUoZiIdNYyeXQUraxhAHxTMaxAv1qlLmY7FccPsg2Ni4IPuMhyklOSDJOWQOIi9nGPvLud9E5/ZhVysZMqGptTV/0ld9Lt+rozQj9KvQrSjyT4Kp9SpUuC5UmjdIWn//FOymf9XRnHP2q0KtCf0aJKx6qMYVKl3qlxaHKZf7sQ3ey9JmYXtUNxkO/qsQVxVdj8tUutUqb5fIWf/rgYQ7/htAbzKFXHcyj6vGVhKjaY7baZrG8zeObM8z8+xnikqM7I+mcK+myk2pCUO1TrbRZqmwTOM/lnz6MeLX4LL6Ji+9Q+TLvfdl/+LSq3n/Q3wMHwfYQHQQBCbIl1BooQrb8TQMFn3GsU/QG9sz4INs4UVAHWft7+UBwGbfta5AuO9PYUCDMNlgBDQPIsHFAvEIYZhoLoNsJ+GwFkSYJEmetaB1kLLQAXJytkFQBFysqGeJzgiRkfv1cQub3nEsUn2R/f2Ytqo0xk821esx84CgsL9Jdr9FciWguOzoSkJQSyvkez65e4KUzxzl19g4qH/wcM/Nz9NbnaR7K01x2tBuOfsGRj2IOVzc4UjvOn3fupPL7f4FUK8TrCzRXizQbAe2G0C04gsCzXN7iyNwJ/jRao/yHD1MOQ/zaEu21MtsrAe0lRydSZFapF1vcWzsJrCGf2kC7PXS1QWetyvZKSLshtAMHVagWutw5e5a5qMUnH5xFjp+y+Cy+iYvvgepDvPdg
P/4HygoiY4wxxkwEzUVItTKyT8o5ZgHwCN0Zx8z83Mg+MJeocnS43BBYXhzbHPPYcBJ9h642xjbHPEEdgIvtEvNrOdwjZ6+75LFK2hzzNDWcKPVCk26jTPG8xWfxTV58Xh3wx0wrK4iMMcYYMxF85IjXxzePPMcsiXfEVaG3Pr455jDp9JHSXRvfHPNK0pkInbXxzTGHSad6obiap5Ts3QcmnXOek8xxuVIgvxgSWXwW3wTG57Me2nOLsoLIGGOMMRPBR9BcHdcnxdGRHOe1SqkKzUPjm2MOk84ohOah8c0xh0mn9IXtlYBxzTGHSafmPc1GgPjRzTGHSeelnmNmwRF2LD6Lb/LiO2WX3TbGGGOMOXg+gmZjv+aRjo7m6FeV5vKY5pg7kk4NGYzdP+mMK552Q9jrkse7k85uXWgvCeOaYw6Tzq6P6NbB9S0+i2/y4usMophWVhAZY4wxZiJoqLQbML55ZJp0Np+V0MYxsjnmjqRz+zZPZxGu9IGR0Unndl5oL/n0kseEwAwFkT23xPvI0VlKGNccc2fS2V7dOdbis/gmJz7JetWqW5QVRMYYY4yZCC7ydBsxI5tHAkgNlRzNF8b0C8OkcHzSufVcJZnrj22OOUw6W4ccut4dbDG/2hxzr8OTejMh/s42HRndHHOYdPpcSO9FXbphzuKz+CYuPtHpLgmmO3pjjDHGTIx8GFNabNKizLikU6XGVqVLPoq5tGML/qikU6OIxtJlzjG7b9J54YUVGguXOE2NDvnBlvO9k87coRmWFzfS5piMTzr75VmK9U028kWLz+KbuPjSPUXTywoiY4wxxkyEctDjjoXzHIOxSWchcNQrCYerGxyF/ZPOcIm762cA9k06Xb/CvfMncaKcZI42+R2HJ12bdOaeX+XeuZM4lBNSH5t0FmsF1mpPsFEsWXwW3+TFl7G/4q3KCiJjjDHGTIRK0OHI3AmAfZLO8xyqOI7UjgPsm3RKuMgDsw9deZzrkk69mnRKAq+c+TwAXmXslvjclucV1WNEg+7QJxiddEZPbPPi6mO0SnmLz+KbuPg4dYZpJjrlFeFBKDxnVdff+fcyje21IkiyXRteugHSzzbW9QTXzTQU8ULYyjhWIWyC+Gzvq7AFLsk4tqME3WxjXV8J23GmsRIrwXbGJyNR3FYTMsanlzfRJMm47ISsn0eXzyPlUrblhiE6U840VEVIZvIg2d5H/WqEBtmmERcdSZRtuUlOSArZlusDIc74VDz83nfRPvPYdDdbMGaCFVfW9bbveDM+SnuzaAg+BI0UDdMfIo+EntyJAkEnbWrpI03HhcP7KRophB4JleBsjtwlQYP0SnYapuOvHTsYvxVSOh3gA9AI/GCsDh8j8oOxCrFQfjhCHbvmrPgICBUNB+OdUnwoj/jdYy0+i+/g43O5hBPf8kOfVtX7D/Zb4GDYHqIDIKJEUbYkOckF+IwFkXoh3ZqwPw/gMxZaXvFhxrGafujI2ODL9RV1GZedsa4AUAcSZ3suJADXy/hRUEV7OfA+2/hiAYmzFWZ+u4l2sxVmHnBRxjl7D90o01BxLn0uMhZErheQ9cI0QcbXeSjz+yJQfMZCC9v+Y8xEy53v8KyfOYquNuisVdleCWktC51Fxc31WFvc4J76Ke6vHOdnf+31VH7/L2B5ke56jeZKRHPZ0VmCZK5PY+kyd9fP8MDsQ7zrva/n8L/+LG5+jt76PM1DeZrLjnYD+osJtaUt7lo4y5Hacf7VH7+awz98FKlWiNcXaK4WaTYC2g2h20jPcbpj4TxH5k7wyY3b6L0tBnH4tSXaa2W2VwLaS45OIyFaaHPbwkXuqz/KbNjmIz/xl+D0OYvP4pu4+F5R/TxffdBfAAfICiJjjDHGTATNRRCGe14SuE2ek8zhVYg1oFd1SLWy5yWPO5rjHLNXlxuAm58b2xzz6HBs38Hy4tjmmMcGY880q8wdCpHHrr26l2h6SFKHIse1nj5ioUlvqUzunMVn8U1e
fAkO+N9MKyuIjDHGGDMRNEq3ZO/VJ0UlbR55mhpehe6MI16//pLHV5JOriadPoLe+vjmmFeSzljoro9vjjlMOpPEUVgtUPTXX/JY5WrSeULqbFSKhEs5QovP4pvA+KadFUTGGGOMmQhJJLTXxjfH7JDncZ2lWIXm6h59YKixO+mMQqV5aHxzzGHSKbHQXBndB2Zn0knkaa6EiO7dB2Z48nqHIhd6ATMLjqhl8Vl8kxffcE/RtLKCyBhjjDETwUewvTKuT8rVpLNfVZqNvfvA7E46fQjN5fHNMYdJZ1LQwdj9k87+rKfVkPRs+RHNMYdJZzfJ05kH17f4LL7Ji6+jRaaZFUTGGGOMmQgaQntJBuc47JN0NjzthjCqOebOpLN1yNNuwLjmmMOks3UIOovKXpc83p10aiB0Fv3Y5pjDpFOdo1f3iHcWn8U3cfGlP9PLCiJjjDHGTASJPJ1GAgSDcxxGJJ0S0npuTLcwTPTGJ53Nw0p/MWFsc8xB0tlZEvqrvbHNMYdJZ78aEN/W2bs55snHr0k6fRTSu6NLJ8hZfBbfxMUnWXto3KKsIDLGGGPMRMiFMdFSe2TzyGHSqVJlq9olCPyY5phXk06NAmpLW2ObYw6Tzo27iiwtXb6+OeYeSWezUaG2cJkzMktnx2FNe22J75eqRPVtLueKFp/FN3Hxodn6Fd6qrCAyxhhjzEQohX1uW7jIca2PTTqLDmqVmOXyFsdg/6QzrHPXwlmOwr5Jp/TXuLt+BuD6pFOuTTrzzy1xz/xpnCinqY1NOku1PAu1C2wUShafxTdx8aV7iqaXFUTGGGOMmQhV1+a++qMAnJAxSefpJ1guhxyZOwGwb9JJWONI7TjAvkmnS9Z4YPahK3MatyU+t+V5efUhQklwopxkbmTSmTu7zT3VU2yX8hafxTdx8bnHzjDNRNVat+8mIgHwKeCUqn6ViNSBXwFuA04AX6+qG4OxPwh8J5AAb1LV9++3/NIdK3rnT33HvvNQFZrtPD5xmeYddwPoZxsrfYfrSLaxiRC0M45VCJtAxrdV2AKXZBscdCDoZRzbV8K2zzRWEoi2+pnGokqw2UUyfm5kswlxnG3RzRa+2822XBEIsh3vK0GAzM5kGosTfLUMQbbXO6nk0TDb2LgYkuSzjfWREBcyjg2EuJRpKJ//zz9J69xj2RZsjLnOzV4/5p+9qsv/6E3QF0gkXVf1wcWC9MH1BdcH8dB8Th8in673EkH6gusP/o13jE2gtZrATJw2tYwFia+OH46VPrgEunNKstwdjHXXjJVdy/V56D67k47tO2Qwj6tz4Mr9EGjd0QUVi8/im7j4xMPRd77506p6/8347ph0todob99DWqQPs8i3Ah9S1XeKyFsHv79FRF4AfANwN3AI+KCIPE9Vk3ELD0Sp5nuZJpJ4R+KzFTkASZAtUVen+KxXFEkUMs5BAIklc0EkHnySNT9V1GUbm54bmHHOCYgPwe8/aVGQ2CMZ43NxAsnYt8PVZQMuyvaR1E4X32xmnERAkHG5iOCCINvzLIJzDsLs70+Js431eUfW188HZH5fSLYa2Rgz2k1dPxYe7XHXWx7Bry3RXiuzvSK0l4ROIyFaanN44SL31R/lSOVhfvL7vonyHz6MrjborFXZXgloLadX1dL1Lo2FS9w7f5JXznyeH/n5b+bwDx+F5UW66zWaKxHNZUdnUemv9lhauszd9TM8MPsQ7/jka7jzu4/h5uforc/TPJSnuexoN5T+YkxtaYu7Fs5ypHacj288h+3XeaRUJF5foLlapNlwtBvQbcSUFpvcsXCeI3MnKAVdPvD6++HiZYvP4pu4+F5ZeZC/8s6b98Ux6awg2kVE1oDXAu8A3jy4+WuAVw3+/z3Ah4G3DG7/ZVXtAsdF5CHgCPBHT+OUjTHGmJvuaVk/RiHE8cjmkSdIm0f2NaBXdZTDcGQfmNPUcIOtVz4AqVbGNscc0r7Dzc+NbY55dDD29PYsM8sBeursyOaY
xwZj5/NN+osVonMXLD6Lb+Li8whw9VC8aWMF0fV+CviHpO/coYaqPg6gqo+LyPDMs1XgEzvGnRzcZowxxtxqfoqbvH70uQBdbYxuHjlIOj1Cb0bwa+ObY55kDq+CRhCvj2mOKTuSztjRWx/fHHOYdHb7IbnVPIXEj2yOOUw6z5fL0MgTXrL4LL7Jiy8tiKaXFUQ7iMhXAedU9dMi8qosd9njtj0PphKRNwJvBMgvVfcaYowxxkykp2v9mCvP0VmrUlSF0+dGJp2PKOSrQnttn+aYgy3xQag0V8c3xxxuiZdYaB4a3xxzmHRK5Gmu5BA/ujnmMOnsdSNmFhxR0+Kz+CYvvi/s9eGcIlYQXesVwFeLyGuAAjAjIv8ROCsiK4OtXyvAucH4k8D6jvuvAaf3WrCqvht4N0D1ect2JQtjjDHPJE/L+rG4sq7bKyEwQ0Fk9JZ4LeKqsL2ydx+Y3UmnD6HZGHHJY61xJenUHD6nNJfHN8ccJp1x1dNqCKLXX/J4d9LZi4XOvOB6Fp/FN3nxtawPkRlS1R8EfhBgsAXsB1T1W0TknwPfBrxz8O+vD+7yG8D7RORdpCeN3gF88mmetjHGGHNTPV3rRx9Ca1kY3SflatLZrXvaS45RzTGHSaeoo7PoaTeEkX1gpMaVpHMJ2g1lXHPMYdLZlvTE93HNMa8mnemc0XF9YCw+i+9g4hOd7pJguqPP7p3Ar4rIdwKPAl8HoKp/LiK/CnwOiIHv2u8KOsYYY8wt5KldP0aezqJHMiSd7cMJnUgZ1RxzZ9LZWkmvqjW2Oebg8KRuXegvDseOTzrjkqP1vO7IPjA7k04fBXQP9+m4yOKz+CYuPrJeefgWZQXRCKr6YdKr5aCqF4BXjxj3DtIr7hhjjDG3vJu5fozCBGl0aZNHZXTSqa7MVrWHzOro5pg7ks7zLxVKi82xzTGHSeelO/LUlrbGNsccJp3txSKlxS0uSHXfpDMulHD1Jlu5osVn8U1cfKLFvT+UU8IKImOMMcZMhFLYZ21xg5PM0dlxLsTupLMUNJit9qgXW5ygvn/SGVa5Y+E8x2DfpNP1G9y1cJajsG/SGT17lbsXHueo85xj9tqkU69NOkszh1iubXCp2LH4LL7Ji8/P7/8BvYVZQWSMMcaYiVAJOtxTP4VX4TQ1OuT3PDwpOPkEi+WAe2snAfZNOgkr3D/3CMC+SackDY7UjgPsm3TmN1d42czDuMEF9K5LOndsic+fbfLC2dNsxgWLz+KbuPjCx6a7c7kVRAdBIHDZ3nhhkP2UJBd4VLNdRz4JFA2yXexOEDTroaWedGzG6+ipy7hcwAeCZJyz94LPOGeHok7A7f/cqSoaOjRjfC68gWNywxB8tveFBDG4bMuWIIB+nG0OTsB7JOP7SJIk60uNxIpkfDokSX+ycCguETJO2RgzwTbjIg9ebrDVyYMovuTpzAtJTujVIqK1OfKbNaJmwuXNLZyscrFdQr2gBU+3LvjI0ZsJya3MkHt+lfxmAjF85tI6Z1sVksRB5OnPejQQ+tWA1lKZ3HNK5Dc9Pg8f33gOZ5oz9OIAiTxx1dPGEZcc7cUiudtXyW8u01pyfHTjeTzemqHVi5DAE1cTOhqQ5B3t+Ty5Zy2R31xAHXxmY53EO4vP4pu4+MKWH3EdyOlgBdEBiFzCYnE78/jEZ6saNp2nF2d7SfthQD9jcaGJ4LO+VRTAZSqIZPDfrImvOvBRtqw36A2KnAxccgOZtIIo4LOWAQVcnK3IcWEA3SjTWMnlCKKMr0k/JtnczDZWhEBcpuIQ0m1MmrHoE+8Hlyvdn88FiM82NuvrnM4h81BjzAHoHxPCb4fF9TzlQ3majYD2EnRWYuSeFusLF7hv7lGOlL/Aj3/vG9BPbTC/lqO4OhwrdJYS/J1tlhc3uHfuJK+oHuOHf+4NdN7mqR2KyK8WaK6EtBpCZ9ET39ahtnCZe+ZP8/LqQ7z9k1/N9us8leWQcLVAcyU3GKu0nteltLjF
CxYe52UzD/PhjTu59DWO0owQrhdoruRpNhydJaV9W59ocZvbF87x0toJSq7Hb33tywm32xafxTdx8T1QOsaX3n7Q3wAHxwqiCedEybqPSOSZ1d7omTXbq1T27jg4tbLuLrtRWYvOGyiIjDETLgzQy5tE3lP2C6DFwWEHIW0pcVyUcLBlIy45tNvDPXKWUrKE+B19YKTII6R7kCNJ0qMRxCGPnaXoQbQKml7yuE2eMzKLEyWUBO07pFRET52lkHhEa2kfl0FzzAtS5ajzOJSzrSqlGcE/cYGc9+DnQdNzS1RCNqlwbDDneq5FXCsRnjpr8Vl8ExdfJAnw8E37aE86K4iMMcYYMxF8LoTlxdHNIylzDIjV0Z0RdLUxujkmRU5IPV1uBH5t70seq6TNMU9Tw4lC3xGv733J42HSeY5ZAJrdHMvreXLej2yOeYkqR4F6uUV/KU/lgsVn8U1efH7KN/VaQWSMMcaYieAjobs+rk9KmnQ+rEI4I3TWxjfH7FDkBHWIlPba6D4waSKZ5yRzSCI0V0f3gdmZdErgaa7kwY9ujjlMOtuzEZWFgMjis/gmML6jU34irhVExhhjjJkIPoLmyvWXBN6ddLa1RKEC2yvjm2MOk84whO2V0X1gdiad4qDZcIxrjjlMOuNqko7V0c0xh0lnNykSLghh1+Kz+CYvvk0qTDMriIwxxhgzEXwIzeXRzSN3Jp39qtJuCKOaY+5MOns1T3vJIbp/0tmtK+0GjGuOeSXp1IDOkjKuOeYw6RQN6M4pklh8Ft8kxjfdJcF0R2+MMcaYyREpncVhAjcm6ZSASy9KaAcOlV19YE4+fl3S2VmETiMBAlTGJ539Geg2YsY1xxwmnUnB0X5WH5XRzTGHSaeGjq3n9+lcGWvxWXyTE5/cSB+UW5AVRMYYY4yZCGGYkDR61zaPlD2STimy9UAfqtDZcdjPqC3xGy8WooX22OaYw6Rz6/aQ0mJzbHPMPIDWaM/niRa3B4cbjU8640IBP9+iFRUsPotv4uJjUEpNKyuIjDHGGDMRimGfpaXLnGP22qRz15b4klukWulSLXQ5TW3fpFPDErctXOS41vdNOl2/zh0L5zkG+yaduWctcfvCOY6JcmnHYU17JZ2laoOFuQ0uFkoWn8U3cfGh9Uyf0VuVFUTGGGOMmQjloMvd9TMAY5PO6DHHUgXunD2LE+Ukc2OTTsJncV/9UQBOyPikU+I6R+ZOAOyfdG4t8NLaCULxHIWxSWfhbJW7Zs5wqViy+Cy+iYsv3VM0vawgOiAuY1tSh2ZuuOoke3NWkezLRQTNulxusGupkH38DYxVJ5BxzurSZqtZp4BI+l2S8Q5Zl60iiMu44PTFfurHigP14DPOQxXJ2EBVE83exFU168uHqj5zu/waY67hRKnnmtSLLbZn8jT7jo6GaOCI8zlKlXmK9TLB5TaBazIXtagXmlyuFLjUc3R9ejK5z4X0y7MUawWiJ7bBKZWgy3yhyUalyIVeQDfJo87ho5B+qUqplid3dhsESkGX+XyT8+UyvW5ELxYgwEcBcaFEaeYQ+bNN1Akl16Oea1Evt2jPRnSTIqIBGjriQoFStUHhbBWfC8m72OKz+CYyPt1sH9wHfwJYQXQAIklYKVzONNaJ0vNBprE5l9COokxj2/2IVuAzjU0SR0+yLVe9kBDeQILqsie+oeB6GZcagwbZigBJ0nlkm8Suf/cbLuDibIOD0OF62T6S0olwQbb3BUlCIFnj8ySb22lRlEGgHsJsc3ZJgvaybYOSXITE2eagYfoeylJ4SmKVkzGT7OKxCp994910loqUFkPcgtCtK83bY+L72szWL/Oc2XPcXT7F+97+Wj754CzdRpn8YsjMgqM7r7RXE3ov6lKsb7JWe4IXVx/jxPtew8d+4l56S2XCpRwzC47OPPTqnt4dXaL6Ngu1C9xTPcXRT7+SD7z+fvqLFWjkB2OFbt3TPdzH1Zss1zZ44expPnXxML/1tS8nrpXS
pp0LAeGC0J1Ttp7fx8+3WJjb4K6ZM+RdzKe+6yW4XmLxWXwTF9+Ly4/y/jsP+hvg4FhBdABEIHT7J3teBScelzGZFdG0i3HGsZl3GtzAnhlxioqme4oyUAck2ZatN7S35Qb2+tzI2MGyM+8Eu4G9a8gN7MkJJN0LlmWx6tK9RFn4wR6irHty/A3s9fEKPluRIze0hyhddqYIrR4yZrIpyIOPUD4/R7Q+T9jJI7FDJaAbFLiY73OpWKRVzCFekeOnKJ6vEK0vEHaKuH4AGtANc2zki2wUS7RKecQDp86QO5cjXFsiapVx/QDxjk6Q43KuyEahxHYpn37JX7xMdO4C4aUGUbOabqxSR8dFbOWKXCp22IwLqAqy3SY8dZbKhUWi9RphN0ISR8eFtKICFwslLhVL1HNNXC+x+Cy+iYyvo9N90JwVRMYYY4yZCJoLcPNzI5tHXqLKUSBWR7fqYHlxZB+YFmWODZbrI9C1lbHNMU8wOKm8L/i18c0xTzKHV2Grk2dxPU/k/cjmmOeYBaBebNFZKlI+b/FZfJMXX+qjTCsriIwxxhgzEXzk6K2Pbh45TDofVIEZobs+vjnmMOn0kdJZG98c80rSmQjttfHNMTvkOU0NgPKhHGV//SWPrzbHTJPO7Zk8pcWQyOKz+CYwPp/1UJlblBVExhhjjJkIPoLmoXF9UtKkc5MKxSo0V8Y0x9yRdEYhbK+M7gOzM+kUL2yvCOOaYw6TTl/yNBsB6B59YKTGzi3xzb7DLQhhx+Kz+CYvvid0hmlmBZExxhhjJoKPoLm8X/PINOnsV3Qwdv+kMy4prWVhXHPMYdLZq3naS8K45pgQIpqezN5eAnREc8ydhydpSLeuSGzxWXwTGN+UX3jbCiJjjDHGTAQNlXZDGd0n5WrSuXlnTEcCRjXH3Jl0bj5P6SxqepGZfZLOuCJ0GgnjmmMOk84kJ3RWYsY1xxwmnRo4mrfH6JU5W3wW3+TElz729LKCyBhjjDETwYWe/uIwgRuTdEqe7fv7JKXk2uaYunfSefkF4OZ6tMmjMj7p3F4PiJbaY5tjDpPO7lyELLZoS4n9ks44nyO+r003KFh8Ft/ExZcWbdPLCiJjjDHGTIRi1Ke2tMWlHedC7Jl0unlKlS7lfI9zzF6bdO6xJV7DAocWLnGaGp0dhzXtlXS6l8xyeOEiJ6jvm3Tm1uY4vHCB46K0KDMu6SxV5pmtX04vzWzxWXwTFl96oYXpZQWRMcYYYyZCOehy18JZjsL4pNM5FirKs6sXAPZNOokOce/8SYArSefOw5OKqnD6HO7kOVw8y331RwE4ruOTzvxmjfvmHiUUzzEYm3QW62WeM3uOS8WixWfxTVx8+aw9BW9RVhBNsKxNVieO8MxrgHkz55y1KeqNytrE1RhjniHKrstLZh8lVseDKmxS4UrSKXlw8+ScQze3aJSUl84cxyMk3nFeq1eSTpUc6moUAoeeOY+Envsrx4k1wKvwuM5ePTxJQlRmKAaCnH4CSeBI5WH6GuARHgE6ejXpVFemFDQITj5B2PIcKX8BSHvPPKxCWweHJ0kAUqTkFokecwSX29xdPkWrmLP4LL6Ji0+fuHDzPtjPAKJTXhEehPUXzuj3/eeXZRr7eK9G12erWy/2Smz385nGtuIcW71sY/tJwNb/196/x0l21wX+/+tdXd09t57M/T5JuIRIQIg4BgT0G+RiyCJh/a2arBdAvkb8yW9l9fsVRFdZV1bW9bYuLjEKBpSLrBqIGoXowgaUW2ADBJKQIZmQySQzuc9MT9/r/fujTpNKp6q7pi+pOn1ez8ejZqrO+dQ5709V9Xl/3nVOnXNqTVdtM2Hy1BDdfqziVB1mums7MF4jprtrW5sKBsa7jKEBg6PdtSVhcDSbV5XuwuCpBtFl/+pjDWqT3S14YGKG2qmprtrGzAy1E2PdBZFJPvwINLp7Axtj4+RMdx0c2LAe1nb3OYqhIXJdd22pDzC9sbu2n/vSOzl+8m4r
SalPjWzal9/+sjcyMVJjcmMwNQJTG5LpjTPURqZYt2GCbRtG2bnuBF//s3NZf7TBxMYaUyPB5AhMbUymNzRgZJo1GybYsuEUezY8wpf/6Vx2fHGayZFa0R6mRpLpDUljZJrhkQk2bRhj1/oTfOnWMznzmmCyiGFypIhjpEFjwwyDI5OcMTLG9vUnuef4Rjb+8Uam19WY2BjNmDc0lz0zMsPAyBQjG8bYseEkA7UGj7zzTKKR9s/+9V3/9qx/hPc+90+/kJkHer0d6AX3EPVAjWRNdDeYHYyZ5h7WLgzXZpgc6G5wOp0zDNa6HNUDAwPdtc2EGGhAlxf4yoEk6L4tjS7b1pI8jT0zjYHu2kVC1rruHo2BoNblrqccaC67q7b1gHp3jRPI+gDdVKnRSKjXu2oLkCdnoNHdZy5nZojpbivaGnRZaAHUprv8LPv9j9TXaqcm2fixm2HXdib2b2J09yCju2qMxwAz62ZYPzzJk0ce4Ls23sHdR89hwz9+jY1bNzO5fyuje4YZ3VVjbGeNqTU1hgenOXPkIS7YdAdfHT+XDR+/hRjZwPT+bYzuXcvozgHGdgYTa2oMDDTYtf4EF2w+xJcG97H+n29nfb1OY98Oxvat5+TuAcZ21BgfTOKMZMvaU5y/6TCwj7jhIXJikty7k/F9I5zcXWdsZzA2UIMRGFkzwblnHGXz4Ck+d+sZxB132z/713f9e/7IQd7b2z//nrIgkiRJfSGHBomRDR2vk3KMMwBoEExsrLFx6+aO14F5mBFunl1uHdi1fd6LY942G8RUjdy7c96LYx5iCwAPjq1j674hancefdwpjzOaF8c8wiZqkWxZM8rEzvWsvd/+2b/+618ja8DnqSoLIkmS1BcagzWm989/8chjnMFMo8b0SDC5f/6LY84OOhuDycS++S+O+a1B50wwvm/+i2PODjqzEazdO8y6mfbXgWnGPMxhNvPIhjUMb68zaP/sXx/2r9HtoS+rlAXRHBFxCDhB85ct05l5ICK2AH8BnA0cAn44Mx8q2v8S8Lqi/b/LzI/2IGxJklbcSufIxiCM7p3vOik1xmOI+3OEdSMwumf+i2PODjoH6zC6Z/6LY84OOmMqOLl7gPkujjk76MzhBqM7B4hG54tjzg46H56ssXFbjfq4/bN//de/uz3tttp4UWbe3/L4zcA/ZebbI+LNxeM3RcR5wKXAM4A9wD9GxNMys/sfP0iSVC4rliMbgzC6c6GLR9YYzyGmRpLRXfNcHLNl0Jl1irYLDzqnNzQY2xm0O+Xx3EHnxJZgbEcw38UxZwedE41BJrZAbcr+2b/+69843Z1oa7WyIOrOJcCFxf33AJ8A3lRM/2BmTgB3RMRB4ALg0z2IUZKkXli2HJn1ZGwnzH/xyOagc/SsGcao0fHimC2DzpNnNxjfDt+6Dkx0HnSeHA7GdjSapzymDmxkTUTbb+IbgzXGd8ww38UxWwedY3tb29o/+9c//Ytuz+q0SlkQPV4CH4uIBP4oM68EdmbmPQCZeU9E7Cja7gU+0/Lcw8W0x4mIy4HLAbbsqXYVLkkqrWXPka35sb79DCZ2TtPx4pEAsYmMIUafOc3UmtlB4fyDzhNPTWY2T817cczZQeepPTVy/0TxjfmjF8dsd3jS5MY6jXPHGI/OF8ecHXQ2hupMfvsEE/Uh+2f/+q5/kdUuCard+/ZekJlHig36dRFxyzxt2/0Cre2JfYukcSXAWc8c8eS/kqQyWvYc2ZofN33bjly3fZRTrGe+QWfGJk5smGB4cJqHW77B7zTozMFBdu54hGOcseCg84FnbmDntoc5wibGGS6+OW8/6Bzas5Fd2x9qXhyT+QedU+vPYO2W4zw0vNb+2b++619zT1F1WRDNkZlHiv+PRcTVNHfvH42I3cU3X7uBY0Xzw8D+lqfvA448oQFLkvQEWekcuX5gknO23c9tMO+gc81AjS0bZjhz5CFuhoUHnfUdPGPLvQALDjprUxs4f+thapEc
ZjNjDLccnvTYQefQt41w/ubD1EgOxZZ5B51rN61h36b7eGjtOvtn//qvf11ef3C1siBqERHrgVpmnijuvwz4deAa4NXA24v/P1I85Rrg/RHxuzR/MHoO8LknPHBJklbYE5EjNwyMc8HmQwALDDrvZ8+GGhdsugNgwUFn1Lfz/DMOfms9jxt05qODzpiBF278OgCNjHm/iR860eAFI7c1L6IOHKLzoHPwvpM8e+QuTq0btn/2r+/6x933UmWRFa8IW0XEk4Gri4d14P2Z+baI2Ap8CDgT+CbwQ5n5YPGcXwZ+EpgG3piZf7/Qep7y7evz7Vd/W1cxHZnazHiXx3XePzXCiek1XbU9OT3Ew5Prumo7OTPAg2Pdtc0MTpwaJrs8n/3kqUGY6a5tTAwQU921rU0GtYmumhKNoH6qy7YJ9VGIRnd/N/VTUJvpsu14MjDRXdvaVFIfm+6qbUwnAye7fDFmktqJUeiyf/nIcXKmy5MqzszQ7famNjxMrO/uM0e9Tm5c31XTTx98F4+M3VPtiy1Ii/RE5Mi1u/fn2T/58zQGm9dmyTo06pCDSdabNwYbRL3B0KE1DIw3L2rZGMxmu/rs85IcTKg3iHoycHSIoYeDHGieyS7rzfaPbVu0P1Fn3ZEBGgOQg9Ao2ubsOgYbRduE6WD97YNkjTkxJ41BoJ5kvWhfS9YeHCYac9vaP/vX+/7VhmY49GO//IXMPLAS249+5x6iFpl5O/DsNtMfAF7c4TlvA952OusZYIZNA6MLtpvJGiMDaxhsDHa13ImB7trNmmx09/bXY4Dxwe6W3chgfLDedUE0MzRAo8uCKBtB89uSLuIAaHRZaDWSRr3LttncqNBl/2pTSda6XPZpnKw9axDT3b0WMQC1yS7/1DPJySFoNLprv3YNMd1dYdY4OUpOdFeYNYDaYJcxNxow0eVn3y+ApEV7InLk0P3jnHXFzeTenYzvG+Hk7jqndgXj25Pa5kn2bX+IZ225mwMb7uBP/voH2fDxW2DXdib2b2J09yCju2qM74CZzVPs3PEIz9hyL88/4yC/+94f5Mx3fIXa1s1M7t/K6J5hRnfVGNsJU9tn2LTjBE/fdpQLNt3BH3z+xZz5KzcTIxuY3r+N0b1rGd05wNjOYGLnNOu2j3LOtvu5YPMhPvfQ2Uy+ZRqiRmPfDsb2refk7gHGdtQY3znD4LYxzt72IN+55ZucUR/jf//2c+DIMftn//qufy8Y+TqvXPTWofwsiCRJUl/IoUGo19ueEniMYQ6zmUYG0znA5EiNGNnQ9pTH4znEMc54dLkDUNu6ed6LY94823aqBru2z3txzNuKtveOjrB5T52467Fn94psHpI0zlruyC3NNa4ZZXLHeoaO2T/713/9m6EG3ERVWRBJkqS+kIPNb7LbXSclo3nxyCNsopHBxMYa0/sff8rjbw06eXTQ2RiEyf3zXxzzW4PO6WBi//wXx5wddM7M1Fizdw1rG48/5XHGo4POQ7GFhzaspb5jiLr9s3992L+qsyCSJEl9YWYwGNs3/8UxxxnmnjyDtSMwurfNdWDYxNxB52A9Gd0z/8UxZwedMR2M7u58HZjWQSeDDUZ314lsfx2Y2R+vj7OWByYH2LitxuAp+2f/+q9/s3uKqsqCSJIk9YXGIJzcPd91Uh4ddE6NJKM7218HZu6gs1GH0V3zXxxzdtA5syaLtgsPOqfOaHBqZzR/Ld/h4pizg86JmWHGt0Jtyv7Zv/7r33iupcosiCRJUl/IOoztiOI3DgsMOnc2GNsZdLo4Zuug89SeBmM7Yb6LY84OOk/tgfHtSbtTHs8ddOZAML69Me/FMWcHnVmrMbmlQTRq9s/+9V3/mrfqsiCSJEl9IQYbjO+cAQaK3zh0GHRGnVNPnWZizexAb/5B5+iZydT2Gea9OGYx6BzfEUztnZz34pizg86pkQGmzx5vf3HMw/c8ZtDZGKwzec4E4wND9s/+9V3/mkVedVkQSZKkvjBUn2Zw
x1jHi0fODjozRjgxMsHAQGOei2M+OujMwQE27Tgx78UxZwedDz19LTt2PPL4i2O2GXSO7tzApm2PcG+cwXjLYU3tvomfWjfC4JaTPDK01v7Zv77rH9nd9fxWKwsiSZLUF9bVpzh724PckVvmHXSurcGmDdPsWn+C22DhQWd9C0/fdpSbYcFBZ0zt4xlb7gV4/KAzHjvoHH7qOp619Qi1SI6wad5B57pNw2zb9AAPrVln/+xf3/WvuaeouiyIJElSXxipjfGdW74JwKGYZ9B55D52ra9zweZDAAsOOqlv4oJNdwAsOOiszezj+Wcc/FZM830TP3SiwfNGDlKPGWqRHGZzx0Hn0NGTPGvkbk6uG7Z/9q/v+le7616qLNIrtz/hnv6s4fzTv9ndVdu7prYynoNdtb1veoSTM2u6antiZg33T2zoqu1U1jg2NtJV28zgwbF1NLr4WGUGo2PDNGZqXS17emIAprprG1M1auPRXduZYGCsy7YJ9VGgyz+b+imozXTXeGAcBia7bDuV1McaXbWNGRg8MdVVWzIZOD5BdLldiOOjMD3d3aJHT9GYmOhuuREw0N3xzDEwQJyxsau2/3Lv+3lk4mh3b7akJ9zwk/fmrl/9dzAVMBPNbfkU1KaDmILaVFCbgmjA6FOmYLDRzAszQUwFtani/+mWtjNwau8MbJxuXtRyOojpR9vPto0pqM3AxOZkZtdE0bb2mLYxZ7mNYZh48niz7VSNKOJ4NAa+9TwCTp0zARn2z/71Xf+iATe//ee/kJkHerwZ6An3EPVAnQbba90NDMfrx5k6jR+6rYnuBr7DtS4HyMBEo85ko7uPSiODycYAmd2NOWcaNWYa3RU5ADMD3Q3Us5Y0uj1jykxClzEEENPRdUEUDWjMdDv+TrLWXdvmR6LLmGcgGnW6qVIjIaYbRJf9q03PwMxMd3EAtcHuPkc5PkFjdLTLIAYY6HK5NLorIiX1xppvTvL0N91JY98Oxvat5+TuYGxHML5zhsEdY5y57UG+c8s3uWDD7fzev/+3rP/n28m9OxnfN8LJ3QOc2tU8q1bun2Dntoc5f+thXrjx6/z6u3+UM3/lZti1nYn9mxjdPcjorhrj25OpvZPs2PEIz9hyL88/4yBv+9zFnPuG26ht3czk/q2M7hlmdFeNsZ3J1PZpNu04wdO3HeWCTXfwLw89hZM/0CDWrWV6/zZG965ldGeNsZ0wsXOaddtHOWfb/Vyw+RDrBib42A8egAcfsX/2r+/698INt/Kit/d6C9A7FkSSJKk/DNZherrjxSMP0bx45FQOMDlSY3293vE6MEfYRK34dqcxADGyYd6LY87KqRq1rZvnvTjmzUXbIyfPYOOuAfLuox0vjnlb0Xbr8ChT2zcweOwB+2f/+q5/DQJ49FC8qrEgkiRJfaExNEDu3dn54pHFoLNBMLkxaOyb/+KYh9lMI4MchOn981wcM1oGndM1JvfPf3HM2UHnxFSdob3DrJlpdLw45uyg8/7162HnMPWH7Z/967/+NQui6rIgkiRJfaExGIzvG2FtJhw51nHQeWfC8Egwtm+Bi2MW38QP1JPRvfNfHHP2m/iYDkb3zH9xzNlBZww2GN09RDQ6XxxzdtA5OTHIxm01Bkftn/3rv/59o/OfZSVYEEmSpL7QGISTu+vARtZEdP4mPtdSG4GTu+e5OGbLoLNRh9GdHU55nJv41qAzh2gMJaO75r845uygc3qkwamdQeTjT3k8d9A5OR2Mbw1qk/bP/vVf/055HSJJkqTea9Th1K6g83VSHh10TmxpMLajRqeLY84OOiNrjG9vMLYz6HgdmNjEtwadO2BsZzLfxTFnB51j0fzh+3wXx3x00NmMmZzvOjD2z/71pn+R1S4Jqt17SZLUPwYbjG9vEF0MOsfOnGF8MOl0cczWQeep3c2zas17cczi8KSJLcHU9tm28w86p9fVOPW0iY7XgWkddDYGB5g4c4rx2qD9s3991z+6PTPvKmVB
JEmS+sJgfYbYOcEYw2R0HnRmbT0nRiaJM7LzxTFbBp33f1ewbvvovBfHnB10PnzOMJt2nJj34pizg86x7WtZt/0ED8TIgoPO6TXrqG0Z5cTQWvtn//quf5Fr2/9RVoQFkSRJ6gvr6lPs2/4Qh9nMeMtvIeYOOtcN7OSMkUm2rD3FIbYsPOisj3DOtvu5DRYcdNamdvL0bUe5GRYcdA4+eS/P2HYPN9caHOOMxw4687GDznUb97Br00M8vHbc/tm//utfY+vCf6CrmAWRJEnqCxsGxnnWlrtpZHCETYwz3PbwpIHD97F9/QDnbzoMsOCgk/oGDmy+E2DBQWfM7OSCTXcALDjoHD6+m+duvJ1acbXuxw06W76JHz46yjPPOMLx6TX2z/71Xf/qd1X7wuUWRD1QA9Z0ebr3NTHV9XLXxBRTte6OAZ3KOoMx092CazBUm+6qaSNrDNVmmM5aV+3rA13GANQGGmR298LNDCQ5kF21DYLs9tDZBs223S2aLl+G5qIHgugy5kYjaHQZc40kawG1hV+7zCTrNbLL/tXqp3HMcb0Oje42uDEwDV1+lmNgAKa6+3x23TFJPXF8ei23PrKTE+PDEEljXYPxrcHMUDC5aZDBfZsZPr6JwdEZHjl+glrs5cGxdWQjyDUNJrYEjcEakxvrDO3eyNC3jTB8fAam4YsP7+foqQ3MzNRgsMHUGQ1yIJgaGeDUjvUMPWUdw8cbNIbhXx56CveObmRyeoAYbDA90mCMGtPraoxtX8vQk/YyfHwXp3bUuP6hp3HPqY2cmhwkBhpMj8wwngPMDNcY2zrM0Fk7GD6+jazBFx/az0yjZv/sX9/1r36qAUd6vQXoHQuiHhiMAXbXN3TZ+iTjOdFVyzUxxcMz67pqe6I2zmB0N4gcz0EAGl0UI42M5u00LvA10+iuajheazA53d1Hdqo+wFSXxUXOBI1u/xQSoNZVQRTFv93WnVlrXoOjGwOTRZHThdrMaVxsLSESaHRbOKyhNt1dkVOrD8DEYFdtY2iIgcEu35OpaWaOH++qaWa1vwGT+t3UbUH9NbB9/zDr9wwzunOAsR0wvnuaeNYp9m97gO/c/E0uWP8NfuuNP0He8BBb9w2xdu9s22B8xwyNc8fYtf0hzt98mBeM3MavvOsnGH9Lg017Bhneu4bR3XVO7QzGtzeYPnucTdse4Vlbj/C8kYO89XOv5OQPNNiwq0597xpGdw8VbZNTT5tg3fYTnLftHp678XY+8dC5PHxJjXUbg/r+NYzuHmZ0Z43xHcnY2VMMbj/Jk7Yd47s2HWJdbZK/+/88j/rJMftn//quf89fdxvf+6RebwF6x4JolRmIlRvw1WL5v12vRdLtPqJYgfWvpHJF+6gMKn69akk9Ux8gHznOYKPB+sY2yLXFbvk6Y7GOOyKpF3luel2NnJikdudR1s3sIBot14GJtdxJcw/5YMw099ZHjbjrKGsbEDkC2Tzl8RjD3BtnUIukHjPkVI1Yt5a8+yhrZhpEbmpex6W4OOYDMcLNtQY1kqOnRli3MWjc9wBDjQY0tkI2f1uSUec4G7itiHnL0CmmN62jfvdR+2f/+q5/zaOGbl+xP+1+Z0EkSZL6QmOoDru2d754JOu5DZjOGhMbg9y7s/PFMVnLodjSXO4gNPa1P+VxRvPimEfY1Pzib6rG9P72pzyeHXQe4wwARieG2LV/mKFGo+PFMR9mhJuBLetPMbVjmA0P2D/713/9O50je1YjCyJJktQXGoPBxP75rpPSHHTenkF9YzC+b/6LY46zlkNsgcFkbF/n68A0B5LDHGYzMROM7u18HZjWQWcMNBjdPQyNzhfHnB10jp0xyIZtAwzaP/vXh/27ucvfaK9WFkSSJKkvNAZhdPfjTwk8d9A5lutYswFO7p7/4pizg856HU7unv/imLODzqjB6M4a810cc3bQOT0y02ybnS+OOTvonJhZS31bUJ+wf/av//p3nG5/2746WRBJkqS+0KjD
6K7OF49sHXROjSRjO4NOF8dsHXRObmowtqNG5MKDzoktydhOmO/imN8adOYA4zuS+S6OOTvojBxgYnMSM/bP/vVj/6pdElS795IkqX8MJuPbZwdw8ww6Y4CHv32GsYEaGXOuA3P4nscNOse3w/jOGWCAjPkHnVMbYWLnNPNdHHN20DmzpsbYWVNkdL445uygM+s1TnzbFOPfamv/7F//9C9O5zohq5AFkSRJ6gv1+gwzOycfe/HIaDPojLWceP4UjMB4y2E/nb6Jf+jZweC2sXkvjjk76DzxpDrrto/Oe3HMYYDcxNjWYQa3nywON5p/0Dm9Zg2Nrac4NbjG/tm/vusfRSlVVRZEkiSpL6ytT7FjxyMc44zHDjrnfBO/rradkQ0TjKyZ4AibFhx0Zn0dZ297kDtyy4KDztrUFs7Zdj+3wYKDzqGzdvCkbce4LZKHWw5rajfoXDeyk22bH+LBNevsn/3ru/6RW7r6G12tLIgkSVJfWD8wwTO23Asw76Bz8K4aOzbAuWccpRbJYTbPO+ikfhbfueWbAByK+QedMb2FCzYfAlh40HliG9+16RD1aHAzzDvoXHN0hKdvvJeH166zf/av7/rX3FNUXRZEkiSpL2yoTfDcM5oXh2xkcF9uZJyh4vcNxdm9Msn7HmDP+gbPHzlII2s0Mrg7NzGRzd9CNH8gPsLaTLj7XmpDM7xg5OvM0PydxB25hfFsDjojByCLQedd9xINeOGGW2kQNAi+AZzK9cUyB4hcy7rGVup3NaifavD8dbcxGDM0CG7O4DgbiOKimjAMuYUhII+P8ez132Q8h+yf/eu7/jXue2B5/5hLJjKz1zFUTkScAG7tdRxzbAPu73UQc/RjTNCfcRlTd87KzO29DkJSe32aH6E/t2fG1J1+jAn6M67K5kj3EPXGrZl5oNdBtIqIG4ypO/0YlzFJWiX6Lj9Cf27PjKk7/RgT9G9cVVXtc+xJkiRJqjQLIkmSJEmVZUHUG1f2OoA2jKl7/RiXMUlaDfp1u9GPcRlTd/oxJujfuCrJkypIkiRJqiz3EEmSJEmqLAuiJ1BEXBQRt0bEwYh48xO87ndHxLGIuKll2paIuC4ibiv+39wy75eKOG+NiO9foZj2R8THI+LmiPhqRPxcr+OKiDUR8bmI+FIR03/sdUwt6xmIiP8TEX/bDzFFxKGI+EpE3BgRN/RDTJLKq1c50vzYdUzmx9OLyRxZJpnp7Qm40byE8DeAJwNDwJeA857A9X8v8BzgppZpvwW8ubj/ZuC/FPfPK+IbBp5UxD2wAjHtBp5T3B8Bvl6su2dxAQFsKO4PAp8Fntfr16pY188D7wf+tk/ev0PAtjnTev46efPmrXy3XuZI82PXMZkfTy8mc2SJbu4heuJcABzMzNszcxL4IHDJE7XyzLweeHDO5EuA9xT33wO8qmX6BzNzIjPvAA7SjH+5Y7onM79Y3D8B3Azs7WVc2XSyeDhY3LKXMQFExD7gXwF/0jK5pzF10I8xSep/PcuR5seuYzI/Ll2/xlV5FkRPnL3AXS2PDxfTemlnZt4DzY0vsKOY/oTHGhFnA99B8xunnsZV7Hq/ETgGXJeZPY8J+H3gF4FGy7Rex5TAxyLiCxFxeZ/EJKmc+m0b0TfbMvPjgn6f/suPYI4slXqvA6iQaDOtX0/x94TGGhEbgL8C3piZxyParf6JiyszZ4DzI2ITcHVEPHOe5iseU0S8AjiWmV+IiAu7ecpKx1R4QWYeiYgdwHURcUsfxCSpnMqyjTA/mh+7ZY4sEfcQPXEOA/tbHu8DjvQolllHI2I3QPH/sWL6ExZrRAzS3Ni/LzP/ul/iAsjMh4FPABf1OKYXAK+MiEM0DyP5voj48x7HRGYeKf4/BlxNc/d+X7x3kkqn37YRPd+WmR+70pf5EcyRZWNB9MT5PHBORDwpIoaAS4FrehzTNcCri/uvBj7SMv3SiBiOiCcB5wCfW+6VR/OrrncBN2fm7/ZDXBGxvfjm
i4hYC7wEuKWXMWXmL2Xmvsw8m+bn5n9l5o/1MqaIWB8RI7P3gZcBN/UyJkml1m850vz4+JjMj10yR5ZQr8/qUKUbcDHNM8V8A/jlJ3jdHwDuAaZofhPxOmAr8E/AbcX/W1ra/3IR563Ay1cophfS3CX8ZeDG4nZxL+MCngX8nyKmm4BfLab39LVqWdeFPHoWnV6+Tk+meUacLwFfnf0898vr5M2bt/LdepUjzY9dx2R+7D4Wc2TJblG8CZIkSZJUOR4yJ0mSJKmyLIgkSZIkVZYFkSRJkqTKsiCSJEmSVFkWRJIkSZIqy4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpsiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCqIQi4oqI+A+9jmOxIuJnIuJoRJyMiK29jueJEBFfjYgLex1HNyLiUES8pLj/1oj482Va7oURcXg5liVJnZgjy8ccaY7sNQuiPlP8oY1FxImIeDgi/iUiXh8R33qvMvP1mfmfulzWS1Y24tMTEYPA7wIvy8wNmflAr2N6ImTmMzLzE920Xeh9K8tGMyIyIp7a6zgkrR7myNXJHKlesyDqTz+QmSPAWcDbgTcB7+ptSMtmJ7AG+OrpPjGa/MyKiKj3OgZJPWOObMMcqVnmyNPnH04fy8xHMvMa4EeAV0fEMwEi4qqI+I3i/raI+Nvim7IHI+KTEVGLiD8DzgT+ptjt/otF+/8ZEfdGxCMRcX1EPGN2fcVy/zAi/q749u2zEfGUlvnPiIjrivUcjYi3FNNrEfHmiPhGRDwQER+KiC1z+xMRTwNuLR4+HBH/q5j+/Ij4fBHT5yPi+S3P+UREvC0i/hk4BTy5zXKfXrR7uNjt/srT6NO3tfTp1oj44U7vR7GO34yIzxWxfqS1nxHxymL9Dxdtn94yb+4u9g9FxHuLmL4aEQeKeW3ft5blrAf+HthTzD8ZEXsiYjgifj8ijhS334+I4Q79eEpE/K/ivbo/It4XEZs69Xs+EfFTEXGweP2uiYg9xfTriyZfKmL8kZbn/EJEHIuIeyLitS3ThyPityPim8Xn64qIWFvMuzAiDkfEmyLiXuBPO332F9MPSeVjjjRHhjnSHLlcMtNbH92AQ8BL2kz/JvAzxf2rgN8o7v8mcAUwWNy+B4hOywJ+EhgBhoHfB25smXcV8CBwAVAH3gd8sJg3AtwD/ALNb69GgOcW894IfAbYVyz3j4APdOjf2UAC9eLxFuAh4MeLdV5WPN5azP9E0fdnFPMH5yxvEDgIvAUYAr4POAGc20Wf1gN3Aa8t5j0HuB94RofYPwHcDTyzeO5fAX9ezHsaMAq8tIjpF4u4hua+F8BbgXHgYmCgeA8/s9BnoGX+hcDhOdN+vXgPdgDbgX8B/lOH5z+1iHO4aHs98Pvt1l/E+ucdlvN9xev1nGJZ/x24vmV+Ak+dE/d0Eetg0f9TwOZi/u8D1xSfiRHgb4DfnPPc/1Ksay3zfPa9efO2Om+dto+YI82Rj86/EHOkOfI0bz0PwNucN6Tzxv4zwC8X96/i0Y39rwMfaf2jWmhZLfM3FX+QZ7Qs909a5l8M3FLcvwz4Px2WczPw4pbHu4Epig36nLZn89iN/Y8Dn5vT5tPAa4r7nwB+fZ4+fA9wL1BrmfYB4K1d9OlHgE/OWd4fAb/WYV2fAN7e8vg8YJLmBvs/AB9qmVejmRgunPte0NyA/uOc5Yydxvt2IY/f2H8DuLjl8fcDh7r8zL2q9b1tE2unjf27gN9qebyheN/PLh6329iPtX4ugGPA84CgmSyf0jLvu4E7Wp47Caxpmd/xs+/Nm7fVeeu0fcQc2akP5sg0Ry7H395qv7nrrDz20vwWZ67/SvNblo9FxO0R8eZOC4iIgYh4e7Hb/jjNP2qAbS3N7m25f4rmHzDAfpoblHbOAq4udss+THPjP0PzWOiF7AHunDPtTpr9nXXXAs+/KzMb8zy/U5/OAp47G3cR+48C
u+ZZX2ssd9L81mXb3H4U8dw1J45Wc2NaE0s75nfu63hnMe1xImJHRHwwIu4uPgd/zmM/A4taZ2aeBB6gc58BHsjM6ZbHs+/HdmAd8IWW9+Ifiumz7svM8ZbHXX/2Ja165sjOzzdHmiPNkQuwICqBiPgumn9An5o7LzNPZOYvZOaTgR8Afj4iXjw7e07zfwtcArwEOIPmN1HQ/OZhIXcBT5ln3sszc1PLbU1m3t3Fco/Q3Oi2OpPmN0ez5vZj7vP3zzkudu7zO7kL+N9z4t6QmT8zz3P2z1nPFM1d4o/pR0RE0babOOaar7+d5s99Hc8sprXzm8UynpWZG4Efo7vPwLzrLI7d3sri+nw/zW/GntHyXpyRmRta2jym3wt89iVVhDnSHNnFfHOkOXJeFkR9LCI2RsQrgA/S3CX7lTZtXhERTy02Lsdpfus0U8w+ymN/YDkCTND8hmId8J9PI5y/BXZFxBuLH/aNRMRzi3lXAG+LiLOKmLZHxCVdLvda4GkR8W8jol78sPC8Yn3d+CzN3ci/GBGD0byOwQ/QfM266dPTIuLHi+cORsR3RcsPPdv4sYg4LyLW0dwd/ZeZOQN8CPhXEfHiaJ429Rdovtb/0mU/Ws1939rN3xoRZ7RM+wDwK8Vrvw34VZrfarUzApyk+aPdvcD/u4gYAd4PvDYizi9+nPqfgc9m5qEu+/EtxbeFfwz8XkTsAIiIvRHx/Z2es8BnX9IqZ47sijmyyRxpjpyXBVF/+puIOEHz25lfpnlNgtd2aHsO8I80/3g/DfyPfPRc/r9JcwPwcET8P8B7ae6+vRv4Gs1jrruSmSdo/sjwB2juyr4NeFEx+7/R/KHfx4q4PwM8t91y2iz3AeAVNDeOD9D8oeUrMvP+Lp8/CbwSeDnNb1D+B/ATmXlLl316GXApzW9y7uXRHyR28mc0j7m+l+YPZ/9dsaxbaX6L9N+LOH6A5qlhJ7vpxxxz37e5cd9Cc+N+e9FmD/AbwA3Al4GvAF8sprXzH2n+yPMR4O+Av15EjGTmP9E8LvyvaP6Y+Ck0X8tZbwXeU8TY8cxELd5Ec/f+Z4rDFP4ROHee9vN99iWtXuZIc6Q50hy5rGbPtCJpARHxCZrfQv5Jr2ORJKmfmCNVZu4hkiRJklRZFkSSJEmSKstD5iRJkiRVlnuIJEmSJFXWUi5ytWK2bRnIs/cP9joMSSqVQ3dNcf+DM4u5XoZK4nTy49hjrsW5UNvTy7ljjaGu247PdD/UmGx033Z6+jS+0z3NP4s4jfZxGicyjumF28yqndZyT+9on9p0958NprsPOqe6bxuDpzEErZ/ecLVR7/6zkfXu3+vGQPcx5GmEnKex3Gb703i/T6Ntvd7952Ko1v17vWbgND74wNpa9ycfXBtTXbf92lem7s/M7e3m9WVBdPb+QT730f0LN5QkfcsF3z/fBeu1GpxOfvzq5FjXy/3KxJ7TiuOmsX1dt/36yR1dt/3m8c1dt33g4Q0LNypMP9J9AQcw+HD3I9Shh7sfUA8/1P3gdM1D3Q9O1zxwegPOoftHu25bO/ZQ122n7z3addv6tp1dt23s6P5zATC5bX3Xbce3dj8UHt/cfaE1sbn7z8XkptMraKc2dV8t18/ovrjYuulk123P3Nj95+JpG4513RbgmWsPd93224c7XV/38Z511t13dpq3pEPmIuKiiLg1Ig5GxJvbzI+I+INi/pcj4jlLWZ8kSWVhjpSkclh0QRQRA8Af0rzY13nAZRFx3pxmL6d5YahzgMuBdy52fZIklYU5UpLKYyl7iC4ADmbm7cWVhj8IXDKnzSXAe7PpM8CmiNi9hHVKklQG5khJKomlFER7gdYD1g8X0063jSRJq405UpJKYikFUbtfi839VVg3bZoNIy6PiBsi4ob7HjiNU6tIktR/li1Hmh8laWUtpSA6DLSe6mYfMPdUD920ASAzr8zMA5l5YPvW0zz/oCRJ/WXZcqT5UZJW1lIKos8D50TEkyJi
CLgUuGZOm2uAnyjOpPM84JHMvGcJ65QkqQzMkZJUEou+DlFmTkfEG4CPAgPAuzPzqxHx+mL+FcC1wMXAQeAU8NqlhyxJUn8zR0pSeSzpwqyZeS3NDXrrtCta7ifws0tZhyRJZWSOlKRyWFJBtFJONOATY0u6ZqwkVc6J7i9sL0mSClYdkiRJkirLgkiSJElSZVkQSZIkSaosCyJJkiRJlWVBJEmSJKmyLIgkSZIkVZYFkSRJkqTKsiCSJEmSVFkWRJIkSZIqy4JIkiRJUmXVex1AO8cba/nHE8/odRiSVCrHG8d6HYIkSaXjHiJJkiRJlWVBJEmSJKmyLIgkSZIkVZYFkSRJkqTKsiCSJEmSVFkWRJIkSZIqy4JIkiRJUmUtuiCKiP0R8fGIuDkivhoRP9emzYUR8UhE3FjcfnVp4UqS1P/MkZJUHku5MOs08AuZ+cWIGAG+EBHXZebX5rT7ZGa+YgnrkSSpbMyRklQSi95DlJn3ZOYXi/sngJuBvcsVmCRJZWWOlKTyWJbfEEXE2cB3AJ9tM/u7I+JLEfH3EfGM5VifJEllYY6UpP62lEPmAIiIDcBfAW/MzONzZn8ROCszT0bExcCHgXM6LOdy4HKANTtH+PwDZy01NEmqlNHpoV6HoDmWI0e25scz9y45bUuS5ljSHqKIGKS5oX9fZv713PmZeTwzTxb3rwUGI2Jbu2Vl5pWZeSAzDwyesXYpYUmS1HPLlSNb8+P2rQMrHrckVc1SzjIXwLuAmzPzdzu02VW0IyIuKNb3wGLXKUlSGZgjJak8lrLv/QXAjwNfiYgbi2lvAc4EyMwrgH8D/ExETANjwKWZmUtYpyRJZWCOlKSSWHRBlJmfAmKBNu8A3rHYdUiSVEbmSEkqj2U5y5wkSZIklZEFkSRJkqTKsiCSJEmSVFkWRJIkSZIqy4JIkiRJUmVZEEmSJEmqrKVch2jFTEzVuf3o4y7WLUmax8RUX27SJUnqa+4hkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkiqr3usA2pqu0Ti6ptdRSFK5TPsdlyRJp8vsKUmSJKmyllQQRcShiPhKRNwYETe0mR8R8QcRcTAivhwRz1nK+iRJKgtzpCSVw3IcMveizLy/w7yXA+cUt+cC7yz+lySpCsyRktTnVvqQuUuA92bTZ4BNEbF7hdcpSVIZmCMlqQ8stSBK4GMR8YWIuLzN/L3AXS2PDxfTJEla7cyRklQCSz1k7gWZeSQidgDXRcQtmXl9y/xo85xst6AiWVwOMLB58xLDkiSp55YlR7bmxzP39ufJYSWpzJa0hygzjxT/HwOuBi6Y0+QwsL/l8T7gSIdlXZmZBzLzwMCG9UsJS5KknluuHNmaH7dvHVipcCWpshZdEEXE+ogYmb0PvAy4aU6za4CfKM6k8zzgkcy8Z9HRSpJUAuZISSqPpex73wlcHRGzy3l/Zv5DRLweIDOvAK4FLgYOAqeA1y4tXEmSSsEcKUklseiCKDNvB57dZvoVLfcT+NnFrkOSpDIyR0pSefTlrzNrU7D23pU+I7gkrS61qV5HIElS+Vh1SJIkSaosCyJJkiRJlWVBJEmSJKmyLIgkSZIkVZYFkSRJkqTKsiCSJEmSVFkWRJIkSZIqy4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVV73UA7dSmYP2R7HUYklQqtaleRyBJUvm4h0iSJElSZVkQSZIkSaosCyJJkiRJlWVBJEmSJKmyLIgkSZIkVZYFkSRJkqTKsiCSJEmSVFmLLogi4tyIuLHldjwi3jinzYUR8UhLm19dcsSSJPU5c6QklceiL8yambcC5wNExABwN3B1m6afzMxXLHY9kiSVjTlSkspjuQ6ZezHwjcy8c5mWJ0nSamGOlKQ+tlwF0aXABzrM++6I+FJE/H1EPGOZ1idJUlmYIyWpjy36
kLlZETEEvBL4pTazvwiclZknI+Ji4MPAOR2WczlwOcDwmk1suHtyqaFJUqXUJrPXIWiO5ciRrfnxzL1LTtuSpDmWYw/Ry4EvZubRuTMy83hmnizuXwsMRsS2dgvJzCsz80BmHhgcXL8MYUmS1HNLzpGt+XH71oGVj1iSKmY5CqLL6HAoQETsiogo7l9QrO+BZVinJEllYI6UpD63pH3vEbEOeCnw0y3TXg+QmVcA/wb4mYiYBsaASzPTYzokSaueOVKSymFJBVFmngK2zpl2Rcv9dwDvWMo6JEkqI3OkJJXDcp1lTpIkSZJKx4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpsiyIJEmSJFXWkk67vVJqk9OsOeS16STpdNQmp3sdgiRJpeMeIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkirLgkiSJElSZVkQSZIkSaosCyJJkiRJlWVBJEmSJKmy6r0OoJ2cmGT69kO9DkOSSiVzstchSJJUOu4hkiRJklRZCxZEEfHuiDgWETe1TNsSEddFxG3F/5s7PPeiiLg1Ig5GxJuXM3BJknrNHClJ5dfNHqKrgIvmTHsz8E+ZeQ7wT8Xjx4iIAeAPgZcD5wGXRcR5S4pWkqT+chXmSEkqtQULosy8HnhwzuRLgPcU998DvKrNUy8ADmbm7dk8sP2DxfMkSVoVzJGSVH6L/Q3Rzsy8B6D4f0ebNnuBu1oeHy6mSZK0mpkjJalEVvKkCtFmWnZsHHF5RNwQETdMMbGCYUmS1HNd58jW/HjfAzMrHJYkVc9iC6KjEbEboPj/WJs2h4H9LY/3AUc6LTAzr8zMA5l5YJDhRYYlSVLPLWuObM2P27cOLHuwklR1iy2IrgFeXdx/NfCRNm0+D5wTEU+KiCHg0uJ5kiStZuZISSqRbk67/QHg08C5EXE4Il4HvB14aUTcBry0eExE7ImIawEycxp4A/BR4GbgQ5n51ZXphiRJTzxzpCSVX32hBpl5WYdZL27T9ghwccvja4FrFx2dJEl9zBwpSeW3YEHUCzE8RH3f2b0OQ5JKJQ4P9ToESZJKZyXPMidJkiRJfc2CSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkirLgkiSJElSZdV7HUA7jaE642dv7XUYklQqjWN9uUmXJKmvuYdIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpsiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZCxZEEfHuiDgWETe1TPuvEXFLRHw5Iq6OiE0dnnsoIr4SETdGxA3LGLckST1njpSk8utmD9FVwEVzpl0HPDMznwV8HfileZ7/osw8PzMPLC5ESZL61lWYIyWp1BYsiDLzeuDBOdM+lpnTxcPPAPtWIDZJkvqaOVKSym85fkP0k8Dfd5iXwMci4gsRcfkyrEuSpDIxR0pSn6sv5ckR8cvANPC+Dk1ekJlHImIHcF1E3FJ8m9ZuWZcDlwMMrd/Myb1DSwlNkiqnMRS9DkEtlitHtubHM/cuKW1LktpY9B6iiHg18ArgRzMz27XJzCPF/8eAq4ELOi0vM6/MzAOZeaC+Zv1iw5IkqeeWM0e25sftWwdWKmRJqqxFFUQRcRHwJuCVmXmqQ5v1ETEyex94GXBTu7aSJK0W5khJKpduTrv9AeDTwLkRcTgiXge8AxihuYv/xoi4omi7JyKuLZ66E/hURHwJ+Bzwd5n5DyvSC0mSesAcKUnlt+DByJl5WZvJ7+rQ9ghwcXH/duDZS4pOkqQ+Zo6UpPJbjrPMSZIkSVIpWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMpa8LTbvdAYhNE90eswJKlUGoO9jkCSpPJxD5EkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmS
JEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWfVeB9BOYxDGdjV6HYYklUpjsNcRSJJUPu4hkiRJklRZCxZEEfHuiDgWETe1THtrRNwdETcWt4s7PPeiiLg1Ig5GxJuXM3BJknrNHClJ5dfNHqKrgIvaTP+9zDy/uF07d2ZEDAB/CLwcOA+4LCLOW0qwkiT1maswR0pSqS1YEGXm9cCDi1j2BcDBzLw9MyeBDwKXLGI5kiT1JXOkJJXfUn5D9IaI+HJxuMDmNvP3Ane1PD5cTJMkabUzR0pSSSy2IHon8BTgfOAe4HfatIk207LTAiPi8oi4ISJumDk5usiwJEnquWXNka358b4HZpYtSElS06IKosw8mpkzmdkA/pjmrv+5DgP7Wx7vA47Ms8wrM/NAZh4Y2LB+MWFJktRzy50jW/Pj9q0Dyx+wJFXcogqiiNjd8vBfAze1afZ54JyIeFJEDAGXAtcsZn2SJJWFOVKSymXBC7NGxAeAC4FtEXEY+DXgwog4n+bu/UPATxdt9wB/kpkXZ+Z0RLwB+CgwALw7M7+6Ep2QJKkXzJGSVH4LFkSZeVmbye/q0PYIcHHL42uBx51uVJKk1cAcKUnlt2BB1BP1BrWd472OQpLKpd7odQSSJJXOUk67LUmSJEmlZkEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkirLgkiSJElSZVkQSZIkSaosCyJJkiRJlWVBJEmSJKmy6r0OoJ3hwWmevPP+XochSaVy3+B0r0OQJKl03EMkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkirLgkiSJElSZVkQSZIkSaqsBa9DFBHvBl4BHMvMZxbT/gI4t2iyCXg4M89v89xDwAlgBpjOzAPLErUkSX3AHClJ5dfNhVmvAt4BvHd2Qmb+yOz9iPgd4JF5nv+izPQqq5Kk1egqzJGSVGoLFkSZeX1EnN1uXkQE8MPA9y1zXJIk9T1zpCSV31J/Q/Q9wNHMvK3D/AQ+FhFfiIjLl7guSZLKxBwpSSXQzSFz87kM+MA881+QmUciYgdwXUTckpnXt2tYJIPLATbuXst3bb1ziaFJUrXcVJ/sdQh6rGXJka358cy9S03bkqS5Fr2HKCLqwA8Cf9GpTWYeKf4/BlwNXDBP2ysz80BmHli3eXixYUmS1HPLmSNb8+P2rQMrEa4kVdpSDpl7CXBLZh5uNzMi1kfEyOx94GXATUtYnyRJZWGOlKSSWLAgiogPAJ8Gzo2IwxHxumLWpcw5FCAi9kTEtcXDncCnIuJLwOeAv8vMf1i+0CVJ6i1zpCSVXzdnmbusw/TXtJl2BLi4uH878OwlxidJUt8yR0pS+S31LHOSJEmSVFoWRJIkSZIqy4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpshY87XYvbKyN8ZKRr/Y6DEkqlb+qjfU6BEmSSsc9RJIkSZIqy4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpsiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlRWb2OobHiYj7gDvnTN4G3N+DcJ4o9q/c7F+5rZb+nZWZ23sdhFZOh/wIq+cz3In9Kzf7V26rpX8dc2RfFkTtRMQNmXmg13GsFPtXbvav3FZ7/7T6rfbPsP0rN/tXbqu9f+Ahc5IkSZIqzIJIkiRJUmWVqSC6stcBrDD7V272r9xWe/+0+q32z7D9Kzf7V26rvX/l+Q2RJEmSJC23Mu0hkiRJkqRlVYqCKCIuiohbI+JgRLy51/Est4g4FBFfiYgbI+KGXsezVBHx7og4FhE3tUzbEhHXRcRtxf+bexnjUnTo31sj4u7iPbwxIi7uZYyLFRH7I+LjEXFzRHw1In6umL4q3r95+rcq3j9Vj/mxXMyP5d6+miPL/x520veHzEXEAPB14KXAYeDzwGWZ+bWeBraMIuIQ
cCAzV8M53omI7wVOAu/NzGcW034LeDAz314k7c2Z+aZexrlYHfr3VuBkZv52L2NbqojYDezOzC9GxAjwBeBVwGtYBe/fPP37YVbB+6dqMT+Wj/mx3MyR5X8POynDHqILgIOZeXtmTgIfBC7pcUyaR2ZeDzw4Z/IlwHuK+++h+QdWSh36typk5j2Z+cXi/gngZmAvq+T9m6d/UhmZH0vG/Fhu5sjVqwwF0V7grpbHh1l9b04CH4uIL0TE5b0OZoXszMx7oPkHB+zocTwr4Q0R8eXikIFS7i5vFRFnA98BfJZV+P7N6R+ssvdPlWB+XB1W3fa1jVW3fTVHri5lKIiizbT+Ps7v9L0gM58DvBz42WKXs8rlncBTgPOBe4Df6Wk0SxQRG4C/At6Ymcd7Hc9ya9O/VfX+qTLMjyqDVbd9NUeW/z2cqwwF0WFgf8vjfcCRHsWyIjLzSPH/MeBqmodBrDZHi2NTZ49RPdbjeJZVZh7NzJnMbAB/TInfw4gYpLkhfF9m/nUxedW8f+36t5reP1WK+XF1WDXb13ZW2/bVHFn+97CdMhREnwfOiYgnRcQQcClwTY9jWjYRsb744RoRsR54GXDT/M8qpWuAVxf3Xw18pIexLLvZDWHhX1PS9zAiAngXcHNm/m7LrFXx/nXq32p5/1Q55sfVYVVsXztZTdtXcyRQ8vewk74/yxxAcXq/3wcGgHdn5tt6G9HyiYgn0/zWC6AOvL/s/YuIDwAXAtuAo8CvAR8GPgScCXwT+KHMLOUPLzv070Kau5ITOAT89OzxxGUSES8EPgl8BWgUk99C8xji0r9/8/TvMlbB+6fqMT+Wi/mx3NtXc2T538NOSlEQSZIkSdJKKMMhc5IkSZK0IiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkirLgkjziogrIuI/9DqOxYqIQxHxki7bviAibouIkxHxqhUOreci4hMR8X/3Oo75FO/Fk3sdhyS1Y45cvcyR1WJB1IeKP8KHImJ4zvTHbLgi4uyIyIioL9N6XxMRn2qdlpmvz8z/tBzLL4FfB96RmRsy88O9DqYMIuKtEfHnK7X84r24vctYMiKeulKxSOoP5sieMUeeJnNkeVgQ9ZmIOBv4HiCBV/Y2mso5C/jqYp64XAlXktSZObKnzJFatSyI+s9PAJ8BrgJePTsxIv4MOBP4m2IX6S8C1xezHy6mfXfR9icj4ubiG7SPRsRZLcvJiHh9sdv7oYj4w2h6OnAF8N3Fsh4u2l8VEb/R8vyfioiDEfFgRFwTEXsWWna7ThbfmvzPiPjziDgREV+JiKdFxC9FxLGIuCsiXtbS/rVFn05ExO0R8dMt87ZFxN9GxMNFXJ+MiMd9tiPi2yLijoi4tM28bwBPbnl9hyNiT9HHB4s+/9Sc+P+yiP848Jo2yzwjIt4bEfdFxJ0R8Suzcc1+0xgRv128VndExMvnPPddEXFPRNwdEb8REQMdXssLIuKGiDgeEUcj4ndb5j0vIv6leG2+FBEXtltG0Xa+z80zIuK64rU4GhFviYiLgLcAP1K8Zl/qsNxDxfv6tWLZfxoRa1rmL/SZempx/6riM/V3xefgsxHxlGLe7N/Cl4pYfqRTPyWVmjnSHGmOfHSeOXK5ZKa3ProBB4H/L/CdwBSws2XeIeAlLY/PpvktWb1l2quKZTwdqAO/AvxLy/wE/hbYRDN53AdcVMx7DfCpOfFcBfxGcf/7gPuB5wDDwH8Hru9m2W36+VZgHPj+Is73AncAvwwMAj8F3NHS/l8BTwEC+L+AU8Bzinm/STNRDRa37wGi9TUrYv4m8Ip5Xvu5r+//Bv4HsAY4v+jPi1vinype7xqwts3y3gt8BBgp3quvA69rea2nin4OAD8DHGmJ+8PAHwHrgR3A54Cf7hD3p4EfL+5v
AJ5X3N8LPABcXMT40uLx9mL+J4D/e6HPTRH/PcAvFK/FCPDcltfhzxf4TB8CbgL2A1uAf+b0PlNPbfksPghcUMT4PuCD7dp68+Ztdd4wR5oj0xw55zNljlyGW88D8NbyZsALiw3AtuLxLcC/b5k/d2N0No/f2P/97AaleFyjuWE8q3icwAtb5n8IeHNx/zXMv7F/F/BbLfM2FPGevdCy2/T1rcB1LY9/ADgJDBSPR4rlberw/A8DP1fc/3WaG9XH/aEXr9l/BA4DL1rg9f/W61tsmGaAkZb5vwlc1RL/9fMsawCYAM5rmfbTwCdaXuuDLfPWFf3dBewsnru2Zf5lwMc7rOv6oo/b5kx/E/Bnc6Z9FHh1cf8TPLqx7/i5Kdb9f+Z5H7vZ2L++5fHFwDdO4zPVurH/kznLuaXlsRt7b95W8Q1zpDnSHGmOXKGbh8z1l1cDH8vM+4vH76flkIAunQX8t2L378M0vy0Imt+EzLq35f4pmn9g3dgD3Dn7IDNP0vw2ZbHLPtpyfwy4PzNnWh4z+/yIeHlEfKbYZfwwzT/0bUWb/0rzm5uPFYcKvHnOel5P85ucjy/Qv1Z7gAcz80TLtDt5bF/vmuf524AhWl6vNs//1muVmaeKuxtovoeDwD0t7+Mf0fwWrJ3XAU8DbomIz0fEK4rpZwE/NLuMYjkvBHa3WcZ8n5v9wDfm6Ws3Wl+rO2m+vtDdZ6rVYj+7ksrPHGmONEeaI1eEP3LrExGxFvhhYCAiZj/Qw8CmiHh2Zn6JZnXfau5jaP5RvS0z37eIMNotr9URmhuF2ZjXA1uBuxexrq5F80xCf0Xz2PGPZOZURHyY5saIYoP8C8AvRMQzgI9HxOcz85+KRbweeFNE/F5m/vsuV3sE2BIRIy0b/DN5bF/ne73up/ktzlnA1zo8v5O7aH77tS0zpxdqnJm3AZcVx17/IPCXEbG1WM6fZeZPzbuAR9fZ9nNTHCd9WafVd7FsaCaMWWfSfH2hR58pSeVijuzMHDk/c6S64R6i/vEqmrufz6N5LO75NI9V/STNjRw0vy1qPd/8fUBjzrQrgF8qNnqzPzz8oS5jOArsi4ihDvPfD7w2Is4vNsD/GfhsZh7qcvmLNUQz8d0HTBc/rGz9MekrIuKpERHAcZqv40zL808AFwHfGxFv72aFmXkX8C/Ab0bEmoh4Fs1vmbpKosW3eB8C3hYRI8UG8+eBBU+/mZn3AB8DficiNkZELSKeEhH/V7v2EfFjEbE9MxvAw8XkmWJdPxAR3x8RA0U/LoyIfW0WM9/n5m+BXRHxxmj+kHYkIp5bzDsKnB1tfqA7x89GxL6I2ELzR6Z/UUxfzs/U3L8PSavHqzBHdmKONEd2wxw5Dwui/vFq4E8z85uZee/sDXgH8KPRPGXlbwK/Uuyy/X+KXchvA/65mPa8zLwa+C/AB6N5ZpebgJd3WOdc/4vmKTXvjYj7584svk36DzS/ibqH5g84H3c2muVWfPv072huPB8C/i1wTUuTc4B/pHl89aeB/5GZn5izjIdp/mDy5RHR7TUjLqN5DPoR4Grg1zLzutMI/f8HjAK3A5+iuWF7d5fP/QmaSe5rNPv8l7TfjQ/NRPbViDgJ/Dfg0swcLxLWJTQ3rvfR/Ibr/6XN3/18n5vi9X8pzWPY7wVuA15UPPV/Fv8/EBFfnKc/76eZwG4vbr9RLHs5P1NvBd5T/C388CKXIak/mSM7MEeaI7v0VsyRHc2erUOSVkREHKL5w9R/7HUskiT1E3Nkf3APkSRJkqTKsiCSJEmSVFkeMidJkiSpstxDJEmSJKmyLIgkSZIkVVZfXph1KIZzDet7HYYklco4o0zmRPQ6Dq0c86P62dOedarrtl//8roVjER6vBM8dH9mbm83ry8LojWs57nx4l6HIUml8tlvXXheq5X5Uf3sox+9seu237/n/BWLQ2rnH/Mv7+w0b0mHzEXERRFxa0QcjIg3t5kfEfEHxfwvR8RzlrI+SZLK
whwpSeWw6IIoIgaAP6R5pd7zgMsi4rw5zV5O8wrJ5wCXA+9c7PokSSoLc6QklcdS9hBdABzMzNszcxL4IHDJnDaXAO/Nps8AmyJi9xLWKUlSGZgjJakkllIQ7QXuanl8uJh2um0kSVptzJGSVBJLOalCuzMZzb3Kazdtmg0jLqd5yABr8MwjkqRSW7YcaX6UpJW1lD1Eh4H9LY/3AUcW0QaAzLwyMw9k5oFBhpcQliRJPbdsOdL8KEkraykF0eeBcyLiSRExBFwKXDOnzTXATxRn0nke8Ehm3rOEdUqSVAbmSEkqiUUfMpeZ0xHxBuCjwADw7sz8akS8vph/BXAtcDFwEDgFvHbpIUuS1N/MkZJUHku6MGtmXktzg9467YqW+wn87FLWIUlSGZkjJakclnRhVkmSJEkqMwsiSZIkSZVlQSRJkiSpsiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkirLgkiSJElSZVkQSZIkSaosCyJJkiRJlWVBJEmSJKmyFl0QRcT+iPh4RNwcEV+NiJ9r0+bCiHgkIm4sbr+6tHAlSep/5khJKo/6Ep47DfxCZn4xIkaAL0TEdZn5tTntPpmZr1jCeiRJKhtzpCSVxKL3EGXmPZn5xeL+CeBmYO9yBSZJUlmZIyWpPJblN0QRcTbwHcBn28z+7oj4UkT8fUQ8YznWJ0lSWZgjJam/LeWQOQAiYgPwV8AbM/P4nNlfBM7KzJMRcTHwYeCcDsu5HLgcYA3rlhqWJEk9txw50vwoSStrSXuIImKQ5ob+fZn513PnZ+bxzDxZ3L8WGIyIbe2WlZlXZuaBzDwwyPBSwpIkqeeWK0eaHyVpZS3lLHMBvAu4OTN/t0ObXUU7IuKCYn0PLHadkiSVgTlSkspjKYfMvQD4ceArEXFjMe0twJkAmXkF8G+An4mIaWAMuDQzcwnrlCSpDMyRklQSiy6IMvNTQCzQ5h3AOxa7DkmSysgcKUnlsSxnmZMkSZKkMrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkirLgkiSJElSZVkQSZIkSaosCyJJkiRJlWVBJEmSJKmyLIgkSZIkVZYFkSRJkqTKsiCSJEmSVFkWRJIkSZIqa0kFUUQcioivRMSNEXFDm/kREX8QEQcj4ssR8ZylrE+SpLIwR0pSOdSXYRkvysz7O8x7OXBOcXsu8M7if0mSqsAcKUl9bqUPmbsEeG82fQbYFBG7V3idkiSVgTlSkvrAUguiBD4WEV+IiMvbzN8L3NXy+HAxTZKk1c4cKUklsNRD5l6QmUciYgdwXUTckpnXt8yPNs/JdgsqksXlAGtYt8SwJEnquWXJkeZHSVpZS9pDlJlHiv+PAVcDF8xpchjY3/J4H3Ckw7KuzMwDmXlgkOGlhCVJUs8tV440P0rSylp0QRQR6yNiZPY+8DLgpjnNrgF+ojiTzvOARzLznkVHK0lSCZgjJak8lnLI3E7g6oiYXc77M/MfIuL1AJl5BXAtcDFwEDgFvHZp4UqSVArmSEkqiUUXRJl5O/DsNtOvaLmfwM8udh2SJJWROVKSymOlT7stSZIkSX3LgkiSJElSZVkQSZIkSaosCyJJkiRJlWVBJEmSJKmyLIgkSZIkVZYFkSRJkqTKsiCSJEmSVFkWRJIkSZIqy4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpsiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqqxFF0QRcW5E3NhyOx4Rb5zT5sKIeKSlza8uOWJJkvqcOVKSyqO+2Cdm5q3A+QARMQDcDVzdpuknM/MVi12PJEllY46U
pPJYrkPmXgx8IzPvXKblSZK0WpgjJamPLVdBdCnwgQ7zvjsivhQRfx8Rz+i0gIi4PCJuiIgbpphYprAkSeq5JeVI86MkrawlF0QRMQS8EvifbWZ/ETgrM58N/Hfgw52Wk5lXZuaBzDwwyPBSw5IkqeeWI0eaHyVpZS3HHqKXA1/MzKNzZ2Tm8cw8Wdy/FhiMiG3LsE5JksrAHClJfW45CqLL6HAoQETsiogo7l9QrO+BZVinJEllYI6UpD636LPMAUTEOuClwE+3THs9QGZeAfwb4GciYhoYAy7NzFzKOiVJKgNzpCSVw5IKosw8BWydM+2KlvvvAN6xlHVIklRG5khJKoflOsucJEmSJJWOBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkirLgkiSJElSZVkQSZIkSaosCyJJkiRJlWVBJEmSJKmyLIgkSZIkVVa91wG087RnneKjH72x12FIUqlc8P2neh2CJEml4x4iSZIkSZW1YEEUEe+OiGMRcVPLtC0RcV1E3Fb8v7nDcy+KiFsj4mBEvHk5A5ckqdfMkZJUft3sIboKuGjOtDcD/5SZ5wD/VDx+jIgYAP4QeDlwHnBZRJy3pGglSeovV2GOlKRSW7AgyszrgQfnTL4EeE9x/z3Aq9o89QLgYGbenpmTwAeL50mStCqYIyWp/Bb7G6KdmXkPQPH/jjZt9gJ3tTw+XExrKyIuj4gbIuKG+x6YWWRYkiT13LLmyNb8OMXEsgcrSVW3kidViDbTslPjzLwyMw9k5oHtWwdWMCxJknqu6xzZmh8HGV7hsCSpehZbEB2NiN0Axf/H2rQ5DOxvebwPOLLI9UmSVBbmSEkqkcUWRNcAry7uvxr4SJs2nwfOiYgnRcQQcGnxPEmSVjNzpCSVSDen3f4A8Gng3Ig4HBGvA94OvDQibgNeWjwmIvZExLUAmTkNvAH4KHAz8KHM/OrKdEOSpCeeOVKSyq++UIPMvKzDrBe3aXsEuLjl8bXAtYuOTpKkPmaOlKTyW7Ag6oWvf3kd37/n/F6HIUml8vV8oNchSJJUOit5ljlJkiRJ6msWRJIkSZIqy4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpsiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkiprwYIoIt4dEcci4qaWaf81Im6JiC9HxNURsanDcw9FxFci4saIuGEZ45YkqefMkZJUft3sIboKuGjOtOuAZ2bms4CvA780z/NflJnnZ+aBxYUoSVLfugpzpCSV2oIFUWZeDzw4Z9rHMnO6ePgZYN8KxCZJUl8zR0pS+S3Hb4h+Evj7DvMS+FhEfCEiLp9vIRFxeUTcEBE3TDGxDGFJktRzS86R5kdJWln1pTw5In4ZmAbe16HJCzLzSETsAK6LiFuKb9MeJzOvBK4E2BhbcilxSZLUa8uVI82PkrSyFr2HKCJeDbwC+NHMbLuBzswjxf/HgKuBCxa7PkmSysIcKUnlsaiCKCIuAt4EvDIzT3Vosz4iRmbvAy8DbmrXVpKk1cIcKUnl0s1ptz8AfBo4NyIOR8TrgHcAIzR38d8YEVcUbfdExLXFU3cCn4qILwGfA/4uM/9hRXohSVIPmCMlqfwW/A1RZl7WZvK7OrQ9Alxc3L8dePaSopMkqY+ZIyWp/JbjLHOSJEmSVEoWRJIkSZIqy4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpsiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJU
WRZEkiRJkirLgkiSJElSZS1YEEXEuyPiWETc1DLtrRFxd0TcWNwu7vDciyLi1og4GBFvXs7AJUnqNXOkJJVfN3uIrgIuajP99zLz/OJ27dyZETEA/CHwcuA84LKIOG8pwUqS1GeuwhwpSaW2YEGUmdcDDy5i2RcABzPz9sycBD4IXLKI5UiS1JfMkZJUfkv5DdEbIuLLxeECm9vM3wvc1fL4cDGtrYi4PCJuiIgbpphYQliSJPXcsuVI86MkrazFFkTvBJ4CnA/cA/xOmzbRZlp2WmBmXpmZBzLzwCDDiwxLkqSeW9YcaX6UpJW1qIIoM49m5kxmNoA/prnrf67DwP6Wx/uAI4tZnyRJZWGOlKRyWVRBFBG7Wx7+a+CmNs0+D5wTEU+KiCHgUuCaxaxPkqSyMEdKUrnUF2oQER8ALgS2RcRh4NeACyPifJq79w8BP1203QP8SWZenJnTEfEG4KPAAPDuzPzqSnRCkqReMEdKUvktWBBl5mVtJr+rQ9sjwMUtj68FHne6UUmSVgNzpCSV31LOMidJkiRJpWZBJEmSJKmyLIgkSZIkVZYFkSRJkqTKsiCSJEmSVFkWRJIkSZIqy4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpsiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVVn2hBhHxbuAVwLHMfGYx7S+Ac4smm4CHM/P8Ns89BJwAZoDpzDywLFFLktQHzJGSVH4LFkTAVcA7gPfOTsjMH5m9HxG/Azwyz/NflJn3LzZASZL62FWYIyWp1BYsiDLz+og4u928iAjgh4HvW+a4JEnqe+ZISSq/pf6G6HuAo5l5W4f5CXwsIr4QEZfPt6CIuDwiboiIG6aYWGJYkiT13LLkSPOjJK2sbg6Zm89lwAfmmf+CzDwSETuA6yLilsy8vl3DzLwSuBJgY2zJJcYlSVKvLUuOND9K0spa9B6iiKgDPwj8Rac2mXmk+P8YcDVwwWLXJ0lSWZgjJak8lnLI3EuAWzLzcLuZEbE+IkZm7wMvA25awvokSSoLc6QklcSCBVFEfAD4NHBuRByOiNcVsy5lzqEAEbEnIq4tHu4EPhURXwI+B/xdZv7D8oUuSVJvmSMlqfy6OcvcZR2mv6bNtCPAxcX924FnLzE+SZL6ljlSkspvqWeZkyRJkqTSsiCSJEmSVFkWRJIkSZIqy4JIkiRJUmVZEEmSJEmqLAsiSZIkSZVlQSRJkiSpsiyIJEmSJFWWBZEkSZKkyrIgkiRJklRZFkSSJEmSKsuCSJIkSVJlWRBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqKzOx1DI8TEfcBd86ZvA24vwfhPFHsX7nZv3JbLf07KzO39zoIrZwO+RFWz2e4E/tXbvav3FZL/zrmyL4siNqJiBsy80Cv41gp9q/c7F+5rfb+afVb7Z9h+1du9q/cVnv/wEPmJEmSJFWYBZEkSZKkyipTQXRlrwNYYfav3Oxfua32/mn1W+2fYftXbvav3FZ7/8rzGyJJkiRJWm5l2kMkSZIkScuqFAVRRFwUEbdGxMGIeHOv41luEXEoIr4SETdGxA29jmepIuLdEXEsIm5qmbYlIq6LiNuK/zf3Msal6NC/t0bE3cV7eGNEXNzLGBcrIvZHxMcj4uaI+GpE/FwxfVW8f/P0b1W8f6oe82O5mB/LvX01R5b/Peyk7w+Zi4gB4OvAS4HDwOeByzLzaz0NbBlFxCHgQGauhnO8ExHfC5wE3puZzyym/RbwYGa+vUjamzPzTb2Mc7E69O+twMnM/O1exrZUEbEb2J2ZX4yIEeALwKuA17AK3r95+vfDrIL3T9Vifiwf82O5mSPL/x52UoY9RBcABzPz9sycBD4IXNLjmDSPzLweeHDO5EuA9xT330PzD6yUOvRvVcjMezLz
i8X9E8DNwF5Wyfs3T/+kMjI/loz5sdzMkatXGQqivcBdLY8Ps/renAQ+FhFfiIjLex3MCtmZmfdA8w8O2NHjeFbCGyLiy8UhA6XcXd4qIs4GvgP4LKvw/ZvTP1hl758qwfy4Oqy67Wsbq277ao5cXcpQEEWbaf19nN/pe0FmPgd4OfCzxS5nlcs7gacA5wP3AL/T02iWKCI2AH8FvDEzj/c6nuXWpn+r6v1TZZgfVQarbvtqjiz/ezhXGQqiw8D+lsf7gCM9imVFZOaR4v9jwNU0D4NYbY4Wx6bOHqN6rMfxLKvMPJqZM5nZAP6YEr+HETFIc0P4vsz862Lyqnn/2vVvNb1/qhTz4+qwarav7ay27as5svzvYTtlKIg+D5wTEU+KiCHgUuCaHse0bCJiffHDNSJiPfAy4Kb5n1VK1wCvLu6/GvhID2NZdrMbwsK/pqTvYUQE8C7g5sz83ZZZq+L969S/1fL+qXLMj6vDqti+drKatq/mSKDk72EnfX+WOYDi9H6/DwwA787Mt/U2ouUTEU+m+a0XQB14f9n7FxEfAC4EtgFHgV8DPgx8CDgT+CbwQ5lZyh9edujfhTR3JSdwCPjp2eOJyyQiXgh8EvgK0Cgmv4XmMcSlf//m6d9lrIL3T9VjfiwX82O5t6/myPK/h52UoiCSJEmSpJVQhkPmJEmSJGlFWBBJkiRJqiwLIkmSJEmVZUEkSZIkqbIsiCRJkiRVlgWRJEmSpMqyIJIkSZJUWRZEkiRJkirr/w/2YF0VIp9vWAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1080x1008 with 6 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "hoz_dist = AP.horizontal_axial_2d_distance(H, W)\n",
    "vert_dist = AP.vertical_axial_2d_distance(H, W)\n",
    "\n",
    "fig, axs = plt.subplots(3, 2, figsize=(15, 14))\n",
    "# full distance matrix between every two points\n",
    "axs[0, 0].imshow(hoz_dist)\n",
    "axs[0, 0].set_title(\"Full (H * W)^2 x (H * W)^2 distance matrix\")\n",
    "axs[0, 1].imshow(vert_dist)\n",
    "axs[0, 1].set_title(\"Full (H * W)^2 x (H * W)^2 distance matrix\")\n",
    "\n",
    "# select one point in the matrix and see the corresponding distance\n",
    "# for all other points\n",
    "axs[1, 0].imshow(hoz_dist[middle_point].reshape(H, W))\n",
    "axs[1, 0].set_title(\"Distance for one point to all others\")\n",
    "axs[1, 1].imshow(vert_dist[middle_point].reshape(H, W))\n",
    "axs[1, 1].set_title(\"Distance for one point to all others\")\n",
    "\n",
    "# standard instantiation of separate axial attention for a given point\n",
    "axs[2, 0].imshow(hoz_dist[middle_point].reshape(H, W) < 1)\n",
    "axs[2, 0].set_title(\"Attention mask for one select point\")\n",
    "axs[2, 1].imshow(vert_dist[middle_point].reshape(H, W) < 1)\n",
    "axs[2, 1].set_title(\"Attention mask for one select point\")\n",
    "\n",
    "fig.suptitle('Axial attention', fontsize=16)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For the vanilla axial attention, we have the following convenience function:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA20AAAHOCAYAAAAL5eGjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOydd3iUZfa/PyeTkEhIkFDS62QSBAQEpK2urq59/a1l97v2tiu6FjoK2LABSseOrmVVdNe+KlYsq0tISAIBEkgmk14hhUxIzCQz8/z+eN+EEOnnQQc493VxQSYzZ94MEeeTc55zk1IKgiAIgiAIgiAIgm/i92tfgCAIgiAIgiAIgrB/JLQJgiAIgiAIgiD4MBLaBEEQBEEQBEEQfBgJbYIgCIIgCIIgCD6MhDZBEARBEARBEAQfRkKbIAiCIAiCIAiCDyOhTRAE4QSEiF4iIkVES5l1FBHNO4LHvUpEJZzn7lYrgYjmEVHSPj43j4jO0fE8B7mGy4ho+j5uP9t8jc4+2tcgCIIgHL9IaBMEQTjBIKKTAPzZ/PBaIvJnlJsA4CX+VbFIAPAQgJ+FNvP2ox7aAFwG4GehDUA2jNco+xe4BkEQBOE4RUKbIAjCicflAEIBrAEwCMCFR1pIKbVeKVWh68KON5RSTvM1cv7a1yIIgiAcu0hoEwRBOPG4EUAjgJsA/ATghu6fJKJgItpORBlEFNDt9vOJyEtEd3a7ba/xSCJKJqLXiaiYiH4ioiIieo6I+h3JhRLRXUSURkQNRLSLiNYT0SXdPn82gG/ND78yr0d1jiWat9/X7fbu13oWEa0lomYiaiGiL4hoWI/n/46IfiSi3xNRNhG1EtFWIrqs231ehfGaRnd7npLO6+s5HkkG04gon4jaiaiaiJ4motAez62I6DEimmy+ns1E9D0RDT2S11IQBEE4dpHQJgiCcAJBRFEAfg/gX0qpnQA+BPD/uocqpVQLgKsBjADwqPm4QQD+CeATpdQzB3iKKAAVAKYCuADAIwDOhdHVOxISYIxf/hnAXwBkAviEiC4yP58NoDNEToYxitg5jjjBvP3Vbre/ZH49lwBYC2A3gOsAXAMgBMAPRBTb4xqsAFYAWArgCgDVAN4lomTz84+aX9/Obs9z+QG+psfNWl8BuBTAkzAC9KdE1PP/y9cBuATAFAA3A4gD8BFzpFUQBEE4xpB/9AVBEE4srofxA7t/mh+/BiOg/QXA8513UkptJKLZAJYQ0dcAZgLwALjlQMWVUv8F8N/Oj4loHYBCGGHoNKXUxsO5WKXUzG61/GAErRQAtwP4TCnlJKI88y7blFLruz18PREBQGWP2wEjhH2vlPpjt/rfAigCMANG6OxkAIDfKqXs5v2yYQS3/wMwXynlIKKdANr38Tx7QURhMM6+vaaUusu8+Qvz8a8D+AOA/3R7SAeAPyilOszHA8A7AMYCWHeg5xIEQRCOH6TTJgiCcGJxAwC7UirN/PhrAFXoMSJpshzA5wA+AXA+gBuUUnUHKk5EvYhorjle+ROM0PGD+enUw71YIhpNRJ8QUS0At1nvvCOp1a2mDUb37E0i8u/8BaAVQBqA3/Z4iL0zsAGAUmoHgB0wul6Hy3gAgQDe6HH72zC+vrN63P5VZ2Az2WL+fiTPLQiCIByjSGgTBEE4QSCi0wEMAfA+EZ1MRCfDGAl8H8AEIkrpfn+llILR/QkEkKOUWnsIT7MAwDwYoeQSGB2hK8zPBR3m9cbC6KyFAbgbwEQAp8MIkodVqweDzN//ASMEdv/1BwD9e9y/YR81XEd4DWHm79Xdb1RKuQHUd/v8/p7bZf7O+foFQRCEYwwZjxQEQThxuNH8/V7zV09uAHB/5wdEFAGj25YN4DQimqKUWnGQ57gKwD+VUo91q9PnCK/3QgB9Afxf9w2VRNT7COt1Um/+PgdGp7En7cz6B6IzhEUAyO280ez09e92bYIgCILQhYQ2QRCEEwAi
6gUjUKUDmL2PuywDcD0RPaCUUmQcnnoNRoA5D0aYe4KIvlVKbT7AU/WG0bHqzs1HeNmd4ayrntkN/A2MZSeddHafTtpHjfZ93J4PoATAUKXUwiO8tp649vP8PVlv3vcqGF3ETv4C4//J32u6HkEQBOE4QkKbIAjCiUHn2N8MpdR3PT9JRC8AeA7A2TBW6E+HsWXyHKVUg7mU5GwAbxHRGKXUT/t5ns8B3EhEW2AsILkCxljjkfA1jHNe/ySiJQAiATwMoAx7j/cXmPe7hYgaYISifKVUM4A8AJcQ0ecwNAdVSqkqU1vwkRlm/w2gDkC4ea1lSqmlh3mteQDCiOjvMDZctimltvS8k/laLgUwh4haYGydPAXAYwB+BPDpYT6vIAiCcAIgZ9oEQRBODG4E0Axj8+C+eAuGs+1GIjoNwHwAC5RS3wOAUqodxpbJBBjr6vfH3TC2Hz4O4F8wzsxdfSQXrJTKBXAtgHiz5j0wuoT/7XG/egB3wVAUfA9gA4DR5qfvAtAC4GPz9knmY9bAWDgSDEMD8AWM1fsRMJaRHC4vwVgmMh9Ahvl8++M+GKH4IhhLXmbD2OZ5iVLKewTPLQiCIBznkHHOXBAEQRAEQRAEQfBFpNMmCIIgCIIgCILgw0hoEwRBEARBEARB8GEktAmCIAiCIAiCIPgwEtoEQRAEQRAEQRB8GAltgiAIgiAIgiAIPoyENkEQBEEQBEEQBB9GQpsgCIIgCIIgCIIPI6FNEARBEARBEATBh5HQJgiCIAiCIAiC4MNIaBMEQRAEQRAEQfBhJLQJgiAIgiAIgiD4MBLaBEEQBEEQBEEQfBgJbYIgCIIgCIIgCD6MhDZBEARBEARBEAQfRkKbIAiCIAiCIAiCDyOhTRAEQRAEQRAEwYeR0CYIgiAIgiAIguDDSGgTBEEQBEEQBEHwYSS0CYIgCIIgCIIg+DAS2gRBEARBEARBEHwYCW2CIAiCIAiCIAg+jIQ2QRAEQRAEQRAEH0ZCmyAIgiAIgiAIgg8joc1HIaIEIlJE5G9+/B0R/e0A9x9CRJm/3BUeGkT0/4jo7QN8/koimtX5df6C1xVIRHlEFPFLPu/+MK9nOxEN2s/nrUQ0j4iG/NLXdqxBRK8S0WO/9nXsDyL6jIhu/LWvQxAEQRCEYwcJbb8ARFRCRD8R0e5uv6I0P82jABb3eM7f97iOm4jox4Nc6zwiSjjA5/OJ6P+6ffwbM1z2vG03Efkrpf4DYBgRDd9Hrb8AeAnAtQBeJiLq8fnFRGQnomYz0NxwoGs/TCYB+K9SquZIHmyG6nkH+PwcIlrT4zb7fm67SinlAvAygHv3USsCwJcAfgfgSyKK6/H5S4joRyLaRUQ1RPQiEYUc4df1s+8RXw9BvoL5384bB7ufUuoipdRrv8Q1CYIgCIJwfCCh7ZfjUqVUn26/qnQVJqJIGG/oP2TUmEtEZ5of+hPRfUQ0fh93/S+As7p9/FsA2/dx2zqllNv8+C0YIan78/0ewHIA55n3TwLwZI/nagFwKYC+AG4EsIKIJh7ml7Y/bgPw+uE+iIjGE9F9ADo7oL8lorn7uOt/AfyGiCzm/SIABAAY1eO2ZPO+ALAawI1EFNjt+UIBfAZgtVLqLADLAHxORP27PVdfAI8BiAJwCoAYAIsO92sTji5kIP/mCoIgCIJw2MgbiF+Rnt2wQ/1J/T44D0C2UqqNcTkrAFwI4CoAzwPIU0qt38f9/gsjZHVyJoAn9nHbf7t9/B2ASzo/IKIxAF4AcIFSKlMp5QRwAYxAM7Pzfkqph5RS25VSXqVUOoAfAEzY18UT0b1EtL7bOOnfiSiXiIL2cd84AFYA6ebHvYhoExHdbX5sIaL/EdGDPR9rviZbATxnvlYXAVi5j0vaACOkjTQ//i2AbwHk97jN0RnglVIVABoBjDevIxDARwD+rZR6wLzPEgBPA/iYiILN21YrpT5XSrUqpRoBvAjgN/t6ncy6s4nIYXYw84jocvP2U2D8
3U8wO6W7iGgSjE7oPeZtH5v3LSGimUS0mYiaiOhf+3qtezzv2URUQUT3ENEOIqomosuI6GIiKiCihu4BmIjGElGaeR3VRPQ0EfUyP0dEtMys02Rex7B9PGcIEX1LRCt7dnLNz39HRI8R0brOr4+I+hPRm0TkJKIN1K3zTEQriKjc/FxW5w86iOhCAHMB/MWsk9Ot/uNE9D8ArQCSqNuoMxE9R0Tvdqv/BBGt3de1CoIgCIJw4iKh7fjgVBhhgIvq9rtnP/f5HsBQIgozuwZjAPwLwMndbpuIvUPbNgAJZtcIZlCzKqU2dz2xUi1KqXOVUouxD4joJACnA8jdz3UtAtAO4H4isgGYD+C6/QTZUwEUdXYClVLtAK4D8IgZXGYDsAB4fD/Ppbr92dPj486vpx1GKOwMs7+

Download .txt

gitextract_wkps4m_l/

├── .clang-format
├── .coveragerc
├── .editorconfig
├── .flake8
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.md
│   │   ├── feature-request.md
│   │   └── questions-help-support.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── actions/
│   │   ├── setup-build-cuda/
│   │   │   └── action.yml
│   │   └── setup-env-build/
│   │       └── action.yml
│   ├── compute_wheel_version.py
│   ├── gpu_benchmark_diff.py
│   ├── run-clang-format.py
│   ├── run_benchmark_wrapper.py
│   ├── selective_ci/
│   │   ├── requirements.txt
│   │   └── selective_ci.py
│   └── workflows/
│       ├── gh-pages.yml
│       ├── gpu_test_gh.yml
│       ├── linters.yml
│       ├── linters_reusable.yml
│       ├── rocm_build.yml
│       ├── rocm_ci.yml
│       ├── rocm_docker.yml
│       ├── wheels.yml
│       ├── wheels_build.yml
│       ├── wheels_upload_pip.yml
│       ├── wheels_upload_s3.yml
│       └── win-build.yml
├── .gitignore
├── .gitmodules
├── .isort.cfg
├── .markdownlint.json
├── .pre-commit-config.yaml
├── .pyre_configuration
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs/
│   ├── Makefile
│   ├── requirements.txt
│   └── source/
│       ├── 2d_attention_patterns.ipynb
│       ├── _static/
│       │   └── css/
│       │       └── customize.css
│       ├── _templates/
│       │   ├── layout.html
│       │   └── theme_variables.jinja
│       ├── components/
│       │   ├── index.rst
│       │   └── ops.rst
│       ├── conf.py
│       ├── index.rst
│       ├── swin_transformer.ipynb
│       └── what_is_xformers.rst
├── examples/
│   └── llama_inference/
│       ├── README.md
│       ├── generate.py
│       ├── model.py
│       ├── mp_utils.py
│       ├── requirements.txt
│       ├── sample_utils.py
│       ├── stats.py
│       └── tokenizer.py
├── pyproject.toml
├── requirements-benchmark.txt
├── requirements-test.txt
├── requirements.txt
├── setup.cfg
├── setup.py
├── stubs/
│   ├── fvcore/
│   │   └── nn.pyi
│   ├── matplotlib/
│   │   └── pyplot.pyi
│   ├── numpy/
│   │   └── __init__.pyi
│   ├── pandas.pyi
│   ├── recommonmark/
│   │   └── transform.pyi
│   ├── seaborn.pyi
│   ├── sklearn/
│   │   └── model_selection.pyi
│   ├── submitit.pyi
│   ├── tensorflow.pyi
│   ├── torch/
│   │   ├── __init__.pyi
│   │   ├── autograd/
│   │   │   ├── __init__.pyi
│   │   │   └── profiler.pyi
│   │   ├── cuda/
│   │   │   └── __init__.pyi
│   │   ├── fft/
│   │   │   └── __init__.pyi
│   │   ├── hub.pyi
│   │   ├── linalg/
│   │   │   └── __init__.pyi
│   │   ├── nn/
│   │   │   ├── __init__.pyi
│   │   │   ├── functional/
│   │   │   │   └── __init__.pyi
│   │   │   ├── functional.pyi
│   │   │   ├── init.pyi
│   │   │   └── utils/
│   │   │       └── __init__.pyi
│   │   ├── onnx.pyi
│   │   ├── ops.pyi
│   │   ├── optim/
│   │   │   └── __init__.pyi
│   │   ├── profiler/
│   │   │   └── __init__.pyi
│   │   ├── random/
│   │   │   └── __init__.pyi
│   │   ├── sparse/
│   │   │   └── __init__.pyi
│   │   └── utils/
│   │       ├── data.pyi
│   │       └── model_zoo.pyi
│   ├── torch_stub_tests.py
│   ├── tqdm.pyi
│   └── triton/
│       ├── __init__.pyi
│       ├── language.pyi
│       └── ops/
│           └── blocksparse.pyi
├── tests/
│   ├── __init__.py
│   ├── multiprocessing_utils.py
│   ├── readme_test_on_rocm.txt
│   ├── test_attention_patterns.py
│   ├── test_checkpoint.py
│   ├── test_fmha_flop_formula.py
│   ├── test_fmha_merge_attentions.py
│   ├── test_fwbw_overlap.py
│   ├── test_indexing.py
│   ├── test_mem_eff_attention.py
│   ├── test_multiprocessing_utils.py
│   ├── test_profiler.py
│   ├── test_rmsnorm.py
│   ├── test_rope_padded.py
│   ├── test_seqpar.py
│   ├── test_sequence_parallel_fused_ops.py
│   ├── test_sparse_tensors.py
│   ├── test_sparsity24.py
│   ├── test_splitk_reference.py
│   ├── test_tiled_matmul.py
│   ├── test_tree_attention.py
│   ├── test_triton_varargs.py
│   ├── test_unbind.py
│   └── utils.py
├── version.txt
└── xformers/
    ├── __init__.py
    ├── _cpp_lib.py
    ├── _deprecation_warning.py
    ├── attn_bias_utils.py
    ├── benchmarks/
    │   ├── __init__.py
    │   ├── benchmark_attn_decoding.py
    │   ├── benchmark_indexing.py
    │   ├── benchmark_mem_eff_attention.py
    │   ├── benchmark_merge_attentions.py
    │   ├── benchmark_sequence_parallel_fused.py
    │   ├── benchmark_sp24.py
    │   ├── benchmark_tiled_matmul.py
    │   ├── readme_benchmark_on_rocm.txt
    │   └── utils.py
    ├── checkpoint.py
    ├── components/
    │   └── attention/
    │       └── attention_patterns.py
    ├── csrc/
    │   ├── attention/
    │   │   ├── attention.cpp
    │   │   ├── hip_decoder/
    │   │   │   ├── CMakeLists.txt
    │   │   │   ├── attention_forward_splitk.cpp
    │   │   │   ├── ck_tile_attention_forward_decoder_splitk.h
    │   │   │   └── ck_tile_attention_inner_product.h
    │   │   └── hip_fmha/
    │   │       ├── GENERATE_INSTANCES.md
    │   │       ├── attention_backward_generic_ck_tiled.cpp
    │   │       ├── attention_ck_rand_uniform.cpp
    │   │       ├── attention_forward_generic_ck_tiled.cpp
    │   │       ├── ck_fmha_test.cpp
    │   │       ├── ck_fmha_util.h
    │   │       ├── ck_tiled_bool_switch.h
    │   │       ├── ck_tiled_fmha_batched_backward.h
    │   │       ├── ck_tiled_fmha_batched_backward_bf16.cpp
    │   │       ├── ck_tiled_fmha_batched_backward_fp16.cpp
    │   │       ├── ck_tiled_fmha_batched_forward.h
    │   │       ├── ck_tiled_fmha_batched_forward_bf16.cpp
    │   │       ├── ck_tiled_fmha_batched_forward_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_forward_fp16.cpp
    │   │       ├── ck_tiled_fmha_batched_forward_splitkv_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_forward_splitkv_smallq_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_infer.h
    │   │       ├── ck_tiled_fmha_batched_infer_bf16.cpp
    │   │       ├── ck_tiled_fmha_batched_infer_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_infer_fp16.cpp
    │   │       ├── ck_tiled_fmha_batched_infer_splitkv_dispatch.h
    │   │       ├── ck_tiled_fmha_batched_infer_splitkv_smallq_dispatch.h
    │   │       ├── ck_tiled_fmha_bwd_setting.h
    │   │       ├── ck_tiled_fmha_fwd_setting.h
    │   │       ├── ck_tiled_fmha_fwd_splitkv_selector.h
    │   │       ├── ck_tiled_fmha_fwd_splitkv_setting.h
    │   │       ├── ck_tiled_fmha_fwd_splitkv_smallq_selector.h
    │   │       ├── ck_tiled_fmha_fwd_splitkv_smallq_setting.h
    │   │       ├── ck_tiled_fmha_fwd_type_config.h
    │   │       ├── ck_tiled_fmha_grouped_backward.h
    │   │       ├── ck_tiled_fmha_grouped_backward_bf16.cpp
    │   │       ├── ck_tiled_fmha_grouped_backward_fp16.cpp
    │   │       ├── ck_tiled_fmha_grouped_forward.h
    │   │       ├── ck_tiled_fmha_grouped_forward_bf16.cpp
    │   │       ├── ck_tiled_fmha_grouped_forward_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_forward_fp16.cpp
    │   │       ├── ck_tiled_fmha_grouped_forward_splitkv_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_forward_splitkv_smallq_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_infer.h
    │   │       ├── ck_tiled_fmha_grouped_infer_bf16.cpp
    │   │       ├── ck_tiled_fmha_grouped_infer_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_infer_fp16.cpp
    │   │       ├── ck_tiled_fmha_grouped_infer_splitkv_dispatch.h
    │   │       ├── ck_tiled_fmha_grouped_infer_splitkv_smallq_dispatch.h
    │   │       ├── ck_tiled_fmha_num_kv_split_switch.h
    │   │       ├── ck_tiled_fmha_params.h
    │   │       ├── ck_tiled_fmha_seqlen_q_switch.h
    │   │       ├── ck_tiled_headdim_switch.h
    │   │       ├── ck_tiled_rand_uniform_kernel.h
    │   │       ├── generate_instances.py
    │   │       └── instances/
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_instances_ref.h
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_instances_ref.h
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_instances_ref.h
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_bf16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_instances_ref.h
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_forward_fp16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_instances_ref.h
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_bf16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_instances_ref.h
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_batched_infer_fp16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_instances_ref.h
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_bf16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_has_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_instances_ref.h
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_has_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_has_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_backward_fp16_no_mask_no_bias_no_biasgrad_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_instances_ref.h
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_bf16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_instances_ref.h
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_forward_fp16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_instances_ref.h
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_bf16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_has_mask_no_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_instances_ref.h
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_has_bias_no_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_64.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_has_dropout_maxk_96.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_128.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_256.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_32.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_512.cpp
    │   │           ├── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_64.cpp
    │   │           └── fmha_grouped_infer_fp16_no_mask_no_bias_no_dropout_maxk_96.cpp
    │   ├── nvcc_info.cu
    │   ├── pt_stable_utils.cu
    │   ├── pt_stable_utils.h
    │   └── sparse24/
    │       ├── compute_sparse_tile.h
    │       ├── gemm.cu
    │       ├── meta_utils.cu
    │       ├── sparse24.cpp
    │       ├── sparse24_apply.cu
    │       ├── sparse24_apply_dense_output.cu
    │       ├── sparse24_gemm_sm90.cu
    │       ├── sparse24_largest_mask_2d.cu
    │       ├── sparse24_metadata.h
    │       ├── sparse24_pack.cu
    │       ├── sparse24_pack.h
    │       ├── sparse24_pack_test.cu
    │       ├── sparseNM_dense.cu
    │       ├── static_sort.h
    │       └── warp_tensor.h
    ├── flash_attn_3/
    │   └── __init__.py
    ├── fwbw_overlap.py
    ├── info.py
    ├── ops/
    │   ├── __init__.py
    │   ├── _triton/
    │   │   ├── __init__.py
    │   │   ├── k_index_select_cat.py
    │   │   ├── k_scaled_index_add.py
    │   │   ├── matmul_perf_model.py
    │   │   ├── rmsnorm_kernels.py
    │   │   ├── rope_padded_kernels.py
    │   │   └── tiled_matmul_kernels.py
    │   ├── common.py
    │   ├── differentiable_collectives.py
    │   ├── fmha/
    │   │   ├── __init__.py
    │   │   ├── _triton/
    │   │   │   ├── __init__.py
    │   │   │   └── splitk_kernels.py
    │   │   ├── attn_bias.py
    │   │   ├── ck.py
    │   │   ├── ck_splitk.py
    │   │   ├── common.py
    │   │   ├── cutlass.py
    │   │   ├── cutlass_blackwell.py
    │   │   ├── dispatch.py
    │   │   ├── flash.py
    │   │   ├── flash3.py
    │   │   ├── merge_training.py
    │   │   ├── torch_attention_compat.py
    │   │   └── triton_splitk.py
    │   ├── indexing.py
    │   ├── modpar_layers.py
    │   ├── rmsnorm.py
    │   ├── rope_padded.py
    │   ├── seqpar.py
    │   ├── sequence_parallel_fused_ops.py
    │   ├── sp24.py
    │   ├── swiglu_op.py
    │   ├── tiled_matmul.py
    │   ├── tree_attention.py
    │   └── unbind.py
    ├── profiler/
    │   ├── __init__.py
    │   ├── api.py
    │   ├── device_limits.py
    │   ├── find_slowest.py
    │   ├── profile_analyzer.py
    │   ├── profiler.py
    │   ├── profiler_dcgm.py
    │   └── profiler_dcgm_impl.py
    ├── sparse/
    │   ├── __init__.py
    │   ├── blocksparse_tensor.py
    │   └── utils.py
    ├── test.py
    ├── triton/
    │   ├── __init__.py
    │   ├── importing.py
    │   └── vararg_kernel.py
    └── utils.py

Download .txt

SYMBOL INDEX (2093 symbols across 170 files)

FILE: .github/compute_wheel_version.py
  function get_tagged_version (line 15) | def get_tagged_version() -> Optional[str]:
  function get_dev_version (line 33) | def get_dev_version() -> str:

FILE: .github/gpu_benchmark_diff.py
  class NamedObject (line 13) | class NamedObject:
    method __init__ (line 14) | def __init__(self, name) -> None:
  function git_file_at (line 18) | def git_file_at(filename: str, ref: str) -> str:

FILE: .github/run-clang-format.py
  class ExitStatus (line 45) | class ExitStatus:
  function list_files (line 51) | def list_files(files, recursive=False, extensions=None, exclude=None):
  function make_diff (line 81) | def make_diff(file, original, reformatted):
  class DiffError (line 93) | class DiffError(Exception):
    method __init__ (line 94) | def __init__(self, message, errs=None):
  class UnexpectedError (line 99) | class UnexpectedError(Exception):
    method __init__ (line 100) | def __init__(self, message, exc=None):
  function run_clang_format_diff_wrapper (line 106) | def run_clang_format_diff_wrapper(args, file):
  function run_clang_format_diff (line 116) | def run_clang_format_diff(args, file):
  function bold_red (line 172) | def bold_red(s):
  function colorize (line 176) | def colorize(diff_lines):
  function print_diff (line 202) | def print_diff(diff_lines, use_color):
  function print_trouble (line 208) | def print_trouble(prog, message, use_colors):
  function main (line 215) | def main():

FILE: .github/selective_ci/selective_ci.py
  class ComponentInfo (line 16) | class ComponentInfo:
  function list_files_in_commit (line 86) | def list_files_in_commit(commit: git.Commit):
  function check_patterns_are_valid (line 100) | def check_patterns_are_valid(patterns):

FILE: docs/source/conf.py
  function setup (line 136) | def setup(app):

FILE: examples/llama_inference/generate.py
  class GenArgs (line 29) | class GenArgs:
  class FastGen (line 37) | class FastGen:
    method build (line 42) | def build(
    method __init__ (line 87) | def __init__(
    method generate_all (line 100) | def generate_all(
  function get_prompts (line 207) | def get_prompts(interactive: bool) -> Iterable[list[str]]:
  function main (line 224) | def main(ckpt_dir: str, interactive: bool, add_instruction_tags: bool):

FILE: examples/llama_inference/model.py
  class ModelArgs (line 21) | class ModelArgs:
  class Attention (line 51) | class Attention(nn.Module):
    method __init__ (line 52) | def __init__(
    method load_hook (line 84) | def load_hook(
    method forward (line 100) | def forward(
  class FeedForward (line 154) | class FeedForward(nn.Module):
    method __init__ (line 155) | def __init__(
    method load_hook (line 186) | def load_hook(
    method forward (line 201) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class TransformerBlock (line 209) | class TransformerBlock(nn.Module):
    method __init__ (line 210) | def __init__(self, args: ModelArgs, layer_index: int):
    method forward (line 243) | def forward(
  class Transformer (line 266) | class Transformer(nn.Module):
    method __init__ (line 267) | def __init__(self, args: ModelArgs):
    method forward_with_attn_bias (line 292) | def forward_with_attn_bias(
    method forward (line 308) | def forward(
  function make_cache (line 324) | def make_cache(
  function cache_prefix (line 371) | def cache_prefix(cache: list[LayerCache], length: int) -> list[LayerCache]:

FILE: examples/llama_inference/mp_utils.py
  function initialize (line 18) | def initialize(
  function get_world_size (line 83) | def get_world_size() -> int:
  function get_rank (line 90) | def get_rank() -> int:
  function all_gather (line 96) | def all_gather(x: torch.Tensor) -> torch.Tensor:
  function all_reduce (line 110) | def all_reduce(x: torch.Tensor):

FILE: examples/llama_inference/sample_utils.py
  function top_p (line 9) | def top_p(probs: torch.Tensor, p: float) -> torch.Tensor:

FILE: examples/llama_inference/stats.py
  class PhaseStats (line 12) | class PhaseStats:
    method show (line 17) | def show(self) -> str:
  class Stats (line 27) | class Stats:
    method __init__ (line 32) | def __init__(self):
    method end_phase (line 36) | def end_phase(self, tokens: int, now: Optional[float] = None):
    method phase (line 50) | def phase(self, name: str, tokens: int = 0):

FILE: examples/llama_inference/tokenizer.py
  class Tokenizer (line 11) | class Tokenizer:
    method __init__ (line 14) | def __init__(self, model_path: str):
    method encode (line 36) | def encode(self, s: str, bos: bool = True, eos: bool = False) -> list[...
    method decode (line 56) | def decode(self, t: list[int]) -> str:

FILE: setup.py
  function get_extra_nvcc_flags_for_build_type (line 54) | def get_extra_nvcc_flags_for_build_type(cuda_version: int) -> List[str]:
  function fetch_requirements (line 72) | def fetch_requirements():
  function get_local_version_suffix (line 78) | def get_local_version_suffix() -> str:
  function generate_version_py (line 89) | def generate_version_py(version: str) -> str:
  function get_cuda_version (line 98) | def get_cuda_version(cuda_dir) -> int:
  function get_hip_version (line 111) | def get_hip_version(rocm_dir) -> Optional[str]:
  function rename_cpp_cu (line 128) | def rename_cpp_cu(cpp_files):
  function get_extensions (line 133) | def get_extensions():
  class clean (line 362) | class clean(distutils.command.clean.clean):  # type: ignore
    method run (line 363) | def run(self):
  class bdist_wheel_abi_none (line 378) | class bdist_wheel_abi_none(_bdist_wheel if _bdist_wheel else object):  #...
    method get_tag (line 386) | def get_tag(self):
  class BuildExtensionWithExtraFiles (line 399) | class BuildExtensionWithExtraFiles(BuildExtension):
    method __init__ (line 400) | def __init__(self, *args, **kwargs) -> None:
    method get_export_symbols (line 405) | def get_export_symbols(self, ext):
    method build_extensions (line 411) | def build_extensions(self) -> None:
    method copy_extensions_to_source (line 420) | def copy_extensions_to_source(self) -> None:
    method get_ext_filename (line 434) | def get_ext_filename(self, ext_name):

FILE: stubs/fvcore/nn.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/matplotlib/pyplot.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/numpy/__init__.pyi
  class _ArrayOrScalarCommon (line 37) | class _ArrayOrScalarCommon(
  class float (line 45) | class float: ...
  class ndarray (line 47) | class ndarray(_ArrayOrScalarCommon[DType, Unpack[Ts]], Iterable, Sized, ...
    method __init__ (line 48) | def __init__(
    method __getitem__ (line 58) | def __getitem__(
    method __getitem__ (line 62) | def __getitem__(
    method __setitem__ (line 65) | def __setitem__(self, key, value): ...
    method shape (line 67) | def shape(self) -> Tuple[Unpack[Ts]]: ...
    method reshape (line 69) | def reshape(self, shape: Tuple[Unpack[Ts2]]) -> ndarray[DType, Unpack[...
    method reshape (line 71) | def reshape(self, *shape: Unpack[Ts2]) -> ndarray[DType, Unpack[Ts2]]:...
    method __add__ (line 72) | def __add__(self, other) -> ndarray[DType, Unpack[Ts]]: ...
    method __div__ (line 73) | def __div__(self, other) -> ndarray[DType, Unpack[Ts]]: ...
    method __truediv__ (line 74) | def __truediv__(self, other) -> ndarray[DType, Unpack[Ts]]: ...
    method astype (line 77) | def astype(self, dtype: Type[NewDType]) -> ndarray[NewDType, Unpack[Ts...
    method astype (line 79) | def astype(self, dtype: Literal["int64"]) -> ndarray[int64, Unpack[Ts]...
    method astype (line 81) | def astype(self, dtype: Literal["float32"]) -> ndarray[float32, Unpack...
    method astype (line 83) | def astype(self, dtype: Literal["float64"]) -> ndarray[float64, Unpack...
  function empty (line 89) | def empty(
  function empty (line 95) | def empty(
  function empty (line 101) | def empty(shape: N, dtype: Type[DType]) -> ndarray[DType, N]: ...
  function array (line 104) | def array(
  function sin (line 111) | def sin(x: ndarray[DType, Unpack[Ts]]) -> ndarray[DType, Unpack[Ts]]: ...
  class int64 (line 113) | class int64:
    method __init__ (line 114) | def __init__(self, value=...): ...
  class float32 (line 116) | class float32:
    method __init__ (line 117) | def __init__(self, value=...): ...
  class float64 (line 119) | class float64:
    method __init__ (line 120) | def __init__(self, value=...): ...

FILE: stubs/pandas.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/recommonmark/transform.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/seaborn.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/sklearn/model_selection.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/submitit.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/tensorflow.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/torch/__init__.pyi
  class complex64 (line 66) | class complex64: ...
  class complex128 (line 67) | class complex128: ...
  class float16 (line 68) | class float16: ...
  class float32 (line 69) | class float32: ...
  class float64 (line 70) | class float64: ...
  class int64 (line 71) | class int64: ...
  class int32 (line 72) | class int32: ...
  class bool (line 73) | class bool: ...
  class memory_format (line 74) | class memory_format: ...
  class long (line 80) | class long: ...
  class layout (line 81) | class layout: ...
  class MaxNamedTuple (line 87) | class MaxNamedTuple(Generic[DType, Unpack[Ts]]):
    method __getitem__ (line 91) | def __getitem__(self, key: L[0]) -> Tensor[DType, Unpack[Ts]]: ...
    method __getitem__ (line 93) | def __getitem__(self, key: L[1]) -> Tensor[int64, Unpack[Ts]]: ...
  class device (line 95) | class device:
    method __init__ (line 96) | def __init__(self, device_str: str): ...
  class Size (line 101) | class Size(Tuple[builtins.int, ...]):
    method __getitem__ (line 103) | def __getitem__(self: Size, key: builtins.int) -> builtins.int: ...
    method __getitem__ (line 105) | def __getitem__(self: Size, key: slice) -> Size: ...
    method numel (line 106) | def numel(self: Size) -> builtins.int: ...
  class Generator (line 108) | class Generator(object):
    method __init__ (line 110) | def __init__(self, device: Union[_device, str, None] = None) -> None: ...
    method get_state (line 111) | def get_state(self) -> Tensor: ...
    method set_state (line 112) | def set_state(self, _new_state: Tensor) -> Generator: ...
    method manual_seed (line 113) | def manual_seed(self, seed: builtins.int) -> Generator: ...
    method seed (line 114) | def seed(self) -> builtins.int: ...
    method initial_seed (line 115) | def initial_seed(self) -> builtins.int: ...
  class Storage (line 119) | class Storage(object):
    method __deepcopy__ (line 121) | def __deepcopy__(self, memo) -> "Storage": ...
    method _new_shared (line 122) | def _new_shared(self, int) -> "Storage": ...
    method _write_file (line 123) | def _write_file(
    method element_size (line 126) | def element_size(self) -> int: ...
    method is_shared (line 127) | def is_shared(self) -> bool: ...
    method share_memory_ (line 128) | def share_memory_(self) -> "Storage": ...
    method size (line 129) | def size(self) -> int: ...
  class Tensor (line 131) | class Tensor(Generic[DType, Unpack[Ts]]):
    method __init__ (line 147) | def __init__(self, other: Tensor[DType, Unpack[Ts]]) -> None: ...
    method __init__ (line 149) | def __init__(
    method __init__ (line 153) | def __init__(self, storage: Storage) -> None: ...
    method __init__ (line 155) | def __init__(
    method device (line 159) | def device(self) -> _device: ...
    method dtype (line 161) | def dtype(self) -> Type[DType]: ...
    method long (line 162) | def long(self) -> "LongTensor[DType, Unpack[Ts]]": ...
    method size (line 171) | def size(self: Tensor[DType, N1, Unpack[Rs]], axis: L[0]) -> N1: ...
    method size (line 173) | def size(self: Tensor[DType, N1, N2, Unpack[Rs]], axis: L[1]) -> N2: ...
    method size (line 175) | def size(self: Tensor[DType, Unpack[Rs], N1], axis: L[-1]) -> N1: ...
    method size (line 177) | def size(self: Tensor[DType, Unpack[Rs], N1, N2], axis: L[-2]) -> N1: ...
    method size (line 179) | def size(self: Tensor[DType, Unpack[Rs]]) -> Tuple[Unpack[Rs]]: ...
    method split (line 181) | def split(
    method split (line 185) | def split(
    method item (line 191) | def item(self: Tensor[DType, L[1]]) -> DType: ...
    method item (line 193) | def item(self: Tensor[DType]) -> DType: ...
    method numel (line 194) | def numel(self) -> builtins.int: ...
    method backward (line 195) | def backward(self) -> None: ...
    method __getitem__ (line 197) | def __getitem__(
    method __getitem__ (line 201) | def __getitem__(
    method __getitem__ (line 205) | def __getitem__(
    method __getitem__ (line 209) | def __getitem__(self, item: Any) -> Any: ...
    method expand (line 211) | def expand(
    method expand (line 215) | def expand(
    method detach (line 218) | def detach(self: T) -> T: ...
    method numpy (line 220) | def numpy(self) -> ndarray[DType, Unpack[Ts]]: ...
    method to (line 224) | def to(
    method to (line 228) | def to(
    method __add__ (line 233) | def __add__(
    method __add__ (line 237) | def __add__(
    method __iadd__ (line 242) | def __iadd__(
    method __iadd__ (line 246) | def __iadd__(
    method __radd__ (line 251) | def __radd__(
    method __radd__ (line 255) | def __radd__(
    method __sub__ (line 260) | def __sub__(
    method __sub__ (line 264) | def __sub__(
    method __isub__ (line 269) | def __isub__(
    method __isub__ (line 273) | def __isub__(
    method __rsub__ (line 278) | def __rsub__(
    method __rsub__ (line 282) | def __rsub__(
    method __mul__ (line 287) | def __mul__(
    method __mul__ (line 292) | def __mul__(
    method __imul__ (line 297) | def __imul__(
    method __imul__ (line 302) | def __imul__(
    method __rmul__ (line 307) | def __rmul__(
    method __rmul__ (line 312) | def __rmul__(
    method __pow__ (line 317) | def __pow__(
    method __pow__ (line 322) | def __pow__(
    method __truediv__ (line 327) | def __truediv__(
    method __truediv__ (line 332) | def __truediv__(
    method __itruediv__ (line 337) | def __itruediv__(
    method __itruediv__ (line 342) | def __itruediv__(
    method __rtruediv__ (line 347) | def __rtruediv__(
    method __floordiv__ (line 352) | def __floordiv__(
    method __floordiv__ (line 357) | def __floordiv__(
    method __ifloordiv__ (line 362) | def __ifloordiv__(
    method __ifloordiv__ (line 367) | def __ifloordiv__(
    method __rfloordiv__ (line 372) | def __rfloordiv__(
    method __invert__ (line 376) | def __invert__(self) -> Tensor[DType, Unpack[Ts]]: ...
    method __neg__ (line 377) | def __neg__(self) -> Tensor[DType, Unpack[Ts]]: ...
    method __iand__ (line 378) | def __iand__(
    method __and__ (line 382) | def __and__(
    method __matmul__ (line 387) | def __matmul__(
    method __matmul__ (line 392) | def __matmul__(
    method __ne__ (line 398) | def __ne__(
    method abs (line 401) | def abs(self) -> Tensor[DType, Unpack[Ts]]: ...
    method all (line 403) | def all(
    method all (line 407) | def all(
    method all (line 412) | def all(
    method argmax (line 417) | def argmax(
    method argmax (line 423) | def argmax(
    method argmax (line 429) | def argmax(
    method argmax (line 435) | def argmax(
    method argmax (line 441) | def argmax(
    method argmax (line 447) | def argmax(
    method argmax (line 453) | def argmax(
    method argmax (line 459) | def argmax(
    method argmax (line 465) | def argmax(
    method argmin (line 471) | def argmin(
    method argmin (line 477) | def argmin(
    method argmin (line 483) | def argmin(
    method argmin (line 489) | def argmin(
    method argmin (line 495) | def argmin(
    method argmin (line 501) | def argmin(
    method argmin (line 507) | def argmin(
    method argmin (line 513) | def argmin(
    method argmin (line 519) | def argmin(
    method chunk (line 528) | def chunk(self: Tensor[DType, Unpack[Rs], N], chunks: L[2], dim: L[-1]...
    method chunk (line 533) | def chunk(
    method clone (line 539) | def clone(
    method count_nonzero (line 543) | def count_nonzero(
    method count_nonzero (line 548) | def count_nonzero(
    method count_nonzero (line 553) | def count_nonzero(
    method count_nonzero (line 558) | def count_nonzero(
    method count_nonzero (line 563) | def count_nonzero(
    method dim (line 569) | def dim(self: Tensor[DType]) -> L[0]: ...
    method dim (line 571) | def dim(self: Tensor[DType, builtins.int]) -> L[1]: ...
    method dim (line 573) | def dim(self: Tensor[DType, builtins.int, builtins.int]) -> L[2]: ...
    method dim (line 575) | def dim(self: Tensor[DType, builtins.int, builtins.int, builtins.int])...
    method half (line 576) | def half(
    method is_contiguous (line 579) | def is_contiguous(
    method indices (line 582) | def indices(self) -> Tensor: ...
    method masked_select (line 584) | def masked_select(self, mask: Tensor, *, out: Optional[Tensor] = ...) ...
    method max (line 586) | def max(
    method max (line 592) | def max(
    method max (line 598) | def max(
    method max (line 604) | def max(
    method max (line 610) | def max(
    method max (line 616) | def max(
    method max (line 622) | def max(
    method max (line 628) | def max(
    method max (line 634) | def max(
    method max (line 640) | def max(
    method max (line 646) | def max(
    method mean (line 650) | def mean(
    method mean (line 656) | def mean(
    method mean (line 662) | def mean(
    method mean (line 668) | def mean(
    method mean (line 674) | def mean(
    method mean (line 680) | def mean(
    method mean (line 686) | def mean(
    method mean (line 692) | def mean(
    method mean (line 698) | def mean(
    method bitwise_not (line 703) | def bitwise_not(self) -> Tensor[DType, Unpack[Ts]]: ...
    method bitwise_not_ (line 704) | def bitwise_not_(self) -> Tensor[DType, Unpack[Ts]]: ...
    method diff (line 706) | def diff(
    method diff (line 711) | def diff(
    method diff (line 716) | def diff(
    method diff (line 721) | def diff(
    method is_sparse (line 725) | def is_sparse(self) -> builtins.bool: ...
    method coalesce (line 726) | def coalesce(self: Tensor[DType, Unpack[Rs]]) -> Tensor[DType, Unpack[...
    method values (line 727) | def values(self: Tensor[DType, Unpack[Rs]]) -> Tensor[DType, Unpack[Rs...
    method to_sparse (line 728) | def to_sparse(self: Tensor[DType, Unpack[Ts]]) -> Tensor[DType, Unpack...
    method __eq__ (line 733) | def __eq__(
    method __eq__ (line 738) | def __eq__(
    method argsort (line 742) | def argsort(
    method bmm (line 745) | def bmm(
    method diag_embed (line 748) | def diag_embed(
    method matmul (line 752) | def matmul(
    method matmul (line 757) | def matmul(
    method multinomial (line 763) | def multinomial(
    method new_ones (line 771) | def new_ones(
    method new_ones (line 779) | def new_ones(
    method unsqueeze (line 787) | def unsqueeze(
    method unsqueeze (line 791) | def unsqueeze(
    method unsqueeze (line 795) | def unsqueeze(
    method unsqueeze (line 799) | def unsqueeze(
    method unsqueeze_ (line 803) | def unsqueeze_(
    method unsqueeze_ (line 807) | def unsqueeze_(
    method unsqueeze_ (line 811) | def unsqueeze_(
    method unsqueeze_ (line 815) | def unsqueeze_(
    method real (line 819) | def real(self: Tensor[complex64, Unpack[Rs]]) -> Tensor[float32, Unpac...
    method repeat (line 821) | def repeat(
    method repeat (line 825) | def repeat(
    method repeat (line 829) | def repeat(
    method repeat_interleave (line 833) | def repeat_interleave(
    method repeat_interleave (line 837) | def repeat_interleave(
    method repeat_interleave (line 841) | def repeat_interleave(
    method repeat_interleave (line 845) | def repeat_interleave(
    method repeat_interleave (line 850) | def repeat_interleave(
    method __setitem__ (line 853) | def __setitem__(self, item: object, other: object) -> None: ...
    method scatter (line 855) | def scatter(
    method scatter_ (line 863) | def scatter_(
    method softmax (line 871) | def softmax(self, dim: builtins.int) -> Tensor[DType, Unpack[Ts]]: ...
    method softmax (line 873) | def softmax(
    method stride (line 877) | def stride(
    method stride (line 881) | def stride(
    method stride (line 885) | def stride(
    method stride (line 889) | def stride(self) -> Tuple[Unpack[Ts]]: ...
    method squeeze (line 891) | def squeeze(
    method squeeze (line 895) | def squeeze(
    method squeeze (line 899) | def squeeze(
    method squeeze (line 906) | def squeeze(
    method squeeze (line 913) | def squeeze(
    method type_as (line 916) | def type_as(
    method squeeze_ (line 920) | def squeeze_(
    method squeeze_ (line 924) | def squeeze_(
    method squeeze_ (line 928) | def squeeze_(
    method squeeze_ (line 935) | def squeeze_(
    method squeeze_ (line 942) | def squeeze_(
    method view (line 946) | def view(
    method view (line 952) | def view(
    method view (line 958) | def view(
    method view (line 964) | def view(self, *shape: Unpack[Rs]) -> Tensor[DType, Unpack[Rs]]: ...
    method transpose (line 966) | def transpose(
    method transpose (line 970) | def transpose(
    method transpose (line 974) | def transpose(
    method transpose (line 978) | def transpose(
    method transpose (line 982) | def transpose(
    method flatten (line 986) | def flatten(
    method flatten (line 992) | def flatten(
    method flatten (line 998) | def flatten(
    method flatten (line 1004) | def flatten(
    method flatten (line 1010) | def flatten(
    method __lt__ (line 1016) | def __lt__(
    method __lt__ (line 1020) | def __lt__(
    method __gt__ (line 1024) | def __gt__(
    method __gt__ (line 1028) | def __gt__(
    method logical_and (line 1031) | def logical_and(
    method logical_and_ (line 1037) | def logical_and_(
    method reshape (line 1044) | def reshape(
    method reshape (line 1050) | def reshape(
    method reshape (line 1056) | def reshape(self, *shape: Unpack[Rs]) -> Tensor[DType, Unpack[Rs]]: ...
    method unbind (line 1058) | def unbind(
    method unbind (line 1062) | def unbind(
    method unbind (line 1066) | def unbind(
    method sign (line 1069) | def sign(self, *, out: Optional[Tensor] = ...) -> Tensor[DType, Unpack...
    method sum (line 1071) | def sum(
    method sum (line 1078) | def sum(
    method sum (line 1085) | def sum(
    method sum (line 1092) | def sum(
    method sum (line 1099) | def sum(
    method cumsum (line 1105) | def cumsum(
    method contiguous (line 1110) | def contiguous(input: Tensor[DType, Unpack[Rs]]) -> Tensor[DType, Unpa...
  class LongTensor (line 1112) | class LongTensor(Tensor[DType, Unpack[Ts]], Generic[DType, Unpack[Ts]]):
    method __getitem__ (line 1114) | def __getitem__(
    method __getitem__ (line 1118) | def __getitem__(
    method __getitem__ (line 1122) | def __getitem__(
    method __eq__ (line 1125) | def __eq__(
  function allclose (line 1133) | def allclose(
  function bitwise_not (line 1140) | def bitwise_not(
  function einsum (line 1143) | def einsum(
  function eye (line 1148) | def eye(
  function eye (line 1159) | def eye(
  function eye (line 1171) | def eye(
  function eye (line 1182) | def eye(
  function zeros (line 1194) | def zeros(
  function zeros (line 1205) | def zeros(
  function zeros (line 1217) | def zeros(
  function zeros (line 1227) | def zeros(
  function ones (line 1238) | def ones(*size: Unpack[Ts]) -> Tensor[float, Unpack[Ts]]: ...
  function ones (line 1240) | def ones(
  function ones_like (line 1244) | def ones_like(
  function ones_like (line 1255) | def ones_like(
  function tril (line 1265) | def tril(
  function arange (line 1269) | def arange(
  function arange (line 1279) | def arange(
  function arange (line 1290) | def arange(
  function arange (line 1303) | def arange(
  function arange (line 1313) | def arange(
  function arange (line 1324) | def arange(
  function arange (line 1335) | def arange(
  function argmax (line 1346) | def argmax(
  function argmax (line 1352) | def argmax(
  function argmax (line 1358) | def argmax(
  function argmax (line 1364) | def argmax(
  function argmax (line 1370) | def argmax(
  function argmax (line 1376) | def argmax(
  function argmax (line 1382) | def argmax(
  function argmax (line 1388) | def argmax(
  function argmax (line 1394) | def argmax(
  function argmin (line 1400) | def argmin(
  function argmin (line 1406) | def argmin(
  function argmin (line 1412) | def argmin(
  function argmin (line 1418) | def argmin(
  function argmin (line 1424) | def argmin(
  function argmin (line 1430) | def argmin(
  function argmin (line 1436) | def argmin(
  function argmin (line 1442) | def argmin(
  function argmin (line 1448) | def argmin(
  function bmm (line 1453) | def bmm(
  function chunk (line 1457) | def chunk(input: Tensor[DType, Unpack[Ts], N], chunks: L[2], dim: L[-1])...
  function chunk (line 1462) | def chunk(input: Tensor[DType, N, Unpack[Ts]], chunks: L[2], dim: L[0] =...
  function diag (line 1466) | def diag(
  function diagonal (line 1472) | def diagonal(
  function diag_embed (line 1478) | def diag_embed(
  function empty_like (line 1485) | def empty_like(
  function empty_like (line 1497) | def empty_like(
  function logical_and (line 1508) | def logical_and(
  function log_softmax (line 1515) | def log_softmax(
  function log_softmax (line 1521) | def log_softmax(
  function masked_select (line 1527) | def masked_select(
  function max (line 1531) | def max(
  function max (line 1537) | def max(
  function max (line 1543) | def max(
  function max (line 1549) | def max(
  function max (line 1555) | def max(
  function max (line 1561) | def max(
  function max (line 1567) | def max(
  function max (line 1573) | def max(
  function max (line 1579) | def max(
  function max (line 1585) | def max(
  function max (line 1591) | def max(
  function mean (line 1595) | def mean(
  function mean (line 1601) | def mean(
  function mean (line 1607) | def mean(
  function mean (line 1613) | def mean(
  function mean (line 1619) | def mean(
  function mean (line 1625) | def mean(
  function mean (line 1631) | def mean(
  function mean (line 1637) | def mean(
  function mean (line 1643) | def mean(
  function meshgrid (line 1649) | def meshgrid(tensor1: Tensor[DType, N1]) -> Tuple[Tensor[DType, N1]]: ...
  function meshgrid (line 1651) | def meshgrid(
  function meshgrid (line 1656) | def meshgrid(
  function meshgrid (line 1664) | def meshgrid(*tensors: Tensor) -> Tuple[Tensor, ...]: ...
  function norm (line 1666) | def norm(
  function norm (line 1675) | def norm(
  function norm (line 1684) | def norm(
  function norm (line 1693) | def norm(
  function norm (line 1702) | def norm(
  function norm (line 1711) | def norm(
  function norm (line 1720) | def norm(
  function norm (line 1729) | def norm(
  function norm (line 1738) | def norm(
  function normal (line 1747) | def normal(
  function rand (line 1756) | def rand(
  function rand (line 1767) | def rand(
  function rand (line 1779) | def rand(
  function rand (line 1789) | def rand(
  function randint (line 1800) | def randint(
  function randint (line 1813) | def randint(
  function randint (line 1826) | def randint(
  function randint (line 1839) | def randint(
  function rand_like (line 1851) | def rand_like(
  function nonzero (line 1854) | def nonzero(
  function repeat_interleave (line 1858) | def repeat_interleave(
  function repeat_interleave (line 1862) | def repeat_interleave(
  function repeat_interleave (line 1866) | def repeat_interleave(
  function repeat_interleave (line 1870) | def repeat_interleave(
  function repeat_interleave (line 1876) | def repeat_interleave(
  function stack (line 1880) | def stack(
  function stack (line 1887) | def stack(
  function stack (line 1894) | def stack(
  function stack (line 1905) | def stack(
  function stack (line 1916) | def stack(
  function cdist (line 1922) | def cdist(
  function clone (line 1928) | def clone(
  function count_nonzero (line 1932) | def count_nonzero(
  function count_nonzero (line 1937) | def count_nonzero(
  function count_nonzero (line 1942) | def count_nonzero(
  function count_nonzero (line 1947) | def count_nonzero(
  function count_nonzero (line 1952) | def count_nonzero(
  function sum (line 1958) | def sum(
  function sum (line 1962) | def sum(
  function sum (line 1969) | def sum(
  function sum (line 1973) | def sum(
  function sum (line 1980) | def sum(
  function sin (line 1987) | def sin(
  function cos (line 1990) | def cos(
  function exp (line 1993) | def exp(
  function matmul (line 1997) | def matmul(
  function matmul (line 2004) | def matmul(
  function multinomial (line 2010) | def multinomial(
  function unbind (line 2018) | def unbind(
  function unbind (line 2022) | def unbind(
  function unbind (line 2026) | def unbind(
  function unsqueeze (line 2030) | def unsqueeze(
  function unsqueeze (line 2034) | def unsqueeze(
  function unsqueeze (line 2038) | def unsqueeze(
  function unsqueeze (line 2042) | def unsqueeze(
  function real (line 2046) | def real(input: Tensor[complex64, Unpack[Ts]]) -> Tensor[float32, Unpack...
  function real (line 2048) | def real(input: Tensor[complex128, Unpack[Ts]]) -> Tensor[float64, Unpac...
  function zeros_like (line 2049) | def zeros_like(
  function randn (line 2053) | def randn(
  function randn (line 2063) | def randn(
  function randn (line 2072) | def randn(
  function randn (line 2082) | def randn(
  function all (line 2091) | def all(
  function all (line 2095) | def all(
  function all (line 2100) | def all(
  function randperm (line 2105) | def randperm(
  function randperm (line 2117) | def randperm(
  function sqrt (line 2128) | def sqrt(
  function where (line 2132) | def where(
  function where (line 2146) | def where(condition: Tensor[DType, Unpack[Ts]]) -> Any: ...
  function diff (line 2148) | def diff(
  function diff (line 2153) | def diff(
  function diff (line 2158) | def diff(
  function diff (line 2163) | def diff(
  function argsort (line 2167) | def argsort(
  function cat (line 2175) | def cat(
  function cat (line 2182) | def cat(
  function cat (line 2189) | def cat(
  function cat (line 2198) | def cat(
  function cat (line 2209) | def cat(
  function cat (line 2220) | def cat(
  function cat (line 2235) | def cat(
  function sign (line 2248) | def sign(
  function sparse_coo_tensor (line 2252) | def sparse_coo_tensor(
  function sparse_coo_tensor (line 2262) | def sparse_coo_tensor(
  function sparse_coo_tensor (line 2272) | def sparse_coo_tensor(
  function softmax (line 2282) | def softmax(
  function softmax (line 2286) | def softmax(
  function transpose (line 2290) | def transpose(
  function transpose (line 2294) | def transpose(
  function transpose (line 2298) | def transpose(
  function transpose (line 2302) | def transpose(
  function transpose (line 2306) | def transpose(
  function empty (line 2310) | def empty(
  function empty (line 2320) | def empty(
  function empty (line 2331) | def empty(
  function empty (line 2343) | def empty(
  function flatten (line 2354) | def flatten(
  function flatten (line 2360) | def flatten(
  function flatten (line 2366) | def flatten(
  function flatten (line 2372) | def flatten(
  function flatten (line 2378) | def flatten(
  function reshape (line 2384) | def reshape(
  function reshape (line 2388) | def reshape(
  function reshape (line 2394) | def reshape(

FILE: stubs/torch/autograd/__init__.pyi
  class Function (line 12) | class Function:
    method apply (line 14) | def apply(cls, *args: object) -> Any: ...
  class enable_grad (line 16) | class enable_grad:
    method __enter__ (line 17) | def __enter__(self) -> None: ...
    method __exit__ (line 18) | def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> N...
  function backward (line 20) | def backward(

FILE: stubs/torch/autograd/profiler.pyi
  class record_function (line 9) | class record_function(contextlib.ContextDecorator):
    method __init__ (line 10) | def __init__(self, name: str) -> None: ...
    method __enter__ (line 11) | def __enter__(self) -> Any: ...
    method __exit__ (line 12) | def __exit__(self, *exctype: Any) -> None: ...

FILE: stubs/torch/cuda/__init__.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/torch/fft/__init__.pyi
  function fft (line 16) | def fft(
  function fft2 (line 25) | def fft2(

FILE: stubs/torch/linalg/__init__.pyi
  function pinv (line 20) | def pinv(
  function pinv (line 28) | def pinv(
  function qr (line 36) | def qr(
  function qr (line 48) | def qr(
  function norm (line 57) | def norm(

FILE: stubs/torch/nn/__init__.pyi
  class Module (line 38) | class Module:
    method __call__ (line 39) | def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    method parameters (line 40) | def parameters(self) -> Iterator[Any]: ...
    method double (line 41) | def double(self: T) -> T: ...
    method to (line 42) | def to(self, dtype: Type[T], device: torch._device = ...) -> Module: ...
    method eval (line 43) | def eval(self) -> Module: ...
    method train (line 44) | def train(self, mode: bool) -> Module: ...
    method register_parameter (line 45) | def register_parameter(self, name: str, param: Optional[Parameter]) ->...
  class LSTMCell (line 49) | class LSTMCell(Module, Generic[InputSize, HiddenSize]):
    method __init__ (line 50) | def __init__(
    method __call__ (line 53) | def __call__(
  class Linear (line 61) | class Linear(Module, Generic[InputSize, OutputSize]):
    method __init__ (line 62) | def __init__(
    method __call__ (line 65) | def __call__(
  class _Loss (line 70) | class _Loss(Module): ...
  class MSELoss (line 72) | class MSELoss(_Loss):
    method __init__ (line 73) | def __init__(
    method __call__ (line 79) | def __call__(
  class Conv2d (line 98) | class Conv2d(
    method __init__ (line 102) | def __init__(
    method __call__ (line 110) | def __call__(
  class ReflectionPad2d (line 123) | class ReflectionPad2d(Module, Generic[Padding]):
    method __init__ (line 124) | def __init__(
    method __call__ (line 128) | def __call__(
  class InstanceNorm2d (line 139) | class InstanceNorm2d(Generic[Channels]):
    method __init__ (line 140) | def __init__(self, num_features: Channels, affine: bool = False) -> No...
    method __call__ (line 141) | def __call__(
  class LeakyReLU (line 145) | class LeakyReLU(Module):
    method __init__ (line 146) | def __init__(self, negative_slope: float = ..., inplace: bool = ...) -...
    method __call__ (line 147) | def __call__(
  class ReLU (line 151) | class ReLU(Module):
    method __call__ (line 152) | def __call__(
  class GELU (line 156) | class GELU(Module):
    method __call__ (line 157) | def __call__(
  class Dropout (line 161) | class Dropout(Module):
    method __init__ (line 162) | def __init__(self, p: float, inplace: bool = ...) -> None: ...
    method __call__ (line 163) | def __call__(
  class Embedding (line 167) | class Embedding(Module, Generic[N, EmbeddingDimension]):
    method __init__ (line 168) | def __init__(
    method padding_idx (line 180) | def padding_idx(self) -> int: ...
    method max_norm (line 182) | def max_norm(self) -> float: ...
    method norm_type (line 184) | def norm_type(self) -> float: ...
    method scale_grad_by_freq (line 186) | def scale_grad_by_freq(self) -> bool: ...
    method sparse (line 188) | def sparse(self) -> bool: ...
    method weight (line 190) | def weight(self) -> Tensor[torch.float32, N, EmbeddingDimension]: ...
    method from_pretrained (line 192) | def from_pretrained(
    method forward (line 202) | def forward(
    method __call__ (line 205) | def __call__(
  class LayerNorm (line 211) | class LayerNorm(Module):
    method __init__ (line 212) | def __init__(
    method forward (line 220) | def forward(self, x: Tensor[DType, Unpack[Ts]]) -> Tensor[DType, Unpac...
    method __call__ (line 221) | def __call__(self, x: Tensor[DType, Unpack[Ts]]) -> Tensor[DType, Unpa...
  class AdaptiveAvgPool2d (line 223) | class AdaptiveAvgPool2d(Module, Generic[H, W]):
    method __new__ (line 225) | def __new__(
    method __new__ (line 230) | def __new__(
    method __new__ (line 235) | def __new__(
    method __new__ (line 240) | def __new__(
    method forward (line 244) | def forward(self, x: Tensor[DType, Unpack[Ts]]) -> Tensor[DType, Unpac...
    method __call__ (line 246) | def __call__(
    method __call__ (line 250) | def __call__(
    method __call__ (line 254) | def __call__(
  class ModuleList (line 258) | class ModuleList(Module):
    method __init__ (line 259) | def __init__(self, modules: Optional[Iterable[Module]] = ...) -> None:...
    method __iter__ (line 260) | def __iter__(self) -> Iterator[Module]: ...
    method __len__ (line 261) | def __len__(self) -> int: ...
  class Parameter (line 263) | class Parameter(Tensor[DType, Unpack[Ts]]):
    method __init__ (line 264) | def __init__(

FILE: stubs/torch/nn/functional.pyi
  function pad (line 26) | def pad(
  function pad (line 33) | def pad(

FILE: stubs/torch/nn/functional/__init__.pyi
  function pad (line 26) | def pad(
  function pad (line 33) | def pad(

FILE: stubs/torch/nn/init.pyi
  function _calculate_fan_in_and_fan_out (line 15) | def _calculate_fan_in_and_fan_out(tensor: Tensor) -> Tuple[int, int]: ...
  function constant_ (line 16) | def constant_(
  function kaiming_uniform_ (line 19) | def kaiming_uniform_(
  function normal_ (line 22) | def normal_(
  function uniform_ (line 25) | def uniform_(
  function _no_grad_uniform_ (line 28) | def _no_grad_uniform_(tensor: Tensor, a, b): ...
  function xavier_uniform_ (line 29) | def xavier_uniform_(tensor: Tensor, gain: float = ...) -> Tensor: ...

FILE: stubs/torch/nn/utils/__init__.pyi
  function clip_grad_norm_ (line 16) | def clip_grad_norm_(
  function clip_grad_value_ (line 22) | def clip_grad_value_(

FILE: stubs/torch/ops.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/torch/optim/__init__.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/torch/profiler/__init__.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/torch/random/__init__.pyi
  function initial_seed (line 8) | def initial_seed() -> int: ...
  function __getattr__ (line 9) | def __getattr__(name) -> Any: ...

FILE: stubs/torch/sparse/__init__.pyi
  function softmax (line 20) | def softmax(
  function softmax (line 24) | def softmax(

FILE: stubs/torch_stub_tests.py
  function test_sin (line 34) | def test_sin() -> None:
  function test_unsqueeze (line 54) | def test_unsqueeze() -> None:
  function test_unsqueeze_ (line 73) | def test_unsqueeze_() -> None:
  function test_squeeze_ (line 83) | def test_squeeze_() -> None:
  function test_squeeze (line 100) | def test_squeeze() -> None:
  function test_repeat (line 117) | def test_repeat() -> None:
  function test_multiply (line 140) | def test_multiply() -> None:
  function test_floor_division (line 187) | def test_floor_division() -> None:
  function test_division (line 220) | def test_division() -> None:
  function test_setitem (line 257) | def test_setitem() -> None:
  function test_arange (line 262) | def test_arange(n: N) -> None:
  function test_embedding (line 284) | def test_embedding() -> None:
  function test_init_normal (line 307) | def test_init_normal() -> None:
  function test_view (line 317) | def test_view() -> None:
  function test_reshape (line 350) | def test_reshape() -> None:
  function test_transpose (line 364) | def test_transpose() -> None:
  function test_flatten (line 389) | def test_flatten() -> None:
  function test_empty (line 424) | def test_empty() -> None:
  function test_empty_like (line 460) | def test_empty_like() -> None:
  function test_randn (line 482) | def test_randn() -> None:
  function test_all (line 512) | def test_all() -> None:
  function test_where (line 526) | def test_where() -> None:
  function test_getitem (line 551) | def test_getitem() -> None:
  function test_expand (line 575) | def test_expand() -> None:
  function test_to (line 588) | def test_to() -> None:
  function test_Linear_to (line 606) | def test_Linear_to() -> None:
  function test_Module_eval (line 613) | def test_Module_eval() -> None:
  function test_Module_train (line 618) | def test_Module_train() -> None:
  function test_Linear_bias (line 624) | def test_Linear_bias() -> None:
  function test_sum (line 630) | def test_sum() -> None:
  function test_cumsum (line 647) | def test_cumsum() -> None:
  function test_contiguous (line 663) | def test_contiguous() -> None:
  function test_diff (line 671) | def test_diff() -> None:
  function test_argsort (line 684) | def test_argsort() -> None:
  function test_functional_pad (line 705) | def test_functional_pad() -> None:
  function test_allclose (line 715) | def test_allclose() -> None:
  function test_new_ones (line 725) | def test_new_ones() -> None:
  function test_ones_like (line 736) | def test_ones_like() -> None:
  function test_sparse_softmax (line 755) | def test_sparse_softmax() -> None:
  function test_eye (line 769) | def test_eye() -> None:
  function test_adaptive_average_pool2d (line 779) | def test_adaptive_average_pool2d() -> None:
  function test_randperm (line 801) | def test_randperm() -> None:
  function test_sqrt (line 809) | def test_sqrt() -> None:
  function test_multinomial (line 819) | def test_multinomial() -> None:
  function test_bmm (line 833) | def test_bmm() -> None:
  function test_subtract (line 847) | def test_subtract() -> None:
  function test_add (line 875) | def test_add() -> None:
  function test_torch_fft (line 899) | def test_torch_fft() -> None:
  function test_torch_real (line 907) | def test_torch_real() -> None:
  function test_logical_and (line 920) | def test_logical_and() -> None:
  function test_and (line 940) | def test_and() -> None:
  function test_linalg_pinv (line 960) | def test_linalg_pinv() -> None:
  function test_linalg_qr (line 980) | def test_linalg_qr() -> None:
  function test_torch_matmul (line 997) | def test_torch_matmul() -> None:
  function test_torch_optim (line 1016) | def test_torch_optim() -> None:
  function test_torch_cuda (line 1021) | def test_torch_cuda() -> None:
  function test_torch_profiler (line 1025) | def test_torch_profiler() -> None:
  function test_mse_loss (line 1029) | def test_mse_loss() -> None:
  function test_clip_grad_norm (line 1040) | def test_clip_grad_norm() -> None:
  function test_clip_grad_value (line 1052) | def test_clip_grad_value() -> None:
  function test_bitwise_not (line 1057) | def test_bitwise_not() -> None:
  function test_cdist (line 1071) | def test_cdist() -> None:
  function test_random_manual_seed (line 1086) | def test_random_manual_seed() -> None:
  function test_clone (line 1090) | def test_clone() -> None:
  function test_equal (line 1101) | def test_equal() -> None:
  function test_diag_embed (line 1117) | def test_diag_embed() -> None:
  function test_unbind (line 1122) | def test_unbind() -> None:
  function test_size (line 1137) | def test_size() -> None:
  function test_stack (line 1151) | def test_stack(
  function test_repeat_interleave (line 1172) | def test_repeat_interleave() -> None:
  function test_meshgrid (line 1198) | def test_meshgrid() -> None:
  function test_argmax (line 1227) | def test_argmax() -> None:
  function test_argmin (line 1258) | def test_argmin() -> None:
  function test_mean (line 1289) | def test_mean() -> None:
  function test_count_nonzero (line 1318) | def test_count_nonzero() -> None:
  function test_cat (line 1334) | def test_cat() -> None:
  function test_sign (line 1376) | def test_sign() -> None:
  function test_diagonal (line 1384) | def test_diagonal() -> None:
  function test_diag (line 1389) | def test_diag() -> None:
  function test_module_list (line 1394) | def test_module_list() -> None:
  function test_sparse_coo_tensor (line 1404) | def test_sparse_coo_tensor() -> None:
  function test_max (line 1418) | def test_max() -> None:
  function test_einsum (line 1453) | def test_einsum() -> None:
  function test_type_as (line 1457) | def test_type_as() -> None:
  function test_softmax (line 1463) | def test_softmax() -> None:
  function test_conv2d (line 1474) | def test_conv2d() -> None:
  function test_nn_Parameter (line 1488) | def test_nn_Parameter() -> None:
  function test_torch_datatypes (line 1496) | def test_torch_datatypes() -> None:
  function test_norm (line 1501) | def test_norm() -> None:
  function test_rand (line 1512) | def test_rand() -> None:
  function test_randint (line 1534) | def test_randint() -> None:
  function test_zeros (line 1554) | def test_zeros() -> None:
  function test_stride (line 1575) | def test_stride() -> None:
  function test_chunk (line 1586) | def test_chunk() -> None:
  function test_abs (line 1605) | def test_abs() -> None:
  function test_enable_grad (line 1613) | def test_enable_grad() -> None:
  function test_normal (line 1618) | def test_normal() -> None:
  function test_dim (line 1628) | def test_dim() -> None:
  function test_is_cuda (line 1642) | def test_is_cuda() -> None:
  function test_autograd_backward (line 1647) | def test_autograd_backward() -> None:
  function test_linalg_norm (line 1652) | def test_linalg_norm() -> None:
  function test_Sized (line 1659) | def test_Sized() -> None:
  function test_initial_seed (line 1663) | def test_initial_seed() -> None:
  function test_log_softmax (line 1667) | def test_log_softmax() -> None:
  function test_masked_select (line 1678) | def test_masked_select() -> None:
  function test__lt__ (line 1687) | def test__lt__() -> None:
  function test_pow (line 1693) | def test_pow() -> None:
  function test_item (line 1701) | def test_item() -> None:
  function test_uniform_ (line 1711) | def test_uniform_() -> None:
  function test_kaiming_uniform_ (line 1720) | def test_kaiming_uniform_() -> None:
  function test_constant_ (line 1729) | def test_constant_() -> None:
  function test_leaky_relu (line 1736) | def test_leaky_relu() -> None:
  function test_fft_fft2 (line 1747) | def test_fft_fft2() -> None:
  function test_real (line 1754) | def test_real() -> None:
  function test_Tensor_init (line 1767) | def test_Tensor_init() -> None:
  function test_reflection_pad2d (line 1778) | def test_reflection_pad2d() -> None:
  function test_half (line 1789) | def test_half() -> None:
  function test_is_contiguous (line 1797) | def test_is_contiguous() -> None:
  function test_scatter (line 1802) | def test_scatter() -> None:
  function test_scatter_ (line 1814) | def test_scatter_() -> None:
  function test_bool (line 1826) | def test_bool() -> None:

FILE: stubs/tqdm.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/triton/__init__.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/triton/language.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: stubs/triton/ops/blocksparse.pyi
  function __getattr__ (line 8) | def __getattr__(name) -> Any: ...

FILE: tests/multiprocessing_utils.py
  class SafeMpContext (line 17) | class SafeMpContext(multiprocessing.context.BaseContext):
    method __init__ (line 18) | def __init__(self) -> None:
    method Process (line 22) | def Process(self, *args, **kwargs) -> multiprocessing.context.SpawnPro...
    method kill_all_processes (line 28) | def kill_all_processes(self):
    method log_bad_exit_codes (line 43) | def log_bad_exit_codes(self):
    method __getattr__ (line 58) | def __getattr__(self, name: str):
    method __enter__ (line 61) | def __enter__(self):
    method __exit__ (line 64) | def __exit__(self, exc_type, exc_val, exc_tb):
  function init_process_group (line 69) | def init_process_group(init_method: str, rank: int, world_size: int):
  function _launch_subprocesses_fn_wrapper (line 87) | def _launch_subprocesses_fn_wrapper(
  function get_global_pool_allocator (line 121) | def get_global_pool_allocator(
  class ProcessPoolExecutorManager (line 142) | class ProcessPoolExecutorManager:
    method __init__ (line 143) | def __init__(self, world_size: int):
    method __enter__ (line 146) | def __enter__(self):
    method submit (line 155) | def submit(self, fn, *args, **kwargs):
    method __exit__ (line 158) | def __exit__(self, exc_type, exc_val, exc_tb):
  function launch_subprocesses (line 189) | def launch_subprocesses(world_size: int, fn, *args, **kwargs):

FILE: tests/test_attention_patterns.py
  function _local_1d_pattern (line 15) | def _local_1d_pattern(attn_size: int, window_size: int) -> torch.Tensor:
  function _generate_2d_grid (line 29) | def _generate_2d_grid(H, W):
  function _horizontal_axial_2d_distance (line 36) | def _horizontal_axial_2d_distance(H, W, p=2.0):
  function _vertical_axial_2d_distance (line 43) | def _vertical_axial_2d_distance(H, W, p=2.0):
  function _local_2d_distance (line 50) | def _local_2d_distance(H, W, p=2.0):
  function _local_2d_gaussian_distribution (line 58) | def _local_2d_gaussian_distribution(H, W, sigma=1.0):
  function test_local_1d_pattern (line 66) | def test_local_1d_pattern(attn_size, window_size):
  function test_horizontal_axial_2d_distance (line 75) | def test_horizontal_axial_2d_distance(H, W, p):
  function test_vertical_axial_2d_distance (line 84) | def test_vertical_axial_2d_distance(H, W, p):
  function test_local_2d_distance (line 93) | def test_local_2d_distance(H, W, p):
  function test_local_2d_gaussian_distribution (line 102) | def test_local_2d_gaussian_distribution(H, W, sigma):
  function test_swin_attention_pattern (line 111) | def test_swin_attention_pattern(H, W, window_size):
  function test_dilated_2d_pattern (line 156) | def test_dilated_2d_pattern(H, W, k):
  function test_pattern_to_layout (line 174) | def test_pattern_to_layout():
  function test_alibi_pattern (line 209) | def test_alibi_pattern():
  function test_layout_to_pattern (line 215) | def test_layout_to_pattern():

FILE: tests/test_checkpoint.py
  function _relu_policy (line 32) | def _relu_policy(ctx, func, *args, **kwargs):
  function _all_policy (line 36) | def _all_policy(ctx, func, *args, **kwargs):
  function test_checkpoint (line 44) | def test_checkpoint(policy_fn, input_requires_grad, device, autocast):
  function test_checkpoint_with_grad (line 80) | def test_checkpoint_with_grad(policy_fn, input_requires_grad, grad_mode):
  function test_checkpoint_attention (line 122) | def test_checkpoint_attention(policy_fn, input_requires_grad, device, au...
  function test_list_operators (line 187) | def test_list_operators():
  function test_optimize_runtime_with_given_memory (line 222) | def test_optimize_runtime_with_given_memory(max_memory, optimal_soln):
  function _get_model_blocks (line 253) | def _get_model_blocks(num_layers, dtype, device, inplace, random, first_...
  class _Model (line 278) | class _Model(torch.nn.Module):
    method __init__ (line 279) | def __init__(self, blocks, policy_fn):
    method forward (line 284) | def forward(self, x):
  function test_optimal_checkpoint_policy (line 296) | def test_optimal_checkpoint_policy(
  function test_selective_checkpoint_wrapper_compile (line 340) | def test_selective_checkpoint_wrapper_compile(

FILE: tests/test_fmha_flop_formula.py
  function test_flop_formula (line 44) | def test_flop_formula(
  function test_mask_nonzeros (line 148) | def test_mask_nonzeros() -> None:

FILE: tests/test_fmha_merge_attentions.py
  function get_supported_attn_bias_types (line 38) | def get_supported_attn_bias_types(op):
  function test_merge_attentions_nobias (line 80) | def test_merge_attentions_nobias(
  function test_partial_paged (line 155) | def test_partial_paged(
  function test_merge_attentions_decoding (line 234) | def test_merge_attentions_decoding(
  function test_merge_attentions_sharedinput (line 377) | def test_merge_attentions_sharedinput(
  function test_merge_attentions_against_ref (line 490) | def test_merge_attentions_against_ref(bmghk: bool):
  function _merge_attentions_ref (line 513) | def _merge_attentions_ref(attn_split, lse_split):
  function test_merge_attention_with_compile (line 542) | def test_merge_attention_with_compile() -> None:
  function test_merge_training (line 574) | def test_merge_training():
  function _pad_seqdim (line 628) | def _pad_seqdim(partial: Partial, left: int, right: int) -> Partial:
  function _slice (line 633) | def _slice(partial: Partial, a: int, b: int) -> Partial:
  function test_merge_training_compile (line 638) | def test_merge_training_compile():
  function test_merge_training_zilch (line 684) | def test_merge_training_zilch():
  function test_merge_training_undilate (line 690) | def test_merge_training_undilate():

FILE: tests/test_fwbw_overlap.py
  function test_fwbw_overlap (line 19) | def test_fwbw_overlap() -> None:
  function test_fwbw_nothing_to_overlap (line 116) | def test_fwbw_nothing_to_overlap() -> None:
  class ExceptionInBW (line 130) | class ExceptionInBW(Exception):
  class ExceptionInBWOp (line 134) | class ExceptionInBWOp(torch.autograd.Function):
    method forward (line 136) | def forward(ctx: Any, x: torch.Tensor) -> torch.Tensor:
    method backward (line 140) | def backward(ctx: Any, gx: torch.Tensor) -> torch.Tensor:  # type: ignore
  function test_exception_in_bw_pass (line 144) | def test_exception_in_bw_pass() -> None:
  function test_exception_in_first_bw_pass (line 164) | def test_exception_in_first_bw_pass() -> None:

FILE: tests/test_indexing.py
  function test_scaled_index_add (line 25) | def test_scaled_index_add(out_shape, with_scaling: bool) -> None:
  function test_index_select_cat (line 80) | def test_index_select_cat(D, batches) -> None:

FILE: tests/test_mem_eff_attention.py
  function _filter_unsupported_ops (line 99) | def _filter_unsupported_ops(ops: Sequence[T]) -> List[T]:
  function sample_random_supported_fw (line 122) | def sample_random_supported_fw(
  function generate_test_shapes_B_Mq_Mkv_H_K_Kv (line 143) | def generate_test_shapes_B_Mq_Mkv_H_K_Kv(op):
  function make_id (line 214) | def make_id(op, device, dtype, bias_type, *shape):
  function get_supported_attn_bias_types (line 226) | def get_supported_attn_bias_types(op):
  function _generate_op_device_dtype_biasT_B_Mq_Mkv_H_K_Kv (line 249) | def _generate_op_device_dtype_biasT_B_Mq_Mkv_H_K_Kv(
  function _rand_partition (line 346) | def _rand_partition(r: random.Random, total: int, n: int) -> List[int]:
  function get_bias_grad (line 355) | def get_bias_grad(attn_bias, clear: bool = False) -> Optional[torch.Tens...
  function create_tensors (line 367) | def create_tensors(
  function bmhk2bmk (line 483) | def bmhk2bmk(tensor) -> torch.Tensor:
  function bmk2bmhk (line 491) | def bmk2bmhk(tensor, num_heads: int) -> torch.Tensor:
  function nanify_oob_seqlen (line 497) | def nanify_oob_seqlen(x: torch.Tensor) -> torch.Tensor:
  function test_forward (line 510) | def test_forward(opFW_device_dtype_biasT_B_Mq_Mkv_H_K_Kv, packed, fmt, *...
  function _block_diag_reshape_lse (line 612) | def _block_diag_reshape_lse(
  function test_logsumexp (line 624) | def test_logsumexp(opFW_device_dtype_biasT_B_Mq_Mkv_H_K_Kv):
  function test_logsumexp_mqa (line 706) | def test_logsumexp_mqa(op):
  function test_backward (line 749) | def test_backward(
  function _vec_binom_test (line 948) | def _vec_binom_test(x, n, p):
  function _get_drop_mask (line 975) | def _get_drop_mask(op, batch_size, q_len, kv_len, p, device):
  function test_dropout_ck (line 994) | def test_dropout_ck(q_len, kv_len, batch_size, k_len, p, seed, attn_bias):
  function test_dropout_backward_ck (line 1052) | def test_dropout_backward_ck(q_len, kv_len, batch_size, k, p):
  function test_lowlevel_api_shapes (line 1125) | def test_lowlevel_api_shapes(opBW_device_dtype_biasT_B_Mq_Mkv_H_K_Kv, fmt):
  function test_cuda_streams (line 1152) | def test_cuda_streams(
  function test_custom_scale (line 1219) | def test_custom_scale(opBW_device_dtype_biasT_B_Mq_Mkv_H_K_Kv):
  function apply_attention (line 1283) | def apply_attention(query, key, value, attn_bias, op_fw, proj):
  function test_grad_checkpointing (line 1296) | def test_grad_checkpointing(
  function test_unsupported_cpu (line 1370) | def test_unsupported_cpu(op: Type[fmha.AttentionFwOpBase]):
  function test_unsupported_stride_lastdim (line 1378) | def test_unsupported_stride_lastdim(op: Type[fmha.AttentionFwOpBase]):
  function test_unsupported_stride_alignment (line 1395) | def test_unsupported_stride_alignment(op: Type[fmha.AttentionFwOpBase]):
  function test_unsupported_dropout_combine_flash_cutlass (line 1409) | def test_unsupported_dropout_combine_flash_cutlass() -> None:
  function test_attn_bias_causal (line 1425) | def test_attn_bias_causal() -> None:
  function test_attn_bias_torch_tensor (line 1440) | def test_attn_bias_torch_tensor() -> None:
  function test_attn_bias_blockdiag (line 1450) | def test_attn_bias_blockdiag() -> None:
  function test_attn_bias_blockdiag_batched (line 1472) | def test_attn_bias_blockdiag_batched() -> None:
  function test_attn_bias_blockdiag_crossattn_causal (line 1496) | def test_attn_bias_blockdiag_crossattn_causal() -> None:
  function test_attn_bias_blockdiag_crossattn_causal_with_prefix_qk_cond (line 1539) | def test_attn_bias_blockdiag_crossattn_causal_with_prefix_qk_cond() -> N...
  function test_attn_bias_blockdiag_crossattn_causal_with_prefix (line 1553) | def test_attn_bias_blockdiag_crossattn_causal_with_prefix() -> None:
  function test_attn_bias_padded (line 1589) | def test_attn_bias_padded() -> None:
  function _kv_heads_label (line 1647) | def _kv_heads_label(kv_heads: Optional[int]) -> str:
  function _test_decoder (line 1655) | def _test_decoder(
  function test_triton_splitk_decoder (line 1767) | def test_triton_splitk_decoder(
  function test_ck_splitk_decoder (line 1797) | def test_ck_splitk_decoder(
  function test_triton_splitk_decoder_manyqueries (line 1832) | def test_triton_splitk_decoder_manyqueries(
  function test_attn_bias_from_seqlens (line 1854) | def test_attn_bias_from_seqlens() -> None:
  function test_attn_bias_blockdiag_doc (line 1862) | def test_attn_bias_blockdiag_doc() -> None:
  class TestAttnBias (line 1893) | class TestAttnBias:
    method create_tensors (line 1895) | def create_tensors(
    method pad_bias (line 1912) | def pad_bias(bias: torch.Tensor) -> torch.Tensor:
    method test_f16_biasf32 (line 1920) | def test_f16_biasf32(self) -> None:
    method test_f32_biasf16 (line 1929) | def test_f32_biasf16(self) -> None:
    method test_wrong_alignment (line 1938) | def test_wrong_alignment(self, dtype) -> None:
    method test_permuted_attn_bias (line 1963) | def test_permuted_attn_bias(self) -> None:
  function test_window_size_materialize (line 1995) | def test_window_size_materialize() -> None:
  function test_forward_gqa (line 2041) | def test_forward_gqa(opFW_biasT, Mq: int):
  function test_backward_gqa (line 2074) | def test_backward_gqa(opBW):
  function test_forward_gqa_one_group (line 2121) | def test_forward_gqa_one_group(opFW):
  function test_flash_gqa_wrong_strides (line 2147) | def test_flash_gqa_wrong_strides() -> None:
  function _dispatches_to_splitK (line 2174) | def _dispatches_to_splitK(q, kv):
  function _dispatches_to_flash_decoding (line 2181) | def _dispatches_to_flash_decoding(q, kv):
  function test_dispatch_decoding_bmhk (line 2188) | def test_dispatch_decoding_bmhk() -> None:
  function test_dispatch_decoding_bmghk (line 2211) | def test_dispatch_decoding_bmghk() -> None:
  function test_forward_splitk (line 2268) | def test_forward_splitk(
  function test_mqa_decoding (line 2292) | def test_mqa_decoding(op: Type[fmha.AttentionFwOpBase], dtype, B_Mkv_H_K):
  function test_empty_tensors_empty_query (line 2315) | def test_empty_tensors_empty_query(
  function test_empty_tensors_empty_kv (line 2340) | def test_empty_tensors_empty_kv(
  function test_empty_tensors_empty_b (line 2367) | def test_empty_tensors_empty_b(
  function test_local_attn_bias (line 2387) | def test_local_attn_bias() -> None:
  function test_paged_attention (line 2415) | def test_paged_attention(
  function test_paged_attention_ck (line 2439) | def test_paged_attention_ck(B, MAX_T: int, page_size: int, gappy: bool):
  function test_paged_attention_flash (line 2458) | def test_paged_attention_flash(B, MAX_T: int, page_size: int):
  function test_paged_attention_flash3 (line 2479) | def test_paged_attention_flash3(
  function paged_attention_run_inner (line 2491) | def paged_attention_run_inner(
  function test_memeff_compile (line 2754) | def test_memeff_compile(bias_t, create_bias_inside_compiled: bool, op) -...
  function test_triton_splitk_rowwise_fp8 (line 2823) | def test_triton_splitk_rowwise_fp8(
  function fp8_per_head_quantize (line 2882) | def fp8_per_head_quantize(
  function test_fp8_attention (line 2908) | def test_fp8_attention(dtype_init, deterministic, causal, B, nheads, seq...
  function _pack_xformer_input (line 2949) | def _pack_xformer_input(
  function test_fav3_kvsplit_attn (line 2994) | def test_fav3_kvsplit_attn(
  function test_nans_in_padding (line 3051) | def test_nans_in_padding(op):

FILE: tests/test_multiprocessing_utils.py
  function inner_test (line 12) | def inner_test(present_parent_keys: List[str] = [], absent_parent_keys: ...
  function test_env_vars (line 32) | def test_env_vars():

FILE: tests/test_profiler.py
  function test_profiler_dispatcher_stream_workaround (line 34) | def test_profiler_dispatcher_stream_workaround() -> None:
  function test_profiler_overhead (line 59) | def test_profiler_overhead(device_bs_mm) -> None:
  function assert_flops (line 114) | def assert_flops(
  function test_analyze_prof (line 157) | def test_analyze_prof(dtype) -> None:
  function test_analyze_prof_sdpa (line 179) | def test_analyze_prof_sdpa(dtype, backend, causal: bool) -> None:
  function test_analyze_prof_memeff (line 209) | def test_analyze_prof_memeff(op, causal: bool) -> None:

FILE: tests/test_rmsnorm.py
  class RMSNormPytorch (line 26) | class RMSNormPytorch(torch.nn.Module):
    method __init__ (line 27) | def __init__(self, dim: int, include_weight: bool = True, eps: float =...
    method _norm (line 35) | def _norm(self, x):
    method forward (line 38) | def forward(self, x):
  function test_forward (line 48) | def test_forward(K: int, dtype: str):
  function test_increment (line 80) | def test_increment(K: int, include_weight: bool, dtype: str):

FILE: tests/test_rope_padded.py
  function apply_scaling (line 26) | def apply_scaling(
  function _slow_rope (line 53) | def _slow_rope(
  function _slow_rope2 (line 107) | def _slow_rope2(
  function test_consistency (line 166) | def test_consistency(
  function test_rope_prefill (line 269) | def test_rope_prefill(seqlen) -> None:
  function test_rope_seqpos (line 302) | def test_rope_seqpos() -> None:

FILE: tests/test_seqpar.py
  function reference_leading (line 31) | def reference_leading(input_, w1, w2):
  function reference_trailing (line 37) | def reference_trailing(hidden, w):
  function xformers_leading (line 42) | def xformers_leading(input_, w1, w2, *, fuse, group):
  function xformers_trailing (line 48) | def xformers_trailing(hidden, w, *, fuse, group):
  function inner_seqpar (line 54) | def inner_seqpar(
  function test_seqpar (line 266) | def test_seqpar(

FILE: tests/test_sequence_parallel_fused_ops.py
  function compare_fused_and_non_fused_ops (line 29) | def compare_fused_and_non_fused_ops(
  function inner_sequence_parallel_fused (line 117) | def inner_sequence_parallel_fused(
  function test_sequence_parallel_fused (line 178) | def test_sequence_parallel_fused(
  function inner_sequence_parallel_fused_handle_all_dtypes (line 195) | def inner_sequence_parallel_fused_handle_all_dtypes(
  function test_sequence_parallel_fused_handle_all_dtypes (line 229) | def test_sequence_parallel_fused_handle_all_dtypes(

FILE: tests/test_sparse_tensors.py
  function _create_blocksparse_tensor (line 23) | def _create_blocksparse_tensor(
  function _create_tensor (line 36) | def _create_tensor(tensor_type, device, dtype, shape, sparsity):
  function _seed (line 44) | def _seed():
  function _get_dtype_atol (line 49) | def _get_dtype_atol(tensor_type, device: str):
  function test_masked_matmul (line 70) | def test_masked_matmul(tensor_type, device):
  function test_bmm (line 123) | def test_bmm(tensor_type, device):
  function test_sparse_softmax (line 173) | def test_sparse_softmax(tensor_type, device):
  function test_deepcopy (line 218) | def test_deepcopy(tensor_type, device):
  function test_module_buffer (line 239) | def test_module_buffer(tensor_type, device):

FILE: tests/test_sparsity24.py
  function test_sparse24_largest_mask_2d (line 65) | def test_sparse24_largest_mask_2d() -> None:
  function test_sparse24_causal1122 (line 82) | def test_sparse24_causal1122(dtype) -> None:
  function test_sparse24_largest_abs_values_greedy (line 103) | def test_sparse24_largest_abs_values_greedy(dtype, backend) -> None:
  function test_sparse24_largest_mask_2d_notaligned (line 123) | def test_sparse24_largest_mask_2d_notaligned(dtype) -> None:
  function test_sparse24_largest_mask_2d_big (line 131) | def test_sparse24_largest_mask_2d_big(dtype) -> None:
  function create_random_mask (line 136) | def create_random_mask(shape) -> torch.Tensor:
  function test_detach_requires_grad (line 156) | def test_detach_requires_grad() -> None:
  function test_detach2 (line 174) | def test_detach2() -> None:
  function test_meta_pack_and_reorder (line 190) | def test_meta_pack_and_reorder() -> None:
  function test_pack_tensor_according_to_mask (line 239) | def test_pack_tensor_according_to_mask() -> None:
  function test_sp24_gemm (line 279) | def test_sp24_gemm(dtype) -> None:
  function test_pack_meta_shuffle (line 302) | def test_pack_meta_shuffle(transpose: bool) -> None:
  function test_pack_both_ways_meta_correctness (line 353) | def test_pack_both_ways_meta_correctness(dtype, backend) -> None:
  function test_pack_both_ways_id (line 382) | def test_pack_both_ways_id(dtype) -> None:
  function test_pack_both_ways_edge_case1 (line 416) | def test_pack_both_ways_edge_case1(dtype) -> None:
  function test_sp24_apply (line 444) | def test_sp24_apply(dtype) -> None:
  function test_sp24_api_different_pattern (line 461) | def test_sp24_api_different_pattern(dtype) -> None:
  function test_sp24_api_different_pattern_transposed (line 479) | def test_sp24_api_different_pattern_transposed(dtype) -> None:
  function _gen4x4 (line 496) | def _gen4x4(r: random.Random):
  function _gen_24_sparsifiable_both_ways (line 512) | def _gen_24_sparsifiable_both_ways(
  function test_sp24_transpose_invariant (line 532) | def test_sp24_transpose_invariant(dtype, backend) -> None:
  function test_cusparselt_format (line 558) | def test_cusparselt_format(M: int, N: int) -> None:
  function test_sp24_matmuls (line 573) | def test_sp24_matmuls(dtype) -> None:
  function test_sp24_matmuls_mat_vec (line 593) | def test_sp24_matmuls_mat_vec() -> None:
  function test_sp24_matmuls_bmm (line 604) | def test_sp24_matmuls_bmm() -> None:
  function sparsify24_dense (line 614) | def sparsify24_dense(tensor: torch.Tensor):
  function test_sp24_api_mlp_act24_correctness (line 622) | def test_sp24_api_mlp_act24_correctness(dtype, act) -> None:
  function test_sp24_api_swiglu_correctness (line 670) | def test_sp24_api_swiglu_correctness(dtype) -> None:
  function test_not_aligned (line 726) | def test_not_aligned(dtype, M):
  function test_sparsify24_like_dense (line 740) | def test_sparsify24_like_dense(dtype, input_rowmajor, backend):
  function test_sparsify24_weights (line 756) | def test_sparsify24_weights(dtype, backend):
  class LinearW24 (line 769) | class LinearW24(torch.nn.Linear):
    method forward (line 770) | def forward(self, input: torch.Tensor) -> torch.Tensor:
  function _workaround_cusparselt_internal_error (line 795) | def _workaround_cusparselt_internal_error() -> None:
  function test_linearw24 (line 808) | def test_linearw24(dtype, bias: bool, aligned: bool, amp: bool) -> None:
  function test_wrong_alignment_error_message (line 872) | def test_wrong_alignment_error_message() -> None:
  function test_min_alignment (line 882) | def test_min_alignment() -> None:
  function test_wrong_dtype_error_message (line 891) | def test_wrong_dtype_error_message() -> None:
  function test_linear_dispatch_inference_mode (line 902) | def test_linear_dispatch_inference_mode(backend: str, with_bias: bool) -...
  function test_sp24_meta (line 929) | def test_sp24_meta() -> None:
  function test_sp24_compile (line 939) | def test_sp24_compile(backend) -> None:
  class _TransformerFFN (line 956) | class _TransformerFFN(nn.Module):
    method __init__ (line 957) | def __init__(
    method forward (line 973) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function test_linearw24_block_compile (line 982) | def test_linearw24_block_compile() -> None:
  function test_sp24_ste (line 1018) | def test_sp24_ste():
  function test_sparsify24_ste (line 1028) | def test_sparsify24_ste(dtype):
  class _Sp24X (line 1040) | class _Sp24X(torch.autograd.Function):
    method forward (line 1042) | def forward(ctx, x):
    method backward (line 1050) | def backward(ctx, x):
  function test_compile_unflatten (line 1069) | def test_compile_unflatten():
  function _to_fp8_rowwise (line 1077) | def _to_fp8_rowwise(x: torch.Tensor, dtype) -> Tuple[torch.Tensor, torch...
  function test_sparseNM_dense (line 1087) | def test_sparseNM_dense(M: int, sort_preproc: str) -> None:
  function test_sparse24_fp8_sm90_cutlass_gemm_eye (line 1116) | def test_sparse24_fp8_sm90_cutlass_gemm_eye(
  function test_sparse24_fp8_sm90_cutlass_gemm_random_tensor (line 1151) | def test_sparse24_fp8_sm90_cutlass_gemm_random_tensor(

FILE: tests/test_splitk_reference.py
  function ref_attention_splitk_bmhk (line 18) | def ref_attention_splitk_bmhk(
  function ref_attention_splitk (line 42) | def ref_attention_splitk(
  function _kv_heads_label (line 157) | def _kv_heads_label(kv_heads: Optional[int]) -> str:
  function test_splitk_reference (line 171) | def test_splitk_reference(

FILE: tests/test_tiled_matmul.py
  function generate_test_shapes (line 31) | def generate_test_shapes(*repeats, num_shapes=5):
  function ceil_of_ratio (line 51) | def ceil_of_ratio(n, k):
  function make_operands (line 55) | def make_operands(m, n, k, *, dtype):
  function test_forward_backward (line 112) | def test_forward_backward(

FILE: tests/test_tree_attention.py
  function test_tree_attention (line 92) | def test_tree_attention(
  class SplitKAutotune (line 105) | class SplitKAutotune(fmha.triton_splitk.FwOp):
  function run_tree_attention_inner (line 109) | def run_tree_attention_inner(
  function ref_tree_attention (line 293) | def ref_tree_attention(
  function tree_attention_with_sync (line 347) | def tree_attention_with_sync(
  function test_tree_attention_metadata_full_tree (line 427) | def test_tree_attention_metadata_full_tree(depth: int, branching: int) -...
  function test_tree_attention_metadata_arbitrary_tree (line 498) | def test_tree_attention_metadata_arbitrary_tree(branching: List[int]) ->...

FILE: tests/test_triton_varargs.py
  function test_triton_varargs_kernel (line 41) | def test_triton_varargs_kernel():
  function test_triton_multiple_varargs_kernel (line 71) | def test_triton_multiple_varargs_kernel(conditional: bool):
  function test_triton_varargs_conditional (line 109) | def test_triton_varargs_conditional():
  function test_subscripting_call (line 146) | def test_subscripting_call():

FILE: tests/test_unbind.py
  function test_unbind (line 17) | def test_unbind(dim: int, contiguous: bool):
  function test_unbind_get_stack_strides (line 54) | def test_unbind_get_stack_strides(dim: int, contiguous: bool):

FILE: tests/utils.py
  function use_cpu_ref (line 44) | def use_cpu_ref(device: str):
  function maybe_use_cpu_ref (line 48) | def maybe_use_cpu_ref(fn):
  function disable_tf32 (line 79) | def disable_tf32(fn):
  function assert_allclose (line 105) | def assert_allclose(
  function construct_fp8_attention_inputs (line 134) | def construct_fp8_attention_inputs(
  function _combine_scale_shift (line 311) | def _combine_scale_shift(scale: torch.Tensor, shift: torch.Tensor) -> to...
  function quantize_fp8_asymmetric (line 320) | def quantize_fp8_asymmetric(
  function dequantize_fp8_asymmetric (line 337) | def dequantize_fp8_asymmetric(

FILE: xformers/__init__.py
  function compute_once (line 32) | def compute_once(func):
  function _is_triton_available (line 45) | def _is_triton_available():
  function get_python_lib (line 69) | def get_python_lib():

FILE: xformers/_cpp_lib.py
  class _BuildInfo (line 23) | class _BuildInfo:
    method cuda_version (line 27) | def cuda_version(self) -> Optional[int]:
    method hip_version (line 31) | def hip_version(self) -> Optional[int]:
    method torch_version (line 35) | def torch_version(self) -> str:
    method python_version (line 39) | def python_version(self) -> str:
    method flash_version (line 43) | def flash_version(self) -> str:
    method use_torch_flash (line 47) | def use_torch_flash(self) -> bool:
    method build_env (line 51) | def build_env(self) -> Dict[str, Any]:
  class xFormersWasNotBuiltException (line 55) | class xFormersWasNotBuiltException(Exception):
    method __str__ (line 56) | def __str__(self) -> str:
  class xFormersInvalidLibException (line 65) | class xFormersInvalidLibException(Exception):
    method __init__ (line 66) | def __init__(self, build_info: Optional[_BuildInfo]) -> None:
    method __str__ (line 69) | def __str__(self) -> str:
  function _register_extensions (line 85) | def _register_extensions():

FILE: xformers/_deprecation_warning.py
  function deprecated_function (line 9) | def deprecated_function(self):

FILE: xformers/attn_bias_utils.py
  function _create_aligned_bias (line 16) | def _create_aligned_bias(*shape: int, **kwargs) -> torch.Tensor:
  function create_attn_bias (line 30) | def create_attn_bias(
  function _rand_seqlens (line 263) | def _rand_seqlens(
  function _rand_maxed_partition (line 336) | def _rand_maxed_partition(
  function _rand_seqlens_padded_k (line 354) | def _rand_seqlens_padded_k(
  function ref_attention (line 374) | def ref_attention(q, k, v, attn_bias=None, drop_mask=None, p=0.0, scale=...
  function ref_attention_bmhk (line 432) | def ref_attention_bmhk(q, k, v, attn_bias, scale=None) -> torch.Tensor:
  function pack_kv_cache (line 451) | def pack_kv_cache(

FILE: xformers/benchmarks/benchmark_attn_decoding.py
  function quantize_kv_int4 (line 38) | def quantize_kv_int4(k: torch.Tensor, num_groups: int = 1) -> torch.Tensor:
  class AttentionDecodingBase (line 75) | class AttentionDecodingBase:
    method __init__ (line 78) | def __init__(
    method get_inputs (line 149) | def get_inputs(self):
    method fw (line 155) | def fw(self) -> None:
  class AttentionDecodingCUTLASS (line 164) | class AttentionDecodingCUTLASS(AttentionDecodingBase):
  class AttentionDecodingCK (line 168) | class AttentionDecodingCK(AttentionDecodingBase):
    method __init__ (line 171) | def __init__(
  class AttentionDecodingSplitKV (line 239) | class AttentionDecodingSplitKV(AttentionDecodingBase):
  class AttentionDecodingCKSplitKV (line 243) | class AttentionDecodingCKSplitKV(AttentionDecodingBase):
  class AttentionDecodingSplitInt4KV (line 247) | class AttentionDecodingSplitInt4KV(AttentionDecodingBase):
    method __init__ (line 250) | def __init__(
  class AttentionDecodingPyTorchRepeat (line 335) | class AttentionDecodingPyTorchRepeat(AttentionDecodingBase):
    method fw (line 336) | def fw(self) -> None:
  class AttentionDecodingFlashAttention (line 369) | class AttentionDecodingFlashAttention(AttentionDecodingBase):
    method fw (line 370) | def fw(self) -> None:
  function get_benchmark_names (line 405) | def get_benchmark_names():
  function test_flash_attention_decoder (line 416) | def test_flash_attention_decoder(name, case):
  function main (line 453) | def main() -> None:

FILE: xformers/benchmarks/benchmark_indexing.py
  class ScaledIndexAddBenchmark (line 50) | class ScaledIndexAddBenchmark:
    method __init__ (line 51) | def __init__(self, dtype, scaling: bool, shape, bw: bool) -> None:
    method fw (line 76) | def fw(self) -> None:
    method bw (line 85) | def bw(self):
  class ScaledIndexAddBenchmarkBaseline (line 93) | class ScaledIndexAddBenchmarkBaseline(ScaledIndexAddBenchmark):
    method fw (line 94) | def fw(self) -> None:
  class IndexSelectBenchmark (line 106) | class IndexSelectBenchmark:
    method __init__ (line 107) | def __init__(self, dtype, batches, D, keep_ratio, bw: bool) -> None:
    method fw (line 131) | def fw(self) -> None:
    method bw (line 134) | def bw(self):
  class IndexSelectBenchmarkBaseline (line 140) | class IndexSelectBenchmarkBaseline(IndexSelectBenchmark):
    method fw (line 141) | def fw(self) -> None:

FILE: xformers/benchmarks/benchmark_mem_eff_attention.py
  function product_dict (line 80) | def product_dict(**kwargs):
  function create_tensors (line 142) | def create_tensors(shape_q, Hkv, dtype, requires_grad=False, packed=True):
  function mem_eff_attention_fw (line 168) | def mem_eff_attention_fw(
  function mem_eff_attention_bw (line 266) | def mem_eff_attention_bw(
  function main (line 343) | def main():

FILE: xformers/benchmarks/benchmark_merge_attentions.py
  function _merge_attentions_varargs_ref (line 12) | def _merge_attentions_varargs_ref(attn_split, lse_split):
  function benchmark_merge_attentions_backward (line 34) | def benchmark_merge_attentions_backward(split_k, B, M, G, N_H_L, D_H, dt...
  function main (line 80) | def main():

FILE: xformers/benchmarks/benchmark_sequence_parallel_fused.py
  class Scenario (line 23) | class Scenario:
  class Step (line 33) | class Step(enum.Enum):
    method __str__ (line 37) | def __str__(self):
  class Bench (line 42) | class Bench:
    method __getitem__ (line 46) | def __getitem__(self, step: Step):
  function round_up_to_nearest_multiple (line 62) | def round_up_to_nearest_multiple(n: int, m: int) -> int:
  function llama_07B_MHA (line 66) | def llama_07B_MHA(world_size: int) -> Scenario:
  function llama_07B_FFN (line 76) | def llama_07B_FFN(world_size: int) -> Scenario:
  function llama_70B_MHA (line 87) | def llama_70B_MHA(world_size: int) -> Scenario:
  function llama_70B_FFN (line 97) | def llama_70B_FFN(world_size: int) -> Scenario:
  function run_one_rank (line 120) | def run_one_rank(
  function main (line 422) | def main():

FILE: xformers/benchmarks/benchmark_sp24.py
  class Mlp (line 43) | class Mlp(nn.Module):
    method __init__ (line 46) | def __init__(
    method fw (line 65) | def fw(self):
    method bw (line 72) | def bw(self):
  class MlpDenseMask (line 76) | class MlpDenseMask(Mlp):
    method fw (line 77) | def fw(self):
  class MlpAct24 (line 89) | class MlpAct24(Mlp):
    method fw (line 90) | def fw(self):
  class LinearW24 (line 101) | class LinearW24(torch.nn.Linear):
    method forward (line 102) | def forward(self, input: torch.Tensor) -> torch.Tensor:
  class MlpW24 (line 111) | class MlpW24(Mlp):
  class MicrobenchmarkBase (line 115) | class MicrobenchmarkBase:
    method __init__ (line 116) | def __init__(
    method bw (line 131) | def bw(self) -> None:
  class MicrobenchmarkSparsify24 (line 135) | class MicrobenchmarkSparsify24(MicrobenchmarkBase):
    method fw (line 136) | def fw(self) -> torch.Tensor:
  class MicrobenchmarkSp24ApplyDense (line 141) | class MicrobenchmarkSp24ApplyDense(MicrobenchmarkBase):
    method fw (line 142) | def fw(self) -> torch.Tensor:
  class MicrobenchmarkSp24ApplyDenseT (line 147) | class MicrobenchmarkSp24ApplyDenseT(MicrobenchmarkBase):
    method fw (line 148) | def fw(self) -> torch.Tensor:
  class MicrobenchmarkInputClone (line 153) | class MicrobenchmarkInputClone(MicrobenchmarkBase):
    method fw (line 154) | def fw(self) -> torch.Tensor:

FILE: xformers/benchmarks/benchmark_tiled_matmul.py
  function product_dict (line 34) | def product_dict(**kwargs):
  function matmul_per_tile (line 53) | def matmul_per_tile(a, b):
  function benchmark_tiled_matmul (line 64) | def benchmark_tiled_matmul(shape_name, dtype):

FILE: xformers/benchmarks/utils.py
  class NotSupportedInputError (line 34) | class NotSupportedInputError(Exception):
  function get_func_name (line 47) | def get_func_name(fn):
  function pretty_print (line 53) | def pretty_print(results, title, units) -> None:
  function pretty_plot (line 79) | def pretty_plot(
  function bench_functions (line 123) | def bench_functions(
  function pretty_barplot (line 153) | def pretty_barplot(results, title, units: str, filename=None, dash_key=""):
  function rmf (line 211) | def rmf(filename: str) -> None:
  function temp_files_ctx (line 220) | def temp_files_ctx(num: int) -> Generator:
  function _benchmark_results_from_csv (line 237) | def _benchmark_results_from_csv(filename: str) -> List[Tuple[Dict[str, A...
  function _benchmark_results_to_csv (line 280) | def _benchmark_results_to_csv(
  function _finalize_results (line 306) | def _finalize_results(results: List[Tuple[Dict[str, Any], Any]]) -> List...
  function _render_bar_plot (line 343) | def _render_bar_plot(results: List[Any], store_results_folder: str) -> N...
  function create_argparser (line 403) | def create_argparser() -> argparse.ArgumentParser:
  function benchmark_main_helper (line 438) | def benchmark_main_helper(
  function benchmark_run_and_compare (line 463) | def benchmark_run_and_compare(
  function _is_oom_error (line 643) | def _is_oom_error(e):
  function _fail_if_regressions (line 649) | def _fail_if_regressions(
  function benchmark_main_helper2 (line 707) | def benchmark_main_helper2(
  function product_dict (line 756) | def product_dict(**kwargs):

FILE: xformers/checkpoint.py
  class _NotAvailable (line 44) | class _NotAvailable:
    method __init__ (line 45) | def __init__(self, *args, **kwargs):
  class ProfileMetadata (line 71) | class ProfileMetadata:
  function _get_default_policy (line 82) | def _get_default_policy(allow_list=None):
  class VerboseTorchDispatchMode (line 98) | class VerboseTorchDispatchMode(TorchDispatchMode):
    method __init__ (line 99) | def __init__(self):
    method __torch_dispatch__ (line 102) | def __torch_dispatch__(self, func, types, args=(), kwargs=None):
  function list_operators (line 109) | def list_operators(function, *args, **kwargs):
  class CachedTorchDispatchMode (line 120) | class CachedTorchDispatchMode(_CachedTorchDispatchMode):
    method __init__ (line 121) | def __init__(self, policy_fn, storage, allow_cache_entry_mutation):
    method pop_from_storage (line 129) | def pop_from_storage(self, func, args, kwargs):
  class NullTorchDispatchMode (line 137) | class NullTorchDispatchMode(TorchDispatchMode):
    method __torch_dispatch__ (line 138) | def __torch_dispatch__(self, func, types, args=(), kwargs=None):
  function selective_checkpoint_context_fn (line 144) | def selective_checkpoint_context_fn(policy_fn=None):
  function checkpoint (line 175) | def checkpoint(
  class ProfileOperatorsTorchDispatchMode (line 209) | class ProfileOperatorsTorchDispatchMode(TorchDispatchMode):
    method __init__ (line 210) | def __init__(self, num_runs: int = 10) -> None:
    method _get_inplace_metadata (line 214) | def _get_inplace_metadata(self, func, out) -> Tuple[int, int, Tuple[in...
    method __torch_dispatch__ (line 245) | def __torch_dispatch__(self, func, types, args=(), kwargs=None):
  function _analyze_operators (line 287) | def _analyze_operators(function, *args) -> List[ProfileMetadata]:
  function get_optimal_checkpoint_policy (line 309) | def get_optimal_checkpoint_policy(function, *args, memory_budget: float)...
  function _optimize_runtime_with_given_memory (line 386) | def _optimize_runtime_with_given_memory(
  class _OptimalPolicy (line 461) | class _OptimalPolicy:
    method __init__ (line 462) | def __init__(self, optim_output: torch.Tensor):
    method __call__ (line 466) | def __call__(self, ctx, func, *args, **kwargs) -> bool:
  class SelectiveCheckpointWrapper (line 475) | class SelectiveCheckpointWrapper(ActivationWrapper):
    method __init__ (line 476) | def __init__(self, mod, memory_budget=None, policy_fn=None):
    method _get_policy_fn (line 492) | def _get_policy_fn(self, *args, **kwargs):
    method get_policy_fn (line 515) | def get_policy_fn(self, *args, **kwargs):
    method forward (line 520) | def forward(self, *args, **kwargs):
  function selective_checkpoint_wrapper (line 527) | def selective_checkpoint_wrapper(

FILE: xformers/components/attention/attention_patterns.py
  function _generate_nd_grid (line 15) | def _generate_nd_grid(*sizes):
  function local_nd_distance (line 20) | def local_nd_distance(*sizes, p=2.0, weights=None):
  function local_nd_gaussian_distribution (line 31) | def local_nd_gaussian_distribution(*sizes, sigma=1):
  function local_nd_pattern (line 37) | def local_nd_pattern(*sizes, distance, p=2.0):
  function axial_nd_pattern (line 42) | def axial_nd_pattern(*sizes):
  function random_pattern_from_probability_matrix (line 48) | def random_pattern_from_probability_matrix(dist_matrix, nnz):
  function global_token_pattern (line 69) | def global_token_pattern(attention_query_mask: torch.Tensor) -> torch.Te...
  function random_pattern (line 77) | def random_pattern(attn_size: int, sparsity: float) -> torch.Tensor:
  function local_1d_pattern (line 84) | def local_1d_pattern(attn_size: int, window_size: int) -> torch.Tensor:
  function causal_1d_pattern (line 92) | def causal_1d_pattern(attn_size: int) -> torch.Tensor:
  function horizontal_axial_2d_distance (line 98) | def horizontal_axial_2d_distance(H, W, p=2.0):
  function vertical_axial_2d_distance (line 103) | def vertical_axial_2d_distance(H, W, p=2.0):
  function local_2d_distance (line 108) | def local_2d_distance(H, W, p=2.0):
  function local_2d_gausian_distribution (line 112) | def local_2d_gausian_distribution(H, W, sigma=1):
  function local_2d_pattern (line 116) | def local_2d_pattern(H, W, distance, p=2.0):
  function axial_2d_pattern (line 120) | def axial_2d_pattern(H, W):
  function swin_attention_pattern (line 124) | def swin_attention_pattern(H, W, window_size, shift_size=0):
  function dilated_2d_pattern (line 155) | def dilated_2d_pattern(H, W, k=2):
  function block_sparsify_tensor (line 168) | def block_sparsify_tensor(x, mask, block_size):
  function pattern_to_layout (line 186) | def pattern_to_layout(mask: torch.Tensor, block_size: int) -> torch.Tensor:
  function alibi_pattern (line 214) | def alibi_pattern(threshold: float, mask_shape: torch.Size) -> torch.Ten...
  function layout_to_pattern (line 263) | def layout_to_pattern(layout: torch.Tensor, block_size: int):

FILE: xformers/csrc/attention/attention.cpp
  function STABLE_TORCH_LIBRARY_FRAGMENT (line 10) | STABLE_TORCH_LIBRARY_FRAGMENT(xformers, m) {

FILE: xformers/csrc/attention/hip_decoder/attention_forward_splitk.cpp
  type c10_to_data_t (line 26) | struct c10_to_data_t
  type c10_to_data_t<float> (line 28) | struct c10_to_data_t<float> {
  type c10_to_data_t<c10::Half> (line 33) | struct c10_to_data_t<c10::Half> {
  type c10_to_data_t<c10::BFloat16> (line 38) | struct c10_to_data_t<c10::BFloat16> {
  function instantiate_and_launch_kernels (line 58) | void instantiate_and_launch_kernels(
  function efficient_attention_forward_decoder_splitk_ck_impl (line 251) | at::Tensor efficient_attention_forward_decoder_splitk_ck_impl(
  function efficient_attention_forward_decoder_splitk_ck (line 293) | at::Tensor efficient_attention_forward_decoder_splitk_ck(
  function TORCH_LIBRARY_IMPL (line 312) | TORCH_LIBRARY_IMPL(xformers, CUDA, m) {

FILE: xformers/csrc/attention/hip_decoder/ck_tile_attention_forward_decoder_splitk.h
  function a_u (line 18) | union {
  function __device__ (line 32) | __device__ __forceinline__ wavefrontReduce(float val, F f) {
  function load_v (line 41) | void load_v(
  function store_v (line 49) | void store_v(
  function namespace (line 58) | namespace ck_tile {

FILE: xformers/csrc/attention/hip_decoder/ck_tile_attention_inner_product.h
  function namespace (line 11) | namespace ck_tile {

FILE: xformers/csrc/attention/hip_fmha/attention_backward_generic_ck_tiled.cpp
  function efficient_attention_backward_ck (line 36) | std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>
  function efficient_attention_backward_ck_meta (line 548) | std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>
  function TORCH_LIBRARY_IMPL (line 630) | TORCH_LIBRARY_IMPL(xformers, CUDA, m) {
  function TORCH_LIBRARY_IMPL (line 636) | TORCH_LIBRARY_IMPL(xformers, Meta, m) {

FILE: xformers/csrc/attention/hip_fmha/attention_ck_rand_uniform.cpp
  function rand_uniform_int (line 27) | at::Tensor rand_uniform_int(
  function TORCH_LIBRARY_IMPL (line 93) | TORCH_LIBRARY_IMPL(xformers, CUDA, m) {

FILE: xformers/csrc/attention/hip_fmha/attention_forward_generic_ck_tiled.cpp
  function efficient_attention_forward_ck (line 50) | std::tuple<at::Tensor, std::optional<at::Tensor>, int64_t, int64_t>
  function efficient_attention_forward_ck_meta (line 475) | std::tuple<at::Tensor, std::optional<at::Tensor>, int64_t, int64_t>
  function TORCH_LIBRARY_IMPL (line 523) | TORCH_LIBRARY_IMPL(xformers, CUDA, m) {
  function TORCH_LIBRARY_IMPL (line 529) | TORCH_LIBRARY_IMPL(xformers, Meta, m) {

FILE: xformers/csrc/attention/hip_fmha/ck_fmha_test.cpp
  function is_ck_fmha_available (line 14) | bool is_ck_fmha_available(double val) {
  function TORCH_LIBRARY_FRAGMENT (line 21) | TORCH_LIBRARY_FRAGMENT(xformers, m) {

FILE: xformers/csrc/attention/hip_fmha/ck_fmha_util.h
  function at (line 61) | static inline at::Tensor get_bias_4d_view(
  function get_number_of_cu (line 95) | static inline int get_number_of_cu() {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_backward.h
  function Run (line 58) | static void Run(BatchedBackwardParams& param, hipStream_t stream) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_backward_bf16.cpp
  function batched_backward_bf16 (line 16) | void batched_backward_bf16(BatchedBackwardParams& param, hipStream_t str...

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_backward_fp16.cpp
  function batched_backward_fp16 (line 16) | void batched_backward_fp16(BatchedBackwardParams& param, hipStream_t str...

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_forward_bf16.cpp
  function batched_forward_bf16 (line 16) | void batched_forward_bf16(BatchedForwardParams& param, hipStream_t strea...

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_forward_fp16.cpp
  function batched_forward_fp16 (line 16) | void batched_forward_fp16(BatchedForwardParams& param, hipStream_t strea...

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_forward_splitkv_smallq_dispatch.h
  function else (line 25) | struct batched_forward_splitkv_smallq_mask_bias_dropout_dispatch {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_infer_bf16.cpp
  function batched_infer_bf16 (line 15) | void batched_infer_bf16(BatchedForwardParams& param, hipStream_t stream) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_infer_fp16.cpp
  function batched_infer_fp16 (line 15) | void batched_infer_fp16(BatchedForwardParams& param, hipStream_t stream) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_infer_splitkv_smallq_dispatch.h
  function else (line 25) | struct batched_infer_splitkv_smallq_mask_bias_dropout_dispatch {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_bwd_setting.h
  function fp16_t (line 17) | struct FmhaBwdTypeConfig<ck_tile::fp16_t> {
  function bf16_t (line 36) | struct FmhaBwdTypeConfig<ck_tile::bf16_t> {
  type FmhaBwdBlockTile (line 58) | struct FmhaBwdBlockTile
  type FmhaBwdBlockTile (line 66) | struct FmhaBwdBlockTile
  type FmhaBwdBlockTile (line 74) | struct FmhaBwdBlockTile
  type FmhaBwdBlockTile (line 82) | struct FmhaBwdBlockTile
  type FmhaBwdBlockTile (line 91) | struct FmhaBwdBlockTile
  type FmhaBwdShape (line 107) | struct FmhaBwdShape
  type FmhaBwdShape (line 121) | struct FmhaBwdShape
  type FmhaBwdShape (line 135) | struct FmhaBwdShape
  type FmhaBwdShape (line 149) | struct FmhaBwdShape
  type FmhaBwdShape (line 163) | struct FmhaBwdShape
  type FmhaBwdPipelineMaker (line 183) | struct FmhaBwdPipelineMaker
  type FmhaBwdBlockDropoutMaker (line 203) | struct FmhaBwdBlockDropoutMaker

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_setting.h
  type FmhaFwdBlockTile (line 47) | struct FmhaFwdBlockTile
  type FmhaFwdBlockTile (line 54) | struct FmhaFwdBlockTile
  type FmhaFwdShape (line 83) | struct FmhaFwdShape
  type FmhaFwdShape (line 128) | struct FmhaFwdShape
  type FmhaFwdShape (line 139) | struct FmhaFwdShape
  function get_fmha_fwd_mtile (line 177) | static int get_fmha_fwd_mtile(
  function get_fmha_fwd_least_mtile (line 195) | static int get_fmha_fwd_least_mtile() {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_splitkv_selector.h
  function generate_splits_list (line 19) | static int generate_splits_list(int i) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_splitkv_setting.h
  type FmhaFwdSplitKVBlockTile (line 47) | struct FmhaFwdSplitKVBlockTile
  type FmhaFwdSplitKVBlockTile (line 54) | struct FmhaFwdSplitKVBlockTile
  type FmhaFwdSplitKVShape (line 72) | struct FmhaFwdSplitKVShape
  type FmhaFwdSplitKVShape (line 117) | struct FmhaFwdSplitKVShape
  type FmhaFwdSplitKVShape (line 128) | struct FmhaFwdSplitKVShape
  function fwd_splitkv_get_mtile_size (line 153) | int fwd_splitkv_get_mtile_size() {
  function get_mtile_size_for_splitkv (line 159) | static int get_mtile_size_for_splitkv(int max_seqlen_q, int max_headdim) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_splitkv_smallq_selector.h
  function use_splitkv_smallq (line 13) | static bool use_splitkv_smallq(int max_seqlen_q, int max_headdim) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_splitkv_smallq_setting.h
  type FmhaFwdSplitKVSmallQBlockTile (line 19) | struct FmhaFwdSplitKVSmallQBlockTile
  type FmhaFwdSplitKVSmallQBlockTile (line 26) | struct FmhaFwdSplitKVSmallQBlockTile
  type FmhaFwdSplitKVSmallQBlockTile (line 33) | struct FmhaFwdSplitKVSmallQBlockTile
  type FmhaFwdSplitKVSmallQBlockTile (line 40) | struct FmhaFwdSplitKVSmallQBlockTile
  type FmhaFwdSplitKVSmallQBlockTile (line 47) | struct FmhaFwdSplitKVSmallQBlockTile
  type FmhaFwdSplitKVSmallQShape (line 60) | struct FmhaFwdSplitKVSmallQShape
  type FmhaFwdSplitKVSmallQShape (line 71) | struct FmhaFwdSplitKVSmallQShape
  type FmhaFwdSplitKVSmallQShape (line 82) | struct FmhaFwdSplitKVSmallQShape
  type FmhaFwdSplitKVSmallQShape (line 93) | struct FmhaFwdSplitKVSmallQShape
  type FmhaFwdSplitKVSmallQShape (line 104) | struct FmhaFwdSplitKVSmallQShape
  function get_mtile_size_for_splitkv_smallq (line 121) | static int get_mtile_size_for_splitkv_smallq(int max_headdim) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_type_config.h
  function fp16_t (line 15) | struct FmhaFwdTypeConfig<ck_tile::fp16_t> {
  function bf16_t (line 31) | struct FmhaFwdTypeConfig<ck_tile::bf16_t> {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_backward.h
  function Run (line 58) | static void Run(GroupedBackwardParams& param, hipStream_t stream) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_backward_bf16.cpp
  function grouped_backward_bf16 (line 16) | void grouped_backward_bf16(GroupedBackwardParams& param, hipStream_t str...

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_backward_fp16.cpp
  function grouped_backward_fp16 (line 16) | void grouped_backward_fp16(GroupedBackwardParams& param, hipStream_t str...

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_forward_bf16.cpp
  function grouped_forward_bf16 (line 16) | void grouped_forward_bf16(GroupedForwardParams& param, hipStream_t strea...

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_forward_fp16.cpp
  function grouped_forward_fp16 (line 16) | void grouped_forward_fp16(GroupedForwardParams& param, hipStream_t strea...

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_forward_splitkv_smallq_dispatch.h
  function else (line 25) | struct grouped_forward_splitkv_smallq_mask_bias_dropout_dispatch {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_infer_bf16.cpp
  function grouped_infer_bf16 (line 15) | void grouped_infer_bf16(GroupedForwardParams& param, hipStream_t stream) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_infer_fp16.cpp
  function grouped_infer_fp16 (line 15) | void grouped_infer_fp16(GroupedForwardParams& param, hipStream_t stream) {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_infer_splitkv_smallq_dispatch.h
  function else (line 25) | struct grouped_infer_splitkv_smallq_mask_bias_dropout_dispatch {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_fmha_params.h
  type BatchedInferParams (line 12) | struct BatchedInferParams {
  function BatchedInferParams (line 42) | struct BatchedForwardParams : public BatchedInferParams {
  type GroupedInferParams (line 70) | struct GroupedInferParams {
  function GroupedInferParams (line 114) | struct GroupedForwardParams : public GroupedInferParams {
  type BatchedBackwardParams (line 144) | struct BatchedBackwardParams {
  type GroupedBackwardParams (line 202) | struct GroupedBackwardParams {

FILE: xformers/csrc/attention/hip_fmha/ck_tiled_rand_uniform_kernel.h
  function __device__ (line 25) | __device__ static constexpr auto GetBlockGemm() {
  type FmhaRandUniformCommonKargs (line 54) | struct FmhaRandUniformCommonKargs {
  function FmhaRandUniformCommonKargs (line 72) | struct FmhaRandUniformBatchModeKargs : FmhaRandUniformCommonKargs {
  function FmhaRandUniformCommonKargs (line 76) | struct FmhaRandUniformGroupModeKargs : FmhaRandUniformCommonKargs {
  function Kargs (line 99) | Kargs kargs{
  function Kargs (line 129) | Kargs kargs{

FILE: xformers/csrc/attention/hip_fmha/generate_instances.py
  function create_infer_instances (line 127) | def create_infer_instances(instance_dir: Path, headdims: List) -> None:
  function create_infer_instances_ref (line 165) | def create_infer_instances_ref(instance_dir: Path, headdims: List) -> None:
  function create_forward_instances (line 198) | def create_forward_instances(instance_dir: Path, headdims: List) -> None:
  function create_forward_instances_ref (line 236) | def create_forward_instances_ref(instance_dir: Path, headdims: List) -> ...
  function create_backward_instances (line 271) | def create_backward_instances(instance_dir: Path, headdims: List) -> None:
  function create_backward_instances_ref (line 315) | def create_backward_instances_ref(instance_dir: Path, headdims: List) ->...

FILE: xformers/csrc/pt_stable_utils.h
  function T (line 56) | inline T ceil_div(T a, T b) {
  function xf_get_layout (line 68) | inline int32_t xf_get_layout(const torch::stable::Tensor& self) {
  function xf_is_sparse (line 74) | inline bool xf_is_sparse(const torch::stable::Tensor& self) {
  function torch (line 170) | inline torch::stable::Tensor xf_new_full(
  function torch (line 198) | inline torch::stable::Tensor xf_resize_(

FILE: xformers/csrc/sparse24/compute_sparse_tile.h
  function namespace (line 13) | namespace xformers {

FILE: xformers/csrc/sparse24/sparse24.cpp
  function STABLE_TORCH_LIBRARY_FRAGMENT (line 3) | STABLE_TORCH_LIBRARY_FRAGMENT(xformers, m) {

FILE: xformers/csrc/sparse24/sparse24_metadata.h
  function namespace (line 12) | namespace xformers {
  function CUTLASS_HOST_DEVICE (line 105) | CUTLASS_HOST_DEVICE
  function CUTLASS_HOST_DEVICE (line 127) | CUTLASS_HOST_DEVICE
  function CUTLASS_HOST_DEVICE (line 136) | CUTLASS_HOST_DEVICE
  function MetadataCutlassSm80 (line 147) | struct MetadataCutlassSm80 {
  function CUTLASS_HOST_DEVICE (line 198) | CUTLASS_HOST_DEVICE
  function CUTLASS_HOST_DEVICE (line 216) | CUTLASS_HOST_DEVICE
  function CUTLASS_HOST_DEVICE (line 226) | CUTLASS_HOST_DEVICE
  type MetadataCutlass8bitsSm90 (line 242) | struct MetadataCutlass8bitsSm90 {

FILE: xformers/csrc/sparse24/sparse24_pack.h
  function namespace (line 11) | namespace xformers {

FILE: xformers/csrc/sparse24/static_sort.h
  function CUTLASS_HOST_DEVICE (line 24) | CUTLASS_HOST_DEVICE Swap(A& a, const int& i0, const int& i1) {
  function CUTLASS_HOST_DEVICE (line 31) | CUTLASS_HOST_DEVICE PB(A& a) {
  type PB (line 52) | struct PB
  function CUTLASS_HOST_DEVICE (line 53) | CUTLASS_HOST_DEVICE PB(A& a) {
  type PB (line 60) | struct PB
  function CUTLASS_HOST_DEVICE (line 61) | CUTLASS_HOST_DEVICE PB(A& a) {
  type PS (line 78) | struct PS
  function CUTLASS_HOST_DEVICE (line 79) | CUTLASS_HOST_DEVICE PS(A& a) {}

FILE: xformers/csrc/sparse24/warp_tensor.h
  function namespace (line 9) | namespace xformers {
  function TileValueOrdered1d (line 313) | struct TileValueOrdered1d {
  type Identity (line 398) | struct Identity {

FILE: xformers/fwbw_overlap.py
  class EventHandle (line 24) | class EventHandle:  # type: ignore[no-redef]
    method __init__ (line 25) | def __init__(self) -> None:
    method current_stream_wait (line 28) | def current_stream_wait(self) -> None:
  class EventOverlap (line 31) | class EventOverlap:  # type: ignore[no-redef]
    method __init__ (line 32) | def __init__(self, event: Union[EventHandle, None] = None) -> None:
    method current_stream_wait (line 35) | def current_stream_wait(self) -> None:
  class EventOverlapHolder (line 43) | class EventOverlapHolder(torch.Tensor):
    method capture (line 55) | def capture(
    method __new__ (line 68) | def __new__(
    method __init__ (line 83) | def __init__(
    method __tensor_flatten__ (line 94) | def __tensor_flatten__(self):
    method __repr__ (line 97) | def __repr__(self) -> str:  # type: ignore
    method current_stream_wait (line 100) | def current_stream_wait(self) -> None:
    method __torch_dispatch__ (line 107) | def __torch_dispatch__(
  class _ExitCompute (line 137) | class _ExitCompute(torch.autograd.Function):
    method forward (line 147) | def forward(ctx: torch.autograd.function.FunctionCtx, *tensors: torch....
    method backward (line 154) | def backward(  # type: ignore
  class _EnterCompute (line 166) | class _EnterCompute(torch.autograd.Function):
    method forward (line 176) | def forward(
    method backward (line 189) | def backward(ctx: torch.autograd.function.FunctionCtx, *gtensors: torc...
  class _FillGradientForOverlapHolder (line 198) | class _FillGradientForOverlapHolder(torch.autograd.Function):
    method forward (line 206) | def forward(
    method backward (line 220) | def backward(  # type: ignore
  function enter_comm (line 242) | def enter_comm(
  function enter_compute (line 253) | def enter_compute(
  function enter_compute (line 264) | def enter_compute(
  function enter_compute (line 273) | def enter_compute(  # type: ignore
  class PhaseBoundary (line 286) | class PhaseBoundary:
    method __post_init__ (line 293) | def __post_init__(self) -> None:
    method __str__ (line 297) | def __str__(self) -> str:
    method __call__ (line 303) | def __call__(self) -> None:
  class InitialBw (line 327) | class InitialBw:
    method __init__ (line 328) | def __init__(self, trigger_bw: Callable[[], None]) -> None:
    method __call__ (line 331) | def __call__(self) -> None:
  class _GlobalAutogradThread (line 347) | class _GlobalAutogradThread:
    method run (line 355) | def run(cls) -> None:
    method cleanup_at_exit (line 379) | def cleanup_at_exit(cls) -> None:
  function async_bw (line 387) | def async_bw(backward_fn: Callable[[], None]) -> threading.Semaphore:
  class _WaitInBW (line 397) | class _WaitInBW(torch.autograd.Function):
    method forward (line 399) | def forward(
    method backward (line 411) | def backward(ctx: torch.autograd.function.FunctionCtx, *gx: torch.Tens...
  class _CurrentForwardState (line 432) | class _CurrentForwardState:
  function before_forward (line 445) | def before_forward(record_fw_chunks: bool) -> None:
  function enter_phase (line 455) | def enter_phase(enter: str, *tensors: torch.Tensor) -> tuple[torch.Tenso...
  function flush_single_bw_chunk (line 480) | def flush_single_bw_chunk() -> bool:
  function flush_pending_bw (line 488) | def flush_pending_bw() -> None:
  function overlap_fw_bw (line 497) | def overlap_fw_bw(
  function _overlap_fw_bw (line 508) | def _overlap_fw_bw(

FILE: xformers/info.py
  function get_features_status (line 16) | def get_features_status() -> Dict[str, str]:
  function print_info (line 25) | def print_info():

FILE: xformers/ops/__init__.py
  function masked_matmul (line 44) | def masked_matmul(a, b, mask=None):

FILE: xformers/ops/_triton/k_index_select_cat.py
  function index_select_cat_fwd_kernel (line 12) | def index_select_cat_fwd_kernel(
  function index_select_cat_fwd (line 38) | def index_select_cat_fwd(
  function index_select_cat_bwd_kernel (line 83) | def index_select_cat_bwd_kernel(
  function index_select_cat_bwd (line 126) | def index_select_cat_bwd(

FILE: xformers/ops/_triton/k_scaled_index_add.py
  function scaled_index_add_fwd_kernel (line 14) | def scaled_index_add_fwd_kernel(
  function scaled_index_add_fwd (line 77) | def scaled_index_add_fwd(
  function scaled_index_add_bwd_kernel (line 176) | def scaled_index_add_bwd_kernel(
  function scaled_index_add_bwd (line 256) | def scaled_index_add_bwd(

FILE: xformers/ops/_triton/matmul_perf_model.py
  function get_clock_rate_in_khz (line 45) | def get_clock_rate_in_khz():
  function get_tensorcore_tflops (line 56) | def get_tensorcore_tflops(device, num_ctas, num_warps, dtype):
  function get_simd_tflops (line 70) | def get_simd_tflops(device, num_ctas, num_warps, dtype):
  function get_tflops (line 84) | def get_tflops(device, num_ctas, num_warps, dtype):
  function estimate_matmul_time (line 91) | def estimate_matmul_time(
  function early_config_prune (line 173) | def early_config_prune(configs, named_args, **kwargs):

FILE: xformers/ops/_triton/rmsnorm_kernels.py
  function _rms_norm_kernel (line 15) | def _rms_norm_kernel(
  function _rms_norm_add_kernel (line 51) | def _rms_norm_add_kernel(
  function _rms_norm_forward (line 94) | def _rms_norm_forward(x, attn_norm_weights, eps):
  function _rms_norm_add_forward (line 125) | def _rms_norm_add_forward(x, y, attn_norm_weights, eps):

FILE: xformers/ops/_triton/rope_padded_kernels.py
  function _rope_padded_kernel (line 14) | def _rope_padded_kernel(

FILE: xformers/ops/_triton/tiled_matmul_kernels.py
  function init_to_zero (line 20) | def init_to_zero(*names):
  function gen_config (line 28) | def gen_config(
  function our_estimate_matmul_time (line 112) | def our_estimate_matmul_time(
  function our_early_config_prune (line 129) | def our_early_config_prune(config, named_args, **kwargs):
  function _xformers_tiled_matmul_kernel (line 158) | def _xformers_tiled_matmul_kernel(
  function _check_row_or_column (line 349) | def _check_row_or_column(row_or_col_type, row_or_col_idx, tensor_name, d...
  function _get_strides (line 360) | def _get_strides(
  function _launch_triton_matmul (line 383) | def _launch_triton_matmul(

FILE: xformers/ops/common.py
  function get_operator (line 11) | def get_operator(library: str, name: str):
  function get_xformers_operator (line 23) | def get_xformers_operator(name: str):
  class BaseOperator (line 27) | class BaseOperator:
    method is_available (line 33) | def is_available(cls) -> bool:
  function register_operator (line 49) | def register_operator(cls: ClsT) -> ClsT:
  function _get_storage_base (line 63) | def _get_storage_base(x: torch.Tensor) -> int:

FILE: xformers/ops/differentiable_collectives.py
  function all_reduce (line 13) | def all_reduce(
  function gather_along_first_dim_async (line 25) | def gather_along_first_dim_async(
  function reduce_scatter_along_first_dim_async (line 43) | def reduce_scatter_along_first_dim_async(
  function gather_along_first_dim (line 63) | def gather_along_first_dim(
  function reduce_scatter_along_first_dim (line 72) | def reduce_scatter_along_first_dim(
  class _CopyToModelParallelRegion (line 83) | class _CopyToModelParallelRegion(torch.autograd.Function):
    method forward (line 85) | def forward(  # type: ignore[override]
    method backward (line 92) | def backward(  # type: ignore[override]
  function copy_to_model_parallel_region (line 99) | def copy_to_model_parallel_region(
  class _ReduceFromModelParallelRegion (line 107) | class _ReduceFromModelParallelRegion(torch.autograd.Function):
    method forward (line 109) | def forward(  # type: ignore[override]
    method backward (line 117) | def backward(  # type: ignore[override]
  function reduce_from_model_parallel_region (line 123) | def reduce_from_model_parallel_region(
  class _GatherFromSequenceParallelRegion (line 131) | class _GatherFromSequenceParallelRegion(torch.autograd.Function):
    method forward (line 133) | def forward(  # type: ignore[override]
    method backward (line 140) | def backward(  # type: ignore[override]
  function gather_from_sequence_parallel_region (line 151) | def gather_from_sequence_parallel_region(
  class _ScatterToSequenceParallelRegion (line 159) | class _ScatterToSequenceParallelRegion(torch.autograd.Function):
    method forward (line 161) | def forward(  # type: ignore[override]
    method backward (line 168) | def backward(  # type: ignore[override]
  function scatter_to_sequence_parallel_region (line 177) | def scatter_to_sequence_parallel_region(

FILE: xformers/ops/fmha/__init__.py
  function _deserialize_bias (line 55) | def _deserialize_bias(attn_bias_ctx, attn_bias_tensor: Optional[torch.Te...
  function _serialize_op (line 71) | def _serialize_op(op):
  function _unserialize_op (line 77) | def _unserialize_op(op):
  class _fMHA (line 83) | class _fMHA(torch.autograd.Function):
    method forward (line 86) | def forward(ctx, op_fw, op_bw, *args: Any) -> Any:
    method backward (line 168) | def backward(ctx, grad, grad_lse):
  function memory_efficient_attention (line 199) | def memory_efficient_attention(
  function memory_efficient_attention_forward_meta (line 332) | def memory_efficient_attention_forward_meta(q, k, v):
  function memory_efficient_attention_forward_torch_wrapper (line 339) | def memory_efficient_attention_forward_torch_wrapper(
  function memory_efficient_attention_forward (line 367) | def memory_efficient_attention_forward(
  function memory_efficient_attention_forward_requires_grad (line 395) | def memory_efficient_attention_forward_requires_grad(
  function memory_efficient_attention_backward (line 431) | def memory_efficient_attention_backward(
  function _memory_efficient_attention (line 467) | def _memory_efficient_attention(
  function _memory_efficient_attention_forward (line 485) | def _memory_efficient_attention_forward(
  function _memory_efficient_attention_forward_requires_grad (line 499) | def _memory_efficient_attention_forward_requires_grad(
  function _detect_lse_packed_or_raise (line 513) | def _detect_lse_packed_or_raise(lse: torch.Tensor, inp: Inputs) -> Optio...
  function _memory_efficient_attention_backward (line 552) | def _memory_efficient_attention_backward(
  function memory_efficient_attention_partial (line 597) | def memory_efficient_attention_partial(
  function merge_attentions (line 638) | def merge_attentions(

FILE: xformers/ops/fmha/_triton/splitk_kernels.py
  function _fwd_kernel_splitK (line 31) | def _fwd_kernel_splitK(
  function gen_config (line 589) | def gen_config(
  function _get_splitk_kernel (line 607) | def _get_splitk_kernel(num_groups):
  function early_config_prune (line 631) | def early_config_prune(configs, named_args, **kwargs):
  function autotune_kernel (line 643) | def autotune_kernel(kernel: Callable):
  function get_autotuner_cache (line 683) | def get_autotuner_cache(
  function set_autotuner_cache (line 692) | def set_autotuner_cache(
  function load_dequantize_k_v_group (line 699) | def load_dequantize_k_v_group(
  function cast_uint32_to_half2 (line 784) | def cast_uint32_to_half2(scale_shift):
  function cast_uint32_to_float (line 794) | def cast_uint32_to_float(scale_shift):
  function dequantize_k_hip (line 804) | def dequantize_k_hip(
  function dequantize (line 852) | def dequantize(
  function _splitK_reduce (line 908) | def _splitK_reduce(
  function _splitK_reduce_varargs (line 1024) | def _splitK_reduce_varargs(
  function _splitK_reduce_varargs_backward (line 1136) | def _splitK_reduce_varargs_backward(

FILE: xformers/ops/fmha/attn_bias.py
  function _to_device (line 39) | def _to_device(t: torch.Tensor, device: torch.device) -> torch.Tensor:
  function _to_device_tensor (line 48) | def _to_device_tensor(seq: Sequence[int], dtype: torch.dtype, device: to...
  class AttentionBias (line 55) | class AttentionBias:
    method materialize (line 89) | def materialize(
  function _get_default_bias_device (line 104) | def _get_default_bias_device(device: Optional[torch.device] = None) -> t...
  function _materialize_causal_mask (line 114) | def _materialize_causal_mask(
  class LowerTriangularMask (line 142) | class LowerTriangularMask(AttentionBias):
    method __init__ (line 153) | def __init__(self, device: Union[torch.device, None] = None) -> None:
    method to (line 156) | def to(self, device: torch.device) -> "LowerTriangularMask":
    method materialize (line 160) | def materialize(
    method add_bias (line 168) | def add_bias(self, bias: torch.Tensor) -> "LowerTriangularMaskWithTens...
  class LocalAttentionFromBottomRightMask (line 176) | class LocalAttentionFromBottomRightMask(AttentionBias):
    method to (line 221) | def to(self, device) -> "LocalAttentionFromBottomRightMask":
    method __post_init__ (line 224) | def __post_init__(self) -> None:
    method materialize (line 238) | def materialize(
  class LowerTriangularFromBottomRightMask (line 261) | class LowerTriangularFromBottomRightMask(AttentionBias):
    method to (line 281) | def to(self, device: torch.device) -> "LowerTriangularFromBottomRightM...
    method materialize (line 287) | def materialize(
    method make_local_attention (line 297) | def make_local_attention(
  class LowerTriangularFromBottomRightLocalAttentionMask (line 309) | class LowerTriangularFromBottomRightLocalAttentionMask(
    method to (line 331) | def to(
    method __post_init__ (line 339) | def __post_init__(self) -> None:
    method materialize (line 345) | def materialize(
  class LowerTriangularMaskWithTensorBias (line 360) | class LowerTriangularMaskWithTensorBias(LowerTriangularMask):
    method __init__ (line 363) | def __init__(self, bias: torch.Tensor) -> None:
    method to (line 366) | def to(self, device: torch.device) -> "LowerTriangularMaskWithTensorBi...
    method materialize (line 372) | def materialize(
  class _SeqLenInfo (line 382) | class _SeqLenInfo:
    method to (line 400) | def to(self, device: torch.device) -> "_SeqLenInfo":
    method intervals (line 411) | def intervals(self) -> Iterable[Tuple[int, int]]:
    method _get_seqstart (line 415) | def _get_seqstart(
    method from_seqlens (line 436) | def from_seqlens(
    method from_seqlens_inplace (line 454) | def from_seqlens_inplace(self, seqlens: Iterable[int]) -> None:
    method split (line 475) | def split(
  class _PaddedSeqLenInfo (line 500) | class _PaddedSeqLenInfo(_SeqLenInfo):
    method __post_init__ (line 542) | def __post_init__(self) -> None:
    method to (line 545) | def to(self, device: torch.device) -> "_PaddedSeqLenInfo":
    method intervals (line 561) | def intervals(self) -> Iterable[Tuple[int, int]]:
    method from_seqlens (line 566) | def from_seqlens(
    method from_seqlens_padded (line 574) | def from_seqlens_padded(
    method from_seqlens_padded_inplace (line 602) | def from_seqlens_padded_inplace(self, seqlens: Sequence[int]) -> None:
    method split (line 629) | def split(
  class _GappySeqInfo (line 636) | class _GappySeqInfo(_SeqLenInfo):
    method to (line 689) | def to(self, device: torch.device) -> "_GappySeqInfo":
    method intervals (line 704) | def intervals(self) -> Iterable[Tuple[int, int]]:
    method from_seqlens (line 709) | def from_seqlens(
    method from_seqlens_gappy (line 715) | def from_seqlens_gappy(
    method split (line 746) | def split(
  class BlockDiagonalMask (line 753) | class BlockDiagonalMask(AttentionBias):
    method to (line 796) | def to(self, device) -> "BlockDiagonalMask":
    method _create_block_mask (line 804) | def _create_block_mask(
    method materialize (line 816) | def materialize(
    method from_seqlens (line 849) | def from_seqlens(
    method from_tensor_list (line 875) | def from_tensor_list(
    method from_tensor_lists_qkv (line 908) | def from_tensor_lists_qkv(
    method split_queries (line 936) | def split_queries(self, tensor: torch.Tensor) -> Sequence[torch.Tensor]:
    method split_kv (line 939) | def split_kv(self, tensor: torch.Tensor) -> Sequence[torch.Tensor]:
    method split (line 942) | def split(self, tensor: torch.Tensor) -> Sequence[torch.Tensor]:
    method make_causal (line 954) | def make_causal(self) -> "BlockDiagonalCausalMask":
    method make_causal_from_bottomright (line 962) | def make_causal_from_bottomright(self) -> "BlockDiagonalCausalFromBott...
    method make_local_attention (line 970) | def make_local_attention(
    method make_local_attention_from_bottomright (line 981) | def make_local_attention_from_bottomright(
  class BlockDiagonalCausalMask (line 994) | class BlockDiagonalCausalMask(BlockDiagonalMask):
    method to (line 1004) | def to(self, device) -> "BlockDiagonalCausalMask":
    method _create_block_mask (line 1012) | def _create_block_mask(
  class BlockDiagonalCausalFromBottomRightMask (line 1026) | class BlockDiagonalCausalFromBottomRightMask(BlockDiagonalMask):
    method to (line 1039) | def to(self, device) -> "BlockDiagonalCausalFromBottomRightMask":
    method __post_init__ (line 1049) | def __post_init__(self) -> None:
    method _create_block_mask (line 1064) | def _create_block_mask(
  class BlockDiagonalPaddedKeysMask (line 1076) | class BlockDiagonalPaddedKeysMask(AttentionBias):
    method to (line 1096) | def to(self, device) -> "BlockDiagonalPaddedKeysMask":
    method _create_block_mask (line 1103) | def _create_block_mask(
    method materialize (line 1111) | def materialize(
    method from_seqlens (line 1140) | def from_seqlens(
    method make_paged (line 1171) | def make_paged(
    method make_local_attention (line 1193) | def make_local_attention(
  class BlockDiagonalCausalWithOffsetPaddedKeysMask (line 1205) | class BlockDiagonalCausalWithOffsetPaddedKeysMask(BlockDiagonalPaddedKey...
    method to (line 1226) | def to(self, device) -> "BlockDiagonalCausalWithOffsetPaddedKeysMask":
    method _create_block_mask (line 1235) | def _create_block_mask(
    method from_seqlens (line 1246) | def from_seqlens(
  class BlockDiagonalLocalAttentionPaddedKeysMask (line 1279) | class BlockDiagonalLocalAttentionPaddedKeysMask(BlockDiagonalPaddedKeysM...
    method to (line 1300) | def to(self, device) -> "BlockDiagonalLocalAttentionPaddedKeysMask":
    method _create_block_mask (line 1311) | def _create_block_mask(
    method from_seqlens_local (line 1322) | def from_seqlens_local(
  class BlockDiagonalCausalLocalAttentionPaddedKeysMask (line 1345) | class BlockDiagonalCausalLocalAttentionPaddedKeysMask(BlockDiagonalPadde...
    method to (line 1360) | def to(self, device) -> "BlockDiagonalCausalLocalAttentionPaddedKeysMa...
    method _create_block_mask (line 1370) | def _create_block_mask(
    method from_seqlens_local (line 1385) | def from_seqlens_local(
  class PagedBlockDiagonalPaddedKeysMask (line 1402) | class PagedBlockDiagonalPaddedKeysMask(AttentionBias):
    method to (line 1419) | def to(self, device: torch.device) -> "PagedBlockDiagonalPaddedKeysMask":
    method materialize (line 1430) | def materialize(
    method from_seqlens (line 1470) | def from_seqlens(
  class PagedBlockDiagonalCausalWithOffsetPaddedKeysMask (line 1508) | class PagedBlockDiagonalCausalWithOffsetPaddedKeysMask(
    method to (line 1520) | def to(
  class BlockDiagonalGappyKeysMask (line 1535) | class BlockDiagonalGappyKeysMask(AttentionBias):
    method to (line 1546) | def to(self, device: torch.device) -> "BlockDiagonalGappyKeysMask":
    method materialize (line 1553) | def materialize(
    method from_seqlens (line 1576) | def from_seqlens(
    method make_paged (line 1598) | def make_paged(
  class BlockDiagonalCausalWithOffsetGappyKeysMask (line 1658) | class BlockDiagonalCausalWithOffsetGappyKeysMask(BlockDiagonalGappyKeysM...
    method to (line 1668) | def to(self, device: torch.device) -> "BlockDiagonalCausalWithOffsetGa...
    method materialize (line 1677) | def materialize(
  class PagedBlockDiagonalGappyKeysMask (line 1708) | class PagedBlockDiagonalGappyKeysMask(AttentionBias):
    method to (line 1725) | def to(self, device: torch.device) -> "PagedBlockDiagonalGappyKeysMask":
    method materialize (line 1736) | def materialize(
    method from_seqlens (line 1787) | def from_seqlens(
  class PagedBlockDiagonalCausalWithOffsetGappyKeysMask (line 1828) | class PagedBlockDiagonalCausalWithOffsetGappyKeysMask(PagedBlockDiagonal...
    method to (line 1838) | def to(
  class BlockDiagonalCausalLocalAttentionMask (line 1853) | class BlockDiagonalCausalLocalAttentionMask(BlockDiagonalCausalMask):
    method to (line 1865) | def to(self, device) -> "BlockDiagonalCausalLocalAttentionMask":
    method __post_init__ (line 1876) | def __post_init__(self):
    method _create_block_mask (line 1901) | def _create_block_mask(
  class BlockDiagonalCausalLocalAttentionFromBottomRightMask (line 1916) | class BlockDiagonalCausalLocalAttentionFromBottomRightMask(
    method to (line 1931) | def to(self, device) -> "BlockDiagonalCausalLocalAttentionFromBottomRi...
    method __post_init__ (line 1942) | def __post_init__(self):
    method _create_block_mask (line 1949) | def _create_block_mask(

FILE: xformers/ops/fmha/ck.py
  function _minimum_gemm_alignment (line 45) | def _minimum_gemm_alignment(inp: Inputs) -> int:
  function _get_seqlen_info (line 49) | def _get_seqlen_info(
  function _get_tensor_bias (line 90) | def _get_tensor_bias(
  function _check_bias_alignment (line 100) | def _check_bias_alignment(
  class _CustomMaskType (line 128) | class _CustomMaskType(int, Enum):
  function _custom_mask_type (line 138) | def _custom_mask_type(bias: Optional[Union[torch.Tensor, AttentionBias]]...
  class FwOp (line 165) | class FwOp(AttentionFwOpBase):
    method apply (line 222) | def apply(
    method apply_bmhk (line 280) | def apply_bmhk(
    method not_supported_reasons (line 353) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
  class BwOp (line 363) | class BwOp(AttentionBwOpBase):
    method not_supported_reasons (line 399) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method apply (line 432) | def apply(cls, ctx: Context, inp: Inputs, grad: torch.Tensor) -> Gradi...

FILE: xformers/ops/fmha/ck_splitk.py
  class FwOp (line 21) | class FwOp(AttentionFwOpBase):
    method shape_not_supported_reasons (line 47) | def shape_not_supported_reasons(
    method not_supported_reasons (line 56) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method get_split_k (line 95) | def get_split_k(cls, B: int, H: int, Mk: int) -> int:
    method apply (line 107) | def apply(
  class FwOp_S1 (line 171) | class FwOp_S1(FwOp):
  class FwOp_S2 (line 176) | class FwOp_S2(FwOp):
  class FwOp_S4 (line 181) | class FwOp_S4(FwOp):
  class FwOp_S8 (line 186) | class FwOp_S8(FwOp):
  class FwOp_S16 (line 191) | class FwOp_S16(FwOp):
  class FwOp_S32 (line 196) | class FwOp_S32(FwOp):
  class FwOp_S64 (line 201) | class FwOp_S64(FwOp):
  class FwOp_S128 (line 206) | class FwOp_S128(FwOp):

FILE: xformers/ops/fmha/common.py
  function _is_bias_type_supported_in_BMK (line 38) | def _is_bias_type_supported_in_BMK(attn_bias_type: Any) -> bool:
  function _attn_bias_apply (line 47) | def _attn_bias_apply(
  class ScaledTensor (line 58) | class ScaledTensor(torch.Tensor):
    method __new__ (line 64) | def __new__(
    method dequantize (line 95) | def dequantize(self) -> torch.Tensor:
    method unpack (line 109) | def unpack(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method __repr__ (line 117) | def __repr__(self):
  function pack_fp8_tensorwise_per_head (line 124) | def pack_fp8_tensorwise_per_head(
  class Inputs (line 145) | class Inputs:
    method device (line 160) | def device(self) -> torch.device:
    method scale_float (line 164) | def scale_float(self) -> float:
    method get_qkv_in_bmghk (line 167) | def get_qkv_in_bmghk(self) -> Tuple[torch.Tensor, torch.Tensor, torch....
    method normalize_bmhk (line 184) | def normalize_bmhk(self) -> Tuple[int, ...]:
    method validate_inputs (line 208) | def validate_inputs(self) -> None:
    method get_output_dtype (line 327) | def get_output_dtype(self) -> torch.dtype:
    method nbytes (line 335) | def nbytes(self) -> int:
  class Context (line 345) | class Context:
    method get_padded_lse (line 354) | def get_padded_lse(self, pad_to: int, force_pad_inf: bool = False) -> ...
  class Gradients (line 368) | class Gradients:
  class AttentionOpBase (line 376) | class AttentionOpBase(BaseOperator):
    method supports (line 416) | def supports(cls, d: Inputs) -> bool:
    method shape_not_supported_reasons (line 420) | def shape_not_supported_reasons(
    method not_supported_reasons (line 437) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
  class AttentionFwOpBase (line 508) | class AttentionFwOpBase(AttentionOpBase):
    method apply (line 521) | def apply(
  class AttentionBwOpBase (line 527) | class AttentionBwOpBase(AttentionOpBase):
    method not_supported_reasons (line 547) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method apply (line 561) | def apply(cls, ctx: Context, inp: Inputs, grad: torch.Tensor) -> Gradi...
  function bmk2bmhk (line 570) | def bmk2bmhk(tensor, num_heads: int) -> torch.Tensor:
  function check_lastdim_alignment_stride1 (line 578) | def check_lastdim_alignment_stride1(

FILE: xformers/ops/fmha/cutlass.py
  function _uses_tensorcores (line 40) | def _uses_tensorcores(sm: int, is_half: bool) -> bool:
  function _minimum_gemm_alignment (line 48) | def _minimum_gemm_alignment(inp: Inputs) -> int:
  function _get_seqlen_info (line 65) | def _get_seqlen_info(
  function _get_tensor_bias (line 86) | def _get_tensor_bias(
  function _check_bias_alignment (line 96) | def _check_bias_alignment(
  class _CustomMaskType (line 124) | class _CustomMaskType(int, Enum):
  function _custom_mask_type (line 134) | def _custom_mask_type(bias: Optional[Union[torch.Tensor, AttentionBias]]...
  class FwOp (line 159) | class FwOp(AttentionFwOpBase):
    method apply (line 202) | def apply(
    method apply_bmhk (line 266) | def apply_bmhk(
    method not_supported_reasons (line 317) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
  class BwOp (line 327) | class BwOp(AttentionBwOpBase):
    method not_supported_reasons (line 368) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method apply (line 400) | def apply(cls, ctx: Context, inp: Inputs, grad: torch.Tensor) -> Gradi...

FILE: xformers/ops/fmha/cutlass_blackwell.py
  function _get_operator (line 34) | def _get_operator(name: str):
  function _convert_input_format (line 63) | def _convert_input_format(
  function _is_seqlen_q_le_seqlen_k (line 156) | def _is_seqlen_q_le_seqlen_k(
  function _is_causal (line 169) | def _is_causal(attn_bias: Union[torch.Tensor, AttentionBias, None]) -> b...
  function _is_bottom_right (line 187) | def _is_bottom_right(attn_bias: Union[torch.Tensor, AttentionBias, None]...
  function _window_size (line 203) | def _window_size(
  class FwOp (line 231) | class FwOp(AttentionFwOpBase):
    method not_supported_reasons (line 267) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method shape_not_supported_reasons (line 290) | def shape_not_supported_reasons(
    method apply (line 301) | def apply(
  class BwOp (line 350) | class BwOp(AttentionBwOpBase):
    method not_supported_reasons (line 382) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method shape_not_supported_reasons (line 405) | def shape_not_supported_reasons(
    method apply (line 418) | def apply(cls, ctx: Context, inp: Inputs, grad: torch.Tensor) -> Gradi...

FILE: xformers/ops/fmha/dispatch.py
  function _set_use_fa3 (line 22) | def _set_use_fa3(use_flash_attention3: bool) -> None:
  function _get_use_fa3 (line 27) | def _get_use_fa3() -> bool:
  function fa3_available (line 32) | def fa3_available() -> bool:
  function _format_inputs_description (line 39) | def _format_inputs_description(inp: Inputs) -> str:
  function _ensure_op_supports_or_raise (line 47) | def _ensure_op_supports_or_raise(exc_type, name: str, op, inp: Inputs) -...
  function _format_not_supported_reasons (line 56) | def _format_not_supported_reasons(op, reasons: List[str]) -> str:
  function _run_priority_list (line 60) | def _run_priority_list(
  function _dispatch_fw_priority_list (line 84) | def _dispatch_fw_priority_list(
  function _dispatch_fw (line 131) | def _dispatch_fw(inp: Inputs, needs_gradient: bool) -> Type[AttentionFwO...
  function _dispatch_bw (line 147) | def _dispatch_bw(

FILE: xformers/ops/fmha/flash.py
  function _flash_fwd (line 92) | def _flash_fwd(
  function _flash_fwd_abstract (line 177) | def _flash_fwd_abstract(
  function _flash_bwd (line 211) | def _flash_bwd(
  function _flash_bwd_abstract (line 309) | def _flash_bwd_abstract(
  function _create_dq_dk_dv (line 320) | def _create_dq_dk_dv(
  function _convert_input_format (line 336) | def _convert_input_format(
  function _is_causal (line 438) | def _is_causal(attn_bias: Optional[Union[torch.Tensor, AttentionBias]]) ...
  function _is_paged_attention_supported (line 458) | def _is_paged_attention_supported(attn_bias_type) -> bool:
  function _window_size (line 470) | def _window_size(
  function _check_needs_no_topleft (line 497) | def _check_needs_no_topleft(d: Inputs, reasons: List[str]) -> None:
  function _check_strides_for_bmghk (line 519) | def _check_strides_for_bmghk(x: torch.Tensor, name: str, reasons: List[s...
  function _post_process_lse (line 537) | def _post_process_lse(
  class FwOp (line 558) | class FwOp(AttentionFwOpBase):
    method not_supported_reasons (line 603) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method apply (line 614) | def apply(
  class BwOp (line 694) | class BwOp(AttentionBwOpBase):
    method not_supported_reasons (line 727) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method apply (line 747) | def apply(cls, ctx: Context, inp: Inputs, grad: torch.Tensor) -> Gradi...

FILE: xformers/ops/fmha/flash3.py
  function maybe_contiguous (line 65) | def maybe_contiguous(x: T) -> T:
  function _flash_attention3_incompatible_reason (line 69) | def _flash_attention3_incompatible_reason() -> Optional[str]:
  function _heuristic_kvsplit (line 132) | def _heuristic_kvsplit(
  function mask_non_zeros (line 151) | def mask_non_zeros(s_q: int, s_k: int, window_left: int, window_right: i...
  function sdpa_flop_count (line 193) | def sdpa_flop_count(
  function mha_fwd (line 225) | def mha_fwd(
  function mha_fwd_fake (line 327) | def mha_fwd_fake(
  function mha_fwd_flops (line 365) | def mha_fwd_flops(
  function _create_dq_dk_dv (line 430) | def _create_dq_dk_dv(
  function mha_bwd (line 448) | def mha_bwd(
  function mha_bwd_fake (line 501) | def mha_bwd_fake(
  function mha_bwd_flops (line 521) | def mha_bwd_flops(
  function _check_different_value_headdim_ampere (line 568) | def _check_different_value_headdim_ampere(d: Inputs, reasons: List[str])...
  function _get_blocktables (line 582) | def _get_blocktables(inp_attn_bias) -> Optional[torch.Tensor]:
  class FwOp (line 594) | class FwOp(AttentionFwOpBase):
    method not_supported_reasons (line 648) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method apply (line 666) | def apply(
  class BwOp (line 768) | class BwOp(AttentionBwOpBase):
    method not_supported_reasons (line 802) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method apply (line 812) | def apply(cls, ctx: Context, inp: Inputs, grad: torch.Tensor) -> Gradi...
  class FwOp_KVSplit (line 872) | class FwOp_KVSplit(FwOp):
    method apply (line 901) | def apply(  # type: ignore[override]

FILE: xformers/ops/fmha/merge_training.py
  class _PartialFunc (line 39) | class _PartialFunc(torch.autograd.Function):
    method forward (line 41) | def forward(
    method backward (line 64) | def backward(  # type: ignore[override]
  class _MergeFunc (line 86) | class _MergeFunc(torch.autograd.Function):
    method forward (line 88) | def forward(
    method backward (line 104) | def backward(  # type: ignore[override]
  class Partial (line 111) | class Partial:
    method __init__ (line 125) | def __init__(
    method is_bmghk (line 138) | def is_bmghk(self) -> bool:
    method apply (line 141) | def apply(self, fn: Callable[[torch.Tensor], torch.Tensor]) -> "Partial":
    method _tuple (line 160) | def _tuple(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
  function memory_efficient_attention_partial_autograd (line 164) | def memory_efficient_attention_partial_autograd(
  function merge_attentions_autograd (line 184) | def merge_attentions_autograd(

FILE: xformers/ops/fmha/torch_attention_compat.py
  function is_pt_cutlass_compatible (line 10) | def is_pt_cutlass_compatible(force: bool = False) -> bool:
  function ensure_pt_flash_ok (line 62) | def ensure_pt_flash_ok() -> None:

FILE: xformers/ops/fmha/triton_splitk.py
  function _strides (line 42) | def _strides(x: Optional[torch.Tensor], *stride_names: str):
  function _is_supported_causal_bias (line 49) | def _is_supported_causal_bias(attn_bias: Any) -> bool:
  function _is_supported_local_bias (line 62) | def _is_supported_local_bias(attn_bias: Any) -> bool:
  function _is_supported_gappy_bias (line 72) | def _is_supported_gappy_bias(attn_bias: Any) -> bool:
  function _is_supported_paged_bias (line 82) | def _is_supported_paged_bias(attn_bias: Any) -> bool:
  class InputsFp8 (line 93) | class InputsFp8(Inputs):
    method nbytes (line 105) | def nbytes(self) -> int:
  function _is_cuda (line 131) | def _is_cuda() -> bool:
  function _is_cuda_at_least_sm80 (line 135) | def _is_cuda_at_least_sm80(device: torch.device) -> bool:
  class FwOp (line 143) | class FwOp(AttentionFwOpBase):
    method shape_not_supported_reasons (line 252) | def shape_not_supported_reasons(
    method not_supported_reasons (line 261) | def not_supported_reasons(cls, d: Inputs) -> List[str]:
    method get_split_k (line 337) | def get_split_k(
    method get_kernel (line 383) | def get_kernel(cls):
    method get_fp8_scale_shift (line 395) | def get_fp8_scale_shift(
    method get_extra_args (line 415) | def get_extra_args(
    method apply (line 606) | def apply(
    method get_operator (line 1016) | def get_operator(
  function merge_attentions (line 1047) | def merge_attentions(
  function merge_attentions_varargs (line 1106) | def merge_attentions_varargs(
  function merge_attentions_varargs_fake (line 1158) | def merge_attentions_varargs_fake(
  function _merge_attentions_backward (line 1184) | def _merge_attentions_backward(
  function merge_attentions_varargs_backward (line 1204) | def merge_attentions_varargs_backward(
  function merge_attentions_varargs_backward_fake (line 1243) | def merge_attentions_varargs_backward_fake(
  function _prepare_reduce_kernel_params (line 1256) | def _prepare_reduce_kernel_params(

FILE: xformers/ops/indexing.py
  class ScaledIndexAddFw (line 23) | class ScaledIndexAddFw(BaseOperator):
  class ScaledIndexAddBw (line 30) | class ScaledIndexAddBw(BaseOperator):
  class IndexSelect (line 37) | class IndexSelect(BaseOperator):
  class _ScaledIndexAdd (line 43) | class _ScaledIndexAdd(torch.autograd.Function):
    method forward (line 46) | def forward(
    method backward (line 69) | def backward(ctx, grad_output):
  function scaled_index_add (line 104) | def scaled_index_add(
  class _IndexSelectCat (line 132) | class _IndexSelectCat(torch.autograd.Function):
    method forward (line 135) | def forward(
    method backward (line 180) | def backward(ctx, grad_output):
  function index_select_cat (line 215) | def index_select_cat(

FILE: xformers/ops/modpar_layers.py
  function _init_2d_weight (line 18) | def _init_2d_weight(
  class ColumnParallelLinear (line 45) | class ColumnParallelLinear(torch.nn.Module):
    method __init__ (line 46) | def __init__(
    method forward (line 94) | def forward(self, input_: torch.Tensor) -> List[torch.Tensor]:
  class RowParallelLinear (line 108) | class RowParallelLinear(torch.nn.Module):
    method __init__ (line 109) | def __init__(
    method forward (line 149) | def forward(self, input_: torch.Tensor) -> torch.Tensor:

FILE: xformers/ops/rmsnorm.py
  function rms_norm (line 13) | def rms_norm(x, weight: Optional[torch.Tensor], eps: float = 1e-6):
  function rms_norm_add (line 42) | def rms_norm_add(
  class RMSNorm (line 72) | class RMSNorm(torch.nn.Module):
    method __init__ (line 91) | def __init__(self, dim: int, include_weight: bool = True, eps: float =...
    method forward (line 99) | def forward(self, x: torch.Tensor):
    method increment_and_forward_ (line 102) | def increment_and_forward_(self, x: torch.Tensor, y: torch.Tensor):

FILE: xformers/ops/rope_padded.py
  function rope_padded (line 16) | def rope_padded(

FILE: xformers/ops/seqpar.py
  function sequence_parallel_leading_matmul_fwd (line 32) | def sequence_parallel_leading_matmul_fwd(
  function sequence_parallel_leading_matmul_fwd_fake (line 56) | def sequence_parallel_leading_matmul_fwd_fake(
  function sequence_parallel_leading_matmul_bwd (line 74) | def sequence_parallel_leading_matmul_bwd(
  function sequence_parallel_leading_matmul_bwd_fake (line 170) | def sequence_parallel_leading_matmul_bwd_fake(
  function sequence_parallel_leading_matmul_setup_context (line 180) | def sequence_parallel_leading_matmul_setup_context(ctx, inputs, output):
  function sequence_parallel_leading_matmul_bwd_bridge (line 187) | def sequence_parallel_leading_matmul_bwd_bridge(ctx, grad_gathered_outpu...
  function sequence_parallel_leading_matmul (line 209) | def sequence_parallel_leading_matmul(
  function sequence_parallel_trailing_matmul_fwd (line 227) | def sequence_parallel_trailing_matmul_fwd(
  function sequence_parallel_trailing_matmul_fwd_fake (line 248) | def sequence_parallel_trailing_matmul_fwd_fake(
  function sequence_parallel_trailing_matmul_bwd (line 265) | def sequence_parallel_trailing_matmul_bwd(
  function sequence_parallel_trailing_matmul_bwd_fake (line 316) | def sequence_parallel_trailing_matmul_bwd_fake(
  function sequence_parallel_trailing_matmul_setup_context (line 326) | def sequence_parallel_trailing_matmul_setup_context(ctx, inputs, output):
  function sequence_parallel_trailing_matmul_bwd_bridge (line 333) | def sequence_parallel_trailing_matmul_bwd_bridge(ctx, grad_scattered_out...
  function sequence_parallel_trailing_matmul (line 355) | def sequence_parallel_trailing_matmul(

FILE: xformers/ops/sequence_parallel_fused_ops.py
  function _is_fp8_dtype (line 20) | def _is_fp8_dtype(dt: torch.dtype):
  class _FusedSequenceParallel (line 26) | class _FusedSequenceParallel:
    method __init__ (line 68) | def __init__(
    method make_stream_factory (line 89) | def make_stream_factory(
    method allgather_and_linear (line 100) | def allgather_and_linear(
    method linear_and_reducescatter (line 209) | def linear_and_reducescatter(
  function _can_ranks_communicate_all_to_all_over_nvlink (line 342) | def _can_ranks_communicate_all_to_all_over_nvlink(group: dist.ProcessGro...
  function _lazy_init (line 355) | def _lazy_init(
  function _default_stream_factory (line 374) | def _default_stream_factory() -> torch.cuda.Stream:
  function fused_allgather_and_linear (line 379) | def fused_allgather_and_linear(
  function fused_allgather_and_linear (line 394) | def fused_allgather_and_linear(
  function fused_allgather_and_linear (line 408) | def fused_allgather_and_linear(
  function _fused_allgather_and_linear_custom_op (line 522) | def _fused_allgather_and_linear_custom_op(
  function fused_allgather_and_anything (line 564) | def fused_allgather_and_anything(
  function fused_linear_and_reducescatter (line 620) | def fused_linear_and_reducescatter(
  function fused_linear_and_reducescatter (line 635) | def fused_linear_and_reducescatter(
  function fused_linear_and_reducescatter (line 649) | def fused_linear_and_reducescatter(
  function _fused_linear_and_reducescatter_custom_op (line 749) | def _fused_linear_and_reducescatter_custom_op(
  function fused_anything_and_reducescatter (line 791) | def fused_anything_and_reducescatter(

FILE: xformers/ops/sp24.py
  class SparsifyBothWays (line 18) | class SparsifyBothWays(BaseOperator):
  class SparsifyApply (line 25) | class SparsifyApply(BaseOperator):
  class SparsifyApplyDenseOutput (line 32) | class SparsifyApplyDenseOutput(BaseOperator):
  class Sp24Gemm (line 39) | class Sp24Gemm(BaseOperator):
  function _get_cusparselt_torch_version (line 45) | def _get_cusparselt_torch_version() -> Tuple[int, int, int]:
  class Sp24GemmCuspltSearch (line 62) | class Sp24GemmCuspltSearch(BaseOperator):
  class Sp24GemmCusplt (line 69) | class Sp24GemmCusplt(BaseOperator):
  function _has_cusparseLt (line 75) | def _has_cusparseLt() -> bool:
  function sparse24_pointwise_op (line 90) | def sparse24_pointwise_op(
  function sparse24_mm (line 140) | def sparse24_mm(func, types, args=(), kwargs=None) -> torch.Tensor:
  function sparse24_addmm (line 155) | def sparse24_addmm(func, types, args=(), kwargs=None) -> torch.Tensor:
  function sparse24_linear (line 175) | def sparse24_linear(func, types, args=(), kwargs=None) -> torch.Tensor:
  function sparse24_t (line 188) | def sparse24_t(func, types, args=(), kwargs=None) -> torch.Tensor:
  function sparse24_view (line 203) | def sparse24_view(func, types, args=(), kwargs=None) -> torch.Tensor:
  function sparse24_detach (line 213) | def sparse24_detach(func, types, args, kwargs) -> torch.Tensor:
  function no_dispatch (line 228) | def no_dispatch():
  function fallback_dispatcher (line 236) | def fallback_dispatcher(func, types, args, kwargs):
  class Sparse24Tensor (line 289) | class Sparse24Tensor(torch.Tensor):
    method __new__ (line 300) | def __new__(
    method __repr__ (line 326) | def __repr__(self):
    method _sp24_to_dense (line 329) | def _sp24_to_dense(self) -> torch.Tensor:
    method _mm (line 337) | def _mm(
    method __tensor_flatten__ (line 348) | def __tensor_flatten__(self):
    method __tensor_unflatten__ (line 352) | def __tensor_unflatten__(
  class Sparse24TensorCutlass (line 363) | class Sparse24TensorCutlass(Sparse24Tensor):
    method _mm (line 364) | def _mm(
    method __torch_dispatch__ (line 392) | def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
  function _cusplt_find_alg (line 410) | def _cusplt_find_alg(
  function _cusplt_mm (line 463) | def _cusplt_mm(
  function _cusplt_mm_meta (line 483) | def _cusplt_mm_meta(
  class Sparse24TensorCuSparseLt (line 497) | class Sparse24TensorCuSparseLt(Sparse24Tensor):
    method _mm (line 498) | def _mm(
    method __torch_dispatch__ (line 548) | def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
  function _sparsify24_forward (line 571) | def _sparsify24_forward(x: torch.Tensor, *, algo: str, backend: str) -> ...
  class _Sparsify24Func (line 600) | class _Sparsify24Func(torch.autograd.Function):
    method forward (line 602) | def forward(ctx, x: torch.Tensor, algo: str, gradient: str, backend: s...
    method backward (line 617) | def backward(ctx, grad_out: torch.Tensor):  # type: ignore[override]
  class _Sparsify24STEFunc (line 646) | class _Sparsify24STEFunc(torch.autograd.Function):
    method forward (line 648) | def forward(
    method backward (line 663) | def backward(ctx, grad_out: torch.Tensor):  # type: ignore[override]
  class _Sparsify24LikeFunc (line 680) | class _Sparsify24LikeFunc(torch.autograd.Function):
    method forward (line 682) | def forward(ctx, x: torch.Tensor, pattern: Sparse24Tensor, gradient: s...
    method backward (line 728) | def backward(ctx, grad_out: torch.Tensor):  # type: ignore[override]
  function allow_in_graph (line 769) | def allow_in_graph(func: F) -> F:
  function sparsify24 (line 774) | def sparsify24(
  function sparsify24_ste (line 784) | def sparsify24_ste(
  function sparsify24_like (line 801) | def sparsify24_like(

FILE: xformers/ops/swiglu_op.py
  class _SwiGLUDecomposedFunc (line 16) | class _SwiGLUDecomposedFunc(torch.autograd.Function):
    method _silu_backward (line 32) | def _silu_backward(dy, x):
    method forward (line 39) | def forward(cls, ctx, x, w1, b1, w2, b2, w3, b3):
    method backward (line 51) | def backward(cls, ctx, dx5):
  class SwiGLUOp (line 72) | class SwiGLUOp:
    method __init__ (line 75) | def __init__(self, op, packed_weights: bool, name: str, constraints):
    method supports (line 81) | def supports(self, op: "SwiGLUOpDispatch") -> bool:
    method __call__ (line 86) | def __call__(self, *args: Optional[torch.Tensor]) -> torch.Tensor:
    method __str__ (line 89) | def __str__(self) -> str:
  class _ForwardToPythonAutogradFunc (line 93) | class _ForwardToPythonAutogradFunc(SwiGLUOp):
    method supports (line 94) | def supports(self, op: "SwiGLUOpDispatch") -> bool:
    method __call__ (line 97) | def __call__(self, *args, **kwargs):
  class _ForwardToFunc (line 101) | class _ForwardToFunc(SwiGLUOp):
    method __call__ (line 102) | def __call__(self, *args, **kwargs):
    method info (line 105) | def info(self):
  function _eager_functional_swiglu (line 111) | def _eager_functional_swiglu(
  class SwiGLUOpDispatch (line 127) | class SwiGLUOpDispatch:
    method op (line 139) | def op(self) -> SwiGLUOp:
    method from_arguments (line 148) | def from_arguments(
  function _bias_enabled (line 170) | def _bias_enabled(op: SwiGLUOpDispatch) -> bool:
  function swiglu (line 185) | def swiglu(
  function swiglu_packed (line 262) | def swiglu_packed(
  class SwiGLU (line 302) | class SwiGLU(nn.Module):
    method __init__ (line 308) | def __init__(
    method forward (line 343) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method _ordered_params (line 361) | def _ordered_params(
    method _packed_ordered_params (line 398) | def _packed_ordered_params(

FILE: xformers/ops/tiled_matmul.py
  function _should_use_triton (line 16) | def _should_use_triton(device: torch.device, dtype: torch.dtype) -> bool:
  function check_inputs (line 32) | def check_inputs(
  function check_output (line 95) | def check_output(out: List[List[torch.Tensor]], ms: List[int], ns: List[...
  function tiled_matmul_out (line 137) | def tiled_matmul_out(
  function _flatten (line 167) | def _flatten(x: List[List[torch.Tensor]], rows: int, cols: int) -> List[...
  function _unflatten (line 175) | def _unflatten(
  function _flattened_transpose (line 188) | def _flattened_transpose(
  function tiled_matmul_fwd (line 206) | def tiled_matmul_fwd(
  function tiled_matmul_fwd_fake (line 223) | def tiled_matmul_fwd_fake(
  function tiled_matmul_setup_context (line 234) | def tiled_matmul_setup_context(ctx, inputs, output):
  function tiled_matmul_bwd (line 239) | def tiled_matmul_bwd(ctx, flat_grad_c):
  function tiled_matmul (line 266) | def tiled_matmul(

FILE: xformers/ops/tree_attention.py
  class TreeAttnMetadata (line 34) | class TreeAttnMetadata:
    method from_tree_choices_cached (line 133) | def from_tree_choices_cached(
    method from_tree_choices (line 142) | def from_tree_choices(
  function _get_subtree_size_and_num_children_per_node_at_level (line 219) | def _get_subtree_size_and_num_children_per_node_at_level(
  function _get_depth_counts (line 239) | def _get_depth_counts(sorted_tree_choices: List[Tuple[int, ...]]) -> Lis...
  function _get_num_nodes_per_level (line 252) | def _get_num_nodes_per_level(
  function _prepare_tree_attn_bias (line 259) | def _prepare_tree_attn_bias(
  function _prepare_tree_indices (line 317) | def _prepare_tree_indices(
  function _prepare_retrieval_indices (line 348) | def _prepare_retrieval_indices(
  function _prepare_tree_position_ids (line 383) | def _prepare_tree_position_ids(
  function _prepare_parent_node_indices (line 404) | def _prepare_parent_node_indices(
  function _prepare_child_node_indices (line 416) | def _prepare_child_node_indices(
  function _prepare_candidate_idx (line 445) | def _prepare_candidate_idx(
  function use_triton_splitk_for_prefix (line 458) | def use_triton_splitk_for_prefix(B: int, G: int, tree_size: int) -> bool:
  function select_prefix_op (line 469) | def select_prefix_op(
  function tree_attention (line 513) | def tree_attention(
  class SplitKAutotune (line 661) | class SplitKAutotune(triton_splitk.FwOp):
  function construct_full_tree_choices (line 666) | def construct_full_tree_choices(
  function construct_tree_choices (line 679) | def construct_tree_choices(
  function get_full_tree_size (line 691) | def get_full_tree_size(tree_depth: int, branching: int) -> int:

FILE: xformers/ops/unbind.py
  function get_stack_strides (line 13) | def get_stack_strides(
  function _stack_or_none_fw (line 59) | def _stack_or_none_fw(
  function _stack_fw (line 71) | def _stack_fw(
  class _Unbind (line 81) | class _Unbind(torch.autograd.Function):
    method forward (line 88) | def forward(ctx, x: torch.Tensor, dim: int):
    method backward (line 94) | def backward(cls, ctx, *tensors: torch.Tensor):
  class _StackOrNone (line 98) | class _StackOrNone(torch.autograd.Function):
    method forward (line 105) | def forward(ctx, dim: int, *tensors: torch.Tensor):
    method backward (line 111) | def backward(cls, ctx, grad: torch.Tensor):
  function unbind (line 115) | def unbind(x: torch.Tensor, dim: int) -> Tuple[torch.Tensor, ...]:
  function stack_or_none (line 124) | def stack_or_none(tensors: Sequence[torch.Tensor], dim: int) -> torch.Te...

FILE: xformers/profiler/api.py
  function profile (line 30) | def profile(
  function step (line 87) | def step() -> None:

FILE: xformers/profiler/device_limits.py
  class DeviceLimit (line 14) | class DeviceLimit:
  function get_device_limits (line 104) | def get_device_limits(device) -> Optional[DeviceLimit]:

FILE: xformers/profiler/find_slowest.py
  function print_json_as_dataframe (line 17) | def print_json_as_dataframe(json_list):
  function compute_std_dev_of_event_durations_over_ranks (line 44) | def compute_std_dev_of_event_durations_over_ranks(events, top=5):
  function sort_nccl_events (line 61) | def sort_nccl_events(
  function read_one_file (line 80) | def read_one_file(profile_trace_path: str) -> pd.DataFrame:
  function parse_one_file (line 106) | def parse_one_file(profile_trace_path: str) -> tuple[pd.DataFrame, pd.Da...
  function print_profiling_info (line 116) | def print_profiling_info(cuda_profile_dir: str):

FILE: xformers/profiler/profile_analyzer.py
  class FakeKinetoEvent (line 14) | class FakeKinetoEvent:
    method __init__ (line 15) | def __init__(self, e: torch._C._autograd._KinetoEvent) -> None:
  function _attention_flops (line 23) | def _attention_flops(queries, values, causal: bool, fmt: str = "BHMK") -...
  function _get_arg_idx (line 44) | def _get_arg_idx(op, *arg_names: str) -> int:
  function _replace_if_needed (line 51) | def _replace_if_needed(
  class AnalyzedTrace (line 120) | class AnalyzedTrace:
    method compute_num_ops (line 125) | def compute_num_ops(
    method compute_hfu (line 135) | def compute_hfu(self, hardware_flops: Dict[torch.dtype, float]) -> float:
    method compute_mfu (line 141) | def compute_mfu(self, hardware_flops: Dict[torch.dtype, float]) -> float:
    method _find_all_root_events_with_flops (line 156) | def _find_all_root_events_with_flops(
    method from_profile (line 192) | def from_profile(

FILE: xformers/profiler/profiler.py
  class NsightProfiler (line 30) | class NsightProfiler:
    method __init__ (line 38) | def __init__(self, main_profiler: "_Profiler") -> None:
    method __enter__ (line 42) | def __enter__(self):
    method __exit__ (line 45) | def __exit__(self, exc_type, exc_val, exc_tb):
    method step (line 48) | def step(self) -> None:
  class PyTorchProfiler (line 52) | class PyTorchProfiler:
    method __init__ (line 62) | def __init__(self, main_profiler: "_Profiler") -> None:
    method _on_trace (line 74) | def _on_trace(self, prof: torch.profiler.profiler.profile) -> None:
    method _preprocess_trace (line 98) | def _preprocess_trace(
    method _analyze_trace (line 113) | def _analyze_trace(self, prof: torch.profiler.profiler.profile) -> None:
    method __enter__ (line 137) | def __enter__(self):
    method __exit__ (line 141) | def __exit__(self, exc_type, exc_val, exc_tb):
    method step (line 145) | def step(self) -> None:
  class PyTorchProfiler_CUDAOnly (line 150) | class PyTorchProfiler_CUDAOnly(PyTorchProfiler):
    method _analyze_trace (line 155) | def _analyze_trace(self, prof: torch.profiler.profiler.profile) -> None:
  class MemSnapshotsProfiler (line 160) | class MemSnapshotsProfiler:
    method __init__ (line 165) | def __init__(self, main_profiler: "_Profiler") -> None:
    method _has_trace_plot (line 170) | def _has_trace_plot(self) -> bool:
    method __enter__ (line 173) | def __enter__(self):
    method __exit__ (line 188) | def __exit__(self, exc_type, exc_val, exc_tb):
    method step (line 211) | def step(self) -> None:
  class _ProfilerState (line 216) | class _ProfilerState:
  class _Profiler (line 223) | class _Profiler:
    method __init__ (line 226) | def __init__(
    method init_schedule (line 246) | def init_schedule(self, offset: int = 0) -> None:
    method check_schedule (line 257) | def check_schedule(self, schedule: Sequence[Tuple[Any, int, int]]) -> ...
    method update_profilers_on_step (line 284) | def update_profilers_on_step(self) -> None:
    method _create_output_filename (line 304) | def _create_output_filename(self, filename: str) -> Path:
    method start (line 318) | def start(self):
    method stop (line 321) | def stop(self, exc_type=None, exc_val=None, exc_tb=None):
    method __enter__ (line 324) | def __enter__(self):
    method __exit__ (line 332) | def __exit__(self, exc_type, exc_val, exc_tb):
    method step (line 339) | def step(self) -> None:
    method format_summary (line 369) | def format_summary(self) -> str:

FILE: xformers/profiler/profiler_dcgm.py
  class DCGMProfiler (line 20) | class DCGMProfiler:  # type: ignore
    method __init__ (line 23) | def __init__(
    method __enter__ (line 32) | def __enter__(self) -> None:
    method __exit__ (line 38) | def __exit__(self, exc_type, exc_val, exc_tb) -> None:
    method step (line 41) | def step(self) -> None:

FILE: xformers/profiler/profiler_dcgm_impl.py
  class DCGMProfiler (line 18) | class DCGMProfiler:
    method __init__ (line 21) | def __init__(
    method create_dcgm_group (line 65) | def create_dcgm_group(
    method get_profilable_fields (line 96) | def get_profilable_fields(self) -> Set[int]:
    method create_profiling_field_group (line 107) | def create_profiling_field_group(
    method __enter__ (line 152) | def __enter__(self) -> None:
    method __exit__ (line 176) | def __exit__(self, exc_type, exc_val, exc_tb) -> None:
    method step (line 189) | def step(self) -> None:

FILE: xformers/sparse/blocksparse_tensor.py
  function _spmm (line 16) | def _spmm(b, layout, values):
  function _softmax (line 40) | def _softmax(layout, values):
  function _sddmm (line 61) | def _sddmm(a, b, layout):
  class BlockSparseTensor (line 76) | class BlockSparseTensor(torch.Tensor):
    method __new__ (line 78) | def __new__(cls, values, layout):
    method __init__ (line 91) | def __init__(self, values, layout):
    method __repr__ (line 104) | def __repr__(self):
    method values (line 107) | def values(self):
    method _raw_wrap (line 111) | def _raw_wrap(cls, values, layout):
    method _wrap (line 118) | def _wrap(cls, values, bmat):
    method _bmm (line 125) | def _bmm(cls, arg0, arg1):
    method _masked_matmul (line 132) | def _masked_matmul(cls, a, b, mask):
    method _softmax (line 141) | def _softmax(cls, arg0, dim):
    method _to (line 148) | def _to(cls, arg0, device):
    method _copy (line 158) | def _copy(cls, arg0, arg1):
    method _equal (line 169) | def _equal(cls, arg0, arg1):
    method _to_dense (line 181) | def _to_dense(cls, arg0):
    method __torch_function__ (line 200) | def __torch_function__(cls, func, types, args=(), kwargs=None):
    method __torch_dispatch__ (line 277) | def __torch_dispatch__(cls, func, types, args, kwargs):

FILE: xformers/sparse/utils.py
  function _coo_to_csr (line 10) | def _coo_to_csr(m, n, row_indices, column_indices):
  function _csr_to_coo (line 17) | def _csr_to_coo(m, n, row_offsets, column_indices):
  function _diffsort (line 25) | def _diffsort(a):
  function _get_transpose_info (line 29) | def _get_transpose_info(m, n, row_indices, row_offsets, column_indices):
  function _transpose_with_info (line 48) | def _transpose_with_info(values, _transpose_info):
  function _transpose (line 54) | def _transpose(m, n, row_indices, values, row_offsets, column_indices):
  function _nonzero_mask_to_sparse_csr_indices (line 61) | def _nonzero_mask_to_sparse_csr_indices(mask, device):
  function _dense_to_sparse (line 83) | def _dense_to_sparse(matrix, device):
  function _round_nnz (line 99) | def _round_nnz(mask, divisible_by=4):
  function _dense3d_to_sparse (line 108) | def _dense3d_to_sparse(matrix, device):

FILE: xformers/triton/importing.py
  function libdevice_find (line 9) | def libdevice_find(name):

FILE: xformers/triton/vararg_kernel.py
  class _ForLoopUnroller (line 19) | class _ForLoopUnroller(ast.NodeTransformer):
    method __init__ (line 20) | def __init__(self, target, inline_variables, loop_iter):
    method visit_Name (line 25) | def visit_Name(self, node):
    method visit_Subscript (line 30) | def visit_Subscript(self, node):
  class _VisitorVarargKernel (line 42) | class _VisitorVarargKernel(ast.NodeTransformer):
    method __init__ (line 43) | def __init__(self, N):
    method visit_AnnAssign (line 47) | def visit_AnnAssign(self, node):
    method visit_arguments (line 67) | def visit_arguments(self, node):
  class _VisitorUnrollKernel (line 90) | class _VisitorUnrollKernel(_VisitorVarargKernel):
    method visit_For (line 91) | def visit_For(self, node):
  class _VisitorConditionalKernel (line 119) | class _VisitorConditionalKernel(_VisitorVarargKernel):
    method __init__ (line 120) | def __init__(self, *args, **kwargs):
    method visit_Subscript (line 124) | def visit_Subscript(self, node):
    method visit_Call (line 148) | def visit_Call(self, node):
  function _monkey_patched_getlines (line 173) | def _monkey_patched_getlines(filename, module_globals=None):
  class VarargMode (line 180) | class VarargMode(Enum):
  function unroll_varargs (line 186) | def unroll_varargs(kernel, N: int, mode: VarargMode = VarargMode.UNROLL):

FILE: xformers/utils.py
  function import_all_modules (line 20) | def import_all_modules(root: str, base_module: str) -> List[str]:
  function get_registry_decorator (line 33) | def get_registry_decorator(
  function generate_matching_config (line 68) | def generate_matching_config(superset: Dict[str, Any], config_class: Any...
  function do_bench_cudagraph (line 85) | def do_bench_cudagraph(

Download .json

Condensed preview — 905 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (3,691K chars).

[
  {
    "path": ".clang-format",
    "chars": 2570,
    "preview": "---\nAccessModifierOffset: -1\nAlignAfterOpenBracket: AlwaysBreak\nAlignConsecutiveAssignments: false\nAlignConsecutiveDecla"
  },
  {
    "path": ".coveragerc",
    "chars": 129,
    "preview": "[run]\nomit =\n    docs/*\n    tests/*\n    setup.py\n    xformers/benchmarks/*\n    xformers/triton/k_*\n    stubs/*\n    third"
  },
  {
    "path": ".editorconfig",
    "chars": 191,
    "preview": "root = true\n\n[*.py]\ncharset = utf-8\ntrim_trailing_whitespace = true\nend_of_line = lf\ninsert_final_newline = true\nindent_"
  },
  {
    "path": ".flake8",
    "chars": 250,
    "preview": "[flake8]\nexclude =\n    .git\n    ,.github/run-clang-format.py\n    ,third_party\nmax-line-length = 140\ncopyright-check = Tr"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug-report.md",
    "chars": 1173,
    "preview": "---\nname: \"\\U0001F41B Bug Report\"\nabout: Submit a bug report to help us improve xFormers\n\n---\n\n# 🐛 Bug\n\n<!-- A clear and"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature-request.md",
    "chars": 715,
    "preview": "---\nname: \"\\U0001F680Feature Request\"\nabout: Submit a proposal/request for a new xFormers feature\n\n---\n\n# 🚀 Feature\n\n<!-"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/questions-help-support.md",
    "chars": 93,
    "preview": "---\nname: \"❓Questions/Help/Support\"\nabout: Do you need support?\n\n---\n\n# ❓ Questions and Help\n"
  },
  {
    "path": ".github/PULL_REQUEST_TEMPLATE.md",
    "chars": 779,
    "preview": "## What does this PR do?\nFixes # (issue).\n\n## Before submitting\n\n- [ ] Did you have fun?\n  - Make sure you had fun codin"
  },
  {
    "path": ".github/actions/setup-build-cuda/action.yml",
    "chars": 6733,
    "preview": "name: Set up Runner for build\n\ninputs:\n  toolkit_type:\n    description: cuda or rocm\n    type: string\n  toolkit_short_ve"
  },
  {
    "path": ".github/actions/setup-env-build/action.yml",
    "chars": 6456,
    "preview": "name: Install env + build\ninputs:\n  arch:\n    description: 'GPU architecture'\n    required: true\n  python:\n    descripti"
  },
  {
    "path": ".github/compute_wheel_version.py",
    "chars": 1737,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": ".github/gpu_benchmark_diff.py",
    "chars": 1809,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": ".github/run-clang-format.py",
    "chars": 11291,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nMIT License\nCopyright (c) 2017 Guillaume Papin\nPermission is hereby granted, free of charge, "
  },
  {
    "path": ".github/run_benchmark_wrapper.py",
    "chars": 1831,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": ".github/selective_ci/requirements.txt",
    "chars": 10,
    "preview": "GitPython\n"
  },
  {
    "path": ".github/selective_ci/selective_ci.py",
    "chars": 4781,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": ".github/workflows/gh-pages.yml",
    "chars": 1266,
    "preview": "name: Build & deploy documentation\n\non:\n  push:\n    branches:\n      - main\n  pull_request:\n\njobs:\n  deploy:\n    runs-on:"
  },
  {
    "path": ".github/workflows/gpu_test_gh.yml",
    "chars": 3791,
    "preview": "name: gpu_test_gh\n\non:\n  workflow_dispatch: {}\n  pull_request:\n    paths:\n      - \"xformers/**\"\n      - \"!xformers/bench"
  },
  {
    "path": ".github/workflows/linters.yml",
    "chars": 124,
    "preview": "on:\n  pull_request: {}\n  push:\n    branches:\n      - main\n\njobs:\n  repo:\n    uses: ./.github/workflows/linters_reusable."
  },
  {
    "path": ".github/workflows/linters_reusable.yml",
    "chars": 2238,
    "preview": "name: lint\n\non:\n  workflow_call:\n    inputs:\n      pre-script:\n        type: string\n\njobs:\n  linters:\n    runs-on: ubunt"
  },
  {
    "path": ".github/workflows/rocm_build.yml",
    "chars": 1235,
    "preview": "name: rocm-build\n\non:\n  push:\n    branches:\n      - develop\n  pull_request:\n    paths:\n      - \".github/compute_wheel_ve"
  },
  {
    "path": ".github/workflows/rocm_ci.yml",
    "chars": 3088,
    "preview": "name: rocm-ci\n\non:\n  pull_request:\n    types: [labeled, synchronize, reopened]\n  workflow_dispatch: {}\n  push:\n    branc"
  },
  {
    "path": ".github/workflows/rocm_docker.yml",
    "chars": 627,
    "preview": "name: Build and Publish ROCm Docker Image\n\non:\n  push:\n    branches:\n      - develop\n\njobs:\n  build-and-push:\n    runs-o"
  },
  {
    "path": ".github/workflows/wheels.yml",
    "chars": 3615,
    "preview": "name: wheels\n\non:\n  pull_request:\n    paths:\n      - \".github/compute_wheel_version.py\"\n      - \".github/workflows/wheel"
  },
  {
    "path": ".github/workflows/wheels_build.yml",
    "chars": 5612,
    "preview": "name: wheels_build\n\non:\n  workflow_call:\n    inputs:\n      os:\n        required: true\n        type: string\n      python:"
  },
  {
    "path": ".github/workflows/wheels_upload_pip.yml",
    "chars": 2706,
    "preview": "name: wheels_upload_pip\n\non:\n  workflow_call:\n    secrets:\n      twine_password:\n        required: true\n    inputs:\n    "
  },
  {
    "path": ".github/workflows/wheels_upload_s3.yml",
    "chars": 2502,
    "preview": "name: wheels_upload_s3\n\non:\n  workflow_call:\n    inputs:\n      aws_role:\n        required: true\n        type: string\n   "
  },
  {
    "path": ".github/workflows/win-build.yml",
    "chars": 2934,
    "preview": "name: win-build\n\non:\n  pull_request:\n    paths:\n      - \"third_party/**\"\n      - \"xformers/csrc/**\"\n      - \".github/wor"
  },
  {
    "path": ".gitignore",
    "chars": 1046,
    "preview": "*~\n*.swp\n\n*.pyc\n*.pyo\n*.so\n\n.mypy_cache/\n*.egg-info/\n\nbuild/\ndist/\n\n# for autocomplete\ncompile_commands.json\n\n# Pytest v"
  },
  {
    "path": ".gitmodules",
    "chars": 272,
    "preview": "[submodule \"third_party/cutlass\"]\n\tpath = third_party/cutlass\n\turl = https://github.com/NVIDIA/cutlass.git\n[submodule \"t"
  },
  {
    "path": ".isort.cfg",
    "chars": 309,
    "preview": "[settings]\nknown_third_party =fvcore,hydra,input_pipeline,matplotlib,numpy,omegaconf,pandas,pl_bolts,pyre_extensions,pyt"
  },
  {
    "path": ".markdownlint.json",
    "chars": 43,
    "preview": "{\n    \"MD013\": false,\n    \"MD033\": false\n}\n"
  },
  {
    "path": ".pre-commit-config.yaml",
    "chars": 819,
    "preview": "exclude: 'build|stubs'\n\ndefault_language_version:\n    python: python3\n\nrepos:\n-   repo: https://github.com/pre-commit/pr"
  },
  {
    "path": ".pyre_configuration",
    "chars": 170,
    "preview": "{\n  \"ignore_all_errors\": [\"xformers/benchmarks/\"],\n  \"python_version\": \"3.9\",\n  \"source_directories\": [\n    \"stubs\",\n   "
  },
  {
    "path": "CHANGELOG.md",
    "chars": 23410,
    "preview": "# Changelog\nAll notable changes to this project will be documented in this file.\n\nThe format is based on [Keep a Changel"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "chars": 3535,
    "preview": "# Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, we as\ncontributors and"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 4395,
    "preview": "# Contributing to the xFormers repo\n\nWe want to make contributing to this project as easy and transparent as\npossible.\n\n"
  },
  {
    "path": "LICENSE",
    "chars": 1610,
    "preview": "From xFormers:\n\nCopyright (c) Facebook, Inc. and its affiliates\n\n\n===\n\nBSD 3-Clause License\n\nRedistribution and use in s"
  },
  {
    "path": "MANIFEST.in",
    "chars": 252,
    "preview": "include LICENSE\ninclude requirements.txt\ninclude version.txt\n\nrecursive-include xformers/csrc *\nrecursive-include third_"
  },
  {
    "path": "README.md",
    "chars": 6442,
    "preview": "<img src=\"./docs/assets/logo.png\" width=800>\n\n[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg"
  },
  {
    "path": "docs/Makefile",
    "chars": 685,
    "preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the "
  },
  {
    "path": "docs/requirements.txt",
    "chars": 204,
    "preview": "recommonmark==0.5.0\ndocutils==0.17.1\nsphinx==5.0.0\ngit+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_s"
  },
  {
    "path": "docs/source/2d_attention_patterns.ipynb",
    "chars": 925624,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"metadata\": {},\n   \"source\": [\n    \"# Creating complex sparsity pattern"
  },
  {
    "path": "docs/source/_static/css/customize.css",
    "chars": 546,
    "preview": "/* Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. */\n/*\n * some extra css to make markdown look s"
  },
  {
    "path": "docs/source/_templates/layout.html",
    "chars": 13379,
    "preview": "{# TEMPLATE VAR SETTINGS #}\n{%- set url_root = pathto('', 1) %}\n{%- if url_root == '#' %}{% set url_root = '' %}{% endif"
  },
  {
    "path": "docs/source/_templates/theme_variables.jinja",
    "chars": 912,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "docs/source/components/index.rst",
    "chars": 66,
    "preview": "API Reference\n=============\n\n.. toctree::\n   :maxdepth: 2\n\n   ops\n"
  },
  {
    "path": "docs/source/components/ops.rst",
    "chars": 1522,
    "preview": "xFormers optimized operators\n============================================================\n\nMemory-efficient attention\n--"
  },
  {
    "path": "docs/source/conf.py",
    "chars": 4995,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "docs/source/index.rst",
    "chars": 464,
    "preview": "\nWelcome to xFormers's documentation!\n=====================================\n\n*xFormers* is a PyTorch based library which"
  },
  {
    "path": "docs/source/swin_transformer.ipynb",
    "chars": 71697,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"markdown\",\n   \"id\": \"e69e9896-4be5-4706-9b49-cb772d02e8d4\",\n   \"metadata\": {},\n   \"so"
  },
  {
    "path": "docs/source/what_is_xformers.rst",
    "chars": 1203,
    "preview": "What is xFormers?\n====================\n\nFlexible Transformers, defined by interoperable and optimized building blocks.\n\n"
  },
  {
    "path": "examples/llama_inference/README.md",
    "chars": 1068,
    "preview": "# Llama inference\n\nThis example showcases how to use xformers kernels and cuda graphs to generate efficiently from large"
  },
  {
    "path": "examples/llama_inference/generate.py",
    "chars": 9037,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "examples/llama_inference/model.py",
    "chars": 11498,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "examples/llama_inference/mp_utils.py",
    "chars": 3084,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "examples/llama_inference/requirements.txt",
    "chars": 44,
    "preview": "sentencepiece\ntorch>=2.2.0\nxformers>=0.0.22\n"
  },
  {
    "path": "examples/llama_inference/sample_utils.py",
    "chars": 1083,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "examples/llama_inference/stats.py",
    "chars": 1450,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "examples/llama_inference/tokenizer.py",
    "chars": 2046,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "pyproject.toml",
    "chars": 292,
    "preview": "[build-system]\n# XXX: If your project needs other packages to build properly, add them to this list.\nrequires = [\"setupt"
  },
  {
    "path": "requirements-benchmark.txt",
    "chars": 285,
    "preview": "# Get core deps\n-r requirements-test.txt\n\n# Example requirement, can be anything that pip knows\n# install with `pip inst"
  },
  {
    "path": "requirements-test.txt",
    "chars": 469,
    "preview": "# Get core deps.\n-r requirements.txt\n\n\n# Tools for static checking.\nblack==26.3.1\nstdlibs==2024.1.28\nufmt==2.8.0\nusort=="
  },
  {
    "path": "requirements.txt",
    "chars": 160,
    "preview": "# Example requirement, can be anything that pip knows\n# install with `pip install -r requirements.txt`, and make sure th"
  },
  {
    "path": "setup.cfg",
    "chars": 58,
    "preview": "[flake8]\nmax-line-length = 140\nextend-ignore = E203, W503\n"
  },
  {
    "path": "setup.py",
    "chars": 18029,
    "preview": "#!/usr/bin/env python3\n\n# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is "
  },
  {
    "path": "stubs/fvcore/nn.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/matplotlib/pyplot.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/numpy/__init__.pyi",
    "chars": 3352,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/pandas.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/recommonmark/transform.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/seaborn.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/sklearn/model_selection.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/submitit.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/tensorflow.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/__init__.pyi",
    "chars": 72067,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/autograd/__init__.pyi",
    "chars": 836,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/autograd/profiler.pyi",
    "chars": 427,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/cuda/__init__.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/fft/__init__.pyi",
    "chars": 824,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/hub.pyi",
    "chars": 278,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/linalg/__init__.pyi",
    "chars": 1739,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/nn/__init__.pyi",
    "chars": 8485,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/nn/functional/__init__.pyi",
    "chars": 1113,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/nn/functional.pyi",
    "chars": 1113,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/nn/init.pyi",
    "chars": 1053,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/nn/utils/__init__.pyi",
    "chars": 672,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/onnx.pyi",
    "chars": 198,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/ops.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/optim/__init__.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/profiler/__init__.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/random/__init__.pyi",
    "chars": 288,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/sparse/__init__.pyi",
    "chars": 735,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/utils/data.pyi",
    "chars": 240,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch/utils/model_zoo.pyi",
    "chars": 251,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/torch_stub_tests.py",
    "chars": 73713,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/tqdm.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/triton/__init__.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/triton/language.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "stubs/triton/ops/blocksparse.pyi",
    "chars": 257,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/__init__.py",
    "chars": 198,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/multiprocessing_utils.py",
    "chars": 7802,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/readme_test_on_rocm.txt",
    "chars": 337,
    "preview": "\n   1. #> pip install -e ./\n\n   2. verify testing for generic fmha inference on ROCM\n\n      #> pytest tests/test_mem_eff"
  },
  {
    "path": "tests/test_attention_patterns.py",
    "chars": 7750,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_checkpoint.py",
    "chars": 12003,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_fmha_flop_formula.py",
    "chars": 5203,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_fmha_merge_attentions.py",
    "chars": 24564,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_fwbw_overlap.py",
    "chars": 5330,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_indexing.py",
    "chars": 3818,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_mem_eff_attention.py",
    "chars": 104565,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_multiprocessing_utils.py",
    "chars": 1845,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_profiler.py",
    "chars": 6974,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_rmsnorm.py",
    "chars": 3541,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_rope_padded.py",
    "chars": 11198,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_seqpar.py",
    "chars": 9384,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_sequence_parallel_fused_ops.py",
    "chars": 7594,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_sparse_tensors.py",
    "chars": 7788,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_sparsity24.py",
    "chars": 38350,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_splitk_reference.py",
    "chars": 7227,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_tiled_matmul.py",
    "chars": 6071,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_tree_attention.py",
    "chars": 24948,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_triton_varargs.py",
    "chars": 5280,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/test_unbind.py",
    "chars": 3205,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "tests/utils.py",
    "chars": 11538,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "version.txt",
    "chars": 7,
    "preview": "0.0.35\n"
  },
  {
    "path": "xformers/__init__.py",
    "chars": 1710,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/_cpp_lib.py",
    "chars": 4958,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/_deprecation_warning.py",
    "chars": 456,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/attn_bias_utils.py",
    "chars": 19787,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/benchmarks/__init__.py",
    "chars": 198,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/benchmarks/benchmark_attn_decoding.py",
    "chars": 14403,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/benchmarks/benchmark_indexing.py",
    "chars": 5288,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/benchmarks/benchmark_mem_eff_attention.py",
    "chars": 10101,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/benchmarks/benchmark_merge_attentions.py",
    "chars": 3042,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/benchmarks/benchmark_sequence_parallel_fused.py",
    "chars": 14454,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/benchmarks/benchmark_sp24.py",
    "chars": 4861,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/benchmarks/benchmark_tiled_matmul.py",
    "chars": 3446,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/benchmarks/readme_benchmark_on_rocm.txt",
    "chars": 466,
    "preview": "\n\n    1. #> pip install -e ./\n\n    2. Benchmark for generic fmha inference on ROCM\n\n       #> python xformers/benchmarks"
  },
  {
    "path": "xformers/benchmarks/utils.py",
    "chars": 24509,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/checkpoint.py",
    "chars": 20337,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/components/attention/attention_patterns.py",
    "chars": 8990,
    "preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
  },
  {
    "path": "xformers/csrc/attention/attention.cpp",
    "chars": 1340,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_decoder/CMakeLists.txt",
    "chars": 3329,
    "preview": "cmake_minimum_required(VERSION 3.26)\n\nproject(FMHADecoderMain LANGUAGES CXX HIP)\n\nmessage(\"CMAKE_CXX_COMPILER: ${CMAKE_C"
  },
  {
    "path": "xformers/csrc/attention/hip_decoder/attention_forward_splitk.cpp",
    "chars": 10323,
    "preview": "#include <ATen/Dispatch.h>\n#include <ATen/Functions.h>\n#include <ATen/Tensor.h>\n#include <c10/cuda/CUDAStream.h>\n#includ"
  },
  {
    "path": "xformers/csrc/attention/hip_decoder/ck_tile_attention_forward_decoder_splitk.h",
    "chars": 16164,
    "preview": "#pragma once\n\n#include <ck_tile/core.hpp>\n\n#include \"ck_tile_attention_inner_product.h\"\n\nnamespace {\n\ntemplate <typename"
  },
  {
    "path": "xformers/csrc/attention/hip_decoder/ck_tile_attention_inner_product.h",
    "chars": 4441,
    "preview": "/*\n * Copyright (c) 2023-2025, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/GENERATE_INSTANCES.md",
    "chars": 1598,
    "preview": "\n# Instances generator\n\n  The instances generator is a simple python tool used to generate several hundred of instances "
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/attention_backward_generic_ck_tiled.cpp",
    "chars": 22239,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/attention_ck_rand_uniform.cpp",
    "chars": 2840,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/attention_forward_generic_ck_tiled.cpp",
    "chars": 18548,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_fmha_test.cpp",
    "chars": 710,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_fmha_util.h",
    "chars": 3935,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_bool_switch.h",
    "chars": 3494,
    "preview": "/*\n * Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_backward.h",
    "chars": 13869,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_backward_bf16.cpp",
    "chars": 1817,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_backward_fp16.cpp",
    "chars": 1817,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_forward.h",
    "chars": 2466,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_forward_bf16.cpp",
    "chars": 1366,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_forward_dispatch.h",
    "chars": 6848,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_forward_fp16.cpp",
    "chars": 1366,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_forward_splitkv_dispatch.h",
    "chars": 14402,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_forward_splitkv_smallq_dispatch.h",
    "chars": 14398,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_infer.h",
    "chars": 2481,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_infer_bf16.cpp",
    "chars": 1319,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_infer_dispatch.h",
    "chars": 8994,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_infer_fp16.cpp",
    "chars": 1319,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_infer_splitkv_dispatch.h",
    "chars": 15028,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_batched_infer_splitkv_smallq_dispatch.h",
    "chars": 19054,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_bwd_setting.h",
    "chars": 8851,
    "preview": "/*\n * Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_setting.h",
    "chars": 5815,
    "preview": "/*\n * Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_splitkv_selector.h",
    "chars": 2912,
    "preview": "/*\n * Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_splitkv_setting.h",
    "chars": 5929,
    "preview": "/*\n * Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_splitkv_smallq_selector.h",
    "chars": 728,
    "preview": "/*\n * Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_splitkv_smallq_setting.h",
    "chars": 4544,
    "preview": "/*\n * Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_fwd_type_config.h",
    "chars": 1727,
    "preview": "/*\n * Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_backward.h",
    "chars": 13031,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_backward_bf16.cpp",
    "chars": 1817,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_backward_fp16.cpp",
    "chars": 1817,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_forward.h",
    "chars": 2536,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_forward_bf16.cpp",
    "chars": 1366,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_forward_dispatch.h",
    "chars": 6261,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_forward_fp16.cpp",
    "chars": 1366,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_forward_splitkv_dispatch.h",
    "chars": 13177,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_forward_splitkv_smallq_dispatch.h",
    "chars": 13148,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_infer.h",
    "chars": 2551,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_infer_bf16.cpp",
    "chars": 1319,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_infer_dispatch.h",
    "chars": 8569,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_infer_fp16.cpp",
    "chars": 1319,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_infer_splitkv_dispatch.h",
    "chars": 14342,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_grouped_infer_splitkv_smallq_dispatch.h",
    "chars": 18311,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_num_kv_split_switch.h",
    "chars": 1049,
    "preview": "/*\n * Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_params.h",
    "chars": 6366,
    "preview": "/*\n * Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_fmha_seqlen_q_switch.h",
    "chars": 848,
    "preview": "/*\n * Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_headdim_switch.h",
    "chars": 5039,
    "preview": "/*\n * Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/ck_tiled_rand_uniform_kernel.h",
    "chars": 12389,
    "preview": "// SPDX-License-Identifier: MIT\n// Copyright (c) 2018-2024, Advanced Micro Devices, Inc. All rights reserved.\n\n#pragma o"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/generate_instances.py",
    "chars": 15165,
    "preview": "# noqa: C801\n# Copyright (c) 2023-2024, Advanced Micro Devices, Inc. All rights reserved.\n#\n# This source code is licens"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/instances/fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_128.cpp",
    "chars": 635,
    "preview": "\n/*\n  Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/instances/fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_256.cpp",
    "chars": 635,
    "preview": "\n/*\n  Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/instances/fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_32.cpp",
    "chars": 634,
    "preview": "\n/*\n  Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  },
  {
    "path": "xformers/csrc/attention/hip_fmha/instances/fmha_batched_backward_bf16_has_mask_has_bias_has_biasgrad_has_dropout_maxk_64.cpp",
    "chars": 634,
    "preview": "\n/*\n  Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.\n *\n * This source code is licensed under the"
  }
]

// ... and 705 more files (download for full content)

About this extraction

This page contains the full source code of the facebookresearch/xformers GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 905 files (3.3 MB), approximately 939.6k tokens, and a symbol index with 2093 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo