gitextract_5vrtmrhk/

├── .gitignore
├── LICENSE
├── README.md
├── install_dependency.sh
├── native_sparse_attention/
│   ├── __init__.py
│   ├── infer/
│   │   ├── __init__.py
│   │   ├── inference_func.py
│   │   └── nsa_inference.py
│   ├── model/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── toy_llama.py
│   │   └── toy_nsa_llama.py
│   ├── module/
│   │   ├── __init__.py
│   │   ├── kv_cache.py
│   │   ├── native_sparse_attention.py
│   │   ├── rope.py
│   │   └── self_attention.py
│   └── ops/
│       ├── README.md
│       ├── __init__.py
│       ├── torch/
│       │   ├── __init__.py
│       │   ├── compress_key_value.py
│       │   ├── compressed_attention.py
│       │   ├── compressed_attention_decode.py
│       │   └── topk_sparse_attention.py
│       └── triton/
│           ├── __init__.py
│           ├── compressed_attention.py
│           ├── flash_attention.py
│           ├── flash_attention_decode.py
│           ├── linear_compress.py
│           ├── topk_sparse_attention.py
│           ├── topk_sparse_attention_decode.py
│           ├── utils.py
│           └── weighted_pool.py
├── setup.py
└── test/
    ├── test_compress_key_value.py
    ├── test_compressed_attention.py
    ├── test_flash_attention.py
    ├── test_kv_cache.py
    ├── test_linear_compress.py
    ├── test_nsa_infer.py
    ├── test_nsa_model.py
    ├── test_nsa_module.py
    ├── test_rope.py
    └── test_topk_sparse_attention.py