Full Code of leonardodalinky/fpsample for AI

main 4124a21dc664 cached
20 files
173.5 KB
45.7k tokens
218 symbols
1 requests
Download .txt
Repository: leonardodalinky/fpsample
Branch: main
Commit: 4124a21dc664
Files: 20
Total size: 173.5 KB

Directory structure:
gitextract_4aj034v5/

├── .github/
│   └── workflows/
│       ├── release.yml
│       └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CMakeLists.txt
├── LICENSE
├── README.md
├── bench/
│   └── test_bench.py
├── pyproject.toml
└── src/
    ├── _ext/
    │   ├── Interval.h
    │   ├── KDLineTree.h
    │   ├── KDNode.h
    │   ├── KDTree.h
    │   ├── KDTreeBase.h
    │   ├── Point.h
    │   └── utils.h
    ├── fpsample/
    │   └── __init__.py
    ├── lib.cpp
    ├── nanoflann.hpp
    └── wrapper.hpp

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/release.yml
================================================
name: release-upload

on:
  push:
    tags:
      - 'v*.*.*'

permissions:
  id-token: write
  contents: read

jobs:
  sdist:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install build dependencies
        run: pip install build
      - name: Build sdist
        run: python -m build --sdist
      - name: Publish package distributions to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://upload.pypi.org/legacy/
          user: __token__
          password: ${{ secrets.PYPI_TOKEN }}


================================================
FILE: .github/workflows/test.yml
================================================
name: test-upload

on:
  push:
    tags:
      - 'test*'

permissions:
  id-token: write
  contents: read

jobs:
  sdist:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install build dependencies
        run: pip install build
      - name: Build sdist
        run: python -m build --sdist
      - name: Publish package distributions to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          repository-url: https://test.pypi.org/legacy/
          user: __token__
          password: ${{ secrets.TEST_PYPI_TOKEN }}


================================================
FILE: .gitignore
================================================
/target

# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

# Rope
.ropeproject

# Django stuff:
*.log
*.pot

.DS_Store

# Sphinx documentation
docs/_build/

# PyCharm
.idea/

# VSCode
.vscode/

# Pyenv
.python-version

# tmp files
tmp*
*.out

# tmp benchmark
.benchmarks/

# uv
uv.lock


================================================
FILE: .pre-commit-config.yaml
================================================
exclude: "point-e/.*"
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
  rev: v4.4.0
  hooks:
  - id: trailing-whitespace
  - id: end-of-file-fixer
  - id: name-tests-test
  - id: requirements-txt-fixer
- repo: https://github.com/pycqa/isort
  rev: 5.12.0
  hooks:
    - id: isort
      args: ["--profile", "black", "--line-length=100", "--python-version=39"]
- repo: https://github.com/psf/black
  rev: 23.7.0
  hooks:
    - id: black
      args: ["--line-length=100", "--target-version=py39"]


================================================
FILE: CMakeLists.txt
================================================
# Require CMake 3.15+ (matching scikit-build-core) Use new versions of all
# policies up to CMake 4.0
cmake_minimum_required(VERSION 3.15...4.0)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

project(
  ${SKBUILD_PROJECT_NAME}
  VERSION ${SKBUILD_PROJECT_VERSION}
  LANGUAGES CXX)

find_package(pybind11 CONFIG REQUIRED)

pybind11_add_module(_fpsample MODULE src/lib.cpp)
target_link_libraries(_fpsample PRIVATE pybind11::headers)

target_compile_definitions(_fpsample PRIVATE VERSION_INFO=${PROJECT_VERSION})
install(TARGETS _fpsample DESTINATION fpsample)


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2023 AyajiLin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# fpsample
[![pypi package version badge](https://img.shields.io/pypi/v/fpsample)](https://pypi.org/project/fpsample/)
![python version badge](https://img.shields.io/badge/python-%3E%3D3.7-blue)
[![license badge](https://img.shields.io/github/license/leonardodalinky/fpsample)](https://github.com/leonardodalinky/fpsample/blob/main/LICENSE)
[![star badge](https://img.shields.io/github/stars/leonardodalinky/fpsample?style=social)](https://github.com/leonardodalinky/fpsample)

Python efficient farthest point sampling (FPS) library, 100x faster than `numpy` implementation.

`fpsample` is coupled with `numpy` and built upon Rust pyo3 bindings. This library aims at achieving the best performance for FPS in single-threaded CPU environment.

🎉 **PyTorch version with native multithreading, batch ops, Autograd and CUDA supports is in [pytorch_fpsample](https://github.com/leonardodalinky/pytorch_fpsample).**

## Installation

### Install from PyPI

`numpy>=1.16.0` is required. Install `fpsample` using pip:

```shell
pip install -U fpsample
```

If you encounter any installation errors, please make an issue and try to compile from source.

### Build from source

Since Version 1.0.0, fpsample is built using [Scikit-build-core + pybind11](https://github.com/pybind/scikit_build_example). Therefore, you can build and install this library from source just with pip.

C++ compiler must support C++17.

CMake>=3.15 is required.

```shell
git clone https://github.com/leonardodalinky/fpsample.git
cd fpsample
pip install .
```

#### Direct porting of `QuickFPS`

See `src/warpper.hpp` and `src/_ext/` for details.

## Usage

```python
import fpsample
import numpy as np

# Generate random point cloud
pc = np.random.rand(4096, 3)
## sample 1024 points

# Vanilla FPS
fps_samples_idx = fpsample.fps_sampling(pc, 1024)

# FPS + NPDU
fps_npdu_samples_idx = fpsample.fps_npdu_sampling(pc, 1024)
## or specify the windows size
fps_npdu_samples_idx = fpsample.fps_npdu_sampling(pc, 1024, w=64)

# FPS + NPDU + KDTree
fps_npdu_kdtree_samples_idx = fpsample.fps_npdu_kdtree_sampling(pc, 1024)
## or specify the windows size
fps_npdu_kdtree_samples_idx = fpsample.fps_npdu_kdtree_sampling(pc, 1024, w=64)

# KDTree-based FPS
kdtree_fps_samples_idx = fpsample.bucket_fps_kdtree_sampling(pc, 1024)

# NOTE: Probably the best
# Bucket-based FPS or QuickFPS
kdline_fps_samples_idx = fpsample.bucket_fps_kdline_sampling(pc, 1024, h=3)
```

* `FPS`: Vanilla farthest point sampling. Implemented in Rust. Achieve the same performance as `numpy`.
* `FPS + NPDU`: Farthest point sampling with nearest-point-distance-updating (NPDU) heuristic strategy. 5x~10x faster than vanilla FPS. **Require dimensional locality and give sub-optimal answers**.
* `FPS + NPDU + KDTree`: Farthest point sampling with NPDU heuristic strategy and KDTree. 3x~8x faster than vanilla FPS. Slightly slower than `FPS + NPDU`. But **DOES NOT** require dimensional locality.
* `KDTree-based FPS`: A farthest point sampling algorithm based on KDTree. About 40~50x faster than vanilla FPS.
* `Bucket-based FPS` or `QuickFPS`: A bucket-based farthest point sampling algorithm. About 80~100x faster than vanilla FPS. Require an additional hyperparameter for the height of the KDTree. In practice, `h=3` or `h=5` is recommended for small data, `h=7` is recommended for medium data, and `h=9` for extremely large data.

> **NOTE**: 🔥 In most cases, `Bucket-based FPS` is the best choice, with proper hyperparameter setting.

### Determinism

For deterministic results, fix the first sampled point index by passing the `start_idx` parameter.
```python
kdline_fps_samples_idx = fpsample.bucket_fps_kdline_sampling(pc, 1024, h=3, start_idx=0)
```

**OR** set the random seed before calling the function.
```python
np.random.seed(42)
```

## Development

Install dependencies:
```shell
uv sync
```

## Performance
Setup:
  - CPU: Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz
  - RAM: 512 GiB
  - SYSTEM: Ubuntu 20.04.6 LTS

Run benchmark:
```shell
py.test bench/ --benchmark-columns=mean,stddev --benchmark-sort=mean
```

Results:
```
------------------------------------------------------------------------------- benchmark '1024 of 4096': 8 tests -------------------------------------------------------------------------------
Name (time in ms)                    Min                Max               Mean            StdDev             Median               IQR            Outliers       OPS            Rounds  Iterations
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_bucket_fps_kdline_4k_h5      1.7892 (1.0)       2.2010 (1.01)      1.8301 (1.0)      0.0273 (1.20)      1.8255 (1.0)      0.0182 (1.0)         53;31  546.4295 (1.0)         491           1
test_bucket_fps_kdline_4k_h3      2.0452 (1.14)      2.1714 (1.0)       2.0862 (1.14)     0.0227 (1.0)       2.0817 (1.14)     0.0220 (1.21)        99;34  479.3375 (0.88)        436           1
test_bucket_fps_kdline_4k_h7      3.0348 (1.70)      3.2599 (1.50)      3.1082 (1.70)     0.0391 (1.72)      3.1005 (1.70)     0.0568 (3.12)        107;4  321.7322 (0.59)        306           1
test_fps_npdu_4k                  3.9024 (2.18)      4.3511 (2.00)      3.9514 (2.16)     0.0395 (1.74)      3.9457 (2.16)     0.0283 (1.55)        29;14  253.0768 (0.46)        245           1
test_bucket_fps_kdtree_4k         6.5697 (3.67)      7.3863 (3.40)      6.7525 (3.69)     0.1150 (5.07)      6.7240 (3.68)     0.0926 (5.08)        26;11  148.0923 (0.27)        126           1
test_fps_npdu_kdtree_4k           9.4114 (5.26)      9.7609 (4.50)      9.4979 (5.19)     0.0613 (2.70)      9.4807 (5.19)     0.0362 (1.99)        14;13  105.2866 (0.19)         97           1
test_vanilla_fps_4k              12.3702 (6.91)     12.8378 (5.91)     12.5875 (6.88)     0.1840 (8.11)     12.5024 (6.85)     0.3712 (20.38)        40;0   79.4438 (0.15)         77           1
test_vanilla_fps_4k_multiple     12.4058 (6.93)     13.2153 (6.09)     12.4706 (6.81)     0.1639 (7.22)     12.4139 (6.80)     0.0268 (1.47)         7;12   80.1886 (0.15)         80           1
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

--------------------------------------------------------------------------------- benchmark '4096 of 50000': 7 tests --------------------------------------------------------------------------------
Name (time in ms)                      Min                 Max                Mean            StdDev              Median               IQR            Outliers      OPS            Rounds  Iterations
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_bucket_fps_kdline_50k_h7      24.7285 (1.0)       25.2791 (1.0)       25.0179 (1.0)      0.1237 (1.0)       25.0139 (1.0)      0.1184 (1.0)          10;3  39.9714 (1.0)          40           1
test_bucket_fps_kdline_50k_h5      31.7191 (1.28)      32.9791 (1.30)      32.1800 (1.29)     0.3308 (2.68)      32.0584 (1.28)     0.3701 (3.13)          6;3  31.0752 (0.78)         31           1
test_bucket_fps_kdline_50k_h3      66.7806 (2.70)      67.5433 (2.67)      67.1429 (2.68)     0.2265 (1.83)      67.0898 (2.68)     0.2654 (2.24)          5;0  14.8936 (0.37)         15           1
test_bucket_fps_kdtree_50k         82.7767 (3.35)      85.9608 (3.40)      84.5684 (3.38)     0.9575 (7.74)      84.5108 (3.38)     1.2572 (10.62)         3;0  11.8248 (0.30)         11           1
test_fps_npdu_50k                 179.8323 (7.27)     181.9054 (7.20)     180.5911 (7.22)     0.7326 (5.92)     180.4329 (7.21)     0.7742 (6.54)          2;0   5.5374 (0.14)          6           1
test_fps_npdu_kdtree_50k          253.4978 (10.25)    255.2537 (10.10)    254.2810 (10.16)    0.7408 (5.99)     253.9220 (10.15)    1.1832 (9.99)          2;0   3.9327 (0.10)          5           1
test_vanilla_fps_50k              592.6405 (23.97)    594.1419 (23.50)    593.2049 (23.71)    0.5970 (4.83)     593.2359 (23.72)    0.7815 (6.60)          1;0   1.6858 (0.04)          5           1
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------- benchmark '50000 of 100000': 7 tests ---------------------------------------------------------------------------------------
Name (time in ms)                          Min                    Max                   Mean              StdDev                 Median                 IQR            Outliers     OPS            Rounds  Iterations
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_bucket_fps_kdline_100k_h7        232.5230 (1.0)         234.3285 (1.0)         233.3726 (1.0)        0.7632 (1.49)        233.5817 (1.0)        1.2734 (1.66)          2;0  4.2850 (1.0)           5           1
test_bucket_fps_kdtree_100k           381.3098 (1.64)        388.0433 (1.66)        384.0891 (1.65)       3.5171 (6.84)        382.9141 (1.64)       5.0501 (6.56)          1;0  2.6036 (0.61)          3           1
test_bucket_fps_kdline_100k_h9        409.0352 (1.76)        420.7832 (1.80)        414.3774 (1.78)       5.9458 (11.57)       413.3137 (1.77)       8.8110 (11.45)         1;0  2.4133 (0.56)          3           1
test_bucket_fps_kdline_100k_h5        410.1506 (1.76)        411.1764 (1.75)        410.6451 (1.76)       0.5139 (1.0)         410.6085 (1.76)       0.7693 (1.0)           1;0  2.4352 (0.57)          3           1
test_fps_npdu_kdtree_100k           3,134.7921 (13.48)     3,137.0544 (13.39)     3,135.7064 (13.44)      1.1919 (2.32)      3,135.2726 (13.42)      1.6967 (2.21)          1;0  0.3189 (0.07)          3           1
test_fps_npdu_100k                  4,325.4402 (18.60)     4,341.7247 (18.53)     4,333.3156 (18.57)      8.1554 (15.87)     4,332.7821 (18.55)     12.2134 (15.87)         1;0  0.2308 (0.05)          3           1
test_vanilla_fps_100k              14,460.2331 (62.19)    14,664.4396 (62.58)    14,561.1574 (62.39)    102.1237 (198.73)   14,558.7995 (62.33)    153.1549 (199.07)        1;0  0.0687 (0.02)          3           1
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

```

## Reference
The nearest-point-distance-updating (NPDU) heuristic strategy is proposed in the following paper:
```bibtex
@INPROCEEDINGS{Li2022adjust,
  author={Li, Jingtao and Zhou, Jian and Xiong, Yan and Chen, Xing and Chakrabarti, Chaitali},
  booktitle={2022 IEEE Workshop on Signal Processing Systems (SiPS)},
  title={An Adjustable Farthest Point Sampling Method for Approximately-sorted Point Cloud Data},
  year={2022},
  volume={},
  number={},
  pages={1-6},
  doi={10.1109/SiPS55645.2022.9919246}
}
```

Bucket-based farthest point sampling (QuickFPS) is proposed in the following paper. The implementation is based on the author's [Repo](https://github.com/hanm2019/bucket-based_farthest-point-sampling_CPU). To port the implementation to other C++ program, check [this](https://github.com/leonardodalinky/fpsample/tree/main/src/bucket_fps/_ext) for details.
```bibtex
@article{han2023quickfps,
  title={QuickFPS: Architecture and Algorithm Co-Design for Farthest Point Sampling in Large-Scale Point Clouds},
  author={Han, Meng and Wang, Liang and Xiao, Limin and Zhang, Hao and Zhang, Chenhao and Xu, Xiangrong and Zhu, Jianfeng},
  journal={IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  year={2023},
  publisher={IEEE}
}
```

The KDTree-based NN search algorithm implemented by [nanoflann](https://github.com/jlblancoc/nanoflann)

Thanks to the authors for their great work.


================================================
FILE: bench/test_bench.py
================================================
import numpy as np
import pytest

import fpsample

TEST_SEED = 42
TEST_BENCHMARK_SETTINGS = {
    "4k": {"group": "1024 of 4096", "warmup": False},
    "50k": {"group": "4096 of 50000", "warmup": False},
    "100k": {"group": "50000 of 100000", "warmup": False, "min_rounds": 3},
}
TEST_CASE_SETTINGS = {
    "4k": (4096, 1024, 3),
    "50k": (50000, 4096, 3),
    "100k": (100_000, 50_000, 3),
}


def create_sample_data(n_points: int, n_dim: int = 3, seed: int = TEST_SEED):
    np.random.seed(seed)
    return np.random.rand(n_points, n_dim)


####################
#                  #
#    4k setting    #
#                  #
####################
@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["4k"])
def test_vanilla_fps_4k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["4k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_sampling, pc, n_samples)

@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["4k"])
def test_vanilla_fps_4k_multiple(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["4k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_sampling, pc, n_samples, start_idx=[0, 23, 64, 128])


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["4k"])
def test_fps_npdu_4k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["4k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_npdu_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["4k"])
def test_fps_npdu_kdtree_4k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["4k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_npdu_kdtree_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["4k"])
def test_bucket_fps_kdtree_4k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["4k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdtree_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["4k"])
def test_bucket_fps_kdline_4k_h3(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["4k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdline_sampling, pc, n_samples, 3)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["4k"])
def test_bucket_fps_kdline_4k_h5(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["4k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdline_sampling, pc, n_samples, 5)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["4k"])
def test_bucket_fps_kdline_4k_h7(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["4k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdline_sampling, pc, n_samples, 7)


#####################
#                   #
#    50k setting    #
#                   #
#####################
@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["50k"])
def test_vanilla_fps_50k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["50k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["50k"])
def test_fps_npdu_50k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["50k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_npdu_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["50k"])
def test_fps_npdu_kdtree_50k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["50k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_npdu_kdtree_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["50k"])
def test_bucket_fps_kdtree_50k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["50k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdtree_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["50k"])
def test_bucket_fps_kdline_50k_h3(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["50k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdline_sampling, pc, n_samples, 3)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["50k"])
def test_bucket_fps_kdline_50k_h5(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["50k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdline_sampling, pc, n_samples, 5)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["50k"])
def test_bucket_fps_kdline_50k_h7(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["50k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdline_sampling, pc, n_samples, 7)


######################
#                    #
#    100k setting    #
#                    #
######################
@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["100k"])
def test_vanilla_fps_100k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["100k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["100k"])
def test_fps_npdu_100k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["100k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_npdu_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["100k"])
def test_fps_npdu_kdtree_100k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["100k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.fps_npdu_kdtree_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["100k"])
def test_bucket_fps_kdtree_100k(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["100k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdtree_sampling, pc, n_samples)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["100k"])
def test_bucket_fps_kdline_100k_h5(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["100k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdline_sampling, pc, n_samples, 5)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["100k"])
def test_bucket_fps_kdline_100k_h7(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["100k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdline_sampling, pc, n_samples, 7)


@pytest.mark.benchmark(**TEST_BENCHMARK_SETTINGS["100k"])
def test_bucket_fps_kdline_100k_h9(benchmark):
    n_points, n_samples, n_dim = TEST_CASE_SETTINGS["100k"]
    pc = create_sample_data(n_points, n_dim)
    benchmark(fpsample.bucket_fps_kdline_sampling, pc, n_samples, 9)


================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["scikit-build-core>=0.11", "pybind11>=2.6"]
build-backend = "scikit_build_core.build"

[project]
name = "fpsample"
version = "1.0.2"
authors = [
    { name = "Leonard Lin", email = "leonard.keilin@gmail.com" },
    { name = "guch8017", email = "guch8017@gmail.com" }
]
description = "An efficient CPU implementation of farthest point sampling (FPS) for point clouds."
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.8"
classifiers = [
    "Programming Language :: Python :: Implementation :: CPython",
    "Programming Language :: Python :: Implementation :: PyPy",
    "Programming Language :: Python :: 3 :: Only",
]
dependencies = [
    "numpy>=1.16.0",
    "pybind11>=2.6",
]

[project.urls]
Repository = "https://github.com/leonardodalinky/fpsample"
Tracker = "https://github.com/leonardodalinky/fpsample/issues"

[dependency-groups]
test = ["pytest"]
dev = [
    { include-group = "test" },
    "jupyterlab>=4.3.8",
    "pytest-benchmark>=4.0.0",
]


[tool.scikit-build]
minimum-version = "build-system.requires"


================================================
FILE: src/_ext/Interval.h
================================================
//
// Created by 韩萌 on 2022/6/14.
// Refactored by AyajiLin on 2023/9/16.
//

#ifndef KD_TREE_BASED_FARTHEST_POINT_SAMPLING_INTERVAL_HPP
#define KD_TREE_BASED_FARTHEST_POINT_SAMPLING_INTERVAL_HPP

namespace quickfps {
template <typename S> class Interval {
  public:
    S low, high;
    Interval() : low(0), high(0){};
    Interval(S low, S high) : low(low), high(high){};
    Interval(const Interval &o) : low(o.low), high(o.high){};
};
} // namespace quickfps

#endif // KD_TREE_BASED_FARTHEST_POINT_SAMPLING_INTERVAL_HPP


================================================
FILE: src/_ext/KDLineTree.h
================================================
//
// Created by hanm on 22-6-15.
// Refactored by AyajiLin on 2023/9/16.
//

#ifndef FPS_CPU_KDLINETREE_H
#define FPS_CPU_KDLINETREE_H

#include <limits>
#include <vector>

#include "KDTreeBase.h"

namespace quickfps {

template <typename T, size_t DIM, typename S = T>
class KDLineTree : public KDTreeBase<T, DIM, S> {
  public:
    using typename KDTreeBase<T, DIM, S>::_Point;
    using typename KDTreeBase<T, DIM, S>::_Points;
    using typename KDTreeBase<T, DIM, S>::NodePtr;

    KDLineTree(_Points data, size_t pointSize, size_t treeHigh,
               _Points samplePoints);
    ~KDLineTree();

    std::vector<NodePtr> KDNode_list;

    size_t high_;

    _Point max_point() override;

    void update_distance(const _Point &ref_point) override;

    void sample(size_t sample_num) override;

    bool leftNode(size_t high, size_t count) const override {
        return high == this->high_ || count == 1;
    };

    void addNode(NodePtr p) override;
};

template <typename T, size_t DIM, typename S>
KDLineTree<T, DIM, S>::KDLineTree(_Points data, size_t pointSize,
                                  size_t treeHigh, _Points samplePoints)
    : KDTreeBase<T, DIM, S>(data, pointSize, samplePoints), high_(treeHigh) {
    KDNode_list.clear();
}

template <typename T, size_t DIM, typename S>
KDLineTree<T, DIM, S>::~KDLineTree() {
    KDNode_list.clear();
}

template <typename T, size_t DIM, typename S>
typename KDLineTree<T, DIM, S>::_Point KDLineTree<T, DIM, S>::max_point() {
    _Point tmpPoint;
    S max_distance = std::numeric_limits<S>::lowest();
    for (const auto &bucket : KDNode_list) {
        if (bucket->max_point.dis > max_distance) {
            max_distance = bucket->max_point.dis;
            tmpPoint = bucket->max_point;
        }
    }
    return tmpPoint;
}

template <typename T, size_t DIM, typename S>
void KDLineTree<T, DIM, S>::update_distance(const _Point &ref_point) {
    for (const auto &bucket : KDNode_list) {
        bucket->send_delay_point(ref_point);
        bucket->update_distance();
    }
}

template <typename T, size_t DIM, typename S>
void KDLineTree<T, DIM, S>::sample(size_t sample_num) {
    _Point ref_point;
    for (size_t i = 1; i < sample_num; i++) {
        ref_point = this->max_point();
        this->sample_points[i] = ref_point;
        this->update_distance(ref_point);
    }
}

template <typename T, size_t DIM, typename S>
void KDLineTree<T, DIM, S>::addNode(NodePtr p) {
    size_t nodeIdx = KDNode_list.size();
    p->idx = nodeIdx;
    KDNode_list.push_back(p);
}

} // namespace quickfps

#endif // FPS_CPU_KDLINETREE_H


================================================
FILE: src/_ext/KDNode.h
================================================
//
// Created by 韩萌 on 2022/6/14.
// Refactored by AyajiLin on 2023/9/16.
//

#ifndef KD_TREE_BASED_FARTHEST_POINT_SAMPLING_KDNODE_H
#define KD_TREE_BASED_FARTHEST_POINT_SAMPLING_KDNODE_H
#include <array>
#include <limits>
#include <queue>
#include <vector>

#include "Interval.h"
#include "Point.h"

namespace quickfps {

template <typename T, size_t DIM, typename S> class KDNode {
  public:
    using _Point = Point<T, DIM, S>;
    using _Points = _Point *;
    _Points points;
    size_t pointLeft, pointRight;
    size_t idx;

    std::array<Interval<T>, DIM> bboxs;
    std::vector<_Point> waitpoints;
    std::vector<_Point> delaypoints;
    _Point max_point;
    KDNode *left;
    KDNode *right;

    KDNode();

    KDNode(const KDNode &a);

    KDNode(const std::array<Interval<T>, DIM> &bboxs);

    void init(const _Point &ref);

    void updateMaxPoint(const _Point &lpoint, const _Point &rpoint) {
        if (lpoint.dis > rpoint.dis)
            this->max_point = lpoint;
        else
            this->max_point = rpoint;
    }

    S bound_distance(const _Point &ref_point) const;

    void send_delay_point(const _Point &point) {
        this->waitpoints.push_back(point);
    }

    void update_distance();

    void reset();

    size_t size() const;
};

template <typename T, size_t DIM, typename S>
KDNode<T, DIM, S>::KDNode()
    : points(nullptr), pointLeft(0), pointRight(0), left(nullptr),
      right(nullptr) {}

template <typename T, size_t DIM, typename S>
KDNode<T, DIM, S>::KDNode(const std::array<Interval<T>, DIM> &other_bboxs)
    : points(nullptr), pointLeft(0), pointRight(0), left(nullptr),
      right(nullptr) {
    std::copy(other_bboxs.cbegin(), other_bboxs.cend(), this->bboxs.begin());
}

template <typename T, size_t DIM, typename S>
KDNode<T, DIM, S>::KDNode(const KDNode &a)
    : points(a.points), pointLeft(a.pointLeft), pointRight(a.pointRight),
      left(a.left), right(a.right), idx(a.idx) {
    std::copy(a.bboxs.cbegin(), a.bboxs.cend(), this->bboxs.begin());
    std::copy(a.waitpoints.cbegin(), a.waitpoints.cend(),
              this->waitpoints.begin());
    std::copy(a.delaypoints.cbegin(), a.delaypoints.cend(),
              this->delaypoints.begin());
}

template <typename T, size_t DIM, typename S>
void KDNode<T, DIM, S>::init(const _Point &ref) {
    waitpoints.clear();
    delaypoints.clear();
    if (this->left && this->right) {
        this->left->init(ref);
        this->right->init(ref);
        updateMaxPoint(this->left->max_point, this->right->max_point);
    } else {
        S dis;
        S maxdis = std::numeric_limits<S>::lowest();
        for (size_t i = pointLeft; i < pointRight; i++) {
            dis = points[i].updatedistance(ref);
            if (dis > maxdis) {
                maxdis = dis;
                max_point = points[i];
            }
        }
    }
}

template <typename T, size_t DIM, typename S>
S KDNode<T, DIM, S>::bound_distance(const _Point &ref_point) const {
    S bound_dis(0);
    S dim_distance;
    for (size_t cur_dim = 0; cur_dim < DIM; cur_dim++) {
        dim_distance = 0;
        if (ref_point[cur_dim] > this->bboxs[cur_dim].high)
            dim_distance = ref_point[cur_dim] - this->bboxs[cur_dim].high;
        else if (ref_point[cur_dim] < this->bboxs[cur_dim].low)
            dim_distance = this->bboxs[cur_dim].low - ref_point[cur_dim];
        bound_dis += powi(dim_distance, 2);
    }
    return bound_dis;
}

template <typename T, size_t DIM, typename S>
void KDNode<T, DIM, S>::update_distance() {
    for (const auto &ref_point : this->waitpoints) {
        S lastmax_distance = this->max_point.dis;
        S cur_distance = this->max_point.distance(ref_point);
        // cur_distance >
        // lastmax_distance意味着当前Node的max_point不会进行更新
        if (cur_distance > lastmax_distance) {
            S boundary_distance = bound_distance(ref_point);
            if (boundary_distance < lastmax_distance)
                this->delaypoints.push_back(ref_point);
        } else {
            if (this->right && this->left) {
                if (!delaypoints.empty()) {
                    for (const auto &delay_point : delaypoints) {
                        this->left->send_delay_point(delay_point);
                        this->right->send_delay_point(delay_point);
                    }
                    delaypoints.clear();
                }
                this->left->send_delay_point(ref_point);
                this->left->update_distance();

                this->right->send_delay_point(ref_point);
                this->right->update_distance();

                updateMaxPoint(this->left->max_point, this->right->max_point);
            } else {
                S dis;
                S maxdis;
                this->delaypoints.push_back(ref_point);
                for (const auto &delay_point : delaypoints) {
                    maxdis = std::numeric_limits<S>::lowest();
                    for (size_t i = pointLeft; i < pointRight; i++) {
                        dis = points[i].updatedistance(delay_point);
                        if (dis > maxdis) {
                            maxdis = dis;
                            max_point = points[i];
                        }
                    }
                }
                this->delaypoints.clear();
            }
        }
    }
    this->waitpoints.clear();
}

template <typename T, size_t DIM, typename S> void KDNode<T, DIM, S>::reset() {
    for (size_t i = pointLeft; i < pointRight; i++) {
        points[i].reset();
    }
    this->waitpoints.clear();
    this->delaypoints.clear();
    this->max_point.reset();
    if (this->left && this->right) {
        this->left->reset();
        this->right->reset();
    }
}

template <typename T, size_t DIM, typename S>
size_t KDNode<T, DIM, S>::size() const {
    if (this->left && this->right)
        return this->left->size() + this->right->size();
    return (pointRight - pointLeft);
}

} // namespace quickfps

#endif // KD_TREE_BASED_FARTHEST_POINT_SAMPLING_KDNODE_H


================================================
FILE: src/_ext/KDTree.h
================================================
//
// Created by hanm on 22-6-15.
// Refactored by AyajiLin on 2023/9/16.
//

#ifndef FPS_CPU_KDTREE_H
#define FPS_CPU_KDTREE_H

#include "KDTreeBase.h"

namespace quickfps {

template <typename T, size_t DIM, typename S = T>
class KDTree : public KDTreeBase<T, DIM, S> {
  public:
    using typename KDTreeBase<T, DIM, S>::_Point;
    using typename KDTreeBase<T, DIM, S>::_Points;
    using typename KDTreeBase<T, DIM, S>::NodePtr;
    explicit KDTree(_Points data, size_t pointSize, _Points samplePoints);

    _Point max_point() override { return this->root_->max_point; };

    void update_distance(const _Point &ref_point) override;

    void sample(size_t sample_num) override;

    bool leftNode(size_t, size_t count) const override { return count == 1; };

    void addNode(NodePtr) override{};
};

template <typename T, size_t DIM, typename S>
KDTree<T, DIM, S>::KDTree(_Points data, size_t pointSize, _Points samplePoints)
    : KDTreeBase<T, DIM, S>(data, pointSize, samplePoints) {}

template <typename T, size_t DIM, typename S>
void KDTree<T, DIM, S>::update_distance(const _Point &ref_point) {
    this->root_->send_delay_point(ref_point);
    this->root_->update_distance();
}

template <typename T, size_t DIM, typename S>
void KDTree<T, DIM, S>::sample(size_t sample_num) {
    _Point ref_point;
    for (size_t i = 1; i < sample_num; i++) {
        ref_point = this->max_point();
        this->sample_points[i] = ref_point;
        this->update_distance(ref_point);
    }
}

} // namespace quickfps

#endif // FPS_CPU_KDTREE_H


================================================
FILE: src/_ext/KDTreeBase.h
================================================
//
// Created by 韩萌 on 2022/6/14.
// Refactored by AyajiLin on 2023/9/16.
//

#ifndef KD_TREE_BASED_FARTHEST_POINT_SAMPLING_KDTREE_H
#define KD_TREE_BASED_FARTHEST_POINT_SAMPLING_KDTREE_H

#include "KDNode.h"
#include "Point.h"
#include <algorithm>
#include <array>
#include <numeric>

namespace quickfps {

template <typename T, size_t DIM, typename S> class KDTreeBase {
  public:
    using _Point = Point<T, DIM, S>;
    using _Points = _Point *;
    using NodePtr = KDNode<T, DIM, S> *;
    using _Interval = Interval<T>;

    size_t pointSize;
    _Points sample_points;
    NodePtr root_;
    _Points points_;

  public:
    KDTreeBase(_Points data, size_t pointSize, _Points samplePoints);

    ~KDTreeBase();

    void buildKDtree();

    NodePtr get_root() const { return this->root_; };

    void init(const _Point &ref);

    virtual _Point max_point() = 0;

    virtual void sample(size_t sample_num) = 0;

  protected:
    void deleteNode(NodePtr node_p);
    virtual void addNode(NodePtr p) = 0;
    virtual bool leftNode(size_t high, size_t count) const = 0;
    virtual void update_distance(const _Point &ref_point) = 0;

    NodePtr divideTree(ssize_t left, ssize_t right,
                       const std::array<_Interval, DIM> &bboxs,
                       size_t curr_high);

    size_t planeSplit(ssize_t left, ssize_t right, size_t split_dim,
                      T split_val);

    T qSelectMedian(size_t dim, size_t left, size_t right);
    static size_t findSplitDim(const std::array<_Interval, DIM> &bboxs);
    inline std::array<_Interval, DIM> computeBoundingBox(size_t left,
                                                         size_t right);
};

template <typename T, size_t DIM, typename S>
KDTreeBase<T, DIM, S>::KDTreeBase(_Points data, size_t pointSize,
                                  _Points samplePoints)
    : pointSize(pointSize), sample_points(samplePoints), root_(nullptr),
      points_(data) {}

template <typename T, size_t DIM, typename S>
KDTreeBase<T, DIM, S>::~KDTreeBase() {
    if (root_ != nullptr)
        deleteNode(root_);
}

template <typename T, size_t DIM, typename S>
void KDTreeBase<T, DIM, S>::deleteNode(NodePtr node_p) {
    if (node_p->left)
        deleteNode(node_p->left);
    if (node_p->right)
        deleteNode(node_p->right);
    delete node_p;
}

template <typename T, size_t DIM, typename S>
void KDTreeBase<T, DIM, S>::buildKDtree() {
    size_t left = 0;
    size_t right = pointSize;
    std::array<_Interval, DIM> bboxs = this->computeBoundingBox(left, right);
    this->root_ = divideTree(left, right, bboxs, 0);
}

template <typename T, size_t DIM, typename S>
typename KDTreeBase<T, DIM, S>::NodePtr
KDTreeBase<T, DIM, S>::divideTree(ssize_t left, ssize_t right,
                                  const std::array<_Interval, DIM> &bboxs,
                                  size_t curr_high) {
    NodePtr node = new KDNode<T, DIM, S>(bboxs);

    ssize_t count = right - left;
    if (this->leftNode(curr_high, count)) {
        node->pointLeft = left;
        node->pointRight = right;
        node->points = this->points_;
        this->addNode(node);
        return node;
    } else {
        size_t split_dim = this->findSplitDim(bboxs);
        T split_val = this->qSelectMedian(split_dim, left, right);

        size_t split_delta = planeSplit(left, right, split_dim, split_val);

        std::array<_Interval, DIM> bbox_cur =
            this->computeBoundingBox(left, left + split_delta);
        node->left =
            this->divideTree(left, left + split_delta, bbox_cur, curr_high + 1);
        bbox_cur = this->computeBoundingBox(left + split_delta, right);
        node->right = this->divideTree(left + split_delta, right, bbox_cur,
                                       curr_high + 1);
        return node;
    }
}

template <typename T, size_t DIM, typename S>
size_t KDTreeBase<T, DIM, S>::planeSplit(ssize_t left, ssize_t right,
                                         size_t split_dim, T split_val) {
    ssize_t start = left;
    ssize_t end = right - 1;

    for (;;) {
        while (start <= end && points_[start].pos[split_dim] < split_val)
            ++start;
        while (start <= end && points_[end].pos[split_dim] >= split_val)
            --end;

        if (start > end)
            break;
        std::swap(points_[start], points_[end]);
        ++start;
        --end;
    }

    ssize_t lim1 = start - left;
    if (start == left)
        lim1 = 1;
    if (start == right)
        lim1 = (right - left - 1);

    return static_cast<ssize_t>(lim1);
}

template <typename T, size_t DIM, typename S>
T KDTreeBase<T, DIM, S>::qSelectMedian(size_t dim, size_t left, size_t right) {
    T sum = std::accumulate(this->points_ + left, this->points_ + right, 0.0,
                            [dim](const T &acc, const _Point &point) {
                                return acc + point.pos[dim];
                            });
    return sum / (right - left);
}

template <typename T, size_t DIM, typename S>
size_t
KDTreeBase<T, DIM, S>::findSplitDim(const std::array<_Interval, DIM> &bboxs) {
    T min_, max_;
    T span = 0;
    size_t best_dim = 0;

    for (size_t cur_dim = 0; cur_dim < DIM; cur_dim++) {
        min_ = bboxs[cur_dim].low;
        max_ = bboxs[cur_dim].high;
        T cur_span = (max_ - min_);

        if (cur_span > span) {
            best_dim = cur_dim;
            span = cur_span;
        }
    }

    return best_dim;
}

template <typename T, size_t DIM, typename S>
inline std::array<Interval<T>, DIM>
KDTreeBase<T, DIM, S>::computeBoundingBox(size_t left, size_t right) {
    T min_vals[DIM];
    T max_vals[DIM];
    std::fill(min_vals, min_vals + DIM, std::numeric_limits<T>::max());
    std::fill(max_vals, max_vals + DIM, std::numeric_limits<T>::lowest());

    for (size_t i = left; i < right; ++i) {
        const _Point &pos = points_[i];

        for (size_t cur_dim = 0; cur_dim < DIM; cur_dim++) {
            T val = pos[cur_dim];
            min_vals[cur_dim] = std::min(min_vals[cur_dim], val);
            max_vals[cur_dim] = std::max(max_vals[cur_dim], val);
        }
    }

    std::array<_Interval, DIM> bboxs;

    for (size_t cur_dim = 0; cur_dim < DIM; cur_dim++) {
        bboxs[cur_dim].low = min_vals[cur_dim];
        bboxs[cur_dim].high = max_vals[cur_dim];
    }

    return bboxs;
}

template <typename T, size_t DIM, typename S>
void KDTreeBase<T, DIM, S>::init(const _Point &ref) {
    this->sample_points[0] = ref;
    this->root_->init(ref);
}

} // namespace quickfps

#endif // KD_TREE_BASED_FARTHEST_POINT_SAMPLING_KDTREE_H


================================================
FILE: src/_ext/Point.h
================================================
//
// Created by 韩萌 on 2022/6/14.
// Refactored by AyajiLin on 2023/9/16.
//

#ifndef KD_TREE_BASED_FARTHEST_POINT_SAMPLING_POINT_H
#define KD_TREE_BASED_FARTHEST_POINT_SAMPLING_POINT_H

#include "cmath"
#include "utils.h"
#include <algorithm>
#include <limits>

namespace quickfps {

template <typename T, size_t DIM, typename S = T> class Point {
  public:
    T pos[DIM]; // x, y, z
    S dis;
    size_t id;

    Point();
    Point(const T pos[DIM], size_t id);
    Point(const T pos[DIM], size_t id, S dis);
    Point(const Point &obj);
    ~Point(){};

    bool operator<(const Point &aii) const;

    constexpr T operator[](size_t i) const { return pos[i]; }

    Point &operator=(const Point &obj) {
        std::copy(obj.pos, obj.pos + DIM, this->pos);
        this->dis = obj.dis;
        this->id = obj.id;
        return *this;
    }

    constexpr S distance(const Point &b) { return _distance(b, DIM); }

    void reset();

    S updatedistance(const Point &ref);

    S updateDistanceAndCount(const Point &ref, size_t &count);

  private:
    constexpr S _distance(const Point &b, size_t dim_left) {
        return (dim_left == 0)
                   ? 0
                   : powi((this->pos[dim_left - 1] - b.pos[dim_left - 1]), 2) +
                         _distance(b, dim_left - 1);
    }
};

template <typename T, size_t DIM, typename S>
Point<T, DIM, S>::Point() : dis(std::numeric_limits<S>::max()), id(0) {
    std::fill(pos, pos + DIM, 0);
}

template <typename T, size_t DIM, typename S>
Point<T, DIM, S>::Point(const T pos[DIM], size_t id)
    : dis(std::numeric_limits<S>::max()), id(id) {
    std::copy(pos, pos + DIM, this->pos);
}

template <typename T, size_t DIM, typename S>
Point<T, DIM, S>::Point(const T pos[DIM], size_t id, S dis) : dis(dis), id(id) {
    std::copy(pos, pos + DIM, this->pos);
}

template <typename T, size_t DIM, typename S>
Point<T, DIM, S>::Point(const Point &obj) : dis(obj.dis), id(obj.id) {
    std::copy(obj.pos, obj.pos + DIM, this->pos);
}

template <typename T, size_t DIM, typename S>
bool Point<T, DIM, S>::operator<(const Point &aii) const {
    return dis < aii.dis;
}

template <typename T, size_t DIM, typename S>
S Point<T, DIM, S>::updatedistance(const Point &ref) {
    this->dis = std::min(this->dis, this->distance(ref));
    return this->dis;
}

template <typename T, size_t DIM, typename S>
S Point<T, DIM, S>::updateDistanceAndCount(const Point &ref, size_t &count) {
    S tempDistance = this->distance(ref);
    if (tempDistance < this->dis) {
        this->dis = tempDistance;
        count++;
    }
    return this->dis;
}

template <typename T, size_t DIM, typename S> void Point<T, DIM, S>::reset() {
    this->dis = std::numeric_limits<S>::max();
}

} // namespace quickfps

#endif // KD_TREE_BASED_FARTHEST_POINT_SAMPLING_POINT_H


================================================
FILE: src/_ext/utils.h
================================================
// Refactored by AyajiLin on 2023/9/16.

/* functional  */
#ifndef KD_TREE_UTILS_HPP
#define KD_TREE_UTILS_HPP
#include <cstddef>
#include <type_traits>

namespace quickfps {
using ssize_t = std::make_signed_t<size_t>;

template <typename T>
inline constexpr T powi(const T base, const size_t exponent) {
    // (parentheses not required in next line)
    return (exponent == 0) ? 1 : (base * pow(base, exponent - 1));
}
} // namespace quickfps

#endif // KD_TREE_UTILS_HPP


================================================
FILE: src/fpsample/__init__.py
================================================
from __future__ import annotations

import warnings
from typing import List, Optional, Union

import numpy as np

from ._fpsample import (
    __doc__,
    __version__,
    _bucket_fps_kdline_sampling,
    _bucket_fps_kdtree_sampling,
    _fps_npdu_kdtree_sampling,
    _fps_npdu_sampling,
    _fps_sampling,
)


def get_start_idx(n_pts: int, start_idx: Optional[Union[int, List[int]]]) -> Union[int, np.ndarray]:
    # Random pick a start or use the given start indices
    if start_idx is None:
        start_idx = np.random.randint(low=0, high=n_pts)
    elif isinstance(start_idx, int):
        # just use the int start_idx
        start_idx = start_idx
    elif isinstance(start_idx, list):
        # convert to numpy array for rust
        # TODO: maybe better performance with fortran array?
        start_idx = np.array(start_idx, dtype=np.uint64)
    else:
        raise ValueError("start_idx should be None, int or list")
    return start_idx


def fps_sampling(
    pc: np.ndarray, n_samples: int, start_idx: Optional[Union[int, List[int]]] = None
) -> np.ndarray:
    """
    Vanilla FPS sampling.

    Args:
        pc (np.ndarray): The input point cloud of shape (n_pts, D).
        n_samples (int): Number of samples.
        start_idx (int or list[int], default=None): The starting index of sampling. If set to None, it will be randomly picked.
            If multiple start indices are given, they are used as a start set for the FPS and will all be present in the final samples.
    Returns:
        np.ndarray: The selected indices of shape (n_samples,).
    """
    assert n_samples >= 1, "n_samples should be >= 1"
    assert pc.ndim == 2
    n_pts, _ = pc.shape
    assert n_pts >= n_samples, "n_pts should be >= n_samples"
    if isinstance(start_idx, int):
        assert (
            start_idx is None or 0 <= start_idx < n_pts
        ), "start_idx should be None or 0 <= start_idx < n_pts"
    if isinstance(start_idx, list):
        assert len(start_idx) <= n_samples, "len(start_idx) should be <= n_samples"
        for idx in start_idx:
            assert 0 <= idx < n_pts, "start_idx should be None or 0 <= start_idx < n_pts"
    # best performance with fortran array
    pc = np.asfortranarray(pc, dtype=np.float32)
    # Random pick a start if not given
    start_idx = get_start_idx(n_pts, start_idx)
    return _fps_sampling(pc, n_samples, start_idx)


def fps_npdu_sampling(
    pc: np.ndarray,
    n_samples: int,
    w: Optional[int] = None,
    start_idx: Optional[Union[int, List[int]]] = None,
) -> np.ndarray:
    """
    FPS sampling with nearest-point-distance-updating (NPDU) heuristic strategy.
    **Requires dimensional locality for best samples**.

    Args:
        pc (np.ndarray): The input point cloud of shape (n_pts, D).
        n_samples (int): Number of samples.
        w (int, default=None): Windows size of local heuristic search. If set to None, it will be set to `n_pts / n_samples * 16`.
        start_idx (int or list[int], default=None): The starting index of sampling. If set to None, it will be randomly picked.
            If multiple start indices are given, they are used as a start set for the FPS and will all be present in the final samples.
    Returns:
        np.ndarray: The selected indices of shape (n_samples,).
    """
    assert n_samples >= 1, "n_samples should be >= 1"
    assert pc.ndim == 2
    n_pts, _ = pc.shape
    assert n_pts >= n_samples, "n_pts should be >= n_samples"
    assert (
        start_idx is None or 0 <= start_idx < n_pts
    ), "start_idx should be None or 0 <= start_idx < n_pts"
    if isinstance(start_idx, list):
        assert len(start_idx) <= n_samples, "len(start_idx) should be <= n_samples"
    pc = np.ascontiguousarray(pc, dtype=np.float32)
    w = w or int(n_pts / n_samples * 16)
    if w >= n_pts - 1:
        warnings.warn(f"k is too large, set to {n_pts - 1}")
        w = n_pts - 1
    # Random pick a start if not given
    start_idx = get_start_idx(n_pts, start_idx)
    return _fps_npdu_sampling(pc, n_samples, w, start_idx)


def fps_npdu_kdtree_sampling(
    pc: np.ndarray,
    n_samples: int,
    w: Optional[int] = None,
    start_idx: Optional[Union[int, List[int]]] = None,
) -> np.ndarray:
    """
    FPS sampling with nearest-point-distance-updating (NPDU) heuristic strategy.
    Using KDTree to eliminate the need of dimensional locality.
    Slower than `fps_npdu_sampling` but more robust.

    Args:
        pc (np.ndarray): The input point cloud of shape (n_pts, D).
        n_samples (int): Number of samples.
        w (int, default=None): Windows size of local heuristic search. If set to None, it will be set to `n_pts / n_samples * 16`.
        start_idx (int or list[int], default=None): The starting index of sampling. If set to None, it will be randomly picked.
            If multiple start indices are given, they are used as a start set for the FPS and will all be present in the final samples.
    Returns:
        np.ndarray: The selected indices of shape (n_samples,).
    """
    assert n_samples >= 1, "n_samples should be >= 1"
    assert pc.ndim == 2
    n_pts, _ = pc.shape
    assert n_pts >= n_samples, "n_pts should be >= n_samples"
    assert (
        start_idx is None or 0 <= start_idx < n_pts
    ), "start_idx should be None or 0 <= start_idx < n_pts"
    if isinstance(start_idx, list):
        assert len(start_idx) <= n_samples, "len(start_idx) should be <= n_samples"
    pc = np.ascontiguousarray(pc, dtype=np.float32)
    w = w or int(n_pts / n_samples * 16)
    if w >= n_pts:
        warnings.warn(f"k is too large, set to {n_pts}")
        w = n_pts
    # Random pick a start if not given
    start_idx = get_start_idx(n_pts, start_idx)
    return _fps_npdu_kdtree_sampling(pc, n_samples, w, start_idx)


def bucket_fps_kdtree_sampling(
    pc: np.ndarray, n_samples: int, start_idx: Optional[Union[int, List[int]]] = None
) -> np.ndarray:
    """
    Bucket-based FPS sampling using KDTree. Also called "QuickFPS" in the paper.

    Args:
        pc (np.ndarray): The input point cloud of shape (n_pts, D).
        n_samples (int): Number of samples.
        start_idx (int or list[int], default=None): The starting index of sampling. If set to None, it will be randomly picked.
            If multiple start indices are given, they are used as a start set for the FPS and will all be present in the final samples.
    Returns:
        np.ndarray: The selected indices of shape (n_samples,).
    """
    assert n_samples >= 1, "n_samples should be >= 1"
    assert pc.ndim == 2
    n_pts, _ = pc.shape
    assert n_pts >= n_samples, "n_pts should be >= n_samples"
    assert (
        start_idx is None or 0 <= start_idx < n_pts
    ), "start_idx should be None or 0 <= start_idx < n_pts"
    if isinstance(start_idx, list):
        assert len(start_idx) <= n_samples, "len(start_idx) should be <= n_samples"
    pc = np.ascontiguousarray(pc, dtype=np.float32)
    # Random pick a start if not given
    start_idx = get_start_idx(n_pts, start_idx)
    return _bucket_fps_kdtree_sampling(pc, n_samples, start_idx)


def bucket_fps_kdline_sampling(
    pc: np.ndarray, n_samples: int, h: int, start_idx: Optional[Union[int, List[int]]] = None
) -> np.ndarray:
    """
    Bucket-based FPS sampling using KDTree, with multiple points in each bucket. Also called "QuickFPS" in the paper.

    Args:
        pc (np.ndarray): The input point cloud of shape (n_pts, D).
        n_samples (int): Number of samples.
        h (int, default=None): Height of KDTree. The bucket size is `2**h`.
            According to the paper, for small workload, h=3 is enough;
            for medium workload, h=5 or 7 is enough; for large workload, h=9 is enough.
        start_idx (int or list[int], default=None): The starting index of sampling. If set to None, it will be randomly picked.
            If multiple start indices are given, they are used as a start set for the FPS and will all be present in the final samples.

    Returns:
        np.ndarray: The selected indices of shape (n_samples,).
    """
    assert n_samples >= 1, "n_samples should be >= 1"
    assert pc.ndim == 2
    n_pts, _ = pc.shape
    assert n_pts >= n_samples, "n_pts should be >= n_samples"
    assert h >= 1, "h should be >= 1"
    assert 2**h <= n_pts, "2**h should be <= n_pts"
    assert (
        start_idx is None or 0 <= start_idx < n_pts
    ), "start_idx should be None or 0 <= start_idx < n_pts"
    if isinstance(start_idx, list):
        assert len(start_idx) <= n_samples, "len(start_idx) should be <= n_samples"
    pc = np.ascontiguousarray(pc, dtype=np.float32)
    # Random pick a start if not given
    start_idx = get_start_idx(n_pts, start_idx)
    return _bucket_fps_kdline_sampling(pc, n_samples, h, start_idx)


__all__ = [
    "__doc__",
    "__version__",
    "fps_sampling",
    "fps_npdu_sampling",
    "fps_npdu_kdtree_sampling",
    "bucket_fps_kdtree_sampling",
    "bucket_fps_kdline_sampling",
]


================================================
FILE: src/lib.cpp
================================================
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include "nanoflann.hpp"
#include "wrapper.hpp"

#if defined(_MSC_VER)
    #include <BaseTsd.h>
    typedef SSIZE_T ssize_t;
#endif

#define STRINGIFY(x) #x
#define MACRO_STRINGIFY(x) STRINGIFY(x)

namespace py = pybind11;

class StartIndex {
public:
    enum Type { SINGLE, ARRAY } type;
    size_t single_idx;
    py::array_t<size_t> array_idx;

    StartIndex(size_t idx) : type(SINGLE), single_idx(idx) {}
    StartIndex(py::array_t<size_t> arr) : type(ARRAY), array_idx(arr) {}
};

struct PointCloud {
    size_t N, dim;
    const float* data;  // row-major: N x dim

    inline size_t kdtree_get_point_count() const { return N; }

    inline float kdtree_distance(const float* a, const size_t b_idx, size_t /*dim_*/) const {
        const float* b = data + b_idx * dim;
        float dist = 0.0f;
        for (size_t i = 0; i < dim; i++) {
            float d = a[i] - b[i];
            dist += d * d;
        }
        return dist;
    }

    inline float kdtree_get_pt(const size_t idx, int dim_) const {
        return data[idx * dim + dim_];
    }

    template <class BBOX>
    bool kdtree_get_bbox(BBOX&) const {
        return false;
    }
};

void check_py_input(
    py::array_t<float, py::array::c_style | py::array::forcecast> points,
    size_t n_samples,
    const StartIndex& start_idx,
    std::optional<size_t> max_dim = std::nullopt
) {
    if (points.ndim() != 2) {
        throw py::value_error(
            "points must be a 2D array, but got shape " + std::to_string(points.ndim())
        );
    }

    ssize_t P = points.shape(0);
    ssize_t C = points.shape(1);

    if (C == 0) {
        throw py::value_error("points must have at least one column");
    }

    if (max_dim.has_value() && C > max_dim.value()) {
        throw py::value_error(
            "points must have at most " + std::to_string(max_dim.value()) +
            " columns, but got " + std::to_string(C)
        );
    }

    if (n_samples > static_cast<size_t>(P)) {
        throw py::value_error(
            "n_samples must be less than the number of points: n_samples=" +
            std::to_string(n_samples) + ", P=" + std::to_string(P)
        );
    }

    if (start_idx.type == StartIndex::SINGLE) {
        if (start_idx.single_idx >= static_cast<size_t>(P)) {
            throw py::value_error(
                "start_idx must be less than the number of points: start_idx=" +
                std::to_string(start_idx.single_idx) + ", P=" + std::to_string(P)
            );
        }
    } else {
        auto arr = start_idx.array_idx.unchecked<1>();
        if (arr.shape(0) > n_samples) {
            throw py::value_error(
                "The number of start indices must be less than or equal to n_samples: " +
                std::to_string(arr.shape(0)) + ", n_samples=" + std::to_string(n_samples)
            );
        }
        for (ssize_t i = 0; i < arr.shape(0); ++i) {
            if (arr(i) >= static_cast<size_t>(P)) {
                throw py::value_error(
                    "All indices in start_idx must be less than the number of points: " +
                    std::to_string(arr(i)) + ", P=" + std::to_string(P)
                );
            }
        }
    }
}

py::array_t<size_t>
fps_sampling_multi_start_index(
    py::array_t<float, py::array::c_style | py::array::forcecast> points,
    size_t n_samples,
    py::array_t<size_t, py::array::c_style | py::array::forcecast> start_idx)
{
    auto pts = points.unchecked<2>();   // 2D view
    auto starts = start_idx.unchecked<1>();

    ssize_t P = pts.shape(0);
    ssize_t C = pts.shape(1);

    if (P <= 0 || C <= 0) {
        throw std::runtime_error("points must be a 2D array");
    }

    size_t res_selected_idx = 0;
    bool has_prev = false;

    std::vector<float> dist_min(P, std::numeric_limits<float>::infinity());
    std::vector<size_t> selected;
    selected.reserve(n_samples);

    size_t start_counter = 0;

    while (selected.size() < n_samples) {
        if (has_prev) {
            size_t i0 = res_selected_idx;

            for (ssize_t i = 0; i < P; i++) {
                float dist = 0.0f;
                for (ssize_t j = 0; j < C; j++) {
                    float d = pts(i, j) - pts(i0, j);
                    dist += d * d;
                }
                if (dist < dist_min[i]) {
                    dist_min[i] = dist;
                }
            }

            if (start_counter < (size_t) starts.shape(0)) {
                size_t idx = starts(start_counter);
                selected.push_back(idx);
                res_selected_idx = idx;
                start_counter++;
            } else {
                size_t max_idx = 0;
                float max_val = -1.0f;

                for (ssize_t i = 0; i < P; i++) {
                    if (dist_min[i] >= max_val) {
                        max_val = dist_min[i];
                        max_idx = i;
                    }
                }

                selected.push_back(max_idx);
                res_selected_idx = max_idx;
            }

        } else {
            size_t idx = starts(start_counter);
            selected.push_back(idx);
            res_selected_idx = idx;
            start_counter++;
            has_prev = true;
        }
    }

    py::array_t<size_t> out(selected.size());
    auto out_buf = out.mutable_unchecked<1>();
    for (size_t i = 0; i < selected.size(); i++) {
        out_buf(i) = selected[i];
    }
    return out;
}

py::array_t<size_t> fps_sampling(
    py::array_t<float, py::array::c_style | py::array::forcecast> points,
    size_t n_samples,
    size_t start_idx
) {
    auto pts = points.unchecked<2>();
    ssize_t P = pts.shape(0);
    ssize_t C = pts.shape(1);

    if (P <= 0 || C <= 0) {
        throw py::value_error("points must be a 2D array with at least one column");
    }
    if (start_idx >= static_cast<size_t>(P)) {
        throw py::value_error("start_idx out of range");
    }

    size_t res_selected_idx = start_idx;
    bool has_prev = false;
    std::vector<float> dist_min(P, std::numeric_limits<float>::infinity());

    std::vector<size_t> selected;
    selected.reserve(n_samples);

    while (selected.size() < n_samples) {
        if (has_prev) {
            size_t i0 = res_selected_idx;
            for (ssize_t i = 0; i < P; ++i) {
                float dist = 0.0f;
                for (ssize_t j = 0; j < C; ++j) {
                    float d = pts(i, j) - pts(i0, j);
                    dist += d * d;
                }
                if (dist < dist_min[i]) dist_min[i] = dist;
            }

            size_t max_idx = 0;
            float max_val = -1.0f;
            for (ssize_t i = 0; i < P; ++i) {
                if (dist_min[i] >= max_val) {
                    max_val = dist_min[i];
                    max_idx = i;
                }
            }
            selected.push_back(max_idx);
            res_selected_idx = max_idx;
        } else {
            selected.push_back(start_idx);
            res_selected_idx = start_idx;
            has_prev = true;
        }
    }

    py::array_t<size_t> out(selected.size());
    auto out_buf = out.mutable_unchecked<1>();
    for (size_t i = 0; i < selected.size(); ++i) {
        out_buf(i) = selected[i];
    }
    return out;
}

// EXPORT TO _fps_sample
py::array_t<size_t> _fps_sampling(
    py::array_t<float, py::array::c_style | py::array::forcecast> points,
    size_t n_samples,
    py::object start_idx_obj
) {
    StartIndex start_idx = [&]() -> StartIndex {
        if (py::isinstance<py::int_>(start_idx_obj)) {
            return StartIndex(start_idx_obj.cast<size_t>());
        } else if (py::isinstance<py::array_t<size_t>>(start_idx_obj)) {
            return StartIndex(start_idx_obj.cast<py::array_t<size_t>>());
        } else {
            throw py::type_error("start_idx must be int or 1D numpy array of size_t");
        }
    }();

    check_py_input(points, n_samples, start_idx, std::nullopt);

    if (start_idx.type == StartIndex::SINGLE)
        return fps_sampling(points, n_samples, start_idx.single_idx);
    else
        return fps_sampling_multi_start_index(points, n_samples, start_idx.array_idx);
}

py::array_t<size_t> fps_npdu_sampling(
    py::array_t<float, py::array::c_style | py::array::forcecast> points,
    size_t n_samples,
    size_t k,
    size_t start_idx
) {
    auto pts = points.unchecked<2>();
    ssize_t P = pts.shape(0);
    ssize_t C = pts.shape(1);

    if (P <= 0 || C <= 0)
        throw py::value_error("points must be a 2D array with at least one column");
    if (start_idx >= static_cast<size_t>(P))
        throw py::value_error("start_idx out of range");

    std::vector<float> dist_min(P, std::numeric_limits<float>::infinity());
    std::vector<size_t> selected;
    selected.reserve(n_samples);

    size_t res_selected_idx = start_idx;
    bool has_prev = false;

    while (selected.size() < n_samples) {
        if (has_prev) {
            ssize_t hw = static_cast<ssize_t>(k / 2);
            ssize_t start = static_cast<ssize_t>(res_selected_idx) - hw;
            ssize_t end   = static_cast<ssize_t>(res_selected_idx) + hw;
            if (start < 0) { end -= start; start = 0; }
            if (end >= P) { start = std::max(start - (end - P + 1), ssize_t(0)); end = P - 1; }

            for (ssize_t i = start; i <= end; ++i) {
                float dist = 0.0f;
                for (ssize_t j = 0; j < C; ++j) {
                    float d = pts(i, j) - pts(res_selected_idx, j);
                    dist += d*d;
                }
                if (dist < dist_min[i]) dist_min[i] = dist;
            }

            size_t max_idx = 0;
            float max_val = -1.0f;
            for (ssize_t i = 0; i < P; ++i) {
                if (dist_min[i] > max_val) { max_val = dist_min[i]; max_idx = i; }
            }
            selected.push_back(max_idx);
            res_selected_idx = max_idx;

        } else {
            for (ssize_t i = 0; i < P; ++i) {
                float dist = 0.0f;
                for (ssize_t j = 0; j < C; ++j) {
                    float d = pts(i,j) - pts(start_idx,j);
                    dist += d*d;
                }
                if (dist < dist_min[i]) dist_min[i] = dist;
            }
            selected.push_back(start_idx);
            res_selected_idx = start_idx;
            has_prev = true;
        }
    }

    py::array_t<size_t> out(selected.size());
    auto out_buf = out.mutable_unchecked<1>();
    for (size_t i = 0; i < selected.size(); ++i)
        out_buf(i) = selected[i];

    return out;
}

// EXPORT TO _fps_npdu_sample
py::array_t<size_t> fps_npdu_sampling_py(
    py::array_t<float, py::array::c_style | py::array::forcecast> points,
    size_t n_samples,
    size_t k,
    py::object start_idx_obj
) {
    StartIndex start_idx = [&]() -> StartIndex {
        if (py::isinstance<py::int_>(start_idx_obj))
            return StartIndex(start_idx_obj.cast<size_t>());
        else if (py::isinstance<py::array_t<size_t>>(start_idx_obj))
            return StartIndex(start_idx_obj.cast<py::array_t<size_t>>());
        else
            throw py::type_error("start_idx must be int or 1D numpy array of size_t");
    }();

    check_py_input(points, n_samples, start_idx);

    if (start_idx.type == StartIndex::SINGLE)
        return fps_npdu_sampling(points, n_samples, k, start_idx.single_idx);
    else {
        PyErr_SetString(PyExc_NotImplementedError, "Array of start indices not implemented yet");
        throw py::error_already_set();
    }
}

// EXPORT TO _fps_npdu_kdtree_sample
py::array_t<size_t> fps_npdu_kdtree_sampling_py(
    py::array_t<float, py::array::c_style | py::array::forcecast> points,
    size_t n_samples,
    size_t k,
    py::object start_idx_obj
) {
    StartIndex start_idx = [&]() -> StartIndex {
        if (py::isinstance<py::int_>(start_idx_obj))
            return StartIndex(start_idx_obj.cast<size_t>());
        else if (py::isinstance<py::array_t<size_t>>(start_idx_obj))
            return StartIndex(start_idx_obj.cast<py::array_t<size_t>>());
        else
            throw py::type_error("start_idx must be int or 1D numpy array of size_t");
    }();

    check_py_input(points, n_samples, start_idx);

    if (start_idx.type == StartIndex::ARRAY) {
        PyErr_SetString(PyExc_NotImplementedError, "Array of start indices not implemented yet");
        throw py::error_already_set();
    }

    auto pts = points.unchecked<2>();
    ssize_t P = pts.shape(0);
    ssize_t C = pts.shape(1);

    if (P <= 0 || C <= 0)
        throw py::value_error("points must be a 2D array with at least one column");

    PointCloud cloud;
    cloud.N = static_cast<size_t>(P);
    cloud.dim = static_cast<size_t>(C);
    cloud.data = points.data();

    using KDTree = nanoflann::KDTreeSingleIndexAdaptor<nanoflann::L2_Simple_Adaptor<float, PointCloud>, PointCloud, -1>;
    KDTree index(static_cast<int>(C), cloud, nanoflann::KDTreeSingleIndexAdaptorParams(10));
    index.buildIndex();

    std::vector<float> dist_min(P, std::numeric_limits<float>::infinity());
    std::vector<size_t> selected;
    selected.reserve(n_samples);

    size_t res_selected_idx = start_idx.single_idx;
    bool has_prev = false;

    size_t k_use = std::min<size_t>(k, static_cast<size_t>(P));
    std::vector<size_t> ret_indexes(k_use);
    std::vector<float> out_dists(k_use);

    while (selected.size() < n_samples) {
        if (has_prev) {
            std::vector<float> query(static_cast<size_t>(C));
            for (ssize_t d = 0; d < C; ++d) query[d] = pts(res_selected_idx, d);

            nanoflann::KNNResultSet<float> resultSet(static_cast<int>(k_use));
            resultSet.init(ret_indexes.data(), out_dists.data());
            nanoflann::SearchParameters params;
            index.findNeighbors(resultSet, query.data(), params);

            for (size_t idx_i = 0; idx_i < k_use; ++idx_i) {
                size_t nb = ret_indexes[idx_i];
                float dist = 0.0f;
                for (ssize_t d = 0; d < C; ++d) {
                    float diff = pts(nb, d) - pts(res_selected_idx, d);
                    dist += diff * diff;
                }
                if (dist < dist_min[nb]) dist_min[nb] = dist;
            }

            size_t max_idx = 0;
            float max_val = -1.0f;
            for (ssize_t i = 0; i < P; ++i) {
                if (dist_min[i] > max_val) { max_val = dist_min[i]; max_idx = i; }
            }

            selected.push_back(max_idx);
            res_selected_idx = max_idx;
        } else {
            for (ssize_t i = 0; i < P; ++i) {
                float dist = 0.0f;
                for (ssize_t j = 0; j < C; ++j) {
                    float d = pts(i,j) - pts(res_selected_idx,j);
                    dist += d*d;
                }
                if (dist < dist_min[i]) dist_min[i] = dist;
            }
            selected.push_back(res_selected_idx);
            has_prev = true;
        }
    }

    py::array_t<size_t> out(selected.size());
    auto out_buf = out.mutable_unchecked<1>();
    for (size_t i = 0; i < selected.size(); ++i) out_buf(i) = selected[i];

    return out;
}

py::array_t<size_t> bucket_fps_kdtree_sampling_py(
    py::array_t<float, py::array::c_style | py::array::forcecast> points,
    size_t n_samples,
    py::object start_idx_obj
) {
    StartIndex start_idx = [&]() -> StartIndex {
        if (py::isinstance<py::int_>(start_idx_obj))
            return StartIndex(start_idx_obj.cast<size_t>());
        else if (py::isinstance<py::array_t<size_t>>(start_idx_obj))
            return StartIndex(start_idx_obj.cast<py::array_t<size_t>>());
        else
            throw py::type_error("start_idx must be int or 1D numpy array of size_t");
    }();

    if (start_idx.type == StartIndex::ARRAY) {
        PyErr_SetString(PyExc_NotImplementedError, "Array of start indices not implemented yet");
        throw py::error_already_set();
    }

    check_py_input(points, n_samples, start_idx);

    if (points.ndim() != 2) {
        throw py::value_error("points must be a 2D float32 array");
    }
    ssize_t P = points.shape(0);
    ssize_t C = points.shape(1);

    if (start_idx.single_idx >= static_cast<size_t>(P)) {
        throw py::value_error("start_idx out of range");
    }
    if (n_samples == 0 || n_samples > static_cast<size_t>(P)) {
        throw py::value_error("n_samples must be in [1, num_points]");
    }

    auto buf = points.unchecked<2>();

    py::array_t<size_t> out(n_samples);
    size_t* out_ptr = static_cast<size_t*>(out.mutable_data());

    int ret = bucket_fps_kdtree(
        buf.data(0,0),                       // raw_data
        static_cast<size_t>(P),              // n_points
        static_cast<size_t>(C),              // dim
        n_samples,                           // n_samples
        start_idx.single_idx,                // start_idx
        out_ptr                              // output buffer
    );

    if (ret != 0) {
        throw std::runtime_error("bucket_fps_kdtree failed with error code " + std::to_string(ret));
    }

    return out;
}

py::array_t<size_t> bucket_fps_kdline_sampling_py(
    py::array_t<float, py::array::c_style | py::array::forcecast> points,
    size_t n_samples,
    size_t height,
    py::object start_idx_obj
) {
    StartIndex start_idx = [&]() -> StartIndex {
        if (py::isinstance<py::int_>(start_idx_obj))
            return StartIndex(start_idx_obj.cast<size_t>());
        else if (py::isinstance<py::array_t<size_t>>(start_idx_obj))
            return StartIndex(start_idx_obj.cast<py::array_t<size_t>>());
        else
            throw py::type_error("start_idx must be int or 1D numpy array of size_t");
    }();

    if (start_idx.type == StartIndex::ARRAY) {
        PyErr_SetString(PyExc_NotImplementedError, "Array of start indices not implemented yet");
        throw py::error_already_set();
    }

    if (points.ndim() != 2) {
        throw py::value_error("points must be a 2D float32 array");
    }

    ssize_t P = points.shape(0);
    ssize_t C = points.shape(1);

    if (start_idx.single_idx >= static_cast<size_t>(P)) {
        throw py::value_error("start_idx out of range");
    }
    if (n_samples == 0 || n_samples > static_cast<size_t>(P)) {
        throw py::value_error("n_samples must be in [1, num_points]");
    }
    if (height == 0) {
        throw py::value_error("height must be >= 1");
    }

    auto buf = points.unchecked<2>();

    py::array_t<size_t> out(n_samples);
    size_t* out_ptr = static_cast<size_t*>(out.mutable_data());

    int ret = bucket_fps_kdline(
        buf.data(0,0),                        // raw_data
        static_cast<size_t>(P),               // n_points
        static_cast<size_t>(C),               // dim
        n_samples,                            // n_samples
        start_idx.single_idx,                 // start_idx
        height,                               // window height
        out_ptr                               // output buffer
    );

    if (ret != 0) {
        throw std::runtime_error("bucket_fps_kdline failed with error code " + std::to_string(ret));
    }

    return out;
}

PYBIND11_MODULE(_fpsample, m, py::mod_gil_not_used(), py::multiple_interpreters::per_interpreter_gil()) {
    m.doc() = R"pbdoc(
        Python efficient farthest point sampling (FPS) library
        -----------------------

        .. currentmodule:: fpsample

        .. autosummary::
           :toctree: _generate

           _fps_sampling
           _fps_npdu_sampling
           _fps_npdu_kdtree_sampling
           _bucket_fps_kdtree_sampling
           _bucket_fps_kdline_sampling
    )pbdoc";

    m.def("_fps_sampling", &_fps_sampling, R"pbdoc(
            Farthest Point Sampling (FPS)
            Args:
                points (np.ndarray[float32, 2D]): N x C point array.
                n_samples (int): number of samples to pick.
                start_idx (int or np.ndarray[int32, 1D]): initial index or indices to start FPS.
            Returns:
                np.ndarray[int32]: sampled point indices.
    )pbdoc");

    m.def("_fps_npdu_sampling", &fps_npdu_sampling_py, R"pbdoc(
            FPS with Nearest Point Distance Update
            Args:
                points (np.ndarray[float32, 2D]): N x C point array.
                n_samples (int): number of samples to pick.
                k (int): number of neighbors for local update.
                start_idx (int): initial index to start FPS.
            Returns:
                np.ndarray[int32]: sampled point indices.
    )pbdoc");

    m.def("_fps_npdu_kdtree_sampling", &fps_npdu_kdtree_sampling_py, R"pbdoc(
            FPS with Nearest Point Distance Update using KD-tree acceleration
            Args:
                points (np.ndarray[float32, 2D]): N x C point array.
                n_samples (int): number of samples to pick.
                k (int): number of neighbors for local update.
                start_idx (int): initial index to start FPS.
            Returns:
                np.ndarray[int32]: sampled point indices.
    )pbdoc");

    m.def("_bucket_fps_kdtree_sampling",
      &bucket_fps_kdtree_sampling_py,
      R"pbdoc(
          Bucket FPS sampling using KD-tree acceleration.
          Args:
              points (np.ndarray[float32, 2D]): N x C point array.
              n_samples (int): number of samples to pick.
              start_idx (int): initial index to start FPS.
          Returns:
              np.ndarray[int32]: sampled point indices.
      )pbdoc");

m.def("_bucket_fps_kdline_sampling",
      &bucket_fps_kdline_sampling_py,
      R"pbdoc(
          Bucket FPS sampling using KD-line local window update.
          Args:
              points (np.ndarray[float32, 2D]): N x C point array.
              n_samples (int): number of samples to pick.
              height (int): window size around selected point.
              start_idx (int): first index.
          Returns:
              np.ndarray[int32]: sampled point indices.
      )pbdoc");

#ifdef VERSION_INFO
    m.attr("__version__") = MACRO_STRINGIFY(VERSION_INFO);
#else
    m.attr("__version__") = "dev";
#endif
}


================================================
FILE: src/nanoflann.hpp
================================================
/***********************************************************************
 * Software License Agreement (BSD License)
 *
 * Copyright 2008-2009  Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
 * Copyright 2008-2009  David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
 * Copyright 2011-2025  Jose Luis Blanco (joseluisblancoc@gmail.com).
 *   All rights reserved.
 *
 * THE BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *************************************************************************/

/** \mainpage nanoflann C++ API documentation
 *  nanoflann is a C++ header-only library for building KD-Trees, mostly
 *  optimized for 2D or 3D point clouds.
 *
 *  nanoflann does not require compiling or installing, just an
 *  #include <nanoflann.hpp> in your code.
 *
 *  See:
 *   - [Online README](https://github.com/jlblancoc/nanoflann)
 *   - [C++ API documentation](https://jlblancoc.github.io/nanoflann/)
 */

#pragma once

#include <algorithm>
#include <array>
#include <atomic>
#include <cassert>
#include <cmath>  // for abs()
#include <cstdint>
#include <cstdlib>  // for abs()
#include <functional>  // std::reference_wrapper
#include <future>
#include <istream>
#include <limits>  // std::numeric_limits
#include <ostream>
#include <stack>
#include <stdexcept>
#include <unordered_set>
#include <vector>

/** Library version: 0xMmP (M=Major,m=minor,P=patch) */
#define NANOFLANN_VERSION 0x180

// Avoid conflicting declaration of min/max macros in Windows headers
#if !defined(NOMINMAX) && \
    (defined(_WIN32) || defined(_WIN32_) || defined(WIN32) || defined(_WIN64))
#define NOMINMAX
#ifdef max
#undef max
#undef min
#endif
#endif
// Avoid conflicts with X11 headers
#ifdef None
#undef None
#endif

namespace nanoflann
{
/** @addtogroup nanoflann_grp nanoflann C++ library for KD-trees
 *  @{ */

/** the PI constant (required to avoid MSVC missing symbols) */
template <typename T>
T pi_const()
{
    return static_cast<T>(3.14159265358979323846);
}

/**
 * Traits if object is resizable and assignable (typically has a resize | assign
 * method)
 */
template <typename T, typename = int>
struct has_resize : std::false_type
{
};

template <typename T>
struct has_resize<T, decltype((void)std::declval<T>().resize(1), 0)>
    : std::true_type
{
};

template <typename T, typename = int>
struct has_assign : std::false_type
{
};

template <typename T>
struct has_assign<T, decltype((void)std::declval<T>().assign(1, 0), 0)>
    : std::true_type
{
};

/**
 * Free function to resize a resizable object
 */
template <typename Container>
inline typename std::enable_if<has_resize<Container>::value, void>::type resize(
    Container& c, const size_t nElements)
{
    c.resize(nElements);
}

/**
 * Free function that has no effects on non resizable containers (e.g.
 * std::array) It raises an exception if the expected size does not match
 */
template <typename Container>
inline typename std::enable_if<!has_resize<Container>::value, void>::type
    resize(Container& c, const size_t nElements)
{
    if (nElements != c.size())
        throw std::logic_error("Try to change the size of a std::array.");
}

/**
 * Free function to assign to a container
 */
template <typename Container, typename T>
inline typename std::enable_if<has_assign<Container>::value, void>::type assign(
    Container& c, const size_t nElements, const T& value)
{
    c.assign(nElements, value);
}

/**
 * Free function to assign to a std::array
 */
template <typename Container, typename T>
inline typename std::enable_if<!has_assign<Container>::value, void>::type
    assign(Container& c, const size_t nElements, const T& value)
{
    for (size_t i = 0; i < nElements; i++) c[i] = value;
}

/** operator "<" for std::sort() */
struct IndexDist_Sorter
{
    /** PairType will be typically: ResultItem<IndexType,DistanceType> */
    template <typename PairType>
    bool operator()(const PairType& p1, const PairType& p2) const
    {
        return p1.second < p2.second;
    }
};

/**
 * Each result element in RadiusResultSet. Note that distances and indices
 * are named `first` and `second` to keep backward-compatibility with the
 * `std::pair<>` type used in the past. In contrast, this structure is ensured
 * to be `std::is_standard_layout` so it can be used in wrappers to other
 * languages.
 * See: https://github.com/jlblancoc/nanoflann/issues/166
 */
template <typename IndexType = size_t, typename DistanceType = double>
struct ResultItem
{
    ResultItem() = default;
    ResultItem(const IndexType index, const DistanceType distance)
        : first(index), second(distance)
    {
    }

    IndexType    first;  //!< Index of the sample in the dataset
    DistanceType second;  //!< Distance from sample to query point
};

/** @addtogroup result_sets_grp Result set classes
 *  @{ */

/** Result set for KNN searches (N-closest neighbors) */
template <
    typename _DistanceType, typename _IndexType = size_t,
    typename _CountType = size_t>
class KNNResultSet
{
   public:
    using DistanceType = _DistanceType;
    using IndexType    = _IndexType;
    using CountType    = _CountType;

   private:
    IndexType*    indices;
    DistanceType* dists;
    CountType     capacity;
    CountType     count;

   public:
    explicit KNNResultSet(CountType capacity_)
        : indices(nullptr), dists(nullptr), capacity(capacity_), count(0)
    {
    }

    void init(IndexType* indices_, DistanceType* dists_)
    {
        indices = indices_;
        dists   = dists_;
        count   = 0;
    }

    CountType size() const { return count; }
    bool      empty() const { return count == 0; }
    bool      full() const { return count == capacity; }

    /**
     * Called during search to add an element matching the criteria.
     * @return true if the search should be continued, false if the results are
     * sufficient
     */
    bool addPoint(DistanceType dist, IndexType index)
    {
        CountType i;
        for (i = count; i > 0; --i)
        {
            /** If defined and two points have the same distance, the one with
             *  the lowest-index will be returned first. */
#ifdef NANOFLANN_FIRST_MATCH
            if ((dists[i - 1] > dist) ||
                ((dist == dists[i - 1]) && (indices[i - 1] > index)))
            {
#else
            if (dists[i - 1] > dist)
            {
#endif
                if (i < capacity)
                {
                    dists[i]   = dists[i - 1];
                    indices[i] = indices[i - 1];
                }
            }
            else
                break;
        }
        if (i < capacity)
        {
            dists[i]   = dist;
            indices[i] = index;
        }
        if (count < capacity) count++;

        // tell caller that the search shall continue
        return true;
    }

    //! Returns the worst distance among found solutions if the search result is
    //! full, or the maximum possible distance, if not full yet.
    DistanceType worstDist() const
    {
        return (count < capacity || !count)
                   ? std::numeric_limits<DistanceType>::max()
                   : dists[count - 1];
    }

    void sort()
    {
        // already sorted
    }
};

/** Result set for RKNN searches (N-closest neighbors with a maximum radius) */
template <
    typename _DistanceType, typename _IndexType = size_t,
    typename _CountType = size_t>
class RKNNResultSet
{
   public:
    using DistanceType = _DistanceType;
    using IndexType    = _IndexType;
    using CountType    = _CountType;

   private:
    IndexType*    indices;
    DistanceType* dists;
    CountType     capacity;
    CountType     count;
    DistanceType  maximumSearchDistanceSquared;

   public:
    explicit RKNNResultSet(
        CountType capacity_, DistanceType maximumSearchDistanceSquared_)
        : indices(nullptr),
          dists(nullptr),
          capacity(capacity_),
          count(0),
          maximumSearchDistanceSquared(maximumSearchDistanceSquared_)
    {
    }

    void init(IndexType* indices_, DistanceType* dists_)
    {
        indices = indices_;
        dists   = dists_;
        count   = 0;
        if (capacity) dists[capacity - 1] = maximumSearchDistanceSquared;
    }

    CountType size() const { return count; }
    bool      empty() const { return count == 0; }
    bool      full() const { return count == capacity; }

    /**
     * Called during search to add an element matching the criteria.
     * @return true if the search should be continued, false if the results are
     * sufficient
     */
    bool addPoint(DistanceType dist, IndexType index)
    {
        CountType i;
        for (i = count; i > 0; --i)
        {
            /** If defined and two points have the same distance, the one with
             *  the lowest-index will be returned first. */
#ifdef NANOFLANN_FIRST_MATCH
            if ((dists[i - 1] > dist) ||
                ((dist == dists[i - 1]) && (indices[i - 1] > index)))
            {
#else
            if (dists[i - 1] > dist)
            {
#endif
                if (i < capacity)
                {
                    dists[i]   = dists[i - 1];
                    indices[i] = indices[i - 1];
                }
            }
            else
                break;
        }
        if (i < capacity)
        {
            dists[i]   = dist;
            indices[i] = index;
        }
        if (count < capacity) count++;

        // tell caller that the search shall continue
        return true;
    }

    //! Returns the worst distance among found solutions if the search result is
    //! full, or the maximum possible distance, if not full yet.
    DistanceType worstDist() const
    {
        return (count < capacity || !count) ? maximumSearchDistanceSquared
                                            : dists[count - 1];
    }

    void sort()
    {
        // already sorted
    }
};

/**
 * A result-set class used when performing a radius based search.
 */
template <typename _DistanceType, typename _IndexType = size_t>
class RadiusResultSet
{
   public:
    using DistanceType = _DistanceType;
    using IndexType    = _IndexType;

   public:
    const DistanceType radius;

    std::vector<ResultItem<IndexType, DistanceType>>& m_indices_dists;

    explicit RadiusResultSet(
        DistanceType                                      radius_,
        std::vector<ResultItem<IndexType, DistanceType>>& indices_dists)
        : radius(radius_), m_indices_dists(indices_dists)
    {
        init();
    }

    void init() { clear(); }
    void clear() { m_indices_dists.clear(); }

    size_t size() const { return m_indices_dists.size(); }
    size_t empty() const { return m_indices_dists.empty(); }

    bool full() const { return true; }

    /**
     * Called during search to add an element matching the criteria.
     * @return true if the search should be continued, false if the results are
     * sufficient
     */
    bool addPoint(DistanceType dist, IndexType index)
    {
        if (dist < radius) m_indices_dists.emplace_back(index, dist);
        return true;
    }

    DistanceType worstDist() const { return radius; }

    /**
     * Find the worst result (farthest neighbor) without copying or sorting
     * Pre-conditions: size() > 0
     */
    ResultItem<IndexType, DistanceType> worst_item() const
    {
        if (m_indices_dists.empty())
            throw std::runtime_error(
                "Cannot invoke RadiusResultSet::worst_item() on "
                "an empty list of results.");
        auto it = std::max_element(
            m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter());
        return *it;
    }

    void sort()
    {
        std::sort(
            m_indices_dists.begin(), m_indices_dists.end(), IndexDist_Sorter());
    }
};

/** @} */

/** @addtogroup loadsave_grp Load/save auxiliary functions
 * @{ */
template <typename T>
void save_value(std::ostream& stream, const T& value)
{
    stream.write(reinterpret_cast<const char*>(&value), sizeof(T));
}

template <typename T>
void save_value(std::ostream& stream, const std::vector<T>& value)
{
    size_t size = value.size();
    stream.write(reinterpret_cast<const char*>(&size), sizeof(size_t));
    stream.write(reinterpret_cast<const char*>(value.data()), sizeof(T) * size);
}

template <typename T>
void load_value(std::istream& stream, T& value)
{
    stream.read(reinterpret_cast<char*>(&value), sizeof(T));
}

template <typename T>
void load_value(std::istream& stream, std::vector<T>& value)
{
    size_t size;
    stream.read(reinterpret_cast<char*>(&size), sizeof(size_t));
    value.resize(size);
    stream.read(reinterpret_cast<char*>(value.data()), sizeof(T) * size);
}
/** @} */

/** @addtogroup metric_grp Metric (distance) classes
 * @{ */

struct Metric
{
};

/** Manhattan distance functor (generic version, optimized for
 * high-dimensionality data sets). Corresponding distance traits:
 * nanoflann::metric_L1
 *
 * \tparam T Type of the elements (e.g. double, float, uint8_t)
 * \tparam DataSource Source of the data, i.e. where the vectors are stored
 * \tparam _DistanceType Type of distance variables (must be signed)
 * \tparam IndexType Type of the arguments with which the data can be
 * accessed (e.g. float, double, int64_t, T*)
 */
template <
    class T, class DataSource, typename _DistanceType = T,
    typename IndexType = size_t>
struct L1_Adaptor
{
    using ElementType  = T;
    using DistanceType = _DistanceType;

    const DataSource& data_source;

    L1_Adaptor(const DataSource& _data_source) : data_source(_data_source) {}

    DistanceType evalMetric(
        const T* a, const IndexType b_idx, size_t size,
        DistanceType worst_dist = -1) const
    {
        DistanceType result    = DistanceType();
        const T*     last      = a + size;
        const T*     lastgroup = last - 3;
        size_t       d         = 0;

        /* Process 4 items with each loop for efficiency. */
        while (a < lastgroup)
        {
            const DistanceType diff0 =
                std::abs(a[0] - data_source.kdtree_get_pt(b_idx, d++));
            const DistanceType diff1 =
                std::abs(a[1] - data_source.kdtree_get_pt(b_idx, d++));
            const DistanceType diff2 =
                std::abs(a[2] - data_source.kdtree_get_pt(b_idx, d++));
            const DistanceType diff3 =
                std::abs(a[3] - data_source.kdtree_get_pt(b_idx, d++));
            result += diff0 + diff1 + diff2 + diff3;
            a += 4;
            if ((worst_dist > 0) && (result > worst_dist)) { return result; }
        }
        /* Process last 0-3 components.  Not needed for standard vector lengths.
         */
        while (a < last)
        {
            result += std::abs(*a++ - data_source.kdtree_get_pt(b_idx, d++));
        }
        return result;
    }

    template <typename U, typename V>
    DistanceType accum_dist(const U a, const V b, const size_t) const
    {
        return std::abs(a - b);
    }
};

/** **Squared** Euclidean distance functor (generic version, optimized for
 * high-dimensionality data sets). Corresponding distance traits:
 * nanoflann::metric_L2
 *
 * \tparam T Type of the elements (e.g. double, float, uint8_t)
 * \tparam DataSource Source of the data, i.e. where the vectors are stored
 * \tparam _DistanceType Type of distance variables (must be signed)
 * \tparam IndexType Type of the arguments with which the data can be
 * accessed (e.g. float, double, int64_t, T*)
 */
template <
    class T, class DataSource, typename _DistanceType = T,
    typename IndexType = size_t>
struct L2_Adaptor
{
    using ElementType  = T;
    using DistanceType = _DistanceType;

    const DataSource& data_source;

    L2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {}

    DistanceType evalMetric(
        const T* a, const IndexType b_idx, size_t size,
        DistanceType worst_dist = -1) const
    {
        DistanceType result    = DistanceType();
        const T*     last      = a + size;
        const T*     lastgroup = last - 3;
        size_t       d         = 0;

        /* Process 4 items with each loop for efficiency. */
        while (a < lastgroup)
        {
            const DistanceType diff0 =
                a[0] - data_source.kdtree_get_pt(b_idx, d++);
            const DistanceType diff1 =
                a[1] - data_source.kdtree_get_pt(b_idx, d++);
            const DistanceType diff2 =
                a[2] - data_source.kdtree_get_pt(b_idx, d++);
            const DistanceType diff3 =
                a[3] - data_source.kdtree_get_pt(b_idx, d++);
            result +=
                diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
            a += 4;
            if ((worst_dist > 0) && (result > worst_dist)) { return result; }
        }
        /* Process last 0-3 components.  Not needed for standard vector lengths.
         */
        while (a < last)
        {
            const DistanceType diff0 =
                *a++ - data_source.kdtree_get_pt(b_idx, d++);
            result += diff0 * diff0;
        }
        return result;
    }

    template <typename U, typename V>
    DistanceType accum_dist(const U a, const V b, const size_t) const
    {
        return (a - b) * (a - b);
    }
};

/** **Squared** Euclidean (L2) distance functor (suitable for low-dimensionality
 * datasets, like 2D or 3D point clouds) Corresponding distance traits:
 * nanoflann::metric_L2_Simple
 *
 * \tparam T Type of the elements (e.g. double, float, uint8_t)
 * \tparam DataSource Source of the data, i.e. where the vectors are stored
 * \tparam _DistanceType Type of distance variables (must be signed)
 * \tparam IndexType Type of the arguments with which the data can be
 * accessed (e.g. float, double, int64_t, T*)
 */
template <
    class T, class DataSource, typename _DistanceType = T,
    typename IndexType = size_t>
struct L2_Simple_Adaptor
{
    using ElementType  = T;
    using DistanceType = _DistanceType;

    const DataSource& data_source;

    L2_Simple_Adaptor(const DataSource& _data_source)
        : data_source(_data_source)
    {
    }

    DistanceType evalMetric(
        const T* a, const IndexType b_idx, size_t size) const
    {
        DistanceType result = DistanceType();
        for (size_t i = 0; i < size; ++i)
        {
            const DistanceType diff =
                a[i] - data_source.kdtree_get_pt(b_idx, i);
            result += diff * diff;
        }
        return result;
    }

    template <typename U, typename V>
    DistanceType accum_dist(const U a, const V b, const size_t) const
    {
        return (a - b) * (a - b);
    }
};

/** SO2 distance functor
 *  Corresponding distance traits: nanoflann::metric_SO2
 *
 * \tparam T Type of the elements (e.g. double, float, uint8_t)
 * \tparam DataSource Source of the data, i.e. where the vectors are stored
 * \tparam _DistanceType Type of distance variables (must be signed) (e.g.
 * float, double) orientation is constrained to be in [-pi, pi]
 * \tparam IndexType Type of the arguments with which the data can be
 * accessed (e.g. float, double, int64_t, T*)
 */
template <
    class T, class DataSource, typename _DistanceType = T,
    typename IndexType = size_t>
struct SO2_Adaptor
{
    using ElementType  = T;
    using DistanceType = _DistanceType;

    const DataSource& data_source;

    SO2_Adaptor(const DataSource& _data_source) : data_source(_data_source) {}

    DistanceType evalMetric(
        const T* a, const IndexType b_idx, size_t size) const
    {
        return accum_dist(
            a[size - 1], data_source.kdtree_get_pt(b_idx, size - 1), size - 1);
    }

    /** Note: this assumes that input angles are already in the range [-pi,pi]
     */
    template <typename U, typename V>
    DistanceType accum_dist(const U a, const V b, const size_t) const
    {
        DistanceType result = DistanceType();
        DistanceType PI     = pi_const<DistanceType>();
        result              = b - a;
        if (result > PI)
            result -= 2 * PI;
        else if (result < -PI)
            result += 2 * PI;
        return result;
    }
};

/** SO3 distance functor (Uses L2_Simple)
 *  Corresponding distance traits: nanoflann::metric_SO3
 *
 * \tparam T Type of the elements (e.g. double, float, uint8_t)
 * \tparam DataSource Source of the data, i.e. where the vectors are stored
 * \tparam _DistanceType Type of distance variables (must be signed) (e.g.
 * float, double)
 * \tparam IndexType Type of the arguments with which the data can be
 * accessed (e.g. float, double, int64_t, T*)
 */
template <
    class T, class DataSource, typename _DistanceType = T,
    typename IndexType = size_t>
struct SO3_Adaptor
{
    using ElementType  = T;
    using DistanceType = _DistanceType;

    L2_Simple_Adaptor<T, DataSource, DistanceType, IndexType>
        distance_L2_Simple;

    SO3_Adaptor(const DataSource& _data_source)
        : distance_L2_Simple(_data_source)
    {
    }

    DistanceType evalMetric(
        const T* a, const IndexType b_idx, size_t size) const
    {
        return distance_L2_Simple.evalMetric(a, b_idx, size);
    }

    template <typename U, typename V>
    DistanceType accum_dist(const U a, const V b, const size_t idx) const
    {
        return distance_L2_Simple.accum_dist(a, b, idx);
    }
};

/** Metaprogramming helper traits class for the L1 (Manhattan) metric */
struct metric_L1 : public Metric
{
    template <class T, class DataSource, typename IndexType = size_t>
    struct traits
    {
        using distance_t = L1_Adaptor<T, DataSource, T, IndexType>;
    };
};
/** Metaprogramming helper traits class for the L2 (Euclidean) **squared**
 * distance metric */
struct metric_L2 : public Metric
{
    template <class T, class DataSource, typename IndexType = size_t>
    struct traits
    {
        using distance_t = L2_Adaptor<T, DataSource, T, IndexType>;
    };
};
/** Metaprogramming helper traits class for the L2_simple (Euclidean)
 * **squared** distance metric */
struct metric_L2_Simple : public Metric
{
    template <class T, class DataSource, typename IndexType = size_t>
    struct traits
    {
        using distance_t = L2_Simple_Adaptor<T, DataSource, T, IndexType>;
    };
};
/** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */
struct metric_SO2 : public Metric
{
    template <class T, class DataSource, typename IndexType = size_t>
    struct traits
    {
        using distance_t = SO2_Adaptor<T, DataSource, T, IndexType>;
    };
};
/** Metaprogramming helper traits class for the SO3_InnerProdQuat metric */
struct metric_SO3 : public Metric
{
    template <class T, class DataSource, typename IndexType = size_t>
    struct traits
    {
        using distance_t = SO3_Adaptor<T, DataSource, T, IndexType>;
    };
};

/** @} */

/** @addtogroup param_grp Parameter structs
 * @{ */

enum class KDTreeSingleIndexAdaptorFlags
{
    None                  = 0,
    SkipInitialBuildIndex = 1
};

inline std::underlying_type<KDTreeSingleIndexAdaptorFlags>::type operator&(
    KDTreeSingleIndexAdaptorFlags lhs, KDTreeSingleIndexAdaptorFlags rhs)
{
    using underlying =
        typename std::underlying_type<KDTreeSingleIndexAdaptorFlags>::type;
    return static_cast<underlying>(lhs) & static_cast<underlying>(rhs);
}

/**  Parameters (see README.md) */
struct KDTreeSingleIndexAdaptorParams
{
    KDTreeSingleIndexAdaptorParams(
        size_t                        _leaf_max_size = 10,
        KDTreeSingleIndexAdaptorFlags _flags =
            KDTreeSingleIndexAdaptorFlags::None,
        unsigned int _n_thread_build = 1)
        : leaf_max_size(_leaf_max_size),
          flags(_flags),
          n_thread_build(_n_thread_build)
    {
    }

    size_t                        leaf_max_size;
    KDTreeSingleIndexAdaptorFlags flags;
    unsigned int                  n_thread_build;
};

/** Search options for KDTreeSingleIndexAdaptor::findNeighbors() */
struct SearchParameters
{
    SearchParameters(float eps_ = 0, bool sorted_ = true)
        : eps(eps_), sorted(sorted_)
    {
    }

    float eps;  //!< search for eps-approximate neighbours (default: 0)
    bool  sorted;  //!< only for radius search, require neighbours sorted by
                  //!< distance (default: true)
};
/** @} */

/** @addtogroup memalloc_grp Memory allocation
 * @{ */

/**
 * Pooled storage allocator
 *
 * The following routines allow for the efficient allocation of storage in
 * small chunks from a specified pool.  Rather than allowing each structure
 * to be freed individually, an entire pool of storage is freed at once.
 * This method has two advantages over just using malloc() and free().  First,
 * it is far more efficient for allocating small objects, as there is
 * no overhead for remembering all the information needed to free each
 * object or consolidating fragmented memory.  Second, the decision about
 * how long to keep an object is made at the time of allocation, and there
 * is no need to track down all the objects to free them.
 *
 */
class PooledAllocator
{
    static constexpr size_t WORDSIZE  = 16;  // WORDSIZE must >= 8
    static constexpr size_t BLOCKSIZE = 8192;

    /* We maintain memory alignment to word boundaries by requiring that all
        allocations be in multiples of the machine wordsize.  */
    /* Size of machine word in bytes.  Must be power of 2. */
    /* Minimum number of bytes requested at a time from the system.  Must be
     * multiple of WORDSIZE. */

    using Size = size_t;

    Size  remaining_ = 0;  //!< Number of bytes left in current block of storage
    void* base_ = nullptr;  //!< Pointer to base of current block of storage
    void* loc_  = nullptr;  //!< Current location in block to next allocate

    void internal_init()
    {
        remaining_   = 0;
        base_        = nullptr;
        usedMemory   = 0;
        wastedMemory = 0;
    }

   public:
    Size usedMemory   = 0;
    Size wastedMemory = 0;

    /**
        Default constructor. Initializes a new pool.
     */
    PooledAllocator() { internal_init(); }

    /**
     * Destructor. Frees all the memory allocated in this pool.
     */
    ~PooledAllocator() { free_all(); }

    /** Frees all allocated memory chunks */
    void free_all()
    {
        while (base_ != nullptr)
        {
            // Get pointer to prev block
            void* prev = *(static_cast<void**>(base_));
            ::free(base_);
            base_ = prev;
        }
        internal_init();
    }

    /**
     * Returns a pointer to a piece of new memory of the given size in bytes
     * allocated from the pool.
     */
    void* malloc(const size_t req_size)
    {
        /* Round size up to a multiple of wordsize.  The following expression
            only works for WORDSIZE that is a power of 2, by masking last bits
           of incremented size to zero.
         */
        const Size size = (req_size + (WORDSIZE - 1)) & ~(WORDSIZE - 1);

        /* Check whether a new block must be allocated.  Note that the first
           word of a block is reserved for a pointer to the previous block.
         */
        if (size > remaining_)
        {
            wastedMemory += remaining_;

            /* Allocate new storage. */
            const Size blocksize =
                size > BLOCKSIZE ? size + WORDSIZE : BLOCKSIZE + WORDSIZE;

            // use the standard C malloc to allocate memory
            void* m = ::malloc(blocksize);
            if (!m) { throw std::bad_alloc(); }

            /* Fill first word of new block with pointer to previous block. */
            static_cast<void**>(m)[0] = base_;
            base_                     = m;

            remaining_ = blocksize - WORDSIZE;
            loc_       = static_cast<char*>(m) + WORDSIZE;
        }
        void* rloc = loc_;
        loc_       = static_cast<char*>(loc_) + size;
        remaining_ -= size;

        usedMemory += size;

        return rloc;
    }

    /**
     * Allocates (using this pool) a generic type T.
     *
     * Params:
     *     count = number of instances to allocate.
     * Returns: pointer (of type T*) to memory buffer
     */
    template <typename T>
    T* allocate(const size_t count = 1)
    {
        T* mem = static_cast<T*>(this->malloc(sizeof(T) * count));
        return mem;
    }
};
/** @} */

/** @addtogroup nanoflann_metaprog_grp Auxiliary metaprogramming stuff
 * @{ */

/** Used to declare fixed-size arrays when DIM>0, dynamically-allocated vectors
 * when DIM=-1. Fixed size version for a generic DIM:
 */
template <int32_t DIM, typename T>
struct array_or_vector
{
    using type = std::array<T, DIM>;
};
/** Dynamic size version */
template <typename T>
struct array_or_vector<-1, T>
{
    using type = std::vector<T>;
};

/** @} */

/** kd-tree base-class
 *
 * Contains the member functions common to the classes KDTreeSingleIndexAdaptor
 * and KDTreeSingleIndexDynamicAdaptor_.
 *
 * \tparam Derived The name of the class which inherits this class.
 * \tparam DatasetAdaptor The user-provided adaptor, which must be ensured to
 *         have a lifetime equal or longer than the instance of this class.
 * \tparam Distance The distance metric to use, these are all classes derived
 * from nanoflann::Metric
 * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points)
 * \tparam IndexType Type of the arguments with which the data can be
 * accessed (e.g. float, double, int64_t, T*)
 */
template <
    class Derived, typename Distance, class DatasetAdaptor, int32_t DIM = -1,
    typename index_t = uint32_t>
class KDTreeBaseClass
{
   public:
    /** Frees the previously-built index. Automatically called within
     * buildIndex(). */
    void freeIndex(Derived& obj)
    {
        obj.pool_.free_all();
        obj.root_node_           = nullptr;
        obj.size_at_index_build_ = 0;
    }

    using ElementType  = typename Distance::ElementType;
    using DistanceType = typename Distance::DistanceType;
    using IndexType    = index_t;

    /**
     *  Array of indices to vectors in the dataset_.
     */
    std::vector<IndexType> vAcc_;

    using Offset    = typename decltype(vAcc_)::size_type;
    using Size      = typename decltype(vAcc_)::size_type;
    using Dimension = int32_t;

    /*---------------------------
     * Internal Data Structures
     * --------------------------*/
    struct Node
    {
        /** Union used because a node can be either a LEAF node or a non-leaf
         * node, so both data fields are never used simultaneously */
        union
        {
            struct leaf
            {
                Offset left, right;  //!< Indices of points in leaf node
            } lr;
            struct nonleaf
            {
                Dimension divfeat;  //!< Dimension used for subdivision.
                /// The values used for subdivision.
                DistanceType divlow, divhigh;
            } sub;
        } node_type;

        /** Child nodes (both=nullptr mean its a leaf node) */
        Node *child1 = nullptr, *child2 = nullptr;
    };

    using NodePtr      = Node*;
    using NodeConstPtr = const Node*;

    struct Interval
    {
        ElementType low, high;
    };

    NodePtr root_node_ = nullptr;

    Size leaf_max_size_ = 0;

    /// Number of thread for concurrent tree build
    Size n_thread_build_ = 1;
    /// Number of current points in the dataset
    Size size_ = 0;
    /// Number of points in the dataset when the index was built
    Size      size_at_index_build_ = 0;
    Dimension dim_                 = 0;  //!< Dimensionality of each data point

    /** Define "BoundingBox" as a fixed-size or variable-size container
     * depending on "DIM" */
    using BoundingBox = typename array_or_vector<DIM, Interval>::type;

    /** Define "distance_vector_t" as a fixed-size or variable-size container
     * depending on "DIM" */
    using distance_vector_t = typename array_or_vector<DIM, DistanceType>::type;

    /** The KD-tree used to find neighbours */
    BoundingBox root_bbox_;

    /**
     * Pooled memory allocator.
     *
     * Using a pooled memory allocator is more efficient
     * than allocating memory directly when there is a large
     * number small of memory allocations.
     */
    PooledAllocator pool_;

    /** Returns number of points in dataset  */
    Size size(const Derived& obj) const { return obj.size_; }

    /** Returns the length of each point in the dataset */
    Size veclen(const Derived& obj) const { return DIM > 0 ? DIM : obj.dim_; }

    /// Helper accessor to the dataset points:
    ElementType dataset_get(
        const Derived& obj, IndexType element, Dimension component) const
    {
        return obj.dataset_.kdtree_get_pt(element, component);
    }

    /**
     * Computes the index memory usage
     * Returns: memory used by the index
     */
    Size usedMemory(const Derived& obj) const
    {
        return obj.pool_.usedMemory + obj.pool_.wastedMemory +
               obj.dataset_.kdtree_get_point_count() *
                   sizeof(IndexType);  // pool memory and vind array memory
    }

    /**
     * Compute the minimum and maximum element values in the specified dimension
     */
    void computeMinMax(
        const Derived& obj, Offset ind, Size count, Dimension element,
        ElementType& min_elem, ElementType& max_elem) const
    {
        min_elem = dataset_get(obj, vAcc_[ind], element);
        max_elem = min_elem;
        for (Offset i = 1; i < count; ++i)
        {
            ElementType val = dataset_get(obj, vAcc_[ind + i], element);
            if (val < min_elem) min_elem = val;
            if (val > max_elem) max_elem = val;
        }
    }

    /**
     * Create a tree node that subdivides the list of vecs from vind[first]
     * to vind[last].  The routine is called recursively on each sublist.
     *
     * @param left index of the first vector
     * @param right index of the last vector
     * @param bbox bounding box used as input for splitting and output for
     * parent node
     */
    NodePtr divideTree(
        Derived& obj, const Offset left, const Offset right, BoundingBox& bbox)
    {
        assert(left < obj.dataset_.kdtree_get_point_count());

        NodePtr node = obj.pool_.template allocate<Node>();  // allocate memory
        const auto dims = (DIM > 0 ? DIM : obj.dim_);

        /* If too few exemplars remain, then make this a leaf node. */
        if ((right - left) <= static_cast<Offset>(obj.leaf_max_size_))
        {
            node->child1 = node->child2 = nullptr; /* Mark as leaf node. */
            node->node_type.lr.left     = left;
            node->node_type.lr.right    = right;

            // compute bounding-box of leaf points
            for (Dimension i = 0; i < dims; ++i)
            {
                bbox[i].low  = dataset_get(obj, obj.vAcc_[left], i);
                bbox[i].high = dataset_get(obj, obj.vAcc_[left], i);
            }
            for (Offset k = left + 1; k < right; ++k)
            {
                for (Dimension i = 0; i < dims; ++i)
                {
                    const auto val = dataset_get(obj, obj.vAcc_[k], i);
                    if (bbox[i].low > val) bbox[i].low = val;
                    if (bbox[i].high < val) bbox[i].high = val;
                }
            }
        }
        else
        {
            /* Determine the index, dimension and value for split plane */
            Offset       idx;
            Dimension    cutfeat;
            DistanceType cutval;
            middleSplit_(obj, left, right - left, idx, cutfeat, cutval, bbox);

            node->node_type.sub.divfeat = cutfeat;

            /* Recurse on left */
            BoundingBox left_bbox(bbox);
            left_bbox[cutfeat].high = cutval;
            node->child1 = this->divideTree(obj, left, left + idx, left_bbox);

            /* Recurse on right */
            BoundingBox right_bbox(bbox);
            right_bbox[cutfeat].low = cutval;
            node->child2 = this->divideTree(obj, left + idx, right, right_bbox);

            node->node_type.sub.divlow  = left_bbox[cutfeat].high;
            node->node_type.sub.divhigh = right_bbox[cutfeat].low;

            for (Dimension i = 0; i < dims; ++i)
            {
                bbox[i].low  = std::min(left_bbox[i].low, right_bbox[i].low);
                bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high);
            }
        }

        return node;
    }

    /**
     * Create a tree node that subdivides the list of vecs from vind[first] to
     * vind[last] concurrently.  The routine is called recursively on each
     * sublist.
     *
     * @param left index of the first vector
     * @param right index of the last vector
     * @param bbox bounding box used as input for splitting and output for
     * parent node
     * @param thread_count count of std::async threads
     * @param mutex mutex for mempool allocation
     */
    NodePtr divideTreeConcurrent(
        Derived& obj, const Offset left, const Offset right, BoundingBox& bbox,
        std::atomic<unsigned int>& thread_count, std::mutex& mutex)
    {
        std::unique_lock<std::mutex> lock(mutex);
        NodePtr node = obj.pool_.template allocate<Node>();  // allocate memory
        lock.unlock();

        const auto dims = (DIM > 0 ? DIM : obj.dim_);

        /* If too few exemplars remain, then make this a leaf node. */
        if ((right - left) <= static_cast<Offset>(obj.leaf_max_size_))
        {
            node->child1 = node->child2 = nullptr; /* Mark as leaf node. */
            node->node_type.lr.left     = left;
            node->node_type.lr.right    = right;

            // compute bounding-box of leaf points
            for (Dimension i = 0; i < dims; ++i)
            {
                bbox[i].low  = dataset_get(obj, obj.vAcc_[left], i);
                bbox[i].high = dataset_get(obj, obj.vAcc_[left], i);
            }
            for (Offset k = left + 1; k < right; ++k)
            {
                for (Dimension i = 0; i < dims; ++i)
                {
                    const auto val = dataset_get(obj, obj.vAcc_[k], i);
                    if (bbox[i].low > val) bbox[i].low = val;
                    if (bbox[i].high < val) bbox[i].high = val;
                }
            }
        }
        else
        {
            /* Determine the index, dimension and value for split plane */
            Offset       idx;
            Dimension    cutfeat;
            DistanceType cutval;
            middleSplit_(obj, left, right - left, idx, cutfeat, cutval, bbox);

            node->node_type.sub.divfeat = cutfeat;

            std::future<NodePtr> right_future;

            /* Recurse on right concurrently, if possible */

            BoundingBox right_bbox(bbox);
            right_bbox[cutfeat].low = cutval;
            if (++thread_count < n_thread_build_)
            {
                /* Concurrent thread for right recursion */

                right_future = std::async(
                    std::launch::async, &KDTreeBaseClass::divideTreeConcurrent,
                    this, std::ref(obj), left + idx, right,
                    std::ref(right_bbox), std::ref(thread_count),
                    std::ref(mutex));
            }
            else { --thread_count; }

            /* Recurse on left in this thread */

            BoundingBox left_bbox(bbox);
            left_bbox[cutfeat].high = cutval;
            node->child1            = this->divideTreeConcurrent(
                           obj, left, left + idx, left_bbox, thread_count, mutex);

            if (right_future.valid())
            {
                /* Block and wait for concurrent right from above */

                node->child2 = right_future.get();
                --thread_count;
            }
            else
            {
                /* Otherwise, recurse on right in this thread */

                node->child2 = this->divideTreeConcurrent(
                    obj, left + idx, right, right_bbox, thread_count, mutex);
            }

            node->node_type.sub.divlow  = left_bbox[cutfeat].high;
            node->node_type.sub.divhigh = right_bbox[cutfeat].low;

            for (Dimension i = 0; i < dims; ++i)
            {
                bbox[i].low  = std::min(left_bbox[i].low, right_bbox[i].low);
                bbox[i].high = std::max(left_bbox[i].high, right_bbox[i].high);
            }
        }

        return node;
    }

    void middleSplit_(
        const Derived& obj, const Offset ind, const Size count, Offset& index,
        Dimension& cutfeat, DistanceType& cutval, const BoundingBox& bbox)
    {
        const auto  dims     = (DIM > 0 ? DIM : obj.dim_);
        const auto  EPS      = static_cast<DistanceType>(0.00001);
        ElementType max_span = bbox[0].high - bbox[0].low;
        for (Dimension i = 1; i < dims; ++i)
        {
            ElementType span = bbox[i].high - bbox[i].low;
            if (span > max_span) { max_span = span; }
        }
        ElementType max_spread = -1;
        cutfeat                = 0;
        ElementType min_elem = 0, max_elem = 0;
        for (Dimension i = 0; i < dims; ++i)
        {
            ElementType span = bbox[i].high - bbox[i].low;
            if (span >= (1 - EPS) * max_span)
            {
                ElementType min_elem_, max_elem_;
                computeMinMax(obj, ind, count, i, min_elem_, max_elem_);
                ElementType spread = max_elem_ - min_elem_;
                if (spread > max_spread)
                {
                    cutfeat    = i;
                    max_spread = spread;
                    min_elem   = min_elem_;
                    max_elem   = max_elem_;
                }
            }
        }
        // split in the middle
        DistanceType split_val = (bbox[cutfeat].low + bbox[cutfeat].high) / 2;

        if (split_val < min_elem)
            cutval = min_elem;
        else if (split_val > max_elem)
            cutval = max_elem;
        else
            cutval = split_val;

        Offset lim1, lim2;
        planeSplit(obj, ind, count, cutfeat, cutval, lim1, lim2);

        if (lim1 > count / 2)
            index = lim1;
        else if (lim2 < count / 2)
            index = lim2;
        else
            index = count / 2;
    }

    /**
     *  Subdivide the list of points by a plane perpendicular on the axis
     * corresponding to the 'cutfeat' dimension at 'cutval' position.
     *
     *  On return:
     *  dataset[ind[0..lim1-1]][cutfeat] < cutval
     *  dataset[ind[lim1..lim2-1]][cutfeat] == cutval
     *  dataset[ind[lim2..count]][cutfeat] > cutval
     */
    void planeSplit(
        const Derived& obj, const Offset ind, const Size count,
        const Dimension cutfeat, const DistanceType& cutval, Offset& lim1,
        Offset& lim2)
    {
        /* First pass.
         * Determine lim1 with all values less than cutval to the left.
         */
        Offset left  = 0;
        Offset right = count - 1;
        for (;;)
        {
            while (left <= right &&
                   dataset_get(obj, vAcc_[ind + left], cutfeat) < cutval)
                ++left;
            while (right && left <= right &&
                   dataset_get(obj, vAcc_[ind + right], cutfeat) >= cutval)
                --right;
            if (left > right || !right)
                break;  // "!right" was added to support unsigned Index types
            std::swap(vAcc_[ind + left], vAcc_[ind + right]);
            ++left;
            --right;
        }
        /* Second pass
         * Determine lim2 with all values greater than cutval to the right
         * The middle is used for balancing the tree
         */
        lim1  = left;
        right = count - 1;
        for (;;)
        {
            while (left <= right &&
                   dataset_get(obj, vAcc_[ind + left], cutfeat) <= cutval)
                ++left;
            while (right && left <= right &&
                   dataset_get(obj, vAcc_[ind + right], cutfeat) > cutval)
                --right;
            if (left > right || !right)
                break;  // "!right" was added to support unsigned Index types
            std::swap(vAcc_[ind + left], vAcc_[ind + right]);
            ++left;
            --right;
        }
        lim2 = left;
    }

    DistanceType computeInitialDistances(
        const Derived& obj, const ElementType* vec,
        distance_vector_t& dists) const
    {
        assert(vec);
        DistanceType dist = DistanceType();

        for (Dimension i = 0; i < (DIM > 0 ? DIM : obj.dim_); ++i)
        {
            if (vec[i] < obj.root_bbox_[i].low)
            {
                dists[i] =
                    obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].low, i);
                dist += dists[i];
            }
            if (vec[i] > obj.root_bbox_[i].high)
            {
                dists[i] =
                    obj.distance_.accum_dist(vec[i], obj.root_bbox_[i].high, i);
                dist += dists[i];
            }
        }
        return dist;
    }

    static void save_tree(
        const Derived& obj, std::ostream& stream, const NodeConstPtr tree)
    {
        save_value(stream, *tree);
        if (tree->child1 != nullptr) { save_tree(obj, stream, tree->child1); }
        if (tree->child2 != nullptr) { save_tree(obj, stream, tree->child2); }
    }

    static void load_tree(Derived& obj, std::istream& stream, NodePtr& tree)
    {
        tree = obj.pool_.template allocate<Node>();
        load_value(stream, *tree);
        if (tree->child1 != nullptr) { load_tree(obj, stream, tree->child1); }
        if (tree->child2 != nullptr) { load_tree(obj, stream, tree->child2); }
    }

    /**  Stores the index in a binary file.
     *   IMPORTANT NOTE: The set of data points is NOT stored in the file, so
     * when loading the index object it must be constructed associated to the
     * same source of data points used while building it. See the example:
     * examples/saveload_example.cpp \sa loadIndex  */
    void saveIndex(const Derived& obj, std::ostream& stream) const
    {
        save_value(stream, obj.size_);
        save_value(stream, obj.dim_);
        save_value(stream, obj.root_bbox_);
        save_value(stream, obj.leaf_max_size_);
        save_value(stream, obj.vAcc_);
        if (obj.root_node_) save_tree(obj, stream, obj.root_node_);
    }

    /**  Loads a previous index from a binary file.
     *   IMPORTANT NOTE: The set of data points is NOT stored in the file, so
     * the index object must be constructed associated to the same source of
     * data points used while building the index. See the example:
     * examples/saveload_example.cpp \sa loadIndex  */
    void loadIndex(Derived& obj, std::istream& stream)
    {
        load_value(stream, obj.size_);
        load_value(stream, obj.dim_);
        load_value(stream, obj.root_bbox_);
        load_value(stream, obj.leaf_max_size_);
        load_value(stream, obj.vAcc_);
        load_tree(obj, stream, obj.root_node_);
    }
};

/** @addtogroup kdtrees_grp KD-tree classes and adaptors
 * @{ */

/** kd-tree static index
 *
 * Contains the k-d trees and other information for indexing a set of points
 * for nearest-neighbor matching.
 *
 *  The class "DatasetAdaptor" must provide the following interface (can be
 * non-virtual, inlined methods):
 *
 *  \code
 *   // Must return the number of data poins
 *   size_t kdtree_get_point_count() const { ... }
 *
 *
 *   // Must return the dim'th component of the idx'th point in the class:
 *   T kdtree_get_pt(const size_t idx, const size_t dim) const { ... }
 *
 *   // Optional bounding-box computation: return false to default to a standard
 * bbox computation loop.
 *   //   Return true if the BBOX was already computed by the class and returned
 * in "bb" so it can be avoided to redo it again.
 *   //   Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3
 * for point clouds) template <class BBOX> bool kdtree_get_bbox(BBOX &bb) const
 *   {
 *      bb[0].low = ...; bb[0].high = ...;  // 0th dimension limits
 *      bb[1].low = ...; bb[1].high = ...;  // 1st dimension limits
 *      ...
 *      return true;
 *   }
 *
 *  \endcode
 *
 * \tparam DatasetAdaptor The user-provided adaptor, which must be ensured to
 *         have a lifetime equal or longer than the instance of this class.
 * \tparam Distance The distance metric to use: nanoflann::metric_L1,
 * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM
 * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType Will
 * be typically size_t or int
 */
template <
    typename Distance, class DatasetAdaptor, int32_t DIM = -1,
    typename index_t = uint32_t>
class KDTreeSingleIndexAdaptor
    : public KDTreeBaseClass<
          KDTreeSingleIndexAdaptor<Distance, DatasetAdaptor, DIM, index_t>,
          Distance, DatasetAdaptor, DIM, index_t>
{
   public:
    /** Deleted copy constructor*/
    explicit KDTreeSingleIndexAdaptor(
        const KDTreeSingleIndexAdaptor<
            Distance, DatasetAdaptor, DIM, index_t>&) = delete;

    /** The data source used by this index */
    const DatasetAdaptor& dataset_;

    const KDTreeSingleIndexAdaptorParams indexParams;

    Distance distance_;

    using Base = typename nanoflann::KDTreeBaseClass<
        nanoflann::KDTreeSingleIndexAdaptor<
            Distance, DatasetAdaptor, DIM, index_t>,
        Distance, DatasetAdaptor, DIM, index_t>;

    using Offset    = typename Base::Offset;
    using Size      = typename Base::Size;
    using Dimension = typename Base::Dimension;

    using ElementType  = typename Base::ElementType;
    using DistanceType = typename Base::DistanceType;
    using IndexType    = typename Base::IndexType;

    using Node    = typename Base::Node;
    using NodePtr = Node*;

    using Interval = typename Base::Interval;

    /** Define "BoundingBox" as a fixed-size or variable-size container
     * depending on "DIM" */
    using BoundingBox = typename Base::BoundingBox;

    /** Define "distance_vector_t" as a fixed-size or variable-size container
     * depending on "DIM" */
    using distance_vector_t = typename Base::distance_vector_t;

    /**
     * KDTree constructor
     *
     * Refer to docs in README.md or online in
     * https://github.com/jlblancoc/nanoflann
     *
     * The KD-Tree point dimension (the length of each point in the datase, e.g.
     * 3 for 3D points) is determined by means of:
     *  - The \a DIM template parameter if >0 (highest priority)
     *  - Otherwise, the \a dimensionality parameter of this constructor.
     *
     * @param inputData Dataset with the input features. Its lifetime must be
     *  equal or longer than that of the instance of this class.
     * @param params Basically, the maximum leaf node size
     *
     * Note that there is a variable number of optional additional parameters
     * which will be forwarded to the metric class constructor. Refer to example
     * `examples/pointcloud_custom_metric.cpp` for a use case.
     *
     */
    template <class... Args>
    explicit KDTreeSingleIndexAdaptor(
        const Dimension dimensionality, const DatasetAdaptor& inputData,
        const KDTreeSingleIndexAdaptorParams& params, Args&&... args)
        : dataset_(inputData),
          indexParams(params),
          distance_(inputData, std::forward<Args>(args)...)
    {
        init(dimensionality, params);
    }

    explicit KDTreeSingleIndexAdaptor(
        const Dimension dimensionality, const DatasetAdaptor& inputData,
        const KDTreeSingleIndexAdaptorParams& params = {})
        : dataset_(inputData), indexParams(params), distance_(inputData)
    {
        init(dimensionality, params);
    }

   private:
    void init(
        const Dimension                       dimensionality,
        const KDTreeSingleIndexAdaptorParams& params)
    {
        Base::size_                = dataset_.kdtree_get_point_count();
        Base::size_at_index_build_ = Base::size_;
        Base::dim_                 = dimensionality;
        if (DIM > 0) Base::dim_ = DIM;
        Base::leaf_max_size_ = params.leaf_max_size;
        if (params.n_thread_build > 0)
        {
            Base::n_thread_build_ = params.n_thread_build;
        }
        else
        {
            Base::n_thread_build_ =
                std::max(std::thread::hardware_concurrency(), 1u);
        }

        if (!(params.flags &
              KDTreeSingleIndexAdaptorFlags::SkipInitialBuildIndex))
        {
            // Build KD-tree:
            buildIndex();
        }
    }

   public:
    /**
     * Builds the index
     */
    void buildIndex()
    {
        Base::size_                = dataset_.kdtree_get_point_count();
        Base::size_at_index_build_ = Base::size_;
        init_vind();
        this->freeIndex(*this);
        Base::size_at_index_build_ = Base::size_;
        if (Base::size_ == 0) return;
        computeBoundingBox(Base::root_bbox_);
        // construct the tree
        if (Base::n_thread_build_ == 1)
        {
            Base::root_node_ =
                this->divideTree(*this, 0, Base::size_, Base::root_bbox_);
        }
        else
        {
#ifndef NANOFLANN_NO_THREADS
            std::atomic<unsigned int> thread_count(0u);
            std::mutex                mutex;
            Base::root_node_ = this->divideTreeConcurrent(
                *this, 0, Base::size_, Base::root_bbox_, thread_count, mutex);
#else /* NANOFLANN_NO_THREADS */
            throw std::runtime_error("Multithreading is disabled");
#endif /* NANOFLANN_NO_THREADS */
        }
    }

    /** \name Query methods
     * @{ */

    /**
     * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored
     * inside the result object.
     *
     * Params:
     *     result = the result object in which the indices of the
     * nearest-neighbors are stored vec = the vector for which to search the
     * nearest neighbors
     *
     * \tparam RESULTSET Should be any ResultSet<DistanceType>
     * \return  True if the requested neighbors could be found.
     * \sa knnSearch, radiusSearch
     *
     * \note If L2 norms are used, all returned distances are actually squared
     *       distances.
     */
    template <typename RESULTSET>
    bool findNeighbors(
        RESULTSET& result, const ElementType* vec,
        const SearchParameters& searchParams = {}) const
    {
        assert(vec);
        if (this->size(*this) == 0) return false;
        if (!Base::root_node_)
            throw std::runtime_error(
                "[nanoflann] findNeighbors() called before building the "
                "index.");
        float epsError = 1 + searchParams.eps;

        // fixed or variable-sized container (depending on DIM)
        distance_vector_t dists;
        // Fill it with zeros.
        auto zero = static_cast<typename RESULTSET::DistanceType>(0);
        assign(dists, (DIM > 0 ? DIM : Base::dim_), zero);
        DistanceType dist = this->computeInitialDistances(*this, vec, dists);
        searchLevel(result, vec, Base::root_node_, dist, dists, epsError);

        if (searchParams.sorted) result.sort();

        return result.full();
    }

    /**
     * Find all points contained within the specified bounding box. Their
     * indices are stored inside the result object.
     *
     * Params:
     *     result = the result object in which the indices of the points
     *              within the bounding box are stored
     *     bbox = the bounding box defining the search region
     *
     * \tparam RESULTSET Should be any ResultSet<DistanceType>
     * \return  Number of points found within the bounding box.
     * \sa findNeighbors, knnSearch, radiusSearch
     *
     * \note The search is inclusive - points on the boundary are included.
     */
    template <typename RESULTSET>
    Size findWithinBox(RESULTSET& result, const BoundingBox& bbox) const
    {
        if (this->size(*this) == 0) return 0;
        if (!Base::root_node_)
            throw std::runtime_error(
                "[nanoflann] findWithinBox() called before building the "
                "index.");

        std::stack<NodePtr> stack;
        stack.push(Base::root_node_);

        while (!stack.empty())
        {
            const NodePtr node = stack.top();
            stack.pop();

            // If this is a leaf node, then do check and return.
            if (!node->child1)  // (if one node is nullptr, both are)
            {
                for (Offset i = node->node_type.lr.left;
                     i < node->node_type.lr.right; ++i)
                {
                    if (contains(bbox, Base::vAcc_[i]))
                    {
                        if (!result.addPoint(0, Base::vAcc_[i]))
                        {
                            // the resultset doesn't want to receive any more
                            // points, we're done searching!
                            return result.size();
                        }
                    }
                }
            }
            else
            {
                const int  idx        = node->node_type.sub.divfeat;
                const auto low_bound  = node->node_type.sub.divlow;
                const auto high_bound = node->node_type.sub.divhigh;

                if (bbox[idx].low <= low_bound) stack.push(node->child1);
                if (bbox[idx].high >= high_bound) stack.push(node->child2);
            }
        }

        return result.size();
    }

    /**
     * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1].
     * Their indices and distances are stored in the provided pointers to
     * array/vector.
     *
     * \sa radiusSearch, findNeighbors
     * \return Number `N` of valid points in the result set.
     *
     * \note If L2 norms are used, all returned distances are actually squared
     *       distances.
     *
     * \note Only the first `N` entries in `out_indices` and `out_distances`
     *       will be valid. Return is less than `num_closest` only if the
     *       number of elements in the tree is less than `num_closest`.
     */
    Size knnSearch(
        const ElementType* query_point, const Size num_closest,
        IndexType* out_indices, DistanceType* out_distances) const
    {
        nanoflann::KNNResultSet<DistanceType, IndexType> resultSet(num_closest);
        resultSet.init(out_indices, out_distances);
        findNeighbors(resultSet, query_point);
        return resultSet.size();
    }

    /**
     * Find all the neighbors to \a query_point[0:dim-1] within a maximum
     * radius. The output is given as a vector of pairs, of which the first
     * element is a point index and the second the corresponding distance.
     * Previous contents of \a IndicesDists are cleared.
     *
     *  If searchParams.sorted==true, the output list is sorted by ascending
     * distances.
     *
     *  For a better performance, it is advisable to do a .reserve() on the
     * vector if you have any wild guess about the number of expected matches.
     *
     *  \sa knnSearch, findNeighbors, radiusSearchCustomCallback
     * \return The number of points within the given radius (i.e. indices.size()
     * or dists.size() )
     *
     * \note If L2 norms are used, search radius and all returned distances
     *       are actually squared distances.
     */
    Size radiusSearch(
        const ElementType* query_point, const DistanceType& radius,
        std::vector<ResultItem<IndexType, DistanceType>>& IndicesDists,
        const SearchParameters& searchParams = {}) const
    {
        RadiusResultSet<DistanceType, IndexType> resultSet(
            radius, IndicesDists);
        const Size nFound =
            radiusSearchCustomCallback(query_point, resultSet, searchParams);
        return nFound;
    }

    /**
     * Just like radiusSearch() but with a custom callback class for each point
     * found in the radius of the query. See the source of RadiusResultSet<> as
     * a start point for your own classes. \sa radiusSearch
     */
    template <class SEARCH_CALLBACK>
    Size radiusSearchCustomCallback(
        const ElementType* query_point, SEARCH_CALLBACK& resultSet,
        const SearchParameters& searchParams = {}) const
    {
        findNeighbors(resultSet, query_point, searchParams);
        return resultSet.size();
    }

    /**
     * Find the first N neighbors to \a query_point[0:dim-1] within a maximum
     * radius. The output is given as a vector of pairs, of which the first
     * element is a point index and the second the corresponding distance.
     * Previous contents of \a IndicesDists are cleared.
     *
     * \sa radiusSearch, findNeighbors
     * \return Number `N` of valid points in the result set.
     *
     * \note If L2 norms are used, all returned distances are actually squared
     *       distances.
     *
     * \note Only the first `N` entries in `out_indices` and `out_distances`
     *       will be valid. Return is less than `num_closest` only if the
     *       number of elements in the tree is less than `num_closest`.
     */
    Size rknnSearch(
        const ElementType* query_point, const Size num_closest,
        IndexType* out_indices, DistanceType* out_distances,
        const DistanceType& radius) const
    {
        nanoflann::RKNNResultSet<DistanceType, IndexType> resultSet(
            num_closest, radius);
        resultSet.init(out_indices, out_distances);
        findNeighbors(resultSet, query_point);
        return resultSet.size();
    }

    /** @} */

   public:
    /** Make sure the auxiliary list \a vind has the same size than the
     * current dataset, and re-generate if size has changed. */
    void init_vind()
    {
        // Create a permutable array of indices to the input vectors.
        Base::size_ = dataset_.kdtree_get_point_count();
        if (Base::vAcc_.size() != Base::size_) Base::vAcc_.resize(Base::size_);
        for (IndexType i = 0; i < static_cast<IndexType>(Base::size_); i++)
            Base::vAcc_[i] = i;
    }

    void computeBoundingBox(BoundingBox& bbox)
    {
        const auto dims = (DIM > 0 ? DIM : Base::dim_);
        resize(bbox, dims);
        if (dataset_.kdtree_get_bbox(bbox))
        {
            // Done! It was implemented in derived class
        }
        else
        {
            const Size N = dataset_.kdtree_get_point_count();
            if (!N)
                throw std::runtime_error(
                    "[nanoflann] computeBoundingBox() called but "
                    "no data points found.");
            for (Dimension i = 0; i < dims; ++i)
            {
                bbox[i].low = bbox[i].high =
                    this->dataset_get(*this, Base::vAcc_[0], i);
            }
            for (Offset k = 1; k < N; ++k)
            {
                for (Dimension i = 0; i < dims; ++i)
                {
                    const auto val =
                        this->dataset_get(*this, Base::vAcc_[k], i);
                    if (val < bbox[i].low) bbox[i].low = val;
                    if (val > bbox[i].high) bbox[i].high = val;
                }
            }
        }
    }

    bool contains(const BoundingBox& bbox, IndexType idx) const
    {
        const auto dims = (DIM > 0 ? DIM : Base::dim_);
        for (Dimension i = 0; i < dims; ++i)
        {
            const auto point = this->dataset_.kdtree_get_pt(idx, i);
            if (point < bbox[i].low || point > bbox[i].high) return false;
        }
        return true;
    }

    /**
     * Performs an exact search in the tree starting from a node.
     * \tparam RESULTSET Should be any ResultSet<DistanceType>
     * \return true if the search should be continued, false if the results are
     * sufficient
     */
    template <class RESULTSET>
    bool searchLevel(
        RESULTSET& result_set, const ElementType* vec, const NodePtr node,
        DistanceType mindist, distance_vector_t& dists,
        const float epsError) const
    {
        // If this is a leaf node, then do check and return.
        if (!node->child1)  // (if one node is nullptr, both are)
        {
            for (Offset i = node->node_type.lr.left;
                 i < node->node_type.lr.right; ++i)
            {
                const IndexType accessor = Base::vAcc_[i];  // reorder... : i;
                DistanceType    dist     = distance_.evalMetric(
                           vec, accessor, (DIM > 0 ? DIM : Base::dim_));
                if (dist < result_set.worstDist())
                {
                    if (!result_set.addPoint(dist, Base::vAcc_[i]))
                    {
                        // the resultset doesn't want to receive any more
                        // points, we're done searching!
                        return false;
                    }
                }
            }
            return true;
        }

        /* Which child branch should be taken first? */
        Dimension    idx   = node->node_type.sub.divfeat;
        ElementType  val   = vec[idx];
        DistanceType diff1 = val - node->node_type.sub.divlow;
        DistanceType diff2 = val - node->node_type.sub.divhigh;

        NodePtr      bestChild;
        NodePtr      otherChild;
        DistanceType cut_dist;
        if ((diff1 + diff2) < 0)
        {
            bestChild  = node->child1;
            otherChild = node->child2;
            cut_dist =
                distance_.accum_dist(val, node->node_type.sub.divhigh, idx);
        }
        else
        {
            bestChild  = node->child2;
            otherChild = node->child1;
            cut_dist =
                distance_.accum_dist(val, node->node_type.sub.divlow, idx);
        }

        /* Call recursively to search next level down. */
        if (!searchLevel(result_set, vec, bestChild, mindist, dists, epsError))
        {
            // the resultset doesn't want to receive any more points, we're done
            // searching!
            return false;
        }

        DistanceType dst = dists[idx];
        mindist          = mindist + cut_dist - dst;
        dists[idx]       = cut_dist;
        if (mindist * epsError <= result_set.worstDist())
        {
            if (!searchLevel(
                    result_set, vec, otherChild, mindist, dists, epsError))
            {
                // the resultset doesn't want to receive any more points, we're
                // done searching!
                return false;
            }
        }
        dists[idx] = dst;
        return true;
    }

   public:
    /**  Stores the index in a binary file.
     *   IMPORTANT NOTE: The set of data points is NOT stored in the file, so
     * when loading the index object it must be constructed associated to the
     * same source of data points used while building it. See the example:
     * examples/saveload_example.cpp \sa loadIndex  */
    void saveIndex(std::ostream& stream) const
    {
        Base::saveIndex(*this, stream);
    }

    /**  Loads a previous index from a binary file.
     *   IMPORTANT NOTE: The set of data points is NOT stored in the file, so
     * the index object must be constructed associated to the same source of
     * data points used while building the index. See the example:
     * examples/saveload_example.cpp \sa loadIndex  */
    void loadIndex(std::istream& stream) { Base::loadIndex(*this, stream); }

};  // class KDTree

/** kd-tree dynamic index
 *
 * Contains the k-d trees and other information for indexing a set of points
 * for nearest-neighbor matching.
 *
 * The class "DatasetAdaptor" must provide the following interface (can be
 * non-virtual, inlined methods):
 *
 *  \code
 *   // Must return the number of data poins
 *   size_t kdtree_get_point_count() const { ... }
 *
 *   // Must return the dim'th component of the idx'th point in the class:
 *   T kdtree_get_pt(const size_t idx, const size_t dim) const { ... }
 *
 *   // Optional bounding-box computation: return false to default to a standard
 * bbox computation loop.
 *   //   Return true if the BBOX was already computed by the class and returned
 * in "bb" so it can be avoided to redo it again.
 *   //   Look at bb.size() to find out the expected dimensionality (e.g. 2 or 3
 * for point clouds) template <class BBOX> bool kdtree_get_bbox(BBOX &bb) const
 *   {
 *      bb[0].low = ...; bb[0].high = ...;  // 0th dimension limits
 *      bb[1].low = ...; bb[1].high = ...;  // 1st dimension limits
 *      ...
 *      return true;
 *   }
 *
 *  \endcode
 *
 * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
 * \tparam Distance The distance metric to use: nanoflann::metric_L1,
 * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
 * \tparam DIM Dimensionality of data points (e.g. 3 for 3D points)
 * \tparam IndexType Type of the arguments with which the data can be
 * accessed (e.g. float, double, int64_t, T*)
 */
template <
    typename Distance, class DatasetAdaptor, int32_t DIM = -1,
    typename IndexType = uint32_t>
class KDTreeSingleIndexDynamicAdaptor_
    : public KDTreeBaseClass<
          KDTreeSingleIndexDynamicAdaptor_<
              Distance, DatasetAdaptor, DIM, IndexType>,
          Distance, DatasetAdaptor, DIM, IndexType>
{
   public:
    /**
     * The dataset used by this index
     */
    const DatasetAdaptor& dataset_;  //!< The source of our data

    KDTreeSingleIndexAdaptorParams index_params_;

    std::vector<int>& treeIndex_;

    Distance distance_;

    using Base = typename nanoflann::KDTreeBaseClass<
        nanoflann::KDTreeSingleIndexDynamicAdaptor_<
            Distance, DatasetAdaptor, DIM, IndexType>,
        Distance, DatasetAdaptor, DIM, IndexType>;

    using ElementType  = typename Base::ElementType;
    using DistanceType = typename Base::DistanceType;

    using Offset    = typename Base::Offset;
    using Size      = typename Base::Size;
    using Dimension = typename Base::Dimension;

    using Node    = typename Base::Node;
    using NodePtr = Node*;

    using Interval = typename Base::Interval;
    /** Define "BoundingBox" as a fixed-size or variable-size container
     * depending on "DIM" */
    using BoundingBox = typename Base::BoundingBox;

    /** Define "distance_vector_t" as a fixed-size or variable-size container
     * depending on "DIM" */
    using distance_vector_t = typename Base::distance_vector_t;

    /**
     * KDTree constructor
     *
     * Refer to docs in README.md or online in
     * https://github.com/jlblancoc/nanoflann
     *
     * The KD-Tree point dimension (the length of each point in the datase, e.g.
     * 3 for 3D points) is determined by means of:
     *  - The \a DIM template parameter if >0 (highest priority)
     *  - Otherwise, the \a dimensionality parameter of this constructor.
     *
     * @param inputData Dataset with the input features. Its lifetime must be
     *  equal or longer than that of the instance of this class.
     * @param params Basically, the maximum leaf node size
     */
    KDTreeSingleIndexDynamicAdaptor_(
        const Dimension dimensionality, const DatasetAdaptor& inputData,
        std::vector<int>&                     treeIndex,
        const KDTreeSingleIndexAdaptorParams& params =
            KDTreeSingleIndexAdaptorParams())
        : dataset_(inputData),
          index_params_(params),
          treeIndex_(treeIndex),
          distance_(inputData)
    {
        Base::size_                = 0;
        Base::size_at_index_build_ = 0;
        for (auto& v : Base::root_bbox_) v = {};
        Base::dim_ = dimensionality;
        if (DIM > 0) Base::dim_ = DIM;
        Base::leaf_max_size_ = params.leaf_max_size;
        if (params.n_thread_build > 0)
        {
            Base::n_thread_build_ = params.n_thread_build;
        }
        else
        {
            Base::n_thread_build_ =
                std::max(std::thread::hardware_concurrency(), 1u);
        }
    }

    /** Explicitly default the copy constructor */
    KDTreeSingleIndexDynamicAdaptor_(
        const KDTreeSingleIndexDynamicAdaptor_& rhs) = default;

    /** Assignment operator definiton */
    KDTreeSingleIndexDynamicAdaptor_ operator=(
        const KDTreeSingleIndexDynamicAdaptor_& rhs)
    {
        KDTreeSingleIndexDynamicAdaptor_ tmp(rhs);
        std::swap(Base::vAcc_, tmp.Base::vAcc_);
        std::swap(Base::leaf_max_size_, tmp.Base::leaf_max_size_);
        std::swap(index_params_, tmp.index_params_);
        std::swap(treeIndex_, tmp.treeIndex_);
        std::swap(Base::size_, tmp.Base::size_);
        std::swap(Base::size_at_index_build_, tmp.Base::size_at_index_build_);
        std::swap(Base::root_node_, tmp.Base::root_node_);
        std::swap(Base::root_bbox_, tmp.Base::root_bbox_);
        std::swap(Base::pool_, tmp.Base::pool_);
        return *this;
    }

    /**
     * Builds the index
     */
    void buildIndex()
    {
        Base::size_ = Base::vAcc_.size();
        this->freeIndex(*this);
        Base::size_at_index_build_ = Base::size_;
        if (Base::size_ == 0) return;
        computeBoundingBox(Base::root_bbox_);
        // construct the tree
        if (Base::n_thread_build_ == 1)
        {
            Base::root_node_ =
                this->divideTree(*this, 0, Base::size_, Base::root_bbox_);
        }
        else
        {
#ifndef NANOFLANN_NO_THREADS
            std::atomic<unsigned int> thread_count(0u);
            std::mutex                mutex;
            Base::root_node_ = this->divideTreeConcurrent(
                *this, 0, Base::size_, Base::root_bbox_, thread_count, mutex);
#else /* NANOFLANN_NO_THREADS */
            throw std::runtime_error("Multithreading is disabled");
#endif /* NANOFLANN_NO_THREADS */
        }
    }

    /** \name Query methods
     * @{ */

    /**
     * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored
     * inside the result object.
     * This is the core search function, all others are wrappers around this
     * one.
     *
     * \param result The result object in which the indices of the
     *               nearest-neighbors are stored.
     * \param vec    The vector of the query point for which to search the
     *               nearest neighbors.
     * \param searchParams Optional parameters for the search.
     *
     * \tparam RESULTSET Should be any ResultSet<DistanceType>
     * \return True if the requested neighbors could be found.
     *
     * \sa knnSearch(), radiusSearch(), radiusSearchCustomCallback()
     *
     * \note If L2 norms are used, all returned distances are actually squared
     *       distances.
     */
    template <typename RESULTSET>
    bool findNeighbors(
        RESULTSET& result, const ElementType* vec,
        const SearchParameters& searchParams = {}) const
    {
        assert(vec);
        if (this->size(*this) == 0) return false;
        if (!Base::root_node_) return false;
        float epsError = 1 + searchParams.eps;

        // fixed or variable-sized container (depending on DIM)
        distance_vector_t dists;
        // Fill it with zeros.
        assign(
            dists, (DIM > 0 ? DIM : Base::dim_),
            static_cast<typename distance_vector_t::value_type>(0));
        DistanceType dist = this->computeInitialDistances(*this, vec, dists);
        searchLevel(result, vec, Base::root_node_, dist, dists, epsError);
        return result.full();
    }

    /**
     * Find the "num_closest" nearest neighbors to the \a query_point[0:dim-1].
     * Their indices are stored inside the result object. \sa radiusSearch,
     * findNeighbors
     * \return Number `N` of valid points in
     * the result set.
     *
     * \note If L2 norms are used, all returned distances are actually squared
     *       distances.
     *
     * \note Only the first `N` entries in `out_indices` and `out_distances`
     *       will be valid. Return may be less than `num_closest` only if the
     *       number of elements in the tree is less than `num_closest`.
     */
    Size knnSearch(
        const ElementType* query_point, const Size num_closest,
        IndexType* out_indices, DistanceType* out_distances,
        const SearchParameters& searchParams = {}) const
    {
        nanoflann::KNNResultSet<DistanceType, IndexType> resultSet(num_closest);
        resultSet.init(out_indices, out_distances);
        findNeighbors(resultSet, query_point, searchParams);
        return resultSet.size();
    }

    /**
     * Find all the neighbors to \a query_point[0:dim-1] within a maximum
     * radius. The output is given as a vector of pairs, of which the first
     * element is a point index and the second the corresponding distance.
     * Previous contents of \a IndicesDists are cleared.
     *
     * If searchParams.sorted==true, the output list is sorted by ascending
     * distances.
     *
     * For a better performance, it is advisable to do a .reserve() on the
     * vector if you have any wild guess about the number of expected matches.
     *
     *  \sa knnSearch, findNeighbors, radiusSearchCustomCallback
     * \return The number of points within the given radius (i.e. indices.size()
     * or dists.size() )
     *
     * \note If L2 norms are used, search radius and all returned distances
     *       are actually squared distances.
     */
    Size radiusSearch(
        const ElementType* query_point, const DistanceType& radius,
        std::vector<ResultItem<IndexType, DistanceType>>& IndicesDists,
        const SearchParameters& searchParams = {}) const
    {
        RadiusResultSet<DistanceType, IndexType> resultSet(
            radius, IndicesDists);
        const size_t nFound =
            radiusSearchCustomCallback(query_point, resultSet, searchParams);
        return nFound;
    }

    /**
     * Just like radiusSearch() but with a custom callback class for each point
     * found in the radius of the query. See the source of RadiusResultSet<> as
     * a start point for your own classes. \sa radiusSearch
     */
    template <class SEARCH_CALLBACK>
    Size radiusSearchCustomCallback(
        const ElementType* query_point, SEARCH_CALLBACK& resultSet,
        const SearchParameters& searchParams = {}) const
    {
        findNeighbors(resultSet, query_point, searchParams);
        return resultSet.size();
    }

    /** @} */

   public:
    void computeBoundingBox(BoundingBox& bbox)
    {
        const auto dims = (DIM > 0 ? DIM : Base::dim_);
        resize(bbox, dims);

        if (dataset_.kdtree_get_bbox(bbox))
        {
            // Done! It was implemented in derived class
        }
        else
        {
            const Size N = Base::size_;
            if (!N)
                throw std::runtime_error(
                    "[nanoflann] computeBoundingBox() called but "
                    "no data points found.");
            for (Dimension i = 0; i < dims; ++i)
            {
                bbox[i].low = bbox[i].high =
                    this->dataset_get(*this, Base::vAcc_[0], i);
            }
            for (Offset k = 1; k < N; ++k)
            {
                for (Dimension i = 0; i < dims; ++i)
                {
                    const auto val =
                        this->dataset_get(*this, Base::vAcc_[k], i);
                    if (val < bbox[i].low) bbox[i].low = val;
                    if (val > bbox[i].high) bbox[i].high = val;
                }
            }
        }
    }

    /**
     * Performs an exact search in the tree starting from a node.
     * \tparam RESULTSET Should be any ResultSet<DistanceType>
     */
    template <class RESULTSET>
    void searchLevel(
        RESULTSET& result_set, const ElementType* vec, const NodePtr node,
        DistanceType mindist, distance_vector_t& dists,
        const float epsError) const
    {
        // If this is a leaf node, then do check and return.
        if (!node->child1)  // (if one node is nullptr, both are)
        {
            for (Offset i = node->node_type.lr.left;
                 i < node->node_type.lr.right; ++i)
            {
                const IndexType index = Base::vAcc_[i];  // reorder... : i;
                if (treeIndex_[index] == -1) continue;
                DistanceType dist = distance_.evalMetric(
                    vec, index, (DIM > 0 ? DIM : Base::dim_));
                if (dist < result_set.worstDist())
                {
                    if (!result_set.addPoint(
                            static_cast<typename RESULTSET::DistanceType>(dist),
                            static_cast<typename RESULTSET::IndexType>(
                                Base::vAcc_[i])))
                    {
                        // the resultset doesn't want to receive any more
                        // points, we're done searching!
                        return;  // false;
                    }
                }
            }
            return;
        }

        /* Which child branch should be taken first? */
        Dimension    idx   = node->node_type.sub.divfeat;
        ElementType  val   = vec[idx];
        DistanceType diff1 = val - node->node_type.sub.divlow;
        DistanceType diff2 = val - node->node_type.sub.divhigh;

        NodePtr      bestChild;
        NodePtr      otherChild;
        DistanceType cut_dist;
        if ((diff1 + diff2) < 0)
        {
            bestChild  = node->child1;
            otherChild = node->child2;
            cut_dist =
                distance_.accum_dist(val, node->node_type.sub.divhigh, idx);
        }
        else
        {
            bestChild  = node->child2;
            otherChild = node->child1;
            cut_dist =
                distance_.accum_dist(val, node->node_type.sub.divlow, idx);
        }

        /* Call recursively to search next level down. */
        searchLevel(result_set, vec, bestChild, mindist, dists, epsError);

        DistanceType dst = dists[idx];
        mindist          = mindist + cut_dist - dst;
        dists[idx]       = cut_dist;
        if (mindist * epsError <= result_set.worstDist())
        {
            searchLevel(result_set, vec, otherChild, mindist, dists, epsError);
        }
        dists[idx] = dst;
    }

   public:
    /**  Stores the index in a binary file.
     *   IMPORTANT NOTE: The set of data points is NOT stored in the file, so
     * when loading the index object it must be constructed associated to the
     * same source of data points used while building it. See the example:
     * examples/saveload_example.cpp \sa loadIndex  */
    void saveIndex(std::ostream& stream) { saveIndex(*this, stream); }

    /**  Loads a previous index from a binary file.
     *   IMPORTANT NOTE: The set of data points is NOT stored in the file, so
     * the index object must be constructed associated to the same source of
     * data points used while building the index. See the example:
     * examples/saveload_example.cpp \sa loadIndex  */
    void loadIndex(std::istream& stream) { loadIndex(*this, stream); }
};

/** kd-tree dynaimic index
 *
 * class to create multiple static index and merge their results to behave as
 * single dynamic index as proposed in Logarithmic Approach.
 *
 *  Example of usage:
 *  examples/dynamic_pointcloud_example.cpp
 *
 * \tparam DatasetAdaptor The user-provided adaptor (see comments above).
 * \tparam Distance The distance metric to use: nanoflann::metric_L1,
 * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. \tparam DIM
 * Dimensionality of data points (e.g. 3 for 3D points) \tparam IndexType
 * Will be typically size_t or int
 */
template <
    typename Distance, class DatasetAdaptor, int32_t DIM = -1,
    typename IndexType = uint32_t>
class KDTreeSingleIndexDynamicAdaptor
{
   public:
    using ElementType  = typename Distance::ElementType;
    using DistanceType = typename Distance::DistanceType;

    using Offset = typename KDTreeSingleIndexDynamicAdaptor_<
        Distance, DatasetAdaptor, DIM>::Offset;
    using Size = typename KDTreeSingleIndexDynamicAdaptor_<
        Distance, DatasetAdaptor, DIM>::Size;
    using Dimension = typename KDTreeSingleIndexDynamicAdaptor_<
        Distance, DatasetAdaptor, DIM>::Dimension;

   protected:
    Size leaf_max_size_;
    Size treeCount_;
    Size pointCount_;

    /**
     * The dataset used by this index
     */
    const DatasetAdaptor& dataset_;  //!< The source of our data

    /** treeIndex[idx] is the index of tree in which point at idx is stored.
     * treeIndex[idx]=-1 means that point has been removed. */
    std::vector<int>        treeIndex_;
    std::unordered_set<int> removedPoints_;

    KDTreeSingleIndexAdaptorParams index_params_;

    Dimension dim_;  //!< Dimensionality of each data point

    using index_container_t = KDTreeSingleIndexDynamicAdaptor_<
        Distance, DatasetAdaptor, DIM, IndexType>;
    std::vector<index_container_t> index_;

   public:
    /** Get a const ref to the internal list of indices; the number of indices
     * is adapted dynamically as the dataset grows in size. */
    const std::vector<index_container_t>& getAllIndices() const
    {
        return index_;
    }

   private:
    /** finds position of least significant unset bit */
    int First0Bit(IndexType num)
    {
        int pos = 0;
        while (num & 1)
        {
            num = num >> 1;
            pos++;
        }
        return pos;
    }

    /** Creates multiple empty trees to handle dynamic support */
    void init()
    {
        using my_kd_tree_t = KDTreeSingleIndexDynamicAdaptor_<
            Distance, DatasetAdaptor, DIM, IndexType>;
        std::vector<my_kd_tree_t> index(
            treeCount_,
            my_kd_tree_t(dim_ /*dim*/, dataset_, treeIndex_, index_params_));
        index_ = index;
    }

   public:
    Distance distance_;

    /**
     * KDTree constructor
     *
     * Refer to docs in README.md or online in
     * https://github.com/jlblancoc/nanoflann
     *
     * The KD-Tree point dimension (the length of each point in the datase, e.g.
     * 3 for 3D points) is determined by means of:
     *  - The \a DIM template parameter if >0 (highest priority)
     *  - Otherwise, the \a dimensionality parameter of this constructor.
     *
     * @param inputData Dataset with the input features. Its lifetime must be
     *  equal or longer than that of the instance of this class.
     * @param params Basically, the maximum leaf node size
     */
    explicit KDTreeSingleIndexDynamicAdaptor(
        const int dimensionality, const DatasetAdaptor& inputData,
        const KDTreeSingleIndexAdaptorParams& params =
            KDTreeSingleIndexAdaptorParams(),
        const size_t maximumPointCount = 1000000000U)
        : dataset_(inputData), index_params_(params), distance_(inputData)
    {
        treeCount_  = static_cast<size_t>(std::log2(maximumPointCount)) + 1;
        pointCount_ = 0U;
        dim_        = dimensionality;
        treeIndex_.clear();
        if (DIM > 0) dim_ = DIM;
        leaf_max_size_ = params.leaf_max_size;
        init();
        const size_t num_initial_points = dataset_.kdtree_get_point_count();
        if (num_initial_points > 0) { addPoints(0, num_initial_points - 1); }
    }

    /** Deleted copy constructor*/
    explicit KDTreeSingleIndexDynamicAdaptor(
        const KDTreeSingleIndexDynamicAdaptor<
            Distance, DatasetAdaptor, DIM, IndexType>&) = delete;

    /** Add points to the set, Inserts all points from [start, end] */
    void addPoints(IndexType start, IndexType end)
    {
        const Size count    = end - start + 1;
        int        maxIndex = 0;
        treeIndex_.resize(treeIndex_.size() + count);
        for (IndexType idx = start; idx <= end; idx++)
        {
            const int pos           = First0Bit(pointCount_);
            maxIndex                = std::max(pos, maxIndex);
            treeIndex_[pointCount_] = pos;

            const auto it = removedPoints_.find(idx);
            if (it != removedPoints_.end())
            {
                removedPoints_.erase(it);
                treeIndex_[idx] = pos;
            }

            for (int i = 0; i < pos; i++)
            {
                for (int j = 0; j < static_cast<int>(index_[i].vAcc_.size());
                     j++)
                {
                    index_[pos].vAcc_.push_back(index_[i].vAcc_[j]);
                    if (treeIndex_[index_[i].vAcc_[j]] != -1)
                        treeIndex_[index_[i].vAcc_[j]] = pos;
                }
                index_[i].vAcc_.clear();
            }
            index_[pos].vAcc_.push_back(idx);
            pointCount_++;
        }

        for (int i = 0; i <= maxIndex; ++i)
        {
            index_[i].freeIndex(index_[i]);
            if (!index_[i].vAcc_.empty()) index_[i].buildIndex();
        }
    }

    /** Remove a point from the set (Lazy Deletion) */
    void removePoint(size_t idx)
    {
        if (idx >= pointCount_) return;
        removedPoints_.insert(idx);
        treeIndex_[idx] = -1;
    }

    /**
     * Find set of nearest neighbors to vec[0:dim-1]. Their indices are stored
     * inside the result object.
     *
     * Params:
     *     result = the result object in which the indices of the
     * nearest-neighbors are stored vec = the vector for which to search the
     * nearest neighbors
     *
     * \tparam RESULTSET Should be any ResultSet<DistanceType>
     * \return  True if the requested neighbors could be found.
     * \sa knnSearch, radiusSearch
     *
     * \note If L2 norms are used, all returned distances are actually squared
     *       distances.
     */
    template <typename RESULTSET>
    bool findNeighbors(
        RESULTSET& result, const ElementType* vec,
        const SearchParameters& searchParams = {}) const
    {
        for (size_t i = 0; i < treeCount_; i++)
        {
            index_[i].findNeighbors(result, &vec[0], searchParams);
        }
        return result.full();
    }
};

/** An L2-metric KD-tree adaptor for working with data directly stored in an
 * Eigen Matrix, without duplicating the data storage. You can select whether a
 * row or column in the matrix represents a point in the state space.
 *
 * Example of usage:
 * \code
 * Eigen::Matrix<num_t,Eigen::Dynamic,Eigen::Dynamic>  mat;
 *
 * // Fill out "mat"...
 * using my_kd_tree_t = nanoflann::KDTreeEigenMatrixAdaptor<
 *   Eigen::Matrix<num_t,Dynamic,Dynamic>>;
 *
 * const int max_leaf = 10;
 * my_kd_tree_t mat_index(mat, max_leaf);
 * mat_index.index->...
 * \endcode
 *
 *  \tparam DIM If set to >0, it specifies a compile-time fixed dimensionality
 * for the points in the data set, allowing more compiler optimizations.
 * \tparam Distance The distance metric to use: nanoflann::metric_L1,
 * nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
 * \tparam row_major If set to true the rows of the matrix are used as the
 *         points, if set to false  the columns of the matrix are used as the
 *         points.
 */
template <
    class MatrixType, int32_t DIM = -1, class Distance = nanoflann::metric_L2,
    bool row_major = true>
struct KDTreeEigenMatrixAdaptor
{
    using self_t =
        KDTreeEigenMatrixAdaptor<MatrixType, DIM, Distance, row_major>;
    using num_t     = typename MatrixType::Scalar;
    using IndexType = typename MatrixType::Index;
    using metric_t  = typename Distance::template traits<
        num_t, self_t, IndexType>::distance_t;

    using index_t = KDTreeSingleIndexAdaptor<
        metric_t, self_t,
        row_major ? MatrixType::ColsAtCompileTime
                  : MatrixType::RowsAtCompileTime,
        IndexType>;

    index_t* index_;  //! The kd-tree index for the user to call its methods as
                      //! usual with any other FLANN index.

    using Offset    = typename index_t::Offset;
    using Size      = typename index_t::Size;
    using Dimension = typename index_t::Dimension;

    /// Constructor: takes a const ref to the matrix object with the data points
    explicit KDTreeEigenMatrixAdaptor(
        const Dimension                                 dimensionality,
        const std::reference_wrapper<const MatrixType>& mat,
        const int leaf_max_size = 10, const unsigned int n_thread_build = 1)
        : m_data_matrix(mat)
    {
        const auto dims = row_major ? mat.get().cols() : mat.get().rows();
        if (static_cast<Dimension>(dims) != dimensionality)
            throw std::runtime_error(
                "Error: 'dimensionality' must match column count in data "
                "matrix");
        if (DIM > 0 && static_cast<int32_t>(dims) != DIM)
            throw std::runtime_error(
                "Data set dimensionality does not match the 'DIM' template "
                "argument");
        index_ = new index_t(
            dims, *this /* adaptor */,
            nanoflann::KDTreeSingleIndexAdaptorParams(
                leaf_max_size, nanoflann::KDTreeSingleIndexAdaptorFlags::None,
                n_thread_build));
    }

   public:
    /** Deleted copy constructor */
    KDTreeEigenMatrixAdaptor(const self_t&) = delete;

    ~KDTreeEigenMatrixAdaptor() { delete index_; }

    const std::reference_wrapper<const MatrixType> m_data_matrix;

    /** Query for the \a num_closest closest points to a given point (entered as
     * query_point[0:dim-1]). Note that this is a short-cut method for
     * index->findNeighbors(). The user can also call index->... methods as
     * desired.
     *
     * \note If L2 norms are used, all returned distances are actually squared
     *       distances.
     */
    void query(
        const num_t* query_point, const Size num_closest,
        IndexType* out_indices, num_t* out_distances) const
    {
        nanoflann::KNNResultSet<num_t, IndexType> resultSet(num_closest);
        resultSet.init(out_indices, out_distances);
        index_->findNeighbors(resultSet, query_point);
    }

    /** @name Interface expected by KDTreeSingleIndexAdaptor
     * @{ */

    const self_t& derived() const { return *this; }
    self_t&       derived() { return *this; }

    // Must return the number of data points
    Size kdtree_get_point_count() const
    {
        if (row_major)
            return m_data_matrix.get().rows();
        else
            return m_data_matrix.get().cols();
    }

    // Returns the dim'th component of the idx'th point in the class:
    num_t kdtree_get_pt(const IndexType idx, size_t dim) const
    {
        if (row_major)
            return m_data_matrix.get().coeff(idx, IndexType(dim));
        else
            return m_data_matrix.get().coeff(IndexType(dim), idx);
    }

    // Optional bounding-box computation: return false to default to a standard
    // bbox computation loop.
    //   Return true if the BBOX was already computed by the class and returned
    //   in "bb" so it can be avoided to redo it again. Look at bb.size() to
    //   find out the expected dimensionality (e.g. 2 or 3 for point clouds)
    template <class BBOX>
    bool kdtree_get_bbox(BBOX& /*bb*/) const
    {
        return false;
    }

    /** @} */

};  // end of KDTreeEigenMatrixAdaptor
/** @} */

/** @} */  // end of grouping
}  // namespace nanoflann


================================================
FILE: src/wrapper.hpp
================================================
#include "_ext/KDLineTree.h"
#include "_ext/KDTree.h"
#include <array>
#include <memory>
#include <utility>
#include <vector>

#ifndef BUCKET_FPS_MAX_DIM
#define BUCKET_FPS_MAX_DIM 8
#endif
constexpr size_t max_dim = BUCKET_FPS_MAX_DIM;

using quickfps::KDLineTree;
using quickfps::KDTree;
using quickfps::Point;

template <typename T, size_t DIM, typename S>
std::vector<Point<T, DIM, S>> raw_data_to_points(const float *raw_data,
                                                 size_t n_points, size_t dim) {
    std::vector<Point<T, DIM, S>> points;
    points.reserve(n_points);
    for (size_t i = 0; i < n_points; i++) {
        const float *ptr = raw_data + i * dim;
        points.push_back(Point<T, DIM, S>(ptr, i));
    }
    return points;
}

template <typename T, size_t DIM, typename S = T>
void kdtree_sample(const float *raw_data, size_t n_points, size_t dim,
                   size_t n_samples, size_t start_idx,
                   size_t *sampled_point_indices) {
    auto points = raw_data_to_points<T, DIM, S>(raw_data, n_points, dim);
    std::unique_ptr<Point<T, DIM, S>[]> sampled_points(
        new Point<T, DIM, S>[n_samples]);
    KDTree<T, DIM, S> tree(points.data(), n_points, sampled_points.get());
    tree.buildKDtree();
    tree.init(points[start_idx]);
    tree.sample(n_samples);
    for (size_t i = 0; i < n_samples; i++) {
        sampled_point_indices[i] = sampled_points[i].id;
    }
}

template <typename T, size_t DIM, typename S = T>
void kdline_sample(const float *raw_data, size_t n_points, size_t dim,
                   size_t n_samples, size_t start_idx, size_t height,
                   size_t *sampled_point_indices) {
    auto points = raw_data_to_points<T, DIM, S>(raw_data, n_points, dim);
    std::unique_ptr<Point<T, DIM, S>[]> sampled_points(
        new Point<T, DIM, S>[n_samples]);
    KDLineTree<T, DIM, S> tree(points.data(), n_points, height,
                               sampled_points.get());
    tree.buildKDtree();
    tree.init(points[start_idx]);
    tree.sample(n_samples);
    for (size_t i = 0; i < n_samples; i++) {
        sampled_point_indices[i] = sampled_points[i].id;
    }
}

////////////////////////////////////////
//                                    //
//    Compile Time Function Helper    //
//                                    //
////////////////////////////////////////
using KDTreeFuncType = void (*)(const float *, size_t, size_t, size_t, size_t,
                                size_t *);
using KDLineFuncType = void (*)(const float *, size_t, size_t, size_t, size_t,
                                size_t, size_t *);

template <typename T, size_t Count, typename M, size_t... I>
constexpr std::array<T, Count> mapIndices(M &&m, std::index_sequence<I...>) {
    std::array<T, Count> result{m.template operator()<I + 1>()...};
    return result;
}

template <typename T, size_t Count, typename M>
constexpr std::array<T, Count> map(M m) {
    return mapIndices<T, Count>(m, std::make_index_sequence<Count>());
}

template <typename T, typename S = T> struct kdtree_func_helper {
    template <size_t DIM> KDTreeFuncType operator()() {
        return &kdtree_sample<T, DIM, S>;
    }
};

template <typename T, typename S = T> struct kdline_func_helper {
    template <size_t DIM> KDLineFuncType operator()() {
        return &kdline_sample<T, DIM, S>;
    }
};

/////////////////
//             //
//    C API    //
//             //
/////////////////

extern "C" {
int bucket_fps_kdtree(const float *raw_data, size_t n_points, size_t dim,
                      size_t n_samples, size_t start_idx,
                      size_t *sampled_point_indices) {
    if (dim == 0 || dim > max_dim) {
        // only support 1 to MAX_DIM dimensions
        return 1;
    } else if (start_idx >= n_points) {
        // start_idx should be smaller than n_samples
        return 2;
    }
    auto func_arr = map<KDTreeFuncType, max_dim>(kdtree_func_helper<float>{});
    func_arr[dim - 1](raw_data, n_points, dim, n_samples, start_idx,
                      sampled_point_indices);
    return 0;
}

int bucket_fps_kdline(const float *raw_data, size_t n_points, size_t dim,
                      size_t n_samples, size_t start_idx, size_t height,
                      size_t *sampled_point_indices) {
    if (dim == 0 || dim > max_dim) {
        // only support 1 to MAX_DIM dimensions
        return 1;
    } else if (start_idx >= n_points) {
        // start_idx should be smaller than n_samples
        return 2;
    }
    auto func_arr = map<KDLineFuncType, max_dim>(kdline_func_helper<float>{});
    func_arr[dim - 1](raw_data, n_points, dim, n_samples, start_idx, height,
                      sampled_point_indices);
    return 0;
}
}
Download .txt
gitextract_4aj034v5/

├── .github/
│   └── workflows/
│       ├── release.yml
│       └── test.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CMakeLists.txt
├── LICENSE
├── README.md
├── bench/
│   └── test_bench.py
├── pyproject.toml
└── src/
    ├── _ext/
    │   ├── Interval.h
    │   ├── KDLineTree.h
    │   ├── KDNode.h
    │   ├── KDTree.h
    │   ├── KDTreeBase.h
    │   ├── Point.h
    │   └── utils.h
    ├── fpsample/
    │   └── __init__.py
    ├── lib.cpp
    ├── nanoflann.hpp
    └── wrapper.hpp
Download .txt
SYMBOL INDEX (218 symbols across 12 files)

FILE: bench/test_bench.py
  function create_sample_data (line 19) | def create_sample_data(n_points: int, n_dim: int = 3, seed: int = TEST_S...
  function test_vanilla_fps_4k (line 30) | def test_vanilla_fps_4k(benchmark):
  function test_vanilla_fps_4k_multiple (line 36) | def test_vanilla_fps_4k_multiple(benchmark):
  function test_fps_npdu_4k (line 43) | def test_fps_npdu_4k(benchmark):
  function test_fps_npdu_kdtree_4k (line 50) | def test_fps_npdu_kdtree_4k(benchmark):
  function test_bucket_fps_kdtree_4k (line 57) | def test_bucket_fps_kdtree_4k(benchmark):
  function test_bucket_fps_kdline_4k_h3 (line 64) | def test_bucket_fps_kdline_4k_h3(benchmark):
  function test_bucket_fps_kdline_4k_h5 (line 71) | def test_bucket_fps_kdline_4k_h5(benchmark):
  function test_bucket_fps_kdline_4k_h7 (line 78) | def test_bucket_fps_kdline_4k_h7(benchmark):
  function test_vanilla_fps_50k (line 90) | def test_vanilla_fps_50k(benchmark):
  function test_fps_npdu_50k (line 97) | def test_fps_npdu_50k(benchmark):
  function test_fps_npdu_kdtree_50k (line 104) | def test_fps_npdu_kdtree_50k(benchmark):
  function test_bucket_fps_kdtree_50k (line 111) | def test_bucket_fps_kdtree_50k(benchmark):
  function test_bucket_fps_kdline_50k_h3 (line 118) | def test_bucket_fps_kdline_50k_h3(benchmark):
  function test_bucket_fps_kdline_50k_h5 (line 125) | def test_bucket_fps_kdline_50k_h5(benchmark):
  function test_bucket_fps_kdline_50k_h7 (line 132) | def test_bucket_fps_kdline_50k_h7(benchmark):
  function test_vanilla_fps_100k (line 144) | def test_vanilla_fps_100k(benchmark):
  function test_fps_npdu_100k (line 151) | def test_fps_npdu_100k(benchmark):
  function test_fps_npdu_kdtree_100k (line 158) | def test_fps_npdu_kdtree_100k(benchmark):
  function test_bucket_fps_kdtree_100k (line 165) | def test_bucket_fps_kdtree_100k(benchmark):
  function test_bucket_fps_kdline_100k_h5 (line 172) | def test_bucket_fps_kdline_100k_h5(benchmark):
  function test_bucket_fps_kdline_100k_h7 (line 179) | def test_bucket_fps_kdline_100k_h7(benchmark):
  function test_bucket_fps_kdline_100k_h9 (line 186) | def test_bucket_fps_kdline_100k_h9(benchmark):

FILE: src/_ext/Interval.h
  function namespace (line 9) | namespace quickfps {

FILE: src/_ext/KDLineTree.h
  function namespace (line 14) | namespace quickfps {

FILE: src/_ext/KDNode.h
  function namespace (line 16) | namespace quickfps {

FILE: src/_ext/KDTree.h
  function namespace (line 11) | namespace quickfps {

FILE: src/_ext/KDTreeBase.h
  function namespace (line 15) | namespace quickfps {

FILE: src/_ext/Point.h
  function namespace (line 14) | namespace quickfps {

FILE: src/_ext/utils.h
  function namespace (line 9) | namespace quickfps {

FILE: src/fpsample/__init__.py
  function get_start_idx (line 19) | def get_start_idx(n_pts: int, start_idx: Optional[Union[int, List[int]]]...
  function fps_sampling (line 35) | def fps_sampling(
  function fps_npdu_sampling (line 68) | def fps_npdu_sampling(
  function fps_npdu_kdtree_sampling (line 106) | def fps_npdu_kdtree_sampling(
  function bucket_fps_kdtree_sampling (line 145) | def bucket_fps_kdtree_sampling(
  function bucket_fps_kdline_sampling (line 174) | def bucket_fps_kdline_sampling(

FILE: src/lib.cpp
  class StartIndex (line 16) | class StartIndex {
    type Type (line 18) | enum Type { SINGLE, ARRAY }
    method StartIndex (line 22) | StartIndex(size_t idx) : type(SINGLE), single_idx(idx) {}
    method StartIndex (line 23) | StartIndex(py::array_t<size_t> arr) : type(ARRAY), array_idx(arr) {}
  type PointCloud (line 26) | struct PointCloud {
    method kdtree_get_point_count (line 30) | inline size_t kdtree_get_point_count() const { return N; }
    method kdtree_distance (line 32) | inline float kdtree_distance(const float* a, const size_t b_idx, size_...
    method kdtree_get_pt (line 42) | inline float kdtree_get_pt(const size_t idx, int dim_) const {
    method kdtree_get_bbox (line 47) | bool kdtree_get_bbox(BBOX&) const {
  function check_py_input (line 52) | void check_py_input(
  function fps_sampling_multi_start_index (line 111) | py::array_t<size_t>
  function fps_sampling (line 188) | py::array_t<size_t> fps_sampling(
  function _fps_sampling (line 249) | py::array_t<size_t> _fps_sampling(
  function fps_npdu_sampling (line 272) | py::array_t<size_t> fps_npdu_sampling(
  function fps_npdu_sampling_py (line 343) | py::array_t<size_t> fps_npdu_sampling_py(
  function fps_npdu_kdtree_sampling_py (line 369) | py::array_t<size_t> fps_npdu_kdtree_sampling_py(
  function bucket_fps_kdtree_sampling_py (line 467) | py::array_t<size_t> bucket_fps_kdtree_sampling_py(
  function bucket_fps_kdline_sampling_py (line 522) | py::array_t<size_t> bucket_fps_kdline_sampling_py(
  function PYBIND11_MODULE (line 581) | PYBIND11_MODULE(_fpsample, m, py::mod_gil_not_used(), py::multiple_inter...

FILE: src/nanoflann.hpp
  function T (line 88) | T pi_const()
  type has_resize (line 98) | struct has_resize : std::false_type
  type has_resize<T, decltype((void)std::declval<T>().resize(1), 0)> (line 103) | struct has_resize<T, decltype((void)std::declval<T>().resize(1), 0)>
  type has_assign (line 109) | struct has_assign : std::false_type
  type has_assign<T, decltype((void)std::declval<T>().assign(1, 0), 0)> (line 114) | struct has_assign<T, decltype((void)std::declval<T>().assign(1, 0), 0)>
  function resize (line 123) | inline typename std::enable_if<has_resize<Container>::value, void>::type...
  function resize (line 134) | inline typename std::enable_if<!has_resize<Container>::value, void>::type
  function assign (line 145) | inline typename std::enable_if<has_assign<Container>::value, void>::type...
  function assign (line 155) | inline typename std::enable_if<!has_assign<Container>::value, void>::type
  type IndexDist_Sorter (line 162) | struct IndexDist_Sorter
  type ResultItem (line 181) | struct ResultItem
    method ResultItem (line 183) | ResultItem() = default;
    method ResultItem (line 184) | ResultItem(const IndexType index, const DistanceType distance)
  function KNNResultSet (line 214) | explicit KNNResultSet(CountType capacity_)
  function init (line 219) | void init(IndexType* indices_, DistanceType* dists_)
  function CountType (line 226) | CountType size() const { return count; }
  function empty (line 227) | bool      empty() const { return count == 0; }
  function full (line 228) | bool      full() const { return count == capacity; }
  function addPoint (line 235) | bool addPoint(DistanceType dist, IndexType index)
  class RKNNResultSet (line 289) | class RKNNResultSet
    method RKNNResultSet (line 304) | explicit RKNNResultSet(
    method init (line 314) | void init(IndexType* indices_, DistanceType* dists_)
    method CountType (line 322) | CountType size() const { return count; }
    method empty (line 323) | bool      empty() const { return count == 0; }
    method full (line 324) | bool      full() const { return count == capacity; }
    method addPoint (line 331) | bool addPoint(DistanceType dist, IndexType index)
    class RadiusResultSet (line 384) | class RadiusResultSet
      method RadiusResultSet (line 395) | explicit RadiusResultSet(
      method init (line 403) | void init() { clear(); }
      method clear (line 404) | void clear() { m_indices_dists.clear(); }
      method size (line 406) | size_t size() const { return m_indices_dists.size(); }
      method empty (line 407) | size_t empty() const { return m_indices_dists.empty(); }
      method full (line 409) | bool full() const { return true; }
      method addPoint (line 416) | bool addPoint(DistanceType dist, IndexType index)
      method DistanceType (line 422) | DistanceType worstDist() const { return radius; }
      method worst_item (line 428) | ResultItem<IndexType, DistanceType> worst_item() const
      method sort (line 439) | void sort()
    method save_value (line 451) | void save_value(std::ostream& stream, const T& value)
    method save_value (line 457) | void save_value(std::ostream& stream, const std::vector<T>& value)
    method load_value (line 465) | void load_value(std::istream& stream, T& value)
    method load_value (line 471) | void load_value(std::istream& stream, std::vector<T>& value)
    type Metric (line 483) | struct Metric
    type L1_Adaptor (line 500) | struct L1_Adaptor
      method L1_Adaptor (line 507) | L1_Adaptor(const DataSource& _data_source) : data_source(_data_sourc...
      method DistanceType (line 509) | DistanceType evalMetric(
      method DistanceType (line 543) | DistanceType accum_dist(const U a, const V b, const size_t) const
    type L2_Adaptor (line 562) | struct L2_Adaptor
      method L2_Adaptor (line 569) | L2_Adaptor(const DataSource& _data_source) : data_source(_data_sourc...
      method DistanceType (line 571) | DistanceType evalMetric(
      method DistanceType (line 608) | DistanceType accum_dist(const U a, const V b, const size_t) const
    type L2_Simple_Adaptor (line 627) | struct L2_Simple_Adaptor
      method L2_Simple_Adaptor (line 634) | L2_Simple_Adaptor(const DataSource& _data_source)
      method DistanceType (line 639) | DistanceType evalMetric(
      method DistanceType (line 653) | DistanceType accum_dist(const U a, const V b, const size_t) const
    type SO2_Adaptor (line 672) | struct SO2_Adaptor
      method SO2_Adaptor (line 679) | SO2_Adaptor(const DataSource& _data_source) : data_source(_data_sour...
      method DistanceType (line 681) | DistanceType evalMetric(
      method DistanceType (line 691) | DistanceType accum_dist(const U a, const V b, const size_t) const
    type SO3_Adaptor (line 717) | struct SO3_Adaptor
      method SO3_Adaptor (line 725) | SO3_Adaptor(const DataSource& _data_source)
      method DistanceType (line 730) | DistanceType evalMetric(
      method DistanceType (line 737) | DistanceType accum_dist(const U a, const V b, const size_t idx) const
    type metric_L1 (line 744) | struct metric_L1 : public Metric
      type traits (line 747) | struct traits
    type metric_L2 (line 754) | struct metric_L2 : public Metric
      type traits (line 757) | struct traits
    type metric_L2_Simple (line 764) | struct metric_L2_Simple : public Metric
      type traits (line 767) | struct traits
    type metric_SO2 (line 773) | struct metric_SO2 : public Metric
      type traits (line 776) | struct traits
    type metric_SO3 (line 782) | struct metric_SO3 : public Metric
      type traits (line 785) | struct traits
    type KDTreeSingleIndexAdaptorFlags (line 796) | enum class KDTreeSingleIndexAdaptorFlags
    type KDTreeSingleIndexAdaptorParams (line 811) | struct KDTreeSingleIndexAdaptorParams
      method KDTreeSingleIndexAdaptorParams (line 813) | KDTreeSingleIndexAdaptorParams(
    type SearchParameters (line 830) | struct SearchParameters
      method SearchParameters (line 832) | SearchParameters(float eps_ = 0, bool sorted_ = true)
    class PooledAllocator (line 860) | class PooledAllocator
      method internal_init (line 877) | void internal_init()
      method PooledAllocator (line 892) | PooledAllocator() { internal_init(); }
      method free_all (line 900) | void free_all()
      method T (line 963) | T* allocate(const size_t count = 1)
    type array_or_vector (line 978) | struct array_or_vector
    type array_or_vector<-1, T> (line 984) | struct array_or_vector<-1, T>
    class KDTreeBaseClass (line 1008) | class KDTreeBaseClass
      method freeIndex (line 1013) | void freeIndex(Derived& obj)
      type Node (line 1036) | struct Node
        type leaf (line 1042) | struct leaf
        type nonleaf (line 1046) | struct nonleaf
      type Interval (line 1061) | struct Interval
      method Size (line 1099) | Size size(const Derived& obj) const { return obj.size_; }
      method Size (line 1102) | Size veclen(const Derived& obj) const { return DIM > 0 ? DIM : obj.d...
      method ElementType (line 1105) | ElementType dataset_get(
      method Size (line 1115) | Size usedMemory(const Derived& obj) const
      method computeMinMax (line 1125) | void computeMinMax(
      method NodePtr (line 1148) | NodePtr divideTree(
      method NodePtr (line 1224) | NodePtr divideTreeConcurrent(
      method middleSplit_ (line 1320) | void middleSplit_(
      method planeSplit (line 1382) | void planeSplit(
      method DistanceType (line 1429) | DistanceType computeInitialDistances(
      method save_tree (line 1454) | static void save_tree(
      method load_tree (line 1462) | static void load_tree(Derived& obj, std::istream& stream, NodePtr& t...
      method saveIndex (line 1475) | void saveIndex(const Derived& obj, std::ostream& stream) const
      method loadIndex (line 1490) | void loadIndex(Derived& obj, std::istream& stream)
    class KDTreeSingleIndexAdaptor (line 1545) | class KDTreeSingleIndexAdaptor
      method KDTreeSingleIndexAdaptor (line 1552) | explicit KDTreeSingleIndexAdaptor(
      method KDTreeSingleIndexAdaptor (line 1610) | explicit KDTreeSingleIndexAdaptor(
      method KDTreeSingleIndexAdaptor (line 1620) | explicit KDTreeSingleIndexAdaptor(
      method init (line 1629) | void init(
      method buildIndex (line 1660) | void buildIndex()
      method findNeighbors (line 1708) | bool findNeighbors(
      method Size (line 1749) | Size findWithinBox(RESULTSET& result, const BoundingBox& bbox) const
      method Size (line 1811) | Size knnSearch(
      method Size (line 1840) | Size radiusSearch(
      method Size (line 1858) | Size radiusSearchCustomCallback(
      method Size (line 1882) | Size rknnSearch(
      method init_vind (line 1899) | void init_vind()
      method computeBoundingBox (line 1908) | void computeBoundingBox(BoundingBox& bbox)
      method contains (line 1941) | bool contains(const BoundingBox& bbox, IndexType idx) const
      method searchLevel (line 1959) | bool searchLevel(
      method saveIndex (line 2041) | void saveIndex(std::ostream& stream) const
      method loadIndex (line 2051) | void loadIndex(std::istream& stream) { Base::loadIndex(*this, stream...
    class KDTreeSingleIndexDynamicAdaptor_ (line 2095) | class KDTreeSingleIndexDynamicAdaptor_
      method KDTreeSingleIndexDynamicAdaptor_ (line 2152) | KDTreeSingleIndexDynamicAdaptor_(
      method KDTreeSingleIndexDynamicAdaptor_ (line 2180) | KDTreeSingleIndexDynamicAdaptor_(
      method KDTreeSingleIndexDynamicAdaptor_ (line 2184) | KDTreeSingleIndexDynamicAdaptor_ operator=(
      method buildIndex (line 2203) | void buildIndex()
      method findNeighbors (line 2253) | bool findNeighbors(
      method Size (line 2287) | Size knnSearch(
      method Size (line 2317) | Size radiusSearch(
      method Size (line 2335) | Size radiusSearchCustomCallback(
      method computeBoundingBox (line 2346) | void computeBoundingBox(BoundingBox& bbox)
      method searchLevel (line 2385) | void searchLevel(
      method saveIndex (line 2459) | void saveIndex(std::ostream& stream) { saveIndex(*this, stream); }
      method loadIndex (line 2466) | void loadIndex(std::istream& stream) { loadIndex(*this, stream); }
    class KDTreeSingleIndexDynamicAdaptor (line 2486) | class KDTreeSingleIndexDynamicAdaptor
      method First0Bit (line 2532) | int First0Bit(IndexType num)
      method init (line 2544) | void init()
      method KDTreeSingleIndexDynamicAdaptor (line 2572) | explicit KDTreeSingleIndexDynamicAdaptor(
      method KDTreeSingleIndexDynamicAdaptor (line 2591) | explicit KDTreeSingleIndexDynamicAdaptor(
      method addPoints (line 2596) | void addPoints(IndexType start, IndexType end)
      method removePoint (line 2637) | void removePoint(size_t idx)
      method findNeighbors (line 2661) | bool findNeighbors(
    type KDTreeEigenMatrixAdaptor (line 2701) | struct KDTreeEigenMatrixAdaptor
      method KDTreeEigenMatrixAdaptor (line 2724) | explicit KDTreeEigenMatrixAdaptor(
      method KDTreeEigenMatrixAdaptor (line 2748) | KDTreeEigenMatrixAdaptor(const self_t&) = delete;
      method query (line 2762) | void query(
      method self_t (line 2774) | const self_t& derived() const { return *this; }
      method self_t (line 2775) | self_t&       derived() { return *this; }
      method Size (line 2778) | Size kdtree_get_point_count() const
      method num_t (line 2787) | num_t kdtree_get_pt(const IndexType idx, size_t dim) const
      method kdtree_get_bbox (line 2801) | bool kdtree_get_bbox(BBOX& /*bb*/) const

FILE: src/wrapper.hpp
  function raw_data_to_points (line 18) | std::vector<Point<T, DIM, S>> raw_data_to_points(const float *raw_data,
  function kdtree_sample (line 30) | void kdtree_sample(const float *raw_data, size_t n_points, size_t dim,
  function kdline_sample (line 46) | void kdline_sample(const float *raw_data, size_t n_points, size_t dim,
  function mapIndices (line 73) | constexpr std::array<T, Count> mapIndices(M &&m, std::index_sequence<I.....
  function map (line 79) | constexpr std::array<T, Count> map(M m) {
  type kdtree_func_helper (line 83) | struct kdtree_func_helper {
    method KDTreeFuncType (line 84) | KDTreeFuncType operator()() {
  type kdline_func_helper (line 89) | struct kdline_func_helper {
    method KDLineFuncType (line 90) | KDLineFuncType operator()() {
  function bucket_fps_kdtree (line 102) | int bucket_fps_kdtree(const float *raw_data, size_t n_points, size_t dim,
  function bucket_fps_kdline (line 118) | int bucket_fps_kdline(const float *raw_data, size_t n_points, size_t dim,
Condensed preview — 20 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (184K chars).
[
  {
    "path": ".github/workflows/release.yml",
    "chars": 667,
    "preview": "name: release-upload\n\non:\n  push:\n    tags:\n      - 'v*.*.*'\n\npermissions:\n  id-token: write\n  contents: read\n\njobs:\n  s"
  },
  {
    "path": ".github/workflows/test.yml",
    "chars": 666,
    "preview": "name: test-upload\n\non:\n  push:\n    tags:\n      - 'test*'\n\npermissions:\n  id-token: write\n  contents: read\n\njobs:\n  sdist"
  },
  {
    "path": ".gitignore",
    "chars": 754,
    "preview": "/target\n\n# Byte-compiled / optimized / DLL files\n__pycache__/\n.pytest_cache/\n*.py[cod]\n\n# C extensions\n*.so\n\n# Distribut"
  },
  {
    "path": ".pre-commit-config.yaml",
    "chars": 508,
    "preview": "exclude: \"point-e/.*\"\nrepos:\n- repo: https://github.com/pre-commit/pre-commit-hooks\n  rev: v4.4.0\n  hooks:\n  - id: trail"
  },
  {
    "path": "CMakeLists.txt",
    "chars": 613,
    "preview": "# Require CMake 3.15+ (matching scikit-build-core) Use new versions of all\n# policies up to CMake 4.0\ncmake_minimum_requ"
  },
  {
    "path": "LICENSE",
    "chars": 1065,
    "preview": "MIT License\n\nCopyright (c) 2023 AyajiLin\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\no"
  },
  {
    "path": "README.md",
    "chars": 12373,
    "preview": "# fpsample\n[![pypi package version badge](https://img.shields.io/pypi/v/fpsample)](https://pypi.org/project/fpsample/)\n!"
  },
  {
    "path": "bench/test_bench.py",
    "chars": 6800,
    "preview": "import numpy as np\nimport pytest\n\nimport fpsample\n\nTEST_SEED = 42\nTEST_BENCHMARK_SETTINGS = {\n    \"4k\": {\"group\": \"1024 "
  },
  {
    "path": "pyproject.toml",
    "chars": 1073,
    "preview": "[build-system]\nrequires = [\"scikit-build-core>=0.11\", \"pybind11>=2.6\"]\nbuild-backend = \"scikit_build_core.build\"\n\n[proje"
  },
  {
    "path": "src/_ext/Interval.h",
    "chars": 525,
    "preview": "//\n// Created by 韩萌 on 2022/6/14.\n// Refactored by AyajiLin on 2023/9/16.\n//\n\n#ifndef KD_TREE_BASED_FARTHEST_POINT_SAMPL"
  },
  {
    "path": "src/_ext/KDLineTree.h",
    "chars": 2600,
    "preview": "//\n// Created by hanm on 22-6-15.\n// Refactored by AyajiLin on 2023/9/16.\n//\n\n#ifndef FPS_CPU_KDLINETREE_H\n#define FPS_C"
  },
  {
    "path": "src/_ext/KDNode.h",
    "chars": 6035,
    "preview": "//\n// Created by 韩萌 on 2022/6/14.\n// Refactored by AyajiLin on 2023/9/16.\n//\n\n#ifndef KD_TREE_BASED_FARTHEST_POINT_SAMPL"
  },
  {
    "path": "src/_ext/KDTree.h",
    "chars": 1547,
    "preview": "//\n// Created by hanm on 22-6-15.\n// Refactored by AyajiLin on 2023/9/16.\n//\n\n#ifndef FPS_CPU_KDTREE_H\n#define FPS_CPU_K"
  },
  {
    "path": "src/_ext/KDTreeBase.h",
    "chars": 6610,
    "preview": "//\n// Created by 韩萌 on 2022/6/14.\n// Refactored by AyajiLin on 2023/9/16.\n//\n\n#ifndef KD_TREE_BASED_FARTHEST_POINT_SAMPL"
  },
  {
    "path": "src/_ext/Point.h",
    "chars": 2816,
    "preview": "//\n// Created by 韩萌 on 2022/6/14.\n// Refactored by AyajiLin on 2023/9/16.\n//\n\n#ifndef KD_TREE_BASED_FARTHEST_POINT_SAMPL"
  },
  {
    "path": "src/_ext/utils.h",
    "chars": 474,
    "preview": "// Refactored by AyajiLin on 2023/9/16.\n\n/* functional  */\n#ifndef KD_TREE_UTILS_HPP\n#define KD_TREE_UTILS_HPP\n#include "
  },
  {
    "path": "src/fpsample/__init__.py",
    "chars": 8977,
    "preview": "from __future__ import annotations\n\nimport warnings\nfrom typing import List, Optional, Union\n\nimport numpy as np\n\nfrom ."
  },
  {
    "path": "src/lib.cpp",
    "chars": 22247,
    "preview": "#include <pybind11/pybind11.h>\n#include <pybind11/numpy.h>\n#include \"nanoflann.hpp\"\n#include \"wrapper.hpp\"\n\n#if defined("
  },
  {
    "path": "src/nanoflann.hpp",
    "chars": 96573,
    "preview": "/***********************************************************************\n * Software License Agreement (BSD License)\n *\n"
  },
  {
    "path": "src/wrapper.hpp",
    "chars": 4722,
    "preview": "#include \"_ext/KDLineTree.h\"\n#include \"_ext/KDTree.h\"\n#include <array>\n#include <memory>\n#include <utility>\n#include <ve"
  }
]

About this extraction

This page contains the full source code of the leonardodalinky/fpsample GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 20 files (173.5 KB), approximately 45.7k tokens, and a symbol index with 218 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!