Repository: ma7555/evalify
Branch: main
Commit: c0d5d6c9e78e
Files: 34
Total size: 77.4 KB

Directory structure:
gitextract_pm2nxnd5/

├── .coveragerc
├── .github/
│   ├── ISSUE_TEMPLATE.md
│   └── workflows/
│       ├── codeql-analysis.yml
│       ├── dev.yml
│       └── release.yml
├── .gitignore
├── AUTHORS.md
├── CITATION.cff
├── CONTRIBUTING.md
├── HISTORY.md
├── LICENSE
├── README.md
├── codecov.yml
├── docs/
│   ├── api.md
│   ├── authors.md
│   ├── contributing.md
│   ├── history.md
│   ├── index.md
│   ├── installation.md
│   └── usage.md
├── evalify/
│   ├── __init__.py
│   ├── evalify.py
│   ├── metrics.py
│   └── utils.py
├── examples/
│   └── LFW.py
├── mkdocs.yml
├── pyproject.toml
├── tests/
│   ├── __init__.py
│   ├── data/
│   │   └── LFW.npz
│   ├── test_evalify.py
│   ├── test_experiment_real_data.py
│   ├── test_metrics.py
│   └── test_utils.py
└── tox.ini

================================================
FILE CONTENTS
================================================

================================================
FILE: .coveragerc
================================================
# Settings applied while coverage data is being collected.
[run]
# uncomment the following to omit files during running
#omit =
# Settings applied when the coverage report is produced.
[report]
# Each entry below is a regular expression; any source line matching one of
# them is left out of the coverage report.
exclude_lines =
    pragma: no cover
    def __repr__
    if self.debug:
    if settings.DEBUG
    raise AssertionError
    raise NotImplementedError
    if 0:
    if __name__ == .__main__.:
    def main


================================================
FILE: .github/ISSUE_TEMPLATE.md
================================================
* evalify version:
* Python version:
* Operating System:

### Description

Describe what you were trying to get done.
Tell us what happened, what went wrong, and what you expected to happen.

### What I Did

```
Paste the command(s) you ran and the output.
If there was a crash, please include the traceback here.
```


================================================
FILE: .github/workflows/codeql-analysis.yml
================================================
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
# Static security analysis of the Python sources with GitHub CodeQL.
name: "CodeQL"

on:
  push:
    branches: [ main ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ main ]
  schedule:
    # Weekly scan: every Tuesday at 19:41 (GitHub evaluates cron in UTC).
    - cron: '41 19 * * 2'

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    # Least-privilege token: read the repo/actions, write code-scanning alerts.
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ 'python' ]
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
        # Learn more about CodeQL language support at https://git.io/codeql-language-support

    steps:
    # checkout@v4 matches the version used by the other workflows in this repo;
    # v2 runs on a Node.js runtime that GitHub-hosted runners no longer provide.
    - name: Checkout repository
      uses: actions/checkout@v4

    # Initializes the CodeQL tools for scanning.
    # codeql-action v1 has been retired by GitHub; v3 is a supported major version.
    - name: Initialize CodeQL
      uses: github/codeql-action/init@v3
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
        # By default, queries listed here will override any specified in a config file.
        # Prefix the list here with "+" to use these queries and those in the config file.
        # queries: ./path/to/local/query, your-org/your-repo/queries@main

    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
      uses: github/codeql-action/autobuild@v3

    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 https://git.io/JvXDl

    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
    #    and modify them (or add more) to build your code if your project
    #    uses a compiled language

    #- run: |
    #   make bootstrap
    #   make release

    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v3


================================================
FILE: .github/workflows/dev.yml
================================================
# CI workflow: runs the tox test suite on every supported Python/OS pair, then
# uploads coverage and publishes a development build to Test PyPI.
name: build

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

  # Allows the workflow to be started manually from the Actions tab.
  workflow_dispatch:

jobs:
  test:
    strategy:
      matrix:
        # Versions are quoted so YAML keeps them as strings ("3.10" must not
        # collapse to the float 3.1).
        python-versions: ["3.9", "3.10", "3.11", "3.12"]
        os: [ubuntu-latest, macos-latest, windows-latest]
    runs-on: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-versions }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install poetry tox tox-gh-actions

      - name: test with tox
        run: tox

      - name: list files
        run: ls -l .

  publish_dev_build:
    # Only runs once every matrix entry of the test job has succeeded.
    needs: test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted for consistency with the matrix above: a version is a
          # string, not a float.
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install poetry tox tox-gh-actions

      # tox is run again in this job; the coverage.xml uploaded below is
      # expected to come from this run.
      - name: test with tox
        run: tox

      - name: list files
        run: ls -l .

      - uses: codecov/codecov-action@v4
        with:
          fail_ci_if_error: false
          files: coverage.xml
          token: ${{ secrets.CODECOV_TOKEN }}
      # Rewrites the package version to <current>-dev.<run number> before
      # building the artifacts.
      - name: Build wheels and source tarball
        run: |
          poetry version $(poetry version --short)-dev.$GITHUB_RUN_NUMBER
          poetry version --short
          poetry build

      - name: publish to Test PyPI
        uses: pypa/gh-action-pypi-publish@v1.12.2
        with:
          user: __token__
          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
          repository-url: https://test.pypi.org/legacy/
          skip-existing: true


================================================
FILE: .github/workflows/release.yml
================================================
# Release workflow: on a v1.x.y tag (or manual dispatch) it builds the
# changelog and docs, publishes the docs, creates a GitHub release and uploads
# the package to PyPI.
name: release & publish workflow

on:
  push:
    tags:
      - "v1.*.*"

  # Allows the workflow to be started manually from the Actions tab.
  workflow_dispatch:

jobs:
  release:
    name: Create Release
    runs-on: ubuntu-latest

    strategy:
      matrix:
        # Quoted so YAML keeps the version as a string rather than a float
        # (an unquoted 3.10 would be read as 3.1).
        python-versions: ["3.12"]

    steps:
      - name: Checks-out
        uses: actions/checkout@v4
      # The generated changelog is used as the GitHub release body below.
      - name: "Build Changelog"
        id: build_changelog
        uses: mikepenz/release-changelog-builder-action@v5.0.0
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-versions }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install tox-gh-actions poetry

      - name: pre-publish documentation
        run: |
          poetry install -E doc
          poetry run mkdocs build

      # Publishes the ./site directory given as publish_dir below.
      - name: publish documentation
        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./site

      - name: Build wheels and source tarball
        run: >-
          poetry build

      - name: show temporary files
        run: >-
          ls -l

      - name: create github release
        id: create_release
        uses: softprops/action-gh-release@v2.0.9
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          body: ${{steps.build_changelog.outputs.changelog}}
          # body_path: ./CHANGELOG.md
          files: dist/*.whl
          draft: false
          prerelease: false

      - name: create pypi release
        uses: pypa/gh-action-pypi-publish@v1.12.2
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# IDE settings
.vscode/

# mkdocs build dir
site/

# logo
logo/
poetry.lock
.ruff_cache/


================================================
FILE: AUTHORS.md
================================================
# Credits

## Development Lead

* Mahmoud Bahaa <evalify@ma7555.anonaddy.com>

## Contributors

None yet. Why not be the first?

## Others
* This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [zillionare/cookiecutter-pypackage](https://github.com/zillionare/cookiecutter-pypackage) project template.

* Logo was created using font [GlacialIndifference-Regular](https://hanken.co/product/hk-grotesk/) by [Hanken Design Co.](https://hanken.co/)
* Logo icon designed by Mauro Lucchesi


================================================
FILE: CITATION.cff
================================================
cff-version: 1.2.0
title: evalify
message: "If you use this software, please cite it using the metadata from this file."
type: software
authors:
  - given-names: Mahmoud
    family-names: Bahaa
    email: evalify@ma7555.anonaddy.com
    affiliation: Nile University
    orcid: "https://orcid.org/0000-0001-8688-6495"
doi: 10.5281/zenodo.6181723
date-released: 2022-02-20


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing

Contributions are welcomed, and they are greatly appreciated! Every little bit
helps, and credit will always be given.

You can contribute in many ways:

## Types of Contributions

### Report Bugs

Report bugs at https://github.com/ma7555/evalify/issues.

If you are reporting a bug, please include:

* Your operating system name and version.
* Any details about your local setup that might be helpful in troubleshooting.
* Detailed steps to reproduce the bug.

### Fix Bugs

Look through the GitHub issues for bugs. Anything tagged with "bug" and "help
wanted" is open to whoever wants to implement it.

### Implement Features

Look through the GitHub issues for features. Anything tagged with "enhancement"
and "help wanted" is open to whoever wants to implement it.

### Write Documentation

evalify could always use more documentation, whether as part of the
official evalify docs, in docstrings, or even on the web in blog posts,
articles, and such.

### Submit Feedback

The best way to send feedback is to file an issue at https://github.com/ma7555/evalify/issues.

If you are proposing a feature:

* Explain in detail how it would work.
* Keep the scope as narrow as possible, to make it easier to implement.
* Remember that this is a volunteer-driven project, and that contributions
  are welcome :)

## Get Started!

Ready to contribute? Here's how to set up `evalify` for local development.

1. Fork the `evalify` repo on GitHub.
2. Clone your fork locally

```bash
git clone git@github.com:your_name_here/evalify.git
```

3. Ensure [poetry](https://python-poetry.org/docs/) is installed.
4. Install dependencies and start your virtualenv:

```bash
poetry install -E test -E doc -E dev
```

5. Create a branch for local development:

```bash
git checkout -b name-of-your-bugfix-or-feature
```

   Now you can make your changes locally.

6. When you're done making changes, check that your changes pass the
   tests, including testing other Python versions, with tox:

```bash
tox
```

7. Commit your changes and push your branch to GitHub:

```bash
git add .
git commit -m "Your detailed description of your changes."
git push origin name-of-your-bugfix-or-feature
```

8. Submit a pull request through the GitHub website.

## Pull Request Guidelines

Before you submit a pull request, check that it meets these guidelines:

1. The pull request should include tests.
2. If the pull request adds functionality, the docs should be updated. Put
   your new functionality into a function with a docstring, and add the
   feature to the list in README.md.
3. The pull request should work for Python 3.9, 3.10, 3.11, 3.12 and for PyPy. Check
   https://github.com/ma7555/evalify/actions
   and make sure that the tests pass for all supported Python versions.

## Running Tests
```bash
python -m unittest
```
or
```bash
pytest
```
To run a subset of tests.


## Deploying

A reminder for the maintainers on how to deploy.
Make sure all your changes are committed (including an entry in HISTORY.md).
Then run:

```bash
git push
git push --tags
```

Github Actions will then deploy to PyPI if tests pass.


================================================
FILE: HISTORY.md
================================================
# History

## 0.1.0 (2022-02-20)

* First release on PyPI.

## 0.1.1 (2022-02-22)

* Run time enhancement. 

## 0.1.2 (2022-02-23)

* Various enhancements and refactoring.

## 0.1.3 (2022-02-24)

* Add pearson similarity as a metric

## 0.1.4 (2022-02-24)

* Add EER calculation function.
* Drop support for python 3.7

## 1.0.0 (2024-11-08)

* Bump dependencies.
* Drop support for python 3.8
* Add support for TAR @ FAR

================================================
FILE: LICENSE
================================================
BSD 3-Clause License


Copyright (c) 2022, Mahmoud Bahaa
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this
  list of conditions and the following disclaimer in the documentation and/or
  other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from this
  software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.

================================================
FILE: README.md
================================================
# evalify

<p align="center">

<img src="https://user-images.githubusercontent.com/7144929/154332210-fa1fee34-faae-4567-858a-49fa53e99a2b.svg" width="292" height="120" alt="Logo"/>

</p>

<p align="center">

<a href="https://github.com/ma7555/evalify/blob/main/LICENSE">
    <img src="https://img.shields.io/github/license/ma7555/evalify"
        alt = "License">
</a>
<a href="https://doi.org/10.5281/zenodo.6181723"><img src="https://zenodo.org/badge/DOI/10.5281/zenodo.6181723.svg" alt="DOI"></a>
<a href="https://www.python.org/downloads/">
    <img src="https://img.shields.io/badge/python-3.9 | 3.10 | 3.11 | 3.12-blue.svg"
        alt = "Python 3.9 | 3.10 | 3.11 | 3.12">
</a>
<a href="https://pypi.python.org/pypi/evalify">
    <img src="https://img.shields.io/pypi/v/evalify.svg"
        alt = "Release Status">
</a>
<a href="https://github.com/ma7555/evalify/actions">
    <img src="https://github.com/ma7555/evalify/actions/workflows/dev.yml/badge.svg?branch=main" alt="CI Status">
</a>
<a href="https://ma7555.github.io/evalify/">
    <img src="https://img.shields.io/website/https/ma7555.github.io/evalify/index.html.svg?label=docs&down_message=unavailable&up_message=available" alt="Documentation Status">
</a>
<a href="https://github.com/astral-sh/ruff">
    <img src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json" alt="Code style: Ruff">
</a>

<a href="https://codecov.io/gh/ma7555/evalify">
  <img src="https://codecov.io/gh/ma7555/evalify/branch/main/graph/badge.svg" />
</a>
<a href="https://pypi.org/project/evalify/"><img alt="PyPI Downloads/Month" src="https://img.shields.io/pypi/dm/evalify">
</a>

</p>

**Evaluate Biometric Authentication Models Literally in Seconds.**

## Installation
#### Stable release:
```bash
pip install evalify
```
#### Bleeding edge:
```bash
pip install git+https://github.com/ma7555/evalify.git
```
## Used for
Evaluating all biometric authentication models, where the model output is high-level embeddings known as feature vectors for visual or behaviour biometrics or d-vectors for auditory biometrics.

## Usage

```python
import numpy as np
from evalify import Experiment

rng = np.random.default_rng()
nphotos = 500
emb_size = 32
nclasses = 10
X = rng.random((nphotos, emb_size))
y = rng.integers(nclasses, size=nphotos)

experiment = Experiment()
experiment.run(X, y)
experiment.get_roc_auc()
print(experiment.roc_auc)
print(experiment.find_threshold_at_fpr(0.01))
```
## How it works
* When you run an experiment, evalify tries all the possible combinations between individuals for authentication based on the `X` and `y` parameters and returns the results including FPR, TPR, FNR, TNR and ROC AUC. `X` is an array of embeddings and `y` is an array of corresponding targets.
* Evalify can find the optimal threshold based on your agreed FPR and desired similarity or distance metric.

## Documentation: 
* <https://ma7555.github.io/evalify/>


## Features

* Blazing fast implementation for metrics calculation through optimized einstein sum and vectorized calculations.
* Many operations are dispatched to canonical BLAS, cuBLAS, or other specialized routines.
* Smart sampling options using direct indexing from pre-calculated arrays with total control over sampling strategy and sampling numbers.
* Supports most evaluation metrics:
    - `cosine_similarity`
    - `pearson_similarity`
    - `cosine_distance`
    - `euclidean_distance`
    - `euclidean_distance_l2`
    - `minkowski_distance`
    - `manhattan_distance`
    - `chebyshev_distance`
* Computation time for 4 metrics 4.2 million samples experiment is **24 seconds vs 51 minutes** if looping using `scipy.spatial.distance` implementations.

## TODO
* Safer memory allocation. I did not have issues, but if you run out of memory, please manually set the `batch_size` argument.

## Contribution
* Contributions are welcomed, and they are greatly appreciated! Every little bit helps, and credit will always be given.
* Please check [CONTRIBUTING.md](https://github.com/ma7555/evalify/blob/main/CONTRIBUTING.md) for guidelines.

## Citation
* If you use this software, please cite it using the metadata from [CITATION.cff](https://github.com/ma7555/evalify/blob/main/CITATION.cff)


================================================
FILE: codecov.yml
================================================
# Codecov commit-status thresholds.
coverage:
  status:
    # Status check on the coverage of the project as a whole.
    project:
      default:
        target: 90%
    # Status check on the coverage of the patch (the lines changed by a commit
    # or pull request).
    patch:
      default:
        target: 85%


================================================
FILE: docs/api.md
================================================
::: evalify.evalify
    handler: python


================================================
FILE: docs/authors.md
================================================
{%
  include-markdown "../AUTHORS.md"
%}

================================================
FILE: docs/contributing.md
================================================
{%
  include-markdown "../CONTRIBUTING.md"
%}

================================================
FILE: docs/history.md
================================================
{%
  include-markdown "../HISTORY.md"
%}

================================================
FILE: docs/index.md
================================================
{%
    include-markdown "../README.md"
%}


================================================
FILE: docs/installation.md
================================================
# Installation

## Stable release

To install evalify, run this command in your
terminal:

```bash
pip install evalify
```

This is the preferred method to install evalify, as it will always install the most recent stable release.

If you don't have [pip][] installed, this [Python installation guide][]
can guide you through the process.

## From source

The source for evalify can be downloaded from
the [Github repo][].

You can either clone the public repository:

```bash
git clone https://github.com/ma7555/evalify.git
```

Or download the [tarball][]:

```bash
curl -OJL https://github.com/ma7555/evalify/tarball/main
```

Once you have a copy of the source, you can install it with:

```bash
pip install .
```

  [pip]: https://pip.pypa.io
  [Python installation guide]: http://docs.python-guide.org/en/latest/starting/installation/
  [Github repo]: https://github.com/ma7555/evalify
  [tarball]: https://github.com/ma7555/evalify/tarball/main


================================================
FILE: docs/usage.md
================================================
# Usage

To use evalify in a project

```python
import numpy as np
from evalify import Experiment

rng = np.random.default_rng()
nphotos = 500
emb_size = 32
nclasses = 10
X = rng.random((nphotos, emb_size))
y = rng.integers(nclasses, size=nphotos)

experiment = Experiment()
experiment.run(X, y)
experiment.get_roc_auc()
print(experiment.df.roc_auc)
```

For a working experiment using real face embeddings, please refer to `LFW.py` under `./examples`.

```bash
python ./examples/LFW.py
```
```
Total available embeddings 2921 resulted in 4264660 samples for the experiment.
Metrics calculations executed in 24.05 seconds
ROC AUC:
OrderedDict([('euclidean_distance', 0.9991302819624498), ('cosine_distance', 0.9991302818953706), ('euclidean_distance_l2', 0.9991302818953706), ('manhattan_distance', 0.9991260462584446)])
```


================================================
FILE: evalify/__init__.py
================================================
"""Top-level package for evalify."""

# The redundant `as Experiment` alias marks the name as an explicit public
# re-export of this package.
from evalify.evalify import Experiment as Experiment

# Package metadata.
__author__ = """Mahmoud Bahaa"""
__email__ = "evalify@ma7555.anonaddy.com"
# NOTE(review): HISTORY.md lists a 1.0.0 release; confirm this version string
# is kept in sync with the published package version.
__version__ = "0.1.0"


================================================
FILE: evalify/evalify.py
================================================
"""Evalify main module used for creating the verification experiments.

Creates experiments with embedding pairs to compare for face verification tasks
including positive pairs, negative pairs and metrics calculations using a very
optimized einstein sum. Many operations are dispatched to canonical BLAS, cuBLAS,
or other specialized routines. Extremely large arrays are split into smaller batches,
every batch would consume roughly the maximum available memory.

  Typical usage example:

  ```
  experiment = Experiment()
  experiment.run(X, y)
  ```
"""

import itertools
import sys
from collections import OrderedDict
from typing import Any, List, Optional, Sequence, Tuple, Union

import numpy as np
import pandas as pd
from sklearn.metrics import auc, confusion_matrix, roc_curve

from evalify.metrics import (
    DISTANCE_TO_SIMILARITY,
    METRICS_NEED_NORM,
    METRICS_NEED_ORDER,
    REVERSE_DISTANCE_TO_SIMILARITY,
    metrics_caller,
)
from evalify.utils import _validate_vectors, calculate_best_batch_size

# A single value that is either a string or an integer.
StrOrInt = Union[str, int]
# A string, an integer, or a sequence whose items are strings or integers.
StrIntSequence = Union[str, int, Sequence[Union[str, int]]]


class Experiment:
    """Defines an experiment for evalifying.

    Args:
        metrics: The list of metrics to use. Can be one or more of the following:
            `cosine_similarity`, `pearson_similarity`, `cosine_distance`,
            `euclidean_distance`, `euclidean_distance_l2`, `minkowski_distance`,
            `manhattan_distance` and `chebyshev_distance`
        same_class_samples:
            - 'full': Samples all possible images within each class to create
                all possible positive pairs.
            -  int: Samples specific number of images for every class to create
                nC2 pairs where n is passed integer.
        different_class_samples:
            - 'full': Samples one image from every class with all possible pairs
                of different classes. This can grow exponentially as the number
                of images increase. (N, M) = (1, "full")
            - 'minimal': Samples one image from every class with one image of
                all other classes. (N, M) = (1, 1). (Default)
            - int: Samples one image from every class with provided number of
                images of every other class.
            - tuple or list: (N, M) Samples N images from every class with M images of
                every other class.
        seed: Optional random seed for reproducibility.


    Notes:
        - `same_class_samples`:
            If the provided number is greater than the achievable for the class,
            the maximum possible combinations are used.
        - `different_class_samples`:
            If the provided number is greater than the achievable for the class,
            the maximum possible combinations are used. (N, M) can also be
            ('full', 'full') but this will calculate all possible combinations
            between all possible negative samples. If the dataset is not small,
            this will probably result in an extremely large array!

    """

    def __init__(
        self,
        metrics: Union[str, Sequence[str]] = "cosine_similarity",
        same_class_samples: StrOrInt = "full",
        different_class_samples: StrIntSequence = "minimal",
        seed: Optional[int] = None,
    ) -> None:
        self.experiment_success = False
        self.cached_predicted_as_similarity = {}
        self.metrics = (metrics,) if isinstance(metrics, str) else metrics
        self.same_class_samples = same_class_samples
        self.different_class_samples = different_class_samples
        self.seed = seed

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        return self.run(*args, **kwds)

    @staticmethod
    def _validate_args(
        metrics: Sequence[str],
        same_class_samples: StrOrInt,
        different_class_samples: StrIntSequence,
        batch_size: Optional[StrOrInt],
        p,
    ) -> None:
        """Validates passed arguments to Experiment.run() method."""
        if same_class_samples != "full" and not isinstance(same_class_samples, int):
            msg = (
                "`same_class_samples` argument must be one of 'full' or an integer "
                f"Received: same_class_samples={same_class_samples}"
            )
            raise ValueError(
                msg,
            )

        if different_class_samples not in ("full", "minimal"):
            if not isinstance(different_class_samples, (int, list, tuple)):
                msg = (
                    "`different_class_samples` argument must be one of 'full', "
                    "'minimal', an integer, a list or tuple of integers or keyword "
                    "'full'."
                    f"Received: different_class_samples={different_class_samples}."
                )
                raise ValueError(
                    msg,
                )
            if isinstance(different_class_samples, (list, tuple)) and (
                not (
                    all(
                        isinstance(i, int) or i == "full"
                        for i in different_class_samples
                    )
                )
                or (len(different_class_samples)) != 2
            ):
                msg = (
                    "When passing `different_class_samples` as a tuple or list, "
                    "elements must be exactly two of integer type or keyword 'full' "
                    "(N, M). "
                    f"Received: different_class_samples={different_class_samples}."
                )
                raise ValueError(
                    msg,
                )

        if (
            batch_size != "best"
            and not isinstance(batch_size, int)
            and batch_size is not None
        ):
            msg = (
                '`batch_size` argument must be either "best" or of type integer '
                f"Received: batch_size={batch_size} with type {type(batch_size)}."
            )
            raise ValueError(
                msg,
            )

        if any(metric not in metrics_caller for metric in metrics):
            msg = (
                f"`metric` argument must be one of {tuple(metrics_caller.keys())} "
                f"Received: metric={metrics}"
            )
            raise ValueError(
                msg,
            )

        if p < 1:
            msg = f"`p` must be an int and at least 1. Received: p={p}"
            raise ValueError(msg)

    def _get_pairs(
        self,
        y,
        same_class_samples,
        different_class_samples,
        target,
    ) -> List[Tuple]:
        """Generates experiment pairs."""
        same_ixs_full = np.argwhere(y == target).ravel()
        if isinstance(same_class_samples, int):
            same_class_samples = min(len(same_ixs_full), same_class_samples)
            same_ixs = self.rng.choice(same_ixs_full, same_class_samples)
        elif same_class_samples == "full":
            same_ixs = same_ixs_full
        same_pairs = itertools.combinations(same_ixs, 2)
        same_pairs = [(a, b, target, target, 1) for a, b in same_pairs]

        different_ixs = np.argwhere(y != target).ravel()
        diff_df = pd.DataFrame(
            data={"sample_idx": different_ixs, "target": y[different_ixs]},
        )

        diff_df = diff_df.sample(frac=1, random_state=self.seed)
        if different_class_samples in ["full", "minimal"] or isinstance(
            different_class_samples,
            int,
        ):
            N = 1
            if different_class_samples == "minimal":
                diff_df = diff_df.drop_duplicates(subset=["target"])
        else:
            N, M = different_class_samples
            N = len(same_ixs_full) if N == "full" else min(N, len(same_ixs_full))
            if M != "full":
                diff_df = (
                    diff_df.groupby("target")
                    .apply(lambda x: x[:M], include_groups=False)
                    .droplevel(0)
                )

        different_ixs = diff_df.sample_idx.to_numpy()

        different_pairs = itertools.product(
            self.rng.choice(same_ixs_full, N, replace=False),
            different_ixs,
        )
        different_pairs = [(a, b, target, y[b], 0) for a, b in different_pairs if a < b]

        return same_pairs + different_pairs

    def run(
        self,
        X: np.ndarray,
        y: np.ndarray,
        batch_size: Optional[StrOrInt] = "best",
        shuffle: bool = False,
        return_embeddings: bool = False,
        p: int = 3,
    ) -> pd.DataFrame:
        """Runs an experiment for face verification.

        Args:
            X: Embeddings array
            y: Targets for X as integers
            batch_size:
                - 'best': Let the program decide based on available memory such that
                    every batch will fit into the available memory. (Default)
                - int: Manually decide the batch_size.
                - None: No batching. All experiment and intermediate results must fit
                    entirely into memory or a MemoryError will be raised.
            shuffle: Shuffle the returned experiment dataframe. Default: False.
            return_embeddings: Whether to return the embeddings instead of indexes.
                Default: False
            p:
                The order of the norm of the difference. Should be `p >= 1`, Only valid
                with minkowski_distance as a metric. Default = 3.

        Returns:
            pandas.DataFrame: A DataFrame representing the experiment results.

        Raises:
            ValueError: An error occurred with the provided arguments.

        """
        self._validate_args(
            self.metrics,
            self.same_class_samples,
            self.different_class_samples,
            batch_size,
            p,
        )
        X, y = _validate_vectors(X, y)
        all_targets = np.unique(y)
        all_pairs = []
        metric_fns = list(map(metrics_caller.get, self.metrics))
        # A fresh generator per run keeps seeded experiments reproducible.
        self.rng = np.random.default_rng(self.seed)
        for target in all_targets:
            all_pairs += self._get_pairs(
                y,
                self.same_class_samples,
                self.different_class_samples,
                target,
            )

        # Similarity conversions cached by `predicted_as_similarity` belong to the
        # previous experiment dataframe; drop them so a re-run never serves stale
        # scores.
        self.cached_predicted_as_similarity = {}
        self.df = pd.DataFrame(
            data=all_pairs,
            columns=["emb_a", "emb_b", "target_a", "target_b", "target"],
        )
        experiment_size = len(self.df)
        if shuffle:
            self.df = self.df.sample(frac=1, random_state=self.seed)
        if batch_size == "best":
            batch_size = calculate_best_batch_size(X)
        elif batch_size is None:
            batch_size = experiment_size
        kwargs = {}
        if any(metric in METRICS_NEED_NORM for metric in self.metrics):
            kwargs["norms"] = np.linalg.norm(X, axis=1)
        if any(metric in METRICS_NEED_ORDER for metric in self.metrics):
            kwargs["p"] = p

        # Explicit integer dtype so that an empty experiment (object-typed empty
        # columns) can still be used for indexing into X.
        emb_a = self.df.emb_a.to_numpy(dtype=np.intp)
        emb_b = self.df.emb_b.to_numpy(dtype=np.intp)

        # `np.array_split` needs at least one section, and a zero batch size
        # (e.g. an empty experiment with `batch_size=None`) must not divide by 0.
        n_batches = max(1, int(np.ceil(experiment_size / max(batch_size, 1))))
        emb_a_s = np.array_split(emb_a, n_batches)
        emb_b_s = np.array_split(emb_b, n_batches)

        for metric, metric_fn in zip(self.metrics, metric_fns):
            self.df[metric] = np.hstack(
                [metric_fn(X, i, j, **kwargs) for i, j in zip(emb_a_s, emb_b_s)],
            )
        if return_embeddings:
            self.df["emb_a"] = X[emb_a].tolist()
            self.df["emb_b"] = X[emb_b].tolist()

        self.experiment_success = True
        return self.df

    def find_optimal_cutoff(self) -> dict:
        """Finds, per metric, the ROC threshold where sensitivity meets specificity.

        For every metric the ROC curve is computed on the raw metric column and the
        threshold minimizing `|TPR - (1 - FPR)|` is selected, i.e. the point where
        the True Positive Rate is closest to the True Negative Rate.

        Returns:
            dict: A dictionary with metrics as keys and their corresponding optimal
            threshold as values. It is also stored in `self.optimal_cutoff`.
        """

        self.check_experiment_run()
        cutoffs = {}
        self.optimal_cutoff = cutoffs
        for name in self.metrics:
            fpr, tpr, cut_points = roc_curve(self.df["target"], self.df[name])
            positions = np.arange(len(tpr))
            curve = pd.DataFrame(
                {
                    "tf": pd.Series(tpr - (1 - fpr), index=positions),
                    "threshold": pd.Series(cut_points, index=positions),
                },
            )
            # Row whose TPR is closest to 1 - FPR.
            closest = curve["tf"].abs().argsort()[:1]
            cutoffs[name] = curve.iloc[closest]["threshold"].item()
        return self.optimal_cutoff

    def threshold_at_fpr(self, fpr: float) -> dict:
        """Look up the ROC operating point at a requested False Positive Rate.

        Every metric is first expressed as a similarity, its full ROC curve is
        computed, and the curve is searched for the requested FPR. For distance
        metrics the reported threshold is mapped back to the distance scale.

        Args:
            fpr (float): Desired False Positive Rate. Must be between 0 and 1.

        Returns:
            dict: Maps every metric to a dictionary holding the `FPR`, `TPR` and
            `threshold` of the selected ROC point.

        Raises:
            ValueError: If the provided `fpr` is not between 0 and 1.
        """

        self.check_experiment_run()
        if not 0 <= fpr <= 1:
            raise ValueError("`fpr` must be between 0 and 1. " f"Received wanted_fpr={fpr}")

        results = {}
        for name in self.metrics:
            scores = self.predicted_as_similarity(name)
            fpr_curve, tpr_curve, cut_points = roc_curve(
                self.df["target"],
                scores,
                drop_intermediate=False,
            )
            curve = pd.DataFrame(
                {"FPR": fpr_curve, "TPR": tpr_curve, "threshold": cut_points},
            )
            # First row with FPR >= fpr and first row with FPR > fpr.
            lo = np.searchsorted(curve["FPR"], fpr, side="left")
            hi = np.searchsorted(curve["FPR"], fpr, side="right")

            if fpr == 0:
                chosen = curve.iloc[hi]
            elif fpr == 1 or lo == hi:
                chosen = curve.iloc[lo]
            else:
                lo_row = curve.iloc[lo]
                hi_row = curve.iloc[hi]
                lo_is_closer = abs(lo_row.FPR - fpr) < abs(hi_row.FPR - fpr)
                chosen = lo_row if lo_is_closer else hi_row

            point = chosen.to_dict()
            if name in REVERSE_DISTANCE_TO_SIMILARITY:
                # Report the threshold on the metric's own (distance) scale.
                to_distance = REVERSE_DISTANCE_TO_SIMILARITY.get(name)
                point["threshold"] = to_distance(point["threshold"])
            results[name] = point
        return results

    def get_binary_prediction(self, metric: str, threshold: float) -> pd.Series:
        """Binary classification prediction based on the given metric and threshold.

        Args:
            metric: Metric name for the desired prediction.
            threshold: Cut off threshold.

        Returns:
            pd.Series: Binary predictions.

        """
        return (
            self.df[metric].apply(lambda x: 1 if x < threshold else 0)
            if metric in DISTANCE_TO_SIMILARITY
            else self.df[metric].apply(lambda x: 1 if x > threshold else 0)
        )

    def evaluate_at_threshold(self, threshold: float, metric: str) -> dict:
        """Evaluate performance of one metric at a specific threshold.

        Args:
            threshold: Cut-off threshold.
            metric: Metric to use. Must be one of the metrics the experiment was
                run with.

        Returns:
            dict: A dict containing all evaluation metrics (TPR, TNR, PPV, NPV,
            FPR, FNR, FDR, FOR and F1) for the requested metric. The same dict is
            stored in `self.metrics_evaluation`.

        Raises:
            NotImplementedError: If the experiment has not been run yet.
            ValueError: If `metric` was not used while running the experiment.

        """
        self.metrics_evaluation = {}
        self.check_experiment_run(metric)
        # Evaluate the requested metric only; iterating over every experiment
        # metric would overwrite the result and report the last metric instead.
        predicted = self.get_binary_prediction(metric, threshold)
        # Fixing the labels keeps the matrix 2x2 even when only one class shows
        # up in the targets/predictions, so the unpacking below cannot fail.
        cm = confusion_matrix(self.df["target"], predicted, labels=[0, 1])
        tn, fp, fn, tp = cm.ravel()
        TPR = tp / (tp + fn)  # recall / true positive rate
        TNR = tn / (tn + fp)  # true negative rate
        PPV = tp / (tp + fp)  # precision / positive predicted value
        NPV = tn / (tn + fn)  # negative predictive value
        FPR = fp / (fp + tn)  # false positive rate
        FNR = 1 - TPR  # false negative rate
        FDR = 1 - PPV  # false discovery rate
        FOR = 1 - NPV  # false omission rate
        F1 = 2 * (PPV * TPR) / (PPV + TPR)

        evaluation = {
            "TPR": TPR,
            "TNR": TNR,
            "PPV": PPV,
            "NPV": NPV,
            "FPR": FPR,
            "FNR": FNR,
            "FDR": FDR,
            "FOR": FOR,
            "F1": F1,
        }
        self.metrics_evaluation = evaluation

        return evaluation

    def check_experiment_run(self, metric: Optional[str] = None) -> bool:
        caller = sys._getframe().f_back.f_code.co_name
        if not self.experiment_success:
            msg = (
                f"`{caller}` function can only be run after running "
                "`run_experiment`."
            )
            raise NotImplementedError(
                msg,
            )
        if metric is not None and metric not in self.metrics:
            msg = (
                f"`{caller}` function can only be called with `metric` from "
                f"{self.metrics} which were used while running the experiment"
            )
            raise ValueError(
                msg,
            )
        return True

    def roc_auc(self) -> OrderedDict:
        """Find ROC AUC for all the metrics used.

        Distance metrics are converted to similarities first so that a larger
        score always means "more likely the same identity".

        Returns:
            OrderedDict: An OrderedDict with AUC for all metrics, sorted from the
            highest to the lowest AUC. The same mapping is stored in
            `self.roc_auc_results`.

        """
        self.check_experiment_run()
        # Results live in a separate attribute: assigning them to `self.roc_auc`
        # would replace this method on the instance and break any later call.
        auc_by_metric = {}
        for metric in self.metrics:
            predicted = self.predicted_as_similarity(metric)
            fpr, tpr, _ = roc_curve(
                self.df["target"],
                predicted,
                drop_intermediate=False,
            )
            auc_by_metric[metric] = auc(fpr, tpr).item()
        self.roc_auc_results = OrderedDict(
            sorted(auc_by_metric.items(), key=lambda x: x[1], reverse=True),
        )
        return self.roc_auc_results

    def predicted_as_similarity(self, metric: str) -> pd.Series:
        """Convert distance metrics to a similarity measure.

        Args:
            metric: distance metric to convert to similarity. If a similarity metric is
                passed, It gets returned unchanged.

        Returns:
            pd.Series: Converted distance to similarity.

        """
        predicted = self.df[metric]
        if metric in DISTANCE_TO_SIMILARITY:
            predicted = (
                self.cached_predicted_as_similarity[metric]
                if metric in self.cached_predicted_as_similarity
                else DISTANCE_TO_SIMILARITY.get(metric)(predicted)
            )
            self.cached_predicted_as_similarity[metric] = predicted
        return predicted

    def eer(self) -> OrderedDict:
        """Calculates the Equal Error Rate (EER) for each metric.

        The EER is taken at the ROC point where the False Negative Rate and the
        False Positive Rate are closest, and reported as the mean of the two.
        For distance metrics the threshold is mapped back to the distance scale.

        Returns:
            OrderedDict: A dictionary containing the EER value and threshold for each
            metric.
                The metrics are sorted in ascending order based on the EER values.
                Example: {'metric1': {'EER': 0.123, 'threshold': 0.456},
                        ...}
            The same mapping is stored in `self.eer_results`.

        """
        self.check_experiment_run()
        # Results live in a separate attribute: assigning them to `self.eer`
        # would replace this method on the instance and break any later call.
        eer_by_metric = {}
        for metric in self.metrics:
            predicted = self.predicted_as_similarity(metric)
            actual = self.df["target"]

            fpr, tpr, thresholds = roc_curve(
                actual,
                predicted,
                pos_label=1,
                drop_intermediate=False,
            )
            fnr = 1 - tpr
            # Locate the crossing point once and reuse it for all three lookups.
            eer_ix = np.nanargmin(np.absolute(fnr - fpr))
            eer_threshold = thresholds[eer_ix].item()
            eer_1 = fpr[eer_ix].item()
            eer_2 = fnr[eer_ix].item()
            if metric in REVERSE_DISTANCE_TO_SIMILARITY:
                eer_threshold = REVERSE_DISTANCE_TO_SIMILARITY.get(metric)(
                    eer_threshold,
                )

            eer_by_metric[metric] = {
                "EER": (eer_1 + eer_2) / 2,
                "threshold": eer_threshold,
            }
        self.eer_results = OrderedDict(
            sorted(eer_by_metric.items(), key=lambda x: x[1]["EER"], reverse=False),
        )

        return self.eer_results

    def tar_at_far(self, far_values: List[float]) -> OrderedDict:
        """Calculates TAR at specified FAR values for each metric.

        For every requested FAR the reported TAR is the True Positive Rate of the
        last ROC point whose False Positive Rate does not exceed that FAR.

        Args:
            far_values (List[float]): A list of False Accept Rates (FAR) to get TAR
                values for. A single number is accepted as well.

        Returns:
            OrderedDict: A dictionary with keys as metrics (in experiment order) and
            values as dictionaries of FAR:TAR pairs. The same mapping is stored in
            `self.tar_at_far_results`.

        Raises:
            ValueError: If any FAR in far_values is not between 0 and 1.
        """
        if isinstance(far_values, (float, int)):
            far_values = [float(far_values)]

        if not all(0 <= far <= 1 for far in far_values):
            raise ValueError("All FAR values must be between 0 and 1.")

        self.check_experiment_run()
        tar_at_far_results = OrderedDict()

        for metric in self.metrics:
            predicted = self.predicted_as_similarity(metric)
            # Keep every ROC point (as the other analysis methods do): dropping
            # intermediate points can make the step lookup below fall back to an
            # earlier point and under-report the TAR.
            fpr, tpr, _ = roc_curve(
                self.df["target"],
                predicted,
                pos_label=1,
                drop_intermediate=False,
            )

            tar_values = {}
            for far in far_values:
                idx = np.searchsorted(fpr, far, side="right") - 1
                idx = max(0, min(idx, len(fpr) - 1))  # Ensure idx is within bounds
                tar_values[far] = tpr[idx].item()

            tar_at_far_results[metric] = tar_values

        # Metrics keep their experiment order. The former sort keyed on the first
        # FAR value, which is identical for all metrics (a no-op), and raised an
        # IndexError when `far_values` was empty.
        self.tar_at_far_results = tar_at_far_results

        return self.tar_at_far_results


================================================
FILE: evalify/metrics.py
================================================
"""Evalify metrics module used for calculating the evaluation metrics.

Optimized calculations using einstein sum. The embeddings and norms arrays are
indexed with every split, so calculations happen over large data chunks very
quickly.
"""

import numpy as np


def _inner1d(A, B):
    """Calculate the inner product between two arrays of vectors.

    Args:
        A (numpy.ndarray): 2D array of shape (n_samples, n_features)
        B (numpy.ndarray): 2D array of shape (n_samples, n_features)

    Returns:
        numpy.ndarray: 1D array of shape (n_samples,) where each element is the inner
        product of the corresponding rows in A and B

    """
    return np.einsum("ij,ij->i", A, B, optimize="optimal")


def cosine_similarity(embs, ix, iy, norms, return_distance=False, **kwargs):
    """Cosine similarity (or distance) between indexed pairs of embedding rows.

    Args:
        embs (numpy.ndarray): 2D array of shape (n_samples, n_features)
        ix (numpy.ndarray): 1D array with the row indices of the first element of
        every pair
        iy (numpy.ndarray): 1D array with the row indices of the second element of
        every pair
        norms (numpy.ndarray): 1D array indexed like the rows of embs, holding the
        norm used to scale each row
        return_distance (bool): Return `1 - similarity` instead of the similarity.
        Defaults to False.

    Returns:
        numpy.ndarray: 1D array with one value per pair: the cosine similarity, or
        the cosine distance when `return_distance` is set.

    """
    dot_products = _inner1d(embs[ix], embs[iy])
    norm_products = norms[ix] * norms[iy]
    similarity = dot_products / norm_products
    if return_distance:
        return 1 - similarity
    return similarity


def euclidean_distance_l2(embs, ix, iy, norms, **kwargs):
    """Euclidean distance between indexed pairs of rows after scaling by `norms`.

    Args:
        embs (numpy.ndarray): 2D array of shape (n_samples, n_features).
        ix (numpy.ndarray): 1D array with the row indices of the first element of
        every pair.
        iy (numpy.ndarray): 1D array with the row indices of the second element of
        every pair.
        norms (numpy.ndarray): 1D array indexed like the rows of embs; every row is
        divided by its entry before the distance is taken.

    Returns:
        numpy.ndarray: 1D array with one distance per pair.

    """
    # Scale both sides of every pair, then measure the length of the difference.
    left = embs[ix] / norms[ix].reshape(-1, 1)
    right = embs[iy] / norms[iy].reshape(-1, 1)
    difference = left - right
    return np.linalg.norm(difference, axis=1)


def minkowski_distance(embs, ix, iy, p, **kwargs):
    """Order-`p` norm of the difference between indexed pairs of rows.

    Covers Manhattan (`p=1`), Euclidean (`p=2`) and Chebyshev (`p=np.inf`)
    distances as special cases.

    Args:
        embs (numpy.ndarray): 2D array of shape (n_samples, n_features)
        ix (numpy.ndarray): 1D array with the row indices of the first element of
        every pair
        iy (numpy.ndarray): 1D array with the row indices of the second element of
        every pair
        p (int): The order of the norm of the difference.

    Returns:
        numpy.ndarray: 1D array with one distance per pair.

    """
    difference = embs[ix] - embs[iy]
    distances = np.linalg.norm(difference, ord=p, axis=1)
    return distances


def pearson_similarity(embs, ix, iy, **kwargs):
    """Pearson correlation coefficient between indexed pairs of embedding rows.

    Args:
        embs (numpy.ndarray): 2D array of shape (n_samples, n_features)
        ix (numpy.ndarray): 1D array with the row indices of the first element of
        every pair
        iy (numpy.ndarray): 1D array with the row indices of the second element of
        every pair

    Returns:
        numpy.ndarray: 1D array with one correlation coefficient per pair.

    """
    left = embs[ix]
    right = embs[iy]
    # Center every row on its own mean.
    left_centered = left - left.mean(axis=1, keepdims=True)
    right_centered = right - right.mean(axis=1, keepdims=True)
    # Per-row sums of squares, kept as column vectors for `_inner1d`.
    left_ss = (left_centered**2).sum(axis=1, keepdims=True)
    right_ss = (right_centered**2).sum(axis=1, keepdims=True)
    covariance = _inner1d(left_centered, right_centered)
    scale = np.sqrt(_inner1d(left_ss, right_ss))
    return covariance / scale


# Registry mapping every supported metric name to the callable computing it.
# All callables share the calling convention `fn(embs, ix, iy, **kwargs)`; the
# lambdas pin the option that distinguishes a variant and swallow any extra
# keyword arguments (such as `norms` or `p`) they do not need.
metrics_caller = {
    "cosine_similarity": cosine_similarity,
    "pearson_similarity": pearson_similarity,
    "cosine_distance": lambda embs, ix, iy, norms, **kwargs: cosine_similarity(
        embs,
        ix,
        iy,
        norms,
        return_distance=True,
    ),
    "euclidean_distance": lambda embs, ix, iy, **kwargs: minkowski_distance(
        embs,
        ix,
        iy,
        p=2,
    ),
    "euclidean_distance_l2": euclidean_distance_l2,
    "minkowski_distance": minkowski_distance,
    "manhattan_distance": lambda embs, ix, iy, **kwargs: minkowski_distance(
        embs,
        ix,
        iy,
        p=1,
    ),
    "chebyshev_distance": lambda embs, ix, iy, **kwargs: minkowski_distance(
        embs,
        ix,
        iy,
        p=np.inf,
    ),
}

# Metrics whose callable takes a `norms` argument.
METRICS_NEED_NORM = ["cosine_similarity", "cosine_distance", "euclidean_distance_l2"]
# Metrics whose callable takes the caller-supplied norm order `p`.
METRICS_NEED_ORDER = ["minkowski_distance"]
# Per-metric conversion from a distance to a similarity score. Every conversion is
# decreasing in the distance, so smaller distances give larger similarities.
DISTANCE_TO_SIMILARITY = {
    "cosine_distance": lambda x: 1 - x,
    "euclidean_distance": lambda x: 1 / (1 + x),
    "euclidean_distance_l2": lambda x: 1 - x,
    "minkowski_distance": lambda x: 1 / (1 + x),
    "manhattan_distance": lambda x: 1 / (1 + x),
    "chebyshev_distance": lambda x: 1 / (1 + x),
}

# Inverse of the conversions in DISTANCE_TO_SIMILARITY: maps a similarity value
# (e.g. a threshold) back to the original distance scale of the metric.
REVERSE_DISTANCE_TO_SIMILARITY = {
    "cosine_distance": lambda x: 1 - x,
    "euclidean_distance": lambda x: (1 / x) - 1,
    "euclidean_distance_l2": lambda x: 1 - x,
    "minkowski_distance": lambda x: (1 / x) - 1,
    "manhattan_distance": lambda x: (1 / x) - 1,
    "chebyshev_distance": lambda x: (1 / x) - 1,
}


================================================
FILE: evalify/utils.py
================================================
"""Evalify utils module contains various utilities serving other modules."""

import numpy as np
import psutil

GB_TO_BYTE = 1024**3


def _validate_vectors(X, y):
    X = np.asarray(X, dtype=np.float32)
    y = np.asarray(y, dtype=np.int32).squeeze()
    if X.ndim != 2:
        msg = "Embeddings vector should be 2-D."
        raise ValueError(msg)
    if y.ndim != 1:
        msg = "Target vector should be 1-D."
        raise ValueError(msg)
    return X, y


def _calc_available_memory():
    """Return the `available` field of psutil's virtual memory statistics."""
    return psutil.virtual_memory().available


def calculate_best_batch_size(X, available_mem=None):
    """Calculate maximum rows to fetch per batch without going out of memory.

    We need 3 big arrays to be held in memory (A, B, A*B). When more than 2 GB
    are available, 1 GB is held back and the remainder is shared by those three
    arrays; otherwise all available memory is divided by five.

    Args:
        X: Embeddings array; the byte size of its first row is the cost of a row.
        available_mem: Memory budget in bytes. Defaults to the memory currently
            available on the system.

    Returns:
        int: Number of rows per batch, never less than 1.
    """
    if available_mem is None:
        available_mem = _calc_available_memory()
    row_bytes = X[0].nbytes
    if available_mem > 2 * GB_TO_BYTE:
        # Parenthesized on purpose: the 1 GB reserve is taken off the budget
        # before converting bytes to rows (not divided by the row size first).
        max_total_rows = (available_mem - GB_TO_BYTE) // row_bytes
        n_arrays = 3
    else:
        max_total_rows = available_mem // row_bytes
        n_arrays = 5
    # At least one row per batch so callers can safely divide by the result.
    return max(1, int(max_total_rows // n_arrays))


================================================
FILE: examples/LFW.py
================================================
""" File LFW.npz contains sample embeddings and targets from LFW dataset"""

from pathlib import Path
import time
import numpy as np

from evalify import Experiment

# Sample data shipped with the test-suite; only the first 1000 rows are used.
lfw_npz = Path(__file__).parent.parent / Path("tests/data/LFW.npz")
X_y_array = np.load(lfw_npz)
sample_limit = 1000
X = X_y_array["X"][:sample_limit]
y = X_y_array["y"][:sample_limit]

# Compare three metrics on every positive pair and every negative pair.
example_metrics = (
    "cosine_similarity",
    "pearson_similarity",
    "euclidean_distance_l2",
)
experiment = Experiment(
    metrics=example_metrics,
    same_class_samples="full",
    different_class_samples=("full", "full"),
)

start_time = time.time()
print("Starting Experiment")
experiment.run(X, y)
print(
    f"Total available embeddings {len(y)} resulted in {len(experiment.df)} "
    "samples for the experiment."
)
print(f"Metrics calculations executed in {time.time()-start_time:.2f} seconds")

# Each report prints its heading first and is only computed afterwards.
reports = (
    ("ROC AUC:", lambda: experiment.roc_auc()),
    ("threshold @ FPR:", lambda: experiment.threshold_at_fpr(0.01)),
    ("EER:", lambda: experiment.eer()),
    ("TAR@FAR:", lambda: experiment.tar_at_far([0.01, 0.001])),
)
for heading, compute in reports:
    print(heading)
    print(compute())


================================================
FILE: mkdocs.yml
================================================
site_name: evalify
repo_url: https://github.com/ma7555/evalify
repo_name: evalify
nav:
  - home: index.md
  - installation: installation.md
  - usage: usage.md
  - modules: api.md
  - contributing: contributing.md
  - authors: authors.md
  - history: history.md
theme:
  name: material
  language: en
  logo: https://user-images.githubusercontent.com/7144929/154332210-fa1fee34-faae-4567-858a-49fa53e99a2b.svg
  palette:
    - media: "(prefers-color-scheme: light)"
      scheme: default
      toggle:
        icon: material/weather-night
        name: Switch to dark mode
    - media: "(prefers-color-scheme: dark)"
      scheme: slate
      toggle:
        icon: material/weather-sunny
        name: Switch to light mode
  features:
    - navigation.indexes
    - navigation.tabs
    - navigation.instant
    - navigation.tabs.sticky
markdown_extensions:
  - pymdownx.emoji:
      emoji_index: !!python/name:material.extensions.emoji.twemoji
      emoji_generator: !!python/name:material.extensions.emoji.to_svg
  - pymdownx.critic
  - pymdownx.caret
  - pymdownx.mark
  - pymdownx.tilde
  - pymdownx.tabbed
  - attr_list
  - pymdownx.arithmatex:
      generic: true
  - pymdownx.highlight:
      linenums: true
  - pymdownx.superfences
  - pymdownx.details
  - admonition
  - toc:
      baselevel: 2
      permalink: true
  - meta
plugins:
  - include-markdown
  - search:
      lang: en
  - mkdocstrings
extra:
  social:
    - icon: fontawesome/brands/github
      link: https://github.com/ma7555/evalify
      name: Github
    - icon: material/email
      link: "mailto:evalify@ma7555.anonaddy.com"


================================================
FILE: pyproject.toml
================================================
[tool.poetry]
name = "evalify"
version = "1.0.0"
homepage = "https://github.com/ma7555/evalify"
description = "Evaluate your face or voice verification models literally in seconds."
authors = ["Mahmoud Bahaa <evalify@ma7555.anonaddy.com>"]
keywords = ["biometric verification", "biometric authentication", "evaluation"]
readme = "README.md"
license = "BSD-3-Clause"
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: BSD License",
    "Natural Language :: English",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
]

packages = [
    { include = "evalify" },
]

[tool.poetry.dependencies]
python = ">=3.9,<4.0"
pandas = "^2.0.0"
numpy = "^2.0.0"
psutil = "^5.9.0"
scikit-learn = "^1.2.0"

# Optional Dependencies
ruff = { version = ">=0.7.2", optional = true }
pytest = { version = "^7.2.0", optional = true }
pytest-cov = { version = "^4.0.0", optional = true }
scipy = { version = ">=1.10.0", optional = true }
tox = { version = "^4.7.0", optional = true }
virtualenv = { version = ">=20.24.0", optional = true }
pip = { version = ">=23.2.0", optional = true }
mkdocs = { version = ">=1.4.0", optional = true }
mkdocs-material = { version = "^9.2.0", optional = true }
mkdocstrings = { version = ">=0.26.0", optional = true }
mkdocstrings-python = { version = ">=1.12.2", optional = true }
mkdocs-include-markdown-plugin = { version = ">=6.0.0", optional = true }
twine = { version = "^5.0.0", optional = true }
toml = { version = ">0.8.0", optional = true }
pyreadline3 = { version = "^3.4.1", optional = true }
poetry = { version = "^1.8.0", optional = true }

[tool.poetry.extras]
test = [
    "pytest",
    "ruff",
    "pytest-cov",
    "pyreadline3",
    "scipy",
]

dev = [
    "tox",
    "virtualenv",
    "pip",
    "twine",
    "toml",
    "poetry",
]

doc = [
    "mkdocs",
    "mkdocs-material",
    "mkdocstrings",
    "mkdocstrings-python",
    "mkdocs-include-markdown-plugin",
]

[build-system]
requires = ["poetry-core>=1.8.0"]
build-backend = "poetry.core.masonry.api"

[tool.ruff]
line-length = 88
indent-width = 4

[tool.ruff.lint]
select = [
    "E",  # pycodestyle error
    "F",  # Pyflakes
    "I",  # isort
]
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

[tool.ruff.format]
quote-style = "double"

[tool.ruff.lint.isort]
known-first-party = ["evalify"]


================================================
FILE: tests/__init__.py
================================================
"""Unit test package for evalify."""


================================================
FILE: tests/test_evalify.py
================================================
#!/usr/bin/env python

"""Tests for `evalify` package."""
import unittest

import numpy as np
from scipy.special import comb

from evalify import Experiment
from evalify.metrics import metrics_caller


class TestEvalify(unittest.TestCase):
    """End-to-end tests for `Experiment` on small, seeded random embeddings."""

    def setUp(self):
        """Create 500 random 8-d float32 embeddings with targets from 10 classes."""
        rng = np.random.default_rng(555)
        self.nphotos = 500
        self.emb_size = 8
        self.nclasses = 10
        self.embs = rng.random((self.nphotos, self.emb_size), dtype=np.float32)
        self.targets = rng.integers(self.nclasses, size=self.nphotos)

    def test_run_euclidean_distance(self):
        """Test run with euclidean_distance"""
        experiment = Experiment(metrics="euclidean_distance")
        df = experiment.run(self.embs, self.targets)
        experiment = Experiment(metrics="euclidean_distance_l2")
        df_l2 = experiment.run(self.embs, self.targets)
        self.assertGreater(df.euclidean_distance.max(), 0)
        self.assertGreater(df_l2.euclidean_distance_l2.max(), 0)

    def test_run_cosine_similarity(self):
        """Test run with cosine_similarity"""
        experiment = Experiment(metrics="cosine_similarity")
        df = experiment.run(self.embs, self.targets)
        self.assertLessEqual(df.cosine_similarity.max(), 1)

    def test_run_all_metrics_separated(self):
        """Test that every registered metric, run alone, yields its own column"""
        for metric in metrics_caller.keys():
            # subTest labels a failure with the metric that triggered it.
            with self.subTest(metric=metric):
                experiment = Experiment(metrics=metric)
                df = experiment.run(self.embs, self.targets)
                self.assertIn(metric, df.columns)

    def test_run_all_metrics_combined(self):
        """Test that running all registered metrics at once yields all columns"""
        metrics = set(metrics_caller.keys())
        experiment = Experiment(metrics=metrics)
        df = experiment.run(self.embs, self.targets)
        self.assertTrue(metrics.issubset(df.columns))

    def test_run_full_class_samples(self):
        """Test run with full same-class and different-class sampling"""
        experiment = Experiment(
            same_class_samples="full",
            different_class_samples=("full", "full"),
        )
        df = experiment.run(
            self.embs,
            self.targets,
        )
        # Full sampling is expected to produce every unordered pair of photos.
        self.assertEqual(len(df), comb(self.nphotos, 2))

    def test_run_custom_class_samples(self):
        """Test run with custom same_class_samples and different_class_samples"""
        N, M = (2, 5)
        experiment = Experiment(same_class_samples=2, different_class_samples=(N, M))
        # NOTE(review): the experiment is configured with same_class_samples=2
        # while the upper bound below is computed with 3, so the bound is looser
        # than the configuration implies -- confirm which value was intended.
        same_class_samples = 3
        df = experiment.run(
            self.embs,
            self.targets,
        )

        self.assertLessEqual(
            len(df),
            (comb(same_class_samples, 2) * self.nclasses)
            + (self.nclasses * (self.nclasses - 1)) * M * N,
        )

    def test_run_shuffle(self):
        """Test run with shuffle"""
        experiment = Experiment(seed=555)
        df1 = experiment.run(self.embs, self.targets, shuffle=True)
        df2 = experiment.run(self.embs, self.targets, shuffle=True)
        self.assertEqual(len(df1), len(df2))
        self.assertEqual(sum(df1.index), sum(df2.index))
        self.assertTrue(all(ix in df2.index for ix in df1.index))

    def test_run_no_batch_size(self):
        """Test run with no batch_size"""
        experiment = Experiment(
            same_class_samples=2,
            different_class_samples=(1, 1),
            seed=555,
        )
        experiment.run(self.embs, self.targets, batch_size=None)
        self.assertTrue(experiment.check_experiment_run())

    def test_run_return_embeddings(self):
        """Test run with return_embeddings"""
        experiment = Experiment()
        df = experiment.run(self.embs, self.targets, return_embeddings=True)
        self.assertLessEqual(len(df.at[0, "emb_a"]), self.emb_size)

    def test_run_evaluate_at_threshold(self):
        """Test run with evaluate_at_threshold"""
        metrics = ["cosine_similarity", "euclidean_distance_l2"]
        experiment = Experiment(metrics=metrics)
        experiment.run(
            self.embs,
            self.targets,
        )
        evaluations = experiment.evaluate_at_threshold(0.5, "cosine_similarity")
        # The result is expected to hold 9 entries for the single metric queried.
        self.assertEqual(len(evaluations), 9)

    def test_run_find_optimal_cutoff(self):
        """Test run with find_optimal_cutoff"""
        metrics = ["cosine_similarity", "euclidean_distance_l2"]
        experiment = Experiment(metrics=metrics)
        experiment.run(
            self.embs,
            self.targets,
        )
        evaluations = experiment.find_optimal_cutoff()
        self.assertEqual(len(evaluations), len(metrics))
        self.assertTrue(all(evaluation in metrics for evaluation in evaluations))

    def test_run_get_roc_auc(self):
        """Test run with get_roc_auc"""
        metrics = ["cosine_similarity", "euclidean_distance_l2"]
        experiment = Experiment(metrics=metrics)
        experiment.run(
            self.embs,
            self.targets,
        )
        roc_auc = experiment.roc_auc()
        self.assertEqual(len(roc_auc), len(metrics))
        self.assertTrue(all(auc in metrics for auc in roc_auc))

    def test_run_predicted_as_similarity(self):
        """Test run with predicted_as_similarity"""
        experiment = Experiment(metrics=["cosine_similarity", "cosine_distance"])
        experiment.run(
            self.embs,
            self.targets,
        )
        result = experiment.predicted_as_similarity("cosine_similarity")
        result_2 = experiment.predicted_as_similarity("cosine_distance")
        self.assertTrue(np.allclose(result, result_2))

    def test_run_find_threshold_at_fpr(self):
        """Test run with find_threshold_at_fpr"""
        metric = "cosine_similarity"
        experiment = Experiment(
            metrics=metric,
            different_class_samples=("full", "full"),
        )
        experiment.run(
            self.embs,
            self.targets,
        )
        fpr_d01 = experiment.threshold_at_fpr(0.1)
        fpr_d1 = experiment.threshold_at_fpr(1)
        fpr_d0 = experiment.threshold_at_fpr(0)
        self.assertEqual(len(fpr_d01[metric]), 3)
        # Reference thresholds for the seeded data, compared to 3 decimal places.
        self.assertAlmostEqual(fpr_d01[metric]["threshold"], 0.8939142, 3)
        self.assertAlmostEqual(fpr_d0[metric]["threshold"], 0.9953355, 3)
        self.assertAlmostEqual(fpr_d1[metric]["threshold"], 0.2060538, 3)

    def test_run_calculate_eer(self):
        """Test run with calculate_eer"""
        metric = "cosine_similarity"
        experiment = Experiment(
            metrics=metric,
            different_class_samples=("full", "full"),
        )
        experiment.run(
            self.embs,
            self.targets,
        )
        eer = experiment.eer()
        self.assertIn("EER", eer[metric])

    def test__call__(self):
        """Test that calling the experiment object matches calling `run`"""
        experiment = Experiment(seed=555)
        result = experiment.run(self.embs, self.targets)
        result_2 = experiment(self.embs, self.targets)
        self.assertTrue(np.array_equal(result.to_numpy(), result_2.to_numpy()))

    def test_run_errors(self):
        """Test run errors"""
        with self.assertRaisesRegex(
            ValueError,
            "`same_class_samples` argument must be one of 'full' or an integer ",
        ):
            experiment = Experiment(same_class_samples=54.4)
            experiment.run(self.embs, self.targets)

        with self.assertRaisesRegex(
            ValueError,
            "`different_class_samples` argument must be one of 'full', 'minimal'",
        ):
            experiment = Experiment(different_class_samples="all")
            experiment.run(self.embs, self.targets)

        with self.assertRaisesRegex(
            ValueError,
            "When passing `different_class_samples` as a tuple or list. ",
        ):
            experiment = Experiment(different_class_samples=(1, 2, 3))
            experiment.run(
                self.embs,
                self.targets,
            )

        with self.assertRaisesRegex(
            ValueError,
            '`batch_size` argument must be either "best" or of type integer',
        ):
            experiment = Experiment()
            experiment.run(self.embs, self.targets, batch_size="all")

        with self.assertRaisesRegex(ValueError, "`metric` argument must be one of "):
            experiment = Experiment(metrics="dot_prod")
            experiment.run(self.embs, self.targets)

        with self.assertRaisesRegex(
            ValueError,
            "`p` must be an int and at least 1. Received: p=",
        ):
            experiment = Experiment()
            experiment.run(self.embs, self.targets, p=0.1)

        # Querying before any run must be rejected.
        with self.assertRaisesRegex(
            NotImplementedError,
            "`evaluate_at_threshold` function can only be run after running "
            "`run_experiment`.",
        ):
            experiment = Experiment()
            experiment.evaluate_at_threshold(0.5, "euclidean_distance")

        # Querying a metric that was not part of the run must be rejected.
        with self.assertRaisesRegex(
            ValueError,
            "`evaluate_at_threshold` function can only be called with `metric` from ",
        ):
            experiment = Experiment(metrics="euclidean_distance")
            experiment.run(self.embs, self.targets)
            experiment.evaluate_at_threshold(0.5, "cosine_similarity")

        with self.assertRaisesRegex(
            ValueError,
            "`fpr` must be between 0 and 1. Received wanted_fpr=",
        ):
            experiment = Experiment(metrics="euclidean_distance")
            experiment.run(self.embs, self.targets)
            experiment.threshold_at_fpr(-1.1)


================================================
FILE: tests/test_experiment_real_data.py
================================================
# tests/test_experiment_real_data.py

import os
import pathlib
import unittest
from collections import OrderedDict

import numpy as np

from evalify import Experiment


class TestExperimentRealDataSmall(unittest.TestCase):
    """Tests for Experiment class using a subset of the LFW dataset.

    The experiment is executed a single time in `setUpClass` and shared by all
    tests, so the tests must treat `experiment` and `df` as read-only.
    """

    @classmethod
    def setUpClass(cls):
        """Load the first 1000 LFW samples and run the experiment once.

        Raises:
            FileNotFoundError: If `tests/data/LFW.npz` is missing.
        """
        # Path to LFW.npz, assuming it's in the tests/data/ directory
        cls.lfw_npz = os.path.join(pathlib.Path(__file__).parent, "data", "LFW.npz")
        if not os.path.exists(cls.lfw_npz):
            raise FileNotFoundError(f"LFW.npz not found at {cls.lfw_npz}")

        # np.load returns a file-backed archive for .npz files; the context
        # manager closes it once the needed arrays have been read into memory.
        with np.load(cls.lfw_npz) as X_y_array:
            cls.X = X_y_array["X"][:1000]
            cls.y = X_y_array["y"][:1000]

        cls.metrics = [
            "cosine_similarity",
            "pearson_similarity",
            "euclidean_distance_l2",
        ]

        cls.experiment = Experiment(
            metrics=cls.metrics,
            same_class_samples="full",
            different_class_samples=("full", "full"),
            seed=555,  # To ensure reproducibility
        )

        # Run the experiment once for the whole class; every test reuses it.
        cls.df = cls.experiment.run(cls.X, cls.y)

    def test_number_of_samples(self):
        """Test that the number of generated samples matches the expected count."""
        # 1000 samples with full sampling: 1000 * 999 / 2 unordered pairs.
        expected_num_samples = 499500
        actual_num_samples = len(self.df)
        self.assertEqual(
            actual_num_samples,
            expected_num_samples,
            f"Expected {expected_num_samples} samples, got {actual_num_samples}.",
        )

    def test_roc_auc(self):
        """Test that ROC AUC values match the expected results."""
        expected_roc_auc = OrderedDict(
            {
                "euclidean_distance_l2": 0.9998640116393942,
                "cosine_similarity": 0.9998640114481793,
                "pearson_similarity": 0.999858162377461,
            }
        )

        actual_roc_auc = self.experiment.roc_auc()

        self.assertEqual(
            len(actual_roc_auc),
            len(self.metrics),
            f"Expected ROC AUC for {len(self.metrics)} metrics, got "
            f"{len(actual_roc_auc)}.",
        )

        for metric in self.metrics:
            self.assertIn(
                metric, actual_roc_auc, f"ROC AUC for metric '{metric}' not found."
            )
            self.assertAlmostEqual(
                actual_roc_auc[metric],
                expected_roc_auc[metric],
                places=6,
                msg=f"ROC AUC for metric '{metric}' does not match.",
            )

    def test_threshold_at_fpr(self):
        """Test that thresholds at a specified FPR match expected values."""
        far = 0.01
        expected_threshold_at_fpr = {
            "cosine_similarity": {
                "FPR": 0.010001841326240518,
                "TPR": 0.9973539973539973,
                "threshold": 0.37717896699905396,
            },
            "pearson_similarity": {
                "FPR": 0.010001841326240518,
                "TPR": 0.9973539973539973,
                "threshold": 0.37802454829216003,
            },
            "euclidean_distance_l2": {
                "FPR": 0.010001841326240518,
                "TPR": 0.9973539973539973,
                "threshold": 1.1160835027694702,
            },
        }

        actual_threshold_at_fpr = self.experiment.threshold_at_fpr(far)

        self.assertEqual(
            len(actual_threshold_at_fpr),
            len(self.metrics),
            f"Expected Threshold @ FPR for {len(self.metrics)} metrics, got "
            f"{len(actual_threshold_at_fpr)}.",
        )

        for metric in self.metrics:
            self.assertIn(
                metric,
                actual_threshold_at_fpr,
                f"Threshold @ FPR for metric '{metric}' not found.",
            )
            expected = expected_threshold_at_fpr[metric]
            actual = actual_threshold_at_fpr[metric]

            self.assertAlmostEqual(
                actual["FPR"],
                expected["FPR"],
                places=6,
                msg=f"FPR for metric '{metric}' does not match.",
            )
            self.assertAlmostEqual(
                actual["TPR"],
                expected["TPR"],
                places=6,
                msg=f"TPR for metric '{metric}' does not match.",
            )
            self.assertAlmostEqual(
                actual["threshold"],
                expected["threshold"],
                places=6,
                msg=f"Threshold for metric '{metric}' at FAR={far} does not match.",
            )

    def test_eer(self):
        """Test that EER values and thresholds match the expected results."""
        expected_eer = OrderedDict(
            {
                "cosine_similarity": {
                    "EER": 0.004724863226023654,
                    "threshold": 0.4244731664657593,
                },
                "euclidean_distance_l2": {
                    "EER": 0.004724863226023654,
                    "threshold": 1.0728718042373657,
                },
                "pearson_similarity": {
                    "EER": 0.004914464785693375,
                    "threshold": 0.4228288531303406,
                },
            }
        )

        actual_eer = self.experiment.eer()

        self.assertEqual(
            len(actual_eer),
            len(self.metrics),
            f"Expected EER for {len(self.metrics)} metrics, got {len(actual_eer)}.",
        )

        for metric in self.metrics:
            self.assertIn(metric, actual_eer, f"EER for metric '{metric}' not found.")
            expected = expected_eer[metric]
            actual = actual_eer[metric]

            self.assertAlmostEqual(
                actual["EER"],
                expected["EER"],
                places=6,
                msg=f"EER for metric '{metric}' does not match.",
            )
            self.assertAlmostEqual(
                actual["threshold"],
                expected["threshold"],
                places=6,
                msg=f"Threshold for EER of metric '{metric}' does not match.",
            )

    def test_tar_at_far(self):
        """Test the tar_at_far method with specific FAR values."""
        # Define FAR values to test
        far_values = [0.01, 0.001]

        # Define expected TAR values based on the recent experiment
        expected_tar_at_far = OrderedDict(
            {
                "cosine_similarity": {
                    0.01: 0.9973539973539973,
                    0.001: 0.9795879795879796,
                },
                "pearson_similarity": {
                    0.01: 0.9973539973539973,
                    0.001: 0.9793989793989794,
                },
                "euclidean_distance_l2": {
                    0.01: 0.9973539973539973,
                    0.001: 0.9795879795879796,
                },
            }
        )

        # Call tar_at_far with the FAR values
        actual_tar_at_far = self.experiment.tar_at_far(far_values)

        # Assert the returned TAR@FAR matches expected values
        self.assertEqual(
            len(actual_tar_at_far),
            len(self.metrics),
            f"Expected TAR@FAR for {len(self.metrics)} metrics, got "
            f"{len(actual_tar_at_far)}.",
        )

        for metric in self.metrics:
            self.assertIn(
                metric, actual_tar_at_far, f"TAR@FAR for metric '{metric}' not found."
            )

            for far in far_values:
                self.assertIn(
                    far,
                    actual_tar_at_far[metric],
                    f"TAR@FAR for metric '{metric}' at FAR={far} not found.",
                )

                expected_tar = expected_tar_at_far[metric][far]
                actual_tar = actual_tar_at_far[metric][far]

                self.assertAlmostEqual(
                    actual_tar,
                    expected_tar,
                    places=6,
                    msg=f"TAR@FAR for metric '{metric}' at FAR={far} does not match.",
                )


# if __name__ == '__main__':
#     unittest.main()


================================================
FILE: tests/test_metrics.py
================================================
#!/usr/bin/env python

"""Tests for `evalify` package."""
import unittest

import numpy as np
from scipy.spatial import distance
from scipy.stats import pearsonr

from evalify import metrics


class TestMetrics(unittest.TestCase):
    """Checks each `evalify.metrics` function against a SciPy reference."""

    def setUp(self):
        """Create seeded embeddings, their norms and two random index vectors."""
        rng = np.random.default_rng(555)
        self.nphotos = 500
        self.emb_size = 8
        self.slice_size = 100
        self.embs = rng.random((self.nphotos, self.emb_size), dtype=np.float32)
        self.norms = np.linalg.norm(self.embs, axis=1)
        # ix[k] and iy[k] together select the k-th pair of embeddings to compare.
        self.ix = rng.integers(self.nphotos, size=self.slice_size)
        self.iy = rng.integers(self.nphotos, size=self.slice_size)

    def _assert_close(self, actual, expected):
        """Assert element-wise closeness and report the mismatch on failure.

        The tolerances and NaN handling are set to the defaults of
        `np.allclose`, so the pass/fail outcome matches
        `np.allclose(actual, expected)`.
        """
        np.testing.assert_allclose(
            actual, expected, rtol=1e-05, atol=1e-08, equal_nan=False
        )

    def test_cosine_similarity(self):
        """Test cosine_similarity"""
        result = metrics.cosine_similarity(self.embs, self.ix, self.iy, self.norms)
        # SciPy provides cosine *distance*; similarity is its complement.
        result_2 = 1 - np.array(
            [
                distance.cosine(self.embs[ix], self.embs[iy])
                for (ix, iy) in zip(self.ix, self.iy)
            ],
        )
        self.assertEqual(result.shape, (self.slice_size,))
        self._assert_close(result, result_2)

    def test_pearson_similarity(self):
        """Test pearson_similarity"""
        result = metrics.pearson_similarity(self.embs, self.ix, self.iy)
        result_2 = np.array(
            [
                pearsonr(self.embs[ix], self.embs[iy])[0]
                for (ix, iy) in zip(self.ix, self.iy)
            ],
        )
        self.assertEqual(result.shape, (self.slice_size,))
        self._assert_close(result, result_2)

    def test_euclidean_distance(self):
        """Test euclidean_distance"""
        result = metrics.metrics_caller.get("euclidean_distance")(
            self.embs,
            self.ix,
            self.iy,
        )
        result_2 = np.array(
            [
                distance.euclidean(self.embs[ix], self.embs[iy])
                for (ix, iy) in zip(self.ix, self.iy)
            ],
        )
        self.assertEqual(result.shape, (self.slice_size,))
        self._assert_close(result, result_2)

    def test_euclidean_distance_l2(self):
        """Test euclidean_distance_l2"""
        result = metrics.metrics_caller.get("euclidean_distance_l2")(
            self.embs,
            self.ix,
            self.iy,
            self.norms,
        )
        # Reference: euclidean distance between the L2-normalised embeddings.
        result_2 = np.array(
            [
                distance.euclidean(
                    self.embs[ix] / np.sqrt(np.sum(self.embs[ix] ** 2)),
                    self.embs[iy] / np.sqrt(np.sum(self.embs[iy] ** 2)),
                )
                for (ix, iy) in zip(self.ix, self.iy)
            ],
        )

        self.assertEqual(result.shape, (self.slice_size,))
        self._assert_close(result, result_2)

    def test_minkowski_distance_distance(self):
        """Test minkowski_distance with p=3"""
        result = metrics.metrics_caller.get("minkowski_distance")(
            self.embs,
            self.ix,
            self.iy,
            p=3,
        )
        result_2 = np.array(
            [
                distance.minkowski(self.embs[ix], self.embs[iy], p=3)
                for (ix, iy) in zip(self.ix, self.iy)
            ],
        )
        self.assertEqual(result.shape, (self.slice_size,))
        self._assert_close(result, result_2)

    def test_manhattan_distance_distance(self):
        """Test manhattan_distance"""
        result = metrics.metrics_caller.get("manhattan_distance")(
            self.embs,
            self.ix,
            self.iy,
        )
        result_2 = np.array(
            [
                distance.cityblock(self.embs[ix], self.embs[iy])
                for (ix, iy) in zip(self.ix, self.iy)
            ],
        )
        self.assertEqual(result.shape, (self.slice_size,))
        self._assert_close(result, result_2)

    def test_chebyshev_distance_distance(self):
        """Test chebyshev_distance"""
        result = metrics.metrics_caller.get("chebyshev_distance")(
            self.embs,
            self.ix,
            self.iy,
        )
        result_2 = np.array(
            [
                distance.chebyshev(self.embs[ix], self.embs[iy])
                for (ix, iy) in zip(self.ix, self.iy)
            ],
        )
        self.assertEqual(result.shape, (self.slice_size,))
        self._assert_close(result, result_2)


================================================
FILE: tests/test_utils.py
================================================
#!/usr/bin/env python

"""Tests for `evalify` package."""
import unittest

import numpy as np

from evalify import utils


class TestUtils(unittest.TestCase):
    """Unit tests for the helper functions in `evalify.utils`."""

    def setUp(self):
        """Build seeded random embeddings and class targets for each test."""
        self.nphotos = 100
        self.emb_size = 8
        self.nclasses = 10
        self.rng = np.random.default_rng(555)
        embs_shape = (self.nphotos, self.emb_size)
        self.embs = self.rng.random(embs_shape, dtype=np.float32)
        self.targets = self.rng.integers(self.nclasses, size=self.nphotos)

    def tearDown(self):
        """Nothing to clean up; the fixtures are plain in-memory arrays."""

    def test_validate_vectors(self):
        """Plain Python lists are accepted and returned with the right shapes."""
        X, y = utils._validate_vectors(self.embs.tolist(), self.targets.tolist())
        self.assertEqual(X.shape, (self.nphotos, self.emb_size))
        self.assertEqual(y.shape, (self.nphotos,))

    def test_calculate_best_batch_size(self):
        """The batch size computed for a 4 GB budget matches the reference value."""
        available = 4 * utils.GB_TO_BYTE
        self.assertEqual(
            utils.calculate_best_batch_size(self.embs, available),
            1420470954,
        )

    def test_run_errors(self):
        """Wrongly shaped inputs are rejected with a descriptive ValueError."""
        # (expected message, shape of X, shape of y)
        bad_inputs = (
            ("Embeddings vector should be 2-D.", 5, 5),
            ("Target vector should be 1-D.", (5, 5), (5, 2)),
        )
        for message, x_shape, y_shape in bad_inputs:
            with self.assertRaisesRegex(ValueError, message):
                utils._validate_vectors(
                    X=self.rng.random(x_shape),
                    y=self.rng.integers(10, size=y_shape),
                )


================================================
FILE: tox.ini
================================================
[tox]
# Global tox settings: build the package in an isolated environment and run
# the listed test environments plus the lint environment by default.
isolated_build = true
envlist = py39, py310, py311, py312, lint

[gh-actions]
# Maps a Python version to the tox environment to run for it (presumably read
# by the tox-gh-actions plugin in CI -- confirm against the workflow files).
python =
    3.12: py312
    3.11: py311
    3.10: py310
    3.9: py39

[testenv:lint]
# Lint and packaging checks: installs the test, doc and dev extras, then runs
# ruff (with --fix), builds the package and the docs, and validates the
# artifacts in dist/ with twine.
allowlist_externals =
    python
deps =
    .[test, doc, dev]
commands =
    python -m ruff check evalify tests --fix
    python -m poetry build
    python -m mkdocs build
    python -m twine check dist/*

[testenv]
# Default test environment: installs the `test` extra and runs pytest on the
# tests directory with coverage for `evalify`, appending to existing coverage
# data and emitting XML and terminal (missing-lines) reports.
# PYTHONWARNINGS = ignore silences Python warnings during the run.
allowlist_externals = pytest
setenv =
    PYTHONPATH = {toxinidir}
    PYTHONWARNINGS = ignore
deps =
    .[test]
commands =
    pytest -s --cov=evalify --cov-append --cov-report=xml --cov-report term-missing tests