[
  {
    "path": ".coveragerc",
    "content": "[run]\n# uncomment the following to omit files during running\n#omit =\n[report]\nexclude_lines =\n    pragma: no cover\n    def __repr__\n    if self.debug:\n    if settings.DEBUG\n    raise AssertionError\n    raise NotImplementedError\n    if 0:\n    if __name__ == .__main__.:\n    def main\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE.md",
    "content": "* evalify version:\n* Python version:\n* Operating System:\n\n### Description\n\nDescribe what you were trying to get done.\nTell us what happened, what went wrong, and what you expected to happen.\n\n### What I Did\n\n```\nPaste the command(s) you ran and the output.\nIf there was a crash, please include the traceback here.\n```\n"
  },
  {
    "path": ".github/workflows/codeql-analysis.yml",
    "content": "# For most projects, this workflow file will not need changing; you simply need\n# to commit it to your repository.\n#\n# You may wish to alter this file to override the set of languages analyzed,\n# or to provide custom queries or build logic.\n#\n# ******** NOTE ********\n# We have attempted to detect the languages in your repository. Please check\n# the `language` matrix defined below to confirm you have the correct set of\n# supported CodeQL languages.\n#\nname: \"CodeQL\"\n\non:\n  push:\n    branches: [ main ]\n  pull_request:\n    # The branches below must be a subset of the branches above\n    branches: [ main ]\n  schedule:\n    - cron: '41 19 * * 2'\n\njobs:\n  analyze:\n    name: Analyze\n    runs-on: ubuntu-latest\n    permissions:\n      actions: read\n      contents: read\n      security-events: write\n\n    strategy:\n      fail-fast: false\n      matrix:\n        language: [ 'python' ]\n        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]\n        # Learn more about CodeQL language support at https://git.io/codeql-language-support\n\n    steps:\n    - name: Checkout repository\n      uses: actions/checkout@v2\n\n    # Initializes the CodeQL tools for scanning.\n    - name: Initialize CodeQL\n      uses: github/codeql-action/init@v1\n      with:\n        languages: ${{ matrix.language }}\n        # If you wish to specify custom queries, you can do so here or in a config file.\n        # By default, queries listed here will override any specified in a config file.\n        # Prefix the list here with \"+\" to use these queries and those in the config file.\n        # queries: ./path/to/local/query, your-org/your-repo/queries@main\n\n    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).\n    # If this step fails, then you should remove it and run the build manually (see below)\n    - name: Autobuild\n      uses: github/codeql-action/autobuild@v1\n\n    # ℹ️ Command-line 
programs to run using the OS shell.\n    # 📚 https://git.io/JvXDl\n\n    # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines\n    #    and modify them (or add more) to build your code if your project\n    #    uses a compiled language\n\n    #- run: |\n    #   make bootstrap\n    #   make release\n\n    - name: Perform CodeQL Analysis\n      uses: github/codeql-action/analyze@v1\n"
  },
  {
    "path": ".github/workflows/dev.yml",
    "content": "name: build\n\non:\n  push:\n    branches: [main]\n  pull_request:\n    branches: [main]\n\n  workflow_dispatch:\n\njobs:\n  test:\n    strategy:\n      matrix:\n        python-versions: [\"3.9\", \"3.10\", \"3.11\", \"3.12\"]\n        os: [ubuntu-latest, macos-latest, windows-latest]\n    runs-on: ${{ matrix.os }}\n\n    steps:\n      - uses: actions/checkout@v4\n      - uses: actions/setup-python@v5\n        with:\n          python-version: ${{ matrix.python-versions }}\n\n      - name: Install dependencies\n        run: |\n          python -m pip install --upgrade pip\n          pip install poetry tox tox-gh-actions\n\n      - name: test with tox\n        run: tox\n\n      - name: list files\n        run: ls -l .\n\n  publish_dev_build:\n    needs: test\n    runs-on: ubuntu-latest\n    steps:\n      - uses: actions/checkout@v4\n      - uses: actions/setup-python@v5\n        with:\n          python-version: 3.12\n\n      - name: Install dependencies\n        run: |\n          python -m pip install --upgrade pip\n          pip install poetry tox tox-gh-actions\n\n      - name: test with tox\n        run: tox\n\n      - name: list files\n        run: ls -l .\n\n      - uses: codecov/codecov-action@v4\n        with:\n          fail_ci_if_error: false\n          files: coverage.xml\n          token: ${{ secrets.CODECOV_TOKEN }}\n      - name: Build wheels and source tarball\n        run: |\n          poetry version $(poetry version --short)-dev.$GITHUB_RUN_NUMBER\n          poetry version --short\n          poetry build\n\n      - name: publish to Test PyPI\n        uses: pypa/gh-action-pypi-publish@v1.12.2\n        with:\n          user: __token__\n          password: ${{ secrets.TEST_PYPI_API_TOKEN}}\n          repository-url: https://test.pypi.org/legacy/\n          skip-existing: true\n"
  },
  {
    "path": ".github/workflows/release.yml",
    "content": "name: release & publish workflow\n\non:\n  push:\n    tags:\n      - \"v1.*.*\"\n\n  workflow_dispatch:\n\njobs:\n  release:\n    name: Create Release\n    runs-on: ubuntu-latest\n\n    strategy:\n      matrix:\n        python-versions: [3.12]\n\n    steps:\n      - name: Checks-out\n        uses: actions/checkout@v4\n      - name: \"Build Changelog\"\n        id: build_changelog\n        uses: mikepenz/release-changelog-builder-action@v5.0.0\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n      - uses: actions/setup-python@v5\n        with:\n          python-version: ${{ matrix.python-versions }}\n      - name: Install dependencies\n        run: |\n          python -m pip install --upgrade pip\n          pip install tox-gh-actions poetry\n\n      - name: pre-publish documentation\n        run: |\n          poetry install -E doc\n          poetry run mkdocs build\n\n      - name: publish documentation\n        uses: peaceiris/actions-gh-pages@v4\n        with:\n          github_token: ${{ secrets.GITHUB_TOKEN }}\n          publish_dir: ./site\n\n      - name: Build wheels and source tarball\n        run: >-\n          poetry build\n\n      - name: show temporary files\n        run: >-\n          ls -l\n\n      - name: create github release\n        id: create_release\n        uses: softprops/action-gh-release@v2.0.9\n        env:\n          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}\n        with:\n          body: ${{steps.build_changelog.outputs.changelog}}\n          # body_path: ./CHANGELOG.md\n          files: dist/*.whl\n          draft: false\n          prerelease: false\n\n      - name: create pypi release\n        uses: pypa/gh-action-pypi-publish@v1.12.2\n        with:\n          user: __token__\n          password: ${{ secrets.PYPI_API_TOKEN }}"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nenv/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# dotenv\n.env\n\n# virtualenv\n.venv\nvenv/\nENV/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n\n# IDE settings\n.vscode/\n\n# mkdocs build dir\nsite/\n\n# logo\nlogo/\npoetry.lock\n.ruff_cache/\n"
  },
  {
    "path": "AUTHORS.md",
    "content": "# Credits\n\n## Development Lead\n\n* Mahmoud Bahaa <evalify@ma7555.anonaddy.com>\n\n## Contributors\n\nNone yet. Why not be the first?\n\n## Others\n* This package was created with [Cookiecutter](https://github.com/audreyr/cookiecutter) and the [zillionare/cookiecutter-pypackage](https://github.com/zillionare/cookiecutter-pypackage) project template.\n\n* Logo was created using font [GlacialIndifference-Regular](https://hanken.co/product/hk-grotesk/) by [Hanken Design Co.](https://hanken.co/)\n* Logo icon designed by Mauro Lucchesi\n"
  },
  {
    "path": "CITATION.cff",
    "content": "cff-version: 1.2.0\ntitle: evalify\nmessage: \"If you use this software, please cite it using the metadata from this file.\"\ntype: software\nauthors:\n  - given-names: Mahmoud\n    family-names: Bahaa\n    email: evalify@ma7555.anonaddy.com\n    affiliation: Nile University\n    orcid: \"https://orcid.org/0000-0001-8688-6495\"\ndoi: 10.5281/zenodo.6181723\ndate-released: 2022-02-20\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing\n\nContributions are welcomed, and they are greatly appreciated! Every little bit\nhelps, and credit will always be given.\n\nYou can contribute in many ways:\n\n## Types of Contributions\n\n### Report Bugs\n\nReport bugs at https://github.com/ma7555/evalify/issues.\n\nIf you are reporting a bug, please include:\n\n* Your operating system name and version.\n* Any details about your local setup that might be helpful in troubleshooting.\n* Detailed steps to reproduce the bug.\n\n### Fix Bugs\n\nLook through the GitHub issues for bugs. Anything tagged with \"bug\" and \"help\nwanted\" is open to whoever wants to implement it.\n\n### Implement Features\n\nLook through the GitHub issues for features. Anything tagged with \"enhancement\"\nand \"help wanted\" is open to whoever wants to implement it.\n\n### Write Documentation\n\nevalify could always use more documentation, whether as part of the\nofficial evalify docs, in docstrings, or even on the web in blog posts,\narticles, and such.\n\n### Submit Feedback\n\nThe best way to send feedback is to file an issue at https://github.com/ma7555/evalify/issues.\n\nIf you are proposing a feature:\n\n* Explain in detail how it would work.\n* Keep the scope as narrow as possible, to make it easier to implement.\n* Remember that this is a volunteer-driven project, and that contributions\n  are welcome :)\n\n## Get Started!\n\nReady to contribute? Here's how to set up `evalify` for local development.\n\n1. Fork the `evalify` repo on GitHub.\n2. Clone your fork locally\n\n```bash\ngit clone git@github.com:your_name_here/evalify.git\n```\n\n3. Ensure [poetry](https://python-poetry.org/docs/) is installed.\n4. Install dependencies and start your virtualenv:\n\n```bash\npoetry install -E test -E doc -E dev\n```\n\n5. Create a branch for local development:\n\n```bash\ngit checkout -b name-of-your-bugfix-or-feature\n```\n\n   Now you can make your changes locally.\n\n6. 
When you're done making changes, check that your changes pass the\n   tests, including testing other Python versions, with tox:\n\n```bash\ntox\n```\n\n7. Commit your changes and push your branch to GitHub:\n\n```bash\ngit add .\ngit commit -m \"Your detailed description of your changes.\"\ngit push origin name-of-your-bugfix-or-feature\n```\n\n8. Submit a pull request through the GitHub website.\n\n## Pull Request Guidelines\n\nBefore you submit a pull request, check that it meets these guidelines:\n\n1. The pull request should include tests.\n2. If the pull request adds functionality, the docs should be updated. Put\n   your new functionality into a function with a docstring, and add the\n   feature to the list in README.md.\n3. The pull request should work for Python 3.9, 3.10, 3.11, 3.12 and for PyPy. Check\n   https://github.com/ma7555/evalify/actions\n   and make sure that the tests pass for all supported Python versions.\n\n## Tips\n\nTo run the tests locally, use either:\n\n```bash\npython -m unittest\n```\nor\n```bash\npytest\n```\n\n\n## Deploying\n\nA reminder for the maintainers on how to deploy.\nMake sure all your changes are committed (including an entry in HISTORY.md).\nThen run:\n\n```bash\ngit push\ngit push --tags\n```\n\nGitHub Actions will then deploy to PyPI if tests pass.\n"
  },
  {
    "path": "HISTORY.md",
    "content": "# History\n\n## 0.1.0 (2022-02-20)\n\n* First release on PyPI.\n\n## 0.1.1 (2022-02-22)\n\n* Run time enhancement. \n\n## 0.1.2 (2022-02-23)\n\n* Various enhancements and refactoring.\n\n## 0.1.3 (2022-02-24)\n\n* Add pearson similarity as a metric\n\n## 0.1.4 (2022-02-24)\n\n* Add EER calculation function.\n* Drop support for python 3.7\n\n## 1.0.0 (2024-11-08)\n\n* Bump dependencies.\n* Drop support for python 3.8\n* Add support for TAR @ FAR"
  },
  {
    "path": "LICENSE",
    "content": "BSD 3-Clause License\n\n\nCopyright (c) 2022, Mahmoud Bahaa\nAll rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n1. Redistributions of source code must retain the above copyright notice, this\n  list of conditions and the following disclaimer.\n\n2. Redistributions in binary form must reproduce the above copyright notice, this\n  list of conditions and the following disclaimer in the documentation and/or\n  other materials provided with the distribution.\n\n3. Neither the name of the copyright holder nor the names of its\n  contributors may be used to endorse or promote products derived from this\n  software without specific prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\nIN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,\nINDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\nBUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\nDATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY\nOF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\nOR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED\nOF THE POSSIBILITY OF SUCH DAMAGE."
  },
  {
    "path": "README.md",
    "content": "# evalify\n\n<p align=\"center\">\n\n<img src=\"https://user-images.githubusercontent.com/7144929/154332210-fa1fee34-faae-4567-858a-49fa53e99a2b.svg\" width=\"292\" height=\"120\" alt=\"Logo\"/>\n\n</p>\n\n<p align=\"center\">\n\n<a href=\"https://github.com/ma7555/evalify/blob/main/LICENSE\">\n    <img src=\"https://img.shields.io/github/license/ma7555/evalify\"\n        alt = \"License\">\n</a>\n<a href=\"https://doi.org/10.5281/zenodo.6181723\"><img src=\"https://zenodo.org/badge/DOI/10.5281/zenodo.6181723.svg\" alt=\"DOI\"></a>\n<a href=\"https://www.python.org/downloads/\">\n    <img src=\"https://img.shields.io/badge/python-3.9 | 3.10 | 3.11 | 3.12-blue.svg\"\n        alt = \"Python 3.7 | 3.8 | 3.9 | 3\">\n</a>\n<a href=\"https://pypi.python.org/pypi/evalify\">\n    <img src=\"https://img.shields.io/pypi/v/evalify.svg\"\n        alt = \"Release Status\">\n</a>\n<a href=\"https://github.com/ma7555/evalify/actions\">\n    <img src=\"https://github.com/ma7555/evalify/actions/workflows/dev.yml/badge.svg?branch=main\" alt=\"CI Status\">\n</a>\n<a href=\"https://ma7555.github.io/evalify/\">\n    <img src=\"https://img.shields.io/website/https/ma7555.github.io/evalify/index.html.svg?label=docs&down_message=unavailable&up_message=available\" alt=\"Documentation Status\">\n</a>\n<a href=\"https://github.com/astral-sh/ruff\">\n    <img src=\"https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json\" alt=\"Code style: Ruff\">\n</a>\n\n<a href=\"https://codecov.io/gh/ma7555/evalify\">\n  <img src=\"https://codecov.io/gh/ma7555/evalify/branch/main/graph/badge.svg\" />\n</a>\n<a href=\"https://pypi.org/project/evalify/\"><img alt=\"PyPI Downloads/Month\" src=\"https://img.shields.io/pypi/dm/evalify\">\n</a>\n\n</p>\n\n**Evaluate Biometric Authentication Models Literally in Seconds.**\n\n## Installation\n#### Stable release:\n```bash\npip install evalify\n```\n#### Bleeding edge:\n```bash\npip install 
git+https://github.com/ma7555/evalify.git\n```\n## Used for\nEvaluating all biometric authentication models, where the model output is a high-level embeddings known as feature vectors for visual or behaviour biometrics or d-vectors for auditory biometrics.\n\n## Usage\n\n```python\nimport numpy as np\nfrom evalify import Experiment\n\nrng = np.random.default_rng()\nnphotos = 500\nemb_size = 32\nnclasses = 10\nX = rng.random((self.nphotos, self.emb_size))\ny = rng.integers(self.nclasses, size=self.nphotos)\n\nexperiment = Experiment()\nexperiment.run(X, y)\nexperiment.get_roc_auc()\nprint(experiment.roc_auc)\nprint(experiment.find_threshold_at_fpr(0.01))\n```\n## How it works\n* When you run an experiment, evalify tries all the possible combinations between individuals for authentication based on the `X` and `y` parameters and returns the results including FPR, TPR, FNR, TNR and ROC AUC. `X` is an array of embeddings and `y` is an array of corresponding targets.\n* Evalify can find the optimal threshold based on your agreed FPR and desired similarity or distance metric.\n\n## Documentation: \n* <https://ma7555.github.io/evalify/>\n\n\n## Features\n\n* Blazing fast implementation for metrics calculation through optimized einstein sum and vectorized calculations.\n* Many operations are dispatched to canonical BLAS, cuBLAS, or other specialized routines.\n* Smart sampling options using direct indexing from pre-calculated arrays with total control over sampling strategy and sampling numbers.\n* Supports most evaluation metrics:\n    - `cosine_similarity`\n    - `pearson_similarity`\n    - `cosine_distance`\n    - `euclidean_distance`\n    - `euclidean_distance_l2`\n    - `minkowski_distance`\n    - `manhattan_distance`\n    - `chebyshev_distance`\n* Computation time for 4 metrics 4.2 million samples experiment is **24 seconds vs 51 minutes** if looping using `scipy.spatial.distance` implemntations.\n\n## TODO\n* Safer memory allocation. 
I did not have issues, but if you run out of memory, please manually set the `batch_size` argument.\n\n## Contribution\n* Contributions are welcomed, and they are greatly appreciated! Every little bit helps, and credit will always be given.\n* Please check [CONTRIBUTING.md](https://github.com/ma7555/evalify/blob/main/CONTRIBUTING.md) for guidelines.\n\n## Citation\n* If you use this software, please cite it using the metadata from [CITATION.cff](https://github.com/ma7555/evalify/blob/main/CITATION.cff)\n\n"
  },
  {
    "path": "codecov.yml",
    "content": "coverage:\n  status:\n    project:\n      default:\n        target: 90%\n    patch:\n      default:\n        target: 85%\n"
  },
  {
    "path": "docs/api.md",
    "content": "::: evalify.evalify\n    handler: python\n"
  },
  {
    "path": "docs/authors.md",
    "content": "{%\n  include-markdown \"../AUTHORS.md\"\n%}"
  },
  {
    "path": "docs/contributing.md",
    "content": "{%\n  include-markdown \"../CONTRIBUTING.md\"\n%}"
  },
  {
    "path": "docs/history.md",
    "content": "{%\n  include-markdown \"../HISTORY.md\"\n%}"
  },
  {
    "path": "docs/index.md",
    "content": "{%\n    include-markdown \"../README.md\"\n%}\n"
  },
  {
    "path": "docs/installation.md",
    "content": "# Installation\n\n## Stable release\n\nTo install evalify, run this command in your\nterminal:\n\n```bash\npip install evalify\n```\n\nThis is the preferred method to install evalify, as it will always install the most recent stable release.\n\nIf you don't have [pip][] installed, this [Python installation guide][]\ncan guide you through the process.\n\n## From source\n\nThe source for evalify can be downloaded from\nthe [GitHub repo][].\n\nYou can either clone the public repository:\n\n```bash\ngit clone https://github.com/ma7555/evalify.git\n```\n\nOr download the [tarball][]:\n\n```bash\ncurl -OJL https://github.com/ma7555/evalify/tarball/main\n```\n\nOnce you have a copy of the source, you can install it with:\n\n```bash\npip install .\n```\n\n  [pip]: https://pip.pypa.io\n  [Python installation guide]: http://docs.python-guide.org/en/latest/starting/installation/\n  [GitHub repo]: https://github.com/ma7555/evalify\n  [tarball]: https://github.com/ma7555/evalify/tarball/main\n\n"
  },
  {
    "path": "docs/usage.md",
    "content": "# Usage\n\nTo use evalify in a project:\n\n```python\nimport numpy as np\nfrom evalify import Experiment\n\nrng = np.random.default_rng()\nnphotos = 500\nemb_size = 32\nnclasses = 10\nX = rng.random((nphotos, emb_size))\ny = rng.integers(nclasses, size=nphotos)\n\nexperiment = Experiment()\nexperiment.run(X, y)\nexperiment.get_roc_auc()\nprint(experiment.df.roc_auc)\n```\n\nFor a working experiment using real face embeddings, please refer to `LFW.py` under `./examples`.\n\n```bash\npython ./examples/LFW.py\n```\n```\nTotal available embeddings 2921 resulted in 4264660 samples for the experiment.\nMetrics calculations executed in 24.05 seconds\nROC AUC:\nOrderedDict([('euclidean_distance', 0.9991302819624498), ('cosine_distance', 0.9991302818953706), ('euclidean_distance_l2', 0.9991302818953706), ('manhattan_distance', 0.9991260462584446)])\n```\n"
  },
  {
    "path": "evalify/__init__.py",
    "content": "\"\"\"Top-level package for evalify.\"\"\"\n\nfrom evalify.evalify import Experiment as Experiment\n\n__author__ = \"\"\"Mahmoud Bahaa\"\"\"\n__email__ = \"evalify@ma7555.anonaddy.com\"\n__version__ = \"0.1.0\"\n"
  },
  {
    "path": "evalify/evalify.py",
    "content": "\"\"\"Evalify main module used for creating the verification experiments.\n\nCreates experiments with embedding pairs to compare for face verification tasks\nincluding positive pairs, negative pairs and metrics calculations using a very\noptimized einstein sum. Many operations are dispatched to canonical BLAS, cuBLAS,\nor other specialized routines. Extremely large arrays are split into smaller batches,\nevery batch would consume the roughly the maximum available memory.\n\n  Typical usage example:\n\n  ```\n  experiment = Experiment()\n  experiment.run(X, y)\n  ```\n\"\"\"\n\nimport itertools\nimport sys\nfrom collections import OrderedDict\nfrom typing import Any, List, Optional, Sequence, Tuple, Union\n\nimport numpy as np\nimport pandas as pd\nfrom sklearn.metrics import auc, confusion_matrix, roc_curve\n\nfrom evalify.metrics import (\n    DISTANCE_TO_SIMILARITY,\n    METRICS_NEED_NORM,\n    METRICS_NEED_ORDER,\n    REVERSE_DISTANCE_TO_SIMILARITY,\n    metrics_caller,\n)\nfrom evalify.utils import _validate_vectors, calculate_best_batch_size\n\nStrOrInt = Union[str, int]\nStrIntSequence = Union[str, int, Sequence[Union[str, int]]]\n\n\nclass Experiment:\n    \"\"\"Defines an experiment for evalifying.\n\n    Args:\n        metrics: The list of metrics to use. 
Can be one or more of the following:\n            `cosine_similarity`, `pearson_similarity`, `cosine_distance`,\n            `euclidean_distance`, `euclidean_distance_l2`, `minkowski_distance`,\n            `manhattan_distance` and `chebyshev_distance`\n        same_class_samples:\n            - 'full': Samples all possible images within each class to create all\n                all possible positive pairs.\n            -  int: Samples specific number of images for every class to create\n                nC2 pairs where n is passed integer.\n        different_class_samples:\n            - 'full': Samples one image from every class with all possible pairs\n                of different classes. This can grow exponentially as the number\n                of images increase. (N, M) = (1, \"full\")\n            - 'minimal': Samples one image from every class with one image of\n                all other classes. (N, M) = (1, 1). (Default)\n            - int: Samples one image from every class with provided number of\n                images of every other class.\n            - tuple or list: (N, M) Samples N images from every class with M images of\n                every other class.\n        seed: Optional random seed for reproducibility.\n\n\n    Notes:\n        - `same_class_samples`:\n            If the provided number is greater than the achievable for the class,\n            the maximum possible combinations are used.\n        - `different_class_samples`:\n            If the provided number is greater than the achievable for the class,\n            the maximum possible combinations are used. (N, M) can also be\n            ('full', 'full') but this will calculate all possible combinations\n            between all posibile negative samples. 
If the dataset is not small\n            this will probably result in an extremely large array!.\n\n    \"\"\"\n\n    def __init__(\n        self,\n        metrics: Union[str, Sequence[str]] = \"cosine_similarity\",\n        same_class_samples: StrOrInt = \"full\",\n        different_class_samples: StrIntSequence = \"minimal\",\n        seed: Optional[int] = None,\n    ) -> None:\n        self.experiment_success = False\n        self.cached_predicted_as_similarity = {}\n        self.metrics = (metrics,) if isinstance(metrics, str) else metrics\n        self.same_class_samples = same_class_samples\n        self.different_class_samples = different_class_samples\n        self.seed = seed\n\n    def __call__(self, *args: Any, **kwds: Any) -> Any:\n        return self.run(*args, **kwds)\n\n    @staticmethod\n    def _validate_args(\n        metrics: Sequence[str],\n        same_class_samples: StrOrInt,\n        different_class_samples: StrIntSequence,\n        batch_size: Optional[StrOrInt],\n        p,\n    ) -> None:\n        \"\"\"Validates passed arguments to Experiment.run() method.\"\"\"\n        if same_class_samples != \"full\" and not isinstance(same_class_samples, int):\n            msg = (\n                \"`same_class_samples` argument must be one of 'full' or an integer \"\n                f\"Received: same_class_samples={same_class_samples}\"\n            )\n            raise ValueError(\n                msg,\n            )\n\n        if different_class_samples not in (\"full\", \"minimal\"):\n            if not isinstance(different_class_samples, (int, list, tuple)):\n                msg = (\n                    \"`different_class_samples` argument must be one of 'full', \"\n                    \"'minimal', an integer, a list or tuple of integers or keyword \"\n                    \"'full'.\"\n                    f\"Received: different_class_samples={different_class_samples}.\"\n                )\n                raise ValueError(\n                    
msg,\n                )\n            if isinstance(different_class_samples, (list, tuple)) and (\n                not (\n                    all(\n                        isinstance(i, int) or i == \"full\"\n                        for i in different_class_samples\n                    )\n                )\n                or (len(different_class_samples)) != 2\n            ):\n                msg = (\n                    \"When passing `different_class_samples` as a tuple or list, \"\n                    \"elements must be exactly two of integer type or keyword 'full' \"\n                    \"(N, M). \"\n                    f\"Received: different_class_samples={different_class_samples}.\"\n                )\n                raise ValueError(\n                    msg,\n                )\n\n        if (\n            batch_size != \"best\"\n            and not isinstance(batch_size, int)\n            and batch_size is not None\n        ):\n            msg = (\n                '`batch_size` argument must be either \"best\" or of type integer '\n                f\"Received: batch_size={batch_size} with type {type(batch_size)}.\"\n            )\n            raise ValueError(\n                msg,\n            )\n\n        if any(metric not in metrics_caller for metric in metrics):\n            msg = (\n                f\"`metric` argument must be one of {tuple(metrics_caller.keys())} \"\n                f\"Received: metric={metrics}\"\n            )\n            raise ValueError(\n                msg,\n            )\n\n        if p < 1:\n            msg = f\"`p` must be an int and at least 1. 
Received: p={p}\"\n            raise ValueError(msg)\n\n    def _get_pairs(\n        self,\n        y,\n        same_class_samples,\n        different_class_samples,\n        target,\n    ) -> List[Tuple]:\n        \"\"\"Generates experiment pairs.\"\"\"\n        same_ixs_full = np.argwhere(y == target).ravel()\n        if isinstance(same_class_samples, int):\n            same_class_samples = min(len(same_ixs_full), same_class_samples)\n            same_ixs = self.rng.choice(same_ixs_full, same_class_samples)\n        elif same_class_samples == \"full\":\n            same_ixs = same_ixs_full\n        same_pairs = itertools.combinations(same_ixs, 2)\n        same_pairs = [(a, b, target, target, 1) for a, b in same_pairs]\n\n        different_ixs = np.argwhere(y != target).ravel()\n        diff_df = pd.DataFrame(\n            data={\"sample_idx\": different_ixs, \"target\": y[different_ixs]},\n        )\n\n        diff_df = diff_df.sample(frac=1, random_state=self.seed)\n        if different_class_samples in [\"full\", \"minimal\"] or isinstance(\n            different_class_samples,\n            int,\n        ):\n            N = 1\n            if different_class_samples == \"minimal\":\n                diff_df = diff_df.drop_duplicates(subset=[\"target\"])\n        else:\n            N, M = different_class_samples\n            N = len(same_ixs_full) if N == \"full\" else min(N, len(same_ixs_full))\n            if M != \"full\":\n                diff_df = (\n                    diff_df.groupby(\"target\")\n                    .apply(lambda x: x[:M], include_groups=False)\n                    .droplevel(0)\n                )\n\n        different_ixs = diff_df.sample_idx.to_numpy()\n\n        different_pairs = itertools.product(\n            self.rng.choice(same_ixs_full, N, replace=False),\n            different_ixs,\n        )\n        different_pairs = [(a, b, target, y[b], 0) for a, b in different_pairs if a < b]\n\n        return same_pairs + different_pairs\n\n   
 def run(\n        self,\n        X: np.ndarray,\n        y: np.ndarray,\n        batch_size: Optional[StrOrInt] = \"best\",\n        shuffle: bool = False,\n        return_embeddings: bool = False,\n        p: int = 3,\n    ) -> pd.DataFrame:\n        \"\"\"Runs an experiment for face verification\n        Args:\n            X: Embeddings array\n            y: Targets for X as integers\n            batch_size:\n                - 'best': Let the program decide based on available memory such that\n                    every batch will fit into the available memory. (Default)\n                - int: Manually decide the batch_size.\n                - None: No batching. All experiment and intermediate results must fit\n                    entirely into memory or a MemoryError will be raised.\n            shuffle: Shuffle the returned experiment dataframe. Default: False.\n            return_embeddings: Whether to return the embeddings instead of indexes.\n                Default: False\n            p:\n                The order of the norm of the difference. Should be `p >= 1`, Only valid\n                with minkowski_distance as a metric. 
Default = 3.\n\n        Returns:\n            pandas.DataFrame: A DataFrame representing the experiment results.\n\n        Raises:\n            ValueError: An error occurred with the provided arguments.\n\n        \"\"\"\n        self._validate_args(\n            self.metrics,\n            self.same_class_samples,\n            self.different_class_samples,\n            batch_size,\n            p,\n        )\n        X, y = _validate_vectors(X, y)\n        all_targets = np.unique(y)\n        all_pairs = []\n        metric_fns = list(map(metrics_caller.get, self.metrics))\n        self.rng = np.random.default_rng(self.seed)\n        for target in all_targets:\n            all_pairs += self._get_pairs(\n                y,\n                self.same_class_samples,\n                self.different_class_samples,\n                target,\n            )\n\n        self.df = pd.DataFrame(\n            data=all_pairs,\n            columns=[\"emb_a\", \"emb_b\", \"target_a\", \"target_b\", \"target\"],\n        )\n        experiment_size = len(self.df)\n        if shuffle:\n            self.df = self.df.sample(frac=1, random_state=self.seed)\n        if batch_size == \"best\":\n            batch_size = calculate_best_batch_size(X)\n        elif batch_size is None:\n            batch_size = experiment_size\n        kwargs = {}\n        if any(metric in METRICS_NEED_NORM for metric in self.metrics):\n            kwargs[\"norms\"] = np.linalg.norm(X, axis=1)\n        if any(metric in METRICS_NEED_ORDER for metric in self.metrics):\n            kwargs[\"p\"] = p\n\n        emb_a = self.df.emb_a.to_numpy()\n        emb_b = self.df.emb_b.to_numpy()\n\n        emb_a_s = np.array_split(emb_a, np.ceil(experiment_size / batch_size))\n        emb_b_s = np.array_split(emb_b, np.ceil(experiment_size / batch_size))\n\n        for metric, metric_fn in zip(self.metrics, metric_fns):\n            self.df[metric] = np.hstack(\n                [metric_fn(X, i, j, **kwargs) for i, j in 
zip(emb_a_s, emb_b_s)],\n            )\n        if return_embeddings:\n            self.df[\"emb_a\"] = X[emb_a].tolist()\n            self.df[\"emb_b\"] = X[emb_b].tolist()\n\n        self.experiment_success = True\n        return self.df\n\n    def find_optimal_cutoff(self) -> dict:\n        \"\"\"Finds the optimal cutoff threshold for each metric based on the ROC curve.\n\n        This function calculates the optimal threshold for each metric by finding the\n        point on the Receiver Operating Characteristic (ROC) curve where the difference\n        between the True Positive Rate (TPR) and the False Positive Rate (FPR) is\n        minimized.\n\n        Returns:\n            dict: A dictionary with metrics as keys and their corresponding optimal\n            threshold as values.\n        \"\"\"\n\n        self.check_experiment_run()\n        self.optimal_cutoff = {}\n        for metric in self.metrics:\n            fpr, tpr, threshold = roc_curve(self.df[\"target\"], self.df[metric])\n            i = np.arange(len(tpr))\n            roc = pd.DataFrame(\n                {\n                    \"tf\": pd.Series(tpr - (1 - fpr), index=i),\n                    \"threshold\": pd.Series(threshold, index=i),\n                },\n            )\n            roc_t = roc.iloc[(roc.tf - 0).abs().argsort()[:1]]\n            self.optimal_cutoff[metric] = roc_t[\"threshold\"].item()\n        return self.optimal_cutoff\n\n    def threshold_at_fpr(self, fpr: float) -> dict:\n        \"\"\"Find the threshold at a specified False Positive Rate (FPR) for each metric.\n\n        The function calculates the threshold at the specified FPR for each metric\n        by using the Receiver Operating Characteristic (ROC) curve. If the desired\n        FPR is 0 or 1, or no exact match is found, the closest thresholds are used.\n\n        Args:\n            fpr (float): Desired False Positive Rate. 
Must be between 0 and 1.\n\n        Returns:\n            dict: A dictionary where keys are the metrics and values are dictionaries\n            containing FPR, TPR, and threshold at the specified FPR.\n\n        Raises:\n            ValueError: If the provided `fpr` is not between 0 and 1.\n        \"\"\"\n\n        self.check_experiment_run()\n        if not 0 <= fpr <= 1:\n            msg = \"`fpr` must be between 0 and 1. \" f\"Received wanted_fpr={fpr}\"\n            raise ValueError(\n                msg,\n            )\n        threshold_at_fpr = {}\n        for metric in self.metrics:\n            predicted = self.predicted_as_similarity(metric)\n            FPR, TPR, thresholds = roc_curve(\n                self.df[\"target\"],\n                predicted,\n                drop_intermediate=False,\n            )\n            df_fpr_tpr = pd.DataFrame({\"FPR\": FPR, \"TPR\": TPR, \"threshold\": thresholds})\n            ix_left = np.searchsorted(df_fpr_tpr[\"FPR\"], fpr, side=\"left\")\n            ix_right = np.searchsorted(df_fpr_tpr[\"FPR\"], fpr, side=\"right\")\n\n            if fpr == 0:\n                best = df_fpr_tpr.iloc[ix_right]\n            elif fpr == 1 or ix_left == ix_right:\n                best = df_fpr_tpr.iloc[ix_left]\n            else:\n                best = (\n                    df_fpr_tpr.iloc[ix_left]\n                    if abs(df_fpr_tpr.iloc[ix_left].FPR - fpr)\n                    < abs(df_fpr_tpr.iloc[ix_right].FPR - fpr)\n                    else df_fpr_tpr.iloc[ix_right]\n                )\n            best = best.to_dict()\n            if metric in REVERSE_DISTANCE_TO_SIMILARITY:\n                best[\"threshold\"] = REVERSE_DISTANCE_TO_SIMILARITY.get(metric)(\n                    best[\"threshold\"],\n                )\n            threshold_at_fpr[metric] = best\n        return threshold_at_fpr\n\n    def get_binary_prediction(self, metric: str, threshold: float) -> pd.Series:\n        \"\"\"Binary classification 
prediction based on the given metric and threshold.\n\n        Args:\n            metric: Metric name for the desired prediction.\n            threshold: Cut off threshold.\n\n        Returns:\n            pd.Series: Binary predictions.\n\n        \"\"\"\n        return (\n            self.df[metric].apply(lambda x: 1 if x < threshold else 0)\n            if metric in DISTANCE_TO_SIMILARITY\n            else self.df[metric].apply(lambda x: 1 if x > threshold else 0)\n        )\n\n    def evaluate_at_threshold(self, threshold: float, metric: str) -> dict:\n        \"\"\"Evaluate performance at specific threshold\n        Args:\n            threshold: Cut-off threshold.\n            metric: Metric to use.\n\n        Returns:\n            dict: A dict ontaining all evaluation metrics.\n\n        \"\"\"\n        self.metrics_evaluation = {}\n        self.check_experiment_run(metric)\n        for metric in self.metrics:\n            predicted = self.get_binary_prediction(metric, threshold)\n            cm = confusion_matrix(self.df[\"target\"], predicted)\n            tn, fp, fn, tp = cm.ravel()\n            TPR = tp / (tp + fn)  # recall / true positive rate\n            TNR = tn / (tn + fp)  # true negative rate\n            PPV = tp / (tp + fp)  # precision / positive predicted value\n            NPV = tn / (tn + fn)  # negative predictive value\n            FPR = fp / (fp + tn)  # false positive rate\n            FNR = 1 - TPR  # false negative rate\n            FDR = 1 - PPV  # false discovery rate\n            FOR = 1 - NPV  # false omission rate\n            F1 = 2 * (PPV * TPR) / (PPV + TPR)\n\n            evaluation = {\n                \"TPR\": TPR,\n                \"TNR\": TNR,\n                \"PPV\": PPV,\n                \"NPV\": NPV,\n                \"FPR\": FPR,\n                \"FNR\": FNR,\n                \"FDR\": FDR,\n                \"FOR\": FOR,\n                \"F1\": F1,\n            }\n\n        return evaluation\n\n    def 
check_experiment_run(self, metric: Optional[str] = None) -> bool:\n        caller = sys._getframe().f_back.f_code.co_name\n        if not self.experiment_success:\n            msg = (\n                f\"`{caller}` function can only be run after running \"\n                \"`run_experiment`.\"\n            )\n            raise NotImplementedError(\n                msg,\n            )\n        if metric is not None and metric not in self.metrics:\n            msg = (\n                f\"`{caller}` function can only be called with `metric` from \"\n                f\"{self.metrics} which were used while running the experiment\"\n            )\n            raise ValueError(\n                msg,\n            )\n        return True\n\n    def roc_auc(self) -> OrderedDict:\n        \"\"\"Find ROC AUC for all the metrics used.\n\n        Returns:\n            OrderedDict: An OrderedDict with AUC for all metrics.\n\n        \"\"\"\n        self.check_experiment_run()\n        self.roc_auc = {}\n        for metric in self.metrics:\n            predicted = self.predicted_as_similarity(metric)\n            fpr, tpr, thresholds = roc_curve(\n                self.df[\"target\"],\n                predicted,\n                drop_intermediate=False,\n            )\n            self.roc_auc[metric] = auc(fpr, tpr).item()\n        self.roc_auc = OrderedDict(\n            sorted(self.roc_auc.items(), key=lambda x: x[1], reverse=True),\n        )\n        return self.roc_auc\n\n    def predicted_as_similarity(self, metric: str) -> pd.Series:\n        \"\"\"Convert distance metrics to a similarity measure.\n\n        Args:\n            metric: distance metric to convert to similarity. 
If a similarity metric is\n                passed, It gets returned unchanged.\n\n        Returns:\n            pd.Series: Converted distance to similarity.\n\n        \"\"\"\n        predicted = self.df[metric]\n        if metric in DISTANCE_TO_SIMILARITY:\n            predicted = (\n                self.cached_predicted_as_similarity[metric]\n                if metric in self.cached_predicted_as_similarity\n                else DISTANCE_TO_SIMILARITY.get(metric)(predicted)\n            )\n            self.cached_predicted_as_similarity[metric] = predicted\n        return predicted\n\n    def eer(self) -> OrderedDict:\n        \"\"\"Calculates the Equal Error Rate (EER) for each metric.\n\n        Returns:\n            OrderedDict: A dictionary containing the EER value and threshold for each\n            metric.\n                The metrics are sorted in ascending order based on the EER values.\n                Example: {'metric1': {'EER': 0.123, 'threshold': 0.456},\n                        ...}\n\n        \"\"\"\n        self.check_experiment_run()\n        self.eer = {}\n        for metric in self.metrics:\n            predicted = self.predicted_as_similarity(metric)\n            actual = self.df[\"target\"]\n\n            fpr, tpr, thresholds = roc_curve(\n                actual,\n                predicted,\n                pos_label=1,\n                drop_intermediate=False,\n            )\n            fnr = 1 - tpr\n            eer_threshold = thresholds[np.nanargmin(np.absolute(fnr - fpr))].item()\n            eer_1 = fpr[np.nanargmin(np.absolute(fnr - fpr))].item()\n            eer_2 = fnr[np.nanargmin(np.absolute(fnr - fpr))].item()\n            if metric in REVERSE_DISTANCE_TO_SIMILARITY:\n                eer_threshold = REVERSE_DISTANCE_TO_SIMILARITY.get(metric)(\n                    eer_threshold,\n                )\n\n            self.eer[metric] = {\"EER\": (eer_1 + eer_2) / 2, \"threshold\": eer_threshold}\n        self.eer = OrderedDict(\n        
    sorted(self.eer.items(), key=lambda x: x[1][\"EER\"], reverse=False),\n        )\n\n        return self.eer\n\n    def tar_at_far(self, far_values: List[float]) -> OrderedDict:\n        \"\"\"Calculates TAR at specified FAR values for each metric.\n\n        Args:\n            far_values (List[float]): A list of False Accept Rates (FAR) to get TAR\n                values for.\n\n        Returns:\n            OrderedDict: A dictionary with keys as metrics and values as dictionaries\n            of FAR:TAR pairs.\n\n        Raises:\n            ValueError: If any FAR in far_values is not between 0 and 1.\n        \"\"\"\n        if isinstance(far_values, (float, int)):\n            far_values = [float(far_values)]\n\n        if not all(0 <= far <= 1 for far in far_values):\n            raise ValueError(\"All FAR values must be between 0 and 1.\")\n\n        self.check_experiment_run()\n        tar_at_far_results = {}\n\n        for metric in self.metrics:\n            predicted = self.predicted_as_similarity(metric)\n            fpr, tpr, _ = roc_curve(self.df[\"target\"], predicted, pos_label=1)\n\n            tar_values = {}\n            for far in far_values:\n                idx = np.searchsorted(fpr, far, side=\"right\") - 1\n                idx = max(0, min(idx, len(fpr) - 1))  # Ensure idx is within bounds\n                tar_values[far] = tpr[idx].item()\n\n            tar_at_far_results[metric] = tar_values\n\n        self.tar_at_far_results = OrderedDict(\n            sorted(tar_at_far_results.items(), key=lambda x: list(x[1].keys())[0])\n        )\n\n        return self.tar_at_far_results\n"
  },
  {
    "path": "evalify/metrics.py",
    "content": "\"\"\"Evalify metrics module used for calculating the evaluation metrics.\n\nOptimized calculations using einstein sum. Embeddings array and norm arrays are indexed\nwith every\nsplit and calculations happens over large data chunks very quickly.\n\"\"\"\n\nimport numpy as np\n\n\ndef _inner1d(A, B):\n    \"\"\"Calculate the inner product between two arrays of vectors.\n\n    Args:\n        A (numpy.ndarray): 2D array of shape (n_samples, n_features)\n        B (numpy.ndarray): 2D array of shape (n_samples, n_features)\n\n    Returns:\n        numpy.ndarray: 1D array of shape (n_samples,) where each element is the inner\n        product of the corresponding rows in A and B\n\n    \"\"\"\n    return np.einsum(\"ij,ij->i\", A, B, optimize=\"optimal\")\n\n\ndef cosine_similarity(embs, ix, iy, norms, return_distance=False, **kwargs):\n    \"\"\"Calculate the cosine similarity between two arrays of vectors.\n\n    Args:\n        embs (numpy.ndarray): 2D array of shape (n_samples, n_features)\n        ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of\n        the first array\n        iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of\n        the second array\n        norms (numpy.ndarray): 1D array of shape (n_samples,) containing the L2 norm\n        of each row in X\n        return_distance (bool): Whether to return the cosine distance instead of the\n        cosine similarity. 
Defaults to False.\n\n    Returns:\n        numpy.ndarray: 1D array of shape (n_samples,) where each element is the cosine\n        similarity (or cosine distance) of the corresponding rows in X.\n\n    \"\"\"\n    similarity = _inner1d(embs[ix], embs[iy]) / (norms[ix] * norms[iy])\n    return 1 - similarity if return_distance else similarity\n\n\ndef euclidean_distance_l2(embs, ix, iy, norms, **kwargs):\n    \"\"\"Calculate the L2-normalized Euclidean distance between two arrays of vectors.\n\n    Args:\n        embs (numpy.ndarray): 2D array of shape (n_samples, n_features).\n        ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of\n        the first array.\n        iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of\n        the second array.\n        norms (numpy.ndarray): 1D array of shape (n_samples,) containing the L2 norm\n        of each row in embs.\n\n    Returns:\n        numpy.ndarray: 1D array of shape (n_samples,) where each element is the\n        L2-normalized Euclidean distance of the corresponding rows in embs.\n\n    \"\"\"\n    X = embs[ix] / norms[ix].reshape(-1, 1) - embs[iy] / norms[iy].reshape(-1, 1)\n    return np.linalg.norm(X, axis=1)\n\n\ndef minkowski_distance(embs, ix, iy, p, **kwargs):\n    \"\"\"Calculate the element-wise Minkowski or Manhattan or Chebyshev distance.\n\n    Args:\n        embs (numpy.ndarray): 2D array of shape (n_samples, n_features)\n        ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of\n        the first array\n        iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of\n        the second array\n        p (int): The order of the norm of the difference.\n\n    Returns:\n        numpy.ndarray: 1D array of shape (n_samples,) where each element is the\n        Minkowski distance of the corresponding rows in embs.\n\n    \"\"\"\n    return np.linalg.norm(embs[ix] - embs[iy], ord=p, axis=1)\n\n\ndef 
pearson_similarity(embs, ix, iy, **kwargs):\n    \"\"\"Calculate the Pearson correlation coefficient between two arrays of vectors.\n\n    Args:\n        embs (numpy.ndarray): 2D array of shape (n_samples, n_features)\n        ix (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of\n        the first array\n        iy (numpy.ndarray): 1D array of shape (n_samples,) containing the indices of\n        the second array\n\n    Returns:\n        numpy.ndarray: 1D array of shape (n_samples,) where each element is the Pearson\n        correlation coefficient\n        of the corresponding rows in embs.\n\n    \"\"\"\n    A = embs[ix]\n    B = embs[iy]\n    A_mA = A - np.expand_dims(A.mean(axis=1), -1)\n    B_mB = B - np.expand_dims(B.mean(axis=1), -1)\n    ssA = np.expand_dims((A_mA**2).sum(axis=1), -1)\n    ssB = np.expand_dims((B_mB**2).sum(axis=1), -1)\n    return _inner1d(A_mA, B_mB) / np.sqrt(_inner1d(ssA, ssB))\n\n\nmetrics_caller = {\n    \"cosine_similarity\": cosine_similarity,\n    \"pearson_similarity\": pearson_similarity,\n    \"cosine_distance\": lambda embs, ix, iy, norms, **kwargs: cosine_similarity(\n        embs,\n        ix,\n        iy,\n        norms,\n        return_distance=True,\n    ),\n    \"euclidean_distance\": lambda embs, ix, iy, **kwargs: minkowski_distance(\n        embs,\n        ix,\n        iy,\n        p=2,\n    ),\n    \"euclidean_distance_l2\": euclidean_distance_l2,\n    \"minkowski_distance\": minkowski_distance,\n    \"manhattan_distance\": lambda embs, ix, iy, **kwargs: minkowski_distance(\n        embs,\n        ix,\n        iy,\n        p=1,\n    ),\n    \"chebyshev_distance\": lambda embs, ix, iy, **kwargs: minkowski_distance(\n        embs,\n        ix,\n        iy,\n        p=np.inf,\n    ),\n}\n\nMETRICS_NEED_NORM = [\"cosine_similarity\", \"cosine_distance\", \"euclidean_distance_l2\"]\nMETRICS_NEED_ORDER = [\"minkowski_distance\"]\nDISTANCE_TO_SIMILARITY = {\n    \"cosine_distance\": lambda x: 1 - x,\n  
  \"euclidean_distance\": lambda x: 1 / (1 + x),\n    \"euclidean_distance_l2\": lambda x: 1 - x,\n    \"minkowski_distance\": lambda x: 1 / (1 + x),\n    \"manhattan_distance\": lambda x: 1 / (1 + x),\n    \"chebyshev_distance\": lambda x: 1 / (1 + x),\n}\n\nREVERSE_DISTANCE_TO_SIMILARITY = {\n    \"cosine_distance\": lambda x: 1 - x,\n    \"euclidean_distance\": lambda x: (1 / x) - 1,\n    \"euclidean_distance_l2\": lambda x: 1 - x,\n    \"minkowski_distance\": lambda x: (1 / x) - 1,\n    \"manhattan_distance\": lambda x: (1 / x) - 1,\n    \"chebyshev_distance\": lambda x: (1 / x) - 1,\n}\n"
  },
  {
    "path": "evalify/utils.py",
    "content": "\"\"\"Evalify utils module contains various utilities serving other modules.\"\"\"\n\nimport numpy as np\nimport psutil\n\nGB_TO_BYTE = 1024**3\n\n\ndef _validate_vectors(X, y):\n    X = np.asarray(X, dtype=np.float32)\n    y = np.asarray(y, dtype=np.int32).squeeze()\n    if X.ndim != 2:\n        msg = \"Embeddings vector should be 2-D.\"\n        raise ValueError(msg)\n    if y.ndim != 1:\n        msg = \"Target vector should be 1-D.\"\n        raise ValueError(msg)\n    return X, y\n\n\ndef _calc_available_memory():\n    \"\"\"Calculate available memory in system.\"\"\"\n    mem = psutil.virtual_memory()\n    return mem[1]\n\n\ndef calculate_best_batch_size(X, available_mem=None):\n    \"\"\"Calculate maximum rows to fetch per batch without going out of memory.\n\n    We need 3 big arrays to be held in memory (A, B, A*B)\n    \"\"\"\n    available_mem = _calc_available_memory() if available_mem is None else available_mem\n    if available_mem > 2 * GB_TO_BYTE:\n        max_total_rows = np.floor(available_mem - GB_TO_BYTE / X[0].nbytes)\n        return max_total_rows // 3\n    max_total_rows = np.floor(available_mem / X[0].nbytes)\n    return max_total_rows // 5\n"
  },
  {
    "path": "examples/LFW.py",
    "content": "\"\"\"File LFW.npz contains sample embeddings and targets from the LFW dataset.\"\"\"\n\nfrom pathlib import Path\nimport time\nimport numpy as np\n\nfrom evalify import Experiment\n\nlfw_npz = Path(__file__).parent.parent / Path(\"tests/data/LFW.npz\")\nX_y_array = np.load(lfw_npz)\nX = X_y_array[\"X\"][:1000]\ny = X_y_array[\"y\"][:1000]\n\nexperiment = Experiment(\n    metrics=(\n        \"cosine_similarity\",\n        \"pearson_similarity\",\n        \"euclidean_distance_l2\",\n    ),\n    same_class_samples=\"full\",\n    different_class_samples=(\"full\", \"full\"),\n)\nstart_time = time.time()\nprint(\"Starting Experiment\")\nexperiment.run(X, y)\nprint(\n    f\"Total available embeddings {len(y)} resulted in {len(experiment.df)} \"\n    \"samples for the experiment.\"\n)\nprint(f\"Metrics calculations executed in {time.time()-start_time:.2f} seconds\")\nprint(\"ROC AUC:\")\nprint(experiment.roc_auc())\nprint(\"threshold @ FPR:\")\nprint(experiment.threshold_at_fpr(0.01))\nprint(\"EER:\")\nprint(experiment.eer())\nprint(\"TAR@FAR:\")\nprint(experiment.tar_at_far([0.01, 0.001]))\n"
  },
  {
    "path": "mkdocs.yml",
    "content": "site_name: evalify\nrepo_url: https://github.com/ma7555/evalify\nrepo_name: evalify\nnav:\n  - home: index.md\n  - installation: installation.md\n  - usage: usage.md\n  - modules: api.md\n  - contributing: contributing.md\n  - authors: authors.md\n  - history: history.md\ntheme:\n  name: material\n  language: en\n  logo: https://user-images.githubusercontent.com/7144929/154332210-fa1fee34-faae-4567-858a-49fa53e99a2b.svg\n  palette:\n    - media: \"(prefers-color-scheme: light)\"\n      scheme: default\n      toggle:\n        icon: material/weather-night\n        name: Switch to dark mode\n    - media: \"(prefers-color-scheme: dark)\"\n      scheme: slate\n      toggle:\n        icon: material/weather-sunny\n        name: Switch to light mode\n  features:\n    - navigation.indexes\n    - navigation.tabs\n    - navigation.instant\n    - navigation.tabs.sticky\nmarkdown_extensions:\n  - pymdownx.emoji:\n      emoji_index: !!python/name:material.extensions.emoji.twemoji\n      emoji_generator: !!python/name:material.extensions.emoji.to_svg\n  - pymdownx.critic\n  - pymdownx.caret\n  - pymdownx.mark\n  - pymdownx.tilde\n  - pymdownx.tabbed\n  - attr_list\n  - pymdownx.arithmatex:\n      generic: true\n  - pymdownx.highlight:\n      linenums: true\n  - pymdownx.superfences\n  - pymdownx.details\n  - admonition\n  - toc:\n      baselevel: 2\n      permalink: true\n  - meta\nplugins:\n  - include-markdown\n  - search:\n      lang: en\n  - mkdocstrings\nextra:\n  social:\n    - icon: fontawesome/brands/github\n      link: https://github.com/ma7555/evalify\n      name: Github\n    - icon: material/email\n      link: \"mailto:evalify@ma7555.anonaddy.com\"\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[tool.poetry]\nname = \"evalify\"\nversion = \"1.0.0\"\nhomepage = \"https://github.com/ma7555/evalify\"\ndescription = \"Evaluate your face or voice verification models literally in seconds.\"\nauthors = [\"Mahmoud Bahaa <evalify@ma7555.anonaddy.com>\"]\nkeywords = [\"biometric verification\", \"biometric authentication\", \"evaluation\"]\nreadme = \"README.md\"\nlicense = \"BSD-3-Clause\"\nclassifiers = [\n    \"Development Status :: 4 - Beta\",\n    \"Intended Audience :: Developers\",\n    \"License :: OSI Approved :: BSD License\",\n    \"Natural Language :: English\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.9\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n]\n\npackages = [\n    { include = \"evalify\" },\n]\n\n[tool.poetry.dependencies]\npython = \">=3.9,<4.0\"\npandas = \"^2.0.0\"\nnumpy = \"^2.0.0\"\npsutil = \"^5.9.0\"\nscikit-learn = \"^1.2.0\"\n\n# Optional Dependencies\nruff = { version = \">=0.7.2\", optional = true }\npytest = { version = \"^7.2.0\", optional = true }\npytest-cov = { version = \"^4.0.0\", optional = true }\nscipy = { version = \">=1.10.0\", optional = true }\ntox = { version = \"^4.7.0\", optional = true }\nvirtualenv = { version = \">=20.24.0\", optional = true }\npip = { version = \">=23.2.0\", optional = true }\nmkdocs = { version = \">=1.4.0\", optional = true }\nmkdocs-material = { version = \"^9.2.0\", optional = true }\nmkdocstrings = { version = \">=0.26.0\", optional = true }\nmkdocstrings-python = { version = \">=1.12.2\", optional = true }\nmkdocs-include-markdown-plugin = { version = \">=6.0.0\", optional = true }\ntwine = { version = \"^5.0.0\", optional = true }\ntoml = { version = \">0.8.0\", optional = true }\npyreadline3 = { version = \"^3.4.1\", optional = true }\npoetry = { version = \"^1.8.0\", optional = true }\n\n[tool.poetry.extras]\ntest = [\n   
 \"pytest\",\n    \"ruff\",\n    \"pytest-cov\",\n    \"pyreadline3\",\n    \"scipy\",\n]\n\ndev = [\n    \"tox\",\n    \"virtualenv\",\n    \"pip\",\n    \"twine\",\n    \"toml\",\n    \"poetry\",\n]\n\ndoc = [\n    \"mkdocs\",\n    \"mkdocs-material\",\n    \"mkdocstrings\",\n    \"mkdocstrings-python\",\n    \"mkdocs-include-markdown-plugin\",\n]\n\n[build-system]\nrequires = [\"poetry-core>=1.8.0\"]\nbuild-backend = \"poetry.core.masonry.api\"\n\n[tool.ruff]\nline-length = 88\nindent-width = 4\n\n[tool.ruff.lint]\nselect = [\n    \"E\",  # pycodestyle error\n    \"F\",  # Pyflakes\n    \"I\",  # isort\n]\ndummy-variable-rgx = \"^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$\"\n\n[tool.ruff.format]\nquote-style = \"double\"\n\n[tool.ruff.lint.isort]\nknown-first-party = [\"evalify\"]\n"
  },
  {
    "path": "tests/__init__.py",
    "content": "\"\"\"Unit test package for evalify.\"\"\"\n"
  },
  {
    "path": "tests/test_evalify.py",
    "content": "#!/usr/bin/env python\n\n\"\"\"Tests for `evalify` package.\"\"\"\nimport unittest\n\nimport numpy as np\nfrom scipy.special import comb\n\nfrom evalify import Experiment\nfrom evalify.metrics import metrics_caller\n\n\nclass TestEvalify(unittest.TestCase):\n    \"\"\"Tests for `evalify` package.\"\"\"\n\n    def setUp(self):\n        \"\"\"Set up test fixtures, if any.\"\"\"\n        rng = np.random.default_rng(555)\n        self.nphotos = 500\n        self.emb_size = 8\n        self.nclasses = 10\n        self.embs = rng.random((self.nphotos, self.emb_size), dtype=np.float32)\n        self.targets = rng.integers(self.nclasses, size=self.nphotos)\n\n    def test_run_euclidean_distance(self):\n        \"\"\"Test run with euclidean_distance\"\"\"\n        experiment = Experiment(metrics=\"euclidean_distance\")\n        df = experiment.run(self.embs, self.targets)\n        experiment = Experiment(metrics=\"euclidean_distance_l2\")\n        df_l2 = experiment.run(self.embs, self.targets)\n        self.assertGreater(df.euclidean_distance.max(), 0)\n        self.assertGreater(df_l2.euclidean_distance_l2.max(), 0)\n\n    def test_run_cosine_similarity(self):\n        \"\"\"Test run with cosine_similarity\"\"\"\n        experiment = Experiment(metrics=\"cosine_similarity\")\n        df = experiment.run(self.embs, self.targets)\n        self.assertLessEqual(df.cosine_similarity.max(), 1)\n\n    def test_run_all_metrics_separated(self):\n        for metric in metrics_caller.keys():\n            experiment = Experiment(metrics=metric)\n            df = experiment.run(self.embs, self.targets)\n            self.assertTrue(metric in df.columns)\n\n    def test_run_all_metrics_combined(self):\n        metrics = set(metrics_caller.keys())\n        experiment = Experiment(metrics=metrics)\n        df = experiment.run(self.embs, self.targets)\n        self.assertTrue(metrics.issubset(df.columns))\n\n    def test_run_full_class_samples(self):\n        \"\"\"Test run 
with return_embeddings\"\"\"\n        experiment = Experiment(\n            same_class_samples=\"full\",\n            different_class_samples=(\"full\", \"full\"),\n        )\n        df = experiment.run(\n            self.embs,\n            self.targets,\n        )\n        self.assertEqual(len(df), comb(self.nphotos, 2))\n\n    def test_run_custom_class_samples(self):\n        \"\"\"Test run with custom same_class_samples and different_class_samples\"\"\"\n        N, M = (2, 5)\n        experiment = Experiment(same_class_samples=2, different_class_samples=(N, M))\n        same_class_samples = 3\n        df = experiment.run(\n            self.embs,\n            self.targets,\n        )\n\n        self.assertLessEqual(\n            len(df),\n            (comb(same_class_samples, 2) * self.nclasses)\n            + (self.nclasses * (self.nclasses - 1)) * M * N,\n        )\n\n    def test_run_shuffle(self):\n        \"\"\"Test run with shuffle\"\"\"\n        experiment = Experiment(seed=555)\n        df1 = experiment.run(self.embs, self.targets, shuffle=True)\n        df2 = experiment.run(self.embs, self.targets, shuffle=True)\n        self.assertEqual(len(df1), len(df2))\n        self.assertEqual(sum(df1.index), sum(df2.index))\n        self.assertTrue(all(ix in df2.index for ix in df1.index))\n\n    def test_run_no_batch_size(self):\n        \"\"\"Test run with no batch_size\"\"\"\n        experiment = Experiment(\n            same_class_samples=2,\n            different_class_samples=(1, 1),\n            seed=555,\n        )\n        experiment.run(self.embs, self.targets, batch_size=None)\n        self.assertTrue(experiment.check_experiment_run())\n\n    def test_run_return_embeddings(self):\n        \"\"\"Test run with return_embeddings\"\"\"\n        experiment = Experiment()\n        df = experiment.run(self.embs, self.targets, return_embeddings=True)\n        self.assertLessEqual(len(df.at[0, \"emb_a\"]), self.emb_size)\n\n    def 
test_run_evaluate_at_threshold(self):\n        \"\"\"Test run with evaluate_at_threshold\"\"\"\n        metrics = [\"cosine_similarity\", \"euclidean_distance_l2\"]\n        experiment = Experiment(metrics=metrics)\n        experiment.run(\n            self.embs,\n            self.targets,\n        )\n        evaluations = experiment.evaluate_at_threshold(0.5, \"cosine_similarity\")\n        # self.assertEqual(len(evaluations), len(metrics))\n        self.assertEqual(len(evaluations), 9)\n\n    def test_run_find_optimal_cutoff(self):\n        \"\"\"Test run with find_optimal_cutoff\"\"\"\n        metrics = [\"cosine_similarity\", \"euclidean_distance_l2\"]\n        experiment = Experiment(metrics=metrics)\n        experiment.run(\n            self.embs,\n            self.targets,\n        )\n        evaluations = experiment.find_optimal_cutoff()\n        self.assertEqual(len(evaluations), len(metrics))\n        self.assertTrue(all(evaluation in metrics for evaluation in evaluations))\n\n    def test_run_get_roc_auc(self):\n        \"\"\"Test run with get_roc_auc\"\"\"\n        metrics = [\"cosine_similarity\", \"euclidean_distance_l2\"]\n        experiment = Experiment(metrics=metrics)\n        experiment.run(\n            self.embs,\n            self.targets,\n        )\n        roc_auc = experiment.roc_auc()\n        # self.assertEqual(len(evaluations), len(metrics))\n        self.assertEqual(len(roc_auc), len(metrics))\n        self.assertTrue(all(auc in metrics for auc in roc_auc))\n\n    def test_run_predicted_as_similarity(self):\n        \"\"\"Test run with predicted_as_similarity\"\"\"\n        experiment = Experiment(metrics=[\"cosine_similarity\", \"cosine_distance\"])\n        experiment.run(\n            self.embs,\n            self.targets,\n        )\n        result = experiment.predicted_as_similarity(\"cosine_similarity\")\n        result_2 = experiment.predicted_as_similarity(\"cosine_distance\")\n        self.assertTrue(np.allclose(result, 
result_2))\n\n    def test_run_find_threshold_at_fpr(self):\n        \"\"\"Test run with find_threshold_at_fpr\"\"\"\n        metric = \"cosine_similarity\"\n        experiment = Experiment(\n            metrics=metric,\n            different_class_samples=(\"full\", \"full\"),\n        )\n        experiment.run(\n            self.embs,\n            self.targets,\n        )\n        fpr_d01 = experiment.threshold_at_fpr(0.1)\n        fpr_d1 = experiment.threshold_at_fpr(1)\n        fpr_d0 = experiment.threshold_at_fpr(0)\n        self.assertEqual(len(fpr_d01[metric]), 3)\n        self.assertAlmostEqual(fpr_d01[metric][\"threshold\"], 0.8939142, 3)\n        self.assertAlmostEqual(fpr_d0[metric][\"threshold\"], 0.9953355, 3)\n        self.assertAlmostEqual(fpr_d1[metric][\"threshold\"], 0.2060538, 3)\n\n    def test_run_calculate_eer(self):\n        \"\"\"Test run with calculate_eer\"\"\"\n        metric = \"cosine_similarity\"\n        experiment = Experiment(\n            metrics=metric,\n            different_class_samples=(\"full\", \"full\"),\n        )\n        experiment.run(\n            self.embs,\n            self.targets,\n        )\n        eer = experiment.eer()\n        self.assertTrue(\"EER\" in eer[metric])\n\n    def test__call__(self):\n        \"\"\"Test run with __call__\"\"\"\n        experiment = Experiment(seed=555)\n        result = experiment.run(self.embs, self.targets)\n        result_2 = experiment(self.embs, self.targets)\n        self.assertTrue(np.array_equal(result.to_numpy(), result_2.to_numpy()))\n\n    def test_run_errors(self):\n        \"\"\"Test run errors\"\"\"\n        with self.assertRaisesRegex(\n            ValueError,\n            \"`same_class_samples` argument must be one of 'full' or an integer \",\n        ):\n            experiment = Experiment(same_class_samples=54.4)\n            experiment.run(self.embs, self.targets)\n\n        with self.assertRaisesRegex(\n            ValueError,\n            
\"`different_class_samples` argument must be one of 'full', 'minimal'\",\n        ):\n            experiment = Experiment(different_class_samples=\"all\")\n            experiment.run(self.embs, self.targets)\n\n        with self.assertRaisesRegex(\n            ValueError,\n            \"When passing `different_class_samples` as a tuple or list. \",\n        ):\n            experiment = Experiment(different_class_samples=(1, 2, 3))\n            experiment.run(\n                self.embs,\n                self.targets,\n            )\n\n        with self.assertRaisesRegex(\n            ValueError,\n            '`batch_size` argument must be either \"best\" or of type integer',\n        ):\n            experiment = Experiment()\n            experiment.run(self.embs, self.targets, batch_size=\"all\")\n\n        with self.assertRaisesRegex(ValueError, \"`metric` argument must be one of \"):\n            experiment = Experiment(metrics=\"dot_prod\")\n            experiment.run(self.embs, self.targets)\n\n        with self.assertRaisesRegex(\n            ValueError,\n            \"`p` must be an int and at least 1. 
Received: p=\",\n        ):\n            experiment = Experiment()\n            experiment.run(self.embs, self.targets, p=0.1)\n\n        with self.assertRaisesRegex(\n            NotImplementedError,\n            \"`evaluate_at_threshold` function can only be run after running \"\n            \"`run_experiment`.\",\n        ):\n            experiment = Experiment()\n            experiment.evaluate_at_threshold(0.5, \"euclidean_distance\")\n\n        with self.assertRaisesRegex(\n            ValueError,\n            \"`evaluate_at_threshold` function can only be called with `metric` from \",\n        ):\n            experiment = Experiment(metrics=\"euclidean_distance\")\n            experiment.run(self.embs, self.targets)\n            experiment.evaluate_at_threshold(0.5, \"cosine_similarity\")\n\n        with self.assertRaisesRegex(\n            ValueError,\n            \"`fpr` must be between 0 and 1. Received wanted_fpr=\",\n        ):\n            experiment = Experiment(metrics=\"euclidean_distance\")\n            experiment.run(self.embs, self.targets)\n            experiment.threshold_at_fpr(-1.1)\n"
  },
  {
    "path": "tests/test_experiment_real_data.py",
    "content": "# tests/test_experiment_real_data_small.py\n\nimport os\nimport pathlib\nimport unittest\nfrom collections import OrderedDict\n\nimport numpy as np\n\nfrom evalify import Experiment\n\n\nclass TestExperimentRealDataSmall(unittest.TestCase):\n    \"\"\"Tests for Experiment class using a subset of the LFW dataset\"\"\"\n\n    def setUp(self):\n        \"\"\"Set up test fixtures.\"\"\"\n        # Path to LFW.npz, assuming it's in the tests/data/ directory\n        self.lfw_npz = os.path.join(pathlib.Path(__file__).parent, \"data\", \"LFW.npz\")\n        if not os.path.exists(self.lfw_npz):\n            self.fail(f\"LFW.npz not found at {self.lfw_npz}\")\n\n        X_y_array = np.load(self.lfw_npz)\n        self.X = X_y_array[\"X\"][:1000]\n        self.y = X_y_array[\"y\"][:1000]\n\n        self.metrics = [\n            \"cosine_similarity\",\n            \"pearson_similarity\",\n            \"euclidean_distance_l2\",\n        ]\n\n        self.experiment = Experiment(\n            metrics=self.metrics,\n            same_class_samples=\"full\",\n            different_class_samples=(\"full\", \"full\"),\n            seed=555,  # To ensure reproducibility\n        )\n\n        # Run the experiment once during setup to reuse the results in multiple tests\n        self.df = self.experiment.run(self.X, self.y)\n\n    def test_number_of_samples(self):\n        \"\"\"Test that the number of generated samples matches the expected count.\"\"\"\n        expected_num_samples = 499500\n        actual_num_samples = len(self.df)\n        self.assertEqual(\n            actual_num_samples,\n            expected_num_samples,\n            f\"Expected {expected_num_samples} samples, got {actual_num_samples}.\",\n        )\n\n    def test_roc_auc(self):\n        \"\"\"Test that ROC AUC values match the expected results.\"\"\"\n        expected_roc_auc = OrderedDict(\n            {\n                \"euclidean_distance_l2\": 0.9998640116393942,\n                
\"cosine_similarity\": 0.9998640114481793,\n                \"pearson_similarity\": 0.999858162377461,\n            }\n        )\n\n        actual_roc_auc = self.experiment.roc_auc()\n\n        self.assertEqual(\n            len(actual_roc_auc),\n            len(self.metrics),\n            f\"Expected ROC AUC for {len(self.metrics)} metrics, got \"\n            f\"{len(actual_roc_auc)}.\",\n        )\n\n        for metric in self.metrics:\n            self.assertIn(\n                metric, actual_roc_auc, f\"ROC AUC for metric '{metric}' not found.\"\n            )\n            self.assertAlmostEqual(\n                actual_roc_auc[metric],\n                expected_roc_auc[metric],\n                places=6,\n                msg=f\"ROC AUC for metric '{metric}' does not match.\",\n            )\n\n    def test_threshold_at_fpr(self):\n        \"\"\"Test that thresholds at a specified FPR match expected values.\"\"\"\n        far = 0.01\n        expected_threshold_at_fpr = {\n            \"cosine_similarity\": {\n                \"FPR\": 0.010001841326240518,\n                \"TPR\": 0.9973539973539973,\n                \"threshold\": 0.37717896699905396,\n            },\n            \"pearson_similarity\": {\n                \"FPR\": 0.010001841326240518,\n                \"TPR\": 0.9973539973539973,\n                \"threshold\": 0.37802454829216003,\n            },\n            \"euclidean_distance_l2\": {\n                \"FPR\": 0.010001841326240518,\n                \"TPR\": 0.9973539973539973,\n                \"threshold\": 1.1160835027694702,\n            },\n        }\n\n        actual_threshold_at_fpr = self.experiment.threshold_at_fpr(far)\n\n        self.assertEqual(\n            len(actual_threshold_at_fpr),\n            len(self.metrics),\n            f\"Expected Threshold @ FPR for {len(self.metrics)} metrics, got \"\n            f\"{len(actual_threshold_at_fpr)}.\",\n        )\n\n        for metric in self.metrics:\n            
self.assertIn(\n                metric,\n                actual_threshold_at_fpr,\n                f\"Threshold @ FPR for metric '{metric}' not found.\",\n            )\n            expected = expected_threshold_at_fpr[metric]\n            actual = actual_threshold_at_fpr[metric]\n\n            self.assertAlmostEqual(\n                actual[\"FPR\"],\n                expected[\"FPR\"],\n                places=6,\n                msg=f\"FPR for metric '{metric}' does not match.\",\n            )\n            self.assertAlmostEqual(\n                actual[\"TPR\"],\n                expected[\"TPR\"],\n                places=6,\n                msg=f\"TPR for metric '{metric}' does not match.\",\n            )\n            self.assertAlmostEqual(\n                actual[\"threshold\"],\n                expected[\"threshold\"],\n                places=6,\n                msg=f\"Threshold for metric '{metric}' at FAR={far} does not match.\",\n            )\n\n    def test_eer(self):\n        \"\"\"Test that EER values and thresholds match the expected results.\"\"\"\n        expected_eer = OrderedDict(\n            {\n                \"cosine_similarity\": {\n                    \"EER\": 0.004724863226023654,\n                    \"threshold\": 0.4244731664657593,\n                },\n                \"euclidean_distance_l2\": {\n                    \"EER\": 0.004724863226023654,\n                    \"threshold\": 1.0728718042373657,\n                },\n                \"pearson_similarity\": {\n                    \"EER\": 0.004914464785693375,\n                    \"threshold\": 0.4228288531303406,\n                },\n            }\n        )\n\n        actual_eer = self.experiment.eer()\n\n        self.assertEqual(\n            len(actual_eer),\n            len(self.metrics),\n            f\"Expected EER for {len(self.metrics)} metrics, got {len(actual_eer)}.\",\n        )\n\n        for metric in self.metrics:\n            self.assertIn(metric, actual_eer, 
f\"EER for metric '{metric}' not found.\")\n            expected = expected_eer[metric]\n            actual = actual_eer[metric]\n\n            self.assertAlmostEqual(\n                actual[\"EER\"],\n                expected[\"EER\"],\n                places=6,\n                msg=f\"EER for metric '{metric}' does not match.\",\n            )\n            self.assertAlmostEqual(\n                actual[\"threshold\"],\n                expected[\"threshold\"],\n                places=6,\n                msg=f\"Threshold for EER of metric '{metric}' does not match.\",\n            )\n\n    def test_tar_at_far(self):\n        \"\"\"Test the tar_at_far method with specific FAR values.\"\"\"\n        # Define FAR values to test\n        far_values = [0.01, 0.001]\n\n        # Define expected TAR values based on the recent experiment\n        expected_tar_at_far = OrderedDict(\n            {\n                \"cosine_similarity\": {\n                    0.01: 0.9973539973539973,\n                    0.001: 0.9795879795879796,\n                },\n                \"pearson_similarity\": {\n                    0.01: 0.9973539973539973,\n                    0.001: 0.9793989793989794,\n                },\n                \"euclidean_distance_l2\": {\n                    0.01: 0.9973539973539973,\n                    0.001: 0.9795879795879796,\n                },\n            }\n        )\n\n        # Call tar_at_far with the FAR values\n        actual_tar_at_far = self.experiment.tar_at_far(far_values)\n\n        # Assert the returned TAR@FAR matches expected values\n        self.assertEqual(\n            len(actual_tar_at_far),\n            len(self.metrics),\n            f\"Expected TAR@FAR for {len(self.metrics)} metrics, got \"\n            f\"{len(actual_tar_at_far)}.\",\n        )\n\n        for metric in self.metrics:\n            self.assertIn(\n                metric, actual_tar_at_far, f\"TAR@FAR for metric '{metric}' not found.\"\n            )\n\n            
for far in far_values:\n                self.assertIn(\n                    far,\n                    actual_tar_at_far[metric],\n                    f\"TAR@FAR for metric '{metric}' at FAR={far} not found.\",\n                )\n\n                expected_tar = expected_tar_at_far[metric][far]\n                actual_tar = actual_tar_at_far[metric][far]\n\n                self.assertAlmostEqual(\n                    actual_tar,\n                    expected_tar,\n                    places=6,\n                    msg=f\"TAR@FAR for metric '{metric}' at FAR={far} does not match.\",\n                )\n\n\n# if __name__ == '__main__':\n#     unittest.main()\n"
  },
  {
    "path": "tests/test_metrics.py",
    "content": "#!/usr/bin/env python\n\n\"\"\"Tests for `evalify` package.\"\"\"\nimport unittest\n\nimport numpy as np\nfrom scipy.spatial import distance\nfrom scipy.stats import pearsonr\n\nfrom evalify import metrics\n\n\nclass TestMetrics(unittest.TestCase):\n    \"\"\"Tests for `evalify` package.\"\"\"\n\n    def setUp(self):\n        \"\"\"Set up test fixtures, if any.\"\"\"\n        rng = np.random.default_rng(555)\n        self.nphotos = 500\n        self.emb_size = 8\n        self.slice_size = 100\n        self.embs = rng.random((self.nphotos, self.emb_size), dtype=np.float32)\n        self.norms = np.linalg.norm(self.embs, axis=1)\n        self.ix = rng.integers(self.nphotos, size=self.slice_size)\n        self.iy = rng.integers(self.nphotos, size=self.slice_size)\n\n    def test_cosine_similarity(self):\n        \"\"\"Test cosine_similarity\"\"\"\n        result = metrics.cosine_similarity(self.embs, self.ix, self.iy, self.norms)\n        result_2 = 1 - np.array(\n            [\n                distance.cosine(self.embs[ix], self.embs[iy])\n                for (ix, iy) in zip(self.ix, self.iy)\n            ],\n        )\n        self.assertEqual(result.shape, (self.slice_size,))\n        self.assertTrue(np.allclose(result, result_2))\n\n    def test_pearson_similarity(self):\n        \"\"\"Test pearson_similarity\"\"\"\n        result = metrics.pearson_similarity(self.embs, self.ix, self.iy)\n        result_2 = np.array(\n            [\n                pearsonr(self.embs[ix], self.embs[iy])[0]\n                for (ix, iy) in zip(self.ix, self.iy)\n            ],\n        )\n        self.assertEqual(result.shape, (self.slice_size,))\n        self.assertTrue(np.allclose(result, result_2))\n\n    def test_euclidean_distance(self):\n        \"\"\"Test euclidean_distance\"\"\"\n        result = metrics.metrics_caller.get(\"euclidean_distance\")(\n            self.embs,\n            self.ix,\n            self.iy,\n        )\n        result_2 = np.array(\n   
         [\n                distance.euclidean(self.embs[ix], self.embs[iy])\n                for (ix, iy) in zip(self.ix, self.iy)\n            ],\n        )\n        self.assertEqual(result.shape, (self.slice_size,))\n        self.assertTrue(np.allclose(result, result_2))\n\n    def test_euclidean_distance_l2(self):\n        \"\"\"Test euclidean_distance_l2\"\"\"\n        result = metrics.metrics_caller.get(\"euclidean_distance_l2\")(\n            self.embs,\n            self.ix,\n            self.iy,\n            self.norms,\n        )\n        result_2 = np.array(\n            [\n                distance.euclidean(\n                    self.embs[ix] / np.sqrt(np.sum(self.embs[ix] ** 2)),\n                    self.embs[iy] / np.sqrt(np.sum(self.embs[iy] ** 2)),\n                )\n                for (ix, iy) in zip(self.ix, self.iy)\n            ],\n        )\n\n        self.assertEqual(result.shape, (len(self.ix),))\n        self.assertTrue(np.allclose(result, result_2))\n\n    def test_minkowski_distance_distance(self):\n        \"\"\"Test minkowski_distance\"\"\"\n        result = metrics.metrics_caller.get(\"minkowski_distance\")(\n            self.embs,\n            self.ix,\n            self.iy,\n            p=3,\n        )\n        result_2 = np.array(\n            [\n                distance.minkowski(self.embs[ix], self.embs[iy], p=3)\n                for (ix, iy) in zip(self.ix, self.iy)\n            ],\n        )\n        self.assertEqual(result.shape, (self.slice_size,))\n        self.assertTrue(np.allclose(result, result_2))\n\n    def test_manhattan_distance_distance(self):\n        \"\"\"Test manhattan_distance\"\"\"\n        result = metrics.metrics_caller.get(\"manhattan_distance\")(\n            self.embs,\n            self.ix,\n            self.iy,\n        )\n        result_2 = np.array(\n            [\n                distance.cityblock(self.embs[ix], self.embs[iy])\n                for (ix, iy) in zip(self.ix, self.iy)\n            ],\n       
 )\n        self.assertEqual(result.shape, (self.slice_size,))\n        self.assertTrue(np.allclose(result, result_2))\n\n    def test_chebyshev_distance_distance(self):\n        \"\"\"Test chebyshev_distance\"\"\"\n        result = metrics.metrics_caller.get(\"chebyshev_distance\")(\n            self.embs,\n            self.ix,\n            self.iy,\n        )\n        result_2 = np.array(\n            [\n                distance.chebyshev(self.embs[ix], self.embs[iy])\n                for (ix, iy) in zip(self.ix, self.iy)\n            ],\n        )\n        self.assertEqual(result.shape, (self.slice_size,))\n        self.assertTrue(np.allclose(result, result_2))\n"
  },
  {
    "path": "tests/test_utils.py",
    "content": "#!/usr/bin/env python\n\n\"\"\"Tests for `evalify` package.\"\"\"\nimport unittest\n\nimport numpy as np\n\nfrom evalify import utils\n\n\nclass TestUtils(unittest.TestCase):\n    \"\"\"Tests for `evalify` package.\"\"\"\n\n    def setUp(self):\n        \"\"\"Set up test fixtures, if any.\"\"\"\n        self.rng = np.random.default_rng(555)\n        self.nphotos = 100\n        self.emb_size = 8\n        self.nclasses = 10\n        self.embs = self.rng.random((self.nphotos, self.emb_size), dtype=np.float32)\n        self.targets = self.rng.integers(self.nclasses, size=self.nphotos)\n\n    def tearDown(self):\n        \"\"\"Tear down test fixtures, if any.\"\"\"\n\n    def test_validate_vectors(self):\n        \"\"\"Test _validate_vectors\"\"\"\n        embs = self.embs.tolist()\n        targets = self.targets.tolist()\n        X, y = utils._validate_vectors(embs, targets)\n        self.assertEqual(X.shape, (self.nphotos, self.emb_size))\n        self.assertEqual(y.shape, (self.nphotos,))\n\n    def test_calculate_best_batch_size(self):\n        \"\"\"Test calculate_best_batch_size\"\"\"\n        batch_size = utils.calculate_best_batch_size(self.embs, 4 * utils.GB_TO_BYTE)\n        self.assertEqual(batch_size, 1420470954)\n\n    def test_run_errors(self):\n        \"\"\"Test run errors\"\"\"\n        with self.assertRaisesRegex(ValueError, \"Embeddings vector should be 2-D.\"):\n            _ = utils._validate_vectors(\n                X=self.rng.random(5), y=self.rng.integers(10, size=5),\n            )\n        with self.assertRaisesRegex(ValueError, \"Target vector should be 1-D.\"):\n            _ = utils._validate_vectors(\n                X=self.rng.random((5, 5)), y=self.rng.integers(10, size=(5, 2)),\n            )\n"
  },
  {
    "path": "tox.ini",
    "content": "[tox]\nisolated_build = true\nenvlist = py39, py310, py311, py312, lint\n\n[gh-actions]\npython =\n    3.12: py312\n    3.11: py311\n    3.10: py310\n    3.9: py39\n\n[testenv:lint]\nallowlist_externals =\n    python\ndeps =\n    .[test, doc, dev]\ncommands =\n    python -m ruff check evalify tests --fix\n    python -m poetry build\n    python -m mkdocs build\n    python -m twine check dist/*\n\n[testenv]\nallowlist_externals = pytest\nsetenv =\n    PYTHONPATH = {toxinidir}\n    PYTHONWARNINGS = ignore\ndeps =\n    .[test]\ncommands =\n    pytest -s --cov=evalify --cov-append --cov-report=xml --cov-report term-missing tests\n"
  }
]