Repository: felixdittrich92/OnnxTR
Branch: main
Commit: b10318c76097
Files: 126
Total size: 480.5 KB
Directory structure:
gitextract_7yglu2_f/
├── .conda/
│ └── meta.yaml
├── .github/
│ ├── CODEOWNERS
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ └── feature_request.yml
│ ├── dependabot.yml
│ ├── release.yml
│ └── workflows/
│ ├── builds.yml
│ ├── clear_caches.yml
│ ├── demo.yml
│ ├── docker.yml
│ ├── main.yml
│ ├── publish.yml
│ └── style.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── demo/
│ ├── README.md
│ ├── app.py
│ ├── packages.txt
│ └── requirements.txt
├── onnxtr/
│ ├── __init__.py
│ ├── contrib/
│ │ ├── __init__.py
│ │ ├── artefacts.py
│ │ └── base.py
│ ├── file_utils.py
│ ├── io/
│ │ ├── __init__.py
│ │ ├── elements.py
│ │ ├── html.py
│ │ ├── image.py
│ │ ├── pdf.py
│ │ └── reader.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── _utils.py
│ │ ├── builder.py
│ │ ├── classification/
│ │ │ ├── __init__.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ └── mobilenet.py
│ │ │ ├── predictor/
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ └── zoo.py
│ │ ├── detection/
│ │ │ ├── __init__.py
│ │ │ ├── _utils/
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ ├── core.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── differentiable_binarization.py
│ │ │ │ ├── fast.py
│ │ │ │ └── linknet.py
│ │ │ ├── postprocessor/
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ ├── predictor/
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ └── zoo.py
│ │ ├── engine.py
│ │ ├── factory/
│ │ │ ├── __init__.py
│ │ │ └── hub.py
│ │ ├── predictor/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ └── predictor.py
│ │ ├── preprocessor/
│ │ │ ├── __init__.py
│ │ │ └── base.py
│ │ ├── recognition/
│ │ │ ├── __init__.py
│ │ │ ├── core.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── crnn.py
│ │ │ │ ├── master.py
│ │ │ │ ├── parseq.py
│ │ │ │ ├── sar.py
│ │ │ │ ├── viptr.py
│ │ │ │ └── vitstr.py
│ │ │ ├── predictor/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── _utils.py
│ │ │ │ └── base.py
│ │ │ ├── utils.py
│ │ │ └── zoo.py
│ │ └── zoo.py
│ ├── py.typed
│ ├── transforms/
│ │ ├── __init__.py
│ │ └── base.py
│ └── utils/
│ ├── __init__.py
│ ├── common_types.py
│ ├── data.py
│ ├── fonts.py
│ ├── geometry.py
│ ├── multithreading.py
│ ├── reconstitution.py
│ ├── repr.py
│ ├── visualization.py
│ └── vocabs.py
├── pyproject.toml
├── scripts/
│ ├── convert_to_float16.py
│ ├── evaluate.py
│ ├── latency.py
│ └── quantize.py
├── setup.py
└── tests/
├── common/
│ ├── test_contrib.py
│ ├── test_core.py
│ ├── test_engine_cfg.py
│ ├── test_headers.py
│ ├── test_io.py
│ ├── test_io_elements.py
│ ├── test_models.py
│ ├── test_models_builder.py
│ ├── test_models_classification.py
│ ├── test_models_detection.py
│ ├── test_models_detection_utils.py
│ ├── test_models_factory.py
│ ├── test_models_preprocessor.py
│ ├── test_models_recognition.py
│ ├── test_models_recognition_utils.py
│ ├── test_models_zoo.py
│ ├── test_transforms.py
│ ├── test_utils_data.py
│ ├── test_utils_fonts.py
│ ├── test_utils_geometry.py
│ ├── test_utils_multithreading.py
│ ├── test_utils_reconstitution.py
│ ├── test_utils_visualization.py
│ └── test_utils_vocabs.py
└── conftest.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .conda/meta.yaml
================================================
{% set pyproject = load_file_data('../pyproject.toml', from_recipe_dir=True) %}
{% set project = pyproject.get('project') %}
{% set urls = pyproject.get('project', {}).get('urls') %}
{% set version = environ.get('BUILD_VERSION', '0.8.2a0') %}
package:
name: onnxtr
version: {{ version }}
source:
fn: onnxtr-{{ version }}.tar.gz
url: ../dist/onnxtr-{{ version }}.tar.gz
build:
script: python setup.py install --single-version-externally-managed --record=record.txt
requirements:
host:
- python>=3.10, <3.12
- setuptools
run:
- numpy >=1.16.0, <3.0.0
- scipy >=1.4.0, <2.0.0
- pillow >=9.2.0
- opencv >=4.5.0, <5.0.0
- pypdfium2-team::pypdfium2_helpers >=4.11.0, <5.0.0
- pyclipper >=1.2.0, <2.0.0
- langdetect >=1.0.9, <2.0.0
- rapidfuzz >=3.0.0, <4.0.0
- huggingface_hub >=0.20.0, <1.0.0
- defusedxml >=0.7.0
- anyascii >=0.3.2
- tqdm >=4.30.0
test:
requires:
- pip
- onnxruntime
imports:
- onnxtr
about:
home: {{ urls.get('repository') }}
license: Apache-2.0
license_file: {{ project.get('license', {}).get('file') }}
summary: {{ project.get('description') | replace(":", " -")}}
dev_url: {{ urls.get('repository') }}
================================================
FILE: .github/CODEOWNERS
================================================
* @felixdittrich92
================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
github: felixdittrich92
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
polar: # Replace with a single Polar username
buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
thanks_dev: # Replace with a single thanks.dev username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: 🐛 Bug report
description: Create a report to help us improve the library
labels: 'type: bug'
body:
- type: markdown
attributes:
value: >
#### Before reporting a bug, please check that the issue hasn't already been addressed in [the existing and past issues](https://github.com/felixdittrich92/onnxtr/issues).
- type: textarea
attributes:
label: Bug description
description: |
A clear and concise description of what the bug is.
Please explain the result you observed and the behavior you were expecting.
placeholder: |
A clear and concise description of what the bug is.
validations:
required: true
- type: textarea
attributes:
label: Code snippet to reproduce the bug
description: |
Sample code to reproduce the problem.
Please wrap your code snippet with ```` ```triple quotes blocks``` ```` for readability.
placeholder: |
```python
Sample code to reproduce the problem
```
validations:
required: true
- type: textarea
attributes:
label: Error traceback
description: |
The error message you received running the code snippet, with the full traceback.
Please wrap your error message with ```` ```triple quotes blocks``` ```` for readability.
placeholder: |
```
The error message you got, with the full traceback.
```
validations:
required: true
- type: textarea
attributes:
label: Environment
description: |
Please describe your environment:
OS:
Python version:
Library version:
Onnxruntime version:
validations:
required: true
- type: markdown
attributes:
value: >
Thanks for helping us improve the library!
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
contact_links:
- name: Usage questions
url: https://github.com/felixdittrich92/OnnxTR/discussions
about: Ask questions and discuss with other OnnxTR community members
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: 🚀 Feature request
description: >
Submit a proposal/request for a new feature for OnnxTR. Please search for existing issues before creating a new one.
For non-onnx related features please use the [main repository](https://github.com/mindee/doctr/issues).
labels: 'type: enhancement'
body:
- type: textarea
attributes:
label: 🚀 The feature
description: >
A clear and concise description of the feature proposal
validations:
required: true
- type: textarea
attributes:
label: Additional context
description: >
Add any other context or screenshots about the feature request.
- type: markdown
attributes:
value: >
Thanks for contributing 🎉
================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
open-pull-requests-limit: 10
target-branch: "main"
labels: ["topic: build"]
schedule:
interval: weekly
day: sunday
- package-ecosystem: "github-actions"
directory: "/"
open-pull-requests-limit: 10
target-branch: "main"
labels: ["topic: CI/CD"]
schedule:
interval: weekly
day: sunday
groups:
github-actions:
patterns:
- "*"
================================================
FILE: .github/release.yml
================================================
changelog:
exclude:
labels:
- ignore-for-release
categories:
- title: Breaking Changes 🛠
labels:
- "type: breaking change"
# NEW FEATURES
- title: New Features
labels:
- "type: new feature"
# BUG FIXES
- title: Bug Fixes
labels:
- "type: bug"
# IMPROVEMENTS
- title: Improvements
labels:
- "type: enhancement"
# MISC
- title: Miscellaneous
labels:
- "type: misc"
================================================
FILE: .github/workflows/builds.yml
================================================
name: builds
on:
push:
branches: main
pull_request:
branches: main
schedule:
# Runs every 2 weeks on Monday at 03:00 UTC
- cron: '0 3 * * 1'
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python: ["3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
# MacOS issue ref.: https://github.com/actions/setup-python/issues/855 & https://github.com/actions/setup-python/issues/865
python-version: ${{ matrix.os == 'macos-latest' && matrix.python == '3.10' && '3.11' || matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
- name: Install package
run: |
python -m pip install --upgrade pip
pip install -e .[cpu-headless,viz] --upgrade
- name: Import package
run: python -c "import onnxtr; print(onnxtr.__version__)"
conda:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: conda-incubator/setup-miniconda@v4
with:
auto-update-conda: true
python-version: "3.10"
channels: pypdfium2-team,bblanchon,defaults,conda-forge
channel-priority: strict
- name: Install dependencies
shell: bash -el {0}
run: conda install -y conda-build conda-verify anaconda-client
- name: Install libEGL
run: sudo apt-get update && sudo apt-get install -y libegl1
- name: Build and verify
shell: bash -el {0}
run: |
python setup.py sdist
mkdir conda-dist
conda build .conda/ --output-folder conda-dist
conda-verify conda-dist/linux-64/*conda --ignore=C1115
================================================
FILE: .github/workflows/clear_caches.yml
================================================
name: Clear GitHub runner caches
on:
workflow_dispatch:
schedule:
- cron: '0 0 * * *' # Runs once a day
jobs:
clear:
name: Clear caches
runs-on: ubuntu-latest
steps:
- uses: MyAlbum/purge-cache@v2
with:
max-age: 172800 # Caches older than 2 days are deleted
================================================
FILE: .github/workflows/demo.yml
================================================
name: Sync Hugging Face demo
on:
# Run 'test-demo' on every pull request to the main branch
pull_request:
branches: [main]
# Run 'sync-to-hub' on push when tagging (e.g., 'v*') and on a scheduled cron job
push:
tags:
- 'v*'
schedule:
- cron: '0 2 10 * *' # At 02:00 on day-of-month 10 (every month)
# Allow manual triggering of the workflow
workflow_dispatch:
jobs:
# This job runs on every pull request to main
test-demo:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python: ["3.10"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('demo/requirements.txt') }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r demo/requirements.txt --upgrade
- name: Start Gradio demo
run: |
nohup python demo/app.py &
sleep 10 # Allow some time for the Gradio server to start
- name: Check demo build
run: |
curl --fail http://127.0.0.1:7860/ || exit 1
# This job only runs when a new version tag is pushed or during the cron job
sync-to-hub:
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
needs: test-demo
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.10"
- name: Install huggingface_hub
run: pip install huggingface-hub
- name: Upload folder to Hugging Face
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
python -c "
import os
from huggingface_hub import HfApi
# Read the token from the step's env block instead of interpolating the
# secret into the script source (avoids script-injection/leak pitfalls).
api = HfApi(token=os.environ['HF_TOKEN'])
repo_id = 'Felix92/OnnxTR-OCR'
api.upload_folder(repo_id=repo_id, repo_type='space', folder_path='demo/')
api.restart_space(repo_id=repo_id, factory_reboot=True)
"
================================================
FILE: .github/workflows/docker.yml
================================================
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
#
name: Docker image on ghcr.io
on:
push:
tags:
- 'v*'
pull_request:
branches: main
schedule:
- cron: '0 2 1 6 *' # At 02:00 on day-of-month 1 in June (once a year actually)
env:
REGISTRY: ghcr.io
jobs:
build-and-push-image:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
image:
- "ubuntu:24.04" # Base image for CPU variants
- "nvidia/cuda:12.6.2-base-ubuntu24.04" # Base image for GPU
variant:
- "cpu-headless" # CPU variant 1
- "openvino-headless" # CPU variant 2
- "gpu-headless" # GPU variant
python: [3.10.13]
# Exclude invalid combinations
exclude:
- image: "nvidia/cuda:12.6.2-base-ubuntu24.04"
variant: "cpu-headless"
- image: "nvidia/cuda:12.6.2-base-ubuntu24.04"
variant: "openvino-headless"
- image: "ubuntu:24.04"
variant: "gpu-headless"
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v6
- name: Log in to the Container registry
uses: docker/login-action@v4
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Sanitize docker tag
run: |
# Start with the base prefix
PREFIX_DOCKER_TAG="OnnxTR-${{ matrix.variant }}-py${{ matrix.python }}"
# Replace any commas with hyphens (if needed)
PREFIX_DOCKER_TAG=$(echo "$PREFIX_DOCKER_TAG" | sed 's/,/-/g')
# Determine suffix based on image
IMAGE="${{ matrix.image }}"
case "$IMAGE" in
"nvidia/cuda:"*)
SUFFIX=$(echo "$IMAGE" | sed -E 's|.*/cuda:([0-9]+\.[0-9]+\.[0-9]+)-base-(ubuntu[0-9]+\.[0-9]+)|-\2-cuda\1|')
;;
"ubuntu:"*)
SUFFIX=$(echo "$IMAGE" | sed -E 's|ubuntu:([0-9]+\.[0-9]+)|-ubuntu\1|')
;;
*)
SUFFIX=""
;;
esac
# Combine the prefix, suffix, and ensure ending hyphen
PREFIX_DOCKER_TAG="${PREFIX_DOCKER_TAG}${SUFFIX}-"
# Export to environment
echo "PREFIX_DOCKER_TAG=${PREFIX_DOCKER_TAG}" >> $GITHUB_ENV
# Debugging output
echo "Final Docker Tag: $PREFIX_DOCKER_TAG"
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v6
with:
images: ${{ env.REGISTRY }}/${{ github.repository }}
tags: |
# used only on schedule event
type=schedule,pattern={{date 'YYYY-MM'}},prefix=${{ env.PREFIX_DOCKER_TAG }}
# used only if a tag following semver is published
type=semver,pattern={{raw}},prefix=${{ env.PREFIX_DOCKER_TAG }}
- name: Build Docker image
id: build
uses: docker/build-push-action@v7
with:
context: .
build-args: |
BASE_IMAGE=${{ matrix.image }}
SYSTEM=${{ matrix.variant }}
PYTHON_VERSION=${{ matrix.python }}
ONNXTR_REPO=${{ github.repository }}
ONNXTR_VERSION=${{ github.sha }}
push: false # push only if `import onnxtr` works
tags: ${{ steps.meta.outputs.tags }}
- name: Check if `import onnxtr` works
run: docker run ${{ steps.build.outputs.imageid }} python3 -c 'import onnxtr; print(onnxtr.__version__)'
- name: Push Docker image
if: ${{ (github.ref == 'refs/heads/main' && github.event_name != 'pull_request') || (startsWith(github.ref, 'refs/tags') && github.event_name == 'push') }}
uses: docker/build-push-action@v7
with:
context: .
build-args: |
BASE_IMAGE=${{ matrix.image }}
SYSTEM=${{ matrix.variant }}
PYTHON_VERSION=${{ matrix.python }}
ONNXTR_REPO=${{ github.repository }}
ONNXTR_VERSION=${{ github.sha }}
push: true
tags: ${{ steps.meta.outputs.tags }}
================================================
FILE: .github/workflows/main.yml
================================================
name: tests
on:
push:
branches: main
pull_request:
branches: main
schedule:
# Runs every 2 weeks on Monday at 03:00 UTC
- cron: '0 3 * * 1'
jobs:
pytest-common:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: ["3.10", "3.11", "3.12"]
backend: ["cpu-headless", "openvino-headless"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-tests
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[${{ matrix.backend }},viz,html,testing] --upgrade
- name: Run unittests
run: |
coverage run -m pytest tests/common/ -rs --memray
coverage xml -o coverage-common-${{ matrix.backend }}-${{ matrix.python }}.xml
- uses: actions/upload-artifact@v7
with:
name: coverage-common-${{ matrix.backend }}-${{ matrix.python }}
path: ./coverage-common-${{ matrix.backend }}-${{ matrix.python }}.xml
if-no-files-found: error
codecov-upload:
runs-on: ubuntu-latest
needs: [ pytest-common ]
steps:
- uses: actions/checkout@v6
- uses: actions/download-artifact@v8
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v6
with:
flags: unittests
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }}
================================================
FILE: .github/workflows/publish.yml
================================================
name: publish
on:
release:
types: [published]
jobs:
pypi:
if: "!github.event.release.prerelease"
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python: ["3.10"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine --upgrade
- name: Get release tag
id: release_tag
run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
VERSION: ${{ env.VERSION }}
run: |
BUILD_VERSION=$VERSION python setup.py sdist bdist_wheel
twine check dist/*
twine upload dist/*
pypi-check:
needs: pypi
if: "!github.event.release.prerelease"
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python: ["3.10"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Install package
run: |
python -m pip install --upgrade pip
pip install onnxtr[cpu] --upgrade
python -c "from importlib.metadata import version; print(version('onnxtr'))"
conda:
if: "!github.event.release.prerelease"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: conda-incubator/setup-miniconda@v4
with:
auto-update-conda: true
python-version: "3.10"
channels: pypdfium2-team,bblanchon,defaults,conda-forge
channel-priority: strict
- name: Install dependencies
shell: bash -el {0}
run: conda install -y conda-build conda-verify anaconda-client
- name: Install libEGL
run: sudo apt-get update && sudo apt-get install -y libegl1
- name: Get release tag
id: release_tag
run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV
- name: Build and publish
shell: bash -el {0}
env:
ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_TOKEN }}
VERSION: ${{ env.VERSION }}
run: |
# Export in the current shell: $GITHUB_ENV only takes effect in *subsequent*
# steps, so the sdist/conda-build commands below would not see BUILD_VERSION.
export BUILD_VERSION="${VERSION}"
python setup.py sdist
mkdir conda-dist
conda build .conda/ --output-folder conda-dist
conda-verify conda-dist/linux-64/*conda --ignore=C1115
anaconda upload conda-dist/linux-64/*conda
conda-check:
if: "!github.event.release.prerelease"
runs-on: ubuntu-latest
needs: conda
steps:
- uses: conda-incubator/setup-miniconda@v4
with:
auto-update-conda: true
python-version: "3.10"
- name: Install package
shell: bash -el {0}
run: |
conda config --set channel_priority strict
conda install -c conda-forge onnxruntime
conda install -c felix92 -c pypdfium2-team -c bblanchon -c defaults -c conda-forge onnxtr
python -c "from importlib.metadata import version; print(version('onnxtr'))"
================================================
FILE: .github/workflows/style.yml
================================================
name: style
on:
push:
branches: main
pull_request:
branches: main
jobs:
ruff:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: ["3.10"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Run ruff
run: |
pip install ruff --upgrade
ruff --version
ruff check --diff .
mypy:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: ["3.10"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[dev] --upgrade
pip install mypy --upgrade
- name: Run mypy
run: |
mypy --version
mypy
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Temp files
onnxtr/version.py
logs/
wandb/
.idea/
# Model files
*.onnx
.qodo
# Profile files
yappi_profile.stats
memray_profile.bin
memray_flamegraph.html
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-ast
- id: check-yaml
exclude: .conda
- id: check-toml
- id: check-json
- id: check-added-large-files
exclude: docs/images/
- id: end-of-file-fixer
- id: trailing-whitespace
- id: debug-statements
- id: check-merge-conflict
- id: no-commit-to-branch
args: ['--branch', 'main']
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.0
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
contact@mindee.com.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
================================================
FILE: Dockerfile
================================================
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

ARG SYSTEM
ARG PYTHON_VERSION

# System dependencies in a single layer; apt caches are cleaned to keep the image small.
# FIX: removed a stray `fi` (leftover from a deleted shell `if`) that was being
# passed as a literal extra argument to the final `rm -rf` command.
RUN apt-get update && apt-get install -y --no-install-recommends \
    # - Other packages
    build-essential \
    pkg-config \
    curl \
    wget \
    software-properties-common \
    unzip \
    git \
    # - Packages to build Python
    tar make gcc zlib1g-dev libffi-dev libssl-dev liblzma-dev libbz2-dev libsqlite3-dev \
    # - Packages for docTR
    libgl1-mesa-dev libsm6 libxext6 libxrender-dev libpangocairo-1.0-0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Python from source into /opt/python
# FIX: download over https instead of http
RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
    tar -zxf Python-$PYTHON_VERSION.tgz && \
    cd Python-$PYTHON_VERSION && \
    mkdir /opt/python/ && \
    ./configure --prefix=/opt/python && \
    make && \
    make install && \
    cd .. && \
    rm Python-$PYTHON_VERSION.tgz && \
    rm -r Python-$PYTHON_VERSION

ENV PATH=/opt/python/bin:$PATH

# Install OnnxTR from the given repo/ref, with the extras selected via SYSTEM (e.g. cpu, gpu)
ARG ONNXTR_REPO='felixdittrich92/onnxtr'
ARG ONNXTR_VERSION=main
RUN pip3 install -U pip setuptools wheel && \
    pip3 install "onnxtr[$SYSTEM,html]@git+https://github.com/$ONNXTR_REPO.git@$ONNXTR_VERSION"
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: Makefile
================================================
.PHONY: quality style test docs-single-version docs
# this target runs lint (ruff) and static type checks (mypy) on all files; it modifies nothing
quality:
ruff check .
mypy onnxtr/
# this target reformats all files and auto-fixes lint issues, potentially modifying some of them
style:
ruff format .
ruff check --fix .
# Run the common test suite under coverage (with pytest-memray); fails below 80% line coverage
test:
coverage run -m pytest tests/common/ -rs --memray
coverage report --fail-under=80 --show-missing
# Check that docs can build as a single version with sphinx-build
docs-single-version:
sphinx-build docs/source docs/_build -a
# Check that the full docs can build via the docs build script
docs:
cd docs && bash build.sh
================================================
FILE: README.md
================================================
[](LICENSE)

[](https://codecov.io/gh/felixdittrich92/OnnxTR)
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
[](https://socket.dev/pypi/package/onnxtr/overview/0.8.1/tar-gz)
[](https://pypi.org/project/OnnxTR/)
[](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
[](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)

> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide an Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
What you can expect from this repository:
- efficient ways to parse textual information (localize and identify each word) from your documents
- a Onnx pipeline for docTR, a wrapper around the [doctr](https://github.com/mindee/doctr) library - no PyTorch or TensorFlow dependencies
- more lightweight package with faster inference latency and less required resources
- 8-Bit quantized models for faster inference on CPU

## Installation
### Prerequisites
Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
### Latest release
You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
**NOTE:**
Currently supported execution providers by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU), CoreML (Apple Silicon).
For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started).
- **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
```shell
# standard cpu support
pip install "onnxtr[cpu]"
pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
# with gpu support
pip install "onnxtr[gpu]"
pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
# OpenVINO cpu | gpu support for Intel CPUs | GPUs
pip install "onnxtr[openvino]"
pip install "onnxtr[openvino-headless]" # same as openvino but with opencv-headless
# with HTML support
pip install "onnxtr[html]"
# with support for visualization
pip install "onnxtr[viz]"
# with support for all dependencies
pip install "onnxtr[html, gpu, viz]"
```
**Recommendation:**
If you have:
- a NVIDIA GPU, use one of the `gpu` variants
- an Intel CPU or GPU, use one of the `openvino` variants
- an Apple Silicon Mac, use one of the `cpu` variants (CoreML is auto-detected)
- otherwise, use one of the `cpu` variants
**OpenVINO:**
By default, OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision. To change the device or for further configuration, please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).
### Reading files
Documents can be interpreted from PDF / Images / Webpages / Multiple page images using the following code snippet:
```python
from onnxtr.io import DocumentFile
# PDF
pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
# Image
single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
# Webpage (requires `weasyprint` to be installed)
webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
# Multiple page images
multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
```
### Putting it together
Let's use the default `ocr_predictor` model for an example:
```python
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor, EngineConfig
model = ocr_predictor(
det_arch="fast_base", # detection architecture
reco_arch="vitstr_base", # recognition architecture
det_bs=2, # detection batch size
reco_bs=512, # recognition batch size
# Document related parameters
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
export_as_straight_boxes=False, # set to `True` if the boxes should be exported as if the pages were straight (default: False)
# Preprocessing related parameters
preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
# Additional parameters - meta information
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
# Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
# DocumentBuilder specific parameters
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
paragraph_break=0.035, # relative length of the minimum space separating paragraphs (default: 0.035)
# OnnxTR specific parameters
# NOTE: 8-Bit quantized models are not available for FAST detection models and can in general lead to poorer accuracy
load_in_8_bit=False, # set to `True` to load 8-bit quantized models instead of the full precision ones (default: False)
# Advanced engine configuration options
det_engine_cfg=EngineConfig(), # detection model engine configuration (default: internal predefined configuration)
reco_engine_cfg=EngineConfig(), # recognition model engine configuration (default: internal predefined configuration)
clf_engine_cfg=EngineConfig(), # classification (orientation) model engine configuration (default: internal predefined configuration)
)
# PDF
doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
# Analyze
result = model(doc)
# Display the result (requires matplotlib & mplcursors to be installed)
result.show()
```

Or even rebuild the original document from its predictions:
```python
import matplotlib.pyplot as plt
synthetic_pages = result.synthesize()
plt.imshow(synthetic_pages[0])
plt.axis("off")
plt.show()
```

The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`).
To get a better understanding of the document model, check out [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure):
You can also export them as a nested dict, more appropriate for JSON format / render it or export as XML (hocr format):
```python
json_output = result.export() # nested dict
text_output = result.render() # human-readable text
xml_output = result.export_as_xml() # hocr format
for output in xml_output:
xml_bytes_string = output[0]
xml_element = output[1]
```
Advanced engine configuration options
You can also define advanced engine configurations for the models / predictors:
```python
from onnxruntime import SessionOptions
from onnxtr.models import ocr_predictor, EngineConfig
general_options = (
SessionOptions()
) # For configuration options see: https://onnxruntime.ai/docs/api/python/api_summary.html#sessionoptions
general_options.enable_cpu_mem_arena = False
# NOTE: The following would force to run only on the GPU if no GPU is available it will raise an error
# List of strings e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] or a list of tuples with the provider and its options e.g.
# [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
providers = [
("CUDAExecutionProvider", {"device_id": 0, "cudnn_conv_algo_search": "DEFAULT"})
] # For available providers see: https://onnxruntime.ai/docs/execution-providers/
engine_config = EngineConfig(session_options=general_options, providers=providers)
# We use the default predictor with the custom engine configuration
# NOTE: You can define different engine configurations for detection, recognition and classification depending on your needs
predictor = ocr_predictor(det_engine_cfg=engine_config, reco_engine_cfg=engine_config, clf_engine_cfg=engine_config)
```
You can also dynamically configure whether the memory arena should shrink:
```python
from random import random
from onnxruntime import RunOptions, SessionOptions
from onnxtr.models import ocr_predictor, EngineConfig
def arena_shrinkage_handler(run_options: RunOptions) -> RunOptions:
"""
Shrink the memory arena on 10% of inference runs.
"""
if random() < 0.1:
run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu:0")
return run_options
engine_config = EngineConfig(run_options_provider=arena_shrinkage_handler)
engine_config.session_options.enable_mem_pattern = False
predictor = ocr_predictor(det_engine_cfg=engine_config, reco_engine_cfg=engine_config, clf_engine_cfg=engine_config)
```
## Loading custom exported models
You can also load docTR custom exported models:
For exporting please take a look at the [doctr documentation](https://mindee.github.io/doctr/using_doctr/using_model_export.html#export-to-onnx).
```python
from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
det_model = linknet_resnet18("path_to_custom_model.onnx")
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
```
## Loading models from HuggingFace Hub
You can also load models from the HuggingFace Hub:
```python
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor, from_hub
img = DocumentFile.from_images([""])
# Load your model from the hub
model = from_hub("onnxtr/my-model")
# Pass it to the predictor
# If your model is a recognition model:
predictor = ocr_predictor(det_arch="db_mobilenet_v3_large", reco_arch=model)
# If your model is a detection model:
predictor = ocr_predictor(det_arch=model, reco_arch="crnn_mobilenet_v3_small")
# Get your predictions
res = predictor(img)
```
HF Hub search: [here](https://huggingface.co/models?search=onnxtr).
Collection: [here](https://huggingface.co/collections/Felix92/onnxtr-66bf213a9f88f7346c90e842)
Or push your own models to the hub:
```python
from onnxtr.models import parseq, push_to_hf_hub, login_to_hub
from onnxtr.utils.vocabs import VOCABS
# Login to the hub
login_to_hub()
# Recognition model
model = parseq("~/onnxtr-parseq-multilingual-v1.onnx", vocab=VOCABS["multilingual"])
push_to_hf_hub(
model,
model_name="onnxtr-parseq-multilingual-v1",
task="recognition", # The task for which the model is intended [detection, recognition, classification]
arch="parseq", # The name of the model architecture
override=False, # Set to `True` if you want to override an existing model / repository
)
# Detection model
model = linknet_resnet18("~/onnxtr-linknet-resnet18.onnx")
push_to_hf_hub(model, model_name="onnxtr-linknet-resnet18", task="detection", arch="linknet_resnet18", override=True)
```
## Models architectures
Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
### Text Detection
- DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf).
- LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf)
- FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf)
### Text Recognition
- CRNN: [An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/pdf/1507.05717.pdf).
- SAR: [Show, Attend and Read:A Simple and Strong Baseline for Irregular Text Recognition](https://arxiv.org/pdf/1811.00751.pdf).
- MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
- ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
- PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
```python
predictor = ocr_predictor()
predictor.list_archs()
{
"detection archs": [
"db_resnet34",
"db_resnet50",
"db_mobilenet_v3_large",
"linknet_resnet18",
"linknet_resnet34",
"linknet_resnet50",
"fast_tiny", # No 8-bit support
"fast_small", # No 8-bit support
"fast_base", # No 8-bit support
],
"recognition archs": [
"crnn_vgg16_bn",
"crnn_mobilenet_v3_small",
"crnn_mobilenet_v3_large",
"sar_resnet31",
"master",
"vitstr_small",
"vitstr_base",
"parseqviptr_tiny", # No 8-bit support
],
}
```
### Documentation
This repository is in sync with the [doctr](https://github.com/mindee/doctr) library, which provides a high-level API to perform OCR on documents.
This repository stays up-to-date with the latest features and improvements from the base project.
So we can refer to the [doctr documentation](https://mindee.github.io/doctr/) for more detailed information.
NOTE:
- `pretrained` is the default in OnnxTR, and not available as a parameter.
- docTR specific environment variables (e.g.: DOCTR_CACHE_DIR -> ONNXTR_CACHE_DIR) needs to be replaced with `ONNXTR_` prefix.
### Benchmarks
The CPU benchmarks were measured on a `i7-14700K Intel CPU`.
The GPU benchmarks were measured on a `RTX 4080 Nvidia GPU`.
Benchmarking performed on the FUNSD dataset and CORD dataset.
docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.
The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision** on CPU.
- CPU benchmarks:
|Library |FUNSD (199 pages) |CORD (900 pages) |
|------------------------------------|-------------------------------|-------------------------------|
|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
|**OnnxTR (CPU)** - v0.6.0 | ~0.57s / Page | **~0.25s / Page** |
|**OnnxTR (CPU) 8-bit** - v0.6.0 | **~0.38s / Page** | **~0.14s / Page** |
|**OnnxTR (CPU-OpenVINO)** - v0.6.0 | **~0.15s / Page** | **~0.14s / Page** |
|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
|PaddleOCR (CPU) - no cls - v2.7.3 | ~1.27s / Page | ~0.38s / Page |
- GPU benchmarks:
|Library |FUNSD (199 pages) |CORD (900 pages) |
|-------------------------------------|-------------------------------|-------------------------------|
|docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
|**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
|OnnxTR (GPU) - v0.6.0 | **~0.06s / Page** | ~0.04s / Page |
|**OnnxTR (GPU) float16 - v0.6.0** | **~0.05s / Page** | **~0.03s / Page** |
|EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
|Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
|**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
## Citation
If you wish to cite please refer to the base project citation, feel free to use this [BibTeX](http://www.bibtex.org/) reference:
```bibtex
@misc{doctr2021,
title={docTR: Document Text Recognition},
author={Mindee},
year={2021},
publisher = {GitHub},
howpublished = {\url{https://github.com/mindee/doctr}}
}
```
```bibtex
@misc{onnxtr2024,
title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
author={Felix Dittrich},
year={2024},
publisher = {GitHub},
howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
}
```
## License
Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
================================================
FILE: demo/README.md
================================================
---
title: OnnxTR OCR
emoji: 🔥
colorFrom: red
colorTo: purple
sdk: gradio
sdk_version: 5.34.2
app_file: app.py
pinned: false
license: apache-2.0
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
## Run the demo locally
```bash
cd demo
pip install -r requirements.txt
python3 app.py
```
================================================
FILE: demo/app.py
================================================
import io
import os
from typing import Any
# NOTE: This is a fix to run the demo on the HuggingFace Zero GPU or CPU spaces
if os.environ.get("SPACES_ZERO_GPU") is not None:
    import spaces
else:

    class spaces:  # noqa: N801
        # Minimal stand-in for the HF `spaces` package on hosts without ZeroGPU:
        # `GPU` decorates a function without changing its behavior.
        @staticmethod
        def GPU(func):  # noqa: N802
            def _passthrough(*args, **kwargs):
                return func(*args, **kwargs)

            return _passthrough
import cv2
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.figure import Figure
from PIL import Image
from onnxtr.io import DocumentFile
from onnxtr.models import EngineConfig, from_hub, ocr_predictor
from onnxtr.models.predictor import OCRPredictor
from onnxtr.utils.visualization import visualize_page
# Text detection architectures selectable in the demo UI.
DET_ARCHS: list[str] = [
    "fast_base",
    "fast_small",
    "fast_tiny",
    "db_resnet50",
    "db_resnet34",
    "db_mobilenet_v3_large",
    "linknet_resnet18",
    "linknet_resnet34",
    "linknet_resnet50",
]
# Text recognition architectures selectable in the demo UI.
RECO_ARCHS: list[str] = [
    "crnn_vgg16_bn",
    "crnn_mobilenet_v3_small",
    "crnn_mobilenet_v3_large",
    "master",
    "sar_resnet31",
    "vitstr_small",
    "vitstr_base",
    "parseq",
    "viptr_tiny",
]
# Recognition models hosted on the HuggingFace Hub, loaded via `from_hub`
# instead of being resolved by architecture name.
CUSTOM_RECO_ARCHS: list[str] = [
    "Felix92/onnxtr-parseq-multilingual-v1",
]
def load_predictor(
    det_arch: str,
    reco_arch: str,
    use_gpu: bool,
    assume_straight_pages: bool,
    straighten_pages: bool,
    export_as_straight_boxes: bool,
    detect_language: bool,
    load_in_8_bit: bool,
    bin_thresh: float,
    box_thresh: float,
    disable_crop_orientation: bool = False,
    disable_page_orientation: bool = False,
) -> OCRPredictor:
    """Build an OnnxTR OCR predictor configured from the demo's UI settings.

    Args:
        det_arch: detection architecture name
        reco_arch: recognition architecture name, or a hub repo id listed in CUSTOM_RECO_ARCHS
        use_gpu: whether to use the default (GPU-capable) execution providers
        assume_straight_pages: whether to assume straight pages or not
        straighten_pages: whether to straighten rotated pages or not
        export_as_straight_boxes: whether to export straight boxes
        detect_language: whether to detect the language of the text
        load_in_8_bit: whether to load 8-bit quantized models
        bin_thresh: binarization threshold for the segmentation map
        box_thresh: minimal objectness score to consider a box
        disable_crop_orientation: whether to disable crop orientation or not
        disable_page_orientation: whether to disable page orientation or not

    Returns:
    -------
        instance of OCRPredictor
    """
    if use_gpu:
        engine_cfg = EngineConfig()
    else:
        # Force CPU execution with a conservative memory-arena strategy.
        engine_cfg = EngineConfig(
            providers=[("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
        )

    # Hub-hosted recognition models must be materialized through `from_hub`.
    recognition = from_hub(reco_arch) if reco_arch in CUSTOM_RECO_ARCHS else reco_arch

    predictor = ocr_predictor(
        det_arch=det_arch,
        reco_arch=recognition,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        detect_language=detect_language,
        load_in_8_bit=load_in_8_bit,
        export_as_straight_boxes=export_as_straight_boxes,
        detect_orientation=not assume_straight_pages,
        disable_crop_orientation=disable_crop_orientation,
        disable_page_orientation=disable_page_orientation,
        det_engine_cfg=engine_cfg,
        reco_engine_cfg=engine_cfg,
        clf_engine_cfg=engine_cfg,
    )
    # Apply the UI-tunable thresholds on the detection post-processor.
    predictor.det_predictor.model.postprocessor.bin_thresh = bin_thresh
    predictor.det_predictor.model.postprocessor.box_thresh = box_thresh
    return predictor
def forward_image(predictor: OCRPredictor, image: np.ndarray) -> np.ndarray:
    """Run only the detection stage of the predictor and return its raw output.

    Args:
        predictor: instance of OCRPredictor
        image: image to process

    Returns:
        the segmentation map produced by the detection model
    """
    batches = predictor.det_predictor.pre_processor([image])
    model_output = predictor.det_predictor.model(batches[0], return_model_output=True)
    return model_output["out_map"]
def matplotlib_to_pil(fig: Figure | np.ndarray) -> Image.Image:
    """Render a matplotlib figure (or a raw image array) into a PIL image.

    Args:
        fig: matplotlib figure or numpy array

    Returns:
        PIL image
    """
    buffer = io.BytesIO()
    # Figures know how to serialize themselves; raw arrays go through imsave.
    if isinstance(fig, Figure):
        fig.savefig(buffer)
    else:
        plt.imsave(buffer, fig)
    buffer.seek(0)
    return Image.open(buffer)
@spaces.GPU
def analyze_page(
    uploaded_file: Any,
    page_idx: int,
    det_arch: str,
    reco_arch: str,
    use_gpu: bool,
    assume_straight_pages: bool,
    disable_crop_orientation: bool,
    disable_page_orientation: bool,
    straighten_pages: bool,
    export_as_straight_boxes: bool,
    detect_language: bool,
    load_in_8_bit: bool,
    bin_thresh: float,
    box_thresh: float,
):
    """Run the full OCR pipeline on one page of the uploaded document.

    Args:
        uploaded_file: file to analyze
        page_idx: 1-based index of the page to analyze
        det_arch: detection architecture
        reco_arch: recognition architecture
        use_gpu: whether to use the GPU or not
        assume_straight_pages: whether to assume straight pages or not
        disable_crop_orientation: whether to disable crop orientation or not
        disable_page_orientation: whether to disable page orientation or not
        straighten_pages: whether to straighten rotated pages or not
        export_as_straight_boxes: whether to export straight boxes
        detect_language: whether to detect the language of the text
        load_in_8_bit: whether to load 8-bit quantized models
        bin_thresh: binarization threshold for the segmentation map
        box_thresh: minimal objectness score to consider a box

    Returns:
        input image, segmentation heatmap, output image, OCR output, synthesized page
    """
    if uploaded_file is None:
        return None, "Please upload a document", None, None, None

    # Decode the upload: PDFs are rasterized, images are read directly.
    if uploaded_file.name.endswith(".pdf"):
        doc = DocumentFile.from_pdf(uploaded_file)
    else:
        doc = DocumentFile.from_images(uploaded_file)

    # Clamp the page selection to the last available page.
    try:
        page = doc[page_idx - 1]
    except IndexError:
        page = doc[-1]
    img = page

    predictor = load_predictor(
        det_arch=det_arch,
        reco_arch=reco_arch,
        use_gpu=use_gpu,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        export_as_straight_boxes=export_as_straight_boxes,
        detect_language=detect_language,
        load_in_8_bit=load_in_8_bit,
        bin_thresh=bin_thresh,
        box_thresh=box_thresh,
        disable_crop_orientation=disable_crop_orientation,
        disable_page_orientation=disable_page_orientation,
    )

    # Detection-only forward pass, rescaled to the page for the heatmap view.
    seg_map = np.squeeze(forward_image(predictor, page))
    seg_map = cv2.resize(seg_map, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_LINEAR)
    seg_heatmap = matplotlib_to_pil(seg_map)

    out = predictor([page])
    page_export = out.pages[0].export()
    fig = visualize_page(out.pages[0].export(), out.pages[0].page, interactive=False, add_labels=False)
    out_img = matplotlib_to_pil(fig)

    # Synthesis needs axis-aligned boxes: either straight pages were assumed,
    # or rotated pages were straightened beforehand.
    synthesized_page = out.pages[0].synthesize() if (assume_straight_pages or straighten_pages) else None

    return img, seg_heatmap, out_img, page_export, synthesized_page
# Gradio UI: the left column holds the upload widget and every predictor
# setting; the right column displays the pipeline inputs/outputs.
with gr.Blocks(fill_height=True) as demo:
    gr.HTML(
        """
        To use this interactive demo for OnnxTR:
        1. Upload a document (PDF, JPG, or PNG)
        2. Select the model architectures for text detection and recognition you want to use
        3. Press the "Analyze page" button to process the uploaded document
        """
    )
    with gr.Row():
        # Configuration column
        with gr.Column(scale=1):
            upload = gr.File(label="Upload File [JPG | PNG | PDF]", file_types=[".pdf", ".jpg", ".png"])
            page_selection = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Page selection")
            det_model = gr.Dropdown(choices=DET_ARCHS, value=DET_ARCHS[0], label="Text detection model")
            reco_model = gr.Dropdown(
                choices=RECO_ARCHS + CUSTOM_RECO_ARCHS, value=RECO_ARCHS[0], label="Text recognition model"
            )
            use_gpu = gr.Checkbox(value=True, label="Use GPU")
            assume_straight = gr.Checkbox(value=True, label="Assume straight pages")
            disable_crop_orientation = gr.Checkbox(value=False, label="Disable crop orientation")
            disable_page_orientation = gr.Checkbox(value=False, label="Disable page orientation")
            straighten = gr.Checkbox(value=False, label="Straighten pages")
            export_as_straight_boxes = gr.Checkbox(value=False, label="Export as straight boxes")
            det_language = gr.Checkbox(value=False, label="Detect language")
            load_in_8_bit = gr.Checkbox(value=False, label="Load 8-bit quantized models")
            binarization_threshold = gr.Slider(
                minimum=0.1, maximum=0.9, value=0.3, step=0.1, label="Binarization threshold"
            )
            box_threshold = gr.Slider(minimum=0.1, maximum=0.9, value=0.1, step=0.1, label="Box threshold")
            analyze_button = gr.Button("Analyze page")
        # Results column
        with gr.Column(scale=3):
            with gr.Row():
                input_image = gr.Image(label="Input page", width=700, height=500)
                segmentation_heatmap = gr.Image(label="Segmentation heatmap", width=700, height=500)
                output_image = gr.Image(label="Output page", width=700, height=500)
            with gr.Row():
                with gr.Column(scale=3):
                    ocr_output = gr.JSON(label="OCR output", render=True, scale=1, height=500)
                with gr.Column(scale=3):
                    synthesized_page = gr.Image(label="Synthesized page", width=700, height=500)

    # Wire the button to the pipeline; the input order must match
    # `analyze_page`'s positional parameters exactly.
    analyze_button.click(
        analyze_page,
        inputs=[
            upload,
            page_selection,
            det_model,
            reco_model,
            use_gpu,
            assume_straight,
            disable_crop_orientation,
            disable_page_orientation,
            straighten,
            export_as_straight_boxes,
            det_language,
            load_in_8_bit,
            binarization_threshold,
            box_threshold,
        ],
        outputs=[input_image, segmentation_heatmap, output_image, ocr_output, synthesized_page],
    )

demo.launch(inbrowser=True, allowed_paths=["./data/logo.jpg"])
================================================
FILE: demo/packages.txt
================================================
python3-opencv
fonts-freefont-ttf
================================================
FILE: demo/requirements.txt
================================================
-e "onnxtr[gpu-headless,viz] @ git+https://github.com/felixdittrich92/OnnxTR.git"
gradio>=5.30.0,<7.0.0
spaces>=0.37.0
# Quick fix to avoid HuggingFace Spaces cudnn9.x Cuda12.x issue
# NOTE: outdated
# onnxruntime-gpu==1.19.0
================================================
FILE: onnxtr/__init__.py
================================================
from . import io, models, contrib, transforms, utils
from .version import __version__ # noqa: F401
================================================
FILE: onnxtr/contrib/__init__.py
================================================
from .artefacts import ArtefactDetector
================================================
FILE: onnxtr/contrib/artefacts.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import cv2
import numpy as np
from onnxtr.file_utils import requires_package
from .base import _BasePredictor
__all__ = ["ArtefactDetector"]
# Configuration registry: per-architecture input shape (CHW), class labels,
# and the URL of the pretrained ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "yolov8_artefact": {
        "input_shape": (3, 1024, 1024),
        "labels": ["bar_code", "qr_code", "logo", "photo"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/yolo_artefact-f9d66f14.onnx",
    },
}
class ArtefactDetector(_BasePredictor):
    """
    A class to detect artefacts in images

    >>> from onnxtr.io import DocumentFile
    >>> from onnxtr.contrib.artefacts import ArtefactDetector
    >>> doc = DocumentFile.from_images(["path/to/image.jpg"])
    >>> detector = ArtefactDetector()
    >>> results = detector(doc)

    Args:
        arch: the architecture to use
        batch_size: the batch size to use
        model_path: the path to the model to use
        labels: the labels to use
        input_shape: the input shape to use
        mask_labels: the mask labels to use
        conf_threshold: the confidence threshold to use
        iou_threshold: the intersection over union threshold to use
        **kwargs: additional arguments to be passed to `download_from_url`
    """

    def __init__(
        self,
        arch: str = "yolov8_artefact",
        batch_size: int = 2,
        model_path: str | None = None,
        labels: list[str] | None = None,
        input_shape: tuple[int, int, int] | None = None,
        conf_threshold: float = 0.5,
        iou_threshold: float = 0.5,
        **kwargs: Any,
    ) -> None:
        super().__init__(batch_size=batch_size, url=default_cfgs[arch]["url"], model_path=model_path, **kwargs)
        # Fall back to the registry defaults when the caller does not override.
        self.labels = labels or default_cfgs[arch]["labels"]
        self.input_shape = input_shape or default_cfgs[arch]["input_shape"]
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # Resize to the model's expected (W, H), transpose HWC -> CHW, and
        # scale pixel values into [0, 1].
        return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)

    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
        results = []

        # Pair each batch of raw model outputs with the images that produced it.
        for batch in zip(output, input_images):
            for out, img in zip(batch[0], batch[1]):
                org_height, org_width = img.shape[:2]
                # Scale factors mapping model-space coordinates back to the original image.
                width_scale, height_scale = org_width / self.input_shape[2], org_height / self.input_shape[1]
                for res in out:
                    sample_results = []
                    # Each transposed row is one candidate: [cx, cy, w, h, class scores...].
                    for row in np.transpose(np.squeeze(res)):
                        classes_scores = row[4:]
                        max_score = np.amax(classes_scores)
                        if max_score >= self.conf_threshold:
                            class_id = np.argmax(classes_scores)
                            x, y, w, h = row[0], row[1], row[2], row[3]
                            # to rescaled xmin, ymin, xmax, ymax
                            xmin = int((x - w / 2) * width_scale)
                            ymin = int((y - h / 2) * height_scale)
                            xmax = int((x + w / 2) * width_scale)
                            ymax = int((y + h / 2) * height_scale)
                            sample_results.append({
                                "label": self.labels[class_id],
                                "confidence": float(max_score),
                                "box": [xmin, ymin, xmax, ymax],
                            })

                    # Filter out overlapping boxes
                    boxes = [res["box"] for res in sample_results]
                    scores = [res["confidence"] for res in sample_results]
                    keep_indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf_threshold, self.iou_threshold)  # type: ignore[arg-type]
                    sample_results = [sample_results[i] for i in keep_indices]

                    results.append(sample_results)

        # Cache results so `show()` can render them later.
        self._results = results
        return results

    def show(self, **kwargs: Any) -> None:
        """
        Display the results

        Args:
            **kwargs: additional keyword arguments to be passed to `plt.show`
        """
        requires_package("matplotlib", "`.show()` requires matplotlib installed")
        import matplotlib.pyplot as plt
        from matplotlib.patches import Rectangle

        # visualize the results with matplotlib
        if self._results and self._inputs:
            for img, res in zip(self._inputs, self._results):
                plt.figure(figsize=(10, 10))
                plt.imshow(img)
                for obj in res:
                    xmin, ymin, xmax, ymax = obj["box"]
                    label = obj["label"]
                    plt.text(xmin, ymin, f"{label} {obj['confidence']:.2f}", color="red")
                    plt.gca().add_patch(
                        Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor="red", linewidth=2)
                    )
                # NOTE(review): show() is called per image here — confirm this matches
                # the intended (blocking) per-figure display behavior.
                plt.show(**kwargs)
================================================
FILE: onnxtr/contrib/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
import onnxruntime as ort
from onnxtr.utils.data import download_from_url
class _BasePredictor:
"""
Base class for all predictors
Args:
batch_size: the batch size to use
url: the url to use to download a model if needed
model_path: the path to the model to use
**kwargs: additional arguments to be passed to `download_from_url`
"""
def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
self.batch_size = batch_size
self.session = self._init_model(url, model_path, **kwargs)
self._inputs: list[np.ndarray] = []
self._results: list[Any] = []
def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
"""
Download the model from the given url if needed
Args:
url: the url to use
model_path: the path to the model to use
**kwargs: additional arguments to be passed to `download_from_url`
Returns:
Any: the ONNX loaded model
"""
if not url and not model_path:
raise ValueError("You must provide either a url or a model_path")
onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs)) # type: ignore[arg-type]
return ort.InferenceSession(onnx_model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
def preprocess(self, img: np.ndarray) -> np.ndarray:
"""
Preprocess the input image
Args:
img: the input image to preprocess
Returns:
np.ndarray: the preprocessed image
"""
raise NotImplementedError
def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
"""
Postprocess the model output
Args:
output: the model output to postprocess
input_images: the input images used to generate the output
Returns:
Any: the postprocessed output
"""
raise NotImplementedError
def __call__(self, inputs: list[np.ndarray]) -> Any:
"""
Call the model on the given inputs
Args:
inputs: the inputs to use
Returns:
Any: the postprocessed output
"""
self._inputs = inputs
model_inputs = self.session.get_inputs()
batched_inputs = [inputs[i : i + self.batch_size] for i in range(0, len(inputs), self.batch_size)]
processed_batches = [
np.array([self.preprocess(img) for img in batch], dtype=np.float32) for batch in batched_inputs
]
outputs = [self.session.run(None, {model_inputs[0].name: batch}) for batch in processed_batches]
return self.postprocess(outputs, batched_inputs)
================================================
FILE: onnxtr/file_utils.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import importlib.metadata
import logging
__all__ = ["requires_package"]

# Accepted truthy spellings for boolean-like environment variables.
ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
# Same as above, plus "AUTO" for settings that may defer the decision.
ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
def requires_package(name: str, extra_message: str | None = None) -> None: # pragma: no cover
"""
package requirement helper
Args:
name: name of the package
extra_message: additional message to display if the package is not found
"""
try:
_pkg_version = importlib.metadata.version(name)
logging.info(f"{name} version {_pkg_version} available.")
except importlib.metadata.PackageNotFoundError:
raise ImportError(
f"\n\n{extra_message if extra_message is not None else ''} "
f"\nPlease install it with the following command: pip install {name}\n"
)
================================================
FILE: onnxtr/io/__init__.py
================================================
from .elements import *
from .html import *
from .image import *
from .pdf import *
from .reader import *
================================================
FILE: onnxtr/io/elements.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from defusedxml import defuse_stdlib
defuse_stdlib()
from xml.etree import ElementTree as ET
from xml.etree.ElementTree import Element as ETElement
from xml.etree.ElementTree import SubElement
import numpy as np
import onnxtr
from onnxtr.file_utils import requires_package
from onnxtr.utils.common_types import BoundingBox
from onnxtr.utils.geometry import resolve_enclosing_bbox, resolve_enclosing_rbbox
from onnxtr.utils.reconstitution import synthesize_page
from onnxtr.utils.repr import NestedObject
try: # optional dependency for visualization
from onnxtr.utils.visualization import visualize_page
except ModuleNotFoundError: # pragma: no cover
pass
__all__ = ["Element", "Word", "Artefact", "Line", "Block", "Page", "Document"]
class Element(NestedObject):
    """Implements an abstract document element with exporting and text rendering capabilities"""

    _children_names: list[str] = []
    _exported_keys: list[str] = []

    def __init__(self, **kwargs: Any) -> None:
        # Only declared child collections may be set through the constructor.
        for key, value in kwargs.items():
            if key not in self._children_names:
                raise KeyError(f"{self.__class__.__name__} object does not have any attribute named '{key}'")
            setattr(self, key, value)

    def export(self) -> dict[str, Any]:
        """Exports the object into a nested dict format"""
        exported = {key: getattr(self, key) for key in self._exported_keys}
        for child_name in self._children_names:
            exported[child_name] = [child.export() for child in getattr(self, child_name)]
        return exported

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        raise NotImplementedError

    def render(self) -> str:
        raise NotImplementedError
class Word(Element):
    """Implements a word element

    Args:
        value: the text string of the word
        confidence: the confidence associated with the text prediction
        geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
            the page's size
        objectness_score: the objectness score of the detection
        crop_orientation: the general orientation of the crop in degrees and its confidence
    """

    _exported_keys: list[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
    _children_names: list[str] = []

    def __init__(
        self,
        value: str,
        confidence: float,
        geometry: BoundingBox | np.ndarray,
        objectness_score: float,
        crop_orientation: dict[str, Any],
    ) -> None:
        super().__init__()
        self.value = value
        self.confidence = confidence
        self.geometry = geometry
        self.objectness_score = objectness_score
        self.crop_orientation = crop_orientation

    def render(self) -> str:
        """Renders the full text of the element"""
        return self.value

    def extra_repr(self) -> str:
        return f"value='{self.value}', confidence={self.confidence:.2}"

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        # All constructor arguments map 1:1 onto the exported keys.
        return cls(**{key: save_dict[key] for key in cls._exported_keys})
class Artefact(Element):
    """Implements a non-textual element

    Args:
        artefact_type: the type of artefact
        confidence: the confidence of the type prediction
        geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
            the page's size.
    """

    _exported_keys: list[str] = ["geometry", "type", "confidence"]
    _children_names: list[str] = []

    def __init__(self, artefact_type: str, confidence: float, geometry: BoundingBox) -> None:
        super().__init__()
        self.geometry = geometry
        # Stored under `type` so it appears as "type" in export().
        self.type = artefact_type
        self.confidence = confidence

    def render(self) -> str:
        """Renders the full text of the element"""
        return f"[{self.type.upper()}]"

    def extra_repr(self) -> str:
        return f"type='{self.type}', confidence={self.confidence:.2}"

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        # BUGFIX: the exported key is "type" but the constructor parameter is
        # "artefact_type"; passing the exported keys verbatim raised a
        # TypeError (unexpected keyword argument 'type') on any round-trip.
        return cls(
            artefact_type=save_dict["type"],
            confidence=save_dict["confidence"],
            geometry=save_dict["geometry"],
        )
class Line(Element):
    """Implements a line element as a collection of words

    Args:
        words: list of word elements
        geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
            the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
            all words in it.
    """

    _exported_keys: list[str] = ["geometry", "objectness_score"]
    _children_names: list[str] = ["words"]
    words: list[Word] = []

    def __init__(
        self,
        words: list[Word],
        geometry: BoundingBox | np.ndarray | None = None,
        objectness_score: float | None = None,
    ) -> None:
        # Aggregate the word-level objectness scores when none is given.
        if objectness_score is None:
            objectness_score = float(np.mean([w.objectness_score for w in words]))
        # Resolve the geometry from the words when not provided explicitly.
        if geometry is None:
            word_boxes = [w.geometry for w in words]
            # Rotated boxes carry 4 points, straight ones only 2 corners.
            if len(words[0].geometry) == 4:
                geometry = resolve_enclosing_rbbox(word_boxes)  # type: ignore[misc]
            else:
                geometry = resolve_enclosing_bbox(word_boxes)  # type: ignore[misc]
        super().__init__(words=words)
        self.geometry = geometry
        self.objectness_score = objectness_score

    def render(self) -> str:
        """Renders the full text of the element"""
        return " ".join(word.render() for word in self.words)

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        kwargs = {key: save_dict[key] for key in cls._exported_keys}
        kwargs["words"] = [Word.from_dict(word_dict) for word_dict in save_dict["words"]]
        return cls(**kwargs)
class Block(Element):
    """Implements a block element as a collection of lines and artefacts

    Args:
        lines: list of line elements
        artefacts: list of artefacts
        geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
            the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
            all lines and artefacts in it.
        objectness_score: the objectness score of the block; computed from the words if not provided
    """

    _exported_keys: list[str] = ["geometry", "objectness_score"]
    _children_names: list[str] = ["lines", "artefacts"]
    lines: list[Line] = []
    artefacts: list[Artefact] = []

    def __init__(
        self,
        lines: list[Line] | None = None,
        artefacts: list[Artefact] | None = None,
        geometry: BoundingBox | np.ndarray | None = None,
        objectness_score: float | None = None,
    ) -> None:
        # BUGFIX: the defaults used to be shared mutable `[]` literals — any
        # later in-place mutation (e.g. `self.lines.append(...)`) would leak
        # across every Block constructed with the default. None sentinels keep
        # the interface backward-compatible.
        lines = [] if lines is None else lines
        artefacts = [] if artefacts is None else artefacts
        # Compute the objectness score of the block from its words when not given
        if objectness_score is None:
            objectness_score = float(np.mean([w.objectness_score for line in lines for w in line.words]))
        # Resolve the geometry using the smallest enclosing bounding box
        if geometry is None:
            line_boxes = [word.geometry for line in lines for word in line.words]
            artefact_boxes = [artefact.geometry for artefact in artefacts]
            # Rotated geometries are numpy arrays; straight boxes are corner tuples
            box_resolution_fn = (
                resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox
            )
            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore
        super().__init__(lines=lines, artefacts=artefacts)
        self.geometry = geometry
        self.objectness_score = objectness_score

    def render(self, line_break: str = "\n") -> str:
        """Renders the full text of the element"""
        return line_break.join(line.render() for line in self.lines)

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        kwargs = {k: save_dict[k] for k in cls._exported_keys}
        kwargs.update({
            "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
            "artefacts": [Artefact.from_dict(_dict) for _dict in save_dict["artefacts"]],
        })
        return cls(**kwargs)
class Page(Element):
    """Implements a page element as a collection of blocks

    Args:
        page: image encoded as a numpy array in uint8
        blocks: list of block elements
        page_idx: the index of the page in the input raw document
        dimensions: the page size in pixels in format (height, width)
        orientation: a dictionary with the value of the rotation angle in degrees and confidence of the prediction
        language: a dictionary with the language value and confidence of the prediction
    """

    _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"]
    _children_names: list[str] = ["blocks"]
    blocks: list[Block] = []

    def __init__(
        self,
        page: np.ndarray,
        blocks: list[Block],
        page_idx: int,
        dimensions: tuple[int, int],
        orientation: dict[str, Any] | None = None,
        language: dict[str, Any] | None = None,
    ) -> None:
        super().__init__(blocks=blocks)
        self.page = page
        self.page_idx = page_idx
        self.dimensions = dimensions
        # Any non-dict value (including None) falls back to an "unknown" payload.
        self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
        self.language = language if isinstance(language, dict) else dict(value=None, confidence=None)

    def render(self, block_break: str = "\n\n") -> str:
        """Renders the full text of the element"""
        return block_break.join(b.render() for b in self.blocks)

    def extra_repr(self) -> str:
        return f"dimensions={self.dimensions}"

    def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
        """Overlay the result on a given image

        Args:
            interactive: whether the display should be interactive
            preserve_aspect_ratio: pass True if you passed True to the predictor
            **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method
        """
        requires_package("matplotlib", "`.show()` requires matplotlib & mplcursors installed")
        requires_package("mplcursors", "`.show()` requires matplotlib & mplcursors installed")
        import matplotlib.pyplot as plt

        visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
        plt.show(**kwargs)

    def synthesize(self, **kwargs) -> np.ndarray:
        """Synthesize the page from the predictions

        Args:
            **kwargs: keyword arguments passed to the `synthesize_page` method

        Returns:
            synthesized page
        """
        return synthesize_page(self.export(), **kwargs)

    def export_as_xml(self, file_title: str = "OnnxTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]:
        """Export the page as XML (hOCR-format)
        convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md

        Args:
            file_title: the title of the XML file

        Returns:
            a tuple of the XML byte string, and its ElementTree
        """
        p_idx = self.page_idx
        # hOCR element ids are 1-based and global across the page.
        block_count: int = 1
        line_count: int = 1
        word_count: int = 1
        height, width = self.dimensions
        # NOTE(review): self.language holds "value"/"confidence" keys, so this
        # membership check always falls back to "en" — confirm intended lookup.
        language = self.language if "language" in self.language.keys() else "en"
        # Create the XML root element
        page_hocr = ETElement("html", attrib={"xmlns": "http://www.w3.org/1999/xhtml", "xml:lang": str(language)})
        # Create the header / SubElements of the root element
        head = SubElement(page_hocr, "head")
        SubElement(head, "title").text = file_title
        SubElement(head, "meta", attrib={"http-equiv": "Content-Type", "content": "text/html; charset=utf-8"})
        SubElement(
            head,
            "meta",
            attrib={"name": "ocr-system", "content": f"onnxtr {onnxtr.__version__}"},  # type: ignore[attr-defined]
        )
        SubElement(
            head,
            "meta",
            attrib={"name": "ocr-capabilities", "content": "ocr_page ocr_carea ocr_par ocr_line ocrx_word"},
        )
        # Create the body
        body = SubElement(page_hocr, "body")
        page_div = SubElement(
            body,
            "div",
            attrib={
                "class": "ocr_page",
                "id": f"page_{p_idx + 1}",
                "title": f"image; bbox 0 0 {width} {height}; ppageno 0",
            },
        )
        # iterate over the blocks / lines / words and create the XML elements in body line by line with the attributes
        for block in self.blocks:
            # Rotated geometries (4-point polygons) are not representable in hOCR bboxes.
            if len(block.geometry) != 2:
                raise TypeError("XML export is only available for straight bounding boxes for now.")
            (xmin, ymin), (xmax, ymax) = block.geometry
            block_div = SubElement(
                page_div,
                "div",
                attrib={
                    "class": "ocr_carea",
                    "id": f"block_{block_count}",
                    "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
                    {int(round(xmax * width))} {int(round(ymax * height))}",
                },
            )
            paragraph = SubElement(
                block_div,
                "p",
                attrib={
                    "class": "ocr_par",
                    "id": f"par_{block_count}",
                    "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
                    {int(round(xmax * width))} {int(round(ymax * height))}",
                },
            )
            block_count += 1
            for line in block.lines:
                (xmin, ymin), (xmax, ymax) = line.geometry
                # NOTE: baseline, x_size, x_descenders, x_ascenders is currently initalized to 0
                line_span = SubElement(
                    paragraph,
                    "span",
                    attrib={
                        "class": "ocr_line",
                        "id": f"line_{line_count}",
                        "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
                        {int(round(xmax * width))} {int(round(ymax * height))}; \
                        baseline 0 0; x_size 0; x_descenders 0; x_ascenders 0",
                    },
                )
                line_count += 1
                for word in line.words:
                    (xmin, ymin), (xmax, ymax) = word.geometry
                    conf = word.confidence
                    word_div = SubElement(
                        line_span,
                        "span",
                        attrib={
                            "class": "ocrx_word",
                            "id": f"word_{word_count}",
                            "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
                            {int(round(xmax * width))} {int(round(ymax * height))}; \
                            x_wconf {int(round(conf * 100))}",
                        },
                    )
                    # set the text
                    word_div.text = word.value
                    word_count += 1
        return (ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr))

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        kwargs = {k: save_dict[k] for k in cls._exported_keys}
        kwargs.update({"blocks": [Block.from_dict(block_dict) for block_dict in save_dict["blocks"]]})
        return cls(**kwargs)
class Document(Element):
    """Implements a document element as a collection of pages

    Args:
        pages: list of page elements
    """

    _children_names: list[str] = ["pages"]
    pages: list[Page] = []

    def __init__(self, pages: list[Page]) -> None:
        super().__init__(pages=pages)

    def render(self, page_break: str = "\n\n\n\n") -> str:
        """Renders the full text of the element"""
        return page_break.join(page.render() for page in self.pages)

    def show(self, **kwargs) -> None:
        """Overlay the result on a given image"""
        for page in self.pages:
            page.show(**kwargs)

    def synthesize(self, **kwargs) -> list[np.ndarray]:
        """Synthesize all pages from their predictions

        Args:
            **kwargs: keyword arguments passed to the `Page.synthesize` method

        Returns:
            list of synthesized pages
        """
        return [page.synthesize(**kwargs) for page in self.pages]

    def export_as_xml(self, **kwargs) -> list[tuple[bytes, ET.ElementTree]]:
        """Export the document as XML (hOCR-format)

        Args:
            **kwargs: additional keyword arguments passed to the Page.export_as_xml method

        Returns:
            list of tuple of (bytes, ElementTree)
        """
        return [page.export_as_xml(**kwargs) for page in self.pages]

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        kwargs = {key: save_dict[key] for key in cls._exported_keys}
        kwargs["pages"] = [Page.from_dict(page_dict) for page_dict in save_dict["pages"]]
        return cls(**kwargs)
================================================
FILE: onnxtr/io/html.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
__all__ = ["read_html"]
def read_html(url: str, **kwargs: Any) -> bytes:
    """Render a web page and convert it into a PDF byte stream

    >>> from onnxtr.io import read_html
    >>> doc = read_html("https://www.yoursite.com")

    Args:
        url: URL of the target web page
        **kwargs: keyword arguments from `weasyprint.HTML`

    Returns:
        the rendered PDF as a bytes stream
    """
    # Imported lazily: weasyprint is an optional dependency.
    from weasyprint import HTML

    document = HTML(url, **kwargs)
    return document.write_pdf()
================================================
FILE: onnxtr/io/image.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from pathlib import Path
import cv2
import numpy as np
from onnxtr.utils.common_types import AbstractFile
__all__ = ["read_img_as_numpy"]
def read_img_as_numpy(
    file: AbstractFile,
    output_size: tuple[int, int] | None = None,
    rgb_output: bool = True,
) -> np.ndarray:
    """Read an image file into numpy format

    >>> from onnxtr.io import read_img_as_numpy
    >>> page = read_img_as_numpy("path/to/your/doc.jpg")

    Args:
        file: the path to the image file
        output_size: the expected output size of each page in format H x W
        rgb_output: whether the output ndarray channel order should be RGB instead of BGR.

    Returns:
        the page decoded as numpy ndarray of shape H x W x 3
    """
    if isinstance(file, (str, Path)):
        if not Path(file).is_file():
            raise FileNotFoundError(f"unable to access {file}")
        img = cv2.imread(str(file), cv2.IMREAD_COLOR)
    elif isinstance(file, bytes):
        # Decode the in-memory buffer instead of touching the filesystem
        buffer: np.ndarray = np.frombuffer(file, np.uint8)
        img = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
    else:
        raise TypeError("unsupported object type for argument 'file'")

    # cv2 returns None (rather than raising) on undecodable data
    if img is None:
        raise ValueError("unable to read file.")

    if isinstance(output_size, tuple):
        # cv2.resize expects (W, H) while output_size is given as (H, W)
        img = cv2.resize(img, output_size[::-1], interpolation=cv2.INTER_LINEAR)

    # cv2 decodes as BGR; convert when the caller wants RGB
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) if rgb_output else img
================================================
FILE: onnxtr/io/pdf.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
import pypdfium2 as pdfium
from onnxtr.utils.common_types import AbstractFile
__all__ = ["read_pdf"]
def read_pdf(
    file: AbstractFile,
    scale: int = 2,
    rgb_mode: bool = True,
    password: str | None = None,
    **kwargs: Any,
) -> list[np.ndarray]:
    """Read a PDF file and convert it into an image in numpy format

    >>> from onnxtr.io import read_pdf
    >>> doc = read_pdf("path/to/your/doc.pdf")

    Args:
        file: the path to the PDF file
        scale: rendering scale (1 corresponds to 72dpi)
        rgb_mode: if True, the output will be RGB, otherwise BGR
        password: a password to unlock the document, if encrypted
        **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`

    Returns:
        the list of pages decoded as numpy ndarray of shape H x W x C
    """
    # Rasterise pages to numpy ndarrays with pypdfium2
    document = pdfium.PdfDocument(file, password=password)
    try:
        rendered = []
        for page in document:
            bitmap = page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs)
            rendered.append(bitmap.to_numpy())
        return rendered
    finally:
        # Always release the native document handle, even on render errors
        document.close()
================================================
FILE: onnxtr/io/reader.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from collections.abc import Sequence
from pathlib import Path
import numpy as np
from onnxtr.file_utils import requires_package
from onnxtr.utils.common_types import AbstractFile
from .html import read_html
from .image import read_img_as_numpy
from .pdf import read_pdf
__all__ = ["DocumentFile"]
class DocumentFile:
    """Read a document from multiple extensions"""

    @classmethod
    def from_pdf(cls, file: AbstractFile, **kwargs) -> list[np.ndarray]:
        """Read a PDF file

        >>> from onnxtr.io import DocumentFile
        >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")

        Args:
            file: the path to the PDF file or a binary stream
            **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`

        Returns:
            the list of pages decoded as numpy ndarray of shape H x W x 3
        """
        return read_pdf(file, **kwargs)

    @classmethod
    def from_url(cls, url: str, **kwargs) -> list[np.ndarray]:
        """Interpret a web page as a PDF document

        >>> from onnxtr.io import DocumentFile
        >>> doc = DocumentFile.from_url("https://www.yoursite.com")

        Args:
            url: the URL of the target web page
            **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`

        Returns:
            the list of pages decoded as numpy ndarray of shape H x W x 3
        """
        # weasyprint is optional: fail early with install instructions
        requires_package(
            "weasyprint",
            "`.from_url` requires weasyprint installed.\n"
            + "Installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html#installation",
        )
        # Render the page to a PDF stream, then reuse the PDF pathway
        return cls.from_pdf(read_html(url), **kwargs)

    @classmethod
    def from_images(cls, files: Sequence[AbstractFile] | AbstractFile, **kwargs) -> list[np.ndarray]:
        """Read an image file (or a collection of image files) and convert it into an image in numpy format

        >>> from onnxtr.io import DocumentFile
        >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"])

        Args:
            files: the path to the image file or a binary stream, or a collection of those
            **kwargs: additional parameters to :meth:`onnxtr.io.image.read_img_as_numpy`

        Returns:
            the list of pages decoded as numpy ndarray of shape H x W x 3
        """
        # Promote a single source to a one-element collection
        sources = [files] if isinstance(files, (str, Path, bytes)) else files
        return [read_img_as_numpy(source, **kwargs) for source in sources]
================================================
FILE: onnxtr/models/__init__.py
================================================
from .engine import EngineConfig
from .classification import *
from .detection import *
from .recognition import *
from .zoo import *
from .factory import *
================================================
FILE: onnxtr/models/_utils.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from math import floor
from statistics import median_low
import cv2
import numpy as np
from langdetect import LangDetectException, detect_langs
from onnxtr.utils.geometry import rotate_image
__all__ = ["estimate_orientation", "get_language"]
def get_max_width_length_ratio(contour: np.ndarray) -> float:
    """Get the maximum shape ratio of a contour.

    Args:
        contour: the contour from cv2.findContour

    Returns:
        the maximum shape ratio (0.0 for degenerate rectangles)
    """
    _, (width, height), _ = cv2.minAreaRect(contour)
    # A zero-sized side means the min-area rect is degenerate: no meaningful ratio
    if width == 0 or height == 0:
        return 0.0
    return max(width / height, height / width)
def estimate_orientation(
    img: np.ndarray,
    general_page_orientation: tuple[int, float] | None = None,
    n_ct: int = 70,
    ratio_threshold_for_lines: float = 3,
    min_confidence: float = 0.2,
    lower_area: int = 100,
) -> int:
    """Estimate the angle of the general document orientation based on the
    lines of the document and the assumption that they should be horizontal.

    Args:
        img: the img or bitmap to analyze (H, W, C)
        general_page_orientation: the general orientation of the page (angle [0, 90, 180, 270 (-90)], confidence)
            estimated by a model
        n_ct: the number of contours used for the orientation estimation
        ratio_threshold_for_lines: this is the ratio w/h used to discriminate lines
        min_confidence: the minimum confidence to consider the general_page_orientation
        lower_area: the minimum area of a contour to be considered

    Returns:
        the estimated angle of the page (clockwise, negative for left side rotation, positive for right side rotation)
    """
    assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
    # Convert image to grayscale if necessary, then binarize with Otsu
    if img.shape[-1] == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_img = cv2.medianBlur(gray_img, 5)
        thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    else:
        # Single-channel input is assumed to already be a binary map
        thresh = img.astype(np.uint8)

    page_orientation, orientation_confidence = general_page_orientation or (0, 0.0)
    # Only trust the model-provided orientation above the confidence floor
    is_confident = page_orientation is not None and orientation_confidence >= min_confidence
    base_angle = page_orientation if is_confident else 0

    if is_confident:
        # We rotate the image to the general orientation which improves the detection
        # No expand needed bitmap is already padded
        thresh = rotate_image(thresh, -base_angle)
    else:  # That's only required if we do not work on the detection models bin map
        # try to merge words in lines via a dilation sized relative to the page
        (h, w) = img.shape[:2]
        k_x = max(1, (floor(w / 100)))
        k_y = max(1, (floor(h / 100)))
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (k_x, k_y))
        thresh = cv2.dilate(thresh, kernel, iterations=1)

    # extract contours
    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # Filter out small contours, then sort most line-like (elongated) first
    contours = sorted(
        [contour for contour in contours if cv2.contourArea(contour) > lower_area],
        key=get_max_width_length_ratio,
        reverse=True,
    )

    angles = []
    for contour in contours[:n_ct]:
        _, (w, h), angle = cv2.minAreaRect(contour)
        # OpenCV version-proof normalization: force 'w' to be the long side
        # so the angle is consistently relative to the major axis.
        # https://github.com/opencv/opencv/pull/28051/changes
        if w < h:
            w, h = h, w
            angle -= 90
        # Normalize angle to be within [-90, 90]
        while angle <= -90:
            angle += 180
        while angle > 90:
            angle -= 180
        if h > 0:
            if w / h > ratio_threshold_for_lines:  # select only contours with ratio like lines
                angles.append(angle)
            elif w / h < 1 / ratio_threshold_for_lines:  # if lines are vertical, subtract 90 degrees
                angles.append(angle - 90)

    if len(angles) == 0:
        skew_angle = 0  # in case no angle is found
    else:
        # median_low picks a value from the data to avoid outliers
        median = -median_low(angles)
        skew_angle = -round(median) if abs(median) != 0 else 0

    # Resolve the 90-degree flip ambiguity.
    # If the estimation is exactly 90/-90, it's usually a vertical detection of horizontal lines.
    if abs(skew_angle) == 90:
        skew_angle = 0

    # combine with the general orientation and the estimated angle
    # Apply the detected skew to our base orientation
    final_angle = base_angle + skew_angle
    # Standardize result to [-179, 180] range to handle wrap-around cases (e.g., 180 + -31)
    while final_angle > 180:
        final_angle -= 360
    while final_angle <= -180:
        final_angle += 360

    if is_confident:
        # If the estimated angle is perpendicular, treat it as 0 to avoid wrong flips
        if abs(skew_angle) % 90 == 0:
            return page_orientation
        # special case where the estimated angle is mostly wrong:
        # case 1: - and + swapped
        # case 2: estimated angle is completely wrong
        # so in this case we prefer the general page orientation
        if abs(skew_angle) == abs(page_orientation) and page_orientation != 0:
            return page_orientation

    return int(
        final_angle
    )  # return the clockwise angle (negative - left side rotation, positive - right side rotation)
def rectify_crops(
    crops: list[np.ndarray],
    orientations: list[int],
) -> list[np.ndarray]:
    """Rotate each crop of the list back to a readable orientation.

    The orientation class maps to counter-clockwise quarter turns:
        0: already straight, no rotation
        1: 90 ccw, rotate 3 times ccw
        2: 180, rotate 2 times ccw
        3: 270 ccw, rotate 1 time ccw
    """
    if not orientations:
        return []
    # Invert predictions: a detected +90 rotation is undone with -90 (i.e. 3 ccw turns)
    turns = [0 if pred == 0 else 4 - pred for pred in orientations]
    return [np.rot90(crop, k) if k != 0 else crop for k, crop in zip(turns, crops)]
def rectify_loc_preds(
page_loc_preds: np.ndarray,
orientations: list[int],
) -> np.ndarray | None:
"""Orient the quadrangle (Polygon4P) according to the predicted orientation,
so that the points are in this order: top L, top R, bot R, bot L if the crop is readable
"""
return (
np.stack(
[
np.roll(page_loc_pred, orientation, axis=0)
for orientation, page_loc_pred in zip(orientations, page_loc_preds)
],
axis=0,
)
if len(orientations) > 0
else None
)
def get_language(text: str) -> tuple[str, float]:
    """Get languages of a text using langdetect model.
    Get the language with the highest probability or no language if only a few words or a low probability

    Args:
        text (str): text

    Returns:
        The detected language in ISO 639 code and confidence score
    """
    try:
        best = detect_langs(text.lower())[0]
    except LangDetectException:
        return "unknown", 0.0
    # Discard detections on tiny inputs or short low-confidence inputs
    too_short = len(text) <= 1
    low_signal = len(text) <= 5 and best.prob <= 0.2
    if too_short or low_signal:
        return "unknown", 0.0
    return best.lang, best.prob
================================================
FILE: onnxtr/models/builder.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from scipy.cluster.hierarchy import fclusterdata
from onnxtr.io.elements import Block, Document, Line, Page, Word
from onnxtr.utils.geometry import estimate_page_angle, resolve_enclosing_bbox, resolve_enclosing_rbbox, rotate_boxes
from onnxtr.utils.repr import NestedObject
__all__ = ["DocumentBuilder"]
class DocumentBuilder(NestedObject):
    """Implements a document builder

    Args:
        resolve_lines: whether words should be automatically grouped into lines
        resolve_blocks: whether lines should be automatically grouped into blocks
        paragraph_break: relative length of the minimum space separating paragraphs
        export_as_straight_boxes: if True, force straight boxes in the export (fit a rectangle
            box to all rotated boxes). Else, keep the boxes format unchanged, no matter what it is.
    """

    def __init__(
        self,
        resolve_lines: bool = True,
        resolve_blocks: bool = False,
        paragraph_break: float = 0.035,
        export_as_straight_boxes: bool = False,
    ) -> None:
        self.resolve_lines = resolve_lines
        self.resolve_blocks = resolve_blocks
        self.paragraph_break = paragraph_break
        self.export_as_straight_boxes = export_as_straight_boxes

    @staticmethod
    def _sort_boxes(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Sort bounding boxes from top to bottom, left to right

        Args:
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2) (in case of rotated bbox)

        Returns:
            tuple: indices of ordered boxes of shape (N,), boxes
                If straight boxes are passed to the function, boxes are unchanged
                else: boxes returned are straight boxes fitted to the straightened rotated boxes
                so that we fit the lines afterwards to the straightened page
        """
        if boxes.ndim == 3:
            # Straighten the rotated boxes before fitting axis-aligned rectangles
            boxes = rotate_boxes(
                loc_preds=boxes,
                angle=-estimate_page_angle(boxes),
                orig_shape=(1024, 1024),
                min_angle=5.0,
            )
            boxes = np.concatenate((boxes.min(1), boxes.max(1)), -1)
        # Sort primarily by row (y, scaled by median box height), then by x
        return (boxes[:, 0] + 2 * boxes[:, 3] / np.median(boxes[:, 3] - boxes[:, 1])).argsort(), boxes

    def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: list[int]) -> list[list[int]]:
        """Split a line in sub_lines

        Args:
            boxes: bounding boxes of shape (N, 4)
            word_idcs: list of indexes for the words of the line

        Returns:
            A list of (sub-)lines computed from the original line (words)
        """
        lines = []
        # Sort words horizontally
        word_idcs = [word_idcs[idx] for idx in boxes[word_idcs, 0].argsort().tolist()]

        # Eventually split line horizontally
        if len(word_idcs) < 2:
            lines.append(word_idcs)
        else:
            sub_line = [word_idcs[0]]
            for i in word_idcs[1:]:
                horiz_break = True
                prev_box = boxes[sub_line[-1]]
                # Compute distance between boxes
                dist = boxes[i, 0] - prev_box[2]
                # If distance between boxes is lower than paragraph break, same sub-line
                if dist < self.paragraph_break:
                    horiz_break = False
                if horiz_break:
                    lines.append(sub_line)
                    sub_line = []
                sub_line.append(i)
            lines.append(sub_line)
        return lines

    def _resolve_lines(self, boxes: np.ndarray) -> list[list[int]]:
        """Order boxes to group them in lines

        Args:
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2) in case of rotated bbox

        Returns:
            nested list of box indices
        """
        # Sort boxes, and straighten the boxes if they are rotated
        idxs, boxes = self._sort_boxes(boxes)
        # Compute median for boxes heights
        y_med = np.median(boxes[:, 3] - boxes[:, 1])

        lines = []
        words = [idxs[0]]  # Assign the top-left word to the first line
        # Define a mean y-center for the line
        y_center_sum = boxes[idxs[0]][[1, 3]].mean()

        for idx in idxs[1:]:
            vert_break = True

            # Compute y_dist between the candidate box and the running line mean
            y_dist = abs(boxes[idx][[1, 3]].mean() - y_center_sum / len(words))
            # If y-center of the box is close enough to mean y-center of the line, same line
            if y_dist < y_med / 2:
                vert_break = False

            if vert_break:
                # Compute sub-lines (horizontal split)
                lines.extend(self._resolve_sub_lines(boxes, words))
                words = []
                y_center_sum = 0

            words.append(idx)
            y_center_sum += boxes[idx][[1, 3]].mean()

        # Use the remaining words to form the last(s) line(s)
        if len(words) > 0:
            # Compute sub-lines (horizontal split)
            lines.extend(self._resolve_sub_lines(boxes, words))

        return lines

    @staticmethod
    def _resolve_blocks(boxes: np.ndarray, lines: list[list[int]]) -> list[list[list[int]]]:
        """Order lines to group them in blocks

        Args:
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2)
            lines: list of lines, each line is a list of idx

        Returns:
            nested list of box indices
        """
        # Resolve enclosing boxes of lines
        if boxes.ndim == 3:
            box_lines: np.ndarray = np.asarray([
                resolve_enclosing_rbbox([tuple(boxes[idx, :, :]) for idx in line])  # type: ignore[misc]
                for line in lines
            ])
        else:
            _box_lines = [
                resolve_enclosing_bbox([(tuple(boxes[idx, :2]), tuple(boxes[idx, 2:])) for idx in line])
                for line in lines
            ]
            box_lines = np.asarray([(x1, y1, x2, y2) for ((x1, y1), (x2, y2)) in _box_lines])

        # Compute geometrical features of lines to cluster
        # Clustering only with box centers yields poor results for complex documents
        if boxes.ndim == 3:
            box_features: np.ndarray = np.stack(
                (
                    (box_lines[:, 0, 0] + box_lines[:, 0, 1]) / 2,
                    (box_lines[:, 0, 0] + box_lines[:, 2, 0]) / 2,
                    (box_lines[:, 0, 0] + box_lines[:, 2, 1]) / 2,
                    (box_lines[:, 0, 1] + box_lines[:, 2, 1]) / 2,
                    (box_lines[:, 0, 1] + box_lines[:, 2, 0]) / 2,
                    (box_lines[:, 2, 0] + box_lines[:, 2, 1]) / 2,
                ),
                axis=-1,
            )
        else:
            box_features = np.stack(
                (
                    (box_lines[:, 0] + box_lines[:, 3]) / 2,
                    (box_lines[:, 1] + box_lines[:, 2]) / 2,
                    (box_lines[:, 0] + box_lines[:, 2]) / 2,
                    (box_lines[:, 1] + box_lines[:, 3]) / 2,
                    box_lines[:, 0],
                    box_lines[:, 1],
                ),
                axis=-1,
            )
        # Compute clusters via hierarchical clustering on the line features
        clusters = fclusterdata(box_features, t=0.1, depth=4, criterion="distance", metric="euclidean")

        _blocks: dict[int, list[int]] = {}
        # Form clusters
        for line_idx, cluster_idx in enumerate(clusters):
            if cluster_idx in _blocks.keys():
                _blocks[cluster_idx].append(line_idx)
            else:
                _blocks[cluster_idx] = [line_idx]

        # Retrieve word-box level to return a fully nested structure
        blocks = [[lines[idx] for idx in block] for block in _blocks.values()]

        return blocks

    def _build_blocks(
        self,
        boxes: np.ndarray,
        objectness_scores: np.ndarray,
        word_preds: list[tuple[str, float]],
        crop_orientations: list[dict[str, Any]],
    ) -> list[Block]:
        """Gather independent words in structured blocks

        Args:
            boxes: bounding boxes of all detected words of the page, of shape (N, 4) or (N, 4, 2)
            objectness_scores: objectness scores of all detected words of the page, of shape N
            word_preds: list of all detected words of the page, of shape N
            crop_orientations: list of dictionaries containing
                the general orientation (orientations + confidences) of the crops

        Returns:
            list of block elements
        """
        if boxes.shape[0] != len(word_preds):
            raise ValueError(f"Incompatible argument lengths: {boxes.shape[0]}, {len(word_preds)}")

        if boxes.shape[0] == 0:
            return []

        # Decide whether we try to form lines
        _boxes = boxes
        if self.resolve_lines:
            lines = self._resolve_lines(_boxes if _boxes.ndim == 3 else _boxes[:, :4])
            # Decide whether we try to form blocks
            if self.resolve_blocks and len(lines) > 1:
                _blocks = self._resolve_blocks(_boxes if _boxes.ndim == 3 else _boxes[:, :4], lines)
            else:
                _blocks = [lines]
        else:
            # Sort bounding boxes, one line for all boxes, one block for the line
            lines = [self._sort_boxes(_boxes if _boxes.ndim == 3 else _boxes[:, :4])[0]]  # type: ignore[list-item]
            _blocks = [lines]

        blocks = [
            Block([
                Line([
                    Word(
                        *word_preds[idx],
                        tuple(tuple(pt) for pt in boxes[idx].tolist()),  # type: ignore[arg-type]
                        float(objectness_scores[idx]),
                        crop_orientations[idx],
                    )
                    if boxes.ndim == 3
                    else Word(
                        *word_preds[idx],
                        ((boxes[idx, 0], boxes[idx, 1]), (boxes[idx, 2], boxes[idx, 3])),
                        float(objectness_scores[idx]),
                        crop_orientations[idx],
                    )
                    for idx in line
                ])
                for line in lines
            ])
            for lines in _blocks
        ]

        return blocks

    def extra_repr(self) -> str:
        return (
            f"resolve_lines={self.resolve_lines}, resolve_blocks={self.resolve_blocks}, "
            f"paragraph_break={self.paragraph_break}, "
            f"export_as_straight_boxes={self.export_as_straight_boxes}"
        )

    def __call__(
        self,
        pages: list[np.ndarray],
        boxes: list[np.ndarray],
        objectness_scores: list[np.ndarray],
        text_preds: list[list[tuple[str, float]]],
        page_shapes: list[tuple[int, int]],
        crop_orientations: list[dict[str, Any]],
        orientations: list[dict[str, Any]] | None = None,
        languages: list[dict[str, Any]] | None = None,
    ) -> Document:
        """Re-arrange detected words into structured blocks

        Args:
            pages: list of N elements, where each element represents the page image
            boxes: list of N elements, where each element represents the localization predictions, of shape (*, 4)
                or (*, 4, 2) for all words for a given page
            objectness_scores: list of N elements, where each element represents the objectness scores
            text_preds: list of N elements, where each element is the list of all word prediction (text + confidence)
            page_shapes: shape of each page, of size N
            crop_orientations: list of N elements, where each element is
                a dictionary containing the general orientation (orientations + confidences) of the crops
            orientations: optional, list of N elements,
                where each element is a dictionary containing the orientation (orientation + confidence)
            languages: optional, list of N elements,
                where each element is a dictionary containing the language (language + confidence)

        Returns:
            document object
        """
        # NOTE(review): Python chains `!=` pairwise (a != b and b != c), so this
        # only compares adjacent lengths — TODO confirm this is the intended check
        if len(boxes) != len(text_preds) != len(crop_orientations) != len(objectness_scores) or len(boxes) != len(
            page_shapes
        ) != len(crop_orientations) != len(objectness_scores):
            raise ValueError("All arguments are expected to be lists of the same size")

        _orientations = orientations if isinstance(orientations, list) else [None] * len(boxes)
        _languages = languages if isinstance(languages, list) else [None] * len(boxes)
        if self.export_as_straight_boxes and len(boxes) > 0:
            # If boxes are already straight OK, else fit a bounding rect
            if boxes[0].ndim == 3:
                # Iterate over pages and boxes
                boxes = [np.concatenate((p_boxes.min(1), p_boxes.max(1)), 1) for p_boxes in boxes]

        _pages = [
            Page(
                page,
                self._build_blocks(
                    page_boxes,
                    loc_scores,
                    word_preds,
                    word_crop_orientations,
                ),
                _idx,
                shape,
                orientation,
                language,
            )
            for page, _idx, shape, page_boxes, loc_scores, word_preds, word_crop_orientations, orientation, language in zip(  # noqa: E501
                pages,
                range(len(boxes)),
                page_shapes,
                boxes,
                objectness_scores,
                text_preds,
                crop_orientations,
                _orientations,
                _languages,
            )
        ]

        return Document(_pages)
================================================
FILE: onnxtr/models/classification/__init__.py
================================================
from .models import *
from .zoo import *
================================================
FILE: onnxtr/models/classification/models/__init__.py
================================================
from .mobilenet import *
================================================
FILE: onnxtr/models/classification/models/mobilenet.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
# Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py
from copy import deepcopy
from typing import Any
import numpy as np
from ...engine import Engine, EngineConfig
__all__ = [
    "MobileNetV3",
    "mobilenet_v3_small_crop_orientation",
    "mobilenet_v3_small_page_orientation",
]

# Default configuration for each supported orientation classifier:
# normalization stats ("mean"/"std"), expected input shape (C, H, W),
# the class labels ("classes", rotation angles in degrees), and download
# URLs for the fp32 ("url") and statically-quantized 8-bit ("url_8_bit") models.
default_cfgs: dict[str, dict[str, Any]] = {
    "mobilenet_v3_small_crop_orientation": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 256, 256),
        "classes": [0, -90, 180, 90],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_crop_orientation-4fde60a1.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_crop_orientation_static_8_bit-c32c7721.onnx",
    },
    "mobilenet_v3_small_page_orientation": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 512, 512),
        "classes": [0, -90, 180, 90],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_page_orientation-60606ce4.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_page_orientation_static_8_bit-13b5b014.onnx",
    },
}
class MobileNetV3(Engine):
    """MobileNetV3 Onnx loader

    Args:
        model_path: path or url to onnx model file
        engine_cfg: configuration for the inference engine
        cfg: configuration dictionary
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        # Keep the architecture configuration around for downstream predictors
        # (e.g. mean/std, input_shape, class labels)
        self.cfg = cfg

    def __call__(
        self,
        x: np.ndarray,
    ) -> np.ndarray:
        # Forward the batch through the underlying ONNX session
        return self.run(x)
def _mobilenet_v3(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MobileNetV3:
    """Instantiate a MobileNetV3 classifier from its default configuration.

    Args:
        arch: key into `default_cfgs`
        model_path: path or url to the onnx model file
        load_in_8_bit: whether to load the 8-bit quantized model
        engine_cfg: configuration for the inference engine
        **kwargs: additional arguments passed to `MobileNetV3`

    Returns:
        the loaded MobileNetV3 model
    """
    # Swap in the quantized checkpoint only when the path is a remote (default) URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    return MobileNetV3(model_path, cfg=deepcopy(default_cfgs[arch]), engine_cfg=engine_cfg, **kwargs)
def mobilenet_v3_small_crop_orientation(
    model_path: str = default_cfgs["mobilenet_v3_small_crop_orientation"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MobileNetV3:
    """MobileNetV3-Small architecture as described in
    `"Searching for MobileNetV3", <https://arxiv.org/pdf/1905.02244.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import mobilenet_v3_small_crop_orientation
    >>> model = mobilenet_v3_small_crop_orientation()
    >>> input_tensor = np.random.rand((1, 3, 256, 256))
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the MobileNetV3 architecture

    Returns:
        MobileNetV3
    """
    return _mobilenet_v3(
        "mobilenet_v3_small_crop_orientation",
        model_path,
        load_in_8_bit,
        engine_cfg,
        **kwargs,
    )
def mobilenet_v3_small_page_orientation(
    model_path: str = default_cfgs["mobilenet_v3_small_page_orientation"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MobileNetV3:
    """MobileNetV3-Small architecture as described in
    `"Searching for MobileNetV3", <https://arxiv.org/pdf/1905.02244.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import mobilenet_v3_small_page_orientation
    >>> model = mobilenet_v3_small_page_orientation()
    >>> input_tensor = np.random.rand((1, 3, 512, 512))
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the MobileNetV3 architecture

    Returns:
        MobileNetV3
    """
    return _mobilenet_v3(
        "mobilenet_v3_small_page_orientation",
        model_path,
        load_in_8_bit,
        engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/models/classification/predictor/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/models/classification/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.repr import NestedObject
__all__ = ["OrientationPredictor"]
class OrientationPredictor(NestedObject):
    """Implements an object able to detect the reading direction of a text box or a page.
    4 possible orientations: 0, 90, 180, 270 (-90) degrees counter clockwise.

    Args:
        pre_processor: transform inputs for easier batched model inference
        model: core classification architecture (backbone + classification head)
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
    """

    _children_names: list[str] = ["pre_processor", "model"]

    def __init__(
        self,
        pre_processor: PreProcessor | None,
        model: Any | None,
    ) -> None:
        # Anything that is not a real PreProcessor disables preprocessing
        self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
        self.model = model

    def __call__(
        self,
        inputs: list[np.ndarray],
    ) -> list[list[int] | list[float]]:
        # Dimension check: every input must be a multi-channel 2D image
        if any(sample.ndim != 3 for sample in inputs):
            raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")

        if self.model is None or self.pre_processor is None:
            # Predictor disabled: neutral orientation (class 0) with full confidence
            return [[0] * len(inputs), [0] * len(inputs), [1.0] * len(inputs)]

        batches = self.pre_processor(inputs)
        logits = [self.model(batch) for batch in batches]
        # Per-sample confidence: max of the softmax over classes
        probs = [np.max(softmax(batch, axis=1), axis=1) for batch in logits]
        # Predicted class index per sample
        argmaxes = [np.argmax(batch, axis=1) for batch in logits]
        class_idxs = [int(pred) for batch in argmaxes for pred in batch]
        # Map class indices to their angle labels from the model config
        classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs]
        confs = [round(float(p), 2) for prob in probs for p in prob]
        return [class_idxs, classes, confs]
================================================
FILE: onnxtr/models/classification/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from onnxtr.models.engine import EngineConfig
from .. import classification
from ..preprocessor import PreProcessor
from .predictor import OrientationPredictor
__all__ = ["crop_orientation_predictor", "page_orientation_predictor"]

# Architectures accepted (by name) when building an OrientationPredictor
ORIENTATION_ARCHS: list[str] = ["mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation"]
def _orientation_predictor(
    arch: Any,
    model_type: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    disabled: bool = False,
    **kwargs: Any,
) -> OrientationPredictor:
    """Build an OrientationPredictor from an architecture name or a model instance.

    Args:
        arch: architecture name (str) or an already-instantiated MobileNetV3
        model_type: either "crop" or "page"; selects the default batch size
        load_in_8_bit: whether to load the 8-bit quantized model
        engine_cfg: configuration for the inference engine
        disabled: if True, return a no-op predictor
        **kwargs: forwarded to the PreProcessor

    Returns:
        OrientationPredictor
    """
    if disabled:
        # Case where the orientation predictor is disabled
        return OrientationPredictor(None, None)

    if isinstance(arch, str):
        if arch not in ORIENTATION_ARCHS:
            raise ValueError(f"unknown architecture '{arch}'")
        # Load directly classifier from backbone
        _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
    elif isinstance(arch, classification.MobileNetV3):
        _model = arch
    else:
        raise ValueError(f"unknown architecture: {type(arch)}")

    # Fill preprocessing defaults from the model configuration
    kwargs.setdefault("mean", _model.cfg["mean"])
    kwargs.setdefault("std", _model.cfg["std"])
    kwargs.setdefault("batch_size", 512 if model_type == "crop" else 2)
    input_shape = _model.cfg["input_shape"][1:]
    return OrientationPredictor(
        PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
        _model,
    )
def crop_orientation_predictor(
    arch: Any = "mobilenet_v3_small_crop_orientation",
    batch_size: int = 512,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> OrientationPredictor:
    """Crop orientation classification architecture.

    >>> import numpy as np
    >>> from onnxtr.models import crop_orientation_predictor
    >>> model = crop_orientation_predictor(arch='mobilenet_v3_small_crop_orientation')
    >>> input_crop = (255 * np.random.rand(256, 256, 3)).astype(np.uint8)
    >>> out = model([input_crop])

    Args:
        arch: name of the architecture to use (e.g. 'mobilenet_v3_small_crop_orientation')
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: load the 8-bit quantized version of the model
        engine_cfg: configuration of inference engine
        **kwargs: keyword arguments to be passed to the OrientationPredictor

    Returns:
        OrientationPredictor
    """
    return _orientation_predictor(
        arch=arch,
        batch_size=batch_size,
        model_type="crop",
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )
def page_orientation_predictor(
    arch: Any = "mobilenet_v3_small_page_orientation",
    batch_size: int = 2,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> OrientationPredictor:
    """Build a predictor that classifies the orientation of full pages.

    >>> import numpy as np
    >>> from onnxtr.models import page_orientation_predictor
    >>> model = page_orientation_predictor(arch='mobilenet_v3_small_page_orientation')
    >>> input_page = (255 * np.random.rand(512, 512, 3)).astype(np.uint8)
    >>> out = model([input_page])

    Args:
        arch: name of the architecture to use (e.g. 'mobilenet_v3_small_page_orientation')
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments to be passed to the OrientationPredictor

    Returns:
        OrientationPredictor
    """
    # Delegate to the shared builder with the page-specific model type
    return _orientation_predictor(
        arch=arch,
        model_type="page",
        batch_size=batch_size,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/models/detection/__init__.py
================================================
from .models import *
from .zoo import *
================================================
FILE: onnxtr/models/detection/_utils/__init__.py
================================================
from . base import *
================================================
FILE: onnxtr/models/detection/_utils/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import numpy as np
__all__ = ["_remove_padding"]
def _remove_padding(
pages: list[np.ndarray],
loc_preds: list[np.ndarray],
preserve_aspect_ratio: bool,
symmetric_pad: bool,
assume_straight_pages: bool,
) -> list[np.ndarray]:
"""Remove padding from the localization predictions
Args:
pages: list of pages
loc_preds: list of localization predictions
preserve_aspect_ratio: whether the aspect ratio was preserved during padding
symmetric_pad: whether the padding was symmetric
assume_straight_pages: whether the pages are assumed to be straight
Returns:
list of unpaded localization predictions
"""
if preserve_aspect_ratio:
# Rectify loc_preds to remove padding
rectified_preds = []
for page, loc_pred in zip(pages, loc_preds):
h, w = page.shape[0], page.shape[1]
if h > w:
# y unchanged, dilate x coord
if symmetric_pad:
if assume_straight_pages:
loc_pred[:, [0, 2]] = (loc_pred[:, [0, 2]] - 0.5) * h / w + 0.5
else:
loc_pred[:, :, 0] = (loc_pred[:, :, 0] - 0.5) * h / w + 0.5
else:
if assume_straight_pages:
loc_pred[:, [0, 2]] *= h / w
else:
loc_pred[:, :, 0] *= h / w
elif w > h:
# x unchanged, dilate y coord
if symmetric_pad:
if assume_straight_pages:
loc_pred[:, [1, 3]] = (loc_pred[:, [1, 3]] - 0.5) * w / h + 0.5
else:
loc_pred[:, :, 1] = (loc_pred[:, :, 1] - 0.5) * w / h + 0.5
else:
if assume_straight_pages:
loc_pred[:, [1, 3]] *= w / h
else:
loc_pred[:, :, 1] *= w / h
rectified_preds.append(np.clip(loc_pred, 0, 1))
return rectified_preds
return loc_preds
================================================
FILE: onnxtr/models/detection/core.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import cv2
import numpy as np
from onnxtr.utils.repr import NestedObject
__all__ = ["DetectionPostProcessor"]
class DetectionPostProcessor(NestedObject):
    """Abstract base turning raw model heatmaps into box predictions.

    Args:
        box_thresh (float): minimal objectness score to consider a box
        bin_thresh (float): threshold to apply to segmentation raw heatmap
        assume_straight_pages (bool): if True, fit straight boxes only
    """

    def __init__(self, box_thresh: float = 0.5, bin_thresh: float = 0.5, assume_straight_pages: bool = True) -> None:
        self.box_thresh = box_thresh
        self.bin_thresh = bin_thresh
        self.assume_straight_pages = assume_straight_pages
        # 3x3 structuring element used for the morphological opening in __call__
        self._opening_kernel: np.ndarray = np.ones((3, 3), dtype=np.uint8)

    def extra_repr(self) -> str:
        return f"bin_thresh={self.bin_thresh}, box_thresh={self.box_thresh}"

    @staticmethod
    def box_score(pred: np.ndarray, points: np.ndarray, assume_straight_pages: bool = True) -> float:
        """Compute the confidence score for a polygon: mean of the p values on the polygon

        Args:
            pred (np.ndarray): p map returned by the model
            points: coordinates of the polygon
            assume_straight_pages: if True, fit straight boxes only

        Returns:
            polygon objectness
        """
        h, w = pred.shape[:2]
        if not assume_straight_pages:
            # Rasterize the polygon and average the heatmap over its support
            mask: np.ndarray = np.zeros((h, w), np.int32)
            cv2.fillPoly(mask, [points.astype(np.int32)], 1.0)
            product = pred * mask
            return np.sum(product) / np.count_nonzero(product)
        # Straight boxes: average over the clipped axis-aligned bounding rectangle
        x_min = np.clip(np.floor(points[:, 0].min()).astype(np.int32), 0, w - 1)
        x_max = np.clip(np.ceil(points[:, 0].max()).astype(np.int32), 0, w - 1)
        y_min = np.clip(np.floor(points[:, 1].min()).astype(np.int32), 0, h - 1)
        y_max = np.clip(np.ceil(points[:, 1].max()).astype(np.int32), 0, h - 1)
        return pred[y_min : y_max + 1, x_min : x_max + 1].mean()

    def bitmap_to_boxes(
        self,
        pred: np.ndarray,
        bitmap: np.ndarray,
    ) -> np.ndarray:
        # Concrete post-processors must implement the box extraction
        raise NotImplementedError

    def __call__(
        self,
        proba_map,
    ) -> list[list[np.ndarray]]:
        """Performs postprocessing for a list of model outputs

        Args:
            proba_map: probability map of shape (N, H, W, C)

        Returns:
            list of N class predictions (for each input sample), where each class predictions is a list of C tensors
            of shape (*, 5) or (*, 6)
        """
        if proba_map.ndim != 4:
            raise AssertionError(f"arg `proba_map` is expected to be 4-dimensional, got {proba_map.ndim}.")

        num_classes = proba_map.shape[-1]
        # Binarize, then clean every class map with a morphological opening
        bin_map = []
        for bmap in (proba_map >= self.bin_thresh).astype(np.uint8):
            bin_map.append([
                cv2.morphologyEx(bmap[..., idx], cv2.MORPH_OPEN, self._opening_kernel)
                for idx in range(num_classes)
            ])

        results = []
        for pmaps, bmaps in zip(proba_map, bin_map):
            results.append([self.bitmap_to_boxes(pmaps[..., idx], bmaps[idx]) for idx in range(num_classes)])
        return results
================================================
FILE: onnxtr/models/detection/models/__init__.py
================================================
from .fast import *
from .differentiable_binarization import *
from .linknet import *
================================================
FILE: onnxtr/models/detection/models/differentiable_binarization.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from scipy.special import expit
from ...engine import Engine, EngineConfig
from ..postprocessor.base import GeneralDetectionPostProcessor
__all__ = ["DBNet", "db_resnet50", "db_resnet34", "db_mobilenet_v3_large"]
# Per-architecture defaults for the DBNet variants: expected input shape (C, H, W),
# normalization statistics consumed as mean/std by the preprocessor, and download
# URLs for both the full-precision and the 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "db_resnet50": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet50-69ba0015.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet50_static_8_bit-09a6104f.onnx",
    },
    "db_resnet34": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet34-b4873198.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet34_static_8_bit-027e2c7f.onnx",
    },
    "db_mobilenet_v3_large": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.2.0/db_mobilenet_v3_large-4987e7bd.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.2.0/db_mobilenet_v3_large_static_8_bit-535a6f25.onnx",
    },
}
class DBNet(Engine):
    """ONNX runtime wrapper around a DBNet text-detection model.

    Args:
        model_path: path or url to onnx model file
        engine_cfg: configuration for the inference engine
        bin_thresh: threshold for binarization of the output feature map
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: if True, fit straight bounding boxes only
        cfg: the configuration dict of the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        engine_cfg: EngineConfig | None = None,
        bin_thresh: float = 0.3,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg
        self.assume_straight_pages = assume_straight_pages
        # Shared post-processor turning probability maps into boxes/polygons
        self.postprocessor = GeneralDetectionPostProcessor(
            assume_straight_pages=assume_straight_pages,
            bin_thresh=bin_thresh,
            box_thresh=box_thresh,
        )

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Run inference and post-process into localization predictions.

        Args:
            x: preprocessed input batch
            return_model_output: also expose the raw probability map under "out_map"
            **kwargs: unused, kept for interface compatibility

        Returns:
            dict with "preds" (post-processed boxes) and optionally "out_map"
        """
        # Sigmoid over the raw logits to obtain a probability map
        prob_map = expit(self.run(x))
        out: dict[str, Any] = {}
        if return_model_output:
            out["out_map"] = prob_map
        out["preds"] = self.postprocessor(prob_map)
        return out
def _dbnet(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """Instantiate a DBNet variant, optionally substituting the quantized weights URL."""
    # Only the remote default URLs have an 8-bit counterpart to swap in
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    return DBNet(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
def db_resnet34(
    model_path: str = default_cfgs["db_resnet34"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """DBNet with a ResNet-34 backbone, as described in
    `"Real-time Scene Text Detection with Differentiable Binarization"
    <https://arxiv.org/pdf/1911.08947.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import db_resnet34
    >>> model = db_resnet34()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the DBNet architecture

    Returns:
        text detection architecture
    """
    return _dbnet("db_resnet34", model_path, load_in_8_bit, engine_cfg, **kwargs)
def db_resnet50(
    model_path: str = default_cfgs["db_resnet50"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """DBNet with a ResNet-50 backbone, as described in
    `"Real-time Scene Text Detection with Differentiable Binarization"
    <https://arxiv.org/pdf/1911.08947.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import db_resnet50
    >>> model = db_resnet50()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the DBNet architecture

    Returns:
        text detection architecture
    """
    return _dbnet("db_resnet50", model_path, load_in_8_bit, engine_cfg, **kwargs)
def db_mobilenet_v3_large(
    model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """DBNet with a MobileNet V3 Large backbone, as described in
    `"Real-time Scene Text Detection with Differentiable Binarization"
    <https://arxiv.org/pdf/1911.08947.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import db_mobilenet_v3_large
    >>> model = db_mobilenet_v3_large()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the DBNet architecture

    Returns:
        text detection architecture
    """
    return _dbnet("db_mobilenet_v3_large", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/detection/models/fast.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import logging
from typing import Any
import numpy as np
from scipy.special import expit
from ...engine import Engine, EngineConfig
from ..postprocessor.base import GeneralDetectionPostProcessor
__all__ = ["FAST", "fast_tiny", "fast_small", "fast_base"]
# Per-architecture defaults for the FAST variants: expected input shape (C, H, W),
# normalization statistics used as mean/std by the preprocessor, and the download
# URL of the full-precision ONNX weights.
# NOTE: unlike the DBNet/LinkNet configs, there are no `url_8_bit` entries --
# `_fast` warns and falls back to full precision when 8-bit loading is requested.
default_cfgs: dict[str, dict[str, Any]] = {
    "fast_tiny": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_tiny-28867779.onnx",
    },
    "fast_small": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_small-10428b70.onnx",
    },
    "fast_base": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_base-1b89ebf9.onnx",
    },
}
class FAST(Engine):
    """ONNX runtime wrapper around a FAST text-detection model.

    Args:
        model_path: path or url to onnx model file
        engine_cfg: configuration for the inference engine
        bin_thresh: threshold for binarization of the output feature map
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: if True, fit straight bounding boxes only
        cfg: the configuration dict of the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        engine_cfg: EngineConfig | None = None,
        bin_thresh: float = 0.1,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg
        self.assume_straight_pages = assume_straight_pages
        # Shared post-processor turning probability maps into boxes/polygons
        self.postprocessor = GeneralDetectionPostProcessor(
            assume_straight_pages=assume_straight_pages,
            bin_thresh=bin_thresh,
            box_thresh=box_thresh,
        )

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Run inference and post-process into localization predictions.

        Args:
            x: preprocessed input batch
            return_model_output: also expose the raw probability map under "out_map"
            **kwargs: unused, kept for interface compatibility

        Returns:
            dict with "preds" (post-processed boxes) and optionally "out_map"
        """
        # Sigmoid over the raw logits to obtain a probability map
        prob_map = expit(self.run(x))
        out: dict[str, Any] = {}
        if return_model_output:
            out["out_map"] = prob_map
        out["preds"] = self.postprocessor(prob_map)
        return out
def _fast(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """Instantiate a FAST variant; 8-bit weights are not available for FAST."""
    if load_in_8_bit:
        # No quantized release exists yet, so we keep the full-precision weights
        logging.warning("FAST models do not support 8-bit quantization yet. Loading full precision model...")
    return FAST(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
def fast_tiny(
    model_path: str = default_cfgs["fast_tiny"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
    <https://arxiv.org/pdf/2111.02394.pdf>`_, using a tiny TextNet backbone.

    >>> import numpy as np
    >>> from onnxtr.models import fast_tiny
    >>> model = fast_tiny()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the FAST architecture

    Returns:
        text detection architecture
    """
    return _fast("fast_tiny", model_path, load_in_8_bit, engine_cfg, **kwargs)
def fast_small(
    model_path: str = default_cfgs["fast_small"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
    <https://arxiv.org/pdf/2111.02394.pdf>`_, using a small TextNet backbone.

    >>> import numpy as np
    >>> from onnxtr.models import fast_small
    >>> model = fast_small()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the FAST architecture

    Returns:
        text detection architecture
    """
    return _fast("fast_small", model_path, load_in_8_bit, engine_cfg, **kwargs)
def fast_base(
    model_path: str = default_cfgs["fast_base"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
    <https://arxiv.org/pdf/2111.02394.pdf>`_, using a base TextNet backbone.

    >>> import numpy as np
    >>> from onnxtr.models import fast_base
    >>> model = fast_base()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the FAST architecture

    Returns:
        text detection architecture
    """
    return _fast("fast_base", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/detection/models/linknet.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from scipy.special import expit
from ...engine import Engine, EngineConfig
from ..postprocessor.base import GeneralDetectionPostProcessor
__all__ = ["LinkNet", "linknet_resnet18", "linknet_resnet34", "linknet_resnet50"]
# Per-architecture defaults for the LinkNet variants: expected input shape (C, H, W),
# normalization statistics consumed as mean/std by the preprocessor, and download
# URLs for both the full-precision and the 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "linknet_resnet18": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet18-e0e0b9dc.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet18_static_8_bit-3b3a37dd.onnx",
    },
    "linknet_resnet34": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet34-93e39a39.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet34_static_8_bit-2824329d.onnx",
    },
    "linknet_resnet50": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet50-15d8c4ec.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet50_static_8_bit-65d6b0b8.onnx",
    },
}
class LinkNet(Engine):
    """ONNX runtime wrapper around a LinkNet text-detection model.

    Args:
        model_path: path or url to onnx model file
        engine_cfg: configuration for the inference engine
        bin_thresh: threshold for binarization of the output feature map
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: if True, fit straight bounding boxes only
        cfg: the configuration dict of the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        engine_cfg: EngineConfig | None = None,
        bin_thresh: float = 0.1,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg
        self.assume_straight_pages = assume_straight_pages
        # Shared post-processor turning probability maps into boxes/polygons
        self.postprocessor = GeneralDetectionPostProcessor(
            assume_straight_pages=assume_straight_pages,
            bin_thresh=bin_thresh,
            box_thresh=box_thresh,
        )

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Run inference and post-process into localization predictions.

        Args:
            x: preprocessed input batch
            return_model_output: also expose the raw probability map under "out_map"
            **kwargs: unused, kept for interface compatibility

        Returns:
            dict with "preds" (post-processed boxes) and optionally "out_map"
        """
        # Sigmoid over the raw logits to obtain a probability map
        prob_map = expit(self.run(x))
        out: dict[str, Any] = {}
        if return_model_output:
            out["out_map"] = prob_map
        out["preds"] = self.postprocessor(prob_map)
        return out
def _linknet(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """Instantiate a LinkNet variant, optionally substituting the quantized weights URL."""
    # Only the remote default URLs have an 8-bit counterpart to swap in
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    return LinkNet(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
def linknet_resnet18(
    model_path: str = default_cfgs["linknet_resnet18"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """LinkNet with a ResNet-18 backbone, as described in
    `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
    <https://arxiv.org/pdf/1707.03718.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import linknet_resnet18
    >>> model = linknet_resnet18()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the LinkNet architecture

    Returns:
        text detection architecture
    """
    return _linknet("linknet_resnet18", model_path, load_in_8_bit, engine_cfg, **kwargs)
def linknet_resnet34(
    model_path: str = default_cfgs["linknet_resnet34"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """LinkNet with a ResNet-34 backbone, as described in
    `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
    <https://arxiv.org/pdf/1707.03718.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import linknet_resnet34
    >>> model = linknet_resnet34()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the LinkNet architecture

    Returns:
        text detection architecture
    """
    return _linknet("linknet_resnet34", model_path, load_in_8_bit, engine_cfg, **kwargs)
def linknet_resnet50(
    model_path: str = default_cfgs["linknet_resnet50"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """LinkNet with a ResNet-50 backbone, as described in
    `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
    <https://arxiv.org/pdf/1707.03718.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import linknet_resnet50
    >>> model = linknet_resnet50()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the LinkNet architecture

    Returns:
        text detection architecture
    """
    return _linknet("linknet_resnet50", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/detection/postprocessor/__init__.py
================================================
================================================
FILE: onnxtr/models/detection/postprocessor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
# Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization
import cv2
import numpy as np
import pyclipper
from onnxtr.utils import order_points
from ..core import DetectionPostProcessor
__all__ = ["GeneralDetectionPostProcessor"]
class GeneralDetectionPostProcessor(DetectionPostProcessor):
    """Generic detection post-processor, shared by the DBNet, LinkNet and FAST
    wrappers: extracts connected components from a binarized probability map and
    converts each one into a (possibly rotated) box with an objectness score.

    Args:
        bin_thresh: threshold used to binarize the probability map at inference time
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: whether the inputs were expected to have horizontal text elements
    """
    def __init__(
        self,
        bin_thresh: float = 0.1,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
    ) -> None:
        # Parent signature is (box_thresh, bin_thresh, assume_straight_pages)
        super().__init__(box_thresh, bin_thresh, assume_straight_pages)
        # Expansion factor applied when unclipping detected polygons (see polygon_to_box)
        self.unclip_ratio = 1.5
    def polygon_to_box(
        self,
        points: np.ndarray,
    ) -> np.ndarray:
        """Expand a polygon (points) by a factor unclip_ratio and return the enclosing box

        Args:
            points: polygon vertices, in absolute (pixel) coordinates

        Returns:
            a box in absolute coordinates -- an (x, y, w, h) bounding rectangle when
            straight pages are assumed, otherwise a (4, 2) quadrangle; None when the
            expansion yields no polygon
        """
        if not self.assume_straight_pages:
            # Compute the rectangle polygon enclosing the raw polygon
            rect = cv2.minAreaRect(points)
            points = cv2.boxPoints(rect)
            # Add 1 pixel to correct cv2 approx
            area = (rect[1][0] + 1) * (1 + rect[1][1])
            length = 2 * (rect[1][0] + rect[1][1]) + 2
        else:
            area = cv2.contourArea(points)
            length = cv2.arcLength(points, closed=True)
        distance = area * self.unclip_ratio / length  # compute distance to expand polygon
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        _points = offset.Execute(distance)
        # Take biggest stack of points (offsetting can split the polygon)
        idx = 0
        if len(_points) > 1:
            max_size = 0
            for _idx, p in enumerate(_points):
                if len(p) > max_size:
                    idx = _idx
                    max_size = len(p)
            # We ensure that _points can be correctly casted to a ndarray
            _points = [_points[idx]]
        expanded_points: np.ndarray = np.asarray(_points)  # expand polygon
        if len(expanded_points) < 1:
            return None  # type: ignore[return-value]
        return (
            cv2.boundingRect(expanded_points)  # type: ignore[return-value]
            if self.assume_straight_pages
            else order_points(cv2.boxPoints(cv2.minAreaRect(expanded_points)))
        )
    def bitmap_to_boxes(
        self,
        pred: np.ndarray,
        bitmap: np.ndarray,
    ) -> np.ndarray:
        """Compute boxes from a bitmap/pred_map: find connected components then filter boxes

        Args:
            pred: probability map the bitmap was derived from
            bitmap: binarized map computed from pred

        Returns:
            np tensor of relative boxes: rows of (xmin, ymin, xmax, ymax, score) when
            straight pages are assumed, otherwise one (5, 2) array per box whose first
            4 rows are the quadrangle vertices and whose last row is (0, score)
        """
        height, width = bitmap.shape[:2]
        boxes: list[np.ndarray | list[float]] = []
        # get contours from connected components on the bitmap
        contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            # Check whether smallest enclosing bounding box is not too small
            if np.any(contour[:, 0].max(axis=0) - contour[:, 0].min(axis=0) < 2):
                continue
            # Compute objectness
            if self.assume_straight_pages:
                x, y, w, h = cv2.boundingRect(contour)
                points: np.ndarray = np.array([[x, y], [x, y + h], [x + w, y + h], [x + w, y]])
                score = self.box_score(pred, points, assume_straight_pages=True)
            else:
                score = self.box_score(pred, contour, assume_straight_pages=False)
            if score < self.box_thresh:  # remove polygons with a weak objectness
                continue
            # NOTE(review): polygon_to_box may return None (empty expansion); the
            # unpacking/indexing below would then fail -- presumably rare, confirm upstream.
            if self.assume_straight_pages:
                _box = self.polygon_to_box(points)
            else:
                _box = self.polygon_to_box(np.squeeze(contour))
            if self.assume_straight_pages:
                # compute relative polygon to get rid of img shape
                x, y, w, h = _box
                xmin, ymin, xmax, ymax = x / width, y / height, (x + w) / width, (y + h) / height
                boxes.append([xmin, ymin, xmax, ymax, score])
            else:
                # compute relative box to get rid of img shape
                _box[:, 0] /= width
                _box[:, 1] /= height
                # Add score to box as (0, score)
                boxes.append(np.vstack([_box, np.array([0.0, score])]))
        if not self.assume_straight_pages:
            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5, 2), dtype=pred.dtype)
        else:
            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5), dtype=pred.dtype)
================================================
FILE: onnxtr/models/detection/predictor/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/models/detection/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from onnxtr.models.detection._utils import _remove_padding
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.repr import NestedObject
__all__ = ["DetectionPredictor"]
class DetectionPredictor(NestedObject):
    """Localizes text elements in documents.

    Args:
        pre_processor: transform inputs for easier batched model inference
        model: core detection architecture
    """

    _children_names: list[str] = ["pre_processor", "model"]

    def __init__(
        self,
        pre_processor: PreProcessor,
        model: Any,
    ) -> None:
        self.pre_processor = pre_processor
        self.model = model

    def __call__(
        self,
        pages: list[np.ndarray],
        return_maps: bool = False,
        **kwargs: Any,
    ) -> list[np.ndarray] | tuple[list[np.ndarray], list[np.ndarray]]:
        """Run detection on a list of pages.

        Args:
            pages: list of multi-channel 2D page images
            return_maps: also return the raw segmentation maps
            **kwargs: forwarded to the underlying model

        Returns:
            localization predictions per page, optionally paired with segmentation maps
        """
        # Padding parameters come from the preprocessor, orientation handling from the model
        preserve_aspect_ratio = self.pre_processor.resize.preserve_aspect_ratio
        symmetric_pad = self.pre_processor.resize.symmetric_pad
        assume_straight_pages = self.model.assume_straight_pages

        if any(page.ndim != 3 for page in pages):
            raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")

        batches = self.pre_processor(pages)
        outputs = [self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in batches]

        # Flatten per-batch predictions, keeping only the first class of each sample
        loc_preds: list[np.ndarray] = []
        for output in outputs:
            loc_preds.extend(pred[0] for pred in output["preds"])

        # Rescale coordinates to undo aspect-ratio padding
        preds = _remove_padding(
            pages,
            loc_preds,
            preserve_aspect_ratio=preserve_aspect_ratio,
            symmetric_pad=symmetric_pad,
            assume_straight_pages=assume_straight_pages,
        )

        if return_maps:
            seg_maps = [seg_map for output in outputs for seg_map in output["out_map"]]
            return preds, seg_maps
        return preds
================================================
FILE: onnxtr/models/detection/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from .. import detection
from ..engine import EngineConfig
from ..preprocessor import PreProcessor
from .predictor import DetectionPredictor
__all__ = ["detection_predictor"]
# Names of all text-detection architectures this zoo can instantiate by string.
ARCHS = [
    "db_resnet34",
    "db_resnet50",
    "db_mobilenet_v3_large",
    "linknet_resnet18",
    "linknet_resnet34",
    "linknet_resnet50",
    "fast_tiny",
    "fast_small",
    "fast_base",
]
def _predictor(
    arch: Any,
    assume_straight_pages: bool = True,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DetectionPredictor:
    """Build a DetectionPredictor from an architecture name or model instance."""
    if isinstance(arch, str):
        if arch not in ARCHS:
            raise ValueError(f"unknown architecture '{arch}'")
        # Instantiate the requested architecture by name
        _model = detection.__dict__[arch](
            assume_straight_pages=assume_straight_pages, load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg
        )
    else:
        if not isinstance(arch, (detection.DBNet, detection.LinkNet, detection.FAST)):
            raise ValueError(f"unknown architecture: {type(arch)}")
        # Reuse the provided model, aligning its orientation handling
        _model = arch
        _model.assume_straight_pages = assume_straight_pages
        _model.postprocessor.assume_straight_pages = assume_straight_pages

    # Fill preprocessor defaults from the model configuration
    kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
    kwargs["std"] = kwargs.get("std", _model.cfg["std"])
    kwargs["batch_size"] = kwargs.get("batch_size", 2)
    return DetectionPredictor(
        PreProcessor(_model.cfg["input_shape"][1:], **kwargs),
        _model,
    )
def detection_predictor(
    arch: Any = "fast_base",
    assume_straight_pages: bool = True,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    batch_size: int = 2,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DetectionPredictor:
    """Text detection architecture.

    >>> import numpy as np
    >>> from onnxtr.models import detection_predictor
    >>> model = detection_predictor(arch='db_resnet50')
    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
    >>> out = model([input_page])

    Args:
        arch: name of the architecture or model itself to use (e.g. 'db_resnet50')
        assume_straight_pages: If True, fit straight boxes to the page
        preserve_aspect_ratio: If True, pad the input document image to preserve the aspect ratio before
            running the detection model on it
        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: optional keyword arguments passed to the architecture

    Returns:
        Detection predictor
    """
    # Preprocessing options travel through **kwargs down to the PreProcessor
    kwargs.update(
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        batch_size=batch_size,
    )
    return _predictor(
        arch=arch,
        assume_straight_pages=assume_straight_pages,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/models/engine.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
import logging
import os
from collections.abc import Callable
from typing import Any, TypeAlias
import numpy as np
from onnxruntime import (
ExecutionMode,
GraphOptimizationLevel,
InferenceSession,
RunOptions,
SessionOptions,
get_available_providers,
get_device,
)
from onnxruntime.capi._pybind_state import set_default_logger_severity
# Reduce onnxruntime's default log verbosity; overridable through the
# ORT_LOG_SEVERITY_LEVEL environment variable. NOTE(review): level 4 appears to
# be the quietest (fatal-only) setting — confirm against onnxruntime docs.
set_default_logger_severity(int(os.getenv("ORT_LOG_SEVERITY_LEVEL", 4)))
from onnxtr.utils.data import download_from_url
from onnxtr.utils.geometry import shape_translate
__all__ = ["EngineConfig", "RunOptionsProvider"]
# User hook signature: receives the RunOptions for one inference call and
# returns the (possibly adjusted) RunOptions to actually use.
RunOptionsProvider: TypeAlias = Callable[[RunOptions], RunOptions]
class EngineConfig:
    """Configuration holder for the inference engine of a model.

    Args:
        providers: list of providers to use for inference ref.: https://onnxruntime.ai/docs/execution-providers/
        session_options: configuration for the inference session ref.: https://onnxruntime.ai/docs/api/python/api_summary.html#sessionoptions
        run_options_provider: optional hook invoked before each run to customize the ``RunOptions``
    """

    def __init__(
        self,
        providers: list[tuple[str, dict[str, Any]]] | list[str] | None = None,
        session_options: SessionOptions | None = None,
        run_options_provider: RunOptionsProvider | None = None,
    ):
        # Fall back to auto-detected defaults when nothing was supplied
        self._providers = providers if providers else self._init_providers()
        self._session_options = session_options if session_options else self._init_sess_opts()
        self.run_options_provider = run_options_provider

    def _init_providers(self) -> list[tuple[str, dict[str, Any]]]:
        # CPU is always present as the fallback provider
        selected: Any = [("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
        available = get_available_providers()
        logging.info(f"Available providers: {available}")
        if "CUDAExecutionProvider" in available and get_device() == "GPU":  # pragma: no cover
            cuda_options = {
                "device_id": 0,
                "arena_extend_strategy": "kNextPowerOfTwo",
                "cudnn_conv_algo_search": "DEFAULT",
                "do_copy_in_default_stream": True,
            }
            selected.insert(0, ("CUDAExecutionProvider", cuda_options))
        elif "CoreMLExecutionProvider" in available:  # pragma: no cover
            selected.insert(0, ("CoreMLExecutionProvider", {}))
        return selected

    def _init_sess_opts(self) -> SessionOptions:
        # Defaults: sequential execution with full graph optimization enabled
        opts = SessionOptions()
        opts.enable_cpu_mem_arena = True
        opts.execution_mode = ExecutionMode.ORT_SEQUENTIAL
        opts.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
        opts.intra_op_num_threads = -1
        opts.inter_op_num_threads = -1
        return opts

    @property
    def providers(self) -> list[tuple[str, dict[str, Any]]] | list[str]:
        return self._providers

    @property
    def session_options(self) -> SessionOptions:
        return self._session_options

    def __repr__(self) -> str:
        return f"EngineConfig(providers={self.providers})"
class Engine:
    """Implements an abstract class for the engine of a model

    Args:
        url: the url to use to download a model if needed
        engine_cfg: the configuration of the engine
        **kwargs: additional arguments to be passed to `download_from_url`
    """

    def __init__(self, url: str, engine_cfg: EngineConfig | None = None, **kwargs: Any) -> None:
        # Anything that is not an EngineConfig instance falls back to the defaults
        engine_cfg = engine_cfg if isinstance(engine_cfg, EngineConfig) else EngineConfig()
        # `url` may also be a local file path; only download for URL-looking inputs
        archive_path = download_from_url(url, cache_subdir="models", **kwargs) if "http" in url else url
        # NOTE: older onnxruntime versions require a string path for windows
        archive_path = rf"{archive_path}"
        # Store model path for each model
        self.model_path = archive_path
        self.session_options = engine_cfg.session_options
        self.providers = engine_cfg.providers
        self.run_options_provider = engine_cfg.run_options_provider
        self.runtime = InferenceSession(archive_path, providers=self.providers, sess_options=self.session_options)
        # First graph input defines the expected layout / batch dimension
        self.runtime_inputs = self.runtime.get_inputs()[0]
        # A trailing dimension of 3 (channels-last) indicates a tensorflow-exported graph
        self.tf_exported = int(self.runtime_inputs.shape[-1]) == 3
        self.fixed_batch_size: int | str = self.runtime_inputs.shape[
            0
        ]  # mostly possible with tensorflow exported models
        self.output_name = [output.name for output in self.runtime.get_outputs()]

    def run(self, inputs: np.ndarray) -> np.ndarray:
        """Run inference on a batch and return the logits in BHWC layout.

        Args:
            inputs: batched input array; layout is normalized below to match the graph

        Returns:
            model output translated to BHWC layout
        """
        run_options = RunOptions()
        if self.run_options_provider is not None:
            # Give the user hook a chance to customize per-call run options
            run_options = self.run_options_provider(run_options)
        if self.tf_exported:
            inputs = shape_translate(inputs, format="BHWC")  # sanity check
        else:
            inputs = shape_translate(inputs, format="BCHW")
        if isinstance(self.fixed_batch_size, int) and self.fixed_batch_size != 0:  # dynamic batch size is a string
            # NOTE(review): broadcasts the whole batch up to the graph's static
            # batch size and feeds each broadcast slice as one session run —
            # assumes the model was exported with a fixed batch dimension;
            # confirm against the exporter used.
            inputs = np.broadcast_to(inputs, (self.fixed_batch_size, *inputs.shape))
            # combine the results
            logits = np.concatenate(
                [
                    self.runtime.run(self.output_name, {self.runtime_inputs.name: batch}, run_options=run_options)[0]
                    for batch in inputs
                ],
                axis=0,
            )
        else:
            logits = self.runtime.run(self.output_name, {self.runtime_inputs.name: inputs}, run_options=run_options)[0]
        return shape_translate(logits, format="BHWC")
================================================
FILE: onnxtr/models/factory/__init__.py
================================================
from .hub import *
================================================
FILE: onnxtr/models/factory/hub.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
# Inspired by: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/hub.py
import json
import logging
import shutil
import subprocess
import tempfile
import textwrap
from pathlib import Path
from typing import Any
from huggingface_hub import (
HfApi,
get_token,
hf_hub_download,
login,
)
from onnxtr import models
from onnxtr.models.engine import EngineConfig
__all__ = ["login_to_hub", "push_to_hf_hub", "from_hub", "_save_model_and_config_for_hf_hub"]
# Map each supported task to the architecture names its zoo exposes; used to
# validate the `arch`/`task` pair before pushing a model to the hub.
AVAILABLE_ARCHS = {
    "classification": models.classification.zoo.ORIENTATION_ARCHS,
    "detection": models.detection.zoo.ARCHS,
    "recognition": models.recognition.zoo.ARCHS,
}
def login_to_hub() -> None:  # pragma: no cover
    """Login to huggingface hub"""
    access_token = get_token()
    if access_token is None:
        # No cached credentials: fall back to an interactive login
        login()
    else:
        logging.info("Huggingface Hub token found and valid")
        login(token=access_token)
    # check if git lfs is installed
    try:
        subprocess.call(["git", "lfs", "version"])
    except FileNotFoundError:
        raise OSError(
            "Looks like you do not have git-lfs installed, please install. \
            You can install from https://git-lfs.github.com/. \
            Then run `git lfs install` (you only have to do this once)."
        )
def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task: str) -> None:
"""Save model and config to disk for pushing to huggingface hub
Args:
model: Onnx model to be saved
save_dir: directory to save model and config
arch: architecture name
task: task name
"""
save_directory = Path(save_dir)
shutil.copy2(model.model_path, save_directory / "model.onnx")
config_path = save_directory / "config.json"
# add model configuration
model_config = model.cfg
model_config["arch"] = arch
model_config["task"] = task
with config_path.open("w") as f:
json.dump(model_config, f, indent=2, ensure_ascii=False)
def push_to_hf_hub(
    model: Any, model_name: str, task: str, override: bool = False, **kwargs
) -> None:  # pragma: no cover
    """Save model and its configuration on HF hub

    >>> from onnxtr.models import login_to_hub, push_to_hf_hub
    >>> from onnxtr.models.recognition import crnn_mobilenet_v3_small
    >>> login_to_hub()
    >>> model = crnn_mobilenet_v3_small()
    >>> push_to_hf_hub(model, 'my-model', 'recognition', arch='crnn_mobilenet_v3_small')

    Args:
        model: Onnx model to be saved
        model_name: name of the model which is also the repository name
        task: task name
        override: whether to override the existing model / repo on HF hub
        **kwargs: keyword arguments for push_to_hf_hub (`run_config` or `arch`)
    """
    run_config = kwargs.get("run_config", None)
    arch = kwargs.get("arch", None)
    if run_config is None and arch is None:
        raise ValueError("run_config or arch must be specified")
    if task not in ["classification", "detection", "recognition"]:
        raise ValueError("task must be one of classification, detection, recognition")
    # default readme
    readme = textwrap.dedent(
        f"""
        ---
        language:
        - en
        - fr
        license: apache-2.0
        ---
        **Optical Character Recognition made seamless & accessible to anyone, powered by Onnxruntime**
        ## Task: {task}
        https://github.com/felixdittrich92/OnnxTR
        ### Example usage:
        ```python
        >>> from onnxtr.io import DocumentFile
        >>> from onnxtr.models import ocr_predictor, from_hub
        >>> img = DocumentFile.from_images([''])
        >>> # Load your model from the hub
        >>> model = from_hub('onnxtr/my-model')
        >>> # Pass it to the predictor
        >>> # If your model is a recognition model:
        >>> predictor = ocr_predictor(det_arch='db_mobilenet_v3_large',
        >>> reco_arch=model)
        >>> # If your model is a detection model:
        >>> predictor = ocr_predictor(det_arch=model,
        >>> reco_arch='crnn_mobilenet_v3_small')
        >>> # Get your predictions
        >>> res = predictor(img)
        ```
        """
    )
    # add run configuration to readme if available
    if run_config is not None:
        arch = run_config.arch
        readme += textwrap.dedent(
            f"""### Run Configuration
            \n{json.dumps(vars(run_config), indent=2, ensure_ascii=False)}"""
        )
    # Validate the (possibly run_config-derived) architecture before any upload
    if arch not in AVAILABLE_ARCHS[task]:
        raise ValueError(
            f"Architecture: {arch} for task: {task} not found.\
            \nAvailable architectures: {AVAILABLE_ARCHS}"
        )
    commit_message = f"Add {model_name} model"
    # Create repository — honor `override`: pushing to an existing repo is only
    # allowed when explicitly requested (previously exist_ok was hard-coded to
    # False, so the documented `override` flag had no effect)
    api = HfApi()
    api.create_repo(model_name, token=get_token(), exist_ok=override)
    # Save model files to a temporary directory
    with tempfile.TemporaryDirectory() as tmp_dir:
        _save_model_and_config_for_hf_hub(model, tmp_dir, arch=arch, task=task)
        readme_path = Path(tmp_dir) / "README.md"
        readme_path.write_text(readme)
        # Upload all files to the hub
        api.upload_folder(
            folder_path=tmp_dir,
            repo_id=model_name,
            commit_message=commit_message,
            token=get_token(),
        )
def from_hub(repo_id: str, engine_cfg: EngineConfig | None = None, **kwargs: Any):
    """Instantiate & load a pretrained model from HF hub.

    >>> from onnxtr.models import from_hub
    >>> model = from_hub("onnxtr/my-model")

    Args:
        repo_id: HuggingFace model hub repo
        engine_cfg: configuration for the inference engine (optional)
        **kwargs: kwargs of `hf_hub_download`

    Returns:
        Model loaded with the checkpoint

    Raises:
        ValueError: if the downloaded config declares an unknown task
    """
    # Get the config
    with open(hf_hub_download(repo_id, filename="config.json", **kwargs), "rb") as f:
        cfg = json.load(f)
    model_path = hf_hub_download(repo_id, filename="model.onnx", **kwargs)
    # `arch`/`task` are export metadata, not model configuration — strip them
    arch = cfg.pop("arch")
    task = cfg.pop("task")
    if task == "classification":
        model = models.classification.__dict__[arch](model_path, classes=cfg["classes"], engine_cfg=engine_cfg)
    elif task == "detection":
        model = models.detection.__dict__[arch](model_path, engine_cfg=engine_cfg)
    elif task == "recognition":
        model = models.recognition.__dict__[arch](
            model_path, input_shape=cfg["input_shape"], vocab=cfg["vocab"], engine_cfg=engine_cfg
        )
    else:
        # Fail loudly on a malformed config instead of hitting an unbound
        # local variable (NameError) further down
        raise ValueError(f"unknown task: {task}")
    # convert all values which are lists to tuples
    for key, value in cfg.items():
        if isinstance(value, list):
            cfg[key] = tuple(value)
    # update model cfg
    model.cfg = cfg
    return model
================================================
FILE: onnxtr/models/predictor/__init__.py
================================================
from .predictor import *
================================================
FILE: onnxtr/models/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
from collections.abc import Callable
from typing import Any
import numpy as np
from onnxtr.models.builder import DocumentBuilder
from onnxtr.models.engine import EngineConfig
from onnxtr.utils.geometry import extract_crops, extract_rcrops, remove_image_padding, rotate_image
from .._utils import estimate_orientation, rectify_crops, rectify_loc_preds
from ..classification import crop_orientation_predictor, page_orientation_predictor
from ..classification.predictor import OrientationPredictor
from ..detection.zoo import ARCHS as DETECTION_ARCHS
from ..recognition.zoo import ARCHS as RECOGNITION_ARCHS
__all__ = ["_OCRPredictor"]
class _OCRPredictor:
    """Implements an object able to localize and identify text elements in a set of documents

    Args:
        assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
            without rotated textual elements.
        straighten_pages: if True, estimates the page general orientation based on the median line orientation.
            Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
            accordingly. Doing so will improve performances for documents with page-uniform rotations.
        preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding)
        symmetric_pad: if True and preserve_aspect_ratio is True, pad the image symmetrically.
        detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
            page. Doing so will slightly deteriorate the overall latency.
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        clf_engine_cfg: configuration of the orientation classification engine
        **kwargs: keyword args of `DocumentBuilder` (plus the `disable_page_orientation` /
            `disable_crop_orientation` switches popped below)
    """

    crop_orientation_predictor: OrientationPredictor | None
    page_orientation_predictor: OrientationPredictor | None

    def __init__(
        self,
        assume_straight_pages: bool = True,
        straighten_pages: bool = False,
        preserve_aspect_ratio: bool = True,
        symmetric_pad: bool = True,
        detect_orientation: bool = False,
        load_in_8_bit: bool = False,
        clf_engine_cfg: EngineConfig | None = None,
        **kwargs: Any,
    ) -> None:
        self.assume_straight_pages = assume_straight_pages
        self.straighten_pages = straighten_pages
        # Pop the orientation switches before kwargs reaches DocumentBuilder
        self._page_orientation_disabled = kwargs.pop("disable_page_orientation", False)
        self._crop_orientation_disabled = kwargs.pop("disable_crop_orientation", False)
        # Crop-level orientation model is only needed for possibly-rotated words
        self.crop_orientation_predictor = (
            None
            if assume_straight_pages
            else crop_orientation_predictor(
                load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled
            )
        )
        # BUGFIX: wire the page orientation predictor to the page-level disable
        # flag (it was previously passed the crop-level flag by copy-paste,
        # leaving `disable_page_orientation` without effect)
        self.page_orientation_predictor = (
            page_orientation_predictor(
                load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._page_orientation_disabled
            )
            if detect_orientation or straighten_pages or not assume_straight_pages
            else None
        )
        self.doc_builder = DocumentBuilder(**kwargs)
        self.preserve_aspect_ratio = preserve_aspect_ratio
        self.symmetric_pad = symmetric_pad
        self.hooks: list[Callable] = []

    def _general_page_orientations(
        self,
        pages: list[np.ndarray],
    ) -> list[tuple[int, float]]:
        """Estimate the general orientation (value, confidence) of each page."""
        _, classes, probs = zip(self.page_orientation_predictor(pages))  # type: ignore[misc]
        # Flatten to list of tuples with (value, confidence)
        page_orientations = [
            (orientation, prob)
            for page_classes, page_probs in zip(classes, probs)
            for orientation, prob in zip(page_classes, page_probs)
        ]
        return page_orientations

    def _get_orientations(
        self, pages: list[np.ndarray], seg_maps: list[np.ndarray]
    ) -> tuple[list[tuple[int, float]], list[int]]:
        """Compute general page orientations and the per-page rotation angles."""
        general_pages_orientations = self._general_page_orientations(pages)
        origin_page_orientations = [
            estimate_orientation(seq_map, general_orientation)
            for seq_map, general_orientation in zip(seg_maps, general_pages_orientations)
        ]
        return general_pages_orientations, origin_page_orientations

    def _straighten_pages(
        self,
        pages: list[np.ndarray],
        seg_maps: list[np.ndarray],
        general_pages_orientations: list[tuple[int, float]] | None = None,
        origin_pages_orientations: list[int] | None = None,
    ) -> list[np.ndarray]:
        """Rotate each page by its estimated angle; orientations are recomputed when not supplied."""
        general_pages_orientations = (
            general_pages_orientations if general_pages_orientations else self._general_page_orientations(pages)
        )
        origin_pages_orientations = (
            origin_pages_orientations
            if origin_pages_orientations
            else [
                estimate_orientation(seq_map, general_orientation)
                for seq_map, general_orientation in zip(seg_maps, general_pages_orientations)
            ]
        )
        return [
            # expand if height and width are not equal, afterwards remove padding
            remove_image_padding(rotate_image(page, angle, expand=page.shape[0] != page.shape[1]))
            for page, angle in zip(pages, origin_pages_orientations)
        ]

    @staticmethod
    def _generate_crops(
        pages: list[np.ndarray],
        loc_preds: list[np.ndarray],
        channels_last: bool,
        assume_straight_pages: bool = False,
        assume_horizontal: bool = False,
    ) -> list[list[np.ndarray]]:
        """Extract one image crop per localized box, page by page."""
        if assume_straight_pages:
            crops = [
                extract_crops(page, _boxes[:, :4], channels_last=channels_last)
                for page, _boxes in zip(pages, loc_preds)
            ]
        else:
            # Rotated boxes need the rotated-crop extraction path
            crops = [
                extract_rcrops(page, _boxes[:, :4], channels_last=channels_last, assume_horizontal=assume_horizontal)
                for page, _boxes in zip(pages, loc_preds)
            ]
        return crops

    @staticmethod
    def _prepare_crops(
        pages: list[np.ndarray],
        loc_preds: list[np.ndarray],
        channels_last: bool,
        assume_straight_pages: bool = False,
        assume_horizontal: bool = False,
    ) -> tuple[list[list[np.ndarray]], list[np.ndarray]]:
        """Generate crops and filter out degenerate (zero-sized) ones, along with their boxes."""
        crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal)
        # Avoid sending zero-sized crops
        is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops]
        crops = [
            [crop for crop, _kept in zip(page_crops, page_kept) if _kept]
            for page_crops, page_kept in zip(crops, is_kept)
        ]
        loc_preds = [_boxes[_kept] for _boxes, _kept in zip(loc_preds, is_kept)]
        return crops, loc_preds

    def _rectify_crops(
        self,
        crops: list[list[np.ndarray]],
        loc_preds: list[np.ndarray],
    ) -> tuple[list[list[np.ndarray]], list[np.ndarray], list[tuple[int, float]]]:
        """Rotate crops (and their location predictions) back to reading orientation."""
        # Work at a page level
        orientations, classes, probs = zip(*[self.crop_orientation_predictor(page_crops) for page_crops in crops])  # type: ignore[misc]
        rect_crops = [rectify_crops(page_crops, orientation) for page_crops, orientation in zip(crops, orientations)]
        rect_loc_preds = [
            rectify_loc_preds(page_loc_preds, orientation) if len(page_loc_preds) > 0 else page_loc_preds
            for page_loc_preds, orientation in zip(loc_preds, orientations)
        ]
        # Flatten to list of tuples with (value, confidence)
        crop_orientations = [
            (orientation, prob)
            for page_classes, page_probs in zip(classes, probs)
            for orientation, prob in zip(page_classes, page_probs)
        ]
        return rect_crops, rect_loc_preds, crop_orientations  # type: ignore[return-value]

    @staticmethod
    def _process_predictions(
        loc_preds: list[np.ndarray],
        word_preds: list[tuple[str, float]],
        crop_orientations: list[dict[str, Any]],
    ) -> tuple[list[np.ndarray], list[list[tuple[str, float]]], list[list[dict[str, Any]]]]:
        """Regroup the flat word/orientation prediction lists back per page."""
        text_preds = []
        crop_orientation_preds = []
        if len(loc_preds) > 0:
            # Text & crop orientation predictions at page level
            _idx = 0
            for page_boxes in loc_preds:
                text_preds.append(word_preds[_idx : _idx + page_boxes.shape[0]])
                crop_orientation_preds.append(crop_orientations[_idx : _idx + page_boxes.shape[0]])
                _idx += page_boxes.shape[0]
        return loc_preds, text_preds, crop_orientation_preds

    def add_hook(self, hook: Callable) -> None:
        """Add a hook to the predictor

        Args:
            hook: a callable that takes as input the `loc_preds` and returns the modified `loc_preds`
        """
        self.hooks.append(hook)

    def list_archs(self) -> dict[str, list[str]]:
        """List the detection and recognition architectures available in the zoos."""
        return {"detection_archs": DETECTION_ARCHS, "recognition_archs": RECOGNITION_ARCHS}
================================================
FILE: onnxtr/models/predictor/predictor.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
from typing import Any
import numpy as np
from onnxtr.io.elements import Document
from onnxtr.models._utils import get_language
from onnxtr.models.detection.predictor import DetectionPredictor
from onnxtr.models.engine import EngineConfig
from onnxtr.models.recognition.predictor import RecognitionPredictor
from onnxtr.utils.geometry import detach_scores
from onnxtr.utils.repr import NestedObject
from .base import _OCRPredictor
__all__ = ["OCRPredictor"]
class OCRPredictor(NestedObject, _OCRPredictor):
    """Implements an object able to localize and identify text elements in a set of documents

    Args:
        det_predictor: detection module
        reco_predictor: recognition module
        assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
            without rotated textual elements.
        straighten_pages: if True, estimates the page general orientation based on the median line orientation.
            Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
            accordingly. Doing so will improve performances for documents with page-uniform rotations.
        preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding)
        symmetric_pad: if True and preserve_aspect_ratio is True, pad the image symmetrically
        detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
            page. Doing so will slightly deteriorate the overall latency.
        detect_language: if True, the language prediction will be added to the predictions for each
            page. Doing so will slightly deteriorate the overall latency.
        clf_engine_cfg: configuration of the orientation classification engine
        **kwargs: keyword args of `DocumentBuilder`
    """

    _children_names = ["det_predictor", "reco_predictor", "doc_builder"]

    def __init__(
        self,
        det_predictor: DetectionPredictor,
        reco_predictor: RecognitionPredictor,
        assume_straight_pages: bool = True,
        straighten_pages: bool = False,
        preserve_aspect_ratio: bool = True,
        symmetric_pad: bool = True,
        detect_orientation: bool = False,
        detect_language: bool = False,
        clf_engine_cfg: EngineConfig | None = None,
        **kwargs: Any,
    ) -> None:
        self.det_predictor = det_predictor
        self.reco_predictor = reco_predictor
        # Shared OCR state (orientation predictors, document builder, hooks)
        _OCRPredictor.__init__(
            self,
            assume_straight_pages,
            straighten_pages,
            preserve_aspect_ratio,
            symmetric_pad,
            detect_orientation,
            clf_engine_cfg=clf_engine_cfg,
            **kwargs,
        )
        self.detect_orientation = detect_orientation
        self.detect_language = detect_language

    def __call__(
        self,
        pages: list[np.ndarray],
        **kwargs: Any,
    ) -> Document:
        """Run the full OCR pipeline: detection, optional rectification, recognition, document building.

        Args:
            pages: list of pages as 3-dimensional (multi-channel) arrays
            **kwargs: forwarded to the detection and recognition predictors

        Returns:
            the structured `Document` assembled by the document builder
        """
        # Dimension check
        if any(page.ndim != 3 for page in pages):
            raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")
        origin_page_shapes = [page.shape[:2] for page in pages]
        # Localize text elements
        loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
        # Detect document rotation and rotate pages
        # (binarize the raw detection maps with the postprocessor's threshold)
        seg_maps = [
            np.where(out_map > getattr(self.det_predictor.model.postprocessor, "bin_thresh"), 255, 0).astype(np.uint8)
            for out_map in out_maps
        ]
        if self.detect_orientation:
            general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps)
            orientations = [
                {"value": orientation_page, "confidence": None} for orientation_page in origin_pages_orientations
            ]
        else:
            orientations = None
            general_pages_orientations = None
            origin_pages_orientations = None
        if self.straighten_pages:
            pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
            # update page shapes after straightening
            origin_page_shapes = [page.shape[:2] for page in pages]
            # forward again to get predictions on straight pages
            loc_preds = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
        # Detach objectness scores from loc_preds
        loc_preds, objectness_scores = detach_scores(loc_preds)  # type: ignore[arg-type]
        # Apply hooks to loc_preds if any
        for hook in self.hooks:
            loc_preds = hook(loc_preds)
        # Crop images
        crops, loc_preds = self._prepare_crops(
            pages,
            loc_preds,
            channels_last=True,
            assume_straight_pages=self.assume_straight_pages,
            assume_horizontal=self._page_orientation_disabled,
        )
        # Rectify crop orientation and get crop orientation predictions
        crop_orientations: Any = []
        if not self.assume_straight_pages:
            crops, loc_preds, _crop_orientations = self._rectify_crops(crops, loc_preds)
            crop_orientations = [
                {"value": orientation[0], "confidence": orientation[1]} for orientation in _crop_orientations
            ]
        # Identify character sequences
        word_preds = self.reco_predictor([crop for page_crops in crops for crop in page_crops], **kwargs)
        if not crop_orientations:
            # Straight pages: every crop is considered upright by assumption
            crop_orientations = [{"value": 0, "confidence": None} for _ in word_preds]
        # Regroup the flat prediction lists back per page
        boxes, text_preds, crop_orientations = self._process_predictions(loc_preds, word_preds, crop_orientations)
        if self.detect_language:
            languages = [get_language(" ".join([item[0] for item in text_pred])) for text_pred in text_preds]
            languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
        else:
            languages_dict = None
        out = self.doc_builder(
            pages,
            boxes,
            objectness_scores,
            text_preds,
            origin_page_shapes,
            crop_orientations,
            orientations,
            languages_dict,
        )
        return out
================================================
FILE: onnxtr/models/preprocessor/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/models/preprocessor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
import math
from typing import Any
import numpy as np
from onnxtr.transforms import Normalize, Resize
from onnxtr.utils.geometry import shape_translate
from onnxtr.utils.multithreading import multithread_exec
from onnxtr.utils.repr import NestedObject
__all__ = ["PreProcessor"]
class PreProcessor(NestedObject):
    """Implements an abstract preprocessor object which performs casting, resizing, batching and normalization.

    Args:
        output_size: expected size of each page in format (H, W)
        batch_size: the size of page batches
        mean: mean value of the training distribution by channel
        std: standard deviation of the training distribution by channel
        **kwargs: additional arguments for the resizing operation
    """

    _children_names: list[str] = ["resize", "normalize"]

    def __init__(
        self,
        output_size: tuple[int, int],
        batch_size: int,
        mean: tuple[float, float, float] = (0.5, 0.5, 0.5),
        std: tuple[float, float, float] = (1.0, 1.0, 1.0),
        **kwargs: Any,
    ) -> None:
        self.batch_size = batch_size
        self.resize = Resize(output_size, **kwargs)
        self.normalize = Normalize(mean, std)

    def batch_inputs(self, samples: list[np.ndarray]) -> list[np.ndarray]:
        """Gather samples into batches for inference purposes

        Args:
            samples: list of samples (np.ndarray)

        Returns:
            list of batched samples
        """
        # Stride through the samples batch_size at a time; the final slice may
        # be shorter than batch_size (slicing clamps at the end of the list)
        return [
            np.stack(samples[start : start + self.batch_size], axis=0)
            for start in range(0, len(samples), self.batch_size)
        ]

    def sample_transforms(self, x: np.ndarray) -> np.ndarray:
        if x.ndim != 3:
            raise AssertionError("expected list of 3D Tensors")
        if isinstance(x, np.ndarray) and x.dtype not in (np.uint8, np.float32):
            raise TypeError("unsupported data type for numpy.ndarray")
        x = shape_translate(x, "HWC")
        # Resize to the target spatial size
        x = self.resize(x)
        # Cast uint8 images to float in [0, 1]
        if x.dtype == np.uint8:
            x = x.astype(np.float32) / 255.0
        return x

    def __call__(self, x: np.ndarray | list[np.ndarray]) -> list[np.ndarray]:
        """Prepare document data for model forwarding

        Args:
            x: list of images (np.array) or tensors (already resized and batched)

        Returns:
            list of page batches
        """
        if isinstance(x, np.ndarray):
            # Already batched: validate, then resize/cast the whole batch at once
            if x.ndim != 4:
                raise AssertionError("expected 4D Tensor")
            if x.dtype not in (np.uint8, np.float32):
                raise TypeError("unsupported data type for numpy.ndarray")
            x = shape_translate(x, "BHWC")
            # Only resize when the spatial dims do not already match the target
            if (x.shape[1], x.shape[2]) != self.resize.output_size:
                x = np.array([self.resize(sample) for sample in x])
            if x.dtype == np.uint8:
                x = x.astype(np.float32) / 255.0
            batches = [x]
        elif isinstance(x, list) and all(isinstance(sample, np.ndarray) for sample in x):
            # Individual pages: transform each sample, then group into batches
            batches = self.batch_inputs(list(multithread_exec(self.sample_transforms, x)))
        else:
            raise TypeError(f"invalid input type: {type(x)}")
        # Normalize each batch before handing it to the model
        return list(multithread_exec(self.normalize, batches))
================================================
FILE: onnxtr/models/recognition/__init__.py
================================================
from .models import *
from .zoo import *
================================================
FILE: onnxtr/models/recognition/core.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
from onnxtr.utils.repr import NestedObject
__all__ = ["RecognitionPostProcessor"]
class RecognitionPostProcessor(NestedObject):
    """Abstract class to postprocess the raw output of the model

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        # Keep the raw vocab plus an embedding table extended with a trailing
        # blank entry (used by subclasses for decoding)
        self.vocab = vocab
        self._embedding = [*self.vocab, ""]

    def extra_repr(self) -> str:
        return f"vocab_size={len(self.vocab)}"
================================================
FILE: onnxtr/models/recognition/models/__init__.py
================================================
from .crnn import *
from .sar import *
from .master import *
from .vitstr import *
from .parseq import *
from .viptr import *
================================================
FILE: onnxtr/models/recognition/models/crnn.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
from copy import deepcopy
from itertools import groupby
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["CRNN", "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large"]
# Default configuration for each CRNN variant: normalization statistics
# (mean/std), expected input shape (C, H, W), recognition vocabulary, and the
# download URLs for the float and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "crnn_vgg16_bn": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.7.1/crnn_vgg16_bn-743599aa.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.7.1/crnn_vgg16_bn_static_8_bit-df1b594d.onnx",
    },
    "crnn_mobilenet_v3_small": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_small-bded4d49.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_small_static_8_bit-4949006f.onnx",
    },
    "crnn_mobilenet_v3_large": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_large-d42e8185.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_large_static_8_bit-459e856d.onnx",
    },
}
class CRNNPostProcessor(RecognitionPostProcessor):
    """Convert raw CRNN logits into words using CTC best-path decoding.

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(self, vocab):
        self.vocab = vocab

    def decode_sequence(self, sequence, vocab):
        """Map a sequence of vocabulary indices to the corresponding string."""
        return "".join(vocab[int(idx)] for idx in sequence)

    def ctc_best_path(
        self,
        logits,
        vocab,
        blank=0,
    ):
        """Greedy (best-path) CTC decoding, as shown by Graves (Dissertation, p63).

        Args:
            logits: model output, shape: N x T x C
            vocab: vocabulary to use
            blank: index of blank label

        Returns:
            A list of tuples: (word, confidence)
        """
        # Sequence confidence = the weakest per-timestep max probability
        char_probs = softmax(logits, axis=-1).max(axis=-1)
        seq_probs = char_probs.min(axis=1).astype(float).tolist()
        # Best path: argmax per timestep, collapse repeats, drop blanks
        words = []
        for path in np.argmax(logits, axis=-1):
            collapsed = [idx for idx, _ in groupby(path.tolist()) if idx != blank]
            words.append(self.decode_sequence(collapsed, vocab))
        return list(zip(words, seq_probs))

    def __call__(self, logits):
        """Decode the raw model output with CTC best-path decoding.

        Args:
            logits: raw output of the model, shape (N, C + 1, seq_len)

        Returns:
            A list of (word, confidence) tuples
        """
        # The blank label is appended after the vocabulary
        return self.ctc_best_path(logits=logits, vocab=self.vocab, blank=len(self.vocab))
class CRNN(Engine):
    """CRNN Onnx loader

    Args:
        model_path: path or url to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: configuration dictionary
        **kwargs: additional arguments to be passed to `Engine`
    """

    _children_names: list[str] = ["postprocessor"]

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = CRNNPostProcessor(self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        # Post-process
        out["preds"] = self.postprocessor(logits)
        return out
def _crnn(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> CRNN:
    """Configure and instantiate a CRNN model for the given architecture."""
    vocab = kwargs.setdefault("vocab", default_cfgs[arch]["vocab"])
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = vocab
    cfg["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return CRNN(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def crnn_vgg16_bn(
    model_path: str = default_cfgs["crnn_vgg16_bn"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> CRNN:
    """CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based
    Sequence Recognition and Its Application to Scene Text Recognition"
    <https://arxiv.org/abs/1507.05717>`_.

    >>> import numpy as np
    >>> from onnxtr.models import crnn_vgg16_bn
    >>> model = crnn_vgg16_bn()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the CRNN architecture

    Returns:
        text recognition architecture
    """
    return _crnn("crnn_vgg16_bn", model_path, load_in_8_bit, engine_cfg, **kwargs)
def crnn_mobilenet_v3_small(
    model_path: str = default_cfgs["crnn_mobilenet_v3_small"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> CRNN:
    """CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for
    Image-based Sequence Recognition and Its Application to Scene Text Recognition"
    <https://arxiv.org/abs/1507.05717>`_.

    >>> import numpy as np
    >>> from onnxtr.models import crnn_mobilenet_v3_small
    >>> model = crnn_mobilenet_v3_small()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the CRNN architecture

    Returns:
        text recognition architecture
    """
    return _crnn("crnn_mobilenet_v3_small", model_path, load_in_8_bit, engine_cfg, **kwargs)
def crnn_mobilenet_v3_large(
    model_path: str = default_cfgs["crnn_mobilenet_v3_large"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> CRNN:
    """CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for
    Image-based Sequence Recognition and Its Application to Scene Text Recognition"
    <https://arxiv.org/abs/1507.05717>`_.

    >>> import numpy as np
    >>> from onnxtr.models import crnn_mobilenet_v3_large
    >>> model = crnn_mobilenet_v3_large()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the CRNN architecture

    Returns:
        text recognition architecture
    """
    return _crnn("crnn_mobilenet_v3_large", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/master.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["MASTER", "master"]
# Default MASTER configuration: normalization statistics, expected input
# shape (C, H, W), decoding vocabulary, and download URLs for the
# full-precision and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "master": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/master-b1287fcd.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/master_dynamic_8_bit-d8bd8206.onnx",
    },
}
class MASTER(Engine):
    """MASTER Onnx loader

    Args:
        model_path: path or url to onnx model file
        vocab: vocabulary, (without EOS, SOS, PAD)
        engine_cfg: configuration for the inference engine
        cfg: dictionary containing information about the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = MASTERPostProcessor(vocab=self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference on a batch of crops.

        Args:
            x: images
            return_model_output: if True, also expose the raw logits

        Returns:
            A dictionary containing the predictions and, optionally, the logits.
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        out["preds"] = self.postprocessor(logits)
        return out
class MASTERPostProcessor(RecognitionPostProcessor):
    """Post-processor for the MASTER model

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        super().__init__(vocab)
        # Special tokens appended after the vocabulary: end-of-sequence,
        # start-of-sequence and padding (in that order). These were empty
        # strings before, which made ``str.split("")`` raise ValueError.
        self._embedding = list(vocab) + ["<eos>"] + ["<sos>"] + ["<pad>"]

    def __call__(self, logits: np.ndarray) -> list[tuple[str, float]]:
        # compute pred with argmax for attention models
        out_idxs = np.argmax(logits, axis=-1)
        # N x L probabilities of the selected classes
        probs = np.take_along_axis(softmax(logits, axis=-1), out_idxs[..., None], axis=-1).squeeze(-1)
        # Take the minimum confidence of the sequence
        probs = np.min(probs, axis=1)
        # Truncate each decoded string at the first end-of-sequence token
        word_values = [
            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
        ]
        return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist()))
def _master(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MASTER:
    """Configure and instantiate a MASTER model for the given architecture."""
    # Patch the config
    cfg = deepcopy(default_cfgs[arch])
    cfg["input_shape"] = kwargs.get("input_shape", cfg["input_shape"])
    cfg["vocab"] = kwargs.setdefault("vocab", cfg["vocab"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    return MASTER(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def master(
    model_path: str = default_cfgs["master"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MASTER:
    """MASTER as described in `"MASTER: Multi-Aspect Non-local Network for Scene Text Recognition"
    <https://arxiv.org/abs/1910.02562>`_.

    >>> import numpy as np
    >>> from onnxtr.models import master
    >>> model = master()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments passed to the MASTER architecture

    Returns:
        text recognition architecture
    """
    return _master("master", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/parseq.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["PARSeq", "parseq"]
# Default PARSeq configuration: normalization statistics, expected input
# shape (C, H, W), decoding vocabulary, and download URLs for the
# full-precision and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "parseq": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/parseq-00b40714.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/parseq_dynamic_8_bit-5b04d9f7.onnx",
    },
}
class PARSeq(Engine):
    """PARSeq Onnx loader

    Args:
        model_path: path to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: dictionary containing information about the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = PARSeqPostProcessor(vocab=self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        out["preds"] = self.postprocessor(logits)
        return out
class PARSeqPostProcessor(RecognitionPostProcessor):
    """Post processor for PARSeq architecture

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        super().__init__(vocab)
        # Special tokens appended after the vocabulary: end-of-sequence,
        # start-of-sequence and padding. These were empty strings before,
        # which made ``str.split("")`` raise ValueError.
        self._embedding = list(vocab) + ["<eos>", "<sos>", "<pad>"]

    def __call__(self, logits):
        # compute pred with argmax for attention models
        out_idxs = np.argmax(logits, axis=-1)
        preds_prob = softmax(logits, axis=-1).max(axis=-1)
        # Truncate each decoded string at the first end-of-sequence token
        word_values = [
            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
        ]
        # compute probabilities for each word up to the EOS token
        probs = [
            preds_prob[i, : len(word)].clip(0, 1).mean().astype(float) if word else 0.0
            for i, word in enumerate(word_values)
        ]
        return list(zip(word_values, probs))
def _parseq(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> PARSeq:
    """Configure and instantiate a PARSeq model for the given architecture."""
    # Patch the config
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = kwargs.setdefault("vocab", cfg["vocab"])
    cfg["input_shape"] = kwargs.get("input_shape", cfg["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return PARSeq(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def parseq(
    model_path: str = default_cfgs["parseq"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> PARSeq:
    """PARSeq architecture from
    `"Scene Text Recognition with Permuted Autoregressive Sequence Models"
    <https://arxiv.org/abs/2207.06966>`_.

    >>> import numpy as np
    >>> from onnxtr.models import parseq
    >>> model = parseq()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the PARSeq architecture

    Returns:
        text recognition architecture
    """
    return _parseq("parseq", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/sar.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["SAR", "sar_resnet31"]
# Default SAR configuration: normalization statistics, expected input shape
# (C, H, W), decoding vocabulary, and download URLs for the full-precision
# and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "sar_resnet31": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/sar_resnet31-395f8005.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/sar_resnet31_static_8_bit-c07316bc.onnx",
    },
}
class SAR(Engine):
    """SAR Onnx loader

    Args:
        model_path: path to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: dictionary containing information about the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = SARPostProcessor(self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        out["preds"] = self.postprocessor(logits)
        return out
class SARPostProcessor(RecognitionPostProcessor):
    """Post processor for SAR architectures

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        super().__init__(vocab)
        # End-of-sequence token appended after the vocabulary. This was an
        # empty string before, which made ``str.split("")`` raise ValueError.
        self._embedding = list(self.vocab) + ["<eos>"]

    def __call__(self, logits):
        # compute pred with argmax for attention models
        out_idxs = np.argmax(logits, axis=-1)
        # N x L probabilities of the selected classes
        probs = np.take_along_axis(softmax(logits, axis=-1), out_idxs[..., None], axis=-1).squeeze(-1)
        # Take the minimum confidence of the sequence
        probs = np.min(probs, axis=1)
        # Truncate each decoded string at the first end-of-sequence token
        word_values = [
            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
        ]
        return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist()))
def _sar(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> SAR:
    """Configure and instantiate a SAR model for the given architecture."""
    # Patch the config
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = kwargs.setdefault("vocab", cfg["vocab"])
    cfg["input_shape"] = kwargs.get("input_shape", cfg["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return SAR(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def sar_resnet31(
    model_path: str = default_cfgs["sar_resnet31"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> SAR:
    """SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read: A Simple and Strong
    Baseline for Irregular Text Recognition" <https://arxiv.org/abs/1811.00751>`_.

    >>> import numpy as np
    >>> from onnxtr.models import sar_resnet31
    >>> model = sar_resnet31()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the SAR architecture

    Returns:
        text recognition architecture
    """
    return _sar("sar_resnet31", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/viptr.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import logging
from copy import deepcopy
from itertools import groupby
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["VIPTR", "viptr_tiny"]
# Default VIPTR configuration: normalization statistics, expected input
# shape (C, H, W), decoding vocabulary and download URLs.
# NOTE: no quantized weights are published yet, so "url_8_bit" mirrors the
# full-precision file (see the warning emitted in `_viptr`).
default_cfgs: dict[str, dict[str, Any]] = {
    "viptr_tiny": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.3/viptr_tiny-499b8015.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.3/viptr_tiny-499b8015.onnx",
    },
}
class VIPTRPostProcessor(RecognitionPostProcessor):
    """Convert raw VIPTR logits into words using CTC best-path decoding.

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(self, vocab):
        self.vocab = vocab

    def decode_sequence(self, sequence, vocab):
        """Map a sequence of vocabulary indices to the corresponding string."""
        return "".join(vocab[int(idx)] for idx in sequence)

    def ctc_best_path(
        self,
        logits,
        vocab,
        blank=0,
    ):
        """Greedy (best-path) CTC decoding, as shown by Graves (Dissertation, p63).

        Args:
            logits: model output, shape: N x T x C
            vocab: vocabulary to use
            blank: index of blank label

        Returns:
            A list of tuples: (word, confidence)
        """
        # Sequence confidence = the weakest per-timestep max probability
        char_probs = softmax(logits, axis=-1).max(axis=-1)
        seq_probs = char_probs.min(axis=1).astype(float).tolist()
        # Best path: argmax per timestep, collapse repeats, drop blanks
        words = []
        for path in np.argmax(logits, axis=-1):
            collapsed = [idx for idx, _ in groupby(path.tolist()) if idx != blank]
            words.append(self.decode_sequence(collapsed, vocab))
        return list(zip(words, seq_probs))

    def __call__(self, logits):
        """Decode the raw model output with CTC best-path decoding.

        Args:
            logits: raw output of the model, shape (N, C + 1, seq_len)

        Returns:
            A list of (word, confidence) tuples
        """
        # The blank label is appended after the vocabulary
        return self.ctc_best_path(logits=logits, vocab=self.vocab, blank=len(self.vocab))
class VIPTR(Engine):
    """VIPTR Onnx loader

    Args:
        model_path: path or url to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: configuration dictionary
        **kwargs: additional arguments to be passed to `Engine`
    """

    _children_names: list[str] = ["postprocessor"]

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = VIPTRPostProcessor(self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        # Post-process
        out["preds"] = self.postprocessor(logits)
        return out
def _viptr(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> VIPTR:
    """Configure and instantiate a VIPTR model for the given architecture."""
    if load_in_8_bit:
        logging.warning("VIPTR models do not support 8-bit quantization yet. Loading full precision model...")
    vocab = kwargs.setdefault("vocab", default_cfgs[arch]["vocab"])
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = vocab
    cfg["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return VIPTR(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def viptr_tiny(
    model_path: str = default_cfgs["viptr_tiny"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> VIPTR:
    """VIPTR as described in `"VIPTR: A Vision Permutable Extractor for Fast and Efficient
    Scene Text Recognition" <https://arxiv.org/abs/2401.10110>`_.

    >>> import numpy as np
    >>> from onnxtr.models import viptr_tiny
    >>> model = viptr_tiny()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the VIPTR architecture

    Returns:
        text recognition architecture
    """
    return _viptr("viptr_tiny", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/vitstr.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["ViTSTR", "vitstr_small", "vitstr_base"]
# Default ViTSTR configurations: normalization statistics, expected input
# shape (C, H, W), decoding vocabulary, and download URLs for the
# full-precision and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "vitstr_small": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_small-3ff9c500.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_small_dynamic_8_bit-bec6c796.onnx",
    },
    "vitstr_base": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_base-ff62f5be.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_base_dynamic_8_bit-976c7cd6.onnx",
    },
}
class ViTSTR(Engine):
    """ViTSTR Onnx loader

    Args:
        model_path: path to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: dictionary containing information about the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = ViTSTRPostProcessor(vocab=self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        out["preds"] = self.postprocessor(logits)
        return out
class ViTSTRPostProcessor(RecognitionPostProcessor):
    """Post processor for ViTSTR architecture

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        super().__init__(vocab)
        # Special tokens appended after the vocabulary: end- and
        # start-of-sequence. These were empty strings before, which made
        # ``str.split("")`` raise ValueError.
        self._embedding = list(vocab) + ["<eos>", "<sos>"]

    def __call__(self, logits):
        # compute pred with argmax for attention models
        out_idxs = np.argmax(logits, axis=-1)
        preds_prob = softmax(logits, axis=-1).max(axis=-1)
        # Truncate each decoded string at the first end-of-sequence token
        word_values = [
            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
        ]
        # compute probabilities for each word up to the EOS token
        probs = [
            preds_prob[i, : len(word)].clip(0, 1).mean().astype(float) if word else 0.0
            for i, word in enumerate(word_values)
        ]
        return list(zip(word_values, probs))
def _vitstr(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> ViTSTR:
    """Configure and instantiate a ViTSTR model for the given architecture."""
    # Patch the config
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = kwargs.setdefault("vocab", cfg["vocab"])
    cfg["input_shape"] = kwargs.get("input_shape", cfg["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return ViTSTR(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def vitstr_small(
    model_path: str = default_cfgs["vitstr_small"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> ViTSTR:
    """ViTSTR-Small as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
    <https://arxiv.org/abs/2105.08582>`_.

    >>> import numpy as np
    >>> from onnxtr.models import vitstr_small
    >>> model = vitstr_small()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the ViTSTR architecture

    Returns:
        text recognition architecture
    """
    return _vitstr("vitstr_small", model_path, load_in_8_bit, engine_cfg, **kwargs)
def vitstr_base(
    model_path: str = default_cfgs["vitstr_base"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> ViTSTR:
    """ViTSTR-Base as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
    <https://arxiv.org/abs/2105.08582>`_.

    >>> import numpy as np
    >>> from onnxtr.models import vitstr_base
    >>> model = vitstr_base()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the ViTSTR architecture

    Returns:
        text recognition architecture
    """
    return _vitstr("vitstr_base", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/predictor/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/models/recognition/predictor/_utils.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import math
import numpy as np
from ..utils import merge_multi_strings
__all__ = ["split_crops", "remap_preds"]
def split_crops(
crops: list[np.ndarray],
max_ratio: float,
target_ratio: int,
split_overlap_ratio: float,
channels_last: bool = True,
) -> tuple[list[np.ndarray], list[int | tuple[int, int, float]], bool]:
"""
Split crops horizontally if they exceed a given aspect ratio.
Args:
crops: List of image crops (H, W, C) if channels_last else (C, H, W).
max_ratio: Aspect ratio threshold above which crops are split.
target_ratio: Target aspect ratio after splitting (e.g., 4 for 128x32).
split_overlap_ratio: Desired overlap between splits (as a fraction of split width).
channels_last: Whether the crops are in channels-last format.
Returns:
A tuple containing:
- The new list of crops (possibly with splits),
- A mapping indicating how to reassemble predictions,
- A boolean indicating whether remapping is required.
"""
if split_overlap_ratio <= 0.0 or split_overlap_ratio >= 1.0:
raise ValueError(f"Valid range for split_overlap_ratio is (0.0, 1.0), but is: {split_overlap_ratio}")
remap_required = False
new_crops: list[np.ndarray] = []
crop_map: list[int | tuple[int, int, float]] = []
for crop in crops:
h, w = crop.shape[:2] if channels_last else crop.shape[-2:]
aspect_ratio = w / h
if aspect_ratio > max_ratio:
split_width = max(1, math.ceil(h * target_ratio))
overlap_width = max(0, math.floor(split_width * split_overlap_ratio))
splits, last_overlap = _split_horizontally(crop, split_width, overlap_width, channels_last)
# Remove any empty splits
splits = [s for s in splits if all(dim > 0 for dim in s.shape)]
if splits:
crop_map.append((len(new_crops), len(new_crops) + len(splits), last_overlap))
new_crops.extend(splits)
remap_required = True
else:
# Fallback: treat it as a single crop
crop_map.append(len(new_crops))
new_crops.append(crop)
else:
crop_map.append(len(new_crops))
new_crops.append(crop)
return new_crops, crop_map, remap_required
def _split_horizontally(
image: np.ndarray, split_width: int, overlap_width: int, channels_last: bool
) -> tuple[list[np.ndarray], float]:
"""
Horizontally split a single image with overlapping regions.
Args:
image: The image to split (H, W, C) if channels_last else (C, H, W).
split_width: Width of each split.
overlap_width: Width of the overlapping region.
channels_last: Whether the image is in channels-last format.
Returns:
- A list of horizontal image slices.
- The actual overlap ratio of the last split.
"""
image_width = image.shape[1] if channels_last else image.shape[-1]
if image_width <= split_width:
return [image], 0.0
# Compute start columns for each split
step = split_width - overlap_width
starts = list(range(0, image_width - split_width + 1, step))
# Ensure the last patch reaches the end of the image
if starts[-1] + split_width < image_width:
starts.append(image_width - split_width)
splits = []
for start_col in starts:
end_col = start_col + split_width
if channels_last:
split = image[:, start_col:end_col, :]
else:
split = image[:, :, start_col:end_col]
splits.append(split)
# Calculate the last overlap ratio, if only one split no overlap
last_overlap = 0
if len(starts) > 1:
last_overlap = (starts[-2] + split_width) - starts[-1]
last_overlap_ratio = last_overlap / split_width if split_width else 0.0
return splits, last_overlap_ratio
def remap_preds(
preds: list[tuple[str, float]],
crop_map: list[int | tuple[int, int, float]],
overlap_ratio: float,
) -> list[tuple[str, float]]:
"""
Reconstruct predictions from possibly split crops.
Args:
preds: List of (text, confidence) tuples from each crop.
crop_map: Map returned by `split_crops`.
overlap_ratio: Overlap ratio used during splitting.
Returns:
List of merged (text, confidence) tuples corresponding to original crops.
"""
remapped = []
for item in crop_map:
if isinstance(item, int):
remapped.append(preds[item])
else:
start_idx, end_idx, last_overlap = item
text_parts, confidences = zip(*preds[start_idx:end_idx])
merged_text = merge_multi_strings(list(text_parts), overlap_ratio, last_overlap)
merged_conf = sum(confidences) / len(confidences) # average confidence
remapped.append((merged_text, merged_conf))
return remapped
================================================
FILE: onnxtr/models/recognition/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from collections.abc import Sequence
from typing import Any
import numpy as np
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.repr import NestedObject
from ._utils import remap_preds, split_crops
__all__ = ["RecognitionPredictor"]
class RecognitionPredictor(NestedObject):
    """Implements an object able to identify character sequences in images

    Args:
        pre_processor: transform inputs for easier batched model inference
        model: core recognition architecture
        split_wide_crops: whether to use crop splitting for high aspect ratio crops
    """

    def __init__(
        self,
        pre_processor: PreProcessor,
        model: Any,
        split_wide_crops: bool = True,
    ) -> None:
        super().__init__()
        self.pre_processor = pre_processor
        self.model = model
        self.split_wide_crops = split_wide_crops
        self.critical_ar = 8  # Aspect ratio above which a crop gets split
        self.overlap_ratio = 0.5  # Overlap between neighboring split pieces
        self.target_ar = 6  # Aspect ratio targeted for each split piece

    def __call__(
        self,
        crops: Sequence[np.ndarray],
        **kwargs: Any,
    ) -> list[tuple[str, float]]:
        if len(crops) == 0:
            return []
        # Every crop must be a multi-channel 2D image
        if any(crop.ndim != 3 for crop in crops):
            raise ValueError("incorrect input shape: all crops are expected to be multi-channel 2D images.")

        # Optionally split crops whose aspect ratio exceeds the critical threshold
        remapped = False
        if self.split_wide_crops:
            new_crops, crop_map, remapped = split_crops(
                crops,  # type: ignore[arg-type]
                self.critical_ar,
                self.target_ar,
                self.overlap_ratio,
                True,
            )
            if remapped:
                crops = new_crops

        # Resize & batch the crops, then run the model batch by batch
        processed_batches = self.pre_processor(crops)  # type: ignore[arg-type]
        out: list[tuple[str, float]] = []
        for batch in processed_batches:
            out.extend(self.model(batch, **kwargs)["preds"])

        # Merge predictions of split crops back into one prediction per input crop
        if self.split_wide_crops and remapped:
            out = remap_preds(out, crop_map, self.overlap_ratio)
        return out
================================================
FILE: onnxtr/models/recognition/utils.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from rapidfuzz.distance import Hamming
__all__ = ["merge_strings", "merge_multi_strings"]
def merge_strings(a: str, b: str, overlap_ratio: float) -> str:
    """Merges 2 character sequences in the best way to maximize the alignment of their overlapping characters.

    Args:
        a: first char seq, suffix should be similar to b's prefix.
        b: second char seq, prefix should be similar to a's suffix.
        overlap_ratio: estimated ratio of overlapping characters.

    Returns:
        A merged character sequence.

    Example::
        >>> from onnxtr.models.recognition.utils import merge_strings
        >>> merge_strings('abcd', 'cdefgh', 0.5)
        'abcdefgh'
        >>> merge_strings('abcdi', 'cdefgh', 0.5)
        'abcdefgh'
    """
    seq_len = min(len(a), len(b))
    if seq_len <= 1:  # One sequence is empty or will be after cropping in next step, return both to keep data
        return a + b
    a_crop, b_crop = a[:-1], b[1:]  # Remove last letter of "a" and first of "b", because they might be cut off
    max_overlap = min(len(a_crop), len(b_crop))
    # Hamming distance between a's suffix and b's prefix for every candidate overlap length (index i = overlap i+1)
    scores = [Hamming.distance(a_crop[-i:], b_crop[:i], processor=None) for i in range(1, max_overlap + 1)]
    # Candidate overlaps that align perfectly (zero mismatches)
    zero_matches = [i for i, score in enumerate(scores) if score == 0]
    expected_overlap = round(len(b) * overlap_ratio) - 3  # adjust for cropping and index
    # Case 1: One perfect match - exactly one zero score - just merge there
    if len(zero_matches) == 1:
        i = zero_matches[0]
        return a_crop + b_crop[i + 1 :]
    # Case 2: Multiple perfect matches - likely due to repeated characters.
    # Use the estimated overlap length to choose the match closest to the expected alignment.
    elif len(zero_matches) > 1:
        best_i = min(zero_matches, key=lambda x: abs(x - expected_overlap))
        return a_crop + b_crop[best_i + 1 :]
    # Case 3: Absence of zero scores indicates that the same character in the image was recognized differently OR
    # that the overlap was too small and we just need to merge the crops fully
    if expected_overlap < -1:
        return a + b
    elif expected_overlap < 0:
        return a_crop + b_crop
    # Find best overlap by minimizing Hamming distance + distance from expected overlap size
    combined_scores = [score + abs(i - expected_overlap) for i, score in enumerate(scores)]
    best_i = combined_scores.index(min(combined_scores))
    return a_crop + b_crop[best_i + 1 :]
def merge_multi_strings(seq_list: list[str], overlap_ratio: float, last_overlap_ratio: float) -> str:
    """
    Fold a left-to-right list of overlapping character sequences into a single string.

    Args:
        seq_list: list of sequences to merge. Sequences need to be ordered from left to right.
        overlap_ratio: Estimated ratio of overlapping letters between neighboring strings.
        last_overlap_ratio: Estimated ratio of overlapping letters for the last element in seq_list.

    Returns:
        A merged character sequence

    Example::
        >>> from onnxtr.models.recognition.utils import merge_multi_strings
        >>> merge_multi_strings(['abc', 'bcdef', 'difghi', 'aijkl'], 0.5, 0.1)
        'abcdefghijkl'
    """
    if not seq_list:
        return ""
    merged, *remainder = seq_list
    last_idx = len(remainder) - 1
    for idx, piece in enumerate(remainder):
        # The trailing split piece uses its own (usually smaller) overlap estimate
        ratio = last_overlap_ratio if idx == last_idx else overlap_ratio
        merged = merge_strings(merged, piece, ratio)
    return merged
================================================
FILE: onnxtr/models/recognition/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from .. import recognition
from ..engine import EngineConfig
from ..preprocessor import PreProcessor
from .predictor import RecognitionPredictor
__all__ = ["recognition_predictor"]
# Names of the supported recognition architectures; each entry matches a factory
# function exposed by the `recognition` module (resolved via `recognition.__dict__`)
ARCHS: list[str] = [
    "crnn_vgg16_bn",
    "crnn_mobilenet_v3_small",
    "crnn_mobilenet_v3_large",
    "sar_resnet31",
    "master",
    "vitstr_small",
    "vitstr_base",
    "parseq",
    "viptr_tiny",
]
def _predictor(
    arch: Any, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any
) -> RecognitionPredictor:
    """Build a ``RecognitionPredictor`` from an architecture name or an instantiated model.

    Args:
        arch: architecture name (must be in ``ARCHS``) or a pre-built recognition model
        load_in_8_bit: whether to load the 8-bit quantized variant (name-based path only)
        engine_cfg: inference engine configuration (name-based path only)
        **kwargs: forwarded to the ``PreProcessor``

    Returns:
        the assembled recognition predictor

    Raises:
        ValueError: if the architecture name or the model type is unknown
    """
    if isinstance(arch, str):
        if arch not in ARCHS:
            raise ValueError(f"unknown architecture '{arch}'")
        # Resolve the factory function by name and instantiate the model
        _model = recognition.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
    else:
        supported_models = (
            recognition.CRNN,
            recognition.SAR,
            recognition.MASTER,
            recognition.ViTSTR,
            recognition.PARSeq,
            recognition.VIPTR,
        )
        if not isinstance(arch, supported_models):
            raise ValueError(f"unknown architecture: {type(arch)}")
        _model = arch

    # Default to the model's own normalization stats and a large inference batch
    kwargs.setdefault("mean", _model.cfg["mean"])
    kwargs.setdefault("std", _model.cfg["std"])
    kwargs.setdefault("batch_size", 1024)
    input_shape = _model.cfg["input_shape"][1:]
    return RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **kwargs), _model)
def recognition_predictor(
    arch: Any = "crnn_vgg16_bn",
    symmetric_pad: bool = False,
    batch_size: int = 128,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> RecognitionPredictor:
    """Text recognition architecture.

    Example::
        >>> import numpy as np
        >>> from onnxtr.models import recognition_predictor
        >>> model = recognition_predictor()
        >>> input_page = (255 * np.random.rand(32, 128, 3)).astype(np.uint8)
        >>> out = model([input_page])

    Args:
        arch: name of the architecture or model itself to use (e.g. 'crnn_vgg16_bn')
        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration of inference engine
        **kwargs: optional parameters to be passed to the architecture

    Returns:
        Recognition predictor
    """
    # Thin public wrapper: all the assembly work happens in `_predictor`
    return _predictor(
        arch,
        symmetric_pad=symmetric_pad,
        batch_size=batch_size,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/models/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from .detection.zoo import detection_predictor
from .engine import EngineConfig
from .predictor import OCRPredictor
from .recognition.zoo import recognition_predictor
__all__ = ["ocr_predictor"]
def _predictor(
    det_arch: Any,
    reco_arch: Any,
    assume_straight_pages: bool = True,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    det_bs: int = 2,
    reco_bs: int = 512,
    detect_orientation: bool = False,
    straighten_pages: bool = False,
    detect_language: bool = False,
    load_in_8_bit: bool = False,
    det_engine_cfg: EngineConfig | None = None,
    reco_engine_cfg: EngineConfig | None = None,
    clf_engine_cfg: EngineConfig | None = None,
    **kwargs,
) -> OCRPredictor:
    """Assemble an ``OCRPredictor`` from a detection stage and a recognition stage."""
    # Text detection stage
    det = detection_predictor(
        det_arch,
        batch_size=det_bs,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=det_engine_cfg,
    )
    # Text recognition stage
    reco = recognition_predictor(
        reco_arch,
        batch_size=reco_bs,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=reco_engine_cfg,
    )
    # Combine both stages into the end-to-end predictor
    return OCRPredictor(
        det,
        reco,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        detect_orientation=detect_orientation,
        straighten_pages=straighten_pages,
        detect_language=detect_language,
        clf_engine_cfg=clf_engine_cfg,
        **kwargs,
    )
def ocr_predictor(
    det_arch: Any = "fast_base",
    reco_arch: Any = "crnn_vgg16_bn",
    assume_straight_pages: bool = True,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    export_as_straight_boxes: bool = False,
    detect_orientation: bool = False,
    straighten_pages: bool = False,
    detect_language: bool = False,
    load_in_8_bit: bool = False,
    det_engine_cfg: EngineConfig | None = None,
    reco_engine_cfg: EngineConfig | None = None,
    clf_engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> OCRPredictor:
    """End-to-end OCR architecture using one model for localization, and another for text recognition.

    >>> import numpy as np
    >>> from onnxtr.models import ocr_predictor
    >>> model = ocr_predictor('db_resnet50', 'crnn_vgg16_bn')
    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
    >>> out = model([input_page])

    Args:
        det_arch: name of the detection architecture or the model itself to use
            (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
        reco_arch: name of the recognition architecture or the model itself to use
            (e.g. 'crnn_vgg16_bn', 'sar_resnet31')
        assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
            without rotated textual elements.
        preserve_aspect_ratio: If True, pad the input document image to preserve the aspect ratio before
            running the detection model on it.
        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right.
        export_as_straight_boxes: when assume_straight_pages is set to False, export final predictions
            (potentially rotated) as straight bounding boxes.
        detect_orientation: if True, the estimated general page orientation will be added to the predictions
            for each page. Doing so will slightly deteriorate the overall latency.
        straighten_pages: if True, estimates the page general orientation based on the segmentation map
            median line orientation. Then, rotates page before passing it again to the deep learning
            detection module. Doing so will improve performances for documents with page-uniform rotations.
        detect_language: if True, the language prediction will be added to the predictions for each
            page. Doing so will slightly deteriorate the overall latency.
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        det_engine_cfg: configuration of the detection engine
        reco_engine_cfg: configuration of the recognition engine
        clf_engine_cfg: configuration of the orientation classification engine
        kwargs: keyword args of `OCRPredictor`

    Returns:
        OCR predictor
    """
    # Delegate the assembly of both stages to the private builder
    return _predictor(
        det_arch,
        reco_arch,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        export_as_straight_boxes=export_as_straight_boxes,
        detect_orientation=detect_orientation,
        straighten_pages=straighten_pages,
        detect_language=detect_language,
        load_in_8_bit=load_in_8_bit,
        det_engine_cfg=det_engine_cfg,
        reco_engine_cfg=reco_engine_cfg,
        clf_engine_cfg=clf_engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/py.typed
================================================
================================================
FILE: onnxtr/transforms/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/transforms/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import math
import numpy as np
from PIL import Image, ImageOps
__all__ = ["Resize", "Normalize"]
class Resize:
    """Resize the input image to the given size

    Args:
        size: target size as (height, width), or a single int for a square output
        interpolation: the PIL resampling filter to use
        preserve_aspect_ratio: whether to preserve the aspect ratio of the image
        symmetric_pad: whether to symmetrically pad the image (instead of bottom-right)
    """

    def __init__(
        self,
        size: int | tuple[int, int],
        interpolation=Image.Resampling.BILINEAR,
        preserve_aspect_ratio: bool = False,
        symmetric_pad: bool = False,
    ) -> None:
        # Validate the raw argument BEFORE the int -> tuple conversion: the previous
        # check ran on self.size, which is always a tuple at that point, so it could
        # never fail (e.g. a list slipped through and broke later at unpack time)
        if not isinstance(size, (tuple, int)):
            raise AssertionError("size should be either a tuple or an int")
        self.size = size if isinstance(size, tuple) else (size, size)
        self.interpolation = interpolation
        self.preserve_aspect_ratio = preserve_aspect_ratio
        self.symmetric_pad = symmetric_pad
        self.output_size = self.size  # kept as an alias for backward compatibility

    def __call__(self, img: np.ndarray) -> np.ndarray:
        """Resize (and optionally pad) a HWC numpy image to `self.size`."""
        # PIL expects uint8; float inputs are assumed to lie in [0, 1]
        if img.dtype != np.uint8:
            img_pil = Image.fromarray((img * 255).clip(0, 255).astype(np.uint8))
        else:
            img_pil = Image.fromarray(img)
        sh, sw = self.size
        w, h = img_pil.size  # PIL reports (width, height)

        if not self.preserve_aspect_ratio:
            # Plain resize to the exact target shape
            return np.array(img_pil.resize((sw, sh), resample=self.interpolation))

        # Scale the image so it fits inside the target box, keeping its aspect ratio
        actual_ratio = h / w
        target_ratio = sh / sw
        if actual_ratio > target_ratio:
            new_h = sh
            new_w = max(int(sh / actual_ratio), 1)
        else:
            new_w = sw
            new_h = max(int(sw * actual_ratio), 1)
        img_resized_pil = img_pil.resize((new_w, new_h), resample=self.interpolation)

        # Fill the remaining area with zeros
        delta_w = sw - new_w
        delta_h = sh - new_h
        if self.symmetric_pad:
            # Symmetric padding: odd deltas put the extra pixel on the left/top
            pad_left = math.ceil(delta_w / 2)
            pad_right = math.floor(delta_w / 2)
            pad_top = math.ceil(delta_h / 2)
            pad_bottom = math.floor(delta_h / 2)
        else:
            # Asymmetric padding: everything goes to the right/bottom
            pad_left, pad_top = 0, 0
            pad_right, pad_bottom = delta_w, delta_h
        img_padded_pil = ImageOps.expand(
            img_resized_pil,
            border=(pad_left, pad_top, pad_right, pad_bottom),
            fill=0,
        )
        return np.array(img_padded_pil)

    def __repr__(self) -> str:
        _repr = f"output_size={self.size}, interpolation='{self.interpolation}'"
        if self.preserve_aspect_ratio:
            _repr += f", preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}"
        return f"{self.__class__.__name__}({_repr})"
class Normalize:
"""Normalize the input image
Args:
mean: mean values to subtract
std: standard deviation values to divide
"""
def __init__(
self,
mean: float | tuple[float, float, float] = (0.485, 0.456, 0.406),
std: float | tuple[float, float, float] = (0.229, 0.224, 0.225),
) -> None:
self.mean = mean
self.std = std
if not isinstance(self.mean, (float, tuple, list)):
raise AssertionError("mean should be either a tuple, a list or a float")
if not isinstance(self.std, (float, tuple, list)):
raise AssertionError("std should be either a tuple, a list or a float")
def __call__(
self,
img: np.ndarray,
) -> np.ndarray:
# Normalize image
return (img - np.array(self.mean).astype(img.dtype)) / np.array(self.std).astype(img.dtype)
def __repr__(self) -> str:
_repr = f"mean={self.mean}, std={self.std}"
return f"{self.__class__.__name__}({_repr})"
================================================
FILE: onnxtr/utils/__init__.py
================================================
from .common_types import *
from .data import *
from .geometry import *
from .vocabs import *
================================================
FILE: onnxtr/utils/common_types.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from pathlib import Path
__all__ = ["Point2D", "BoundingBox", "Polygon4P", "Polygon", "Bbox"]

# (x, y) coordinate pair
Point2D = tuple[float, float]
# ((xmin, ymin), (xmax, ymax)) corner pair
BoundingBox = tuple[Point2D, Point2D]
# Quadrangle described by its 4 corner points
Polygon4P = tuple[Point2D, Point2D, Point2D, Point2D]
# Arbitrary-length polygon
Polygon = list[Point2D]
# Filesystem path, as str or pathlib.Path (internal: not exported via __all__)
AbstractPath = str | Path
# A file reference: a path or raw bytes (internal: not exported via __all__)
AbstractFile = AbstractPath | bytes
# Flat (xmin, ymin, xmax, ymax) box
Bbox = tuple[float, float, float, float]
================================================
FILE: onnxtr/utils/data.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
# Adapted from https://github.com/pytorch/vision/blob/master/torchvision/datasets/utils.py
import hashlib
import logging
import os
import re
import urllib.error
import urllib.request
from pathlib import Path
from tqdm.auto import tqdm
__all__ = ["download_from_url"]
# Extracts the hash fragment from a checkpoint file name,
# e.g. matches bfd8deac from resnet18-bfd8deac.ckpt
HASH_REGEX = re.compile(r"-([a-f0-9]*)\.")
# User-Agent header sent with every download request
USER_AGENT = "felixdittrich92/OnnxTR"
def _urlretrieve(url: str, filename: Path | str, chunk_size: int = 1024) -> None:
    """Stream a URL to disk in chunks, showing a tqdm progress bar.

    Args:
        url: the URL to download
        filename: destination file path
        chunk_size: number of bytes to read per iteration
    """
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with open(filename, "wb") as fh:
        with urllib.request.urlopen(request) as response:
            with tqdm(total=response.length) as pbar:
                # response.read() returns b"" at EOF, so the sentinel must be bytes:
                # the previous "" sentinel never matched and relied on a manual break
                for chunk in iter(lambda: response.read(chunk_size), b""):
                    # Count the bytes actually read so the bar does not overshoot
                    # on a short final chunk
                    pbar.update(len(chunk))
                    fh.write(chunk)
def _check_integrity(file_path: str | Path, hash_prefix: str) -> bool:
with open(file_path, "rb") as f:
sha_hash = hashlib.sha256(f.read()).hexdigest()
return sha_hash[: len(hash_prefix)] == hash_prefix
def download_from_url(
    url: str,
    file_name: str | None = None,
    hash_prefix: str | None = None,
    cache_dir: str | None = None,
    cache_subdir: str | None = None,
) -> Path:
    """Download a file using its URL

    >>> from onnxtr.models import download_from_url
    >>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip")

    Args:
        url: the URL of the file to download
        file_name: optional name of the file once downloaded
        hash_prefix: optional expected SHA256 hash of the file
        cache_dir: cache directory
        cache_subdir: subfolder to use in the cache

    Returns:
        the location of the downloaded file

    Note:
        You can change cache directory location by using `ONNXTR_CACHE_DIR` environment variable.
    """
    # Default file name: last URL segment, stripped of query parameters
    if not isinstance(file_name, str):
        file_name = url.rpartition("/")[-1].split("&")[0]

    cache_dir = (
        str(os.environ.get("ONNXTR_CACHE_DIR", os.path.join(os.path.expanduser("~"), ".cache", "onnxtr")))
        if cache_dir is None
        else cache_dir
    )

    # Check hash in file name (e.g. resnet18-bfd8deac.ckpt -> bfd8deac)
    if hash_prefix is None:
        r = HASH_REGEX.search(file_name)
        hash_prefix = r.group(1) if r else None

    folder_path = Path(cache_dir) if cache_subdir is None else Path(cache_dir, cache_subdir)
    file_path = folder_path.joinpath(file_name)
    # Reuse a previous download when it exists and its hash checks out
    if file_path.is_file() and (hash_prefix is None or _check_integrity(file_path, hash_prefix)):
        logging.info(f"Using downloaded & verified file: {file_path}")
        return file_path

    try:
        # Create folder hierarchy
        folder_path.mkdir(parents=True, exist_ok=True)
    except OSError:
        error_message = f"Failed creating cache directory at {folder_path}"
        if os.environ.get("ONNXTR_CACHE_DIR", ""):
            error_message += " using path from 'ONNXTR_CACHE_DIR' environment variable."
        else:
            error_message += (
                ". You can change default cache directory using 'ONNXTR_CACHE_DIR' environment variable if needed."
            )
        logging.error(error_message)
        raise

    # Download the file
    try:
        print(f"Downloading {url} to {file_path}")
        _urlretrieve(url, file_path)
    except (urllib.error.URLError, IOError) as e:  # pragma: no cover
        if url.startswith("https"):
            # Retry once over plain HTTP before giving up
            url = url.replace("https:", "http:")
            print(f"Failed download. Trying https -> http instead. Downloading {url} to {file_path}")
            _urlretrieve(url, file_path)
        else:
            raise e

    # Remove corrupted files
    if isinstance(hash_prefix, str) and not _check_integrity(file_path, hash_prefix):  # pragma: no cover
        os.remove(file_path)
        raise ValueError(f"corrupted download, the hash of {url} does not match its expected value")
    return file_path
================================================
FILE: onnxtr/utils/fonts.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import logging
import platform
from PIL import ImageFont
__all__ = ["get_font"]
def get_font(font_family: str | None = None, font_size: int = 13) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Resolves a compatible ImageFont for the system

    Args:
        font_family: the font family to use (None to pick a platform default)
        font_size: the size of the font upon rendering

    Returns:
        the Pillow font
    """
    # Font selection: fall back to a platform-specific default when none is given
    if font_family is None:
        try:
            font = ImageFont.truetype("FreeMono.ttf" if platform.system() == "Linux" else "Arial.ttf", font_size)
        except OSError:  # pragma: no cover
            font = ImageFont.load_default()  # type: ignore[assignment]
            # Trailing spaces added so the implicitly concatenated fragments do not
            # run together ("...font,font size..." previously)
            logging.warning(
                "unable to load recommended font family. Loading default PIL font, "
                "font size issues may be expected. "
                "To prevent this, it is recommended to specify the value of 'font_family'."
            )
    else:  # pragma: no cover
        font = ImageFont.truetype(font_family, font_size)
    return font
================================================
FILE: onnxtr/utils/geometry.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from math import ceil
import cv2
import numpy as np
from .common_types import BoundingBox, Polygon4P
# Public API of the geometry helpers (internal utilities such as
# rotate_abs_points / remap_boxes / remove_image_padding stay unexported)
__all__ = [
    "bbox_to_polygon",
    "polygon_to_bbox",
    "order_points",
    "resolve_enclosing_bbox",
    "resolve_enclosing_rbbox",
    "rotate_boxes",
    "compute_expanded_shape",
    "rotate_image",
    "estimate_page_angle",
    "convert_to_relative_coords",
    "rotate_abs_geoms",
    "extract_crops",
    "extract_rcrops",
    "shape_translate",
    "detach_scores",
]
def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P:
    """Expand a 2-point bounding box into its 4 corners.

    Args:
        bbox: ((xmin, ymin), (xmax, ymax)) bounding box

    Returns:
        the corners as (xmin, ymin), (xmax, ymin), (xmin, ymax), (xmax, ymax)
    """
    (xmin, ymin), (xmax, ymax) = bbox
    return (xmin, ymin), (xmax, ymin), (xmin, ymax), (xmax, ymax)
def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
    """Compute the axis-aligned bounding box enclosing a 4-point polygon.

    Args:
        polygon: the 4 (x, y) corner points

    Returns:
        ((xmin, ymin), (xmax, ymax))
    """
    xs, ys = zip(*polygon)
    return (min(xs), min(ys)), (max(xs), max(ys))
def order_points(pts: np.ndarray) -> np.ndarray:
    """Order points in the following order: top-left, top-right, bottom-right, bottom-left

    Args:
        pts: array of shape (4, 2) with point coordinates, or (4,) as (xmin, ymin, xmax, ymax)

    Returns:
        (4, 2) array ordered top-left, top-right, bottom-right, bottom-left
    """
    pts = np.asarray(pts)

    # Flat (xmin, ymin, xmax, ymax) box: the corners are known directly
    if pts.shape == (4,):
        xmin, ymin, xmax, ymax = pts
        corners = [
            [xmin, ymin],  # top-left
            [xmax, ymin],  # top-right
            [xmax, ymax],  # bottom-right
            [xmin, ymax],  # bottom-left
        ]
        return np.array(corners, dtype=pts.dtype)

    # Quadrangle: order by angle around the centroid
    if pts.shape == (4, 2):
        centroid = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centroid[1], pts[:, 0] - centroid[0])
        ordered = pts[np.argsort(angles)]
        # Start at the point with the smallest x + y (top-left in image coords)
        ordered = np.roll(ordered, -np.argmin(ordered.sum(axis=1)), axis=0)

        # Signed shoelace area; its sign tells the ring's winding direction
        def _signed_area(poly: np.ndarray) -> float:
            return 0.5 * np.sum(poly[:, 0] * np.roll(poly[:, 1], -1) - poly[:, 1] * np.roll(poly[:, 0], -1))

        # Negative area: reverse the traversal while keeping the starting corner,
        # so the result always reads TL, TR, BR, BL
        if _signed_area(ordered) < 0:
            ordered = np.roll(ordered[::-1], 1, axis=0)
        return ordered.astype(ordered.dtype)

    raise ValueError(f"Unsupported shape {pts.shape}, expected (4,) or (4,2)")
def detach_scores(boxes: list[np.ndarray]) -> tuple[list[np.ndarray], list[np.ndarray]]:
    """Detach the objectness scores from box predictions

    Args:
        boxes: list of arrays with boxes of shape (N, 5) — straight boxes with a trailing
            score column — or (N, 5, 2) — rotated boxes with the score stored in the last point

    Returns:
        a tuple of two lists: the first one contains the boxes without the objectness scores,
        the second one contains the objectness scores
    """
    loc_preds: list[np.ndarray] = []
    obj_scores: list[np.ndarray] = []
    # Explicit loop instead of `zip(*(...))`: the unpack crashed with a
    # ValueError on an empty input list, where ([], []) is the sane result
    for box in boxes:
        if box.ndim == 2:
            # (N, 5): last column holds the score
            loc_preds.append(box[:, :-1])
            obj_scores.append(box[:, -1])
        else:
            # (N, 5, 2): score stored in the last coordinate of the last point
            loc_preds.append(box[:, :-1])
            obj_scores.append(box[:, -1, -1])
    return loc_preds, obj_scores
def shape_translate(data: np.ndarray, format: str) -> np.ndarray:
    """Translate the shape of the input data to the desired format

    Args:
        data: input data in shape (B, C, H, W) or (B, H, W, C) or (C, H, W) or (H, W, C)
        format: target format ('BCHW', 'BHWC', 'CHW', or 'HWC')

    Returns:
        the reshaped data (returned unchanged when the rank does not match the
        format, the format is unknown, or the data already fits)
    """
    # Rank must match the requested format, otherwise leave the data untouched
    if data.ndim != len(format):
        return data

    # Where the channel axis sits in each target layout
    channel_axis = {"BCHW": 1, "BHWC": -1, "CHW": 0, "HWC": -1}.get(format)
    if channel_axis is None:
        return data
    # Heuristic: an axis of size 1 or 3 is assumed to already be the channel axis
    if data.shape[channel_axis] in [1, 3]:
        return data
    # Channel axis sits at the opposite end: move it into place
    if format == "BCHW":
        return np.moveaxis(data, -1, 1)
    if format == "BHWC":
        return np.moveaxis(data, 1, -1)
    if format == "CHW":
        return np.moveaxis(data, -1, 0)
    return np.moveaxis(data, 0, -1)  # HWC
def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBox | np.ndarray:
    """Compute the smallest bbox enclosing all input boxes.

    Args:
        bboxes: boxes in one of the following formats:
            - an array of boxes: (*, 4), where boxes have this shape:
            (xmin, ymin, xmax, ymax)
            - a list of BoundingBox

    Returns:
        a 4-element array (enclosing box), or a BoundingBox, matching the input kind
    """
    if isinstance(bboxes, np.ndarray):
        xmin, ymin, xmax, ymax = np.split(bboxes, 4, axis=1)
        return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max()])
    # List of ((xmin, ymin), (xmax, ymax)) tuples: gather every corner coordinate
    xs, ys = zip(*[pt for box in bboxes for pt in box])
    return (min(xs), min(ys)), (max(xs), max(ys))
def resolve_enclosing_rbbox(rbboxes: list[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
    """Compute the minimum-area rotated bbox enclosing all input rotated boxes.

    Args:
        rbboxes: boxes in one of the following formats:
            - an array of boxes: (*, 4, 2), where boxes have this shape:
            (x1, y1), (x2, y2), (x3, y3), (x4, y4)
            - a list of BoundingBox
        intermed_size: scale used to convert relative coordinates to a pixel-like
            range for cv2.minAreaRect

    Returns:
        a (4, 2) array (enclosing rotated box)
    """
    # Gather every corner of every box into one point cloud
    cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
    # Convert to absolute for minAreaRect
    rect = cv2.minAreaRect(cloud.astype(np.float32) * intermed_size)
    # Back to relative coordinates, ordered TL, TR, BR, BL by order_points
    return order_points(cv2.boxPoints(rect) / intermed_size)
def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
    """Rotate points counter-clockwise around the origin.

    Args:
        points: array of size (N, 2)
        angle: angle between -90 and +90 degrees

    Returns:
        Rotated points, same shape as the input
    """
    theta = angle * np.pi / 180.0  # degrees -> radians
    rotation = np.array(
        [[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]], dtype=points.dtype
    )
    # points @ R.T applies the counter-clockwise rotation row-wise
    return np.matmul(points, rotation.T)
def compute_expanded_shape(img_shape: tuple[int, int], angle: float) -> tuple[int, int]:
    """Compute the shape of an expanded rotated image

    Args:
        img_shape: the height and width of the image
        angle: angle between -90 and +90 degrees

    Returns:
        the height and width of the rotated image
    """
    height, width = img_shape
    # Two adjacent corners, expressed relative to the image center
    corners: np.ndarray = np.array([
        [width / 2, height / 2],
        [-width / 2, height / 2],
    ])
    rotated = rotate_abs_points(corners, angle)
    # The expanded (w, h) is twice the largest absolute coordinate per axis
    expanded_wh = 2 * np.abs(rotated).max(axis=0)
    return expanded_wh[1], expanded_wh[0]
def rotate_abs_geoms(
    geoms: np.ndarray,
    angle: float,
    img_shape: tuple[int, int],
    expand: bool = True,
) -> np.ndarray:
    """Rotate a batch of bounding boxes or polygons by an angle around the
    image center.

    Args:
        geoms: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes
        angle: anti-clockwise rotation angle in degrees
        img_shape: the height and width of the image
        expand: whether the image should be padded to avoid information loss

    Returns:
        A batch of rotated polygons (N, 4, 2)
    """
    # Switch to polygons: expand (xmin, ymin, xmax, ymax) rows into their 4 corners
    polys = (
        np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1)
        if geoms.ndim == 2
        else geoms
    )
    polys = polys.astype(np.float32)
    # Switch to image center as referential (y axis flipped to point upwards)
    polys[..., 0] -= img_shape[1] / 2
    polys[..., 1] = img_shape[0] / 2 - polys[..., 1]
    # Rotated them around image center
    rotated_polys = rotate_abs_points(polys.reshape(-1, 2), angle).reshape(-1, 4, 2)
    # Switch back to top-left corner as referential, using the expanded canvas when requested
    target_shape = compute_expanded_shape(img_shape, angle) if expand else img_shape
    # Clip coords to fit since there is no expansion
    rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(0, target_shape[1])
    rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(0, target_shape[0])
    return rotated_polys
def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int]) -> np.ndarray:
    """Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape.

    This does not impact the absolute shape of the boxes, but allows to calculate the new
    relative box coordinates after a resizing of the image (both canvases assumed centered).

    Args:
        loc_preds: (N, 4, 2) array of RELATIVE loc_preds
        orig_shape: shape of the origin image
        dest_shape: shape of the destination image

    Returns:
        A batch of rotated loc_preds (N, 4, 2) expressed in the destination referencial

    Raises:
        ValueError: if either shape does not have exactly 2 entries
    """
    if len(dest_shape) != 2:
        raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
    if len(orig_shape) != 2:
        raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}")
    orig_height, orig_width = orig_shape
    dest_height, dest_width = dest_shape
    remapped = loc_preds.copy()
    # Absolute coordinate plus half the padding difference, renormalized by the destination size
    remapped[:, :, 0] = ((loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width
    remapped[:, :, 1] = ((loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height
    return remapped
def rotate_boxes(
loc_preds: np.ndarray,
angle: float,
orig_shape: tuple[int, int],
min_angle: float = 1.0,
target_shape: tuple[int, int] | None = None,
) -> np.ndarray:
"""Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes
(4, 2) of an angle, if angle > min_angle, around the center of the page.
If target_shape is specified, the boxes are remapped to the target shape after the rotation. This
is done to remove the padding that is created by rotate_page(expand=True)
Args:
loc_preds: (N, 4) or (N, 4, 2) array of RELATIVE boxes
angle: angle between -90 and +90 degrees
orig_shape: shape of the origin image
min_angle: minimum angle to rotate boxes
target_shape: shape of the destination image
Returns:
A batch of rotated boxes (N, 4, 2): or a batch of straight bounding boxes
"""
# Change format of the boxes to rotated boxes
_boxes = loc_preds.copy()
if _boxes.ndim == 2:
_boxes = np.stack(
[
_boxes[:, [0, 1]],
_boxes[:, [2, 1]],
_boxes[:, [2, 3]],
_boxes[:, [0, 3]],
],
axis=1,
)
# If small angle, return boxes (no rotation)
if abs(angle) < min_angle or abs(angle) > 90 - min_angle:
return _boxes
# Compute rotation matrix
angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions
rotation_mat = np.array(
[[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=_boxes.dtype
)
# Rotate absolute points
points: np.ndarray = np.stack((_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1)
image_center = (orig_shape[1] / 2, orig_shape[0] / 2)
rotated_points = image_center + np.matmul(points - image_center, rotation_mat)
rotated_boxes: np.ndarray = np.stack(
(rotated_points[:, :, 0] / orig_shape[1], rotated_points[:, :, 1] / orig_shape[0]), axis=-1
)
# Apply a mask if requested
if target_shape is not None:
rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape)
return rotated_boxes
def rotate_image(
    image: np.ndarray,
    angle: float,
    expand: bool = False,
    preserve_origin_shape: bool = False,
) -> np.ndarray:
    """Rotate an image counterclockwise by a given angle.

    Args:
        image: numpy tensor to rotate (H, W, C — np.pad below uses 3 axes)
        angle: rotation angle in degrees, between -90 and +90
        expand: whether the image should be padded before the rotation
        preserve_origin_shape: if expand is set to True, resizes the final output to the original image size

    Returns:
        Rotated array, padded by 0 by default.
    """
    # Compute the expanded padding
    exp_img: np.ndarray
    if expand:
        # Grow the canvas so the rotated content is not clipped
        exp_shape = compute_expanded_shape(image.shape[:2], angle)
        h_pad, w_pad = (
            int(max(0, ceil(exp_shape[0] - image.shape[0]))),
            int(max(0, ceil(exp_shape[1] - image.shape[1]))),
        )
        # Split padding evenly on both sides (odd remainder goes to the trailing side)
        exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
    else:
        exp_img = image
    height, width = exp_img.shape[:2]
    # Rotate around the (possibly padded) image center, keeping the same output size
    rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
    rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height))
    if expand:
        # Pad to get the same aspect ratio
        if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]):
            # Pad width
            if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]):
                h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1])
            # Pad height
            else:
                h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
            rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
        if preserve_origin_shape:
            # rescale back to the original (H, W); cv2.resize takes (W, H)
            rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)
    return rot_img
def remove_image_padding(image: np.ndarray) -> np.ndarray:
    """Remove black (zero-valued) border padding from an image

    Args:
        image: numpy tensor to remove padding from

    Returns:
        Image with padding removed; the input is returned unchanged when it
        contains no non-zero pixel at all
    """
    # Rows / columns that contain at least one non-zero value
    rows = np.any(image, axis=1)
    cols = np.any(image, axis=0)
    row_idx = np.where(rows)[0]
    col_idx = np.where(cols)[0]
    # Fully-black input: nothing to crop (previously raised an IndexError on the empty index array)
    if row_idx.size == 0 or col_idx.size == 0:
        return image
    rmin, rmax = row_idx[[0, -1]]
    cmin, cmax = col_idx[[0, -1]]
    return image[rmin : rmax + 1, cmin : cmax + 1]
def estimate_page_angle(polys: np.ndarray) -> float:
    """Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier)
    and return the estimated angle ccw in degrees
    """
    # Sum the two left-hand corners (points 0, 3) and the two right-hand corners (points 1, 2)
    # w.r.t. the reading direction; the shared factor of 2 cancels in the slope ratio below
    left_x = polys[:, 0, 0] + polys[:, 3, 0]
    left_y = polys[:, 0, 1] + polys[:, 3, 1]
    right_x = polys[:, 1, 0] + polys[:, 2, 0]
    right_y = polys[:, 1, 1] + polys[:, 2, 1]
    with np.errstate(divide="raise", invalid="raise"):
        try:
            # Y axis goes from top to bottom, hence (left_y - right_y)
            angles_deg = np.arctan((left_y - right_y) / (right_x - left_x)) * 180 / np.pi
            return float(np.median(angles_deg))
        except FloatingPointError:
            # Degenerate polygons with no horizontal extent: fall back to "no skew"
            return 0.0
def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) -> np.ndarray:
    """Convert a geometry to relative coordinates

    Args:
        geoms: a set of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)
        img_shape: the height and width of the image

    Returns:
        the updated geometry
    """
    height, width = img_shape
    # Polygons (N, 4, 2): x lives in the last-dim slot 0, y in slot 1
    if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
        rel_polygons: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
        rel_polygons[..., 0] = geoms[..., 0] / width
        rel_polygons[..., 1] = geoms[..., 1] / height
        return rel_polygons.clip(0, 1)
    # Straight boxes (N, 4): (xmin, ymin, xmax, ymax)
    if geoms.ndim == 2 and geoms.shape[1] == 4:
        rel_boxes: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
        rel_boxes[:, ::2] = geoms[:, ::2] / width
        rel_boxes[:, 1::2] = geoms[:, 1::2] / height
        return rel_boxes.clip(0, 1)
    raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}")
def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> list[np.ndarray]:
    """Created cropped images from list of bounding boxes

    Args:
        img: input image
        boxes: bounding boxes of shape (N, 4) where N is the number of boxes, and the relative
            coordinates (xmin, ymin, xmax, ymax); absolute pixel coordinates are accepted when
            the array has an integer dtype
        channels_last: whether the channel dimensions is the last one instead of the first one

    Returns:
        list of cropped images
    """
    if boxes.shape[0] == 0:
        return []
    if boxes.shape[1] != 4:
        raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)")
    # Project relative coordinates to absolute pixel coordinates
    _boxes = boxes.copy()
    h, w = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(_boxes.dtype, np.integer):
        _boxes[:, [0, 2]] *= w
        _boxes[:, [1, 3]] *= h
        _boxes = _boxes.round().astype(int)
    # Make the max coordinates inclusive for slicing. NOTE: the previous `_boxes[2:] += 1`
    # incremented every coordinate of box rows 2..N instead of the (xmax, ymax) columns.
    _boxes[:, 2:] += 1
    if channels_last:
        return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes])
    return deepcopy([img[:, box[1] : box[3], box[0] : box[2]] for box in _boxes])
def extract_rcrops(
    img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
) -> list[np.ndarray]:
    """Created cropped images from list of rotated bounding boxes

    Args:
        img: input image
        polys: bounding boxes of shape (N, 4, 2)
        dtype: target data type of bounding boxes
        channels_last: whether the channel dimensions is the last one instead of the first one
        assume_horizontal: whether the boxes are assumed to be only horizontally oriented

    Returns:
        list of cropped images
    """
    if polys.shape[0] == 0:
        return []
    if polys.shape[1:] != (4, 2):
        raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)")
    # Project relative coordinates to absolute pixel coordinates
    # (integer-typed inputs are assumed to already be absolute)
    _boxes = polys.copy()
    height, width = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(_boxes.dtype, np.integer):
        _boxes[:, :, 0] *= width
        _boxes[:, :, 1] *= height
    # Warping below works on HWC images
    src_img = img if channels_last else img.transpose(1, 2, 0)
    # Handle only horizontal oriented boxes
    if assume_horizontal:
        crops = []
        for box in _boxes:
            # Calculate the centroid of the quadrilateral
            centroid = np.mean(box, axis=0)
            # Divide the points into left and right
            # NOTE(review): assumes exactly 2 points fall on each side of the centroid;
            # degenerate quads may split 1/3 — verify upstream guarantees
            left_points = box[box[:, 0] < centroid[0]]
            right_points = box[box[:, 0] >= centroid[0]]
            # Sort the left points according to the y-axis
            left_points = left_points[np.argsort(left_points[:, 1])]
            top_left_pt = left_points[0]
            bottom_left_pt = left_points[-1]
            # Sort the right points according to the y-axis
            right_points = right_points[np.argsort(right_points[:, 1])]
            top_right_pt = right_points[0]
            bottom_right_pt = right_points[-1]
            box_points = np.array(
                [top_left_pt, bottom_left_pt, top_right_pt, bottom_right_pt],
                dtype=dtype,
            )
            # Get the width and height of the rectangle that will contain the warped quadrilateral
            width_upper = np.linalg.norm(top_right_pt - top_left_pt)
            width_lower = np.linalg.norm(bottom_right_pt - bottom_left_pt)
            height_left = np.linalg.norm(bottom_left_pt - top_left_pt)
            height_right = np.linalg.norm(bottom_right_pt - top_right_pt)
            # Get the maximum width and height
            rect_width = max(int(width_upper), int(width_lower))
            rect_height = max(int(height_left), int(height_right))
            # Destination corners, in the same (tl, bl, tr, br) order as box_points
            dst_pts = np.array(
                [
                    [0, 0],  # top-left
                    # bottom-left
                    [0, rect_height - 1],
                    # top-right
                    [rect_width - 1, 0],
                    # bottom-right
                    [rect_width - 1, rect_height - 1],
                ],
                dtype=dtype,
            )
            # Get the perspective transform matrix using the box points
            affine_mat = cv2.getPerspectiveTransform(box_points, dst_pts)
            # Perform the perspective warp to get the rectified crop
            crop = cv2.warpPerspective(
                src_img,
                affine_mat,
                (rect_width, rect_height),
            )
            # Add the crop to the list of crops
            crops.append(crop)
    # Handle any oriented boxes
    else:
        # Only 3 corners are needed to define an affine transform
        src_pts = _boxes[:, :3].astype(np.float32)
        # Preserve size: d1 = |corner0 - corner1|, d2 = |corner1 - corner2|
        # NOTE(review): treating d1 as crop width and d2 as height assumes corners
        # are ordered (tl, tr, br, ...) — confirm against the detector's output order
        d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
        d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
        # (N, 3, 2): corner0 -> (0, 0), corner1 -> (d1-1, 0), corner2 -> (d1-1, d2-1)
        dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
        dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
        dst_pts[:, 2, 1] = d2 - 1
        # Use a warp transformation to extract the crop
        crops = [
            cv2.warpAffine(
                src_img,
                # Transformation matrix
                cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
                (int(d1[idx]), int(d2[idx])),
            )
            for idx in range(_boxes.shape[0])
        ]
    return crops
================================================
FILE: onnxtr/utils/multithreading.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import multiprocessing as mp
import os
from collections.abc import Callable, Iterable, Iterator
from multiprocessing.pool import ThreadPool
from typing import Any
from onnxtr.file_utils import ENV_VARS_TRUE_VALUES
__all__ = ["multithread_exec"]
def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: int | None = None) -> Iterator[Any]:
"""Execute a given function in parallel for each element of a given sequence
>>> from onnxtr.utils.multithreading import multithread_exec
>>> entries = [1, 4, 8]
>>> results = multithread_exec(lambda x: x ** 2, entries)
Args:
func: function to be executed on each element of the iterable
seq: iterable
threads: number of workers to be used for multiprocessing
Returns:
iterator of the function's results using the iterable as inputs
Notes:
This function uses ThreadPool from multiprocessing package, which uses `/dev/shm` directory for shared memory.
If you do not have write permissions for this directory (if you run `onnxtr` on AWS Lambda for instance),
you might want to disable multiprocessing. To achieve that, set 'ONNXTR_MULTIPROCESSING_DISABLE' to 'TRUE'.
"""
threads = threads if isinstance(threads, int) else min(16, mp.cpu_count())
# Single-thread
if threads < 2 or os.environ.get("ONNXTR_MULTIPROCESSING_DISABLE", "").upper() in ENV_VARS_TRUE_VALUES:
results = map(func, seq)
# Multi-threading
else:
with ThreadPool(threads) as tp:
# ThreadPool's map function returns a list, but seq could be of a different type
# That's why wrapping result in map to return iterator
results = map(lambda x: x, tp.map(func, seq)) # noqa: C417
return results
================================================
FILE: onnxtr/utils/reconstitution.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import logging
from typing import Any
import numpy as np
from anyascii import anyascii
from PIL import Image, ImageDraw
from .fonts import get_font
__all__ = ["synthesize_page"]
# Global variable to avoid multiple warnings
ROTATION_WARNING = False
def _warn_rotation(entry: dict[str, Any]) -> None: # pragma: no cover
global ROTATION_WARNING
if not ROTATION_WARNING and len(entry["geometry"]) == 4:
logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
ROTATION_WARNING = True
def _synthesize(
    response: Image.Image,
    entry: dict[str, Any],
    w: int,
    h: int,
    draw_proba: bool = False,
    font_family: str | None = None,
    smoothing_factor: float = 0.75,
    min_font_size: int = 6,
    max_font_size: int = 50,
) -> Image.Image:
    """Draw a single word or line entry onto the synthesized page.

    Args:
        response: PIL image the text is drawn on (drawn in place and returned)
        entry: exported word/line dict with a "geometry" key (2 points or a polygon)
            and either a "value" (word) or a "words" list (line)
        w: page width in pixels
        h: page height in pixels
        draw_proba: if True, also draw a colored confidence box and score
        font_family: family of the font
        smoothing_factor: shrink factor applied while searching for a fitting font size
        min_font_size: minimum font size
        max_font_size: maximum font size

    Returns:
        the updated PIL image
    """
    # Normalize the geometry to a 4-point polygon
    if len(entry["geometry"]) == 2:
        (xmin, ymin), (xmax, ymax) = entry["geometry"]
        polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
    else:
        polygon = entry["geometry"]
    # Calculate the bounding box of the word (absolute pixel coordinates)
    x_coords, y_coords = zip(*polygon)
    xmin, ymin, xmax, ymax = (
        int(round(w * min(x_coords))),
        int(round(h * min(y_coords))),
        int(round(w * max(x_coords))),
        int(round(h * max(y_coords))),
    )
    word_width = xmax - xmin
    word_height = ymax - ymin
    # If lines are provided instead of words, concatenate the word entries
    if "words" in entry:
        word_text = " ".join(word["value"] for word in entry["words"])
    else:
        word_text = entry["value"]
    # Find the optimal font size: start from the box height and shrink until the
    # rendered text fits inside the word box (or the minimum size is reached)
    try:
        font_size = min(word_height, max_font_size)
        font = get_font(font_family, font_size)
        text_width, text_height = font.getbbox(word_text)[2:4]
        while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
            font_size = max(int(font_size * smoothing_factor), min_font_size)
            font = get_font(font_family, font_size)
            text_width, text_height = font.getbbox(word_text)[2:4]
    except ValueError:  # pragma: no cover
        font = get_font(font_family, min_font_size)
    # Create a mask for the word
    # NOTE(review): this mask is never used afterwards — candidate for removal
    mask = Image.new("L", (w, h), 0)
    ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
    # Draw the word text, falling back to an ASCII transliteration when needed
    d = ImageDraw.Draw(response)
    try:
        try:
            d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
        except UnicodeEncodeError:  # pragma: no cover
            d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
    # Catch generic exceptions to avoid crashing the whole rendering
    except Exception:  # pragma: no cover
        logging.warning(f"Could not render word: {word_text}")
    if draw_proba:
        # Lines without their own confidence get the mean of their words'
        # (note: `w` in the generator shadows the page-width parameter)
        confidence = (
            entry["confidence"]
            if "confidence" in entry
            else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
        )
        p = int(255 * confidence)
        color = (255 - p, 0, p)  # Red to blue gradient based on probability
        d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
        prob_font = get_font(font_family, 20)
        prob_text = f"{confidence:.2f}"
        prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
        # Position the probability slightly above the bounding box
        prob_x_offset = (word_width - prob_text_width) // 2
        prob_y_offset = ymin - prob_text_height - 2
        prob_y_offset = max(0, prob_y_offset)
        d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
    return response
def synthesize_page(
    page: dict[str, Any],
    draw_proba: bool = False,
    font_family: str | None = None,
    smoothing_factor: float = 0.95,
    min_font_size: int = 8,
    max_font_size: int = 50,
) -> np.ndarray:
    """Draw a the content of the element page (OCR response) on a blank page.

    Args:
        page: exported Page object to represent
        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
        font_family: family of the font
        smoothing_factor: factor to smooth the font size
        min_font_size: minimum font size
        max_font_size: maximum font size

    Returns:
        the synthesized page
    """
    # Blank white canvas matching the page dimensions
    h, w = page["dimensions"]
    canvas = Image.new("RGB", (w, h), color=(255, 255, 255))

    # Shared rendering options forwarded to every _synthesize call
    render_kwargs = {
        "w": w,
        "h": h,
        "draw_proba": draw_proba,
        "font_family": font_family,
        "smoothing_factor": smoothing_factor,
        "min_font_size": min_font_size,
        "max_font_size": max_font_size,
    }

    for block in page["blocks"]:
        # If several lines are provided, rendering them as lines gives better results;
        # otherwise fall back to rendering each word individually
        render_lines = len(block["lines"]) > 1
        for line in block["lines"]:
            _warn_rotation(block)  # pragma: no cover
            if render_lines:
                canvas = _synthesize(response=canvas, entry=line, **render_kwargs)
            else:
                for word in line["words"]:
                    canvas = _synthesize(response=canvas, entry=word, **render_kwargs)

    return np.array(canvas, dtype=np.uint8)
================================================
FILE: onnxtr/utils/repr.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
# Adapted from https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/module.py
__all__ = ["NestedObject"]
def _addindent(s_, num_spaces):
s = s_.split("\n")
# don't do anything for single-line stuff
if len(s) == 1:
return s_
first = s.pop(0)
s = [(num_spaces * " ") + line for line in s]
s = "\n".join(s)
s = first + "\n" + s
return s
class NestedObject:
    """Base class for all nested objects in onnxtr"""

    _children_names: list[str]

    def extra_repr(self) -> str:
        # Subclasses override this to contribute a one-line summary to __repr__
        return ""

    def __repr__(self):
        # We treat the extra repr like the sub-objects, one item per line
        def format_child(value):
            # Non-empty lists of children are rendered between brackets
            if isinstance(value, list) and len(value) > 0:
                joined = ",\n".join(repr(item) for item in value)
                if len(value) > 1:
                    joined = _addindent(f"\n{joined},", 2) + "\n"
                return f"[{joined}]"
            return repr(value)

        extra_repr = self.extra_repr()
        # empty string would otherwise split into ['']
        extra_lines = extra_repr.split("\n") if extra_repr else []

        child_lines = []
        for key in getattr(self, "_children_names", []):
            child_str = _addindent(format_child(getattr(self, key)), 2)
            child_lines.append(f"({key}): {child_str}")

        lines = extra_lines + child_lines
        main_str = self.__class__.__name__ + "("
        if lines:
            # simple one-liner info, which most builtin Modules will use
            if len(extra_lines) == 1 and not child_lines:
                main_str += extra_lines[0]
            else:
                main_str += "\n  " + "\n  ".join(lines) + "\n"
        return main_str + ")"
================================================
FILE: onnxtr/utils/visualization.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import cv2
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.figure import Figure
from .common_types import BoundingBox, Polygon4P
__all__ = ["visualize_page", "draw_boxes"]
def rect_patch(
    geometry: BoundingBox,
    page_dimensions: tuple[int, int],
    label: str | None = None,
    color: tuple[float, float, float] = (0, 0, 0),
    alpha: float = 0.3,
    linewidth: int = 2,
    fill: bool = True,
    preserve_aspect_ratio: bool = False,
) -> patches.Rectangle:
    """Create a matplotlib rectangular patch for the element

    Args:
        geometry: bounding box of the element
        page_dimensions: dimensions of the Page in format (height, width)
        label: label to display when hovered
        color: color to draw box
        alpha: opacity parameter to fill the boxes, 0 = transparent
        linewidth: line width
        fill: whether the patch should be filled
        preserve_aspect_ratio: pass True if you passed True to the predictor

    Returns:
        a rectangular Patch
    """
    # A valid geometry is exactly two (x, y) tuples
    is_valid = len(geometry) == 2 and all(isinstance(elt, tuple) and len(elt) == 2 for elt in geometry)
    if not is_valid:
        raise ValueError("invalid geometry format")
    height, width = page_dimensions
    (xmin, ymin), (xmax, ymax) = geometry
    # Switch to absolute coordinates
    if preserve_aspect_ratio:
        width = height = max(height, width)
    x0 = xmin * width
    y0 = ymin * height
    box_w = (xmax - xmin) * width
    box_h = (ymax - ymin) * height
    return patches.Rectangle(
        (x0, y0),
        box_w,
        box_h,
        fill=fill,
        linewidth=linewidth,
        edgecolor=(*color, alpha),
        facecolor=(*color, alpha),
        label=label,
    )
def polygon_patch(
    geometry: np.ndarray,
    page_dimensions: tuple[int, int],
    label: str | None = None,
    color: tuple[float, float, float] = (0, 0, 0),
    alpha: float = 0.3,
    linewidth: int = 2,
    fill: bool = True,
    preserve_aspect_ratio: bool = False,
) -> patches.Polygon:
    """Create a matplotlib polygon patch for the element

    Args:
        geometry: bounding box of the element, relative coordinates of shape (4, 2)
        page_dimensions: dimensions of the Page in format (height, width)
        label: label to display when hovered
        color: color to draw box
        alpha: opacity parameter to fill the boxes, 0 = transparent
        linewidth: line width
        fill: whether the patch should be filled
        preserve_aspect_ratio: pass True if you passed True to the predictor

    Returns:
        a polygon Patch
    """
    if not geometry.shape == (4, 2):
        raise ValueError("invalid geometry format")
    # Unpack
    height, width = page_dimensions
    # Scale a float copy: the previous implementation wrote the absolute coordinates
    # back into `geometry`, mutating the caller's array in place (and truncating
    # values for integer-typed inputs)
    points = geometry.astype(float)
    points[:, 0] *= max(width, height) if preserve_aspect_ratio else width
    points[:, 1] *= max(width, height) if preserve_aspect_ratio else height
    return patches.Polygon(
        points,
        fill=fill,
        linewidth=linewidth,
        edgecolor=(*color, alpha),
        facecolor=(*color, alpha),
        label=label,
    )
def create_obj_patch(
    geometry: BoundingBox | Polygon4P | np.ndarray,
    page_dimensions: tuple[int, int],
    **kwargs: Any,
) -> patches.Patch:
    """Create a matplotlib patch for the element

    Args:
        geometry: bounding box (straight or rotated) of the element
        page_dimensions: dimensions of the page in format (height, width)
        **kwargs: keyword arguments for the patch

    Returns:
        a matplotlib Patch
    """
    if isinstance(geometry, tuple):
        if len(geometry) == 2:
            # straight word BB (2 pts)
            return rect_patch(geometry, page_dimensions, **kwargs)
        if len(geometry) == 4:
            # rotated word BB (4 pts)
            return polygon_patch(np.asarray(geometry), page_dimensions, **kwargs)
    elif isinstance(geometry, np.ndarray) and geometry.shape == (4, 2):
        # rotated line
        return polygon_patch(geometry, page_dimensions, **kwargs)
    raise ValueError("invalid geometry format")
def visualize_page(
    page: dict[str, Any],
    image: np.ndarray,
    words_only: bool = True,
    display_artefacts: bool = True,
    scale: float = 10,
    interactive: bool = True,
    add_labels: bool = True,
    **kwargs: Any,
) -> Figure:
    """Visualize a full page with predicted blocks, lines and words

    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> from onnxtr.utils.visualization import visualize_page
    >>> from onnxtr.models import ocr_db_crnn
    >>> model = ocr_db_crnn()
    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
    >>> out = model([[input_page]])
    >>> visualize_page(out[0].pages[0].export(), input_page)
    >>> plt.show()

    Args:
        page: the exported Page of a Document
        image: np array of the page, needs to have the same shape than page['dimensions']
        words_only: whether only words should be displayed
        display_artefacts: whether artefacts should be displayed
        scale: figsize of the largest windows side
        interactive: whether the plot should be interactive
        add_labels: for static plot, adds text labels on top of bounding box
        **kwargs: keyword arguments for the polygon patch

    Returns:
        the matplotlib figure
    """
    # Get proper scale and aspect ratio
    h, w = image.shape[:2]
    size = (scale * w / h, scale) if h > w else (scale, h / w * scale)
    fig, ax = plt.subplots(figsize=size)
    # Display the image
    ax.imshow(image)
    # hide both axis
    ax.axis("off")
    if interactive:
        artists: list[patches.Patch] = []  # instantiate an empty list of patches (to be drawn on the page)
    for block in page["blocks"]:
        # Block-level boxes (green) are only drawn when more than words are requested
        if not words_only:
            rect = create_obj_patch(
                block["geometry"], page["dimensions"], label="block", color=(0, 1, 0), linewidth=1, **kwargs
            )
            # add patch on figure
            ax.add_patch(rect)
            if interactive:
                # add patch to cursor's artists
                artists.append(rect)
        for line in block["lines"]:
            # Line-level boxes (red)
            if not words_only:
                rect = create_obj_patch(
                    line["geometry"], page["dimensions"], label="line", color=(1, 0, 0), linewidth=1, **kwargs
                )
                ax.add_patch(rect)
                if interactive:
                    artists.append(rect)
            for word in line["words"]:
                # Word-level boxes (blue) carry the recognized value and confidence
                rect = create_obj_patch(
                    word["geometry"],
                    page["dimensions"],
                    label=f"{word['value']} (confidence: {word['confidence']:.2%})",
                    color=(0, 0, 1),
                    **kwargs,
                )
                ax.add_patch(rect)
                if interactive:
                    artists.append(rect)
                elif add_labels:
                    # Static plot: compute an absolute anchor point for the text label
                    if len(word["geometry"]) == 5:
                        # (xcenter, ycenter, width, height, alpha) format
                        text_loc = (
                            int(page["dimensions"][1] * (word["geometry"][0] - word["geometry"][2] / 2)),
                            int(page["dimensions"][0] * (word["geometry"][1] - word["geometry"][3] / 2)),
                        )
                    else:
                        # first point of the geometry
                        text_loc = (
                            int(page["dimensions"][1] * word["geometry"][0][0]),
                            int(page["dimensions"][0] * word["geometry"][0][1]),
                        )
                    if len(word["geometry"]) == 2:
                        # We draw only if boxes are in straight format
                        ax.text(
                            *text_loc,
                            word["value"],
                            size=10,
                            alpha=0.5,
                            color=(0, 0, 1),
                        )
        if display_artefacts:
            # Artefact boxes (gray)
            for artefact in block["artefacts"]:
                rect = create_obj_patch(
                    artefact["geometry"],
                    page["dimensions"],
                    label="artefact",
                    color=(0.5, 0.5, 0.5),
                    linewidth=1,
                    **kwargs,
                )
                ax.add_patch(rect)
                if interactive:
                    artists.append(rect)
    if interactive:
        import mplcursors

        # Create mlp Cursor to hover patches in artists
        mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
    fig.tight_layout(pad=0.0)
    return fig
def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int] | None = None, **kwargs) -> None:
    """Draw an array of relative straight boxes on an image

    Args:
        boxes: array of relative boxes, of shape (*, 4)
        image: np array, float32 or uint8
        color: color to use for bounding box edges
        **kwargs: keyword arguments from `matplotlib.pyplot.plot`
    """
    height, width = image.shape[:2]
    # Convert boxes to absolute coords (on a copy, leaving the input untouched)
    abs_boxes = deepcopy(boxes)
    abs_boxes[:, [0, 2]] *= width
    abs_boxes[:, [1, 3]] *= height
    abs_boxes = abs_boxes.astype(np.int32)
    # Default to blue edges (BGR) when no color is provided
    edge_color = color if isinstance(color, tuple) else (0, 0, 255)
    for xmin, ymin, xmax, ymax in abs_boxes.tolist():
        image = cv2.rectangle(
            image,
            (xmin, ymin),
            (xmax, ymax),
            color=edge_color,
            thickness=2,
        )
    plt.imshow(image)
    plt.plot(**kwargs)
================================================
FILE: onnxtr/utils/vocabs.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import re
import string
__all__ = ["VOCABS"]
# Script-level character sets (digits, letters, diacritics, punctuation, ...)
# combined further down into the per-language VOCABS entries.
_BASE_VOCABS = {
    # Latin
    "digits": string.digits,
    "ascii_letters": string.ascii_letters,
    "punctuation": string.punctuation,
    "currency": "£€¥¢฿",
    # Cyrillic
    "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
    "russian_cyrillic_letters": "ёыэЁЫЭ",
    "russian_signs": "ъЪ",
    # Greek
    "ancient_greek": "αβγδεζηθικλμνξοπρστςυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
    # Arabic & Persian
    # combining marks are kept in explicit lists to stay readable/editable
    "arabic_diacritics": "".join(["ً", "ٌ", "ٍ", "َ", "ُ", "ِ", "ّ", "ْ", "ٕ", "ٓ", "ٔ", "ٚ"]),
    "arabic_digits": "٠١٢٣٤٥٦٧٨٩",
    "arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىيٱ",
    "arabic_punctuation": "؟؛«»—،",
    "persian_letters": "پچژڢڤگکی",
    # Bengali
    "bengali_consonants": "কখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহড়ঢ়য়ৰৱৼ",
    "bengali_vowels": "অআইঈউঊঋঌএঐওঔৠৡ",
    "bengali_digits": "০১২৩৪৫৬৭৮৯",
    "bengali_matras": "".join(["া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "ৗ"]),
    "bengali_virama": "্",
    "bengali_punctuation": "ঽৎ৽৺৻",
    "bengali_signs": "".join(["ঁ", "ং", "ঃ", "়"]),
    # Gujarati
    "gujarati_consonants": "કખગઘઙચછજઝઞટઠડઢણતથદધનપફબભમયરલળવશષસહ",
    "gujarati_vowels": "અઆઇઈઉઊઋઌઍએઐઑઓઔ",
    "gujarati_digits": "૦૧૨૩૪૫૬૭૮૯",
    "gujarati_matras": "".join([
        "ઁ",
        "ં",
        "ઃ",
        "઼",
        "ા",
        "િ",
        "ી",
        "ુ",
        "ૂ",
        "ૃ",
        "ૄ",
        "ૅ",
        "ે",
        "ૈ",
        "ૉ",
        "ો",
        "ૌ",
        "ૢ",
        "ૣ",
        "ૺ",
        "ૻ",
        "ૼ",
        "૽",
        "૾",
        "૿",
    ]),
    "gujarati_virama": "્",
    "gujarati_punctuation": "ઽ॥",
    "gujarati_signs": "ૐ૰",
    # Devanagari
    "devanagari_consonants": "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहऴऩळक़ख़ग़ज़ड़ढ़फ़य़ऱॺॻॼॽॾ",
    "devanagari_vowels": "अआइईउऊऋऌऍऎएऐऑऒओऔॠॡॲऄॵॶॳॴॷॸॹ",
    "devanagari_digits": "०१२३४५६७८९",
    "devanagari_matras": "".join([
        "़",
        "ं",
        "ँ",
        "ः",
        "॑",
        "॒",
        "ा",
        "ि",
        "ी",
        "ु",
        "ू",
        "ृ",
        "ॄ",
        "ॅ",
        "ॆ",
        "े",
        "ै",
        "ॉ",
        "ॊ",
        "ो",
        "ौ",
        "ॢ",
        "ॣ",
        "ॏ",
        "ॎ",
    ]),
    "devanagari_virama": "्",
    "devanagari_punctuation": "।॥॰ऽꣲ",
    "devanagari_signs": "ॐ",
    # Punjabi (Gurmukhi script)
    "punjabi_consonants": "ਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵਸ਼ਸਹਖ਼ਗ਼ਜ਼ਫ਼ੜਲ਼",
    "punjabi_vowels": "ਅਆਇਈਉਊਏਐਓਔੲੳ",
    "punjabi_digits": "੦੧੨੩੪੫੬੭੮੯",
    "punjabi_matras": "".join(["ਂ", "਼", "ਾ", "ਿ", "ੀ", "ੁ", "ੂ", "ੇ", "ੈ", "ੋ", "ੌ", "ੑ", "ੰ", "ੱ", "ੵ"]),
    "punjabi_virama": "੍",
    "punjabi_punctuation": "।॥",
    "punjabi_signs": "ੴ",
    # Tamil
    "tamil_consonants": "கஙசஞடணதநபமயரலவழளறன",
    "tamil_vowels": "அஆஇஈஉஊஎஏஐஒஓஔ",
    "tamil_digits": "௦௧௨௩௪௫௬௭௮௯",
    "tamil_matras": "".join(["ா", "ி", "ீ", "ு", "ூ", "ெ", "ே", "ை", "ொ", "ோ", "ௌ"]),
    "tamil_virama": "்",
    "tamil_punctuation": "௰௱௲",
    "tamil_signs": "ஃௐ",
    "tamil_fractions": "௳௴௵௶௷௸௹௺",
    # Telugu
    "telugu_consonants": "కఖగఘఙచఛజఝఞటఠడఢణతథదధనపఫబభమయరఱలళవశషసహఴ",
    "telugu_digits": "౦౧౨౩౪౫౬౭౮౯" + "౸౹౺౻",  # Telugu digits and fractional digits
    "telugu_vowels": "అఆఇఈఉఊఋఌఎఏఐఒఓఔౠౡ",
    "telugu_matras": "".join(["ా", "ి", "ీ", "ు", "ూ", "ృ", "ౄ", "ె", "ే", "ై", "ొ", "ో", "ౌ", "ౢ", "ౣ"]),
    "telugu_virama": "్",
    "telugu_punctuation": "ఽ",
    "telugu_signs": "".join(["ఁ", "ం", "ః"]),
    # Kannada
    "kannada_consonants": "ಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಲವಶಷಸಹಳ",
    "kannada_vowels": "ಅಆಇಈಉಊಋॠಌೡಎಏಐಒಓಔ",
    "kannada_digits": "೦೧೨೩೪೫೬೭೮೯",
    "kannada_matras": "".join(["ಾ", "ಿ", "ೀ", "ು", "ೂ", "ೃ", "ೄ", "ೆ", "ೇ", "ೈ", "ೊ", "ೋ", "ೌ"]),
    "kannada_virama": "್",
    "kannada_punctuation": "।॥ೱೲ",
    "kannada_signs": "".join(["ಂ", "ಃ", "ಁ"]),
    # Sinhala
    "sinhala_consonants": "කඛගඝඞචඡජඣඤටඨඩඪණතථදධනපඵබභමයරලවශෂසහළෆ",
    "sinhala_vowels": "අආඇඈඉඊඋඌඍඎඏඐඑඒඓඔඕඖ",
    "sinhala_digits": "෦෧෨෩෪෫෬෭෮෯",
    "sinhala_matras": "".join(["ා", "ැ", "ෑ", "ි", "ී", "ු", "ූ", "ෙ", "ේ", "ෛ", "ො", "ෝ", "ෞ"]),
    "sinhala_virama": "්",
    "sinhala_punctuation": "෴",
    "sinhala_signs": "".join(["ං", "ඃ"]),
    # Malayalam
    "malayalam_consonants": "കഖഗഘങചഛജഝഞടഠഡഢണതഥദധനപഫബഭമയരറലളഴവശഷസഹ",
    "malayalam_vowels": "അആഇഈഉഊഋൠഌൡഎഏഐഒഓഔ",
    "malayalam_digits": "൦൧൨൩൪൫൬൭൮൯",
    "malayalam_matras": "".join(["ാ", "ി", "ീ", "ു", "ൂ", "ൃ", "ൄ", "ൢ", "ൣ", "െ", "േ", "ൈ", "ൊ", "ോ", "ൌ"]),
    "malayalam_virama": "്",
    "malayalam_signs": "".join(["ഃ", "൹", "ഽ", "൏", "ം"]),
    # Odia (Oriya)
    "odia_consonants": "କଖଗଘଙଚଛଜଝଞଟଠଡଢଣତଥଦଧନପଫବଭମଯରଲଳଵଶଷସହୟୱଡ଼ଢ଼",
    "odia_vowels": "ଅଆଇଈଉଊଋଌଏଐଓଔୡୠ",
    "odia_digits": "୦୧୨୩୪୫୬୭୮୯" + "୲୳୴୵୶୷",  # Odia digits and fractional digits
    "odia_matras": "".join(["ା", "ି", "ୀ", "ୁ", "ୂ", "ୃ", "ୄ", "େ", "ୈ", "ୋ", "ୌ", "ୢ", "ୣ"]),
    "odia_virama": "୍",
    "odia_punctuation": "ଽ",
    "odia_signs": "".join(["ଂ", "ଃ", "ଁ", "଼", "୰"]),
    # Khmer
    "khmer_consonants": "កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ",
    "khmer_vowels": "ឣឤឥឦឧឨឩឪឫឬឭឮឯឰឱឲឳ",
    "khmer_digits": "០១២៣៤៥៦៧៨៩",
    "khmer_matras": "".join(["ា", "ិ", "ី", "ឹ", "ឺ", "ុ", "ូ", "ួ", "ើ", "ឿ", "ៀ", "េ", "ែ", "ៃ", "ោ", "ៅ"]),
    "khmer_diacritics": "".join(["ំ", "ះ", "ៈ", "៉", "៊", "់", "៌", "៍", "៎", "៏", "័", "៑", "៓", "៝"]),
    "khmer_virama": "្",
    "khmer_punctuation": "។៕៖៘៙៚ៗៜ",
    # Burmese
    "burmese_consonants": "ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအၐၑၒၓၔၕၚၛၜၝၡၥၦၮၯၰၵၶၷၸၹၺၻၼၽၾၿႀႁႎ",
    "burmese_vowels": "ဣဤဥဦဧဩဪဿ",
    "burmese_digits": "၀၁၂၃၄၅၆၇၈၉" + "႐႑႒႓႔႕႖႗႘႙",  # Burmese digits and Shan digits
    "burmese_diacritics": "".join(["့", "း", "ံ", "ါ", "ာ", "ိ", "ီ", "ု", "ူ", "ေ", "ဲ", "ဳ", "ဴ", "ဵ", "ျြွှ"]),  # းံါာိီုူေဲံ့းှျြွှ
    # ္ (virama) and ် (final consonant) - the first is used to stack consonants, the second is used for final consonants
    "burmese_virama": "".join([
        "္",
        "်",
    ]),
    "burmese_punctuation": "၊။၌၍၎၏" + "ၤ" + "ၗ",  # Includes ၗ and ၤ
    # Javanese
    "javanese_consonants": "ꦏꦐꦑꦒꦓꦔꦕꦖꦗꦘꦙꦚꦛꦜꦝꦞꦟꦠꦡꦢꦣꦤꦥꦦꦧꦨꦩꦪꦫꦬꦭꦮꦯꦰꦱꦲ",
    "javanese_vowels": "ꦄꦅꦆꦇꦈꦉꦊꦋꦌꦍꦎ" + "ꦴꦵꦶꦷꦸꦹꦺꦻꦼ",  # sec: Dependent vowels ꦴꦵꦶꦷꦸꦹꦺꦻꦼ
    "javanese_digits": "꧐꧑꧒꧓꧔꧕꧖꧗꧘꧙",
    "javanese_diacritics": "".join(["ꦀ", "ꦁ", "ꦂ", "ꦃ", "꦳", "ꦽ", "ꦾ", "ꦿ"]),  # ꦀꦁꦂꦃ꦳ꦽꦾꦿ
    "javanese_virama": "꧀",
    "javanese_punctuation": "".join(["꧈", "꧉", "꧊", "꧋", "꧌", "꧍", "ꧏ"]),
    # Sudanese
    "sudanese_consonants": "ᮊᮋᮌᮍᮎᮏᮐᮑᮒᮓᮔᮕᮖᮗᮘᮙᮚᮛᮜᮝᮞᮟᮠᮮᮯᮺᮻᮼᮽᮾᮿ",
    "sudanese_vowels": "ᮃᮄᮅᮆᮇᮈᮉ",
    "sudanese_digits": "᮰᮱᮲᮳᮴᮵᮶᮷᮸᮹",
    "sudanese_diacritics": "".join(["ᮀ", "ᮁ", "ᮂ", "ᮡ", "ᮢ", "ᮣ", "ᮤ", "ᮥ", "ᮦ", "ᮧ", "ᮨ", "ᮩ", "᮪", "᮫", "ᮬ", "ᮭ"]),  # "ᮀᮁᮂᮡᮢᮣᮤᮥᮦᮧᮨᮩ᮪᮫ᮬᮭ"
    # Hebrew
    "hebrew_cantillations": "".join([
        "֑",
        "֒",
        "֓",
        "֔",
        "֕",
        "֖",
        "֗",
        "֘",
        "֙",
        "֚",
        "֛",
        "֜",
        "֝",
        "֞",
        "֟",
        "֠",
        "֡",
        "֢",
        "֣",
        "֤",
        "֥",
        "֦",
        "֧",
        "֨",
        "֩",
        "֪",
        "֫",
        "֬",
        "֭",
        "֮",
        "֯",
    ]),
    "hebrew_consonants": "אבגדהוזחטיךכלםמןנסעףפץצקרשת",
    "hebrew_specials": "ׯװױײיִﬞײַﬠﬡﬢﬣﬤﬥﬦﬧﬨ﬩שׁשׂשּׁשּׂאַאָאּבּגּדּהּוּזּטּיּךּכּלּמּנּסּףּפּצּקּרּשּתּוֹבֿכֿפֿﭏ",
    "hebrew_punctuation": "".join(["ֽ", "־", "ֿ", "׀", "ׁ", "ׂ", "׃", "ׄ", "ׅ", "׆", "׳", "״"]),
    "hebrew_vowels": "".join(["ְ", "ֱ", "ֲ", "ֳ", "ִ", "ֵ", "ֶ", "ַ", "ָ", "ֹ", "ֺ", "ֻ", "ׇ"]),
}
VOCABS: dict[str, str] = {}
for key, value in _BASE_VOCABS.items():
VOCABS[key] = value
# Latin & latin-dependent alphabets
# Each language vocab derives from the English base: letters the language
# does not use are stripped with re.sub, and language-specific letters
# (plus a currency symbol, where relevant) are appended.
VOCABS["latin"] = _BASE_VOCABS["digits"] + _BASE_VOCABS["ascii_letters"] + _BASE_VOCABS["punctuation"]
VOCABS["english"] = VOCABS["latin"] + "°" + _BASE_VOCABS["currency"]
VOCABS["albanian"] = VOCABS["english"] + "çëÇË"
VOCABS["afrikaans"] = VOCABS["english"] + "èëïîôûêÈËÏÎÔÛÊ"
VOCABS["azerbaijani"] = re.sub(r"[Ww]", "", VOCABS["english"]) + "çəğöşüÇƏĞÖŞÜ" + "₼"
VOCABS["basque"] = VOCABS["english"] + "ñçÑÇ"
VOCABS["bosnian"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "čćđšžČĆĐŠŽ"
VOCABS["catalan"] = VOCABS["english"] + "àèéíïòóúüçÀÈÉÍÏÒÓÚÜÇ"
VOCABS["croatian"] = VOCABS["english"] + "ČčĆćĐ𩹮ž"
VOCABS["czech"] = VOCABS["english"] + "áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ"
VOCABS["danish"] = VOCABS["english"] + "æøåÆØÅ"
VOCABS["dutch"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ"
VOCABS["estonian"] = VOCABS["english"] + "šžõäöüŠŽÕÄÖÜ"
VOCABS["esperanto"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "ĉĝĥĵŝŭĈĜĤĴŜŬ" + "₷"
VOCABS["french"] = VOCABS["english"] + "àâéèêëîïôùûüçÀÂÉÈÊËÎÏÔÙÛÜÇ"
VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
VOCABS["frisian"] = re.sub(r"[QqXx]", "", VOCABS["english"]) + "âêôûúÂÊÔÛÚ" + "ƒƑ"
VOCABS["galician"] = re.sub(r"[JjKkWw]", "", VOCABS["english"]) + "ñÑçÇ"
VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
VOCABS["hausa"] = re.sub(r"[PpQqVvXx]", "", VOCABS["english"]) + "ɓɗƙƴƁƊƘƳ" + "₦"
VOCABS["hungarian"] = VOCABS["english"] + "áéíóöúüÁÉÍÓÖÚÜ"
VOCABS["icelandic"] = re.sub(r"[CcQqWw]", "", VOCABS["english"]) + "ðáéíóúýþæöÐÁÉÍÓÚÝÞÆÖ"
VOCABS["indonesian"] = VOCABS["english"]
VOCABS["irish"] = VOCABS["english"] + "áéíóúÁÉÍÓÚ"
VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ"
VOCABS["latvian"] = re.sub(r"[QqWwXx]", "", VOCABS["english"]) + "āčēģīķļņšūžĀČĒĢĪĶĻŅŠŪŽ"
VOCABS["lithuanian"] = re.sub(r"[QqWwXx]", "", VOCABS["english"]) + "ąčęėįšųūžĄČĘĖĮŠŲŪŽ"
VOCABS["luxembourgish"] = VOCABS["english"] + "äöüéëÄÖÜÉË"
VOCABS["malagasy"] = re.sub(r"[CcQqUuWwXx]", "", VOCABS["english"]) + "ôñÔÑ"
VOCABS["malay"] = VOCABS["english"]
VOCABS["maltese"] = re.sub(r"[CcYy]", "", VOCABS["english"]) + "ċġħżĊĠĦŻ"
VOCABS["maori"] = re.sub(r"[BbCcDdFfJjLlOoQqSsVvXxYyZz]", "", VOCABS["english"]) + "āēīōūĀĒĪŌŪ"
# Montenegrin Latin drops q/w/x/y and adds ś/ź alongside the shared č/ć/š/ž.
# Fix: the lowercase "ś" was missing even though uppercase "Ś" was present.
VOCABS["montenegrin"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "čćšśžźČĆŠŚŽŹ"
# Latin-script languages (continued): Nordic, Slavic-Latin, Iberian, etc.
VOCABS["norwegian"] = VOCABS["english"] + "æøåÆØÅ"
VOCABS["polish"] = VOCABS["english"] + "ąćęłńóśźżĄĆĘŁŃÓŚŹŻ"
VOCABS["portuguese"] = VOCABS["english"] + "áàâãéêíïóôõúüçÁÀÂÃÉÊÍÏÓÔÕÚÜÇ"
VOCABS["quechua"] = re.sub(r"[BbDdFfGgJjVvXxZz]", "", VOCABS["english"]) + "ñÑĉĈçÇ"
VOCABS["romanian"] = VOCABS["english"] + "ăâîșțĂÂÎȘȚ"
VOCABS["scottish_gaelic"] = re.sub(r"[JjKkQqVvWwXxYyZz]", "", VOCABS["english"]) + "àèìòùÀÈÌÒÙ"
VOCABS["serbian_latin"] = VOCABS["english"] + "čćđžšČĆĐŽŠ"
VOCABS["slovak"] = VOCABS["english"] + "ôäčďľňšťžáéíĺóŕúýÔÄČĎĽŇŠŤŽÁÉÍĹÓŔÚÝ"
VOCABS["slovene"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "čćđšžČĆĐŠŽ"
VOCABS["somali"] = re.sub(r"[PpVvZz]", "", VOCABS["english"])
VOCABS["spanish"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ" + "¡¿"
VOCABS["swahili"] = re.sub(r"[QqXx]", "", VOCABS["english"])
VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
VOCABS["tagalog"] = re.sub(r"[CcQqWwXx]", "", VOCABS["english"]) + "ñÑ" + "₱"
VOCABS["turkish"] = re.sub(r"[QqWwXx]", "", VOCABS["english"]) + "çğıöşüâîûÇĞİÖŞÜÂÎÛ" + "₺"
VOCABS["uzbek_latin"] = re.sub(r"[Ww]", "", VOCABS["english"]) + "çğɉñöşÇĞɈÑÖŞ"
# Vietnamese needs the full set of precomposed tone/vowel letters.
VOCABS["vietnamese"] = (
    VOCABS["english"]
    + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựíìỉĩịýỳỷỹỵ"
    + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰÍÌỈĨỊÝỲỶỸỴ"
    + "₫" # currency
)
VOCABS["welsh"] = re.sub(r"[KkQqVvXxZz]", "", VOCABS["english"]) + "âêîôŵŷÂÊÎÔŴŶ"
VOCABS["yoruba"] = re.sub(r"[CcQqVvXxZz]", "", VOCABS["english"]) + "ẹọṣẸỌṢ" + "₦"
VOCABS["zulu"] = VOCABS["english"]
# Non-latin alphabets.
# Cyrillic
# Cyrillic vocabs are assembled from the shared base letter sets plus
# language-specific letters and the national currency sign.
VOCABS["russian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "₽"
)
VOCABS["belarusian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ўiЎI"
    + "₽"
)
VOCABS["ukrainian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ґіїєҐІЇЄ"
    + "₴"
)
# Languages based on the Russian vocab; .replace strips the rouble sign
# before appending the language's own currency (if any).
VOCABS["tatar"] = VOCABS["russian"] + "ӘәҖҗҢңӨөҮү"
VOCABS["tajik"] = VOCABS["russian"].replace("₽", "") + "ҒғҚқҲҳҶҷӢӣӮӯ"
VOCABS["kazakh"] = VOCABS["russian"].replace("₽", "") + "ӘәҒғҚқҢңӨөҰұҮүҺһІі" + "₸"
VOCABS["kyrgyz"] = VOCABS["russian"].replace("₽", "") + "ҢңӨөҮү"
VOCABS["bulgarian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
)
VOCABS["macedonian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ЃѓЅѕЈјЉљЊњЌќЏџ"
)
VOCABS["mongolian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ӨөҮү"
    + "᠐᠑᠒᠓᠔᠕᠖᠗᠘᠙" # Mongolian digits
    + "₮"
)
VOCABS["yakut"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ҔҕҤҥӨөҺһҮү"
    + "₽"
)
VOCABS["serbian_cyrillic"] = (
    "абвгдежзиклмнопрстуфхцчшАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШ" # limited cyrillic
    + "JjЂђЉљЊњЋћЏџ" # Serbian specials
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
)
VOCABS["uzbek_cyrillic"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ЎўҚқҒғҲҳ"  # Uzbek-specific Cyrillic letters
)
# NOTE: a second, redundant assignment to VOCABS["ukrainian"] previously
# followed here; it rebuilt the exact same string as the earlier definition
# above and has been removed as dead code.
# Greek
VOCABS["greek"] = (
    _BASE_VOCABS["punctuation"] + _BASE_VOCABS["ancient_greek"] + _BASE_VOCABS["currency"] + "άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ"
)
# Polytonic Greek: adds the precomposed breathing/accent code points.
VOCABS["greek_extended"] = (
    VOCABS["greek"]
    + "ͶͷϜϝἀἁἂἃἄἅἆἇἈἉἊἋἌἍἎἏἐἑἒἓἔἕἘἙἚἛἜἝἠἡἢἣἤἥἦἧἨἩἪἫἬἭἮἯἰἱἲἳἴἵἶἷἸἹἺἻἼἽἾἿ"
    + "ὀὁὂὃὄὅὈὉὊὋὌὍὐὑὒὓὔὕὖὗὙὛὝὟὠὡὢὣὤὥὦὧὨὩὪὫὬὭὮὯὰὲὴὶὸὺὼᾀᾁᾂᾃᾄᾅᾆᾇᾈᾉᾊᾋᾌᾍᾎᾏᾐ"
    + "ᾑᾒᾓᾔᾕᾖᾗᾘᾙᾚᾛᾜᾝᾞᾟᾠᾡᾢᾣᾤᾥᾦᾧᾨᾩᾪᾫᾬᾭᾮᾯᾲᾳᾴᾶᾷᾺᾼῂῃῄῆῇῈῊῌῒΐῖῗῚῢΰῤῥῦῧῪῬῲῳῴῶῷῸῺῼ"
)
# Hebrew
VOCABS["hebrew"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["hebrew_consonants"]
    + _BASE_VOCABS["hebrew_vowels"]
    + _BASE_VOCABS["hebrew_punctuation"]
    + _BASE_VOCABS["hebrew_cantillations"]
    + _BASE_VOCABS["hebrew_specials"]
    + "₪"
)
# Arabic
VOCABS["arabic"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["arabic_digits"]
    + _BASE_VOCABS["arabic_letters"]
    + _BASE_VOCABS["persian_letters"]
    + _BASE_VOCABS["arabic_diacritics"]
    + _BASE_VOCABS["arabic_punctuation"]
    + _BASE_VOCABS["punctuation"]
)
# Arabic-script languages: reuse the Arabic base plus their extra letters.
VOCABS["persian"] = VOCABS["arabic"]
VOCABS["urdu"] = VOCABS["persian"] + "ٹڈڑںھےہۃ"
VOCABS["pashto"] = VOCABS["persian"] + "ټډړږښځڅڼېۍ"
VOCABS["kurdish"] = VOCABS["persian"] + "ڵڕۆێە"
VOCABS["uyghur"] = VOCABS["persian"] + "ەېۆۇۈڭھ"
VOCABS["sindhi"] = VOCABS["persian"] + "ڀٿٺٽڦڄڃڇڏڌڊڍڙڳڱڻھ"
# Indic scripts
# Rules:
# Any consonant can be "combined" with any matra
# The virama is used to create consonant clusters - so C + Virama + C = CC
# Each vocab follows the same recipe: consonants + independent vowels +
# script digits + matras (dependent vowel signs) + virama + script-specific
# punctuation/signs + western punctuation + the currency sign.
# Devanagari based
VOCABS["devanagari"] = (
    _BASE_VOCABS["devanagari_consonants"]
    + _BASE_VOCABS["devanagari_vowels"]
    + _BASE_VOCABS["devanagari_digits"]
    + _BASE_VOCABS["devanagari_matras"]
    + _BASE_VOCABS["devanagari_virama"]
    + _BASE_VOCABS["devanagari_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Devanagari
    + "₹" # currency
)
# Languages written in Devanagari share the same character set.
VOCABS["hindi"] = VOCABS["devanagari"]
VOCABS["sanskrit"] = VOCABS["devanagari"]
VOCABS["marathi"] = VOCABS["devanagari"]
VOCABS["nepali"] = VOCABS["devanagari"]
# Gujarati
VOCABS["gujarati"] = (
    _BASE_VOCABS["gujarati_consonants"]
    + _BASE_VOCABS["gujarati_vowels"]
    + _BASE_VOCABS["gujarati_digits"]
    + _BASE_VOCABS["gujarati_matras"]
    + _BASE_VOCABS["gujarati_virama"]
    + _BASE_VOCABS["gujarati_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Gujarati
    + _BASE_VOCABS["gujarati_signs"]
    + "૱" # currency
)
# Bengali
VOCABS["bengali"] = (
    _BASE_VOCABS["bengali_consonants"]
    + _BASE_VOCABS["bengali_vowels"]
    + _BASE_VOCABS["bengali_digits"]
    + _BASE_VOCABS["bengali_matras"]
    + _BASE_VOCABS["bengali_virama"]
    + _BASE_VOCABS["bengali_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Bengali
    + _BASE_VOCABS["bengali_signs"]
    + "৳" # currency
)
# Brahmic scripts
VOCABS["tamil"] = (
    _BASE_VOCABS["tamil_consonants"]
    + _BASE_VOCABS["tamil_vowels"]
    + _BASE_VOCABS["tamil_digits"]
    + _BASE_VOCABS["tamil_matras"]
    + _BASE_VOCABS["tamil_virama"]
    + _BASE_VOCABS["tamil_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Tamil
    + _BASE_VOCABS["tamil_fractions"] # This is a Tamil-specific addition
    + _BASE_VOCABS["tamil_signs"]
    + "₹" # currency
)
VOCABS["telugu"] = (
    _BASE_VOCABS["telugu_consonants"]
    + _BASE_VOCABS["telugu_vowels"]
    + _BASE_VOCABS["telugu_digits"]
    + _BASE_VOCABS["telugu_matras"]
    + _BASE_VOCABS["telugu_virama"]
    + _BASE_VOCABS["telugu_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Telugu
    + _BASE_VOCABS["telugu_signs"]
    + "₹" # currency
)
VOCABS["kannada"] = (
    _BASE_VOCABS["kannada_consonants"]
    + _BASE_VOCABS["kannada_vowels"]
    + _BASE_VOCABS["kannada_digits"]
    + _BASE_VOCABS["kannada_matras"]
    + _BASE_VOCABS["kannada_virama"]
    + _BASE_VOCABS["kannada_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Kannada
    + _BASE_VOCABS["kannada_signs"]
    + "₹" # currency
)
VOCABS["sinhala"] = (
    _BASE_VOCABS["sinhala_consonants"]
    + _BASE_VOCABS["sinhala_vowels"]
    + _BASE_VOCABS["sinhala_digits"]
    + _BASE_VOCABS["sinhala_matras"]
    + _BASE_VOCABS["sinhala_virama"]
    + _BASE_VOCABS["sinhala_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Sinhala
    + _BASE_VOCABS["sinhala_signs"]
    + "₹" # currency
)
VOCABS["malayalam"] = (
    _BASE_VOCABS["malayalam_consonants"]
    + _BASE_VOCABS["malayalam_vowels"]
    + _BASE_VOCABS["malayalam_digits"]
    + _BASE_VOCABS["malayalam_matras"]
    + _BASE_VOCABS["malayalam_virama"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Malayalam
    + _BASE_VOCABS["malayalam_signs"]
    + "₹" # currency
)
VOCABS["punjabi"] = (
    _BASE_VOCABS["punjabi_consonants"]
    + _BASE_VOCABS["punjabi_vowels"]
    + _BASE_VOCABS["punjabi_digits"]
    + _BASE_VOCABS["punjabi_matras"]
    + _BASE_VOCABS["punjabi_virama"]
    + _BASE_VOCABS["punjabi_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Punjabi
    + _BASE_VOCABS["punjabi_signs"]
    + "₹" # currency
)
VOCABS["odia"] = (
    _BASE_VOCABS["odia_consonants"]
    + _BASE_VOCABS["odia_vowels"]
    + _BASE_VOCABS["odia_digits"]
    + _BASE_VOCABS["odia_matras"]
    + _BASE_VOCABS["odia_virama"]
    + _BASE_VOCABS["odia_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Odia
    + _BASE_VOCABS["odia_signs"]
    + "₹" # currency
)
VOCABS["khmer"] = (
    _BASE_VOCABS["khmer_consonants"]
    + _BASE_VOCABS["khmer_vowels"]
    + _BASE_VOCABS["khmer_digits"]
    + _BASE_VOCABS["khmer_matras"]
    + _BASE_VOCABS["khmer_virama"]
    + _BASE_VOCABS["khmer_diacritics"] # This is a Khmer-specific addition
    + _BASE_VOCABS["khmer_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Khmer
    + "៛" # Cambodian currency
)
# Armenian
VOCABS["armenian"] = (
    "ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖՙՠաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆևֈ"
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + "՚՛՜՝՞՟։֊" # Armenian punctuation
    + "֏" # currency (dram)
)
# Sudanese
VOCABS["sudanese"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["sudanese_digits"]
    + _BASE_VOCABS["sudanese_consonants"]
    + _BASE_VOCABS["sudanese_vowels"]
    + _BASE_VOCABS["sudanese_diacritics"]
    + _BASE_VOCABS["punctuation"]
)
# Thai
# Rules:
# Diacritics are used to modify the consonants and vowels
VOCABS["thai"] = (
    _BASE_VOCABS["digits"]
    + "๐๑๒๓๔๕๖๗๘๙" # Thai digits
    + _BASE_VOCABS["punctuation"]
    + "๏๚๛ๆฯ" # Thai punctuation / repetition marks
    + "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮ" # Thai consonants
    + "ะาำเแโใไๅ" # Thai vowels
    + " ัิีึืฺุู็่้๊๋์ํ๎".replace(" ", "") # combining marks; leading space stripped
    + "฿" # currency (baht)
)
VOCABS["lao"] = (
    _BASE_VOCABS["digits"]
    + "໐໑໒໓໔໕໖໗໘໙" # Lao digits
    + _BASE_VOCABS["punctuation"]
    + "ໆໞໟຯ" # Lao repetition / ellipsis marks
    + "ກຂຄຆງຈຉຊຌຍຎຏຐຑຒຓດຕຖທຘນບປຜຝພຟຠມຢຣລວຨຩສຫຬອຮ" # Lao consonants
    + "ະາຳຽເແໂໃໄ" # Lao vowels
    + "ໜໝ" # Lao ligature
    + "".join(["ັ", "ິ", "ີ", "ຶ", "ື", "ຸ", "ູ", "຺", "ົ", "ຼ", "່", "້", "໊", "໋", "໌", "ໍ"])
)
# Burmese & Javanese
# Rules:
# - A syllable usually starts with a base consonant.
# - Diacritics (sandhangan), which represent vowels and consonant modifications, are attached to the base consonant:
#   - Vowel signs (ꦴꦵꦶꦷꦸꦹꦺꦻꦼ) follow the consonant and determine the syllable's vowel sound.
#   - Medial signs like ꦿ (ra), ꦾ (ya), and ꦽ (vocalic r) modify the consonant cluster.
# - The virama (꧀, called *pangkon*) suppresses the inherent vowel,
#   creating consonant clusters.
# - Special signs like ꦀ (cecak), ꦁ (layar), ꦂ (cakra), and ꦃ (wignyan)
#   can appear before or after syllables to represent nasal or glottal finals.
# - Independent vowels (ꦄꦅꦆꦇꦈꦉꦊꦋꦌꦍꦎ) can occur without a base consonant, especially at word/sentence starts.
# - Use Unicode NFC normalization to ensure composed syllables render correctly.
VOCABS["burmese"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["burmese_digits"]
    + _BASE_VOCABS["burmese_consonants"]
    + _BASE_VOCABS["burmese_vowels"]
    + _BASE_VOCABS["burmese_diacritics"]
    + _BASE_VOCABS["burmese_virama"]
    + _BASE_VOCABS["burmese_punctuation"]
)
VOCABS["javanese"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["javanese_digits"]
    + _BASE_VOCABS["javanese_consonants"]
    + _BASE_VOCABS["javanese_vowels"]
    + _BASE_VOCABS["javanese_diacritics"]
    + _BASE_VOCABS["javanese_virama"]
    + _BASE_VOCABS["javanese_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Javanese
)
# Georgian (Mkhedruli - modern)
VOCABS["georgian"] = (
    _BASE_VOCABS["digits"]
    + "ႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅჇჍაბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶჷჸჹჺჼჽჾჿ"
    + _BASE_VOCABS["punctuation"]
    + "჻" # Georgian paragraph separator
    + "₾" # currency
)
# Ethiopic
# Ge'ez-script syllabary (used for Amharic, Tigrinya, etc.) plus the
# Ethiopic numeral signs.
VOCABS["ethiopic"] = (
    "ሀሁሂሃሄህሆሇለሉሊላሌልሎሏሐሑሒሓሔሕሖሗመሙሚማሜምሞሟሠሡሢሣሤሥሦሧረሩሪራሬርሮሯሰሱሲሳሴስሶሷሸሹሺሻሼሽሾሿቀቁቂቃቄቅቆቇቈቊቋ"
    + "ቌቍቐቑቒቓቔቕቖቘቚቛቜቝበቡቢባቤብቦቧቨቩቪቫቬቭቮቯተቱቲታቴትቶቷቸቹቺቻቼችቾቿኀኁኂኃኄኅኆኇኈኊኋኌኍነኑኒናኔንኖኗኘኙኚኛኜኝኞኟአኡኢኣኤእኦኧ"
    + "ከኩኪካኬክኮኯኰኲኳኴኵኸኹኺኻኼኽኾዀዂዃዄዅወዉዊዋዌውዎዏዐዑዒዓዔዕዖዘዙዚዛዜዝዞዟዠዡዢዣዤዥዦዧየዩዪያዬይዮዯደዱዲዳዴድዶዷዸዹዺ"
    + "ዻዼዽዾዿጀጁጂጃጄጅጆጇገጉጊጋጌግጎጏጐጒጓጔጕጘጙጚጛጜጝጞጟጠጡጢጣጤጥጦጧጨጩጪጫጬጭጮጯጰጱጲጳጴጵጶጷጸጹጺጻጼጽጾጿፀፁፂፃፄፅፆ"
    + "ፇፈፉፊፋፌፍፎፏፐፑፒፓፔፕፖፗፘፙፚᎀᎁᎂᎃᎄᎅᎆᎇᎈᎉᎊᎋᎌᎍᎎᎏ"
    + "፩፪፫፬፭፮፯፰፱፲፳፴፵፶፷፸፹፺፻፼" # digits
)
# East Asian
# Japanese: full hiragana + katakana ranges, the jōyō kanji list,
# CJK punctuation, and the shared currency symbols.
VOCABS["japanese"] = (
    _BASE_VOCABS["digits"]
    + "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづ"
    + "てでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめ"
    + "もゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" # Hiragana
    + "ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダ"
    + "チヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメ"
    + "モャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺーヽヾヿ" # Katakana
    # Kanji jōyō (incl. numerals)
    + "亜哀挨愛曖悪握圧扱宛嵐安案暗以衣位囲医依委威為畏胃尉異移萎偉椅彙意違維慰遺緯域育一壱逸茨芋引印因咽姻員院淫陰飲隠韻右宇羽雨唄鬱畝浦運雲" # noqa: E501
    + "永泳英映栄営詠影鋭衛易疫益液駅悦越謁閲円延沿炎怨宴媛援園煙猿遠鉛塩演縁艶汚王凹央応往押旺欧殴桜翁奥横岡屋億憶臆虞乙俺卸音恩温穏下化火加" # noqa: E501
    + "可仮何花佳価果河苛科架夏家荷華菓貨渦過嫁暇禍靴寡歌箇稼課蚊牙瓦我画芽賀雅餓介回灰会快戒改怪拐悔海界皆械絵開階塊楷解潰壊懐諧貝外劾害崖涯" # noqa: E501
    + "街慨蓋該概骸垣柿各角拡革格核殻郭覚較隔閣確獲嚇穫学岳楽額顎掛潟括活喝渇割葛滑褐轄且株釜鎌刈干刊甘汗缶完肝官冠巻看陥乾勘患貫寒喚堪換敢棺" # noqa: E501
    + "款間閑勧寛幹感漢慣管関歓監緩憾還館環簡観韓艦鑑丸含岸岩玩眼頑顔願企伎危机気岐希忌汽奇祈季紀軌既記起飢鬼帰基寄規亀喜幾揮期棋貴棄毀旗器畿" # noqa: E501
    + "輝機騎技宜偽欺義疑儀戯擬犠議菊吉喫詰却客脚逆虐九久及弓丘旧休吸朽臼求究泣急級糾宮救球給嗅窮牛去巨居拒拠挙虚許距魚御漁凶共叫狂京享供協況" # noqa: E501
    + "峡挟狭恐恭胸脅強教郷境橋矯鏡競響驚仰暁業凝曲局極玉巾斤均近金菌勤琴筋僅禁緊錦謹襟吟銀区句苦駆具惧愚空偶遇隅串屈掘窟熊繰君訓勲薫軍郡群兄" # noqa: E501
    + "刑形系径茎係型契計恵啓掲渓経蛍敬景軽傾携継詣慶憬稽憩警鶏芸迎鯨隙劇撃激桁欠穴血決結傑潔月犬件見券肩建研県倹兼剣拳軒健険圏堅検嫌献絹遣権" # noqa: E501
    + "憲賢謙鍵繭顕験懸元幻玄言弦限原現舷減源厳己戸古呼固股虎孤弧故枯個庫湖雇誇鼓錮顧五互午呉後娯悟碁語誤護口工公勾孔功巧広甲交光向后好江考行" # noqa: E501
    + "坑孝抗攻更効幸拘肯侯厚恒洪皇紅荒郊香候校耕航貢降高康控梗黄喉慌港硬絞項溝鉱構綱酵稿興衡鋼講購乞号合拷剛傲豪克告谷刻国黒穀酷獄骨駒込頃今" # noqa: E501
    + "困昆恨根婚混痕紺魂墾懇左佐沙査砂唆差詐鎖座挫才再災妻采砕宰栽彩採済祭斎細菜最裁債催塞歳載際埼在材剤財罪崎作削昨柵索策酢搾錯咲冊札刷刹拶" # noqa: E501
    + "殺察撮擦雑皿三山参桟蚕惨産傘散算酸賛残斬暫士子支止氏仕史司四市矢旨死糸至伺志私使刺始姉枝祉肢姿思指施師恣紙脂視紫詞歯嗣試詩資飼誌雌摯賜" # noqa: E501
    + "諮示字寺次耳自似児事侍治持時滋慈辞磁餌璽鹿式識軸七叱失室疾執湿嫉漆質実芝写社車舎者射捨赦斜煮遮謝邪蛇尺借酌釈爵若弱寂手主守朱取狩首殊珠" # noqa: E501
    + "酒腫種趣寿受呪授需儒樹収囚州舟秀周宗拾秋臭修袖終羞習週就衆集愁酬醜蹴襲十汁充住柔重従渋銃獣縦叔祝宿淑粛縮塾熟出述術俊春瞬旬巡盾准殉純循" # noqa: E501
    + "順準潤遵処初所書庶暑署緒諸女如助序叙徐除小升少召匠床抄肖尚招承昇松沼昭宵将消症祥称笑唱商渉章紹訟勝掌晶焼焦硝粧詔証象傷奨照詳彰障憧衝賞" # noqa: E501
    + "償礁鐘上丈冗条状乗城浄剰常情場畳蒸縄壌嬢錠譲醸色拭食植殖飾触嘱織職辱尻心申伸臣芯身辛侵信津神唇娠振浸真針深紳進森診寝慎新審震薪親人刃仁" # noqa: E501
    + "尽迅甚陣尋腎須図水吹垂炊帥粋衰推酔遂睡穂随髄枢崇数据杉裾寸瀬是井世正生成西声制姓征性青斉政星牲省凄逝清盛婿晴勢聖誠精製誓静請整醒税夕斥" # noqa: E501
    + "石赤昔析席脊隻惜戚責跡積績籍切折拙窃接設雪摂節説舌絶千川仙占先宣専泉浅洗染扇栓旋船戦煎羨腺詮践箋銭潜線遷選薦繊鮮全前善然禅漸膳繕狙阻祖" # noqa: E501
    + "租素措粗組疎訴塑遡礎双壮早争走奏相荘草送倉捜挿桑巣掃曹曽爽窓創喪痩葬装僧想層総遭槽踪操燥霜騒藻造像増憎蔵贈臓即束足促則息捉速側測俗族属" # noqa: E501
    + "賊続卒率存村孫尊損遜他多汰打妥唾堕惰駄太対体耐待怠胎退帯泰堆袋逮替貸隊滞態戴大代台第題滝宅択沢卓拓託濯諾濁但達脱奪棚誰丹旦担単炭胆探淡" # noqa: E501
    + "短嘆端綻誕鍛団男段断弾暖談壇地池知値恥致遅痴稚置緻竹畜逐蓄築秩窒茶着嫡中仲虫沖宙忠抽注昼柱衷酎鋳駐著貯丁弔庁兆町長挑帳張彫眺釣頂鳥朝貼" # noqa: E501
    + "超腸跳徴嘲潮澄調聴懲直勅捗沈珍朕陳賃鎮追椎墜通痛塚漬坪爪鶴低呈廷弟定底抵邸亭貞帝訂庭逓停偵堤提程艇締諦泥的笛摘滴適敵溺迭哲鉄徹撤天典店" # noqa: E501
    + "点展添転塡田伝殿電斗吐妬徒途都渡塗賭土奴努度怒刀冬灯当投豆東到逃倒凍唐島桃討透党悼盗陶塔搭棟湯痘登答等筒統稲踏糖頭謄藤闘騰同洞胴動堂童" # noqa: E501
    + "道働銅導瞳峠匿特得督徳篤毒独読栃凸突届屯豚頓貪鈍曇丼那奈内梨謎鍋南軟難二尼弐匂肉虹日入乳尿任妊忍認寧熱年念捻粘燃悩納能脳農濃把波派破覇" # noqa: E501
    + "馬婆罵拝杯背肺俳配排敗廃輩売倍梅培陪媒買賠白伯拍泊迫剝舶博薄麦漠縛爆箱箸畑肌八鉢発髪伐抜罰閥反半氾犯帆汎伴判坂阪板版班畔般販斑飯搬煩頒" # noqa: E501
    + "範繁藩晩番蛮盤比皮妃否批彼披肥非卑飛疲秘被悲扉費碑罷避尾眉美備微鼻膝肘匹必泌筆姫百氷表俵票評漂標苗秒病描猫品浜貧賓頻敏瓶不夫父付布扶府" # noqa: E501
    + "怖阜附訃負赴浮婦符富普腐敷膚賦譜侮武部舞封風伏服副幅復福腹複覆払沸仏物粉紛雰噴墳憤奮分文聞丙平兵併並柄陛閉塀幣弊蔽餅米壁璧癖別蔑片辺返" # noqa: E501
    + "変偏遍編弁便勉歩保哺捕補舗母募墓慕暮簿方包芳邦奉宝抱放法泡胞俸倣峰砲崩訪報蜂豊飽褒縫亡乏忙坊妨忘防房肪某冒剖紡望傍帽棒貿貌暴膨謀頰北木" # noqa: E501
    + "朴牧睦僕墨撲没勃堀本奔翻凡盆麻摩磨魔毎妹枚昧埋幕膜枕又末抹万満慢漫未味魅岬密蜜脈妙民眠矛務無夢霧娘名命明迷冥盟銘鳴滅免面綿麺茂模毛妄盲" # noqa: E501
    + "耗猛網目黙門紋問冶夜野弥厄役約訳薬躍闇由油喩愉諭輸癒唯友有勇幽悠郵湧猶裕遊雄誘憂融優与予余誉預幼用羊妖洋要容庸揚揺葉陽溶腰様瘍踊窯養擁" # noqa: E501
    + "謡曜抑沃浴欲翌翼拉裸羅来雷頼絡落酪辣乱卵覧濫藍欄吏利里理痢裏履璃離陸立律慄略柳流留竜粒隆硫侶旅虜慮了両良料涼猟陵量僚領寮療瞭糧力緑林厘" # noqa: E501
    + "倫輪隣臨瑠涙累塁類令礼冷励戻例鈴零霊隷齢麗暦歴列劣烈裂恋連廉練錬呂炉賂路露老労弄郎朗浪廊楼漏籠六録麓論和話賄脇惑枠湾腕" # noqa: E501
    + _BASE_VOCABS["punctuation"]
    + "。・〜°—、「」『』【】゛》《〉〈" # Japanese punctuation
    + _BASE_VOCABS["currency"]
)
# Korean: the full set of precomposed Hangul syllable blocks (U+AC00..),
# plus digits, western + CJK punctuation, and the won currency sign.
VOCABS["korean"] = (
    _BASE_VOCABS["digits"]
    + "가각갂갃간갅갆갇갈갉갊갋갌갍갎갏감갑값갓갔강갖갗갘같갚갛개객갞갟갠갡갢갣갤갥갦갧갨갩갪갫갬갭갮갯갰갱갲갳갴갵갶갷갸갹갺갻갼갽갾갿걀걁걂걃걄걅걆걇걈" # noqa: E501
    + "걉걊걋걌걍걎걏걐걑걒걓걔걕걖걗걘걙걚걛걜걝걞걟걠걡걢걣걤걥걦걧걨걩걪걫걬걭걮걯거걱걲걳건걵걶걷걸걹걺걻걼걽걾걿검겁겂것겄겅겆겇겈겉겊겋게겍겎겏겐겑" # noqa: E501
    + "겒겓겔겕겖겗겘겙겚겛겜겝겞겟겠겡겢겣겤겥겦겧겨격겪겫견겭겮겯결겱겲겳겴겵겶겷겸겹겺겻겼경겾겿곀곁곂곃계곅곆곇곈곉곊곋곌곍곎곏곐곑곒곓곔곕곖곗곘곙곚" # noqa: E501
    + "곛곜곝곞곟고곡곢곣곤곥곦곧골곩곪곫곬곭곮곯곰곱곲곳곴공곶곷곸곹곺곻과곽곾곿관괁괂괃괄괅괆괇괈괉괊괋괌괍괎괏괐광괒괓괔괕괖괗괘괙괚괛괜괝괞괟괠괡괢괣" # noqa: E501
    + "괤괥괦괧괨괩괪괫괬괭괮괯괰괱괲괳괴괵괶괷괸괹괺괻괼괽괾괿굀굁굂굃굄굅굆굇굈굉굊굋굌굍굎굏교굑굒굓굔굕굖굗굘굙굚굛굜굝굞굟굠굡굢굣굤굥굦굧굨굩굪굫구" # noqa: E501
    + "국굮굯군굱굲굳굴굵굶굷굸굹굺굻굼굽굾굿궀궁궂궃궄궅궆궇궈궉궊궋권궍궎궏궐궑궒궓궔궕궖궗궘궙궚궛궜궝궞궟궠궡궢궣궤궥궦궧궨궩궪궫궬궭궮궯궰궱궲궳궴궵" # noqa: E501
    + "궶궷궸궹궺궻궼궽궾궿귀귁귂귃귄귅귆귇귈귉귊귋귌귍귎귏귐귑귒귓귔귕귖귗귘귙귚귛규귝귞귟균귡귢귣귤귥귦귧귨귩귪귫귬귭귮귯귰귱귲귳귴귵귶귷그극귺귻근귽귾" # noqa: E501
    + "귿글긁긂긃긄긅긆긇금급긊긋긌긍긎긏긐긑긒긓긔긕긖긗긘긙긚긛긜긝긞긟긠긡긢긣긤긥긦긧긨긩긪긫긬긭긮긯기긱긲긳긴긵긶긷길긹긺긻긼긽긾긿김깁깂깃깄깅깆깇" # noqa: E501
    + "깈깉깊깋까깍깎깏깐깑깒깓깔깕깖깗깘깙깚깛깜깝깞깟깠깡깢깣깤깥깦깧깨깩깪깫깬깭깮깯깰깱깲깳깴깵깶깷깸깹깺깻깼깽깾깿꺀꺁꺂꺃꺄꺅꺆꺇꺈꺉꺊꺋꺌꺍꺎꺏꺐" # noqa: E501
    + "꺑꺒꺓꺔꺕꺖꺗꺘꺙꺚꺛꺜꺝꺞꺟꺠꺡꺢꺣꺤꺥꺦꺧꺨꺩꺪꺫꺬꺭꺮꺯꺰꺱꺲꺳꺴꺵꺶꺷꺸꺹꺺꺻꺼꺽꺾꺿껀껁껂껃껄껅껆껇껈껉껊껋껌껍껎껏껐껑껒껓껔껕껖껗께껙" # noqa: E501
    + "껚껛껜껝껞껟껠껡껢껣껤껥껦껧껨껩껪껫껬껭껮껯껰껱껲껳껴껵껶껷껸껹껺껻껼껽껾껿꼀꼁꼂꼃꼄꼅꼆꼇꼈꼉꼊꼋꼌꼍꼎꼏꼐꼑꼒꼓꼔꼕꼖꼗꼘꼙꼚꼛꼜꼝꼞꼟꼠꼡꼢" # noqa: E501
    + "꼣꼤꼥꼦꼧꼨꼩꼪꼫꼬꼭꼮꼯꼰꼱꼲꼳꼴꼵꼶꼷꼸꼹꼺꼻꼼꼽꼾꼿꽀꽁꽂꽃꽄꽅꽆꽇꽈꽉꽊꽋꽌꽍꽎꽏꽐꽑꽒꽓꽔꽕꽖꽗꽘꽙꽚꽛꽜꽝꽞꽟꽠꽡꽢꽣꽤꽥꽦꽧꽨꽩꽪꽫" # noqa: E501
    + "꽬꽭꽮꽯꽰꽱꽲꽳꽴꽵꽶꽷꽸꽹꽺꽻꽼꽽꽾꽿꾀꾁꾂꾃꾄꾅꾆꾇꾈꾉꾊꾋꾌꾍꾎꾏꾐꾑꾒꾓꾔꾕꾖꾗꾘꾙꾚꾛꾜꾝꾞꾟꾠꾡꾢꾣꾤꾥꾦꾧꾨꾩꾪꾫꾬꾭꾮꾯꾰꾱꾲꾳꾴" # noqa: E501
    + "꾵꾶꾷꾸꾹꾺꾻꾼꾽꾾꾿꿀꿁꿂꿃꿄꿅꿆꿇꿈꿉꿊꿋꿌꿍꿎꿏꿐꿑꿒꿓꿔꿕꿖꿗꿘꿙꿚꿛꿜꿝꿞꿟꿠꿡꿢꿣꿤꿥꿦꿧꿨꿩꿪꿫꿬꿭꿮꿯꿰꿱꿲꿳꿴꿵꿶꿷꿸꿹꿺꿻꿼꿽" # noqa: E501
    + "꿾꿿뀀뀁뀂뀃뀄뀅뀆뀇뀈뀉뀊뀋뀌뀍뀎뀏뀐뀑뀒뀓뀔뀕뀖뀗뀘뀙뀚뀛뀜뀝뀞뀟뀠뀡뀢뀣뀤뀥뀦뀧뀨뀩뀪뀫뀬뀭뀮뀯뀰뀱뀲뀳뀴뀵뀶뀷뀸뀹뀺뀻뀼뀽뀾뀿끀끁끂끃끄끅끆" # noqa: E501
    + "끇끈끉끊끋끌끍끎끏끐끑끒끓끔끕끖끗끘끙끚끛끜끝끞끟끠끡끢끣끤끥끦끧끨끩끪끫끬끭끮끯끰끱끲끳끴끵끶끷끸끹끺끻끼끽끾끿낀낁낂낃낄낅낆낇낈낉낊낋낌낍낎낏" # noqa: E501
    + "낐낑낒낓낔낕낖낗나낙낚낛난낝낞낟날낡낢낣낤낥낦낧남납낪낫났낭낮낯낰낱낲낳내낵낶낷낸낹낺낻낼낽낾낿냀냁냂냃냄냅냆냇냈냉냊냋냌냍냎냏냐냑냒냓냔냕냖냗냘" # noqa: E501
    + "냙냚냛냜냝냞냟냠냡냢냣냤냥냦냧냨냩냪냫냬냭냮냯냰냱냲냳냴냵냶냷냸냹냺냻냼냽냾냿넀넁넂넃넄넅넆넇너넉넊넋넌넍넎넏널넑넒넓넔넕넖넗넘넙넚넛넜넝넞넟넠넡" # noqa: E501
    + "넢넣네넥넦넧넨넩넪넫넬넭넮넯넰넱넲넳넴넵넶넷넸넹넺넻넼넽넾넿녀녁녂녃년녅녆녇녈녉녊녋녌녍녎녏념녑녒녓녔녕녖녗녘녙녚녛녜녝녞녟녠녡녢녣녤녥녦녧녨녩녪" # noqa: E501
    + "녫녬녭녮녯녰녱녲녳녴녵녶녷노녹녺녻논녽녾녿놀놁놂놃놄놅놆놇놈놉놊놋놌농놎놏놐놑높놓놔놕놖놗놘놙놚놛놜놝놞놟놠놡놢놣놤놥놦놧놨놩놪놫놬놭놮놯놰놱놲놳" # noqa: E501
    + "놴놵놶놷놸놹놺놻놼놽놾놿뇀뇁뇂뇃뇄뇅뇆뇇뇈뇉뇊뇋뇌뇍뇎뇏뇐뇑뇒뇓뇔뇕뇖뇗뇘뇙뇚뇛뇜뇝뇞뇟뇠뇡뇢뇣뇤뇥뇦뇧뇨뇩뇪뇫뇬뇭뇮뇯뇰뇱뇲뇳뇴뇵뇶뇷뇸뇹뇺뇻뇼" # noqa: E501
    + "뇽뇾뇿눀눁눂눃누눅눆눇눈눉눊눋눌눍눎눏눐눑눒눓눔눕눖눗눘눙눚눛눜눝눞눟눠눡눢눣눤눥눦눧눨눩눪눫눬눭눮눯눰눱눲눳눴눵눶눷눸눹눺눻눼눽눾눿뉀뉁뉂뉃뉄뉅" # noqa: E501
    + "뉆뉇뉈뉉뉊뉋뉌뉍뉎뉏뉐뉑뉒뉓뉔뉕뉖뉗뉘뉙뉚뉛뉜뉝뉞뉟뉠뉡뉢뉣뉤뉥뉦뉧뉨뉩뉪뉫뉬뉭뉮뉯뉰뉱뉲뉳뉴뉵뉶뉷뉸뉹뉺뉻뉼뉽뉾뉿늀늁늂늃늄늅늆늇늈늉늊늋늌늍늎" # noqa: E501
    + "늏느늑늒늓는늕늖늗늘늙늚늛늜늝늞늟늠늡늢늣늤능늦늧늨늩늪늫늬늭늮늯늰늱늲늳늴늵늶늷늸늹늺늻늼늽늾늿닀닁닂닃닄닅닆닇니닉닊닋닌닍닎닏닐닑닒닓닔닕닖닗" # noqa: E501
    + "님닙닚닛닜닝닞닟닠닡닢닣다닥닦닧단닩닪닫달닭닮닯닰닱닲닳담답닶닷닸당닺닻닼닽닾닿대댁댂댃댄댅댆댇댈댉댊댋댌댍댎댏댐댑댒댓댔댕댖댗댘댙댚댛댜댝댞댟댠" # noqa: E501
    + "댡댢댣댤댥댦댧댨댩댪댫댬댭댮댯댰댱댲댳댴댵댶댷댸댹댺댻댼댽댾댿덀덁덂덃덄덅덆덇덈덉덊덋덌덍덎덏덐덑덒덓더덕덖덗던덙덚덛덜덝덞덟덠덡덢덣덤덥덦덧덨덩" # noqa: E501
    + "덪덫덬덭덮덯데덱덲덳덴덵덶덷델덹덺덻덼덽덾덿뎀뎁뎂뎃뎄뎅뎆뎇뎈뎉뎊뎋뎌뎍뎎뎏뎐뎑뎒뎓뎔뎕뎖뎗뎘뎙뎚뎛뎜뎝뎞뎟뎠뎡뎢뎣뎤뎥뎦뎧뎨뎩뎪뎫뎬뎭뎮뎯뎰뎱뎲" # noqa: E501
    + "뎳뎴뎵뎶뎷뎸뎹뎺뎻뎼뎽뎾뎿돀돁돂돃도독돆돇돈돉돊돋돌돍돎돏돐돑돒돓돔돕돖돗돘동돚돛돜돝돞돟돠돡돢돣돤돥돦돧돨돩돪돫돬돭돮돯돰돱돲돳돴돵돶돷돸돹돺돻" # noqa: E501
    + "돼돽돾돿됀됁됂됃됄됅됆됇됈됉됊됋됌됍됎됏됐됑됒됓됔됕됖됗되됙됚됛된됝됞됟될됡됢됣됤됥됦됧됨됩됪됫됬됭됮됯됰됱됲됳됴됵됶됷됸됹됺됻됼됽됾됿둀둁둂둃둄" # noqa: E501
    + "둅둆둇둈둉둊둋둌둍둎둏두둑둒둓둔둕둖둗둘둙둚둛둜둝둞둟둠둡둢둣둤둥둦둧둨둩둪둫둬둭둮둯둰둱둲둳둴둵둶둷둸둹둺둻둼둽둾둿뒀뒁뒂뒃뒄뒅뒆뒇뒈뒉뒊뒋뒌뒍" # noqa: E501
    + "뒎뒏뒐뒑뒒뒓뒔뒕뒖뒗뒘뒙뒚뒛뒜뒝뒞뒟뒠뒡뒢뒣뒤뒥뒦뒧뒨뒩뒪뒫뒬뒭뒮뒯뒰뒱뒲뒳뒴뒵뒶뒷뒸뒹뒺뒻뒼뒽뒾뒿듀듁듂듃듄듅듆듇듈듉듊듋듌듍듎듏듐듑듒듓듔듕듖" # noqa: E501
    + "듗듘듙듚듛드득듞듟든듡듢듣들듥듦듧듨듩듪듫듬듭듮듯듰등듲듳듴듵듶듷듸듹듺듻듼듽듾듿딀딁딂딃딄딅딆딇딈딉딊딋딌딍딎딏딐딑딒딓디딕딖딗딘딙딚딛딜딝딞딟" # noqa: E501
    + "딠딡딢딣딤딥딦딧딨딩딪딫딬딭딮딯따딱딲딳딴딵딶딷딸딹딺딻딼딽딾딿땀땁땂땃땄땅땆땇땈땉땊땋때땍땎땏땐땑땒땓땔땕땖땗땘땙땚땛땜땝땞땟땠땡땢땣땤땥땦땧땨" # noqa: E501
    + "땩땪땫땬땭땮땯땰땱땲땳땴땵땶땷땸땹땺땻땼땽땾땿떀떁떂떃떄떅떆떇떈떉떊떋떌떍떎떏떐떑떒떓떔떕떖떗떘떙떚떛떜떝떞떟떠떡떢떣떤떥떦떧떨떩떪떫떬떭떮떯떰떱" # noqa: E501
    + "떲떳떴떵떶떷떸떹떺떻떼떽떾떿뗀뗁뗂뗃뗄뗅뗆뗇뗈뗉뗊뗋뗌뗍뗎뗏뗐뗑뗒뗓뗔뗕뗖뗗뗘뗙뗚뗛뗜뗝뗞뗟뗠뗡뗢뗣뗤뗥뗦뗧뗨뗩뗪뗫뗬뗭뗮뗯뗰뗱뗲뗳뗴뗵뗶뗷뗸뗹뗺" # noqa: E501
    + "뗻뗼뗽뗾뗿똀똁똂똃똄똅똆똇똈똉똊똋똌똍똎똏또똑똒똓똔똕똖똗똘똙똚똛똜똝똞똟똠똡똢똣똤똥똦똧똨똩똪똫똬똭똮똯똰똱똲똳똴똵똶똷똸똹똺똻똼똽똾똿뙀뙁뙂뙃" # noqa: E501
    + "뙄뙅뙆뙇뙈뙉뙊뙋뙌뙍뙎뙏뙐뙑뙒뙓뙔뙕뙖뙗뙘뙙뙚뙛뙜뙝뙞뙟뙠뙡뙢뙣뙤뙥뙦뙧뙨뙩뙪뙫뙬뙭뙮뙯뙰뙱뙲뙳뙴뙵뙶뙷뙸뙹뙺뙻뙼뙽뙾뙿뚀뚁뚂뚃뚄뚅뚆뚇뚈뚉뚊뚋뚌" # noqa: E501
    + "뚍뚎뚏뚐뚑뚒뚓뚔뚕뚖뚗뚘뚙뚚뚛뚜뚝뚞뚟뚠뚡뚢뚣뚤뚥뚦뚧뚨뚩뚪뚫뚬뚭뚮뚯뚰뚱뚲뚳뚴뚵뚶뚷뚸뚹뚺뚻뚼뚽뚾뚿뛀뛁뛂뛃뛄뛅뛆뛇뛈뛉뛊뛋뛌뛍뛎뛏뛐뛑뛒뛓뛔뛕" # noqa: E501
    + "뛖뛗뛘뛙뛚뛛뛜뛝뛞뛟뛠뛡뛢뛣뛤뛥뛦뛧뛨뛩뛪뛫뛬뛭뛮뛯뛰뛱뛲뛳뛴뛵뛶뛷뛸뛹뛺뛻뛼뛽뛾뛿뜀뜁뜂뜃뜄뜅뜆뜇뜈뜉뜊뜋뜌뜍뜎뜏뜐뜑뜒뜓뜔뜕뜖뜗뜘뜙뜚뜛뜜뜝뜞" # noqa: E501
    + "뜟뜠뜡뜢뜣뜤뜥뜦뜧뜨뜩뜪뜫뜬뜭뜮뜯뜰뜱뜲뜳뜴뜵뜶뜷뜸뜹뜺뜻뜼뜽뜾뜿띀띁띂띃띄띅띆띇띈띉띊띋띌띍띎띏띐띑띒띓띔띕띖띗띘띙띚띛띜띝띞띟띠띡띢띣띤띥띦띧" # noqa: E501
    + "띨띩띪띫띬띭띮띯띰띱띲띳띴띵띶띷띸띹띺띻라락띾띿란랁랂랃랄랅랆랇랈랉랊랋람랍랎랏랐랑랒랓랔랕랖랗래랙랚랛랜랝랞랟랠랡랢랣랤랥랦랧램랩랪랫랬랭랮랯랰" # noqa: E501
    + "랱랲랳랴략랶랷랸랹랺랻랼랽랾랿럀럁럂럃럄럅럆럇럈량럊럋럌럍럎럏럐럑럒럓럔럕럖럗럘럙럚럛럜럝럞럟럠럡럢럣럤럥럦럧럨럩럪럫러럭럮럯런럱럲럳럴럵럶럷럸럹" # noqa: E501
    + "럺럻럼럽럾럿렀렁렂렃렄렅렆렇레렉렊렋렌렍렎렏렐렑렒렓렔렕렖렗렘렙렚렛렜렝렞렟렠렡렢렣려력렦렧련렩렪렫렬렭렮렯렰렱렲렳렴렵렶렷렸령렺렻렼렽렾렿례롁롂" # noqa: E501
    + "롃롄롅롆롇롈롉롊롋롌롍롎롏롐롑롒롓롔롕롖롗롘롙롚롛로록롞롟론롡롢롣롤롥롦롧롨롩롪롫롬롭롮롯롰롱롲롳롴롵롶롷롸롹롺롻롼롽롾롿뢀뢁뢂뢃뢄뢅뢆뢇뢈뢉뢊뢋" # noqa: E501
    + "뢌뢍뢎뢏뢐뢑뢒뢓뢔뢕뢖뢗뢘뢙뢚뢛뢜뢝뢞뢟뢠뢡뢢뢣뢤뢥뢦뢧뢨뢩뢪뢫뢬뢭뢮뢯뢰뢱뢲뢳뢴뢵뢶뢷뢸뢹뢺뢻뢼뢽뢾뢿룀룁룂룃룄룅룆룇룈룉룊룋료룍룎룏룐룑룒룓룔" # noqa: E501
    + "룕룖룗룘룙룚룛룜룝룞룟룠룡룢룣룤룥룦룧루룩룪룫룬룭룮룯룰룱룲룳룴룵룶룷룸룹룺룻룼룽룾룿뤀뤁뤂뤃뤄뤅뤆뤇뤈뤉뤊뤋뤌뤍뤎뤏뤐뤑뤒뤓뤔뤕뤖뤗뤘뤙뤚뤛뤜뤝" # noqa: E501
    + "뤞뤟뤠뤡뤢뤣뤤뤥뤦뤧뤨뤩뤪뤫뤬뤭뤮뤯뤰뤱뤲뤳뤴뤵뤶뤷뤸뤹뤺뤻뤼뤽뤾뤿륀륁륂륃륄륅륆륇륈륉륊륋륌륍륎륏륐륑륒륓륔륕륖륗류륙륚륛륜륝륞륟률륡륢륣륤륥륦" # noqa: E501
    + "륧륨륩륪륫륬륭륮륯륰륱륲륳르륵륶륷른륹륺륻를륽륾륿릀릁릂릃름릅릆릇릈릉릊릋릌릍릎릏릐릑릒릓릔릕릖릗릘릙릚릛릜릝릞릟릠릡릢릣릤릥릦릧릨릩릪릫리릭릮릯" # noqa: E501
    + "린릱릲릳릴릵릶릷릸릹릺릻림립릾릿맀링맂맃맄맅맆맇마막맊맋만맍많맏말맑맒맓맔맕맖맗맘맙맚맛맜망맞맟맠맡맢맣매맥맦맧맨맩맪맫맬맭맮맯맰맱맲맳맴맵맶맷맸" # noqa: E501
    + "맹맺맻맼맽맾맿먀먁먂먃먄먅먆먇먈먉먊먋먌먍먎먏먐먑먒먓먔먕먖먗먘먙먚먛먜먝먞먟먠먡먢먣먤먥먦먧먨먩먪먫먬먭먮먯먰먱먲먳먴먵먶먷머먹먺먻먼먽먾먿멀멁" # noqa: E501
    + "멂멃멄멅멆멇멈멉멊멋멌멍멎멏멐멑멒멓메멕멖멗멘멙멚멛멜멝멞멟멠멡멢멣멤멥멦멧멨멩멪멫멬멭멮멯며멱멲멳면멵멶멷멸멹멺멻멼멽멾멿몀몁몂몃몄명몆몇몈몉몊" # noqa: E501
    + "몋몌몍몎몏몐몑몒몓몔몕몖몗몘몙몚몛몜몝몞몟몠몡몢몣몤몥몦몧모목몪몫몬몭몮몯몰몱몲몳몴몵몶몷몸몹몺못몼몽몾몿뫀뫁뫂뫃뫄뫅뫆뫇뫈뫉뫊뫋뫌뫍뫎뫏뫐뫑뫒뫓" # noqa: E501
    + "뫔뫕뫖뫗뫘뫙뫚뫛뫜뫝뫞뫟뫠뫡뫢뫣뫤뫥뫦뫧뫨뫩뫪뫫뫬뫭뫮뫯뫰뫱뫲뫳뫴뫵뫶뫷뫸뫹뫺뫻뫼뫽뫾뫿묀묁묂묃묄묅묆묇묈묉묊묋묌묍묎묏묐묑묒묓묔묕묖묗묘묙묚묛묜" # noqa: E501
    + "묝묞묟묠묡묢묣묤묥묦묧묨묩묪묫묬묭묮묯묰묱묲묳무묵묶묷문묹묺묻물묽묾묿뭀뭁뭂뭃뭄뭅뭆뭇뭈뭉뭊뭋뭌뭍뭎뭏뭐뭑뭒뭓뭔뭕뭖뭗뭘뭙뭚뭛뭜뭝뭞뭟뭠뭡뭢뭣뭤뭥" # noqa: E501
    + "뭦뭧뭨뭩뭪뭫뭬뭭뭮뭯뭰뭱뭲뭳뭴뭵뭶뭷뭸뭹뭺뭻뭼뭽뭾뭿뮀뮁뮂뮃뮄뮅뮆뮇뮈뮉뮊뮋뮌뮍뮎뮏뮐뮑뮒뮓뮔뮕뮖뮗뮘뮙뮚뮛뮜뮝뮞뮟뮠뮡뮢뮣뮤뮥뮦뮧뮨뮩뮪뮫뮬뮭뮮" # noqa: E501
    + "뮯뮰뮱뮲뮳뮴뮵뮶뮷뮸뮹뮺뮻뮼뮽뮾뮿므믁믂믃믄믅믆믇믈믉믊믋믌믍믎믏믐믑믒믓믔믕믖믗믘믙믚믛믜믝믞믟믠믡믢믣믤믥믦믧믨믩믪믫믬믭믮믯믰믱믲믳믴믵믶믷" # noqa: E501
    + "미믹믺믻민믽믾믿밀밁밂밃밄밅밆밇밈밉밊밋밌밍밎및밐밑밒밓바박밖밗반밙밚받발밝밞밟밠밡밢밣밤밥밦밧밨방밪밫밬밭밮밯배백밲밳밴밵밶밷밸밹밺밻밼밽밾밿뱀" # noqa: E501
    + "뱁뱂뱃뱄뱅뱆뱇뱈뱉뱊뱋뱌뱍뱎뱏뱐뱑뱒뱓뱔뱕뱖뱗뱘뱙뱚뱛뱜뱝뱞뱟뱠뱡뱢뱣뱤뱥뱦뱧뱨뱩뱪뱫뱬뱭뱮뱯뱰뱱뱲뱳뱴뱵뱶뱷뱸뱹뱺뱻뱼뱽뱾뱿벀벁벂벃버벅벆벇번벉" # noqa: E501
    + "벊벋벌벍벎벏벐벑벒벓범법벖벗벘벙벚벛벜벝벞벟베벡벢벣벤벥벦벧벨벩벪벫벬벭벮벯벰벱벲벳벴벵벶벷벸벹벺벻벼벽벾벿변볁볂볃별볅볆볇볈볉볊볋볌볍볎볏볐병볒" # noqa: E501
    + "볓볔볕볖볗볘볙볚볛볜볝볞볟볠볡볢볣볤볥볦볧볨볩볪볫볬볭볮볯볰볱볲볳보복볶볷본볹볺볻볼볽볾볿봀봁봂봃봄봅봆봇봈봉봊봋봌봍봎봏봐봑봒봓봔봕봖봗봘봙봚봛" # noqa: E501
    + "봜봝봞봟봠봡봢봣봤봥봦봧봨봩봪봫봬봭봮봯봰봱봲봳봴봵봶봷봸봹봺봻봼봽봾봿뵀뵁뵂뵃뵄뵅뵆뵇뵈뵉뵊뵋뵌뵍뵎뵏뵐뵑뵒뵓뵔뵕뵖뵗뵘뵙뵚뵛뵜뵝뵞뵟뵠뵡뵢뵣뵤" # noqa: E501
    + "뵥뵦뵧뵨뵩뵪뵫뵬뵭뵮뵯뵰뵱뵲뵳뵴뵵뵶뵷뵸뵹뵺뵻뵼뵽뵾뵿부북붂붃분붅붆붇불붉붊붋붌붍붎붏붐붑붒붓붔붕붖붗붘붙붚붛붜붝붞붟붠붡붢붣붤붥붦붧붨붩붪붫붬붭" # noqa: E501
    + "붮붯붰붱붲붳붴붵붶붷붸붹붺붻붼붽붾붿뷀뷁뷂뷃뷄뷅뷆뷇뷈뷉뷊뷋뷌뷍뷎뷏뷐뷑뷒뷓뷔뷕뷖뷗뷘뷙뷚뷛뷜뷝뷞뷟뷠뷡뷢뷣뷤뷥뷦뷧뷨뷩뷪뷫뷬뷭뷮뷯뷰뷱뷲뷳뷴뷵뷶" # noqa: E501
    + "뷷뷸뷹뷺뷻뷼뷽뷾뷿븀븁븂븃븄븅븆븇븈븉븊븋브븍븎븏븐븑븒븓블븕븖븗븘븙븚븛븜븝븞븟븠븡븢븣븤븥븦븧븨븩븪븫븬븭븮븯븰븱븲븳븴븵븶븷븸븹븺븻븼븽븾븿" # noqa: E501
    + "빀빁빂빃비빅빆빇빈빉빊빋빌빍빎빏빐빑빒빓빔빕빖빗빘빙빚빛빜빝빞빟빠빡빢빣빤빥빦빧빨빩빪빫빬빭빮빯빰빱빲빳빴빵빶빷빸빹빺빻빼빽빾빿뺀뺁뺂뺃뺄뺅뺆뺇뺈" # noqa: E501
    + "뺉뺊뺋뺌뺍뺎뺏뺐뺑뺒뺓뺔뺕뺖뺗뺘뺙뺚뺛뺜뺝뺞뺟뺠뺡뺢뺣뺤뺥뺦뺧뺨뺩뺪뺫뺬뺭뺮뺯뺰뺱뺲뺳뺴뺵뺶뺷뺸뺹뺺뺻뺼뺽뺾뺿뻀뻁뻂뻃뻄뻅뻆뻇뻈뻉뻊뻋뻌뻍뻎뻏뻐뻑" # noqa: E501
    + "뻒뻓뻔뻕뻖뻗뻘뻙뻚뻛뻜뻝뻞뻟뻠뻡뻢뻣뻤뻥뻦뻧뻨뻩뻪뻫뻬뻭뻮뻯뻰뻱뻲뻳뻴뻵뻶뻷뻸뻹뻺뻻뻼뻽뻾뻿뼀뼁뼂뼃뼄뼅뼆뼇뼈뼉뼊뼋뼌뼍뼎뼏뼐뼑뼒뼓뼔뼕뼖뼗뼘뼙뼚" # noqa: E501
    + "뼛뼜뼝뼞뼟뼠뼡뼢뼣뼤뼥뼦뼧뼨뼩뼪뼫뼬뼭뼮뼯뼰뼱뼲뼳뼴뼵뼶뼷뼸뼹뼺뼻뼼뼽뼾뼿뽀뽁뽂뽃뽄뽅뽆뽇뽈뽉뽊뽋뽌뽍뽎뽏뽐뽑뽒뽓뽔뽕뽖뽗뽘뽙뽚뽛뽜뽝뽞뽟뽠뽡뽢뽣" # noqa: E501
    + "뽤뽥뽦뽧뽨뽩뽪뽫뽬뽭뽮뽯뽰뽱뽲뽳뽴뽵뽶뽷뽸뽹뽺뽻뽼뽽뽾뽿뾀뾁뾂뾃뾄뾅뾆뾇뾈뾉뾊뾋뾌뾍뾎뾏뾐뾑뾒뾓뾔뾕뾖뾗뾘뾙뾚뾛뾜뾝뾞뾟뾠뾡뾢뾣뾤뾥뾦뾧뾨뾩뾪뾫뾬" # noqa: E501
    + "뾭뾮뾯뾰뾱뾲뾳뾴뾵뾶뾷뾸뾹뾺뾻뾼뾽뾾뾿뿀뿁뿂뿃뿄뿅뿆뿇뿈뿉뿊뿋뿌뿍뿎뿏뿐뿑뿒뿓뿔뿕뿖뿗뿘뿙뿚뿛뿜뿝뿞뿟뿠뿡뿢뿣뿤뿥뿦뿧뿨뿩뿪뿫뿬뿭뿮뿯뿰뿱뿲뿳뿴뿵" # noqa: E501
    + "뿶뿷뿸뿹뿺뿻뿼뿽뿾뿿쀀쀁쀂쀃쀄쀅쀆쀇쀈쀉쀊쀋쀌쀍쀎쀏쀐쀑쀒쀓쀔쀕쀖쀗쀘쀙쀚쀛쀜쀝쀞쀟쀠쀡쀢쀣쀤쀥쀦쀧쀨쀩쀪쀫쀬쀭쀮쀯쀰쀱쀲쀳쀴쀵쀶쀷쀸쀹쀺쀻쀼쀽쀾" # noqa: E501
    + "쀿쁀쁁쁂쁃쁄쁅쁆쁇쁈쁉쁊쁋쁌쁍쁎쁏쁐쁑쁒쁓쁔쁕쁖쁗쁘쁙쁚쁛쁜쁝쁞쁟쁠쁡쁢쁣쁤쁥쁦쁧쁨쁩쁪쁫쁬쁭쁮쁯쁰쁱쁲쁳쁴쁵쁶쁷쁸쁹쁺쁻쁼쁽쁾쁿삀삁삂삃삄삅삆삇" # noqa: E501
    + "삈삉삊삋삌삍삎삏삐삑삒삓삔삕삖삗삘삙삚삛삜삝삞삟삠삡삢삣삤삥삦삧삨삩삪삫사삭삮삯산삱삲삳살삵삶삷삸삹삺삻삼삽삾삿샀상샂샃샄샅샆샇새색샊샋샌샍샎샏샐" # noqa: E501
    + "샑샒샓샔샕샖샗샘샙샚샛샜생샞샟샠샡샢샣샤샥샦샧샨샩샪샫샬샭샮샯샰샱샲샳샴샵샶샷샸샹샺샻샼샽샾샿섀섁섂섃섄섅섆섇섈섉섊섋섌섍섎섏섐섑섒섓섔섕섖섗섘섙" # noqa: E501
    + "섚섛서석섞섟선섡섢섣설섥섦섧섨섩섪섫섬섭섮섯섰성섲섳섴섵섶섷세섹섺섻센섽섾섿셀셁셂셃셄셅셆셇셈셉셊셋셌셍셎셏셐셑셒셓셔셕셖셗션셙셚셛셜셝셞셟셠셡셢" # noqa: E501
    + "셣셤셥셦셧셨셩셪셫셬셭셮셯셰셱셲셳셴셵셶셷셸셹셺셻셼셽셾셿솀솁솂솃솄솅솆솇솈솉솊솋소속솎솏손솑솒솓솔솕솖솗솘솙솚솛솜솝솞솟솠송솢솣솤솥솦솧솨솩솪솫" # noqa: E501
    + "솬솭솮솯솰솱솲솳솴솵솶솷솸솹솺솻솼솽솾솿쇀쇁쇂쇃쇄쇅쇆쇇쇈쇉쇊쇋쇌쇍쇎쇏쇐쇑쇒쇓쇔쇕쇖쇗쇘쇙쇚쇛쇜쇝쇞쇟쇠쇡쇢쇣쇤쇥쇦쇧쇨쇩쇪쇫쇬쇭쇮쇯쇰쇱쇲쇳쇴" # noqa: E501
    + "쇵쇶쇷쇸쇹쇺쇻쇼쇽쇾쇿숀숁숂숃숄숅숆숇숈숉숊숋숌숍숎숏숐숑숒숓숔숕숖숗수숙숚숛순숝숞숟술숡숢숣숤숥숦숧숨숩숪숫숬숭숮숯숰숱숲숳숴숵숶숷숸숹숺숻숼숽" # noqa: E501
    + "숾숿쉀쉁쉂쉃쉄쉅쉆쉇쉈쉉쉊쉋쉌쉍쉎쉏쉐쉑쉒쉓쉔쉕쉖쉗쉘쉙쉚쉛쉜쉝쉞쉟쉠쉡쉢쉣쉤쉥쉦쉧쉨쉩쉪쉫쉬쉭쉮쉯쉰쉱쉲쉳쉴쉵쉶쉷쉸쉹쉺쉻쉼쉽쉾쉿슀슁슂슃슄슅슆" # noqa: E501
    + "슇슈슉슊슋슌슍슎슏슐슑슒슓슔슕슖슗슘슙슚슛슜슝슞슟슠슡슢슣스슥슦슧슨슩슪슫슬슭슮슯슰슱슲슳슴습슶슷슸승슺슻슼슽슾슿싀싁싂싃싄싅싆싇싈싉싊싋싌싍싎싏" # noqa: E501
    + "싐싑싒싓싔싕싖싗싘싙싚싛시식싞싟신싡싢싣실싥싦싧싨싩싪싫심십싮싯싰싱싲싳싴싵싶싷싸싹싺싻싼싽싾싿쌀쌁쌂쌃쌄쌅쌆쌇쌈쌉쌊쌋쌌쌍쌎쌏쌐쌑쌒쌓쌔쌕쌖쌗쌘" # noqa: E501
    + "쌙쌚쌛쌜쌝쌞쌟쌠쌡쌢쌣쌤쌥쌦쌧쌨쌩쌪쌫쌬쌭쌮쌯쌰쌱쌲쌳쌴쌵쌶쌷쌸쌹쌺쌻쌼쌽쌾쌿썀썁썂썃썄썅썆썇썈썉썊썋썌썍썎썏썐썑썒썓썔썕썖썗썘썙썚썛썜썝썞썟썠썡" # noqa: E501
    + "썢썣썤썥썦썧써썩썪썫썬썭썮썯썰썱썲썳썴썵썶썷썸썹썺썻썼썽썾썿쎀쎁쎂쎃쎄쎅쎆쎇쎈쎉쎊쎋쎌쎍쎎쎏쎐쎑쎒쎓쎔쎕쎖쎗쎘쎙쎚쎛쎜쎝쎞쎟쎠쎡쎢쎣쎤쎥쎦쎧쎨쎩쎪" # noqa: E501
    + "쎫쎬쎭쎮쎯쎰쎱쎲쎳쎴쎵쎶쎷쎸쎹쎺쎻쎼쎽쎾쎿쏀쏁쏂쏃쏄쏅쏆쏇쏈쏉쏊쏋쏌쏍쏎쏏쏐쏑쏒쏓쏔쏕쏖쏗쏘쏙쏚쏛쏜쏝쏞쏟쏠쏡쏢쏣쏤쏥쏦쏧쏨쏩쏪쏫쏬쏭쏮쏯쏰쏱쏲쏳" # noqa: E501
    + "쏴쏵쏶쏷쏸쏹쏺쏻쏼쏽쏾쏿쐀쐁쐂쐃쐄쐅쐆쐇쐈쐉쐊쐋쐌쐍쐎쐏쐐쐑쐒쐓쐔쐕쐖쐗쐘쐙쐚쐛쐜쐝쐞쐟쐠쐡쐢쐣쐤쐥쐦쐧쐨쐩쐪쐫쐬쐭쐮쐯쐰쐱쐲쐳쐴쐵쐶쐷쐸쐹쐺쐻쐼" # noqa: E501
    + "쐽쐾쐿쑀쑁쑂쑃쑄쑅쑆쑇쑈쑉쑊쑋쑌쑍쑎쑏쑐쑑쑒쑓쑔쑕쑖쑗쑘쑙쑚쑛쑜쑝쑞쑟쑠쑡쑢쑣쑤쑥쑦쑧쑨쑩쑪쑫쑬쑭쑮쑯쑰쑱쑲쑳쑴쑵쑶쑷쑸쑹쑺쑻쑼쑽쑾쑿쒀쒁쒂쒃쒄쒅" # noqa: E501
    + "쒆쒇쒈쒉쒊쒋쒌쒍쒎쒏쒐쒑쒒쒓쒔쒕쒖쒗쒘쒙쒚쒛쒜쒝쒞쒟쒠쒡쒢쒣쒤쒥쒦쒧쒨쒩쒪쒫쒬쒭쒮쒯쒰쒱쒲쒳쒴쒵쒶쒷쒸쒹쒺쒻쒼쒽쒾쒿쓀쓁쓂쓃쓄쓅쓆쓇쓈쓉쓊쓋쓌쓍쓎" # noqa: E501
    + "쓏쓐쓑쓒쓓쓔쓕쓖쓗쓘쓙쓚쓛쓜쓝쓞쓟쓠쓡쓢쓣쓤쓥쓦쓧쓨쓩쓪쓫쓬쓭쓮쓯쓰쓱쓲쓳쓴쓵쓶쓷쓸쓹쓺쓻쓼쓽쓾쓿씀씁씂씃씄씅씆씇씈씉씊씋씌씍씎씏씐씑씒씓씔씕씖씗" # noqa: E501
    + "씘씙씚씛씜씝씞씟씠씡씢씣씤씥씦씧씨씩씪씫씬씭씮씯씰씱씲씳씴씵씶씷씸씹씺씻씼씽씾씿앀앁앂앃아악앆앇안앉않앋알앍앎앏앐앑앒앓암압앖앗았앙앚앛앜앝앞앟애" # noqa: E501
    + "액앢앣앤앥앦앧앨앩앪앫앬앭앮앯앰앱앲앳앴앵앶앷앸앹앺앻야약앾앿얀얁얂얃얄얅얆얇얈얉얊얋얌얍얎얏얐양얒얓얔얕얖얗얘얙얚얛얜얝얞얟얠얡얢얣얤얥얦얧얨얩" # noqa: E501
    + "얪얫얬얭얮얯얰얱얲얳어억얶얷언얹얺얻얼얽얾얿엀엁엂엃엄업없엇었엉엊엋엌엍엎엏에엑엒엓엔엕엖엗엘엙엚엛엜엝엞엟엠엡엢엣엤엥엦엧엨엩엪엫여역엮엯연엱엲" # noqa: E501
    + "엳열엵엶엷엸엹엺엻염엽엾엿였영옂옃옄옅옆옇예옉옊옋옌옍옎옏옐옑옒옓옔옕옖옗옘옙옚옛옜옝옞옟옠옡옢옣오옥옦옧온옩옪옫올옭옮옯옰옱옲옳옴옵옶옷옸옹옺옻" # noqa: E501
    + "옼옽옾옿와왁왂왃완왅왆왇왈왉왊왋왌왍왎왏왐왑왒왓왔왕왖왗왘왙왚왛왜왝왞왟왠왡왢왣왤왥왦왧왨왩왪왫왬왭왮왯왰왱왲왳왴왵왶왷외왹왺왻왼왽왾왿욀욁욂욃욄" # noqa: E501
    + "욅욆욇욈욉욊욋욌욍욎욏욐욑욒욓요욕욖욗욘욙욚욛욜욝욞욟욠욡욢욣욤욥욦욧욨용욪욫욬욭욮욯우욱욲욳운욵욶욷울욹욺욻욼욽욾욿움웁웂웃웄웅웆웇웈웉웊웋워웍" # noqa: E501
    + "웎웏원웑웒웓월웕웖웗웘웙웚웛웜웝웞웟웠웡웢웣웤웥웦웧웨웩웪웫웬웭웮웯웰웱웲웳웴웵웶웷웸웹웺웻웼웽웾웿윀윁윂윃위윅윆윇윈윉윊윋윌윍윎윏윐윑윒윓윔윕윖" # noqa: E501
    + "윗윘윙윚윛윜윝윞윟유육윢윣윤윥윦윧율윩윪윫윬윭윮윯윰윱윲윳윴융윶윷윸윹윺윻으윽윾윿은읁읂읃을읅읆읇읈읉읊읋음읍읎읏읐응읒읓읔읕읖읗의읙읚읛읜읝읞읟" # noqa: E501
    + "읠읡읢읣읤읥읦읧읨읩읪읫읬읭읮읯읰읱읲읳이익읶읷인읹읺읻일읽읾읿잀잁잂잃임입잆잇있잉잊잋잌잍잎잏자작잒잓잔잕잖잗잘잙잚잛잜잝잞잟잠잡잢잣잤장잦잧잨" # noqa: E501
    + "잩잪잫재잭잮잯잰잱잲잳잴잵잶잷잸잹잺잻잼잽잾잿쟀쟁쟂쟃쟄쟅쟆쟇쟈쟉쟊쟋쟌쟍쟎쟏쟐쟑쟒쟓쟔쟕쟖쟗쟘쟙쟚쟛쟜쟝쟞쟟쟠쟡쟢쟣쟤쟥쟦쟧쟨쟩쟪쟫쟬쟭쟮쟯쟰쟱" # noqa: E501
    + "쟲쟳쟴쟵쟶쟷쟸쟹쟺쟻쟼쟽쟾쟿저적젂젃전젅젆젇절젉젊젋젌젍젎젏점접젒젓젔정젖젗젘젙젚젛제젝젞젟젠젡젢젣젤젥젦젧젨젩젪젫젬젭젮젯젰젱젲젳젴젵젶젷져젹젺" # noqa: E501
    + "젻젼젽젾젿졀졁졂졃졄졅졆졇졈졉졊졋졌졍졎졏졐졑졒졓졔졕졖졗졘졙졚졛졜졝졞졟졠졡졢졣졤졥졦졧졨졩졪졫졬졭졮졯조족졲졳존졵졶졷졸졹졺졻졼졽졾졿좀좁좂좃" # noqa: E501
    + "좄종좆좇좈좉좊좋좌좍좎좏좐좑좒좓좔좕좖좗좘좙좚좛좜좝좞좟좠좡좢좣좤좥좦좧좨좩좪좫좬좭좮좯좰좱좲좳좴좵좶좷좸좹좺좻좼좽좾좿죀죁죂죃죄죅죆죇죈죉죊죋죌" # noqa: E501
    + "죍죎죏죐죑죒죓죔죕죖죗죘죙죚죛죜죝죞죟죠죡죢죣죤죥죦죧죨죩죪죫죬죭죮죯죰죱죲죳죴죵죶죷죸죹죺죻주죽죾죿준줁줂줃줄줅줆줇줈줉줊줋줌줍줎줏줐중줒줓줔줕" # noqa: E501
    + "줖줗줘줙줚줛줜줝줞줟줠줡줢줣줤줥줦줧줨줩줪줫줬줭줮줯줰줱줲줳줴줵줶줷줸줹줺줻줼줽줾줿쥀쥁쥂쥃쥄쥅쥆쥇쥈쥉쥊쥋쥌쥍쥎쥏쥐쥑쥒쥓쥔쥕쥖쥗쥘쥙쥚쥛쥜쥝쥞" # noqa: E501
    + "쥟쥠쥡쥢쥣쥤쥥쥦쥧쥨쥩쥪쥫쥬쥭쥮쥯쥰쥱쥲쥳쥴쥵쥶쥷쥸쥹쥺쥻쥼쥽쥾쥿즀즁즂즃즄즅즆즇즈즉즊즋즌즍즎즏즐즑즒즓즔즕즖즗즘즙즚즛즜증즞즟즠즡즢즣즤즥즦즧" # noqa: E501
    + "즨즩즪즫즬즭즮즯즰즱즲즳즴즵즶즷즸즹즺즻즼즽즾즿지직짂짃진짅짆짇질짉짊짋짌짍짎짏짐집짒짓짔징짖짗짘짙짚짛짜짝짞짟짠짡짢짣짤짥짦짧짨짩짪짫짬짭짮짯짰" # noqa: E501
    + "짱짲짳짴짵짶짷째짹짺짻짼짽짾짿쨀쨁쨂쨃쨄쨅쨆쨇쨈쨉쨊쨋쨌쨍쨎쨏쨐쨑쨒쨓쨔쨕쨖쨗쨘쨙쨚쨛쨜쨝쨞쨟쨠쨡쨢쨣쨤쨥쨦쨧쨨쨩쨪쨫쨬쨭쨮쨯쨰쨱쨲쨳쨴쨵쨶쨷쨸쨹" # noqa: E501
    + "쨺쨻쨼쨽쨾쨿쩀쩁쩂쩃쩄쩅쩆쩇쩈쩉쩊쩋쩌쩍쩎쩏쩐쩑쩒쩓쩔쩕쩖쩗쩘쩙쩚쩛쩜쩝쩞쩟쩠쩡쩢쩣쩤쩥쩦쩧쩨쩩쩪쩫쩬쩭쩮쩯쩰쩱쩲쩳쩴쩵쩶쩷쩸쩹쩺쩻쩼쩽쩾쩿쪀쪁쪂" # noqa: E501
    + "쪃쪄쪅쪆쪇쪈쪉쪊쪋쪌쪍쪎쪏쪐쪑쪒쪓쪔쪕쪖쪗쪘쪙쪚쪛쪜쪝쪞쪟쪠쪡쪢쪣쪤쪥쪦쪧쪨쪩쪪쪫쪬쪭쪮쪯쪰쪱쪲쪳쪴쪵쪶쪷쪸쪹쪺쪻쪼쪽쪾쪿쫀쫁쫂쫃쫄쫅쫆쫇쫈쫉쫊쫋" # noqa: E501
    + "쫌쫍쫎쫏쫐쫑쫒쫓쫔쫕쫖쫗쫘쫙쫚쫛쫜쫝쫞쫟쫠쫡쫢쫣쫤쫥쫦쫧쫨쫩쫪쫫쫬쫭쫮쫯쫰쫱쫲쫳쫴쫵쫶쫷쫸쫹쫺쫻쫼쫽쫾쫿쬀쬁쬂쬃쬄쬅쬆쬇쬈쬉쬊쬋쬌쬍쬎쬏쬐쬑쬒쬓쬔" # noqa: E501
    + "쬕쬖쬗쬘쬙쬚쬛쬜쬝쬞쬟쬠쬡쬢쬣쬤쬥쬦쬧쬨쬩쬪쬫쬬쬭쬮쬯쬰쬱쬲쬳쬴쬵쬶쬷쬸쬹쬺쬻쬼쬽쬾쬿쭀쭁쭂쭃쭄쭅쭆쭇쭈쭉쭊쭋쭌쭍쭎쭏쭐쭑쭒쭓쭔쭕쭖쭗쭘쭙쭚쭛쭜쭝" # noqa: E501
    + "쭞쭟쭠쭡쭢쭣쭤쭥쭦쭧쭨쭩쭪쭫쭬쭭쭮쭯쭰쭱쭲쭳쭴쭵쭶쭷쭸쭹쭺쭻쭼쭽쭾쭿쮀쮁쮂쮃쮄쮅쮆쮇쮈쮉쮊쮋쮌쮍쮎쮏쮐쮑쮒쮓쮔쮕쮖쮗쮘쮙쮚쮛쮜쮝쮞쮟쮠쮡쮢쮣쮤쮥쮦" # noqa: E501
    + "쮧쮨쮩쮪쮫쮬쮭쮮쮯쮰쮱쮲쮳쮴쮵쮶쮷쮸쮹쮺쮻쮼쮽쮾쮿쯀쯁쯂쯃쯄쯅쯆쯇쯈쯉쯊쯋쯌쯍쯎쯏쯐쯑쯒쯓쯔쯕쯖쯗쯘쯙쯚쯛쯜쯝쯞쯟쯠쯡쯢쯣쯤쯥쯦쯧쯨쯩쯪쯫쯬쯭쯮쯯" # noqa: E501
    + "쯰쯱쯲쯳쯴쯵쯶쯷쯸쯹쯺쯻쯼쯽쯾쯿찀찁찂찃찄찅찆찇찈찉찊찋찌찍찎찏찐찑찒찓찔찕찖찗찘찙찚찛찜찝찞찟찠찡찢찣찤찥찦찧차착찪찫찬찭찮찯찰찱찲찳찴찵찶찷참" # noqa: E501
    + "찹찺찻찼창찾찿챀챁챂챃채책챆챇챈챉챊챋챌챍챎챏챐챑챒챓챔챕챖챗챘챙챚챛챜챝챞챟챠챡챢챣챤챥챦챧챨챩챪챫챬챭챮챯챰챱챲챳챴챵챶챷챸챹챺챻챼챽챾챿첀첁" # noqa: E501
    + "첂첃첄첅첆첇첈첉첊첋첌첍첎첏첐첑첒첓첔첕첖첗처척첚첛천첝첞첟철첡첢첣첤첥첦첧첨첩첪첫첬청첮첯첰첱첲첳체첵첶첷첸첹첺첻첼첽첾첿쳀쳁쳂쳃쳄쳅쳆쳇쳈쳉쳊" # noqa: E501
    + "쳋쳌쳍쳎쳏쳐쳑쳒쳓쳔쳕쳖쳗쳘쳙쳚쳛쳜쳝쳞쳟쳠쳡쳢쳣쳤쳥쳦쳧쳨쳩쳪쳫쳬쳭쳮쳯쳰쳱쳲쳳쳴쳵쳶쳷쳸쳹쳺쳻쳼쳽쳾쳿촀촁촂촃촄촅촆촇초촉촊촋촌촍촎촏촐촑촒촓" # noqa: E501
    + "촔촕촖촗촘촙촚촛촜총촞촟촠촡촢촣촤촥촦촧촨촩촪촫촬촭촮촯촰촱촲촳촴촵촶촷촸촹촺촻촼촽촾촿쵀쵁쵂쵃쵄쵅쵆쵇쵈쵉쵊쵋쵌쵍쵎쵏쵐쵑쵒쵓쵔쵕쵖쵗쵘쵙쵚쵛최" # noqa: E501
    + "쵝쵞쵟쵠쵡쵢쵣쵤쵥쵦쵧쵨쵩쵪쵫쵬쵭쵮쵯쵰쵱쵲쵳쵴쵵쵶쵷쵸쵹쵺쵻쵼쵽쵾쵿춀춁춂춃춄춅춆춇춈춉춊춋춌춍춎춏춐춑춒춓추축춖춗춘춙춚춛출춝춞춟춠춡춢춣춤춥" # noqa: E501
    + "춦춧춨충춪춫춬춭춮춯춰춱춲춳춴춵춶춷춸춹춺춻춼춽춾춿췀췁췂췃췄췅췆췇췈췉췊췋췌췍췎췏췐췑췒췓췔췕췖췗췘췙췚췛췜췝췞췟췠췡췢췣췤췥췦췧취췩췪췫췬췭췮" # noqa: E501
    + "췯췰췱췲췳췴췵췶췷췸췹췺췻췼췽췾췿츀츁츂츃츄츅츆츇츈츉츊츋츌츍츎츏츐츑츒츓츔츕츖츗츘츙츚츛츜츝츞츟츠측츢츣츤츥츦츧츨츩츪츫츬츭츮츯츰츱츲츳츴층츶츷" # noqa: E501
    + "츸츹츺츻츼츽츾츿칀칁칂칃칄칅칆칇칈칉칊칋칌칍칎칏칐칑칒칓칔칕칖칗치칙칚칛친칝칞칟칠칡칢칣칤칥칦칧침칩칪칫칬칭칮칯칰칱칲칳카칵칶칷칸칹칺칻칼칽칾칿캀" # noqa: E501
    + "캁캂캃캄캅캆캇캈캉캊캋캌캍캎캏캐캑캒캓캔캕캖캗캘캙캚캛캜캝캞캟캠캡캢캣캤캥캦캧캨캩캪캫캬캭캮캯캰캱캲캳캴캵캶캷캸캹캺캻캼캽캾캿컀컁컂컃컄컅컆컇컈컉" # noqa: E501
    + "컊컋컌컍컎컏컐컑컒컓컔컕컖컗컘컙컚컛컜컝컞컟컠컡컢컣커컥컦컧컨컩컪컫컬컭컮컯컰컱컲컳컴컵컶컷컸컹컺컻컼컽컾컿케켁켂켃켄켅켆켇켈켉켊켋켌켍켎켏켐켑켒" # noqa: E501
    + "켓켔켕켖켗켘켙켚켛켜켝켞켟켠켡켢켣켤켥켦켧켨켩켪켫켬켭켮켯켰켱켲켳켴켵켶켷켸켹켺켻켼켽켾켿콀콁콂콃콄콅콆콇콈콉콊콋콌콍콎콏콐콑콒콓코콕콖콗콘콙콚콛" # noqa: E501
    + "콜콝콞콟콠콡콢콣콤콥콦콧콨콩콪콫콬콭콮콯콰콱콲콳콴콵콶콷콸콹콺콻콼콽콾콿쾀쾁쾂쾃쾄쾅쾆쾇쾈쾉쾊쾋쾌쾍쾎쾏쾐쾑쾒쾓쾔쾕쾖쾗쾘쾙쾚쾛쾜쾝쾞쾟쾠쾡쾢쾣쾤" # noqa: E501
    + "쾥쾦쾧쾨쾩쾪쾫쾬쾭쾮쾯쾰쾱쾲쾳쾴쾵쾶쾷쾸쾹쾺쾻쾼쾽쾾쾿쿀쿁쿂쿃쿄쿅쿆쿇쿈쿉쿊쿋쿌쿍쿎쿏쿐쿑쿒쿓쿔쿕쿖쿗쿘쿙쿚쿛쿜쿝쿞쿟쿠쿡쿢쿣쿤쿥쿦쿧쿨쿩쿪쿫쿬쿭" # noqa: E501
    + "쿮쿯쿰쿱쿲쿳쿴쿵쿶쿷쿸쿹쿺쿻쿼쿽쿾쿿퀀퀁퀂퀃퀄퀅퀆퀇퀈퀉퀊퀋퀌퀍퀎퀏퀐퀑퀒퀓퀔퀕퀖퀗퀘퀙퀚퀛퀜퀝퀞퀟퀠퀡퀢퀣퀤퀥퀦퀧퀨퀩퀪퀫퀬퀭퀮퀯퀰퀱퀲퀳퀴퀵퀶" # noqa: E501
    + "퀷퀸퀹퀺퀻퀼퀽퀾퀿큀큁큂큃큄큅큆큇큈큉큊큋큌큍큎큏큐큑큒큓큔큕큖큗큘큙큚큛큜큝큞큟큠큡큢큣큤큥큦큧큨큩큪큫크큭큮큯큰큱큲큳클큵큶큷큸큹큺큻큼큽큾큿" # noqa: E501
    + "킀킁킂킃킄킅킆킇킈킉킊킋킌킍킎킏킐킑킒킓킔킕킖킗킘킙킚킛킜킝킞킟킠킡킢킣키킥킦킧킨킩킪킫킬킭킮킯킰킱킲킳킴킵킶킷킸킹킺킻킼킽킾킿타탁탂탃탄탅탆탇탈" # noqa: E501
    + "탉탊탋탌탍탎탏탐탑탒탓탔탕탖탗탘탙탚탛태택탞탟탠탡탢탣탤탥탦탧탨탩탪탫탬탭탮탯탰탱탲탳탴탵탶탷탸탹탺탻탼탽탾탿턀턁턂턃턄턅턆턇턈턉턊턋턌턍턎턏턐턑" # noqa: E501
    + "턒턓턔턕턖턗턘턙턚턛턜턝턞턟턠턡턢턣턤턥턦턧턨턩턪턫턬턭턮턯터턱턲턳턴턵턶턷털턹턺턻턼턽턾턿텀텁텂텃텄텅텆텇텈텉텊텋테텍텎텏텐텑텒텓텔텕텖텗텘텙텚" # noqa: E501
    + "텛템텝텞텟텠텡텢텣텤텥텦텧텨텩텪텫텬텭텮텯텰텱텲텳텴텵텶텷텸텹텺텻텼텽텾텿톀톁톂톃톄톅톆톇톈톉톊톋톌톍톎톏톐톑톒톓톔톕톖톗톘톙톚톛톜톝톞톟토톡톢톣" # noqa: E501
    + "톤톥톦톧톨톩톪톫톬톭톮톯톰톱톲톳톴통톶톷톸톹톺톻톼톽톾톿퇀퇁퇂퇃퇄퇅퇆퇇퇈퇉퇊퇋퇌퇍퇎퇏퇐퇑퇒퇓퇔퇕퇖퇗퇘퇙퇚퇛퇜퇝퇞퇟퇠퇡퇢퇣퇤퇥퇦퇧퇨퇩퇪퇫퇬" # noqa: E501
    + "퇭퇮퇯퇰퇱퇲퇳퇴퇵퇶퇷퇸퇹퇺퇻퇼퇽퇾퇿툀툁툂툃툄툅툆툇툈툉툊툋툌툍툎툏툐툑툒툓툔툕툖툗툘툙툚툛툜툝툞툟툠툡툢툣툤툥툦툧툨툩툪툫투툭툮툯툰툱툲툳툴툵" # noqa: E501
    + "툶툷툸툹툺툻툼툽툾툿퉀퉁퉂퉃퉄퉅퉆퉇퉈퉉퉊퉋퉌퉍퉎퉏퉐퉑퉒퉓퉔퉕퉖퉗퉘퉙퉚퉛퉜퉝퉞퉟퉠퉡퉢퉣퉤퉥퉦퉧퉨퉩퉪퉫퉬퉭퉮퉯퉰퉱퉲퉳퉴퉵퉶퉷퉸퉹퉺퉻퉼퉽퉾" # noqa: E501
    + "퉿튀튁튂튃튄튅튆튇튈튉튊튋튌튍튎튏튐튑튒튓튔튕튖튗튘튙튚튛튜튝튞튟튠튡튢튣튤튥튦튧튨튩튪튫튬튭튮튯튰튱튲튳튴튵튶튷트특튺튻튼튽튾튿틀틁틂틃틄틅틆틇" # noqa: E501
    + "틈틉틊틋틌틍틎틏틐틑틒틓틔틕틖틗틘틙틚틛틜틝틞틟틠틡틢틣틤틥틦틧틨틩틪틫틬틭틮틯티틱틲틳틴틵틶틷틸틹틺틻틼틽틾틿팀팁팂팃팄팅팆팇팈팉팊팋파팍팎팏판" # noqa: E501
    + "팑팒팓팔팕팖팗팘팙팚팛팜팝팞팟팠팡팢팣팤팥팦팧패팩팪팫팬팭팮팯팰팱팲팳팴팵팶팷팸팹팺팻팼팽팾팿퍀퍁퍂퍃퍄퍅퍆퍇퍈퍉퍊퍋퍌퍍퍎퍏퍐퍑퍒퍓퍔퍕퍖퍗퍘퍙" # noqa: E501
    + "퍚퍛퍜퍝퍞퍟퍠퍡퍢퍣퍤퍥퍦퍧퍨퍩퍪퍫퍬퍭퍮퍯퍰퍱퍲퍳퍴퍵퍶퍷퍸퍹퍺퍻퍼퍽퍾퍿펀펁펂펃펄펅펆펇펈펉펊펋펌펍펎펏펐펑펒펓펔펕펖펗페펙펚펛펜펝펞펟펠펡펢" # noqa: E501
    + "펣펤펥펦펧펨펩펪펫펬펭펮펯펰펱펲펳펴펵펶펷편펹펺펻펼펽펾펿폀폁폂폃폄폅폆폇폈평폊폋폌폍폎폏폐폑폒폓폔폕폖폗폘폙폚폛폜폝폞폟폠폡폢폣폤폥폦폧폨폩폪폫" # noqa: E501
    + "포폭폮폯폰폱폲폳폴폵폶폷폸폹폺폻폼폽폾폿퐀퐁퐂퐃퐄퐅퐆퐇퐈퐉퐊퐋퐌퐍퐎퐏퐐퐑퐒퐓퐔퐕퐖퐗퐘퐙퐚퐛퐜퐝퐞퐟퐠퐡퐢퐣퐤퐥퐦퐧퐨퐩퐪퐫퐬퐭퐮퐯퐰퐱퐲퐳퐴" # noqa: E501
    + "퐵퐶퐷퐸퐹퐺퐻퐼퐽퐾퐿푀푁푂푃푄푅푆푇푈푉푊푋푌푍푎푏푐푑푒푓푔푕푖푗푘푙푚푛표푝푞푟푠푡푢푣푤푥푦푧푨푩푪푫푬푭푮푯푰푱푲푳푴푵푶푷푸푹푺푻푼푽" # noqa: E501
    + "푾푿풀풁풂풃풄풅풆풇품풉풊풋풌풍풎풏풐풑풒풓풔풕풖풗풘풙풚풛풜풝풞풟풠풡풢풣풤풥풦풧풨풩풪풫풬풭풮풯풰풱풲풳풴풵풶풷풸풹풺풻풼풽풾풿퓀퓁퓂퓃퓄퓅퓆" # noqa: E501
    + "퓇퓈퓉퓊퓋퓌퓍퓎퓏퓐퓑퓒퓓퓔퓕퓖퓗퓘퓙퓚퓛퓜퓝퓞퓟퓠퓡퓢퓣퓤퓥퓦퓧퓨퓩퓪퓫퓬퓭퓮퓯퓰퓱퓲퓳퓴퓵퓶퓷퓸퓹퓺퓻퓼퓽퓾퓿픀픁픂픃프픅픆픇픈픉픊픋플픍픎픏" # noqa: E501
    + "픐픑픒픓픔픕픖픗픘픙픚픛픜픝픞픟픠픡픢픣픤픥픦픧픨픩픪픫픬픭픮픯픰픱픲픳픴픵픶픷픸픹픺픻피픽픾픿핀핁핂핃필핅핆핇핈핉핊핋핌핍핎핏핐핑핒핓핔핕핖핗하" # noqa: E501
    + "학핚핛한핝핞핟할핡핢핣핤핥핦핧함합핪핫핬항핮핯핰핱핲핳해핵핶핷핸핹핺핻핼핽핾핿햀햁햂햃햄햅햆햇했행햊햋햌햍햎햏햐햑햒햓햔햕햖햗햘햙햚햛햜햝햞햟햠햡" # noqa: E501
    + "햢햣햤향햦햧햨햩햪햫햬햭햮햯햰햱햲햳햴햵햶햷햸햹햺햻햼햽햾햿헀헁헂헃헄헅헆헇허헉헊헋헌헍헎헏헐헑헒헓헔헕헖헗험헙헚헛헜헝헞헟헠헡헢헣헤헥헦헧헨헩헪" # noqa: E501
    + "헫헬헭헮헯헰헱헲헳헴헵헶헷헸헹헺헻헼헽헾헿혀혁혂혃현혅혆혇혈혉혊혋혌혍혎혏혐협혒혓혔형혖혗혘혙혚혛혜혝혞혟혠혡혢혣혤혥혦혧혨혩혪혫혬혭혮혯혰혱혲혳" # noqa: E501
    + "혴혵혶혷호혹혺혻혼혽혾혿홀홁홂홃홄홅홆홇홈홉홊홋홌홍홎홏홐홑홒홓화확홖홗환홙홚홛활홝홞홟홠홡홢홣홤홥홦홧홨황홪홫홬홭홮홯홰홱홲홳홴홵홶홷홸홹홺홻홼" # noqa: E501
    + "홽홾홿횀횁횂횃횄횅횆횇횈횉횊횋회획횎횏횐횑횒횓횔횕횖횗횘횙횚횛횜횝횞횟횠횡횢횣횤횥횦횧효횩횪횫횬횭횮횯횰횱횲횳횴횵횶횷횸횹횺횻횼횽횾횿훀훁훂훃후훅" # noqa: E501
    + "훆훇훈훉훊훋훌훍훎훏훐훑훒훓훔훕훖훗훘훙훚훛훜훝훞훟훠훡훢훣훤훥훦훧훨훩훪훫훬훭훮훯훰훱훲훳훴훵훶훷훸훹훺훻훼훽훾훿휀휁휂휃휄휅휆휇휈휉휊휋휌휍휎" # noqa: E501
    + "휏휐휑휒휓휔휕휖휗휘휙휚휛휜휝휞휟휠휡휢휣휤휥휦휧휨휩휪휫휬휭휮휯휰휱휲휳휴휵휶휷휸휹휺휻휼휽휾휿흀흁흂흃흄흅흆흇흈흉흊흋흌흍흎흏흐흑흒흓흔흕흖흗" # noqa: E501
    + "흘흙흚흛흜흝흞흟흠흡흢흣흤흥흦흧흨흩흪흫희흭흮흯흰흱흲흳흴흵흶흷흸흹흺흻흼흽흾흿힀힁힂힃힄힅힆힇히힉힊힋힌힍힎힏힐힑힒힓힔힕힖힗힘힙힚힛힜힝힞힟힠" # noqa: E501
    + "힡힢힣"
    + _BASE_VOCABS["punctuation"]
    + "。・〜°—、「」『』【】゛》《〉〈" # punctuation
    + _BASE_VOCABS["currency"]
    + "₩" # currency (won)
)
VOCABS["simplified_chinese"] = (
_BASE_VOCABS["digits"]
+ "㐀㐁㐂㐃㐄㐅㐆㐇㐈㐉㐊㐋㐌㐍㐎㐏㐐㐑㐒㐓㐔㐕㐖㐗㐘㐙㐚㐛㐜㐝㐞㐟㐠㐡㐢㐣㐤㐥㐦㐧㐨㐩㐪㐫㐬㐭㐮㐯㐰㐱㐲㐳㐴㐵㐶㐷㐸㐹㐺㐻㐼㐽㐾㐿㑀㑁㑂" # noqa: E501
+ "㑄㑅㑆㑇㑈㑉㑊㑋㑌㑍㑎㑏㑐㑑㑒㑓㑔㑕㑖㑗㑘㑙㑚㑛㑜㑝㑞㑟㑠㑡㑢㑣㑤㑥㑦㑧㑨㑩㑪㑫㑬㑭㑮㑯㑰㑱㑲㑳㑴㑵㑶㑷㑸㑹㑺㑻㑼㑽㑾㑿㒀㒁㒂㒃㒄㒅㒆" # noqa: E501
+ "㒇㒈㒉㒊㒋㒌㒍㒎㒏㒐㒑㒒㒓㒔㒕㒖㒗㒘㒙㒚㒛㒜㒝㒞㒟㒠㒡㒢㒣㒤㒥㒦㒧㒨㒩㒪㒫㒬㒭㒮㒯㒰㒱㒲㒳㒴㒵㒶㒷㒸㒹㒺㒻㒼㒽㒾㒿㓀㓁㓂㓃㓄㓅㓆㓇㓈㓉" # noqa: E501
+ "㓊㓋㓌㓍㓎㓏㓐㓑㓒㓓㓔㓕㓖㓗㓘㓙㓚㓛㓜㓝㓞㓟㓠㓡㓢㓣㓤㓥㓦㓧㓨㓩㓪㓫㓬㓭㓮㓯㓰㓱㓲㓳㓴㓵㓶㓷㓸㓹㓺㓻㓼㓽㓾㓿㔀㔁㔂㔃㔄㔅㔆㔇㔈㔉㔊㔋㔌" # noqa: E501
+ "㔍㔎㔏㔐㔑㔒㔓㔔㔕㔖㔗㔘㔙㔚㔛㔜㔝㔞㔟㔠㔡㔢㔣㔤㔥㔦㔧㔨㔩㔪㔫㔬㔭㔮㔯㔰㔱㔲㔳㔴㔵㔶㔷㔸㔹㔺㔻㔼㔽㔾㔿㕀㕁㕂㕃㕄㕅㕆㕇㕈㕉㕊㕋㕌㕍㕎㕏" # noqa: E501
+ "㕐㕑㕒㕓㕔㕕㕖㕗㕘㕙㕚㕛㕜㕝㕞㕟㕠㕡㕢㕣㕤㕥㕦㕧㕨㕩㕪㕫㕬㕭㕮㕯㕰㕱㕲㕳㕴㕵㕶㕷㕸㕹㕺㕻㕼㕽㕾㕿㖀㖁㖂㖃㖄㖅㖆㖇㖈㖉㖊㖋㖌㖍㖎㖏㖐㖑㖒" # noqa: E501
+ "㖓㖔㖕㖖㖗㖘㖙㖚㖛㖜㖝㖞㖟㖠㖡㖢㖣㖤㖥㖦㖧㖨㖩㖪㖫㖬㖭㖮㖯㖰㖱㖲㖳㖴㖵㖶㖷㖸㖹㖺㖻㖼㖽㖾㖿㗀㗁㗂㗃㗄㗅㗆㗇㗈㗉㗊㗋㗌㗍㗎㗏㗐㗑㗒㗓㗔㗕" # noqa: E501
+ "㗖㗗㗘㗙㗚㗛㗜㗝㗞㗟㗠㗡㗢㗣㗤㗥㗦㗧㗨㗩㗪㗫㗬㗭㗮㗯㗰㗱㗲㗳㗴㗵㗶㗷㗸㗹㗺㗻㗼㗽㗾㗿㘀㘁㘂㘃㘄㘅㘆㘇㘈㘉㘊㘋㘌㘍㘎㘏㘐㘑㘒㘓㘔㘕㘖㘗㘘" # noqa: E501
+ "㘙㘚㘛㘜㘝㘞㘟㘠㘡㘢㘣㘤㘥㘦㘧㘨㘩㘪㘫㘬㘭㘮㘯㘰㘱㘲㘳㘴㘵㘶㘷㘸㘹㘺㘻㘼㘽㘾㘿㙀㙁㙂㙃㙄㙅㙆㙇㙈㙉㙊㙋㙌㙍㙎㙏㙐㙑㙒㙓㙔㙕㙖㙗㙘㙙㙚㙛" # noqa: E501
+ "㙜㙝㙞㙟㙠㙡㙢㙣㙤㙥㙦㙧㙨㙩㙪㙫㙬㙭㙮㙯㙰㙱㙲㙳㙴㙵㙶㙷㙸㙹㙺㙻㙼㙽㙾㙿㚀㚁㚂㚃㚄㚅㚆㚇㚈㚉㚊㚋㚌㚍㚎㚏㚐㚑㚒㚓㚔㚕㚖㚗㚘㚙㚚㚛㚜㚝㚞" # noqa: E501
+ "㚟㚠㚡㚢㚣㚤㚥㚦㚧㚨㚩㚪㚫㚬㚭㚮㚯㚰㚱㚲㚳㚴㚵㚶㚷㚸㚹㚺㚻㚼㚽㚾㚿㛀㛁㛂㛃㛄㛅㛆㛇㛈㛉㛊㛋㛌㛍㛎㛏㛐㛑㛒㛓㛔㛕㛖㛗㛘㛙㛚㛛㛜㛝㛞㛟㛠㛡" # noqa: E501
+ "㛢㛣㛤㛥㛦㛧㛨㛩㛪㛫㛬㛭㛮㛯㛰㛱㛲㛳㛴㛵㛶㛷㛸㛹㛺㛻㛼㛽㛾㛿㜀㜁㜂㜃㜄㜅㜆㜇㜈㜉㜊㜋㜌㜍㜎㜏㜐㜑㜒㜓㜔㜕㜖㜗㜘㜙㜚㜛㜜㜝㜞㜟㜠㜡㜢㜣㜤" # noqa: E501
+ "㜥㜦㜧㜨㜩㜪㜫㜬㜭㜮㜯㜰㜱㜲㜳㜴㜵㜶㜷㜸㜹㜺㜻㜼㜽㜾㜿㝀㝁㝂㝃㝄㝅㝆㝇㝈㝉㝊㝋㝌㝍㝎㝏㝐㝑㝒㝓㝔㝕㝖㝗㝘㝙㝚㝛㝜㝝㝞㝟㝠㝡㝢㝣㝤㝥㝦㝧" # noqa: E501
+ "㝨㝩㝪㝫㝬㝭㝮㝯㝰㝱㝲㝳㝴㝵㝶㝷㝸㝹㝺㝻㝼㝽㝾㝿㞀㞁㞂㞃㞄㞅㞆㞇㞈㞉㞊㞋㞌㞍㞎㞏㞐㞑㞒㞓㞔㞕㞖㞗㞘㞙㞚㞛㞜㞝㞞㞟㞠㞡㞢㞣㞤㞥㞦㞧㞨㞩㞪" # noqa: E501
+ "㞫㞬㞭㞮㞯㞰㞱㞲㞳㞴㞵㞶㞷㞸㞹㞺㞻㞼㞽㞾㞿㟀㟁㟂㟃㟄㟅㟆㟇㟈㟉㟊㟋㟌㟍㟎㟏㟐㟑㟒㟓㟔㟕㟖㟗㟘㟙㟚㟛㟜㟝㟞㟟㟠㟡㟢㟣㟤㟥㟦㟧㟨㟩㟪㟫㟬㟭" # noqa: E501
+ "㟮㟯㟰㟱㟲㟳㟴㟵㟶㟷㟸㟹㟺㟻㟼㟽㟾㟿㠀㠁㠂㠃㠄㠅㠆㠇㠈㠉㠊㠋㠌㠍㠎㠏㠐㠑㠒㠓㠔㠕㠖㠗㠘㠙㠚㠛㠜㠝㠞㠟㠠㠡㠢㠣㠤㠥㠦㠧㠨㠩㠪㠫㠬㠭㠮㠯㠰" # noqa: E501
+ "㠱㠲㠳㠴㠵㠶㠷㠸㠹㠺㠻㠼㠽㠾㠿㡀㡁㡂㡃㡄㡅㡆㡇㡈㡉㡊㡋㡌㡍㡎㡏㡐㡑㡒㡓㡔㡕㡖㡗㡘㡙㡚㡛㡜㡝㡞㡟㡠㡡㡢㡣㡤㡥㡦㡧㡨㡩㡪㡫㡬㡭㡮㡯㡰㡱㡲㡳" # noqa: E501
+ "㡴㡵㡶㡷㡸㡹㡺㡻㡼㡽㡾㡿㢀㢁㢂㢃㢄㢅㢆㢇㢈㢉㢊㢋㢌㢍㢎㢏㢐㢑㢒㢓㢔㢕㢖㢗㢘㢙㢚㢛㢜㢝㢞㢟㢠㢡㢢㢣㢤㢥㢦㢧㢨㢩㢪㢫㢬㢭㢮㢯㢰㢱㢲㢳㢴㢵㢶" # noqa: E501
+ "㢷㢸㢹㢺㢻㢼㢽㢾㢿㣀㣁㣂㣃㣄㣅㣆㣇㣈㣉㣊㣋㣌㣍㣎㣏㣐㣑㣒㣓㣔㣕㣖㣗㣘㣙㣚㣛㣜㣝㣞㣟㣠㣡㣢㣣㣤㣥㣦㣧㣨㣩㣪㣫㣬㣭㣮㣯㣰㣱㣲㣳㣴㣵㣶㣷㣸㣹" # noqa: E501
+ "㣺㣻㣼㣽㣾㣿㤀㤁㤂㤃㤄㤅㤆㤇㤈㤉㤊㤋㤌㤍㤎㤏㤐㤑㤒㤓㤔㤕㤖㤗㤘㤙㤚㤛㤜㤝㤞㤟㤠㤡㤢㤣㤤㤥㤦㤧㤨㤩㤪㤫㤬㤭㤮㤯㤰㤱㤲㤳㤴㤵㤶㤷㤸㤹㤺㤻㤼" # noqa: E501
+ "㤽㤾㤿㥀㥁㥂㥃㥄㥅㥆㥇㥈㥉㥊㥋㥌㥍㥎㥏㥐㥑㥒㥓㥔㥕㥖㥗㥘㥙㥚㥛㥜㥝㥞㥟㥠㥡㥢㥣㥤㥥㥦㥧㥨㥩㥪㥫㥬㥭㥮㥯㥰㥱㥲㥳㥴㥵㥶㥷㥸㥹㥺㥻㥼㥽㥾㥿" # noqa: E501
+ "㦀㦁㦂㦃㦄㦅㦆㦇㦈㦉㦊㦋㦌㦍㦎㦏㦐㦑㦒㦓㦔㦕㦖㦗㦘㦙㦚㦛㦜㦝㦞㦟㦠㦡㦢㦣㦤㦥㦦㦧㦨㦩㦪㦫㦬㦭㦮㦯㦰㦱㦲㦳㦴㦵㦶㦷㦸㦹㦺㦻㦼㦽㦾㦿㧀㧁㧂" # noqa: E501
+ "㧃㧄㧅㧆㧇㧈㧉㧊㧋㧌㧍㧎㧏㧐㧑㧒㧓㧔㧕㧖㧗㧘㧙㧚㧛㧜㧝㧞㧟㧠㧡㧢㧣㧤㧥㧦㧧㧨㧩㧪㧫㧬㧭㧮㧯㧰㧱㧲㧳㧴㧵㧶㧷㧸㧹㧺㧻㧼㧽㧾㧿㨀㨁㨂㨃㨄㨅" # noqa: E501
+ "㨆㨇㨈㨉㨊㨋㨌㨍㨎㨏㨐㨑㨒㨓㨔㨕㨖㨗㨘㨙㨚㨛㨜㨝㨞㨟㨠㨡㨢㨣㨤㨥㨦㨧㨨㨩㨪㨫㨬㨭㨮㨯㨰㨱㨲㨳㨴㨵㨶㨷㨸㨹㨺㨻㨼㨽㨾㨿㩀㩁㩂㩃㩄㩅㩆㩇㩈" # noqa: E501
+ "㩉㩊㩋㩌㩍㩎㩏㩐㩑㩒㩓㩔㩕㩖㩗㩘㩙㩚㩛㩜㩝㩞㩟㩠㩡㩢㩣㩤㩥㩦㩧㩨㩩㩪㩫㩬㩭㩮㩯㩰㩱㩲㩳㩴㩵㩶㩷㩸㩹㩺㩻㩼㩽㩾㩿㪀㪁㪂㪃㪄㪅㪆㪇㪈㪉㪊㪋" # noqa: E501
+ "㪌㪍㪎㪏㪐㪑㪒㪓㪔㪕㪖㪗㪘㪙㪚㪛㪜㪝㪞㪟㪠㪡㪢㪣㪤㪥㪦㪧㪨㪩㪪㪫㪬㪭㪮㪯㪰㪱㪲㪳㪴㪵㪶㪷㪸㪹㪺㪻㪼㪽㪾㪿㫀㫁㫂㫃㫄㫅㫆㫇㫈㫉㫊㫋㫌㫍㫎" # noqa: E501
+ "㫏㫐㫑㫒㫓㫔㫕㫖㫗㫘㫙㫚㫛㫜㫝㫞㫟㫠㫡㫢㫣㫤㫥㫦㫧㫨㫩㫪㫫㫬㫭㫮㫯㫰㫱㫲㫳㫴㫵㫶㫷㫸㫹㫺㫻㫼㫽㫾㫿㬀㬁㬂㬃㬄㬅㬆㬇㬈㬉㬊㬋㬌㬍㬎㬏㬐㬑" # noqa: E501
+ "㬒㬓㬔㬕㬖㬗㬘㬙㬚㬛㬜㬝㬞㬟㬠㬡㬢㬣㬤㬥㬦㬧㬨㬩㬪㬫㬬㬭㬮㬯㬰㬱㬲㬳㬴㬵㬶㬷㬸㬹㬺㬻㬼㬽㬾㬿㭀㭁㭂㭃㭄㭅㭆㭇㭈㭉㭊㭋㭌㭍㭎㭏㭐㭑㭒㭓㭔" # noqa: E501
+ "㭕㭖㭗㭘㭙㭚㭛㭜㭝㭞㭟㭠㭡㭢㭣㭤㭥㭦㭧㭨㭩㭪㭫㭬㭭㭮㭯㭰㭱㭲㭳㭴㭵㭶㭷㭸㭹㭺㭻㭼㭽㭾㭿㮀㮁㮂㮃㮄㮅㮆㮇㮈㮉㮊㮋㮌㮍㮎㮏㮐㮑㮒㮓㮔㮕㮖㮗" # noqa: E501
+ "㮘㮙㮚㮛㮜㮝㮞㮟㮠㮡㮢㮣㮤㮥㮦㮧㮨㮩㮪㮫㮬㮭㮮㮯㮰㮱㮲㮳㮴㮵㮶㮷㮸㮹㮺㮻㮼㮽㮾㮿㯀㯁㯂㯃㯄㯅㯆㯇㯈㯉㯊㯋㯌㯍㯎㯏㯐㯑㯒㯓㯔㯕㯖㯗㯘㯙㯚" # noqa: E501
+ "㯛㯜㯝㯞㯟㯠㯡㯢㯣㯤㯥㯦㯧㯨㯩㯪㯫㯬㯭㯮㯯㯰㯱㯲㯳㯴㯵㯶㯷㯸㯹㯺㯻㯼㯽㯾㯿㰀㰁㰂㰃㰄㰅㰆㰇㰈㰉㰊㰋㰌㰍㰎㰏㰐㰑㰒㰓㰔㰕㰖㰗㰘㰙㰚㰛㰜㰝" # noqa: E501
+ "㰞㰟㰠㰡㰢㰣㰤㰥㰦㰧㰨㰩㰪㰫㰬㰭㰮㰯㰰㰱㰲㰳㰴㰵㰶㰷㰸㰹㰺㰻㰼㰽㰾㰿㱀㱁㱂㱃㱄㱅㱆㱇㱈㱉㱊㱋㱌㱍㱎㱏㱐㱑㱒㱓㱔㱕㱖㱗㱘㱙㱚㱛㱜㱝㱞㱟㱠" # noqa: E501
+ "㱡㱢㱣㱤㱥㱦㱧㱨㱩㱪㱫㱬㱭㱮㱯㱰㱱㱲㱳㱴㱵㱶㱷㱸㱹㱺㱻㱼㱽㱾㱿㲀㲁㲂㲃㲄㲅㲆㲇㲈㲉㲊㲋㲌㲍㲎㲏㲐㲑㲒㲓㲔㲕㲖㲗㲘㲙㲚㲛㲜㲝㲞㲟㲠㲡㲢㲣" # noqa: E501
+ "㲤㲥㲦㲧㲨㲩㲪㲫㲬㲭㲮㲯㲰㲱㲲㲳㲴㲵㲶㲷㲸㲹㲺㲻㲼㲽㲾㲿㳀㳁㳂㳃㳄㳅㳆㳇㳈㳉㳊㳋㳌㳍㳎㳏㳐㳑㳒㳓㳔㳕㳖㳗㳘㳙㳚㳛㳜㳝㳞㳟㳠㳡㳢㳣㳤㳥㳦" # noqa: E501
+ "㳧㳨㳩㳪㳫㳬㳭㳮㳯㳰㳱㳲㳳㳴㳵㳶㳷㳸㳹㳺㳻㳼㳽㳾㳿㴀㴁㴂㴃㴄㴅㴆㴇㴈㴉㴊㴋㴌㴍㴎㴏㴐㴑㴒㴓㴔㴕㴖㴗㴘㴙㴚㴛㴜㴝㴞㴟㴠㴡㴢㴣㴤㴥㴦㴧㴨㴩" # noqa: E501
+ "㴪㴫㴬㴭㴮㴯㴰㴱㴲㴳㴴㴵㴶㴷㴸㴹㴺㴻㴼㴽㴾㴿㵀㵁㵂㵃㵄㵅㵆㵇㵈㵉㵊㵋㵌㵍㵎㵏㵐㵑㵒㵓㵔㵕㵖㵗㵘㵙㵚㵛㵜㵝㵞㵟㵠㵡㵢㵣㵤㵥㵦㵧㵨㵩㵪㵫㵬" # noqa: E501
+ "㵭㵮㵯㵰㵱㵲㵳㵴㵵㵶㵷㵸㵹㵺㵻㵼㵽㵾㵿㶀㶁㶂㶃㶄㶅㶆㶇㶈㶉㶊㶋㶌㶍㶎㶏㶐㶑㶒㶓㶔㶕㶖㶗㶘㶙㶚㶛㶜㶝㶞㶟㶠㶡㶢㶣㶤㶥㶦㶧㶨㶩㶪㶫㶬㶭㶮㶯" # noqa: E501
+ "㶰㶱㶲㶳㶴㶵㶶㶷㶸㶹㶺㶻㶼㶽㶾㶿㷀㷁㷂㷃㷄㷅㷆㷇㷈㷉㷊㷋㷌㷍㷎㷏㷐㷑㷒㷓㷔㷕㷖㷗㷘㷙㷚㷛㷜㷝㷞㷟㷠㷡㷢㷣㷤㷥㷦㷧㷨㷩㷪㷫㷬㷭㷮㷯㷰㷱㷲" # noqa: E501
+ "㷳㷴㷵㷶㷷㷸㷹㷺㷻㷼㷽㷾㷿㸀㸁㸂㸃㸄㸅㸆㸇㸈㸉㸊㸋㸌㸍㸎㸏㸐㸑㸒㸓㸔㸕㸖㸗㸘㸙㸚㸛㸜㸝㸞㸟㸠㸡㸢㸣㸤㸥㸦㸧㸨㸩㸪㸫㸬㸭㸮㸯㸰㸱㸲㸳㸴㸵" # noqa: E501
+ "㸶㸷㸸㸹㸺㸻㸼㸽㸾㸿㹀㹁㹂㹃㹄㹅㹆㹇㹈㹉㹊㹋㹌㹍㹎㹏㹐㹑㹒㹓㹔㹕㹖㹗㹘㹙㹚㹛㹜㹝㹞㹟㹠㹡㹢㹣㹤㹥㹦㹧㹨㹩㹪㹫㹬㹭㹮㹯㹰㹱㹲㹳㹴㹵㹶㹷㹸" # noqa: E501
+ "㹹㹺㹻㹼㹽㹾㹿㺀㺁㺂㺃㺄㺅㺆㺇㺈㺉㺊㺋㺌㺍㺎㺏㺐㺑㺒㺓㺔㺕㺖㺗㺘㺙㺚㺛㺜㺝㺞㺟㺠㺡㺢㺣㺤㺥㺦㺧㺨㺩㺪㺫㺬㺭㺮㺯㺰㺱㺲㺳㺴㺵㺶㺷㺸㺹㺺㺻" # noqa: E501
+ "㺼㺽㺾㺿㻀㻁㻂㻃㻄㻅㻆㻇㻈㻉㻊㻋㻌㻍㻎㻏㻐㻑㻒㻓㻔㻕㻖㻗㻘㻙㻚㻛㻜㻝㻞㻟㻠㻡㻢㻣㻤㻥㻦㻧㻨㻩㻪㻫㻬㻭㻮㻯㻰㻱㻲㻳㻴㻵㻶㻷㻸㻹㻺㻻㻼㻽㻾" # noqa: E501
+ "㻿㼀㼁㼂㼃㼄㼅㼆㼇㼈㼉㼊㼋㼌㼍㼎㼏㼐㼑㼒㼓㼔㼕㼖㼗㼘㼙㼚㼛㼜㼝㼞㼟㼠㼡㼢㼣㼤㼥㼦㼧㼨㼩㼪㼫㼬㼭㼮㼯㼰㼱㼲㼳㼴㼵㼶㼷㼸㼹㼺㼻㼼㼽㼾㼿㽀㽁" # noqa: E501
+ "㽂㽃㽄㽅㽆㽇㽈㽉㽊㽋㽌㽍㽎㽏㽐㽑㽒㽓㽔㽕㽖㽗㽘㽙㽚㽛㽜㽝㽞㽟㽠㽡㽢㽣㽤㽥㽦㽧㽨㽩㽪㽫㽬㽭㽮㽯㽰㽱㽲㽳㽴㽵㽶㽷㽸㽹㽺㽻㽼㽽㽾㽿㾀㾁㾂㾃㾄" # noqa: E501
+ "㾅㾆㾇㾈㾉㾊㾋㾌㾍㾎㾏㾐㾑㾒㾓㾔㾕㾖㾗㾘㾙㾚㾛㾜㾝㾞㾟㾠㾡㾢㾣㾤㾥㾦㾧㾨㾩㾪㾫㾬㾭㾮㾯㾰㾱㾲㾳㾴㾵㾶㾷㾸㾹㾺㾻㾼㾽㾾㾿㿀㿁㿂㿃㿄㿅㿆㿇" # noqa: E501
+ "㿈㿉㿊㿋㿌㿍㿎㿏㿐㿑㿒㿓㿔㿕㿖㿗㿘㿙㿚㿛㿜㿝㿞㿟㿠㿡㿢㿣㿤㿥㿦㿧㿨㿩㿪㿫㿬㿭㿮㿯㿰㿱㿲㿳㿴㿵㿶㿷㿸㿹㿺㿻㿼㿽㿾㿿䀀䀁䀂䀃䀄䀅䀆䀇䀈䀉䀊" # noqa: E501
+ "䀋䀌䀍䀎䀏䀐䀑䀒䀓䀔䀕䀖䀗䀘䀙䀚䀛䀜䀝䀞䀟䀠䀡䀢䀣䀤䀥䀦䀧䀨䀩䀪䀫䀬䀭䀮䀯䀰䀱䀲䀳䀴䀵䀶䀷䀸䀹䀺䀻䀼䀽䀾䀿䁀䁁䁂䁃䁄䁅䁆䁇䁈䁉䁊䁋䁌䁍" # noqa: E501
+ "䁎䁏䁐䁑䁒䁓䁔䁕䁖䁗䁘䁙䁚䁛䁜䁝䁞䁟䁠䁡䁢䁣䁤䁥䁦䁧䁨䁩䁪䁫䁬䁭䁮䁯䁰䁱䁲䁳䁴䁵䁶䁷䁸䁹䁺䁻䁼䁽䁾䁿䂀䂁䂂䂃䂄䂅䂆䂇䂈䂉䂊䂋䂌䂍䂎䂏䂐" # noqa: E501
+ "䂑䂒䂓䂔䂕䂖䂗䂘䂙䂚䂛䂜䂝䂞䂟䂠䂡䂢䂣䂤䂥䂦䂧䂨䂩䂪䂫䂬䂭䂮䂯䂰䂱䂲䂳䂴䂵䂶䂷䂸䂹䂺䂻䂼䂽䂾䂿䃀䃁䃂䃃䃄䃅䃆䃇䃈䃉䃊䃋䃌䃍䃎䃏䃐䃑䃒䃓" # noqa: E501
+ "䃔䃕䃖䃗䃘䃙䃚䃛䃜䃝䃞䃟䃠䃡䃢䃣䃤䃥䃦䃧䃨䃩䃪䃫䃬䃭䃮䃯䃰䃱䃲䃳䃴䃵䃶䃷䃸䃹䃺䃻䃼䃽䃾䃿䄀䄁䄂䄃䄄䄅䄆䄇䄈䄉䄊䄋䄌䄍䄎䄏䄐䄑䄒䄓䄔䄕䄖" # noqa: E501
+ "䄗䄘䄙䄚䄛䄜䄝䄞䄟䄠䄡䄢䄣䄤䄥䄦䄧䄨䄩䄪䄫䄬䄭䄮䄯䄰䄱䄲䄳䄴䄵䄶䄷䄸䄹䄺䄻䄼䄽䄾䄿䅀䅁䅂䅃䅄䅅䅆䅇䅈䅉䅊䅋䅌䅍䅎䅏䅐䅑䅒䅓䅔䅕䅖䅗䅘䅙" # noqa: E501
+ "䅚䅛䅜䅝䅞䅟䅠䅡䅢䅣䅤䅥䅦䅧䅨䅩䅪䅫䅬䅭䅮䅯䅰䅱䅲䅳䅴䅵䅶䅷䅸䅹䅺䅻䅼䅽䅾䅿䆀䆁䆂䆃䆄䆅䆆䆇䆈䆉䆊䆋䆌䆍䆎䆏䆐䆑䆒䆓䆔䆕䆖䆗䆘䆙䆚䆛䆜" # noqa: E501
+ "䆝䆞䆟䆠䆡䆢䆣䆤䆥䆦䆧䆨䆩䆪䆫䆬䆭䆮䆯䆰䆱䆲䆳䆴䆵䆶䆷䆸䆹䆺䆻䆼䆽䆾䆿䇀䇁䇂䇃䇄䇅䇆䇇䇈䇉䇊䇋䇌䇍䇎䇏䇐䇑䇒䇓䇔䇕䇖䇗䇘䇙䇚䇛䇜䇝䇞䇟" # noqa: E501
+ "䇠䇡䇢䇣䇤䇥䇦䇧䇨䇩䇪䇫䇬䇭䇮䇯䇰䇱䇲䇳䇴䇵䇶䇷䇸䇹䇺䇻䇼䇽䇾䇿䈀䈁䈂䈃䈄䈅䈆䈇䈈䈉䈊䈋䈌䈍䈎䈏䈐䈑䈒䈓䈔䈕䈖䈗䈘䈙䈚䈛䈜䈝䈞䈟䈠䈡䈢" # noqa: E501
+ "䈣䈤䈥䈦䈧䈨䈩䈪䈫䈬䈭䈮䈯䈰䈱䈲䈳䈴䈵䈶䈷䈸䈹䈺䈻䈼䈽䈾䈿䉀䉁䉂䉃䉄䉅䉆䉇䉈䉉䉊䉋䉌䉍䉎䉏䉐䉑䉒䉓䉔䉕䉖䉗䉘䉙䉚䉛䉜䉝䉞䉟䉠䉡䉢䉣䉤䉥" # noqa: E501
+ "䉦䉧䉨䉩㑃䉪䉫䉬䉭䉮䉯䉰䉱䉲䉳䉴䉵䉶䉷䉸䉹䉺䉻䉼䉽䉾䉿䊀䊁䊂䊃䊄䊅䊆䊇䊈䊉䊊䊋䊌䊍䊎䊏䊐䊑䊒䊓䊔䊕䊖䊗䊘䊙䊚䊛䊜䊝䊞䊟䊠䊡䊢䊣䊤䊥䊦䊧" # noqa: E501
+ "䊨䊩䊪䊫䊬䊭䊮䊯䊰䊱䊲䊳䊴䊵䊶䊷䊸䊹䊺䊻䊼䊽䊾䊿䋀䋁䋂䋃䋄䋅䋆䋇䋈䋉䋊䋋䋌䋍䋎䋏䋐䋑䋒䋓䋔䋕䋖䋗䋘䋙䋚䋛䋜䋝䋞䋟䋠䋡䋢䋣䋤䋥䋦䋧䋨䋩䋪" # noqa: E501
+ "䋫䋬䋭䋮䋯䋰䋱䋲䋳䋴䋵䋶䋷䋸䋹䋺䋻䋼䋽䋾䋿䌀䌁䌂䌃䌄䌅䌆䌇䌈䌉䌊䌋䌌䌍䌎䌏䌐䌑䌒䌓䌔䌕䌖䌗䌘䌙䌚䌛䌜䌝䌞䌟䌠䌡䌢䌣䌤䌥䌦䌧䌨䌩䌪䌫䌬䌭" # noqa: E501
+ "䌮䌯䌰䌱䌲䌳䌴䌵䌶䌷䌸䌹䌺䌻䌼䌽䌾䌿䍀䍁䍂䍃䍄䍅䍆䍇䍈䍉䍊䍋䍌䍍䍎䍏䍐䍑䍒䍓䍔䍕䍖䍗䍘䍙䍚䍛䍜䍝䍞䍟䍠䍡䍢䍣䍤䍥䍦䍧䍨䍩䍪䍫䍬䍭䍮䍯䍰" # noqa: E501
+ "䍱䍲䍳䍴䍵䍶䍷䍸䍹䍺䍻䍼䍽䍾䍿䎀䎁䎂䎃䎄䎅䎆䎇䎈䎉䎊䎋䎌䎍䎎䎏䎐䎑䎒䎓䎔䎕䎖䎗䎘䎙䎚䎛䎜䎝䎞䎟䎠䎡䎢䎣䎤䎥䎦䎧䎨䎩䎪䎫䎬䎭䎮䎯䎰䎱䎲䎳" # noqa: E501
+ "䎴䎵䎶䎷䎸䎹䎺䎻䎼䎽䎾䎿䏀䏁䏂䏃䏄䏅䏆䏇䏈䏉䏊䏋䏌䏍䏎䏏䏐䏑䏒䏓䏔䏕䏖䏗䏘䏙䏚䏛䏜䏝䏞䏟䏠䏡䏢䏣䏤䏥䏦䏧䏨䏩䏪䏫䏬䏭䏮䏯䏰䏱䏲䏳䏴䏵䏶" # noqa: E501
+ "䏷䏸䏹䏺䏻䏼䏽䏾䏿䐀䐁䐂䐃䐄䐅䐆䐇䐈䐉䐊䐋䐌䐍䐎䐏䐐䐑䐒䐓䐔䐕䐖䐗䐘䐙䐚䐛䐜䐝䐞䐟䐠䐡䐢䐣䐤䐥䐦䐧䐨䐩䐪䐫䐬䐭䐮䐯䐰䐱䐲䐳䐴䐵䐶䐷䐸䐹" # noqa: E501
+ "䐺䐻䐼䐽䐾䐿䑀䑁䑂䑃䑄䑅䑆䑇䑈䑉䑊䑋䑌䑍䑎䑏䑐䑑䑒䑓䑔䑕䑖䑗䑘䑙䑚䑛䑜䑝䑞䑟䑠䑡䑢䑣䑤䑥䑦䑧䑨䑩䑪䑫䑬䑭䑮䑯䑰䑱䑲䑳䑴䑵䑶䑷䑸䑹䑺䑻䑼" # noqa: E501
+ "䑽䑾䑿䒀䒁䒂䒃䒄䒅䒆䒇䒈䒉䒊䒋䒌䒍䒎䒏䒐䒑䒒䒓䒔䒕䒖䒗䒘䒙䒚䒛䒜䒝䒞䒟䒠䒡䒢䒣䒤䒥䒦䒧䒨䒩䒪䒫䒬䒭䒮䒯䒰䒱䒲䒳䒴䒵䒶䒷䒸䒹䒺䒻䒼䒽䒾䒿" # noqa: E501
+ "䓀䓁䓂䓃䓄䓅䓆䓇䓈䓉䓊䓋䓌䓍䓎䓏䓐䓑䓒䓓䓔䓕䓖䓗䓘䓙䓚䓛䓜䓝䓞䓟䓠䓡䓢䓣䓤䓥䓦䓧䓨䓩䓪䓫䓬䓭䓮䓯䓰䓱䓲䓳䓴䓵䓶䓷䓸䓹䓺䓻䓼䓽䓾䓿䔀䔁䔂" # noqa: E501
+ "䔃䔄䔅䔆䔇䔈䔉䔊䔋䔌䔍䔎䔏䔐䔑䔒䔓䔔䔕䔖䔗䔘䔙䔚䔛䔜䔝䔞䔟䔠䔡䔢䔣䔤䔥䔦䔧䔨䔩䔪䔫䔬䔭䔮䔯䔰䔱䔲䔳䔴䔵䔶䔷䔸䔹䔺䔻䔼䔽䔾䔿䕀䕁䕂䕃䕄䕅" # noqa: E501
+ "䕆䕇䕈䕉䕊䕋䕌䕍䕎䕏䕐䕑䕒䕓䕔䕕䕖䕗䕘䕙䕚䕛䕜䕝䕞䕟䕠䕡䕢䕣䕤䕥䕦䕧䕨䕩䕪䕫䕬䕭䕮䕯䕰䕱䕲䕳䕴䕵䕶䕷䕸䕹䕺䕻䕼䕽䕾䕿䖀䖁䖂䖃䖄䖅䖆䖇䖈" # noqa: E501
+ "䖉䖊䖋䖌䖍䖎䖏䖐䖑䖒䖓䖔䖕䖖䖗䖘䖙䖚䖛䖜䖝䖞䖟䖠䖡䖢䖣䖤䖥䖦䖧䖨䖩䖪䖫䖬䖭䖮䖯䖰䖱䖲䖳䖴䖵䖶䖷䖸䖹䖺䖻䖼䖽䖾䖿䗀䗁䗂䗃䗄䗅䗆䗇䗈䗉䗊䗋" # noqa: E501
+ "䗌䗍䗎䗏䗐䗑䗒䗓䗔䗕䗖䗗䗘䗙䗚䗛䗜䗝䗞䗟䗠䗡䗢䗣䗤䗥䗦䗧䗨䗩䗪䗫䗬䗭䗮䗯䗰䗱䗲䗳䗴䗵䗶䗷䗸䗹䗺䗻䗼䗽䗾䗿䘀䘁䘂䘃䘄䘅䘆䘇䘈䘉䘊䘋䘌䘍䘎" # noqa: E501
+ "䘏䘐䘑䘒䘓䘔䘕䘖䘗䘘䘙䘚䘛䘜䘝䘞䘟䘠䘡䘢䘣䘤䘥䘦䘧䘨䘩䘪䘫䘬䘭䘮䘯䘰䘱䘲䘳䘴䘵䘶䘷䘸䘹䘺䘻䘼䘽䘾䘿䙀䙁䙂䙃䙄䙅䙆䙇䙈䙉䙊䙋䙌䙍䙎䙏䙐䙑" # noqa: E501
+ "䙒䙓䙔䙕䙖䙗䙘䙙䙚䙛䙜䙝䙞䙟䙠䙡䙢䙣䙤䙥䙦䙧䙨䙩䙪䙫䙬䙭䙮䙯䙰䙱䙲䙳䙴䙵䙶䙷䙸䙹䙺䙻䙼䙽䙾䙿䚀䚁䚂䚃䚄䚅䚆䚇䚈䚉䚊䚋䚌䚍䚎䚏䚐䚑䚒䚓䚔" # noqa: E501
+ "䚕䚖䚗䚘䚙䚚䚛䚜䚝䚞䚟䚠䚡䚢䚣䚤䚥䚦䚧䚨䚩䚪䚫䚬䚭䚮䚯䚰䚱䚲䚳䚴䚵䚶䚷䚸䚹䚺䚻䚼䚽䚾䚿䛀䛁䛂䛃䛄䛅䛆䛇䛈䛉䛊䛋䛌䛍䛎䛏䛐䛑䛒䛓䛔䛕䛖䛗" # noqa: E501
+ "䛘䛙䛚䛛䛜䛝䛞䛟䛠䛡䛢䛣䛤䛥䛦䛧䛨䛩䛪䛫䛬䛭䛮䛯䛰䛱䛲䛳䛴䛵䛶䛷䛸䛹䛺䛻䛼䛽䛾䛿䜀䜁䜂䜃䜄䜅䜆䜇䜈䜉䜊䜋䜌䜍䜎䜏䜐䜑䜒䜓䜔䜕䜖䜗䜘䜙䜚" # noqa: E501
+ "䜛䜜䜝䜞䜟䜠䜡䜢䜣䜤䜥䜦䜧䜨䜩䜪䜫䜬䜭䜮䜯䜰䜱䜲䜳䜴䜵䜶䜷䜸䜹䜺䜻䜼䜽䜾䜿䝀䝁䝂䝃䝄䝅䝆䝇䝈䝉䝊䝋䝌䝍䝎䝏䝐䝑䝒䝓䝔䝕䝖䝗䝘䝙䝚䝛䝜䝝" # noqa: E501
+ "䝞䝟䝠䝡䝢䝣䝤䝥䝦䝧䝨䝩䝪䝫䝬䝭䝮䝯䝰䝱䝲䝳䝴䝵䝶䝷䝸䝹䝺䝻䝼䝽䝾䝿䞀䞁䞂䞃䞄䞅䞆䞇䞈䞉䞊䞋䞌䞍䞎䞏䞐䞑䞒䞓䞔䞕䞖䞗䞘䞙䞚䞛䞜䞝䞞䞟䞠" # noqa: E501
+ "䞡䞢䞣䞤䞥䞦䞧䞨䞩䞪䞫䞬䞭䞮䞯䞰䞱䞲䞳䞴䞵䞶䞷䞸䞹䞺䞻䞼䞽䞾䞿䟀䟁䟂䟃䟄䟅䟆䟇䟈䟉䟊䟋䟌䟍䟎䟏䟐䟑䟒䟓䟔䟕䟖䟗䟘䟙䟚䟛䟜䟝䟞䟟䟠䟡䟢䟣" # noqa: E501
+ "䟤䟥䟦䟧䟨䟩䟪䟫䟬䟭䟮䟯䟰䟱䟲䟳䟴䟵䟶䟷䟸䟹䟺䟻䟼䟽䟾䟿䠀䠁䠂䠃䠄䠅䠆䠇䠈䠉䠊䠋䠌䠍䠎䠏䠐䠑䠒䠓䠔䠕䠖䠗䠘䠙䠚䠛䠜䠝䠞䠟䠠䠡䠢䠣䠤䠥䠦" # noqa: E501
+ "䠧䠨䠩䠪䠫䠬䠭䠮䠯䠰䠱䠲䠳䠴䠵䠶䠷䠸䠹䠺䠻䠼䠽䠾䠿䡀䡁䡂䡃䡄䡅䡆䡇䡈䡉䡊䡋䡌䡍䡎䡏䡐䡑䡒䡓䡔䡕䡖䡗䡘䡙䡚䡛䡜䡝䡞䡟䡠䡡䡢䡣䡤䡥䡦䡧䡨䡩" # noqa: E501
+ "䡪䡫䡬䡭䡮䡯䡰䡱䡲䡳䡴䡵䡶䡷䡸䡹䡺䡻䡼䡽䡾䡿䢀䢁䢂䢃䢄䢅䢆䢇䢈䢉䢊䢋䢌䢍䢎䢏䢐䢑䢒䢓䢔䢕䢖䢗䢘䢙䢚䢛䢜䢝䢞䢟䢠䢡䢢䢣䢤䢥䢦䢧䢨䢩䢪䢫䢬" # noqa: E501
+ "䢭䢮䢯䢰䢱䢲䢳䢴䢵䢶䢷䢸䢹䢺䢻䢼䢽䢾䢿䣀䣁䣂䣃䣄䣅䣆䣇䣈䣉䣊䣋䣌䣍䣎䣏䣐䣑䣒䣓䣔䣕䣖䣗䣘䣙䣚䣛䣜䣝䣞䣟䣠䣡䣢䣣䣤䣥䣦䣧䣨䣩䣪䣫䣬䣭䣮䣯" # noqa: E501
+ "䣰䣱䣲䣳䣴䣵䣶䣷䣸䣹䣺䣻䣼䣽䣾䣿䤀䤁䤂䤃䤄䤅䤆䤇䤈䤉䤊䤋䤌䤍䤎䤏䤐䤑䤒䤓䤔䤕䤖䤗䤘䤙䤚䤛䤜䤝䤞䤟䤠䤡䤢䤣䤤䤥䤦䤧䤨䤩䤪䤫䤬䤭䤮䤯䤰䤱䤲" # noqa: E501
+ "䤳䤴䤵䤶䤷䤸䤹䤺䤻䤼䤽䤾䤿䥀䥁䥂䥃䥄䥅䥆䥇䥈䥉䥊䥋䥌䥍䥎䥏䥐䥑䥒䥓䥔䥕䥖䥗䥘䥙䥚䥛䥜䥝䥞䥟䥠䥡䥢䥣䥤䥥䥦䥧䥨䥩䥪䥫䥬䥭䥮䥯䥰䥱䥲䥳䥴䥵" # noqa: E501
+ "䥶䥷䥸䥹䥺䥻䥼䥽䥾䥿䦀䦁䦂䦃䦄䦅䦆䦇䦈䦉䦊䦋䦌䦍䦎䦏䦐䦑䦒䦓䦔䦕䦖䦗䦘䦙䦚䦛䦜䦝䦞䦟䦠䦡䦢䦣䦤䦥䦦䦧䦨䦩䦪䦫䦬䦭䦮䦯䦰䦱䦲䦳䦴䦵䦶䦷䦸" # noqa: E501
+ "䦹䦺䦻䦼䦽䦾䦿䧀䧁䧂䧃䧄䧅䧆䧇䧈䧉䧊䧋䧌䧍䧎䧏䧐䧑䧒䧓䧔䧕䧖䧗䧘䧙䧚䧛䧜䧝䧞䧟䧠䧡䧢䧣䧤䧥䧦䧧䧨䧩䧪䧫䧬䧭䧮䧯䧰䧱䧲䧳䧴䧵䧶䧷䧸䧹䧺䧻" # noqa: E501
+ "䧼䧽䧾䧿䨀䨁䨂䨃䨄䨅䨆䨇䨈䨉䨊䨋䨌䨍䨎䨏䨐䨑䨒䨓䨔䨕䨖䨗䨘䨙䨚䨛䨜䨝䨞䨟䨠䨡䨢䨣䨤䨥䨦䨧䨨䨩䨪䨫䨬䨭䨮䨯䨰䨱䨲䨳䨴䨵䨶䨷䨸䨹䨺䨻䨼䨽䨾" # noqa: E501
+ "䨿䩀䩁䩂䩃䩄䩅䩆䩇䩈䩉䩊䩋䩌䩍䩎䩏䩐䩑䩒䩓䩔䩕䩖䩗䩘䩙䩚䩛䩜䩝䩞䩟䩠䩡䩢䩣䩤䩥䩦䩧䩨䩩䩪䩫䩬䩭䩮䩯䩰䩱䩲䩳䩴䩵䩶䩷䩸䩹䩺䩻䩼䩽䩾䩿䪀䪁" # noqa: E501
+ "䪂䪃䪄䪅䪆䪇䪈䪉䪊䪋䪌䪍䪎䪏䪐䪑䪒䪓䪔䪕䪖䪗䪘䪙䪚䪛䪜䪝䪞䪟䪠䪡䪢䪣䪤䪥䪦䪧䪨䪩䪪䪫䪬䪭䪮䪯䪰䪱䪲䪳䪴䪵䪶䪷䪸䪹䪺䪻䪼䪽䪾䪿䫀䫁䫂䫃䫄" # noqa: E501
+ "䫅䫆䫇䫈䫉䫊䫋䫌䫍䫎䫏䫐䫑䫒䫓䫔䫕䫖䫗䫘䫙䫚䫛䫜䫝䫞䫟䫠䫡䫢䫣䫤䫥䫦䫧䫨䫩䫪䫫䫬䫭䫮䫯䫰䫱䫲䫳䫴䫵䫶䫷䫸䫹䫺䫻䫼䫽䫾䫿䬀䬁䬂䬃䬄䬅䬆䬇" # noqa: E501
+ "䬈䬉䬊䬋䬌䬍䬎䬏䬐䬑䬒䬓䬔䬕䬖䬗䬘䬙䬚䬛䬜䬝䬞䬟䬠䬡䬢䬣䬤䬥䬦䬧䬨䬩䬪䬫䬬䬭䬮䬯䬰䬱䬲䬳䬴䬵䬶䬷䬸䬹䬺䬻䬼䬽䬾䬿䭀䭁䭂䭃䭄䭅䭆䭇䭈䭉䭊" # noqa: E501
+ "䭋䭌䭍䭎䭏䭐䭑䭒䭓䭔䭕䭖䭗䭘䭙䭚䭛䭜䭝䭞䭟䭠䭡䭢䭣䭤䭥䭦䭧䭨䭩䭪䭫䭬䭭䭮䭯䭰䭱䭲䭳䭴䭵䭶䭷䭸䭹䭺䭻䭼䭽䭾䭿䮀䮁䮂䮃䮄䮅䮆䮇䮈䮉䮊䮋䮌䮍" # noqa: E501
+ "䮎䮏䮐䮑䮒䮓䮔䮕䮖䮗䮘䮙䮚䮛䮜䮝䮞䮟䮠䮡䮢䮣䮤䮥䮦䮧䮨䮩䮪䮫䮬䮭䮮䮯䮰䮱䮲䮳䮴䮵䮶䮷䮸䮹䮺䮻䮼䮽䮾䮿䯀䯁䯂䯃䯄䯅䯆䯇䯈䯉䯊䯋䯌䯍䯎䯏䯐" # noqa: E501
+ "䯑䯒䯓䯔䯕䯖䯗䯘䯙䯚䯛䯜䯝䯞䯟䯠䯡䯢䯣䯤䯥䯦䯧䯨䯩䯪䯫䯬䯭䯮䯯䯰䯱䯲䯳䯴䯵䯶䯷䯸䯹䯺䯻䯼䯽䯾䯿䰀䰁䰂䰃䰄䰅䰆䰇䰈䰉䰊䰋䰌䰍䰎䰏䰐䰑䰒䰓" # noqa: E501
+ "䰔䰕䰖䰗䰘䰙䰚䰛䰜䰝䰞䰟䰠䰡䰢䰣䰤䰥䰦䰧䰨䰩䰪䰫䰬䰭䰮䰯䰰䰱䰲䰳䰴䰵䰶䰷䰸䰹䰺䰻䰼䰽䰾䰿䱀䱁䱂䱃䱄䱅䱆䱇䱈䱉䱊䱋䱌䱍䱎䱏䱐䱑䱒䱓䱔䱕䱖" # noqa: E501
+ "䱗䱘䱙䱚䱛䱜䱝䱞䱟䱠䱡䱢䱣䱤䱥䱦䱧䱨䱩䱪䱫䱬䱭䱮䱯䱰䱱䱲䱳䱴䱵䱶䱷䱸䱹䱺䱻䱼䱽䱾䱿䲀䲁䲂䲃䲄䲅䲆䲇䲈䲉䲊䲋䲌䲍䲎䲏䲐䲑䲒䲓䲔䲕䲖䲗䲘䲙" # noqa: E501
+ "䲚䲛䲜䲝䲞䲟䲠䲡䲢䲣䲤䲥䲦䲧䲨䲩䲪䲫䲬䲭䲮䲯䲰䲱䲲䲳䲴䲵䲶䲷䲸䲹䲺䲻䲼䲽䲾䲿䳀䳁䳂䳃䳄䳅䳆䳇䳈䳉䳊䳋䳌䳍䳎䳏䳐䳑䳒䳓䳔䳕䳖䳗䳘䳙䳚䳛䳜" # noqa: E501
+ "䳝䳞䳟䳠䳡䳢䳣䳤䳥䳦䳧䳨䳩䳪䳫䳬䳭䳮䳯䳰䳱䳲䳳䳴䳵䳶䳷䳸䳹䳺䳻䳼䳽䳾䳿䴀䴁䴂䴃䴄䴅䴆䴇䴈䴉䴊䴋䴌䴍䴎䴏䴐䴑䴒䴓䴔䴕䴖䴗䴘䴙䴚䴛䴜䴝䴞䴟" # noqa: E501
+ "䴠䴡䴢䴣䴤䴥䴦䴧䴨䴩䴪䴫䴬䴭䴮䴯䴰䴱䴲䴳䴴䴵䴶䴷䴸䴹䴺䴻䴼䴽䴾䴿䵀䵁䵂䵃䵄䵅䵆䵇䵈䵉䵊䵋䵌䵍䵎䵏䵐䵑䵒䵓䵔䵕䵖䵗䵘䵙䵚䵛䵜䵝䵞䵟䵠䵡䵢" # noqa: E501
+ "䵣䵤䵥䵦䵧䵨䵩䵪䵫䵬䵭䵮䵯䵰䵱䵲䵳䵴䵵䵶䵷䵸䵹䵺䵻䵼䵽䵾䵿䶀䶁䶂䶃䶄䶅䶆䶇䶈䶉䶊䶋䶌䶍䶎䶏䶐䶑䶒䶓䶔䶕䶖䶗䶘䶙䶚䶛䶜䶝䶞䶟䶠䶡䶢䶣䶤䶥" # noqa: E501
+ "䶦䶧䶨䶩䶪䶫䶬䶭䶮䶯䶰䶱䶲䶳䶴䶵䶶䶷䶸䶹䶺䶻䶼䶽䶾䶿"
+ _BASE_VOCABS["punctuation"]
+ "。・〜°—、「」『』【】゛》《〉〈" # punctuation
+ _BASE_VOCABS["currency"]
)
# Multi-lingual
# NOTE: ``dict.fromkeys`` deduplicates the concatenated characters while keeping
# first-seen insertion order (a documented dict guarantee since Python 3.7).
# Order stability matters here: the character position defines the model's
# output label index, so the composition below must not be reordered.
VOCABS["multilingual"] = "".join(
    dict.fromkeys(
        # latin_based
        VOCABS["english"]
        + VOCABS["albanian"]
        + VOCABS["afrikaans"]
        + VOCABS["azerbaijani"]
        + VOCABS["basque"]
        + VOCABS["bosnian"]
        + VOCABS["catalan"]
        + VOCABS["croatian"]
        + VOCABS["czech"]
        + VOCABS["danish"]
        + VOCABS["dutch"]
        + VOCABS["estonian"]
        + VOCABS["esperanto"]
        + VOCABS["french"]
        + VOCABS["finnish"]
        + VOCABS["frisian"]
        + VOCABS["galician"]
        + VOCABS["german"]
        + VOCABS["hausa"]
        + VOCABS["hungarian"]
        + VOCABS["icelandic"]
        + VOCABS["indonesian"]
        + VOCABS["irish"]
        + VOCABS["italian"]
        + VOCABS["latvian"]
        + VOCABS["lithuanian"]
        + VOCABS["luxembourgish"]
        + VOCABS["maori"]
        + VOCABS["malagasy"]
        + VOCABS["malay"]
        + VOCABS["maltese"]
        + VOCABS["montenegrin"]
        + VOCABS["norwegian"]
        + VOCABS["polish"]
        + VOCABS["portuguese"]
        + VOCABS["quechua"]
        + VOCABS["romanian"]
        + VOCABS["scottish_gaelic"]
        + VOCABS["serbian_latin"]
        + VOCABS["slovak"]
        + VOCABS["slovene"]
        + VOCABS["somali"]
        + VOCABS["spanish"]
        + VOCABS["swahili"]
        + VOCABS["swedish"]
        + VOCABS["tagalog"]
        + VOCABS["turkish"]
        + VOCABS["uzbek_latin"]
        + VOCABS["vietnamese"]
        + VOCABS["welsh"]
        + VOCABS["yoruba"]
        + VOCABS["zulu"]
        + "§"  # paragraph sign
        # cyrillic_based
        + VOCABS["russian"]
        + VOCABS["belarusian"]
        + VOCABS["ukrainian"]
        + VOCABS["tatar"]
        + VOCABS["tajik"]
        + VOCABS["kazakh"]
        + VOCABS["kyrgyz"]
        + VOCABS["bulgarian"]
        + VOCABS["macedonian"]
        + VOCABS["mongolian"]
        + VOCABS["yakut"]
        + VOCABS["serbian_cyrillic"]
        + VOCABS["uzbek_cyrillic"]
        # greek
        + VOCABS["greek"]
        # hebrew
        + VOCABS["hebrew"]
    )
)
================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "onnxtr"
description = "Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents."
authors = [{name = "Felix Dittrich", email = "felixdittrich92@gmail.com"}]
maintainers = [
{name = "Felix Dittrich"},
]
readme = "README.md"
requires-python = ">=3.10.0,<4"
license = {file = "LICENSE"}
keywords=["OCR", "deep learning", "computer vision", "onnx", "text detection", "text recognition", "docTR", "document analysis", "document processing", "document AI"]
classifiers=[
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dynamic = ["version"]
dependencies = [
# For proper typing, mypy needs numpy>=1.20.0 (cf. https://github.com/numpy/numpy/pull/16515)
# Additional typing support is brought by numpy>=1.22.4, but core build sticks to >=1.16.0
"numpy>=1.16.0,<3.0.0",
"scipy>=1.4.0,<2.0.0",
"pypdfium2>=4.11.0,<6.0.0",
"pyclipper>=1.2.0,<2.0.0",
"rapidfuzz>=3.0.0,<4.0.0",
"langdetect>=1.0.9,<2.0.0",
"huggingface-hub>=0.23.0,<2.0.0",
"Pillow>=9.2.0",
"defusedxml>=0.7.0",
"anyascii>=0.3.2",
"tqdm>=4.30.0",
]
[project.optional-dependencies]
cpu = [
"onnxruntime>=1.18.0",
"opencv-python>=4.5.0,<5.0.0",
]
gpu = [
"onnxruntime-gpu>=1.18.0",
"opencv-python>=4.5.0,<5.0.0",
]
openvino = [
"onnxruntime-openvino>=1.18.0",
"opencv-python>=4.5.0,<5.0.0",
]
cpu-headless = [
"onnxruntime>=1.18.0",
"opencv-python-headless>=4.5.0,<5.0.0",
]
gpu-headless = [
"onnxruntime-gpu>=1.18.0",
"opencv-python-headless>=4.5.0,<5.0.0",
]
openvino-headless = [
"onnxruntime-openvino>=1.18.0",
"opencv-python-headless>=4.5.0,<5.0.0",
]
html = [
"weasyprint>=55.0",
]
viz = [
"matplotlib>=3.1.0",
"mplcursors>=0.3",
]
testing = [
"pytest>=5.3.2",
"coverage[toml]>=4.5.4",
"requests>=2.20.0",
"pytest-memray>=1.7.0",
"psutil>=7.0.0",
]
quality = [
"ruff>=0.1.5",
"mypy>=0.812",
"pre-commit>=2.17.0",
]
dev = [
# Runtime
"onnxruntime>=1.18.0",
"opencv-python>=4.5.0,<5.0.0",
# HTML
"weasyprint>=55.0",
# Visualization
"matplotlib>=3.1.0",
"mplcursors>=0.3",
# Testing
"pytest>=5.3.2",
"coverage[toml]>=4.5.4",
"requests>=2.20.0",
"pytest-memray>=1.7.0",
"psutil>=7.0.0",
# Quality
"ruff>=0.1.5",
"mypy>=0.812",
"pre-commit>=2.17.0",
]
[project.urls]
repository = "https://github.com/felixdittrich92/OnnxTR"
tracker = "https://github.com/felixdittrich92/OnnxTR/issues"
changelog = "https://github.com/felixdittrich92/OnnxTR/releases"
[tool.setuptools]
zip-safe = true
[tool.setuptools.packages.find]
exclude = ["docs*", "tests*", "scripts*", "demo*"]
[tool.setuptools.package-data]
onnxtr = ["py.typed"]
[tool.mypy]
files = "onnxtr/"
show_error_codes = true
pretty = true
warn_unused_ignores = true
warn_redundant_casts = true
no_implicit_optional = true
check_untyped_defs = true
implicit_reexport = false
[[tool.mypy.overrides]]
module = [
"onnxruntime.*",
"PIL.*",
"scipy.*",
"cv2.*",
"matplotlib.*",
"numpy.*",
"pyclipper.*",
"mplcursors.*",
"defusedxml.*",
"weasyprint.*",
"pypdfium2.*",
"langdetect.*",
"huggingface_hub.*",
"rapidfuzz.*",
"anyascii.*",
"tqdm.*",
]
ignore_missing_imports = true
[tool.ruff]
exclude = [".git", "venv*", "build", "**/__init__.py"]
line-length = 120
target-version = "py310"
preview = true
[tool.ruff.lint]
select = [
# https://docs.astral.sh/ruff/rules/
"E", "W", "F", "I", "N", "Q", "C4", "T10", "LOG",
"D101", "D103", "D201","D202","D207","D208","D214","D215","D300","D301","D417", "D419", "D207" # pydocstyle
]
ignore = ["E402", "E203", "F403", "E731", "N812", "N817", "C408", "LOG015"]
[tool.ruff.lint.isort]
known-first-party = ["onnxtr", "utils"]
known-third-party = ["onnxruntime", "cv2"]
[tool.ruff.lint.per-file-ignores]
"onnxtr/models/**.py" = ["N806", "F841"]
"tests/**.py" = ["D"]
"scripts/**.py" = ["D"]
"demo/**.py" = ["D"]
".github/**.py" = ["D"]
[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
[tool.coverage.run]
source = ["onnxtr"]
================================================
FILE: scripts/convert_to_float16.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
try:
from onnxconverter_common import auto_convert_mixed_precision
except ImportError:
raise ImportError("Failed to import onnxconverter_common. Please install `pip install onnxconverter-common`.")
# Check GPU availability
import onnxruntime
if onnxruntime.get_device() != "GPU":
raise RuntimeError(
"Please install OnnxTR with GPU support to run this script. "
+ "`pip install onnxtr[gpu]` or `pip install -e .[gpu]`"
)
import argparse
import time
from tempfile import TemporaryDirectory
from typing import Any
import numpy as np
import onnx
from onnxtr.models import classification, detection, recognition
from onnxtr.models.classification.zoo import ORIENTATION_ARCHS
from onnxtr.models.detection.zoo import ARCHS as DETECTION_ARCHS
from onnxtr.models.recognition.zoo import ARCHS as RECOGNITION_ARCHS
def _load_model(arch: str, model_path: str | None = None) -> Any:
    """Instantiate a detection, recognition or orientation model by architecture name.

    Args:
        arch: architecture name; must be listed in one of the zoo ARCHS collections
        model_path: optional path to a custom ONNX model file; if None, the
            pretrained default weights are used

    Returns:
        The instantiated model

    Raises:
        ValueError: if ``arch`` is not a known architecture
    """
    # BUGFIX: the recognition/orientation branches previously tested the global
    # ``args.arch`` instead of the ``arch`` parameter, which made the function
    # depend on script-level state and break when called with a different arch.
    if arch in DETECTION_ARCHS:
        factory = detection.__dict__[arch]
    elif arch in RECOGNITION_ARCHS:
        factory = recognition.__dict__[arch]
    elif arch in ORIENTATION_ARCHS:
        factory = classification.__dict__[arch]
    else:
        raise ValueError(f"Unknown architecture {arch}")
    return factory() if model_path is None else factory(model_path)
def _latency_check(args: Any, size: tuple[int], model: Any, img_tensor: np.ndarray) -> None:
# Warmup
for _ in range(10):
_ = model(img_tensor)
timings = []
# Evaluation runs
for _ in range(args.it):
start_ts = time.perf_counter()
_ = model(img_tensor)
timings.append(time.perf_counter() - start_ts)
_timings = np.array(timings)
print(f"{args.arch} ({args.it} runs on ({size}) inputs)")
print(f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms")
def _validate(fp32_in: list[np.ndarray], fp16_in: list[np.ndarray]) -> bool:
assert fp32_in[0].shape == fp16_in[0].shape, "Input shapes are not the same"
# print mean difference between fp32 and fp16 inputs
if np.abs(fp32_in[0] - fp16_in[0]).mean() > 1e-3:
print(
f"Mean difference between fp32 and fp16 inputs: {np.abs(fp32_in[0] - fp16_in[0]).mean()} "
+ "-> YOU MAY EXPECT DIFFERING RESULTS"
)
return True # NOTE: Only warning, not error
def main(args):
    """Convert a pretrained OnnxTR model to mixed FP16 precision, benchmark it, and save it.

    Loads the FP32 model, runs ``auto_convert_mixed_precision`` with a random
    input feed, compares latency of both variants, then writes the FP16 model
    to ``<arch>_fp16.onnx`` in the working directory.
    """
    model_float32 = _load_model(args.arch, model_path=args.input_model if args.input_model else None)
    # Batch of 1 with the model's configured input shape, random data as feed
    size = (1, *model_float32.cfg["input_shape"])
    img_tensor = np.random.rand(*size).astype(np.float32)

    with TemporaryDirectory() as temp_dir:
        model_fp16_path = f"{temp_dir}/model_fp16.onnx"
        input_feed = {model_float32.runtime_inputs.name: img_tensor}
        model_float16 = auto_convert_mixed_precision(
            # NOTE: keep_io_types=True is required to keep the input/output type as float32
            onnx.load(str(model_float32.model_path)),
            input_feed,
            validate_fn=_validate,
            keep_io_types=True,
        )
        # Save to a temp file first so the FP16 model can be loaded for benchmarking
        onnx.save(model_float16, model_fp16_path)
        model_fp16 = _load_model(args.arch, model_fp16_path)
        # Latency check
        _latency_check(args, size, model_float32, img_tensor)
        _latency_check(args, size, model_fp16, img_tensor)

    # Persist the converted model next to the script (temp dir is gone by now;
    # the in-memory proto is saved, not the temp file)
    onnx.save(model_float16, args.arch + "_fp16.onnx")
    print(f"FP16 model saved at {args.arch}_fp16.onnx")
    print("Attention: FP16 converted models can only run on GPU devices.")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="OnnxTR FP32 to FP16 conversion",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"arch",
type=str,
choices=DETECTION_ARCHS + RECOGNITION_ARCHS + ORIENTATION_ARCHS,
help="Architecture to convert",
)
parser.add_argument("--input_model", type=str, help="Path to the input model", required=False)
parser.add_argument("--it", type=int, default=1000, help="Number of iterations to run")
args = parser.parse_args()
main(args)
================================================
FILE: scripts/evaluate.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
try:
from doctr.version import __version__
print(f"DocTR version: {__version__}")
except ImportError:
raise ImportError("Failed to import `doctr`. Please install `pip install python-doctr[torch]`.")
import os
import time
from typing import Any
import numpy as np
from doctr import datasets
from doctr import transforms as T
from doctr.utils.metrics import LocalizationConfusion, OCRMetric, TextMatch
from tqdm import tqdm
from onnxtr.models import EngineConfig, ocr_predictor
from onnxtr.utils.geometry import extract_crops, extract_rcrops
def _pct(val):
return "N/A" if val is None else f"{val:.2%}"
def main(args):
    """Run end-to-end OCR evaluation of an OnnxTR predictor against a docTR dataset.

    Computes detection (LocalizationConfusion), recognition (TextMatch) and
    end-to-end (OCRMetric) scores over the train+val splits, prints aggregated
    metrics and timing, and optionally records yappi/memray profiles.
    """
    # Straight-bbox evaluation is forced whenever rotation handling is off
    if not args.rotation:
        args.eval_straight = True

    if args.profiling:
        os.environ["ONNXTR_MULTIPROCESSING_DISABLE"] = "TRUE"
        try:
            import memray
            import yappi
        except ImportError:
            raise ImportError("Please install yappi and memray to enable profiling - `pip install yappi memray`.")
        yappi.set_clock_type("cpu")
        # Drop memray profile and flamegraph if they already exist
        if os.path.exists("memray_profile.bin"):
            os.remove("memray_profile.bin")
        if os.path.exists("memray_flamegraph.html"):
            os.remove("memray_flamegraph.html")
        # Entered manually so it can span the whole run; __exit__ is called below
        memray_tracker = memray.Tracker("memray_profile.bin")
        memray_tracker.__enter__()

    input_shape = (args.size, args.size)

    # We define a transformation function which does transform the annotation
    # to the required format for the Resize transformation
    def _transform(img, target):
        boxes = target["boxes"]
        transformed_img, transformed_boxes = T.Resize(
            input_shape, preserve_aspect_ratio=args.keep_ratio, symmetric_pad=args.symmetric_pad
        )(img, boxes)
        return transformed_img, {"boxes": transformed_boxes, "labels": target["labels"]}

    predictor = ocr_predictor(
        args.detection,
        args.recognition,
        reco_bs=args.batch_size,
        preserve_aspect_ratio=False,  # we handle the transformation directly in the dataset so this is set to False
        symmetric_pad=False,  # we handle the transformation directly in the dataset so this is set to False
        assume_straight_pages=not args.rotation,
        load_in_8_bit=args.load_8bit,
        det_engine_cfg=EngineConfig(providers=["CPUExecutionProvider"]) if args.force_cpu else None,
        reco_engine_cfg=EngineConfig(providers=["CPUExecutionProvider"]) if args.force_cpu else None,
        clf_engine_cfg=EngineConfig(providers=["CPUExecutionProvider"]) if args.force_cpu else None,
    )

    # Load the dataset
    train_set = datasets.__dict__[args.dataset](
        train=True,
        download=True,
        use_polygons=not args.eval_straight,
        sample_transforms=_transform,
    )
    val_set = datasets.__dict__[args.dataset](
        train=False,
        download=True,
        use_polygons=not args.eval_straight,
        sample_transforms=_transform,
    )
    sets = [train_set, val_set]

    reco_metric = TextMatch()
    det_metric = LocalizationConfusion(iou_thresh=args.iou, use_polygons=not args.eval_straight)
    e2e_metric = OCRMetric(iou_thresh=args.iou, use_polygons=not args.eval_straight)

    sample_idx = 0
    # Axis-aligned crops for straight eval, rotated crops otherwise
    extraction_fn = extract_crops if args.eval_straight else extract_rcrops

    timings = []
    # Warmup
    print("Warming up the model...")
    dummy_img = np.zeros((args.size, args.size, 3), dtype=np.uint8)
    for _ in range(5):
        _ = predictor([dummy_img])
    print("Warmup done.\n")

    for dataset in sets:
        for page, target in tqdm(dataset):
            # Normalize the page to HWC uint8, whatever the dataset backend yields
            if hasattr(page, "numpy"):
                page = page.numpy()
            if page.ndim == 3 and page.shape[0] in [1, 3]:
                page = np.moveaxis(page, 0, -1)
            if page.dtype != np.uint8:
                page = (page * 255).astype(np.uint8) if np.max(page) <= 1 else page.astype(np.uint8)
            # GT
            gt_boxes = target["boxes"]
            gt_labels = target["labels"]

            # Forward
            if args.profiling:
                yappi.start()
            start_ts = time.perf_counter()
            out = predictor(page[None, ...])
            timings.append(time.perf_counter() - start_ts)
            if args.profiling:
                yappi.stop()

            # Recognition is also scored on ground-truth crops, independent of detection
            crops = extraction_fn(page, gt_boxes, channels_last=True)
            reco_out = predictor.reco_predictor(crops)
            reco_words: Any = []
            if len(reco_out):
                reco_words, _ = zip(*reco_out)

            # Unpack preds
            pred_boxes: list[list[Any]] = []
            pred_labels: list[str] = []
            # NOTE(review): ``page`` is shadowed here by the predicted page object;
            # harmless since the image array is no longer needed this iteration,
            # but worth renaming at some point.
            for page in out.pages:
                height, width = page.dimensions
                for block in page.blocks:
                    for line in block.lines:
                        for word in line.words:
                            if not args.rotation:
                                (a, b), (c, d) = word.geometry
                            else:
                                (
                                    [x1, y1],
                                    [x2, y2],
                                    [x3, y3],
                                    [x4, y4],
                                ) = word.geometry
                            # Scale relative coords to pixels when GT boxes are integer
                            if np.issubdtype(gt_boxes.dtype, np.integer):
                                if not args.rotation:
                                    pred_boxes.append([
                                        int(a * width),
                                        int(b * height),
                                        int(c * width),
                                        int(d * height),
                                    ])
                                else:
                                    if args.eval_straight:
                                        # Collapse the polygon to its axis-aligned bounding box
                                        pred_boxes.append([
                                            int(width * min(x1, x2, x3, x4)),
                                            int(height * min(y1, y2, y3, y4)),
                                            int(width * max(x1, x2, x3, x4)),
                                            int(height * max(y1, y2, y3, y4)),
                                        ])
                                    else:
                                        pred_boxes.append([
                                            [int(x1 * width), int(y1 * height)],
                                            [int(x2 * width), int(y2 * height)],
                                            [int(x3 * width), int(y3 * height)],
                                            [int(x4 * width), int(y4 * height)],
                                        ])
                            else:
                                if not args.rotation:
                                    pred_boxes.append([a, b, c, d])
                                else:
                                    if args.eval_straight:
                                        pred_boxes.append([
                                            min(x1, x2, x3, x4),
                                            min(y1, y2, y3, y4),
                                            max(x1, x2, x3, x4),
                                            max(y1, y2, y3, y4),
                                        ])
                                    else:
                                        pred_boxes.append([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
                            pred_labels.append(word.value)

            # Update the metric
            det_metric.update(gt_boxes, np.asarray(pred_boxes))
            reco_metric.update(gt_labels, reco_words)
            e2e_metric.update(gt_boxes, np.asarray(pred_boxes), gt_labels, pred_labels)

            # Loop break
            sample_idx += 1
            if isinstance(args.samples, int) and args.samples == sample_idx:
                break
        if isinstance(args.samples, int) and args.samples == sample_idx:
            break

    # Unpack aggregated metrics
    print(f"Model Evaluation (model= {args.detection} + {args.recognition}, dataset={args.dataset})")
    recall, precision, mean_iou = det_metric.summary()
    print(f"Text Detection - Recall: {_pct(recall)}, Precision: {_pct(precision)}, Mean IoU: {_pct(mean_iou)}")
    acc = reco_metric.summary()
    print(f"Text Recognition - Accuracy: {_pct(acc['raw'])} (unicase: {_pct(acc['unicase'])})")
    recall, precision, mean_iou = e2e_metric.summary()
    print(
        f"OCR - Recall: {_pct(recall['raw'])} (unicase: {_pct(recall['unicase'])}), "
        f"Precision: {_pct(precision['raw'])} (unicase: {_pct(precision['unicase'])}), Mean IoU: {_pct(mean_iou)}\n"
    )
    print(f"Number of samples: {sample_idx}")
    print(f"Total inference time: {np.sum(timings):.2f} sec")
    print(f"Average inference time per sample: {np.mean(timings):.6f} sec")

    if args.profiling:
        import subprocess

        memray_tracker.__exit__(None, None, None)
        with open("yappi_profile.stats", "w") as f:
            yappi.get_func_stats().print_all(out=f)
        print("Profiling complete. Generating memray flamegraph and stats...")
        subprocess.run(["memray", "flamegraph", "memray_profile.bin", "-o", "memray_flamegraph.html"])
        subprocess.run(["memray", "stats", "memray_profile.bin"])
def parse_args():
    """Parse CLI arguments for the end-to-end evaluation script.

    Returns:
        argparse.Namespace with detection/recognition model names, dataset and
        evaluation options.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="OnnxTR end-to-end evaluation", formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("detection", type=str, help="Text detection model to use for analysis")
    parser.add_argument("recognition", type=str, help="Text recognition model to use for analysis")
    parser.add_argument("--iou", type=float, default=0.5, help="IoU threshold to match a pair of boxes")
    parser.add_argument("--dataset", type=str, default="FUNSD", help="choose a dataset: FUNSD, CORD")
    parser.add_argument("--rotation", dest="rotation", action="store_true", help="run rotated OCR + postprocessing")
    parser.add_argument("-b", "--batch_size", type=int, default=32, help="batch size for recognition")
    parser.add_argument("--size", type=int, default=1024, help="model input size, H = W")
    parser.add_argument("--keep_ratio", action="store_true", help="keep the aspect ratio of the input image")
    parser.add_argument("--symmetric_pad", action="store_true", help="pad the image symmetrically")
    parser.add_argument("--samples", type=int, default=None, help="evaluate only on the N first samples")
    parser.add_argument(
        "--eval-straight",
        action="store_true",
        help="evaluate on straight pages with straight bbox (to use the quick and light metric)",
    )
    parser.add_argument("--load_8bit", action="store_true", help="load model in 8bit mode")
    parser.add_argument("--force-cpu", action="store_true", help="force CPU execution")
    parser.add_argument("--profiling", action="store_true", help="enable profiling")
    args = parser.parse_args()
    return args
if __name__ == "__main__":
args = parse_args()
main(args)
================================================
FILE: scripts/latency.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
import argparse
import time
import numpy as np
from onnxtr.models import classification, detection, recognition
from onnxtr.models.classification.zoo import ORIENTATION_ARCHS
from onnxtr.models.detection.zoo import ARCHS as DETECTION_ARCHS
from onnxtr.models.recognition.zoo import ARCHS as RECOGNITION_ARCHS
def main(args):
    """Benchmark single-input inference latency for one OnnxTR architecture.

    Args:
        args: parsed CLI namespace; reads ``args.arch`` (architecture name),
            ``args.load8bit`` (use the 8-bit quantized weights) and ``args.it``
            (number of measured runs)

    Raises:
        ValueError: if ``args.arch`` is not found in any model zoo
    """
    # Resolve which zoo module hosts the requested architecture
    if args.arch in DETECTION_ARCHS:
        zoo = detection
    elif args.arch in RECOGNITION_ARCHS:
        zoo = recognition
    elif args.arch in ORIENTATION_ARCHS:
        zoo = classification
    else:
        raise ValueError(f"Unknown architecture {args.arch}")
    model = zoo.__dict__[args.arch](load_in_8_bit=args.load8bit)

    # Batch of 1 with the model's configured input shape, random data as feed
    size = (1, *model.cfg["input_shape"])
    img_tensor = np.random.rand(*size).astype(np.float32)

    # Warmup (not measured)
    for _ in range(10):
        _ = model(img_tensor)

    def _one_run() -> float:
        # Wall-clock duration of a single forward pass
        begin = time.perf_counter()
        _ = model(img_tensor)
        return time.perf_counter() - begin

    # Evaluation runs
    _timings = np.array([_one_run() for _ in range(args.it)])
    print(f"{args.arch} ({args.it} runs on ({size}) inputs)")
    print(f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="OnnxTR latency benchmark",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"arch",
type=str,
choices=DETECTION_ARCHS + RECOGNITION_ARCHS + ORIENTATION_ARCHS,
help="Architecture to benchmark",
)
parser.add_argument("--load8bit", action="store_true", help="Load the 8-bit quantized model")
parser.add_argument("--it", type=int, default=1000, help="Number of iterations to run")
args = parser.parse_args()
main(args)
================================================
FILE: scripts/quantize.py
================================================
import argparse
import os
import time
from enum import Enum
import numpy as np
import onnxruntime
from onnxruntime.quantization import CalibrationDataReader, QuantFormat, QuantType, quantize_dynamic, quantize_static
from onnxtr.io.image import read_img_as_numpy
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.geometry import shape_translate
class TaskShapes(Enum):
    """Enum class to define the shapes of the input tensors for different tasks"""

    # Values are (height, width) input resolutions per task
    crop_orientation = (256, 256)
    page_orientation = (512, 512)
    detection = (1024, 1024)
    recognition = (32, 128)
class CalibrationDataLoader(CalibrationDataReader):
    """Feeds preprocessed calibration images to the onnxruntime static quantizer.

    Args:
        calibration_image_folder: directory containing the calibration images
        model_path: path to the ONNX model, probed only for its input name
        task_shape: (height, width) the preprocessor resizes every image to
    """

    def __init__(self, calibration_image_folder: str, model_path: str, task_shape: tuple[int]):
        self.enum_data = None  # lazily-built iterator over input feeds
        self.preprocessor = PreProcessor(output_size=task_shape, batch_size=1)
        # Preprocess all images up-front; capped to bound memory usage
        self.dataset = [
            self.preprocessor(
                np.expand_dims(read_img_as_numpy(os.path.join(calibration_image_folder, img_file)), axis=0)
            )
            for img_file in os.listdir(calibration_image_folder)[:500]  # limit to 500 images
        ]
        # Throwaway session, used only to discover the model's input tensor name
        session = onnxruntime.InferenceSession(model_path, None)
        self.input_name = session.get_inputs()[0].name
        self.datasize = len(self.dataset)

    def get_next(self):
        """Return the next calibration input feed as {input_name: array}, or None when exhausted."""
        if self.enum_data is None:
            self.enum_data = iter([
                {self.input_name: shape_translate(input_data[0], format="BCHW")} for input_data in self.dataset
            ])
        return next(self.enum_data, None)

    def rewind(self):
        """Reset the feed iterator so calibration can restart from the first sample."""
        self.enum_data = None
def benchmark(calibration_image_folder: str, model_path: str, task_shape: tuple[int]):
    """Run a 10-iteration latency benchmark of an ONNX model and print per-run and average timings.

    Args:
        calibration_image_folder: directory of images used to build one input sample
        model_path: path to the ONNX model to benchmark
        task_shape: (height, width) preprocessing target for the sample
    """
    session = onnxruntime.InferenceSession(model_path)
    input_name = session.get_inputs()[0].name
    output_name = [output.name for output in session.get_outputs()]
    dataset = CalibrationDataLoader(calibration_image_folder, model_path, task_shape)
    sample = shape_translate(dataset.dataset[0][0], format="BCHW")  # take 1 sample for benchmarking
    runs = 10
    # Warming up
    _ = session.run(output_name, {input_name: sample})
    durations_ms = []
    for _ in range(runs):
        start = time.perf_counter()
        _ = session.run(output_name, {input_name: sample})
        elapsed_ms = (time.perf_counter() - start) * 1000
        durations_ms.append(elapsed_ms)
        print(f"{elapsed_ms:.2f}ms")
    print(f"Avg: {sum(durations_ms) / runs:.2f}ms")
def benchmark_mean_diff(
    calibration_image_folder: str, model_path: str, quantized_model_path: str, task_shape: tuple[int]
):
    """Check the mean difference between the original and quantized model

    Args:
        calibration_image_folder: directory of images used to build one input sample
        model_path: path to the original (reference) ONNX model
        quantized_model_path: path to the quantized ONNX model
        task_shape: (height, width) preprocessing target for the sample
    """
    session = onnxruntime.InferenceSession(model_path)
    quantized_session = onnxruntime.InferenceSession(quantized_model_path)
    input_name = session.get_inputs()[0].name
    output_name = [out.name for out in session.get_outputs()]
    quantized_output_name = [out.name for out in quantized_session.get_outputs()]
    loader = CalibrationDataLoader(calibration_image_folder, model_path, task_shape)
    sample = shape_translate(loader.dataset[0][0], format="BCHW")  # take 1 sample for benchmarking
    # Compare the first output head of both models on the same input
    reference_out = session.run(output_name, {input_name: sample})[0]
    quantized_out = quantized_session.run(quantized_output_name, {input_name: sample})[0]
    mean_diff = np.mean(np.abs(reference_out - quantized_out))
    print(f"Mean difference between original and quantized model: {mean_diff:.2f}")
def main(args):
    """Quantize the input model (static and/or dynamic int8) and benchmark the results."""
    input_model_path = args.input_model
    calibration_dataset_path = args.calibrate_dataset
    # Task → input shape lookup; unknown tasks fall back to the recognition shape,
    # mirroring the original if/elif chain.
    shape_by_task = {
        "crop_orientation": TaskShapes.crop_orientation.value,
        "page_orientation": TaskShapes.page_orientation.value,
        "detection": TaskShapes.detection.value,
    }
    task_shape = shape_by_task.get(args.task, TaskShapes.recognition.value)
    print(f"Task: {args.task} | Task shape: {task_shape}")
    dr = CalibrationDataLoader(calibration_dataset_path, input_model_path, task_shape)
    model_filename = input_model_path.split("/")[-1]
    base_model_name = model_filename.split("-")[0]
    static_out_name = base_model_name + "_static_8_bit.onnx"
    dynamic_out_name = base_model_name + "_dynamic_8_bit.onnx"
    print("benchmarking fp32 model...")
    benchmark(calibration_dataset_path, input_model_path, task_shape)
    # Calibrate and quantize model
    # Turn off model optimization during quantization
    if "parseq" not in input_model_path:  # Skip static quantization for Parseq
        print("Calibrating and quantizing model static...")
        try:
            quantize_static(
                input_model_path,
                static_out_name,
                dr,
                quant_format=args.quant_format,
                weight_type=QuantType.QInt8,
                activation_type=QuantType.QUInt8,
                reduce_range=True,
            )
        except Exception:
            # Some models reject signed int8 weights — retry fully unsigned
            print("Error during static quantization --> Change weight_type also to QUInt8")
            quantize_static(
                input_model_path,
                static_out_name,
                dr,
                quant_format=args.quant_format,
                weight_type=QuantType.QUInt8,
                activation_type=QuantType.QUInt8,
                reduce_range=True,
            )
        print("benchmarking static int8 model...")
        benchmark(calibration_dataset_path, static_out_name, task_shape)
        print("benchmarking mean difference between fp32 and static int8 model...")
        benchmark_mean_diff(calibration_dataset_path, input_model_path, static_out_name, task_shape)
        print("Calibrated and quantized static model saved.")
    if "sar" not in input_model_path:  # Skip dynamic quantization for SAR_ResNet31
        print("Dynamic int 8 quantization...")
        quantize_dynamic(
            input_model_path,
            dynamic_out_name,
            weight_type=QuantType.QUInt8,
        )
        print("Dynamic model saved.")
        print("benchmarking dynamic int8 model...")
        benchmark(calibration_dataset_path, dynamic_out_name, task_shape)
        print("benchmarking mean difference between fp32 and dynamic int8 model...")
        benchmark_mean_diff(calibration_dataset_path, input_model_path, dynamic_out_name, task_shape)
if __name__ == "__main__":
    # CLI: model path, task (selects the input shape), calibration data and quant format.
    arg_parser = argparse.ArgumentParser(
        description="OnnxTR script to quantize models and benchmark the quantized models",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    arg_parser.add_argument("--input_model", required=True, help="input model")
    arg_parser.add_argument(
        "--task",
        required=True,
        type=str,
        choices=["crop_orientation", "page_orientation", "detection", "recognition"],
        help="task shape",
    )
    arg_parser.add_argument(
        "--calibrate_dataset",
        type=str,
        required=True,
        help="calibration data set (word crop images for recognition, crop_orientation else page images for detection, page_orientation)",  # noqa
    )
    arg_parser.add_argument(
        "--quant_format",
        default=QuantFormat.QDQ,
        type=QuantFormat.from_string,
        choices=list(QuantFormat),
    )
    main(arg_parser.parse_args())
================================================
FILE: setup.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

import os
from pathlib import Path

from setuptools import setup

PKG_NAME = "onnxtr"
# Version can be overridden by CI via the BUILD_VERSION environment variable
VERSION = os.getenv("BUILD_VERSION", "0.8.2a0")

if __name__ == "__main__":
    print(f"Building wheel {PKG_NAME}-{VERSION}")
    # Dynamically set the __version__ attribute
    project_root = Path(__file__).parent.absolute()
    version_file = project_root.joinpath("onnxtr", "version.py")
    with open(version_file, "w", encoding="utf-8") as f:
        f.write(f"__version__ = '{VERSION}'\n")
    setup(name=PKG_NAME, version=VERSION)
================================================
FILE: tests/common/test_contrib.py
================================================
import numpy as np
import pytest
from onnxtr.contrib import artefacts
from onnxtr.contrib.base import _BasePredictor
from onnxtr.io import DocumentFile
def test_base_predictor():
    # Either a url or a model_path must be supplied
    with pytest.raises(ValueError):
        _ = _BasePredictor(batch_size=2)
    predictor = _BasePredictor(batch_size=2, url=artefacts.default_cfgs["yolov8_artefact"]["url"])
    # preprocess / postprocess are abstract hooks and must be overridden
    with pytest.raises(NotImplementedError):
        predictor.preprocess(np.zeros((10, 10, 3)))
    with pytest.raises(NotImplementedError):
        predictor.postprocess([np.zeros((10, 10, 3))], [[np.zeros((10, 10, 3))]])
def test_artefact_detector(mock_artefact_image_stream):
    doc = DocumentFile.from_images([mock_artefact_image_stream])
    detector = artefacts.ArtefactDetector(batch_size=2, conf_threshold=0.5, iou_threshold=0.5)
    results = detector(doc)
    # One result list per input page
    assert isinstance(results, list) and len(results) == 1 and isinstance(results[0], list)
    page_artefacts = results[0]
    assert all(isinstance(artefact, dict) for artefact in page_artefacts)
    # Every detection exposes label / confidence / box with the expected types
    assert all(key in page_artefacts[0] for key in ["label", "confidence", "box"])
    assert all(len(artefact["box"]) == 4 for artefact in page_artefacts)
    assert all(isinstance(coord, int) for artefact in page_artefacts for coord in artefact["box"])
    assert all(isinstance(artefact["confidence"], float) for artefact in page_artefacts)
    assert all(isinstance(artefact["label"], str) for artefact in page_artefacts)
    # The mock image is expected to contain exactly 9 artefacts
    assert len(page_artefacts) == 9
    # Non-blocking visualization so the suite does not hang
    detector.show(block=False)
================================================
FILE: tests/common/test_core.py
================================================
import pytest
import onnxtr
from onnxtr.file_utils import requires_package
def test_version():
    # Semantic versioning: exactly three dot-separated components
    version_parts = onnxtr.__version__.split(".")
    assert len(version_parts) == 3
def test_requires_package():
    requires_package("numpy")  # available
    with pytest.raises(ImportError):  # not available
        requires_package("non_existent_package")
================================================
FILE: tests/common/test_engine_cfg.py
================================================
import gc
import numpy as np
import psutil
import pytest
from onnxruntime import RunOptions, SessionOptions
from onnxtr import models
from onnxtr.io import Document
from onnxtr.models import EngineConfig, detection, recognition
from onnxtr.models.predictor import OCRPredictor
def _get_rss_mb():
    """Return the current process resident set size in MiB, after a GC pass."""
    gc.collect()
    rss_bytes = psutil.Process().memory_info().rss
    return rss_bytes / (1024 * 1024)
def _test_predictor(predictor):
    """Shared sanity checks for an OCRPredictor instance."""
    assert isinstance(predictor, OCRPredictor)
    doc = [np.zeros((1024, 1024, 3), dtype=np.uint8)]
    result = predictor(doc)
    # A single input page yields a Document with a single page
    assert isinstance(result, Document)
    assert len(result.pages) == 1
    # A 4D array is not a valid page and must be rejected
    with pytest.raises(ValueError):
        bad_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
        _ = predictor([bad_page])
@pytest.mark.parametrize(
    "det_arch, reco_arch",
    [[det_arch, reco_arch] for det_arch, reco_arch in zip(detection.zoo.ARCHS, recognition.zoo.ARCHS)],
)
def test_engine_cfg(det_arch, reco_arch):
    """EngineConfig must be propagated down to every underlying ONNX session."""
    session_options = SessionOptions()
    session_options.enable_cpu_mem_arena = False
    engine_cfg = EngineConfig(
        providers=["CPUExecutionProvider"],
        session_options=session_options,
    )
    assert repr(engine_cfg) == "EngineConfig(providers=['CPUExecutionProvider'])"

    def _check_cfg(model):
        # The config must reach the model's session: providers and session options
        assert model.providers == ["CPUExecutionProvider"]
        assert not model.session_options.enable_cpu_mem_arena

    # Predictor built from architecture names
    predictor = models.ocr_predictor(
        det_arch, reco_arch, det_engine_cfg=engine_cfg, reco_engine_cfg=engine_cfg, clf_engine_cfg=engine_cfg
    )
    _check_cfg(predictor.det_predictor.model)
    _check_cfg(predictor.reco_predictor.model)
    _test_predictor(predictor)

    # Passing model instances directly
    det_model = detection.__dict__[det_arch](engine_cfg=engine_cfg)
    _check_cfg(det_model)
    reco_model = recognition.__dict__[reco_arch](engine_cfg=engine_cfg)
    _check_cfg(reco_model)
    predictor = models.ocr_predictor(det_model, reco_model)
    _check_cfg(predictor.det_predictor.model)
    _check_cfg(predictor.reco_predictor.model)
    _test_predictor(predictor)

    # Task-specific predictors
    det_predictor = models.detection_predictor(det_arch, engine_cfg=engine_cfg)
    _check_cfg(det_predictor.model)
    reco_predictor = models.recognition_predictor(reco_arch, engine_cfg=engine_cfg)
    _check_cfg(reco_predictor.model)
def test_cpu_memory_arena_shrinkage_enabled():
    # Arena enabled with mem-pattern off, so per-run shrinkage requests take effect.
    session_options = SessionOptions()
    session_options.enable_mem_pattern = False
    session_options.enable_cpu_mem_arena = True
    # Toggled below; read by the closure on every inference run.
    enable_shrinkage = False
    providers = [("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]

    def enable_arena_shrinkage(run_options: "RunOptions") -> "RunOptions":
        # Run-options hook: when the flag is set, request CPU arena shrinkage for
        # this run and verify the entry was recorded.
        if enable_shrinkage:
            run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu:0")
            assert run_options.get_run_config_entry("memory.enable_memory_arena_shrinkage") == "cpu:0"
        return run_options

    engine_cfg = EngineConfig(
        providers=providers,
        session_options=session_options,
        run_options_provider=enable_arena_shrinkage,
    )
    predictor = models.ocr_predictor(
        det_engine_cfg=engine_cfg,
        reco_engine_cfg=engine_cfg,
        clf_engine_cfg=engine_cfg,
        detect_orientation=True,
    )
    # The config must reach both det and reco sessions
    assert predictor.det_predictor.model.providers == providers
    assert predictor.det_predictor.model.session_options.enable_cpu_mem_arena
    assert predictor.reco_predictor.model.providers == providers
    assert predictor.reco_predictor.model.session_options.enable_cpu_mem_arena
    # Fixed seed so both runs see the identical input
    rng = np.random.RandomState(seed=42)
    sample = rng.randint(0, 256, (1024, 1024, 3), dtype=np.uint8)
    # Baseline RSS, then one run without shrinkage: the arena should grow
    start_rss = _get_rss_mb()
    predictor([sample])
    increased_rss = _get_rss_mb()
    assert increased_rss > start_rss
    # Second run with shrinkage enabled: RSS should drop below the grown value
    enable_shrinkage = True
    predictor([sample])
    decreased_rss = _get_rss_mb()
    assert increased_rss > decreased_rss
================================================
FILE: tests/common/test_headers.py
================================================
"""Test for python files copyright headers."""
from datetime import datetime
from pathlib import Path
def test_copyright_header():
    # Expected header, with the year range ending at the current year
    copyright_header = "".join([
        f"# Copyright (C) {2021}-{datetime.now().year}, Mindee | Felix Dittrich.\n\n",
        "# This program is licensed under the Apache License 2.0.\n",
        "# See LICENSE or go to for full license details.\n",
    ])
    excluded_files = ["__init__.py", "version.py"]
    locations = [".github", "onnxtr"]
    repo_root = Path(__file__).parent.parent.parent
    invalid_files = [
        source_path
        for location in locations
        for source_path in repo_root.joinpath(location).rglob("*.py")
        if source_path.name not in excluded_files and copyright_header not in source_path.read_text()
    ]
    assert len(invalid_files) == 0, f"Invalid copyright header in the following files: {invalid_files}"
================================================
FILE: tests/common/test_io.py
================================================
from io import BytesIO
from pathlib import Path
import numpy as np
import pytest
import requests
from onnxtr import io
def _check_doc_content(doc_tensors, num_pages):
# 1 doc of 8 pages
assert len(doc_tensors) == num_pages
assert all(isinstance(page, np.ndarray) for page in doc_tensors)
assert all(page.dtype == np.uint8 for page in doc_tensors)
def test_read_pdf(mock_pdf):
    # str paths, Path objects and raw bytes are all accepted
    for source in (mock_pdf, Path(mock_pdf)):
        _check_doc_content(io.read_pdf(source), 2)
    with open(mock_pdf, "rb") as f:
        _check_doc_content(io.read_pdf(f.read()), 2)
    # Wrong input type
    with pytest.raises(TypeError):
        _ = io.read_pdf(123)
    # Wrong path
    with pytest.raises(FileNotFoundError):
        _ = io.read_pdf("my_imaginary_file.pdf")
def test_read_img_as_numpy(tmpdir_factory, mock_pdf):
    # Wrong input type
    with pytest.raises(TypeError):
        _ = io.read_img_as_numpy(123)
    # Non-existing file
    with pytest.raises(FileNotFoundError):
        io.read_img_as_numpy("my_imaginary_file.jpg")
    # A PDF is not decodable as an image
    with pytest.raises(ValueError):
        io.read_img_as_numpy(str(mock_pdf))
    # Fetch a reference image and write it to a temp file
    url = "https://doctr-static.mindee.com/models?id=v0.2.1/Grace_Hopper.jpg&src=0"
    payload = BytesIO(requests.get(url).content)
    tmp_path = str(tmpdir_factory.mktemp("data").join("mock_img_file.jpg"))
    with open(tmp_path, "wb") as f:
        f.write(payload.getbuffer())
    # Path & stream inputs must decode identically
    with open(tmp_path, "rb") as f:
        page_stream = io.read_img_as_numpy(f.read())
    for page in (io.read_img_as_numpy(tmp_path), page_stream):
        # Data type
        assert isinstance(page, np.ndarray)
        assert page.dtype == np.uint8
        # Shape
        assert page.shape == (606, 517, 3)
    # rgb_output=False returns the channel-reversed (BGR) image
    bgr_page = io.read_img_as_numpy(tmp_path, rgb_output=False)
    assert np.all(page == bgr_page[..., ::-1])
    # Resize to a target (height, width)
    target_size = (200, 150)
    resized_page = io.read_img_as_numpy(tmp_path, target_size)
    assert resized_page.shape[:2] == target_size
def test_read_html():
    # Rendering a web page yields raw PDF bytes
    pdf_stream = io.read_html("https://www.google.com")
    assert isinstance(pdf_stream, bytes)
def test_document_file(mock_pdf, mock_artefact_image_stream):
    # Images, PDFs and URLs are all valid DocumentFile entry points
    pages = io.DocumentFile.from_images([mock_artefact_image_stream])
    _check_doc_content(pages, 1)
    assert isinstance(io.DocumentFile.from_pdf(mock_pdf), list)
    assert isinstance(io.DocumentFile.from_url("https://www.google.com"), list)
def test_pdf(mock_pdf):
    # The 2-page mock PDF decodes into 2 image pages
    num_pages = 2
    pages = io.DocumentFile.from_pdf(mock_pdf)
    _check_doc_content(pages, num_pages)
================================================
FILE: tests/common/test_io_elements.py
================================================
from xml.etree.ElementTree import ElementTree
import numpy as np
import pytest
from onnxtr.io import elements
def _mock_words(size=(1.0, 1.0), offset=(0, 0), confidence=0.9, objectness_score=0.9, polygons=False):
    """Build two mock Word elements ('hello' / 'world') as boxes or 4-point polygons."""

    def _ori():
        # Fresh dict per word so elements never share mutable state
        return {"value": 0, "confidence": None}

    half = (size[0] / 2 + offset[0], size[1] / 2 + offset[1])
    full = (size[0] + offset[0], size[1] + offset[1])
    if polygons:
        # (x1, y1), (x2, y2), (x3, y3), (x4, y4) with shape (4, 2)
        hello_geom = np.array([
            [offset[0], offset[1]],
            [half[0], offset[1]],
            [half[0], half[1]],
            [offset[0], half[1]],
        ])
        world_geom = np.array([
            [half[0], half[1]],
            [full[0], half[1]],
            [full[0], full[1]],
            [half[0], full[1]],
        ])
    else:
        # ((xmin, ymin), (xmax, ymax)) boxes covering the two quadrants
        hello_geom = ((offset[0], offset[1]), half)
        world_geom = (half, full)
    return [
        elements.Word("hello", confidence, hello_geom, objectness_score, _ori()),
        elements.Word("world", confidence, world_geom, objectness_score, _ori()),
    ]
def _mock_artefacts(size=(1, 1), offset=(0, 0), confidence=0.8):
    """Build two mock qr_code Artefacts tiling the given area diagonally."""
    sub_size = (size[0] / 2, size[1] / 2)
    inner = (sub_size[0] + offset[0], sub_size[1] + offset[1])
    outer = (size[0] + offset[0], size[1] + offset[1])
    return [
        elements.Artefact("qr_code", confidence, ((offset[0], offset[1]), inner)),
        elements.Artefact("qr_code", confidence, (inner, outer)),
    ]
def _mock_lines(size=(1, 1), offset=(0, 0), polygons=False):
    """Build two mock Lines, the second shifted by half the area diagonally."""
    sub_size = (size[0] / 2, size[1] / 2)
    shifted = (offset[0] + sub_size[0], offset[1] + sub_size[1])
    return [
        elements.Line(_mock_words(size=sub_size, offset=offset, polygons=polygons)),
        elements.Line(_mock_words(size=sub_size, offset=shifted, polygons=polygons)),
    ]
def _mock_blocks(size=(1, 1), offset=(0, 0), polygons=False):
    """Build two mock Blocks laid out along the diagonal in quarter-size steps."""
    sub_size = (size[0] / 4, size[1] / 4)

    def _shift(steps):
        # Offset moved `steps` quarter-sizes along the diagonal
        return (offset[0] + steps * sub_size[0], offset[1] + steps * sub_size[1])

    return [
        elements.Block(
            _mock_lines(size=sub_size, offset=offset, polygons=polygons),
            _mock_artefacts(size=sub_size, offset=_shift(1)),
        ),
        elements.Block(
            _mock_lines(size=sub_size, offset=_shift(2), polygons=polygons),
            _mock_artefacts(size=sub_size, offset=_shift(3)),
        ),
    ]
def _mock_pages(block_size=(1, 1), block_offset=(0, 0), polygons=False):
    """Build two mock Pages with different sizes, orientations and languages."""
    # NOTE(review): the second page does not forward `polygons` (always boxes),
    # mirroring the original — confirm whether that asymmetry is intentional.
    first = elements.Page(
        np.random.randint(0, 255, (300, 200, 3), dtype=np.uint8),
        _mock_blocks(block_size, block_offset, polygons),
        0,
        (300, 200),
        {"value": 0.0, "confidence": 1.0},
        {"value": "EN", "confidence": 0.8},
    )
    second = elements.Page(
        np.random.randint(0, 255, (500, 1000, 3), dtype=np.uint8),
        _mock_blocks(block_size, block_offset),
        1,
        (500, 1000),
        {"value": 0.15, "confidence": 0.8},
        {"value": "FR", "confidence": 0.7},
    )
    return [first, second]
def test_element():
    # Unexpected sub-element types are rejected
    with pytest.raises(KeyError):
        elements.Element(sub_elements=[1])
def test_word():
    value = "hello"
    conf = 0.8
    geom = ((0, 0), (1, 1))
    objectness_score = 0.9
    crop_orientation = {"value": 0, "confidence": None}
    word = elements.Word(value, conf, geom, objectness_score, crop_orientation)
    # Attributes mirror the constructor arguments
    assert word.value == value
    assert word.confidence == conf
    assert word.geometry == geom
    assert word.objectness_score == objectness_score
    assert word.crop_orientation == crop_orientation
    # Rendering a word is just its text
    assert word.render() == value
    # Export round-trips every field
    assert word.export() == {
        "value": value,
        "confidence": conf,
        "geometry": geom,
        "objectness_score": objectness_score,
        "crop_orientation": crop_orientation,
    }
    # Repr
    assert repr(word) == f"Word(value='hello', confidence={conf:.2})"
    # from_dict rebuilds an equivalent element
    state_dict = {
        "value": "there",
        "confidence": 0.1,
        "geometry": ((0, 0), (0.5, 0.5)),
        "objectness_score": objectness_score,
        "crop_orientation": crop_orientation,
    }
    word = elements.Word.from_dict(state_dict)
    assert word.export() == state_dict
def test_line():
    geom = ((0, 0), (0.5, 0.5))
    objectness_score = 0.9
    words = _mock_words(size=geom[1], offset=geom[0])
    line = elements.Line(words)
    # Children and geometry
    assert len(line.words) == len(words)
    assert all(isinstance(word, elements.Word) for word in line.words)
    assert line.geometry == geom
    assert line.objectness_score == objectness_score
    # Rendering joins word values with spaces
    assert line.render() == "hello world"
    # Export
    assert line.export() == {
        "words": [w.export() for w in words],
        "geometry": geom,
        "objectness_score": objectness_score,
    }
    # Repr
    words_str = " " * 4 + ",\n ".join(repr(word) for word in words) + ","
    assert repr(line) == f"Line(\n (words): [\n{words_str}\n ]\n)"
    # Ensure that words repr does't span on several lines when there are none
    assert repr(elements.Line([], ((0, 0), (1, 1)))) == "Line(\n (words): []\n)"
    # from_dict round-trip
    state_dict = {
        "words": [
            {
                "value": "there",
                "confidence": 0.1,
                "geometry": ((0, 0), (1.0, 1.0)),
                "objectness_score": objectness_score,
                "crop_orientation": {"value": 0, "confidence": None},
            }
        ],
        "geometry": ((0, 0), (1.0, 1.0)),
        "objectness_score": objectness_score,
    }
    line = elements.Line.from_dict(state_dict)
    assert line.export() == state_dict
def test_artefact():
    artefact_type = "qr_code"
    conf = 0.8
    geom = ((0, 0), (1, 1))
    artefact = elements.Artefact(artefact_type, conf, geom)
    # Attributes mirror the constructor arguments
    assert artefact.type == artefact_type
    assert artefact.confidence == conf
    assert artefact.geometry == geom
    # Rendered as an uppercase placeholder tag
    assert artefact.render() == "[QR_CODE]"
    # Export
    assert artefact.export() == {"type": artefact_type, "confidence": conf, "geometry": geom}
    # Repr
    assert repr(artefact) == f"Artefact(type='{artefact_type}', confidence={conf:.2})"
def test_block():
    geom = ((0, 0), (1, 1))
    sub_size = (geom[1][0] / 2, geom[1][0] / 2)
    objectness_score = 0.9
    lines = _mock_lines(size=sub_size, offset=geom[0])
    artefacts = _mock_artefacts(size=sub_size, offset=sub_size)
    block = elements.Block(lines, artefacts)
    # Children are kept as typed element lists
    assert len(block.lines) == len(lines)
    assert len(block.artefacts) == len(artefacts)
    assert all(isinstance(line, elements.Line) for line in block.lines)
    assert all(isinstance(artefact, elements.Artefact) for artefact in block.artefacts)
    assert block.geometry == geom
    # Rendering joins line renders with newlines
    assert block.render() == "hello world\nhello world"
    # Export
    assert block.export() == {
        "lines": [line.export() for line in lines],
        "artefacts": [artefact.export() for artefact in artefacts],
        "geometry": geom,
        "objectness_score": objectness_score,
    }
def test_page():
    img = np.zeros((300, 200, 3), dtype=np.uint8)
    page_idx = 0
    page_size = (300, 200)
    orientation = {"value": 0.0, "confidence": 0.0}
    language = {"value": "EN", "confidence": 0.8}
    blocks = _mock_blocks()
    page = elements.Page(img, blocks, page_idx, page_size, orientation, language)
    # Attributes mirror the constructor arguments
    assert len(page.blocks) == len(blocks)
    assert all(isinstance(b, elements.Block) for b in page.blocks)
    assert isinstance(page.page, np.ndarray)
    assert page.page_idx == page_idx
    assert page.dimensions == page_size
    assert page.orientation == orientation
    assert page.language == language
    # Rendering joins block renders with blank lines
    assert page.render() == "hello world\nhello world\n\nhello world\nhello world"
    # Export
    assert page.export() == {
        "blocks": [b.export() for b in blocks],
        "page_idx": page_idx,
        "dimensions": page_size,
        "orientation": orientation,
        "language": language,
    }
    # XML export yields a (raw bytes, parsed tree) pair
    xml_output = page.export_as_xml()
    assert isinstance(xml_output, tuple)
    assert isinstance(xml_output[0], (bytes, bytearray))
    assert isinstance(xml_output[1], ElementTree)
    # Repr
    assert "\n".join(repr(page).split("\n")[:2]) == f"Page(\n dimensions={page_size!r}"
    # Non-blocking visualization
    page.show(block=False)
    # Synthesize back into an image of the page's dimensions
    synthesized = page.synthesize()
    assert isinstance(synthesized, np.ndarray)
    assert synthesized.shape == (*page_size, 3)
def test_document():
    pages = _mock_pages()
    doc = elements.Document(pages)
    # Children
    assert len(doc.pages) == len(pages)
    assert all(isinstance(p, elements.Page) for p in doc.pages)
    # Rendering joins page renders with blank lines
    page_export = "hello world\nhello world\n\nhello world\nhello world"
    assert doc.render() == f"{page_export}\n\n\n\n{page_export}"
    # Export
    assert doc.export() == {"pages": [p.export() for p in pages]}
    # XML export: one (bytes, tree) pair per page, structured as hOCR
    xml_output = doc.export_as_xml()
    assert isinstance(xml_output, list) and len(xml_output) == len(pages)
    for xml_bytes, xml_tree in xml_output:
        assert isinstance(xml_bytes, bytes)
        assert isinstance(xml_tree, ElementTree)
        root = xml_tree.getroot()
        assert root.tag == "html"
        assert root[0].tag == "head"
        body = root[1]
        assert body.tag == "body"
        ocr_page = body[0]
        assert ocr_page.tag == "div" and ocr_page.attrib["class"] == "ocr_page"
        for area in ocr_page:
            assert area.tag == "div" and area.attrib["class"] == "ocr_carea"
            paragraph = area[0]
            assert paragraph.tag == "p" and paragraph.attrib["class"] == "ocr_par"
            for line in paragraph:
                assert line.tag == "span" and line.attrib["class"] == "ocr_line"
                for word in line:
                    assert word.tag == "span" and word.attrib["class"] == "ocrx_word"
    # Non-blocking visualization
    doc.show(block=False)
    # Synthesize: one image per page
    img_list = doc.synthesize()
    assert isinstance(img_list, list) and len(img_list) == len(pages)
================================================
FILE: tests/common/test_models.py
================================================
from io import BytesIO
import cv2
import numpy as np
import pytest
import requests
from onnxtr.io import reader
from onnxtr.models._utils import estimate_orientation, get_language
from onnxtr.utils import geometry
@pytest.fixture(scope="function")
def mock_image(tmpdir_factory):
    """Download a sample bitmap, cache it on disk, and return it as a numpy image."""
    url = "https://doctr-static.mindee.com/models?id=v0.2.1/bitmap30.png&src=0"
    payload = BytesIO(requests.get(url).content)
    tmp_path = str(tmpdir_factory.mktemp("data").join("mock_bitmap.jpg"))
    with open(tmp_path, "wb") as f:
        f.write(payload.getbuffer())
    return reader.read_img_as_numpy(tmp_path)
@pytest.fixture(scope="function")
def mock_bitmap(mock_image):
    """Grayscale, [0, 1]-scaled version of mock_image with a trailing channel axis."""
    gray = np.squeeze(cv2.cvtColor(mock_image, cv2.COLOR_BGR2GRAY) / 255.0)
    return np.expand_dims(gray, axis=-1)
def test_estimate_orientation(mock_image, mock_bitmap, mock_tilted_payslip):
    # A blank (all-zero) image has no estimable orientation
    assert estimate_orientation(mock_image * 0) == 0
    # test binarized image
    # NOTE(review): these bounds are one-sided — `abs(angle) - 30 < 1.0` also holds
    # for small angles, so they only guard against over-estimation. Confirm intent.
    angle = estimate_orientation(mock_bitmap)
    assert abs(angle) - 30 < 1.0
    angle = estimate_orientation(mock_bitmap * 255)
    assert abs(angle) - 30.0 < 1.0
    angle = estimate_orientation(mock_image)
    assert abs(angle) - 30.0 < 1.0
    # Rotating by the estimated angle should level the image
    rotated = geometry.rotate_image(mock_image, angle)
    angle_rotated = estimate_orientation(rotated)
    assert abs(angle_rotated) == 0
    mock_tilted_payslip = reader.read_img_as_numpy(mock_tilted_payslip)
    assert estimate_orientation(mock_tilted_payslip) == -30
    rotated = geometry.rotate_image(mock_tilted_payslip, -30, expand=True)
    angle_rotated = estimate_orientation(rotated)
    assert abs(angle_rotated) < 1.0
    # A (10, 10, 10) array is rejected (AssertionError from the input checks)
    with pytest.raises(AssertionError):
        estimate_orientation(np.ones((10, 10, 10)))
    # test with general_page_orientation
    assert estimate_orientation(mock_bitmap, (90, 0.9)) in range(140, 160)
    rotated = geometry.rotate_image(mock_tilted_payslip, -30)
    assert estimate_orientation(rotated, (0, 0.9)) in range(-10, 10)
    assert estimate_orientation(mock_image, (0, 0.9)) - 30 < 1.0
    # Aspect Ratio Independence (Portrait vs Landscape)
    # Pad the tilted image to be very tall (Portrait)
    portrait_img = cv2.copyMakeBorder(mock_tilted_payslip, 500, 500, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    # Pad the tilted image to be very wide (Landscape)
    landscape_img = cv2.copyMakeBorder(mock_tilted_payslip, 0, 0, 500, 500, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    assert abs(estimate_orientation(portrait_img) - (-30)) <= 1.0
    assert abs(estimate_orientation(landscape_img) - (-30)) <= 1.0
    # Perpendicular Noise Test
    # A single vertical line must not bias the estimate away from 0
    vertical_noise = np.zeros((1000, 1000, 3), dtype=np.uint8)
    cv2.line(vertical_noise, (500, 100), (500, 900), (255, 255, 255), 10)
    assert estimate_orientation(vertical_noise) == 0
def test_get_lang():
    # A clear English sentence is detected with high confidence
    sentence = "This is a test sentence."
    expected_lang = "en"
    threshold_prob = 0.99
    lang = get_language(sentence)
    assert lang[0] == expected_lang
    assert lang[1] > threshold_prob
    # Too-short input cannot be classified
    lang = get_language("a")
    assert lang[0] == "unknown"
    assert lang[1] == 0.0
================================================
FILE: tests/common/test_models_builder.py
================================================
import numpy as np
import pytest
from onnxtr.io import Document
from onnxtr.models import builder
words_per_page = 10
def test_documentbuilder():
    num_pages = 2
    # Don't resolve lines
    doc_builder = builder.DocumentBuilder(resolve_lines=False, resolve_blocks=False)
    pages = [np.zeros((100, 200, 3))] * num_pages
    boxes = np.random.rand(words_per_page, 6)  # array format
    # NOTE(review): scales rows 0-1 by rows 2-3 in place — presumably to keep
    # coordinates ordered; confirm against DocumentBuilder's expected box format.
    boxes[:2] *= boxes[2:4]
    objectness_scores = np.array([0.9] * words_per_page)
    # Arg consistency check: word-value count mismatching page count must raise
    with pytest.raises(ValueError):
        doc_builder(
            pages,
            [boxes, boxes],
            [objectness_scores, objectness_scores],
            [("hello", 1.0)] * 3,
            [(100, 200), (100, 200)],
            [{"value": 0, "confidence": None}] * 3,
        )
    out = doc_builder(
        pages,
        [boxes, boxes],
        [objectness_scores, objectness_scores],
        [[("hello", 1.0)] * words_per_page] * num_pages,
        [(100, 200), (100, 200)],
        [[{"value": 0, "confidence": None}] * words_per_page] * num_pages,
    )
    assert isinstance(out, Document)
    assert len(out.pages) == num_pages
    assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all(
        page.page.shape == (100, 200, 3) for page in out.pages
    )
    # 1 Block & 1 line per page
    assert len(out.pages[0].blocks) == 1 and len(out.pages[0].blocks[0].lines) == 1
    assert len(out.pages[0].blocks[0].lines[0].words) == words_per_page
    # Resolve lines
    doc_builder = builder.DocumentBuilder(resolve_lines=True, resolve_blocks=True)
    out = doc_builder(
        pages,
        [boxes, boxes],
        [objectness_scores, objectness_scores],
        [[("hello", 1.0)] * words_per_page] * num_pages,
        [(100, 200), (100, 200)],
        [[{"value": 0, "confidence": None}] * words_per_page] * num_pages,
    )
    # No detection: empty boxes yield a page with no blocks
    boxes = np.zeros((0, 4))
    objectness_scores = np.zeros([0])
    out = doc_builder(
        pages, [boxes, boxes], [objectness_scores, objectness_scores], [[], []], [(100, 200), (100, 200)], [[]]
    )
    assert len(out.pages[0].blocks) == 0
    # Rotated boxes to export as straight boxes
    boxes = np.array([
        [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
        [[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]],
    ])
    objectness_scores = np.array([0.99, 0.99])
    doc_builder_2 = builder.DocumentBuilder(resolve_blocks=False, resolve_lines=False, export_as_straight_boxes=True)
    out = doc_builder_2(
        [np.zeros((100, 100, 3))],
        [boxes],
        [objectness_scores],
        [[("hello", 0.99), ("word", 0.99)]],
        [(100, 100)],
        [[{"value": 0, "confidence": None}] * 2],
    )
    # The exported straight box is the axis-aligned envelope of the rotated polygon
    assert out.pages[0].blocks[0].lines[0].words[-1].geometry == ((0.45, 0.5), (0.6, 0.65))
    assert out.pages[0].blocks[0].lines[0].words[-1].objectness_score == 0.99
    # Repr
    assert (
        repr(doc_builder) == "DocumentBuilder(resolve_lines=True, "
        "resolve_blocks=True, paragraph_break=0.035, export_as_straight_boxes=False)"
    )
@pytest.mark.parametrize(
    "input_boxes, sorted_idxs",
    [
        [[[0, 0.5, 0.1, 0.6], [0, 0.3, 0.2, 0.4], [0, 0, 0.1, 0.1]], [2, 1, 0]],  # vertical
        [[[0.7, 0.5, 0.85, 0.6], [0.2, 0.3, 0.4, 0.4], [0, 0, 0.1, 0.1]], [2, 1, 0]],  # diagonal
        [[[0, 0.5, 0.1, 0.6], [0.15, 0.5, 0.25, 0.6], [0.5, 0.5, 0.6, 0.6]], [0, 1, 2]],  # same line, 2p
        [[[0, 0.5, 0.1, 0.6], [0.2, 0.49, 0.35, 0.59], [0.8, 0.52, 0.9, 0.63]], [0, 1, 2]],  # ~same line
        [[[0, 0.3, 0.4, 0.45], [0.5, 0.28, 0.75, 0.42], [0, 0.45, 0.1, 0.55]], [0, 1, 2]],  # 2 lines
        [[[0, 0.3, 0.4, 0.35], [0.75, 0.28, 0.95, 0.42], [0, 0.45, 0.1, 0.55]], [0, 1, 2]],  # 2 lines
        [
            [
                [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
                [[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]],
            ],
            [0, 1],
        ],  # rot
    ],
)
def test_sort_boxes(input_boxes, sorted_idxs):
    # _sort_boxes returns the reading-order permutation as its first element
    doc_builder = builder.DocumentBuilder()
    sorted_indices = doc_builder._sort_boxes(np.asarray(input_boxes))[0]
    assert sorted_indices.tolist() == sorted_idxs
@pytest.mark.parametrize(
    "input_boxes, lines",
    [
        [[[0, 0.5, 0.1, 0.6], [0, 0.3, 0.2, 0.4], [0, 0, 0.1, 0.1]], [[2], [1], [0]]],  # vertical
        [[[0.7, 0.5, 0.85, 0.6], [0.2, 0.3, 0.4, 0.4], [0, 0, 0.1, 0.1]], [[2], [1], [0]]],  # diagonal
        [[[0, 0.5, 0.14, 0.6], [0.15, 0.5, 0.25, 0.6], [0.5, 0.5, 0.6, 0.6]], [[0, 1], [2]]],  # same line, 2p
        [[[0, 0.5, 0.18, 0.6], [0.2, 0.48, 0.35, 0.58], [0.8, 0.52, 0.9, 0.63]], [[0, 1], [2]]],  # ~same line
        [[[0, 0.3, 0.48, 0.45], [0.5, 0.28, 0.75, 0.42], [0, 0.45, 0.1, 0.55]], [[0, 1], [2]]],  # 2 lines
        [[[0, 0.3, 0.4, 0.35], [0.75, 0.28, 0.95, 0.42], [0, 0.45, 0.1, 0.55]], [[0], [1], [2]]],  # 2 lines
        [
            [
                [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
                [[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]],
            ],
            [[0], [1]],
        ],  # rot
    ],
)
def test_resolve_lines(input_boxes, lines):
    # _resolve_lines groups box indices into per-line lists
    doc_builder = builder.DocumentBuilder()
    assert doc_builder._resolve_lines(np.asarray(input_boxes)) == lines
================================================
FILE: tests/common/test_models_classification.py
================================================
import cv2
import numpy as np
import pytest
from onnxtr.models import classification, detection
from onnxtr.models.classification.predictor import OrientationPredictor
from onnxtr.models.engine import Engine
@pytest.mark.parametrize(
    "arch_name, input_shape",
    [
        ["mobilenet_v3_small_crop_orientation", (256, 256, 3)],
        ["mobilenet_v3_small_page_orientation", (512, 512, 3)],
    ],
)
def test_classification_models(arch_name, input_shape):
    # Each classification arch maps a batch to a (batch, 4) ndarray
    batch_size = 8
    model = classification.__dict__[arch_name]()
    assert isinstance(model, Engine)
    batch = np.random.rand(batch_size, *input_shape).astype(np.float32)
    predictions = model(batch)
    assert isinstance(predictions, np.ndarray)
    assert predictions.shape == (8, 4)
@pytest.mark.parametrize(
    "arch_name",
    [
        "mobilenet_v3_small_crop_orientation",
        "mobilenet_v3_small_page_orientation",
    ],
)
def test_classification_zoo(arch_name):
    # Crop and page predictors use different batch sizes and input resolutions
    if "crop" in arch_name:
        batch_size = 16
        input_array = np.random.rand(batch_size, 3, 256, 256).astype(np.float32)
        predictor = classification.zoo.crop_orientation_predictor(arch_name)
        # Unknown architectures are rejected
        with pytest.raises(ValueError):
            predictor = classification.zoo.crop_orientation_predictor(arch="wrong_model")
    else:
        batch_size = 2
        input_array = np.random.rand(batch_size, 3, 512, 512).astype(np.float32)
        predictor = classification.zoo.page_orientation_predictor(arch_name)
        with pytest.raises(ValueError):
            predictor = classification.zoo.page_orientation_predictor(arch="wrong_model")
    assert isinstance(predictor, OrientationPredictor)
    out = predictor(input_array)
    class_idxs, classes, confs = out[0], out[1], out[2]
    # One prediction triple per batch element, with the expected types/ranges
    for output in (class_idxs, classes, confs):
        assert isinstance(output, list) and len(output) == batch_size
    assert all(isinstance(pred, int) for pred in class_idxs)
    assert all(isinstance(pred, int) for pred in classes) and all(pred in [0, 90, 180, -90] for pred in classes)
    assert all(isinstance(pred, float) for pred in confs)
@pytest.mark.parametrize("quantized", [False, True])
def test_crop_orientation_model(mock_text_box, quantized):
    """Check that the crop-orientation classifier identifies 0/90/180/270-degree rotations."""
    text_box_0 = cv2.imread(mock_text_box)
    # np.rot90 rotates counter-clockwise
    text_box_270 = np.rot90(text_box_0, 1)
    text_box_180 = np.rot90(text_box_0, 2)
    text_box_90 = np.rot90(text_box_0, 3)
    classifier = classification.crop_orientation_predictor(
        "mobilenet_v3_small_crop_orientation", load_in_8_bit=quantized
    )
    # First output list: class indices in rotation order
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3]
    # 270 degrees is equivalent to -90 degrees
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90]
    # Third output list: confidences as plain floats
    assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2])
    # Test custom model loading
    classifier = classification.crop_orientation_predictor(
        classification.mobilenet_v3_small_crop_orientation(load_in_8_bit=quantized)
    )
    assert isinstance(classifier, OrientationPredictor)
    # A detection model is not a valid orientation classifier
    with pytest.raises(ValueError):
        _ = classification.crop_orientation_predictor(detection.db_resnet34())
    # Test with disabled predictor: fixed outputs (class 0, angle 0, confidence 1.0)
    classifier = classification.crop_orientation_predictor("mobilenet_v3_small_crop_orientation", disabled=True)
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90]) == [
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [1.0, 1.0, 1.0, 1.0],
    ]
@pytest.mark.parametrize("quantized", [False, True])
def test_page_orientation_model(mock_payslip, quantized):
    """Check that the page-orientation classifier identifies 0/90/180/270-degree rotations.

    Fix: build the predictor via `page_orientation_predictor` (512x512 preprocessing)
    instead of `crop_orientation_predictor` (256x256), which fed the page model
    inputs at the wrong resolution and was inconsistent with the crop test above.
    """
    text_box_0 = cv2.imread(mock_payslip)
    # np.rot90 rotates counter-clockwise
    text_box_270 = np.rot90(text_box_0, 1)
    text_box_180 = np.rot90(text_box_0, 2)
    text_box_90 = np.rot90(text_box_0, 3)
    classifier = classification.page_orientation_predictor(
        "mobilenet_v3_small_page_orientation", load_in_8_bit=quantized
    )
    # First output list: class indices in rotation order
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3]
    # 270 degrees is equivalent to -90 degrees
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90]
    # Third output list: confidences as plain floats
    assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2])
    # Test custom model loading
    classifier = classification.page_orientation_predictor(
        classification.mobilenet_v3_small_page_orientation(load_in_8_bit=quantized)
    )
    assert isinstance(classifier, OrientationPredictor)
    # A detection model is not a valid orientation classifier
    with pytest.raises(ValueError):
        _ = classification.page_orientation_predictor(detection.db_resnet34())
    # Test with disabled predictor: fixed outputs (class 0, angle 0, confidence 1.0)
    classifier = classification.page_orientation_predictor("mobilenet_v3_small_page_orientation", disabled=True)
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90]) == [
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [1.0, 1.0, 1.0, 1.0],
    ]
================================================
FILE: tests/common/test_models_detection.py
================================================
import numpy as np
import pytest
from onnxtr.models import detection
from onnxtr.models.detection.postprocessor.base import GeneralDetectionPostProcessor
from onnxtr.models.detection.predictor import DetectionPredictor
from onnxtr.models.engine import Engine
def test_postprocessor():
    """Validate GeneralDetectionPostProcessor output shapes, coordinate ranges and repr.

    Covers both the straight-page (axis-aligned boxes, shape (N, 5)) and the
    rotated-page (4-point polygons, shape (N, 4, 2) plus score) configurations,
    plus a known-tricky polygon for `polygon_to_box`.
    """
    postprocessor = GeneralDetectionPostProcessor(assume_straight_pages=True)
    r_postprocessor = GeneralDetectionPostProcessor(assume_straight_pages=False)
    # A 3D probability map (missing the channel axis) must be rejected
    with pytest.raises(AssertionError):
        postprocessor(np.random.rand(2, 512, 512).astype(np.float32))
    mock_batch = np.random.rand(2, 512, 512, 1).astype(np.float32)
    out = postprocessor(mock_batch)
    r_out = r_postprocessor(mock_batch)
    # Batch composition
    assert isinstance(out, list)
    assert len(out) == 2
    assert all(isinstance(sample, list) and all(isinstance(v, np.ndarray) for v in sample) for sample in out)
    # Straight pages: (xmin, ymin, xmax, ymax, score); rotated: 4 corner points + score
    assert all(all(v.shape[1] == 5 for v in sample) for sample in out)
    assert all(all(v.shape[1] == 5 and v.shape[2] == 2 for v in sample) for sample in r_out)
    # Relative coords
    assert all(all(np.all(np.logical_and(v[:, :4] >= 0, v[:, :4] <= 1)) for v in sample) for sample in out)
    assert all(all(np.all(np.logical_and(v[:, :4] >= 0, v[:, :4] <= 1)) for v in sample) for sample in r_out)
    # Repr
    assert repr(postprocessor) == "GeneralDetectionPostProcessor(bin_thresh=0.1, box_thresh=0.1)"
    # Edge case when the expanded points of the polygon has two lists
    issue_points = np.array(
        [
            [869, 561],
            [923, 581],
            [925, 595],
            [915, 583],
            [889, 583],
            [905, 593],
            [882, 601],
            [901, 595],
            [904, 604],
            [876, 608],
            [915, 614],
            [911, 605],
            [925, 601],
            [930, 616],
            [911, 617],
            [900, 636],
            [931, 637],
            [904, 649],
            [932, 649],
            [932, 628],
            [918, 627],
            [934, 624],
            [935, 573],
            [909, 569],
            [934, 562],
        ],
        dtype=np.int32,
    )
    out = postprocessor.polygon_to_box(issue_points)
    r_out = r_postprocessor.polygon_to_box(issue_points)
    # Straight mode collapses to a 4-tuple box; rotated mode keeps a (4, 2) polygon
    assert isinstance(out, tuple) and len(out) == 4
    assert isinstance(r_out, np.ndarray) and r_out.shape == (4, 2)
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "arch_name, input_shape, output_size, out_prob",
    [
        ["db_resnet34", (1024, 1024, 3), (1024, 1024, 1), True],
        ["db_resnet50", (1024, 1024, 3), (1024, 1024, 1), True],
        ["db_mobilenet_v3_large", (1024, 1024, 3), (1024, 1024, 1), True],
        ["linknet_resnet18", (1024, 1024, 3), (1024, 1024, 1), True],
        ["linknet_resnet34", (1024, 1024, 3), (1024, 1024, 1), True],
        ["linknet_resnet50", (1024, 1024, 3), (1024, 1024, 1), True],
        ["fast_tiny", (1024, 1024, 3), (1024, 1024, 1), True],
        ["fast_small", (1024, 1024, 3), (1024, 1024, 1), True],
        ["fast_base", (1024, 1024, 3), (1024, 1024, 1), True],
    ],
)
def test_detection_models(arch_name, input_shape, output_size, out_prob, quantized):
    """Smoke-test every detection architecture: probability map shape/range and box validity."""
    batch_size = 2
    model = detection.__dict__[arch_name](load_in_8_bit=quantized)
    assert isinstance(model, Engine)
    input_array = np.random.rand(batch_size, *input_shape).astype(np.float32)
    # return_model_output=True yields both the raw map and the decoded boxes
    out = model(input_array, return_model_output=True)
    assert isinstance(out, dict)
    assert len(out) == 2
    # Check proba map
    assert out["out_map"].shape == (batch_size, *output_size)
    assert out["out_map"].dtype == np.float32
    if out_prob:
        assert np.all(out["out_map"] >= 0) and np.all(out["out_map"] <= 1)
    # Check boxes: relative (xmin, ymin, xmax, ymax, score) with xmin < xmax, ymin < ymax
    for boxes_list in out["preds"]:
        for boxes in boxes_list:
            assert boxes.shape[1] == 5
            assert np.all(boxes[:, :2] < boxes[:, 2:4])
            assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1)
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "arch_name",
    [
        "db_resnet34",
        "db_resnet50",
        "db_mobilenet_v3_large",
        "linknet_resnet18",
        "linknet_resnet34",
        "linknet_resnet50",
        "fast_tiny",
        "fast_base",
        "fast_small",
    ],
)
def test_detection_zoo(arch_name, quantized):
    """Build each detection predictor through the zoo and validate boxes and seg maps."""
    # Model
    predictor = detection.zoo.detection_predictor(
        arch_name, load_in_8_bit=quantized, preserve_aspect_ratio=False, symmetric_pad=False
    )
    # object check
    assert isinstance(predictor, DetectionPredictor)
    input_array = np.random.rand(2, 3, 1024, 1024).astype(np.float32)
    # return_maps=True also yields the per-page segmentation maps
    out, seq_maps = predictor(input_array, return_maps=True)
    assert isinstance(out, list)
    for box in out:
        assert isinstance(box, np.ndarray)
        # Relative (xmin, ymin, xmax, ymax, score) boxes
        assert box.shape[1] == 5
        assert np.all(box[:, :2] < box[:, 2:4])
        assert np.all(box[:, :4] >= 0) and np.all(box[:, :4] <= 1)
    assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps)
    assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps)
    # check that all values in the seq_maps are between 0 and 1
    assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps)
================================================
FILE: tests/common/test_models_detection_utils.py
================================================
import numpy as np
import pytest
from onnxtr.models.detection._utils import _remove_padding
@pytest.mark.parametrize("pages", [[np.zeros((1000, 1000))], [np.zeros((1000, 2000))], [np.zeros((2000, 1000))]])
@pytest.mark.parametrize("preserve_aspect_ratio", [True, False])
@pytest.mark.parametrize("symmetric_pad", [True, False])
@pytest.mark.parametrize("assume_straight_pages", [True, False])
def test_remove_padding(pages, preserve_aspect_ratio, symmetric_pad, assume_straight_pages):
    """Check that `_remove_padding` maps predictions from padded back to original coordinates.

    Expected values depend on the page aspect ratio and the padding mode:
    square pages (or no aspect-ratio preservation) leave predictions untouched;
    otherwise coordinates are shifted/rescaled along the padded axis and clipped to [0, 1].
    """
    h, w = pages[0].shape
    # straight pages test cases
    if assume_straight_pages:
        loc_preds = [np.array([[0.7, 0.1, 0.7, 0.2]])]
        if h == w or not preserve_aspect_ratio:
            expected = loc_preds
        else:
            if symmetric_pad:
                if h > w:
                    expected = [np.array([[0.9, 0.1, 0.9, 0.2]])]
                else:
                    expected = [np.array([[0.7, 0.0, 0.7, 0.0]])]
            else:
                if h > w:
                    # Rescaled x exceeds the page and is clipped to 1.0
                    expected = [np.array([[1.0, 0.1, 1.0, 0.2]])]
                else:
                    expected = [np.array([[0.7, 0.2, 0.7, 0.4]])]
    # non-straight pages test cases
    else:
        loc_preds = [np.array([[[0.9, 0.1], [0.9, 0.2], [0.8, 0.2], [0.8, 0.2]]])]
        if h == w or not preserve_aspect_ratio:
            expected = loc_preds
        else:
            if symmetric_pad:
                if h > w:
                    expected = [np.array([[[1.0, 0.1], [1.0, 0.2], [1.0, 0.2], [1.0, 0.2]]])]
                else:
                    expected = [np.array([[[0.9, 0.0], [0.9, 0.0], [0.8, 0.0], [0.8, 0.0]]])]
            else:
                if h > w:
                    expected = [np.array([[[1.0, 0.1], [1.0, 0.2], [1.0, 0.2], [1.0, 0.2]]])]
                else:
                    expected = [np.array([[[0.9, 0.2], [0.9, 0.4], [0.8, 0.4], [0.8, 0.4]]])]
    result = _remove_padding(pages, loc_preds, preserve_aspect_ratio, symmetric_pad, assume_straight_pages)
    for res, exp in zip(result, expected):
        assert np.allclose(res, exp)
================================================
FILE: tests/common/test_models_factory.py
================================================
import json
import os
import tempfile
import pytest
from onnxtr import models
from onnxtr.models.factory import _save_model_and_config_for_hf_hub, from_hub, push_to_hf_hub
# Map each task name to the architectures exposed through its zoo;
# used below to iterate over every exportable model.
AVAILABLE_ARCHS = {
    "classification": models.classification.zoo.ORIENTATION_ARCHS,
    "detection": models.detection.zoo.ARCHS,
    "recognition": models.recognition.zoo.ARCHS,
}
def test_push_to_hf_hub():
    """push_to_hf_hub must validate its arguments before attempting any upload."""
    dummy_model = models.classification.mobilenet_v3_small_crop_orientation()

    # Neither run_config nor arch was specified
    with pytest.raises(ValueError):
        push_to_hf_hub(dummy_model, model_name="test", task="classification")

    # Task name outside {classification, detection, recognition, obj_detection}
    with pytest.raises(ValueError):
        push_to_hf_hub(dummy_model, model_name="test", task="invalid_task", arch="mobilenet_v3_small")

    # Architecture that does not belong to the requested task
    with pytest.raises(ValueError):
        push_to_hf_hub(dummy_model, model_name="test", task="detection", arch="crnn_mobilenet_v3_large")
def test_models_huggingface_hub(tmpdir):
    """Round-trip every exportable model through the HF-hub save format.

    For each (task, arch) pair: save model + config into a temp dir, check that
    exactly the two expected artifacts exist and the config matches the model,
    then reload the published counterpart from the hub.

    Fixes: close the config file deterministically (was a bare `json.load(open(...))`
    handle leak) and build paths with `os.path.join` instead of string concatenation.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        for task_name, archs in AVAILABLE_ARCHS.items():
            for arch_name in archs:
                model = models.__dict__[task_name].__dict__[arch_name]()
                _save_model_and_config_for_hf_hub(model, arch=arch_name, task=task_name, save_dir=tmp_dir)
                assert hasattr(model, "cfg")
                # Exactly the two expected artifacts: model.onnx + config.json
                assert len(os.listdir(tmp_dir)) == 2
                assert os.path.exists(os.path.join(tmp_dir, "model.onnx"))
                config_path = os.path.join(tmp_dir, "config.json")
                assert os.path.exists(config_path)
                with open(config_path) as config_file:
                    tmp_config = json.load(config_file)
                assert arch_name == tmp_config["arch"]
                assert task_name == tmp_config["task"]
                # Every saved config key must exist in the in-memory model config
                assert all(key in model.cfg.keys() for key in tmp_config.keys())
                # test from hub
                hub_model = from_hub(repo_id="Felix92/onnxtr-{}".format(arch_name).replace("_", "-"))
                assert isinstance(hub_model, type(model))
================================================
FILE: tests/common/test_models_preprocessor.py
================================================
import numpy as np
import pytest
from onnxtr.models.preprocessor import PreProcessor
@pytest.mark.parametrize(
    "batch_size, output_size, input_tensor, expected_batches, expected_value",
    [
        [2, (128, 128), np.full((3, 256, 128, 3), 255, dtype=np.uint8), 1, 0.5],  # numpy uint8
        [2, (128, 128), np.ones((3, 256, 128, 3), dtype=np.float32), 1, 0.5],  # numpy fp32
        [2, (128, 128), [np.full((256, 128, 3), 255, dtype=np.uint8)] * 3, 2, 0.5],  # list of numpy uint8
        [2, (128, 128), [np.ones((256, 128, 3), dtype=np.float32)] * 3, 2, 0.5],  # list of numpy fp32 list of tf fp32
    ],
)
def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, expected_value):
    """PreProcessor validates input types/ranks, then resizes, normalizes and batches."""
    processor = PreProcessor(output_size, batch_size)
    # Invalid input type
    with pytest.raises(TypeError):
        processor(42)
    # 4D check
    with pytest.raises(AssertionError):
        processor(np.full((256, 128, 3), 255, dtype=np.uint8))
    with pytest.raises(TypeError):
        processor(np.full((1, 256, 128, 3), 255, dtype=np.int32))
    # 3D check
    with pytest.raises(AssertionError):
        processor([np.full((3, 256, 128, 3), 255, dtype=np.uint8)])
    with pytest.raises(TypeError):
        processor([np.full((256, 128, 3), 255, dtype=np.int32)])
    out = processor(input_tensor)
    # Output is a list of float32 batches at the target size, with every pixel
    # mapped to the expected normalized value
    assert isinstance(out, list) and len(out) == expected_batches
    assert all(isinstance(b, np.ndarray) for b in out)
    assert all(b.dtype == np.float32 for b in out)
    assert all(b.shape[1:3] == output_size for b in out)
    assert all(np.all(b == expected_value) for b in out)
    # Repr spans the fixed four-line layout
    assert len(repr(processor).split("\n")) == 4
================================================
FILE: tests/common/test_models_recognition.py
================================================
import numpy as np
import pytest
from onnxtr.models import recognition
from onnxtr.models.engine import Engine
from onnxtr.models.recognition.core import RecognitionPostProcessor
from onnxtr.models.recognition.predictor import RecognitionPredictor
from onnxtr.models.recognition.predictor._utils import remap_preds, split_crops
from onnxtr.utils.vocabs import VOCABS
def test_recognition_postprocessor():
    """The base post-processor exposes its vocab and an embedding with a trailing blank entry."""
    vocab = VOCABS["french"]
    processor = RecognitionPostProcessor(vocab)
    assert processor.extra_repr() == f"vocab_size={len(vocab)}"
    assert processor.vocab == vocab
    # Embedding is every vocab character followed by the blank token
    assert processor._embedding == [*vocab, ""]
@pytest.mark.parametrize(
    "crops, max_ratio, target_ratio, target_overlap_ratio, channels_last, num_crops",
    [
        # No split required
        [[np.zeros((32, 128, 3), dtype=np.uint8)], 8, 4, 0.5, True, 1],
        [[np.zeros((3, 32, 128), dtype=np.uint8)], 8, 4, 0.5, False, 1],
        # Split required
        [[np.zeros((32, 1024, 3), dtype=np.uint8)], 8, 6, 0.5, True, 10],
        [[np.zeros((3, 32, 1024), dtype=np.uint8)], 8, 6, 0.5, False, 10],
    ],
)
def test_split_crops(crops, max_ratio, target_ratio, target_overlap_ratio, channels_last, num_crops):
    """Over-wide crops are split into `num_crops` pieces; the crop map keeps one entry per input."""
    pieces, mapping, remap_needed = split_crops(crops, max_ratio, target_ratio, target_overlap_ratio, channels_last)
    assert len(pieces) == num_crops
    assert len(mapping) == len(crops)
    # Remapping is required exactly when splitting produced extra crops
    assert remap_needed == (len(crops) != len(pieces))
@pytest.mark.parametrize(
    "preds, crop_map, split_overlap_ratio, pred",
    [
        # Nothing to remap
        ([("hello", 0.5)], [0], 0.5, [("hello", 0.5)]),
        # Merge
        ([("hellowo", 0.5), ("loworld", 0.6)], [(0, 2, 0.5)], 0.5, [("helloworld", 0.55)]),
    ],
)
def test_remap_preds(preds, crop_map, split_overlap_ratio, pred):
    """Predictions from split crops are merged back into one (text, confidence) pair per crop."""
    remapped = remap_preds(preds, crop_map, split_overlap_ratio)
    assert len(remapped) == len(pred)
    assert remapped == pred
    # Every entry is a (str, float) tuple
    assert all(isinstance(item, tuple) for item in remapped)
    assert all(isinstance(item[0], str) and isinstance(item[1], float) for item in remapped)
@pytest.mark.parametrize(
    "inputs, max_ratio, target_ratio, target_overlap_ratio, expected_remap_required, expected_len, expected_shape, "
    "expected_crop_map, channels_last",
    [
        # Don't split
        ([np.zeros((32, 32 * 4, 3))], 4, 4, 0.5, False, 1, (32, 128, 3), 0, True),
        # Split needed
        ([np.zeros((32, 32 * 4 + 1, 3))], 4, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.9921875), True),
        # Larger max ratio prevents split
        ([np.zeros((32, 32 * 8, 3))], 8, 4, 0.5, False, 1, (32, 256, 3), 0, True),
        # Half-overlap, two crops
        ([np.zeros((32, 128 + 64, 3))], 4, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.5), True),
        # Half-overlap, two crops, channels first
        ([np.zeros((3, 32, 128 + 64))], 4, 4, 0.5, True, 2, (3, 32, 128), (0, 2, 0.5), False),
        # Half-overlap with small max_ratio forces split
        ([np.zeros((32, 128 + 64, 3))], 2, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.5), True),
        # > half last overlap ratio
        ([np.zeros((32, 128 + 32, 3))], 4, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.75), True),
        # 3 crops, half last overlap
        ([np.zeros((32, 128 + 128, 3))], 4, 4, 0.5, True, 3, (32, 128, 3), (0, 3, 0.5), True),
        # 3 crops, > half last overlap
        ([np.zeros((32, 128 + 64 + 32, 3))], 4, 4, 0.5, True, 3, (32, 128, 3), (0, 3, 0.75), True),
        # Split into larger crops
        ([np.zeros((32, 192 * 2, 3))], 4, 6, 0.5, True, 3, (32, 192, 3), (0, 3, 0.5), True),
        # Test fallback for empty splits
        ([np.empty((1, 0, 3))], -1, 4, 0.5, False, 1, (1, 0, 3), (0), True),
    ],
)
def test_split_crops_cases(
    inputs,
    max_ratio,
    target_ratio,
    target_overlap_ratio,
    expected_remap_required,
    expected_len,
    expected_shape,
    expected_crop_map,
    channels_last,
):
    """Exhaustive split_crops cases: split count, piece shape and crop-map entries.

    The crop-map entry is either a plain index (no split) or a
    (start, end, last_overlap_ratio) tuple describing the split range.
    """
    new_crops, crop_map, _remap_required = split_crops(
        inputs,
        max_ratio=max_ratio,
        target_ratio=target_ratio,
        split_overlap_ratio=target_overlap_ratio,
        channels_last=channels_last,
    )
    assert _remap_required == expected_remap_required
    assert len(new_crops) == expected_len
    # One map entry per input crop
    assert len(crop_map) == 1
    if expected_remap_required:
        assert isinstance(crop_map[0], tuple)
        assert crop_map[0] == expected_crop_map
    for crop in new_crops:
        assert crop.shape == expected_shape
@pytest.mark.parametrize(
    "split_overlap_ratio",
    [
        # lower bound
        0.0,
        # upper bound
        1.0,
    ],
)
def test_invalid_split_overlap_ratio(split_overlap_ratio):
    """split_crops rejects overlap ratios at (and outside) the open interval (0, 1)."""
    dummy_crops = [np.zeros((32, 32 * 4, 3))]
    with pytest.raises(ValueError):
        split_crops(dummy_crops, max_ratio=4, target_ratio=4, split_overlap_ratio=split_overlap_ratio)
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "arch_name, input_shape",
    [
        ["crnn_vgg16_bn", (32, 128, 3)],
        ["crnn_mobilenet_v3_small", (32, 128, 3)],
        ["crnn_mobilenet_v3_large", (32, 128, 3)],
        ["sar_resnet31", (32, 128, 3)],
        ["master", (32, 128, 3)],
        ["vitstr_small", (32, 128, 3)],
        ["vitstr_base", (32, 128, 3)],
        ["parseq", (32, 128, 3)],
        ["viptr_tiny", (32, 128, 3)],
    ],
)
def test_recognition_models(arch_name, input_shape, quantized):
    """Smoke-test every recognition model: decoded predictions, raw output map, post-processor,
    and a second pass with a fixed batch size of 1."""
    mock_vocab = VOCABS["french"]
    batch_size = 4
    model = recognition.__dict__[arch_name](load_in_8_bit=quantized)
    assert isinstance(model, Engine)
    input_array = np.random.rand(batch_size, *input_shape).astype(np.float32)
    out = model(input_array, return_model_output=True)
    assert isinstance(out, dict)
    assert len(out) == 2
    # Decoded predictions: one (word, confidence in [0, 1]) pair per sample
    assert isinstance(out["preds"], list)
    assert len(out["preds"]) == batch_size
    assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
    assert isinstance(out["out_map"], np.ndarray)
    assert out["out_map"].shape[0] == 4
    # test model post processor
    post_processor = model.postprocessor
    decoded = post_processor(np.random.rand(2, len(mock_vocab), 30).astype(np.float32))
    assert isinstance(decoded, list)
    assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in decoded)
    assert len(decoded) == 2
    # Decoded characters must all come from the vocab
    assert all(char in mock_vocab for word, _ in decoded for char in word)
    # Testing with a fixed batch size: the engine must still process all samples
    model = recognition.__dict__[arch_name]()
    model.fixed_batch_size = 1
    assert isinstance(model, Engine)
    input_array = np.random.rand(batch_size, *input_shape).astype(np.float32)
    out = model(input_array, return_model_output=True)
    assert isinstance(out, dict)
    assert len(out) == 2
    assert isinstance(out["preds"], list)
    assert len(out["preds"]) == batch_size
    assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
    assert isinstance(out["out_map"], np.ndarray)
    assert out["out_map"].shape[0] == 4
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "input_shape",
    [
        (128, 128, 3),
        (32, 1024, 3),  # test case split wide crops
    ],
)
@pytest.mark.parametrize(
    "arch_name",
    [
        "crnn_vgg16_bn",
        "crnn_mobilenet_v3_small",
        "crnn_mobilenet_v3_large",
        "sar_resnet31",
        "master",
        "vitstr_small",
        "vitstr_base",
        "parseq",
        "viptr_tiny",
    ],
)
def test_recognition_zoo(arch_name, input_shape, quantized):
    """Build each recognition predictor through the zoo and validate its (word, conf) output."""
    batch_size = 2
    # Model
    predictor = recognition.zoo.recognition_predictor(arch_name, load_in_8_bit=quantized)
    # object check
    assert isinstance(predictor, RecognitionPredictor)
    input_array = np.random.rand(batch_size, *input_shape).astype(np.float32)
    out = predictor(input_array)
    # One (word, confidence) pair per input sample
    assert isinstance(out, list) and len(out) == batch_size
    assert all(isinstance(word, str) and isinstance(conf, float) for word, conf in out)
    # Unknown architecture names must be rejected
    with pytest.raises(ValueError):
        _ = recognition.zoo.recognition_predictor(arch="wrong_model")
================================================
FILE: tests/common/test_models_recognition_utils.py
================================================
import pytest
from onnxtr.models.recognition.utils import merge_multi_strings, merge_strings
@pytest.mark.parametrize(
    "a, b, overlap_ratio, merged",
    [
        # Last character of first string and first of last string will be cropped when merging - indicated by X
        ("abcX", "Xdef", 0.5, "abcdef"),
        ("abcdX", "Xdef", 0.75, "abcdef"),
        ("abcdeX", "Xdef", 0.9, "abcdef"),
        ("abcdefX", "Xdef", 0.9, "abcdef"),
        # Long repetition - four of seven characters in the second string are in the estimated overlap
        # X-chars will be cropped during merge, because they might be cut off during splitting of corresponding image
        ("abccccX", "Xcccccc", 4 / 7, "abcccccccc"),
        ("abc", "", 0.5, "abc"),
        ("", "abc", 0.5, "abc"),
        ("a", "b", 0.5, "ab"),
        # No overlap of input strings after crop
        ("abcdX", "Xefghi", 0.33, "abcdefghi"),
        # No overlap of input strings after crop with shorter inputs
        ("bcdX", "Xefgh", 0.4, "bcdefgh"),
        # No overlap of input strings after crop with even shorter inputs
        ("cdX", "Xefg", 0.5, "cdefg"),
        # Full overlap of input strings
        ("abcdX", "Xbcde", 1.0, "abcde"),
        # One repetition within inputs
        ("ababX", "Xabde", 0.8, "ababde"),
        # Multiple repetitions within inputs
        ("ababX", "Xabab", 0.8, "ababab"),
        # Multiple repetitions within inputs with shorter input strings
        ("abaX", "Xbab", 1.0, "abab"),
        # Longer multiple repetitions within inputs with half overlap
        ("cabababX", "Xabababc", 0.5, "cabababababc"),
        # Longer multiple repetitions within inputs with full overlap
        ("ababaX", "Xbabab", 1.0, "ababab"),
        # One different letter in overlap
        ("one_differon", "ferent_letter", 0.5, "one_differont_letter"),
        # First string empty after crop
        ("-", "test", 0.9, "-test"),
        # Second string empty after crop
        ("test", "-", 0.9, "test-"),
    ],
)
def test_merge_strings(a, b, overlap_ratio, merged):
    """Two overlapping crop transcriptions merge into the expected single string."""
    assert merged == merge_strings(a, b, overlap_ratio)
@pytest.mark.parametrize(
    "seq_list, overlap_ratio, last_overlap_ratio, merged",
    [
        # One character at each conjunction point will be cropped when merging - indicated by X
        (["abcX", "Xdef"], 0.5, 0.5, "abcdef"),
        (["abcdX", "XdefX", "XefghX", "Xijk"], 0.5, 0.5, "abcdefghijk"),
        (["abcdX", "XdefX", "XefghiX", "Xaijk"], 0.5, 0.8, "abcdefghijk"),
        (["aaaa", "aaab", "aabc"], 0.8, 0.3, "aaaabc"),
        # Handle empty input
        ([], 0.5, 0.4, ""),
    ],
)
def test_merge_multi_strings(seq_list, overlap_ratio, last_overlap_ratio, merged):
    """A chain of crop transcriptions folds into one string, honoring both overlap ratios."""
    result = merge_multi_strings(seq_list, overlap_ratio, last_overlap_ratio)
    assert result == merged
================================================
FILE: tests/common/test_models_zoo.py
================================================
import numpy as np
import pytest
from onnxtr import models
from onnxtr.io import Document, DocumentFile
from onnxtr.models import detection, recognition
from onnxtr.models.classification import mobilenet_v3_small_crop_orientation, mobilenet_v3_small_page_orientation
from onnxtr.models.classification.zoo import crop_orientation_predictor, page_orientation_predictor
from onnxtr.models.detection.predictor import DetectionPredictor
from onnxtr.models.detection.zoo import ARCHS as DET_ARCHS
from onnxtr.models.detection.zoo import detection_predictor
from onnxtr.models.predictor import OCRPredictor
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.models.recognition.predictor import RecognitionPredictor
from onnxtr.models.recognition.zoo import ARCHS as RECO_ARCHS
from onnxtr.models.recognition.zoo import recognition_predictor
from onnxtr.models.zoo import ocr_predictor
from onnxtr.utils.repr import NestedObject
# Create a dummy callback
class _DummyCallback:
def __call__(self, loc_preds):
return loc_preds
@pytest.mark.parametrize(
    "assume_straight_pages, straighten_pages, disable_page_orientation, disable_crop_orientation",
    [
        [True, False, False, False],
        [False, False, True, True],
        [True, True, False, False],
        [False, True, True, True],
        [True, False, True, False],
    ],
)
def test_ocrpredictor(
    mock_pdf, assume_straight_pages, straighten_pages, disable_page_orientation, disable_crop_orientation
):
    """End-to-end OCRPredictor assembly: orientation predictor wiring, document output,
    input validation, and swapping in custom orientation models."""
    det_bsize = 4
    det_predictor = DetectionPredictor(
        PreProcessor(output_size=(1024, 1024), batch_size=det_bsize),
        detection.db_mobilenet_v3_large(assume_straight_pages=assume_straight_pages),
    )
    reco_bsize = 16
    reco_predictor = RecognitionPredictor(
        PreProcessor(output_size=(32, 128), batch_size=reco_bsize, preserve_aspect_ratio=True),
        recognition.crnn_vgg16_bn(),
    )
    doc = DocumentFile.from_pdf(mock_pdf)
    predictor = OCRPredictor(
        det_predictor,
        reco_predictor,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        detect_orientation=True,
        detect_language=True,
        resolve_lines=True,
        resolve_blocks=True,
        disable_page_orientation=disable_page_orientation,
        disable_crop_orientation=disable_crop_orientation,
    )
    # The disable flags must be reflected on the predictor instance
    assert (
        predictor._page_orientation_disabled if disable_page_orientation else not predictor._page_orientation_disabled
    )
    assert (
        predictor._crop_orientation_disabled if disable_crop_orientation else not predictor._crop_orientation_disabled
    )
    # Orientation sub-predictors are only instantiated when needed
    if assume_straight_pages:
        assert predictor.crop_orientation_predictor is None
        if predictor.detect_orientation or predictor.straighten_pages:
            assert isinstance(predictor.page_orientation_predictor, NestedObject)
        else:
            assert predictor.page_orientation_predictor is None
    else:
        assert isinstance(predictor.crop_orientation_predictor, NestedObject)
        assert isinstance(predictor.page_orientation_predictor, NestedObject)
    out = predictor(doc)
    assert isinstance(out, Document)
    assert len(out.pages) == 2
    # Dimension check: batched 4D arrays are rejected
    with pytest.raises(ValueError):
        input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
        _ = predictor([input_page])
    assert out.pages[0].orientation["value"] in range(-2, 3)
    assert isinstance(out.pages[0].language["value"], str)
    # Rendering and export must work at document and page level
    assert isinstance(out.render(), str)
    assert isinstance(out.pages[0].render(), str)
    assert isinstance(out.export(), dict)
    assert isinstance(out.pages[0].export(), dict)
    with pytest.raises(ValueError):
        _ = ocr_predictor("unknown_arch")
    # Test with custom orientation models
    custom_crop_orientation_model = mobilenet_v3_small_crop_orientation()
    custom_page_orientation_model = mobilenet_v3_small_page_orientation()
    if assume_straight_pages:
        if predictor.detect_orientation or predictor.straighten_pages:
            # Overwrite the default orientation models
            predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
            predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
    else:
        # Overwrite the default orientation models
        predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
        predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
    out = predictor(doc)
    # The mock PDF is upright, so the detected page orientation must be 0
    orientation = 0
    assert out.pages[0].orientation["value"] == orientation
def test_trained_ocr_predictor(mock_payslip):
    """Run the full OCR pipeline on a known payslip image and pin expected words/geometries.

    The geometry values are reference boxes for the mock payslip fixture,
    compared with a 5% relative tolerance.
    """
    doc = DocumentFile.from_images(mock_payslip)
    det_predictor = detection_predictor(
        "db_resnet50",
        batch_size=2,
        assume_straight_pages=True,
        symmetric_pad=True,
        preserve_aspect_ratio=False,
    )
    reco_predictor = recognition_predictor("crnn_vgg16_bn", batch_size=128)
    predictor = OCRPredictor(
        det_predictor,
        reco_predictor,
        assume_straight_pages=True,
        straighten_pages=True,
        preserve_aspect_ratio=False,
        resolve_lines=True,
        resolve_blocks=True,
    )
    # test hooks: a no-op hook must not change the output
    predictor.add_hook(_DummyCallback())
    out = predictor(doc)
    assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr."
    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05)
    assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised"
    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05)
    # Re-run with aspect-ratio preservation + symmetric padding enabled
    det_predictor = detection_predictor(
        "db_resnet50",
        batch_size=2,
        assume_straight_pages=True,
        preserve_aspect_ratio=True,
        symmetric_pad=True,
    )
    predictor = OCRPredictor(
        det_predictor,
        reco_predictor,
        assume_straight_pages=True,
        straighten_pages=True,
        preserve_aspect_ratio=True,
        symmetric_pad=True,
        resolve_lines=True,
        resolve_blocks=True,
    )
    out = predictor(doc)
    assert "Mr" in out.pages[0].blocks[0].lines[0].words[0].value
    # test list archs: the predictor exposes the available architectures
    archs = predictor.list_archs()
    assert isinstance(archs, dict)
    assert archs["recognition_archs"] == RECO_ARCHS
    assert archs["detection_archs"] == DET_ARCHS
def _test_predictor(predictor):
    """Shared sanity checks for a fully built OCR predictor."""
    # Output checks
    assert isinstance(predictor, OCRPredictor)

    # A single blank page must come back as a one-page Document
    pages = [np.zeros((1024, 1024, 3), dtype=np.uint8)]
    result = predictor(pages)
    assert isinstance(result, Document)
    assert len(result.pages) == 1

    # Dimension check: batched (4D) arrays are rejected, pages must be HWC
    with pytest.raises(ValueError):
        bad_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
        _ = predictor([bad_page])
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "det_arch, reco_arch",
    [[det_arch, reco_arch] for det_arch, reco_arch in zip(detection.zoo.ARCHS, recognition.zoo.ARCHS)],
)
def test_zoo_models(det_arch, reco_arch, quantized):
    """OCR predictor construction from architecture names and from model instances."""
    # Build from architecture names
    _test_predictor(models.ocr_predictor(det_arch, reco_arch, load_in_8_bit=quantized))

    # Build from already-instantiated models
    det_model = detection.__dict__[det_arch]()
    reco_model = recognition.__dict__[reco_arch]()
    _test_predictor(models.ocr_predictor(det_model, reco_model))

    # Passing a recognition model where a detection model is expected must fail
    with pytest.raises(ValueError):
        models.ocr_predictor(det_arch=reco_model)
    # ... and vice versa
    with pytest.raises(ValueError):
        models.ocr_predictor(reco_arch=det_model)
================================================
FILE: tests/common/test_transforms.py
================================================
import numpy as np
import pytest
from onnxtr.transforms import Normalize, Resize
def test_resize():
    """Exercise Resize: plain resize, aspect-ratio preservation, and both padding modes."""
    output_size = (32, 32)
    resize = Resize(output_size)
    square_img = np.ones((64, 64, 3), dtype=np.float32)
    resized = resize(square_img)
    # NOTE(review): input is all-ones float32 yet this compares to 255, while
    # later checks in this test compare to 1 — confirm Resize's output value
    # range against the implementation.
    assert np.all(resized == 255)
    assert resized.shape[:2] == output_size
    assert repr(resize) == f"Resize(output_size={output_size}, interpolation='2')"

    resize = Resize(output_size, preserve_aspect_ratio=True)
    wide_img = np.ones((32, 64, 3), dtype=np.float32)
    resized = resize(wide_img)
    assert resized.shape[:2] == output_size
    assert not np.all(resized == 255)
    # Asymmetric padding: content at the top, zero rows at the bottom
    assert np.all(resized[-1] == 0) and np.all(resized[0] == 255)

    # Symmetric padding spreads the zero rows over both edges
    resize = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True)
    assert repr(resize) == (
        f"Resize(output_size={output_size}, interpolation='2', preserve_aspect_ratio=True, symmetric_pad=True)"
    )
    resized = resize(wide_img)
    assert resized.shape[:2] == output_size
    assert np.all(resized[-1] == 0) and np.all(resized[0] == 0)

    # Inverse aspect ratio (tall image)
    tall_img = np.ones((64, 32, 3), dtype=np.float32)
    resized = resize(tall_img)
    assert not np.all(resized == 1)
    assert resized.shape[:2] == output_size

    # Matching aspect ratio: no padding needed
    output_size = (32, 128)
    resize = Resize(output_size, preserve_aspect_ratio=True)
    resized = resize(np.ones((16, 64, 3), dtype=np.float32))
    assert resized.shape[:2] == output_size
@pytest.mark.parametrize(
    "input_shape",
    [
        [8, 32, 32, 3],
        [32, 32, 3],
        [32, 3],
    ],
)
def test_normalize(input_shape):
    """Normalize with mean=std=0.5 maps an all-ones tensor onto itself, for several ranks."""
    mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]
    normalize = Normalize(mean, std)
    ones = np.ones(input_shape, dtype=np.float32)
    normalized = normalize(ones)
    # (1 - 0.5) / 0.5 == 1, so values are unchanged
    assert np.all(normalized == 1)
    assert repr(normalize) == f"Normalize(mean={mean}, std={std})"

    # Non-sequence mean/std must be rejected
    with pytest.raises(AssertionError):
        Normalize(mean="32")
    with pytest.raises(AssertionError):
        Normalize(std="32")
================================================
FILE: tests/common/test_utils_data.py
================================================
import os
import tempfile
from pathlib import PosixPath
from unittest.mock import patch
import pytest
from onnxtr.utils.data import _urlretrieve, download_from_url
def test__urlretrieve():
    """Download a small release asset and verify the file lands on disk."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        target = os.path.join(tmp_dir, "crnn_mobilenet_v3_small-bded4d49.onnx")
        _urlretrieve(
            "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_small-bded4d49.onnx",
            target,
        )
        assert os.path.exists(target), f"File {target} does not exist."
@patch("onnxtr.utils.data._urlretrieve")
@patch("pathlib.Path.mkdir")
@patch.dict(os.environ, {"HOME": "/"}, clear=True)
def test_download_from_url(mkdir_mock, urlretrieve_mock):
    """Without ONNXTR_CACHE_DIR, the cache path is derived from $HOME."""
    download_from_url("test_url")
    # The retrieval helper must be called with the $HOME-based default cache path
    urlretrieve_mock.assert_called_with("test_url", PosixPath("/.cache/onnxtr/test_url"))
@patch.dict(os.environ, {"ONNXTR_CACHE_DIR": "/test"}, clear=True)
@patch("onnxtr.utils.data._urlretrieve")
@patch("pathlib.Path.mkdir")
def test_download_from_url_customizing_cache_dir(mkdir_mock, urlretrieve_mock):
    """ONNXTR_CACHE_DIR overrides the default $HOME-based cache location."""
    download_from_url("test_url")
    urlretrieve_mock.assert_called_with("test_url", PosixPath("/test/test_url"))
@patch.dict(os.environ, {"HOME": "/"}, clear=True)
@patch("pathlib.Path.mkdir", side_effect=OSError)
@patch("logging.error")
def test_download_from_url_error_creating_directory(logging_mock, mkdir_mock):
    """A failing mkdir must propagate as OSError and log a helpful message."""
    with pytest.raises(OSError):
        download_from_url("test_url")
    # NOTE(review): "direcotry" is misspelled — presumably this mirrors the
    # library's actual log string; fix it there and here together, if at all.
    logging_mock.assert_called_with(
        "Failed creating cache direcotry at /.cache/onnxtr."
        " You can change default cache directory using 'ONNXTR_CACHE_DIR' environment variable if needed."
    )
@patch.dict(os.environ, {"HOME": "/", "ONNXTR_CACHE_DIR": "/test"}, clear=True)
@patch("pathlib.Path.mkdir", side_effect=OSError)
@patch("logging.error")
def test_download_from_url_error_creating_directory_with_env_var(logging_mock, mkdir_mock):
    """When ONNXTR_CACHE_DIR is set, the mkdir-failure message references that path."""
    with pytest.raises(OSError):
        download_from_url("test_url")
    # NOTE(review): "direcotry" typo — see the non-env-var variant of this test
    logging_mock.assert_called_with(
        "Failed creating cache direcotry at /test using path from 'ONNXTR_CACHE_DIR' environment variable."
    )
================================================
FILE: tests/common/test_utils_fonts.py
================================================
from PIL.ImageFont import FreeTypeFont, ImageFont
from onnxtr.utils.fonts import get_font
def test_get_font():
    """get_font with no arguments returns a usable PIL font object."""
    # Attempts to load recommended OS font
    font = get_font()
    # Either a FreeType font (font file found) or PIL's bitmap fallback is acceptable
    assert isinstance(font, (ImageFont, FreeTypeFont))
================================================
FILE: tests/common/test_utils_geometry.py
================================================
from copy import deepcopy
from math import hypot
import numpy as np
import pytest
from onnxtr.io import DocumentFile
from onnxtr.utils import geometry
def test_bbox_to_polygon():
    """A ((xmin, ymin), (xmax, ymax)) bbox expands to its four corner points."""
    bbox = ((0, 0), (1, 1))
    expected = ((0, 0), (1, 0), (0, 1), (1, 1))
    assert geometry.bbox_to_polygon(bbox) == expected
def test_polygon_to_bbox():
    """Four corner points collapse back to the ((xmin, ymin), (xmax, ymax)) bbox."""
    polygon = ((0, 0), (1, 0), (0, 1), (1, 1))
    expected = ((0, 0), (1, 1))
    assert geometry.polygon_to_bbox(polygon) == expected
def test_order_points():
    """order_points normalizes corners to (top-left, top-right, bottom-right, bottom-left)."""
    expected = np.array([
        [1, 2],  # top-left
        [5, 2],  # top-right
        [5, 6],  # bottom-right
        [1, 6],  # bottom-left
    ])

    # Flat (xmin, ymin, xmax, ymax) bbox input
    assert np.all(geometry.order_points(np.array([1, 2, 5, 6])) == expected)

    # Shuffled quadrangle (br, tl, bl, tr)
    shuffled = np.array([[5, 6], [1, 2], [1, 6], [5, 2]])
    assert np.all(geometry.order_points(shuffled) == expected)

    # Already-ordered input is left unchanged
    assert np.all(geometry.order_points(expected.copy()) == expected)

    # Float inputs keep their dtype and values
    shuffled_f32 = shuffled.astype(np.float32)
    ordered_f32 = geometry.order_points(shuffled_f32)
    assert ordered_f32.dtype == shuffled_f32.dtype
    assert np.allclose(ordered_f32, expected)

    # Invalid shapes are rejected
    with pytest.raises(ValueError):
        geometry.order_points(np.array([1, 2, 3]))  # neither a bbox nor a quad
    with pytest.raises(ValueError):
        geometry.order_points(np.zeros((5, 2)))  # too many points
def test_detach_scores():
    """detach_scores separates geometry from the trailing confidence values."""
    expected_scores = np.array([0.9, 0.8])

    # (N, 5) boxes: the 5th column is the score
    scored_boxes = np.array([[0.1, 0.1, 0.2, 0.2, 0.9], [0.15, 0.15, 0.2, 0.2, 0.8]])
    result = geometry.detach_scores([scored_boxes])
    expected_boxes = np.array([[0.1, 0.1, 0.2, 0.2], [0.15, 0.15, 0.2, 0.2]])
    assert np.all(result[0] == expected_boxes) and np.all(result[1] == expected_scores)

    # (N, 5, 2) polygons: an extra 5th point carries the score
    scored_polys = np.array([
        [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15], [0.0, 0.9]],
        [[0.15, 0.15], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15], [0.0, 0.8]],
    ])
    result = geometry.detach_scores([scored_polys])
    expected_polys = np.array([
        [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
        [[0.15, 0.15], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
    ])
    assert np.all(result[0] == expected_polys) and np.all(result[1] == expected_scores)
def test_resolve_enclosing_bbox():
    """resolve_enclosing_bbox merges boxes into their smallest enclosing bbox."""
    # Tuple-based 2-point boxes
    assert geometry.resolve_enclosing_bbox([((0, 0.5), (1, 0)), ((0.5, 0), (1, 0.25))]) == ((0, 0), (1, 0.5))
    # Array-based (xmin, ymin, xmax, ymax) boxes.
    # Bug fix: the original assertion `pred.all() == np.array([...]).all()`
    # compared two truthiness reductions (both True for these inputs), so it
    # could never fail; compare the actual enclosing-box values instead.
    pred = geometry.resolve_enclosing_bbox(np.array([[0.1, 0.1, 0.2, 0.2], [0.15, 0.15, 0.2, 0.2]]))
    assert np.allclose(pred, np.array([0.1, 0.1, 0.2, 0.2]))
def test_resolve_enclosing_rbbox():
    """Two rotated boxes merge into a single enclosing rotated box."""
    quad_a = np.asarray([[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]])
    quad_b = np.asarray([[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]])
    merged = geometry.resolve_enclosing_rbbox([quad_a, quad_b])
    # Expected hull corners, normalized to the canonical point order
    expected = geometry.order_points(np.asarray([[0.05, 0.15], [0.1, 0.1], [0.6, 0.6], [0.55, 0.65]]))
    assert np.allclose(merged, expected, atol=1e-3)
def test_remap_boxes():
    """remap_boxes rescales relative polygons between page sizes, preserving geometry."""
    centered = np.asarray([[[0.25, 0.25], [0.25, 0.75], [0.75, 0.25], [0.75, 0.75]]])

    # Uniform upscale: the box shrinks relative to the larger page
    remapped = geometry.remap_boxes(centered, (10, 10), (20, 20))
    assert np.all(remapped == np.asarray([[[0.375, 0.375], [0.375, 0.625], [0.625, 0.375], [0.625, 0.625]]]))

    # Height-only upscale: only the y-coordinates are squeezed
    remapped = geometry.remap_boxes(centered, (10, 10), (20, 10))
    assert np.all(remapped == np.asarray([[[0.25, 0.375], [0.25, 0.625], [0.75, 0.375], [0.75, 0.625]]]))

    # Dimensions must be (height, width) pairs
    with pytest.raises(ValueError):
        geometry.remap_boxes(centered, (80, 40, 150), (160, 40))
    with pytest.raises(ValueError):
        geometry.remap_boxes(centered, (80, 40), (160,))

    # Remapping must preserve absolute segment length and orientation
    height_o, width_o = 100, 100
    height_d, width_d = 200, 100
    diag_box = np.asarray([[[0.25, 0.25], [0.25, 0.25], [0.75, 0.75], [0.75, 0.75]]])
    remapped = geometry.remap_boxes(diag_box, (height_o, width_o), (height_d, width_d))
    # Convert both versions to absolute coordinates
    orig_abs = np.stack((diag_box[:, :, 0] * width_o, diag_box[:, :, 1] * height_o), axis=2)[0]
    dest_abs = np.stack((remapped[:, :, 0] * width_d, remapped[:, :, 1] * height_d), axis=2)[0]
    len_orig = hypot(orig_abs[0][0] - orig_abs[2][0], orig_abs[0][1] - orig_abs[2][1])
    len_dest = hypot(dest_abs[0][0] - dest_abs[2][0], dest_abs[0][1] - dest_abs[2][1])
    assert len_orig == len_dest
    angle_orig = np.rad2deg(np.arctan((orig_abs[0][1] - orig_abs[2][1]) / (orig_abs[0][0] - orig_abs[2][0])))
    angle_dest = np.rad2deg(np.arctan((dest_abs[0][1] - dest_abs[2][1]) / (dest_abs[0][0] - dest_abs[2][0])))
    assert angle_orig == angle_dest
def test_rotate_boxes():
    """rotate_boxes turns (N, 5) scored boxes into (N, 4, 2) polygons, rotating as needed."""
    scored_boxes = np.array([[0.1, 0.1, 0.8, 0.3, 0.5]])
    expected_poly = np.array([[0.1, 0.1], [0.8, 0.1], [0.8, 0.3], [0.1, 0.3]])

    # Zero angle: plain bbox-to-polygon conversion
    assert np.all(geometry.rotate_boxes(scored_boxes, angle=0.0, orig_shape=(1, 1)) == expected_poly)
    # Angles below the minimum threshold leave the boxes untouched
    assert np.all(geometry.rotate_boxes(scored_boxes, angle=0.5, orig_shape=(1, 1)) == expected_poly)
    # A real rotation yields one 4-point polygon
    assert geometry.rotate_boxes(scored_boxes, angle=30, orig_shape=(1, 1)).shape == (1, 4, 2)

    corner_boxes = np.array([[0.0, 0.0, 0.6, 0.2, 0.5]])
    # Quarter turns in both directions (min_angle=0 forces the rotation)
    rotated = geometry.rotate_boxes(corner_boxes, angle=-90, orig_shape=(1, 1), min_angle=0)
    assert np.allclose(rotated, np.array([[[1, 0.0], [1, 0.6], [0.8, 0.6], [0.8, 0.0]]]))
    rotated = geometry.rotate_boxes(corner_boxes, angle=+90, orig_shape=(1, 1), min_angle=0)
    assert np.allclose(rotated, np.array([[[0, 1.0], [0, 0.4], [0.2, 0.4], [0.2, 1.0]]]))
@pytest.fixture
def sample_geoms():
    """Two axis-aligned 4-point polygons in absolute coordinates."""
    return np.array([
        [[10, 10], [20, 10], [20, 20], [10, 20]],
        [[30, 30], [40, 30], [40, 40], [30, 40]],
    ])
def test_rotate_abs_geoms(sample_geoms):
    """Rotating absolute polygons preserves the (N, 4, 2) array shape."""
    rotated_polys = geometry.rotate_abs_geoms(sample_geoms, 45.0, (100, 100))
    assert rotated_polys.shape == sample_geoms.shape
def test_rotate_image():
    """rotate_image rotates a page, optionally expanding the canvas or restoring the shape."""
    src = np.ones((32, 64, 3), dtype=np.float32)

    rotated = geometry.rotate_image(src, 30.0)
    assert rotated.shape[:-1] == (32, 64)
    assert rotated[0, 0, 0] == 0  # corner becomes padding
    assert rotated[0, :, 0].sum() > 1  # content still reaches the first row

    # Expanded canvas: first row is (almost) all padding
    rotated = geometry.rotate_image(src, 30.0, expand=True)
    assert rotated.shape[:-1] == (60, 120)
    assert rotated[0, :, 0].sum() <= 1

    # Expand, then restore the original shape
    rotated = geometry.rotate_image(src, 30.0, expand=True, preserve_origin_shape=True)
    assert rotated.shape[:-1] == (32, 64)
    assert rotated[0, :, 0].sum() <= 1

    # 90° rotation with expansion
    rotated = geometry.rotate_image(src, 90.0, expand=True)
    assert rotated.shape[:-1] == (64, 128)
    assert rotated[0, :, 0].sum() <= 1
def test_remove_image_padding():
    """remove_image_padding strips zero borders and is a no-op on tight images."""
    content = np.ones((32, 64, 3), dtype=np.float32)
    padded = np.pad(content, ((10, 10), (20, 20), (0, 0)))
    assert np.all(geometry.remove_image_padding(padded) == content)
    # Already-tight image passes through unchanged
    assert np.all(geometry.remove_image_padding(content) == content)
@pytest.mark.parametrize(
    "abs_geoms, img_size, rel_geoms",
    [
        # Full image (boxes)
        [np.array([[0, 0, 32, 32]]), (32, 32), np.array([[0, 0, 1, 1]], dtype=np.float32)],
        # Full image (polygons)
        [
            np.array([[[0, 0], [32, 0], [32, 32], [0, 32]]]),
            (32, 32),
            np.array([[[0, 0], [1, 0], [1, 1], [0, 1]]], dtype=np.float32),
        ],
        # Quarter image (boxes)
        [np.array([[0, 0, 16, 16]]), (32, 32), np.array([[0, 0, 0.5, 0.5]], dtype=np.float32)],
        # Quarter image (polygons)
        [
            np.array([[[0, 0], [16, 0], [16, 16], [0, 16]]]),
            (32, 32),
            np.array([[[0, 0], [0.5, 0], [0.5, 0.5], [0, 0.5]]], dtype=np.float32),
        ],
    ],
)
def test_convert_to_relative_coords(abs_geoms, img_size, rel_geoms):
    """Absolute boxes/polygons divide through by the page size; bad shapes raise."""
    assert np.all(geometry.convert_to_relative_coords(abs_geoms, img_size) == rel_geoms)
    # A (3, 5) array is neither (N, 4) boxes nor (N, 4, 2) polygons
    with pytest.raises(ValueError):
        geometry.convert_to_relative_coords(np.zeros((3, 5)), (32, 32))
def test_estimate_page_angle():
    """estimate_page_angle recovers the rotation applied to straight boxes."""
    straight_polys = np.array([
        [[0.3, 0.3], [0.4, 0.3], [0.4, 0.4], [0.3, 0.4]],
        [[0.4, 0.4], [0.5, 0.4], [0.5, 0.5], [0.4, 0.5]],
        [[0.5, 0.5], [0.6, 0.5], [0.6, 0.6], [0.5, 0.6]],
    ])
    rotated_polys = geometry.rotate_boxes(straight_polys, angle=20, orig_shape=(512, 512))
    assert np.isclose(geometry.estimate_page_angle(rotated_polys), 20)

    # Degenerate polygon (all corners identical) must not yield NaN / divide-by-zero
    collapsed = np.array([[[0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5]]])
    assert geometry.estimate_page_angle(collapsed) == 0.0
def test_extract_crops(mock_pdf):
    """extract_crops slices axis-aligned boxes (relative or absolute) out of a page."""
    doc_img = DocumentFile.from_pdf(mock_pdf)[0]
    num_crops = 2
    # Relative boxes along the page diagonal
    rel_boxes = np.array(
        [[idx / num_crops, idx / num_crops, (idx + 1) / num_crops, (idx + 1) / num_crops] for idx in range(num_crops)],
        dtype=np.float32,
    )
    # Absolute pixel boxes matching rel_boxes.
    # Bug fix: the ymin entry was `int(idx * doc_img.shape[0]) / num_crops`
    # (division outside the cast, unlike its three siblings), producing a
    # float off by up to 0.5 px for odd page heights; cast the full expression.
    abs_boxes = np.array(
        [
            [
                int(idx * doc_img.shape[1] / num_crops),
                int(idx * doc_img.shape[0] / num_crops),
                int((idx + 1) * doc_img.shape[1] / num_crops),
                int((idx + 1) * doc_img.shape[0] / num_crops),
            ]
            for idx in range(num_crops)
        ],
        dtype=np.float32,
    )

    # 5-column input is not a valid box format
    with pytest.raises(AssertionError):
        geometry.extract_crops(doc_img, np.zeros((1, 5)))

    for boxes in (rel_boxes, abs_boxes):
        crops = geometry.extract_crops(doc_img, boxes)
        # Number of crops
        assert len(crops) == num_crops
        # Data type and shape
        assert all(isinstance(crop, np.ndarray) for crop in crops)
        assert all(crop.ndim == 3 for crop in crops)

    # A full-page box reproduces the page (channels-last)
    assert np.all(
        doc_img == geometry.extract_crops(doc_img, np.array([[0, 0, 1, 1]], dtype=np.float32), channels_last=True)[0]
    )
    # channels_last=False returns the crop in CHW layout
    torch_img = np.transpose(doc_img, axes=(-1, 0, 1))
    assert np.all(
        torch_img
        == np.transpose(
            geometry.extract_crops(doc_img, np.array([[0, 0, 1, 1]], dtype=np.float32), channels_last=False)[0],
            axes=(-1, 0, 1),
        )
    )

    # An empty box set yields an empty list
    assert geometry.extract_crops(doc_img, np.zeros((0, 4))) == []
@pytest.mark.parametrize("assume_horizontal", [True, False])
def test_extract_rcrops(mock_pdf, assume_horizontal):
    """extract_rcrops slices rotated (4-point) boxes out of a page."""
    doc_img = DocumentFile.from_pdf(mock_pdf)[0]
    num_crops = 2
    # Relative quadrangles in (tl, tr, br, bl) order.
    # Bug fix: the fourth vertex duplicated the first one, making the quad a
    # degenerate triangle; use the bottom-left corner (x, y + 0.1) instead so
    # the polygon is a proper rectangle.
    rel_boxes = np.array(
        [
            [
                [idx / num_crops, idx / num_crops],
                [idx / num_crops + 0.1, idx / num_crops],
                [idx / num_crops + 0.1, idx / num_crops + 0.1],
                [idx / num_crops, idx / num_crops + 0.1],
            ]
            for idx in range(num_crops)
        ],
        dtype=np.float32,
    )
    # Absolute pixel version of the same quadrangles
    abs_boxes = deepcopy(rel_boxes)
    abs_boxes[:, :, 0] *= doc_img.shape[1]
    abs_boxes[:, :, 1] *= doc_img.shape[0]
    abs_boxes = abs_boxes.astype(np.int64)

    # Flat (N, 8) input is not a valid polygon format
    with pytest.raises(AssertionError):
        geometry.extract_rcrops(doc_img, np.zeros((1, 8)), assume_horizontal=assume_horizontal)

    for boxes in (rel_boxes, abs_boxes):
        crops = geometry.extract_rcrops(doc_img, boxes, assume_horizontal=assume_horizontal)
        # Number of crops
        assert len(crops) == num_crops
        # Data type and shape
        assert all(isinstance(crop, np.ndarray) for crop in crops)
        assert all(crop.ndim == 3 for crop in crops)

    # An empty polygon set yields an empty list
    assert geometry.extract_rcrops(doc_img, np.zeros((0, 4, 2)), assume_horizontal=assume_horizontal) == []
@pytest.mark.parametrize(
    "format,input_shape,expected_shape",
    [
        ("BCHW", (32, 3, 64, 64), (32, 3, 64, 64)),
        ("BCHW", (32, 64, 64, 3), (32, 3, 64, 64)),
        ("BHWC", (32, 64, 64, 3), (32, 64, 64, 3)),
        ("BHWC", (32, 3, 64, 64), (32, 64, 64, 3)),
        ("XYZ", (32, 3, 64, 64), (32, 3, 64, 64)),
        ("CHW", (3, 64, 64), (3, 64, 64)),
        ("CHW", (64, 64, 3), (3, 64, 64)),
        ("HWC", (64, 64, 3), (64, 64, 3)),
        ("HWC", (3, 64, 64), (64, 64, 3)),
    ],
)
def test_shape_translate(format, input_shape, expected_shape):
    """shape_translate permutes axes into the requested layout; unknown formats pass through."""
    sample = np.random.rand(*input_shape).astype(np.float32)
    translated = geometry.shape_translate(sample, format)
    assert translated.shape == expected_shape
================================================
FILE: tests/common/test_utils_multithreading.py
================================================
import os
from multiprocessing.pool import ThreadPool
from unittest.mock import patch
import pytest
from onnxtr.utils.multithreading import multithread_exec
@pytest.mark.parametrize(
    "input_seq, func, output_seq",
    [
        [[1, 2, 3], lambda x: 2 * x, [2, 4, 6]],
        [[1, 2, 3], lambda x: x**2, [1, 4, 9]],
        [
            ["this is", "show me", "I know"],
            lambda x: x + " the way",
            ["this is the way", "show me the way", "I know the way"],
        ],
    ],
)
def test_multithread_exec(input_seq, func, output_seq):
    """Mapping `func` over `input_seq` preserves element order."""
    assert list(multithread_exec(func, input_seq)) == output_seq
    # Third positional argument 0 presumably forces the sequential (no-pool)
    # path — TODO confirm against multithread_exec's signature
    assert list(multithread_exec(func, input_seq, 0)) == output_seq
@patch.dict(os.environ, {"ONNXTR_MULTIPROCESSING_DISABLE": "TRUE"}, clear=True)
def test_multithread_exec_multiprocessing_disable():
    """With the env switch set, ThreadPool.map must never be invoked."""
    with patch.object(ThreadPool, "map") as mock_tp_map:
        multithread_exec(lambda x: x, [1, 2])
        assert not mock_tp_map.called
================================================
FILE: tests/common/test_utils_reconstitution.py
================================================
import numpy as np
from test_io_elements import _mock_pages
from onnxtr.utils import reconstitution
def test_synthesize_page():
    """synthesize_page renders a page export back into an RGB image of the page size."""
    page = _mock_pages()[0]
    expected_shape = (*page.dimensions, 3)

    # With and without the probability overlay
    for draw_proba in (False, True):
        rendered = reconstitution.synthesize_page(page.export(), draw_proba=draw_proba)
        assert isinstance(rendered, np.ndarray)
        assert rendered.shape == expected_shape

    # A block reduced to a single line
    single_line_export = page.export()
    single_line_export["blocks"][0]["lines"] = [single_line_export["blocks"][0]["lines"][0]]
    rendered = reconstitution.synthesize_page(single_line_export, draw_proba=True)
    assert isinstance(rendered, np.ndarray)
    assert rendered.shape == expected_shape

    # Polygon (4-point) geometry instead of a 2-point bbox
    poly_export = page.export()
    poly_export["blocks"][0]["lines"][0]["geometry"] = [(0, 0), (0, 1), (1, 1), (1, 0)]
    rendered = reconstitution.synthesize_page(poly_export, draw_proba=True)
    assert isinstance(rendered, np.ndarray)
    assert rendered.shape == expected_shape
================================================
FILE: tests/common/test_utils_visualization.py
================================================
import numpy as np
import pytest
from test_io_elements import _mock_pages
from onnxtr.utils import visualization
def test_visualize_page():
    """Smoke-test page visualization plus create_obj_patch geometry validation."""
    image = np.ones((300, 200, 3))

    def render_all(page):
        # Exercise the main display options on a fresh export each time
        visualization.visualize_page(page.export(), image, words_only=False)
        visualization.visualize_page(page.export(), image, words_only=True, interactive=False)
        visualization.visualize_page(
            page.export(), image, words_only=True, interactive=False, preserve_aspect_ratio=True
        )

    render_all(_mock_pages()[0])

    # Invalid geometries must be rejected by create_obj_patch
    for bad_geometry in ([1, 2], (1, 2), (1, 2, 3, 4, 5)):
        with pytest.raises(ValueError):
            visualization.create_obj_patch(bad_geometry, (100, 100))

    # Same rendering checks with polygon-based pages
    image = np.ones((300, 200, 3))
    render_all(_mock_pages(polygons=True)[0])
def test_draw_boxes():
    """Smoke-test draw_boxes with pairs of overlapping boxes."""
    canvas = np.ones((256, 256, 3), dtype=np.float32)
    boxes = np.array([
        [0.1, 0.1, 0.2, 0.2],
        [0.15, 0.15, 0.19, 0.2],  # overlaps the first box
        [0.5, 0.5, 0.6, 0.55],
        [0.55, 0.5, 0.7, 0.55],  # overlaps the third box
    ])
    visualization.draw_boxes(boxes=boxes, image=canvas, block=False)
================================================
FILE: tests/common/test_utils_vocabs.py
================================================
from collections import Counter
from onnxtr.utils import VOCABS
def test_vocabs_duplicates():
    """Every registered vocabulary must be a string with no repeated characters."""
    for name, charset in VOCABS.items():
        assert isinstance(charset, str)
        # Characters appearing more than once, in first-occurrence order
        repeated = [char for char, count in Counter(charset).items() if count > 1]
        assert not repeated, f"Duplicate characters in {name} vocab: {repeated}"
================================================
FILE: tests/conftest.py
================================================
from io import BytesIO
import cv2
import pytest
import requests
from PIL import Image, ImageDraw
from onnxtr.io import reader
from onnxtr.utils import geometry
from onnxtr.utils.fonts import get_font
def synthesize_text_img(
    text: str,
    font_size: int = 32,
    font_family=None,
    background_color=None,
    text_color=None,
) -> Image.Image:
    """Render ``text`` centered on a fresh RGB image sized to fit it.

    Args:
        text: the string to draw
        font_size: font size passed to `get_font`
        font_family: optional font family passed to `get_font`
        background_color: RGB background color (defaults to black)
        text_color: RGB text color (defaults to white)

    Returns:
        A PIL image containing the rendered, centered text.
    """
    bg_color = (0, 0, 0) if background_color is None else background_color
    fg_color = (255, 255, 255) if text_color is None else text_color

    font = get_font(font_family, font_size)
    left, top, right, bottom = font.getbbox(text)
    text_w, text_h = right - left, bottom - top

    # Add a margin around the text; a single character gets a square canvas
    height, width = int(round(1.3 * text_h)), int(round(1.1 * text_w))
    if len(text) > 1:
        img_size = (height, width)
    else:
        side = max(height, width)
        img_size = (side, side)

    img = Image.new("RGB", img_size[::-1], color=bg_color)
    draw = ImageDraw.Draw(img)
    # Offset so that the text is centered on the canvas
    text_pos = (int(round((img_size[1] - text_w) / 2)), int(round((img_size[0] - text_h) / 2)))
    draw.text(text_pos, text, font=font, fill=fg_color)
    return img
@pytest.fixture(scope="session")
def mock_vocab():
    """Shuffled vocabulary string (with accented characters) shared across recognition tests."""
    return "3K}7eé;5àÎYho]QwV6qU~W\"XnbBvcADfËmy.9ÔpÛ*{CôïE%M4#ÈR:g@T$x?0î£|za1ù8,OG€P-kçHëÀÂ2É/ûIJ'j(LNÙFut[)èZs+&°Sd=Ï!<â_Ç>rêi`l"  # noqa
@pytest.fixture(scope="session")
def mock_pdf(tmpdir_factory):
    """Two-page A4-sized PDF with one dark-on-white text snippet per page."""
    pages = []
    for content, offset in [("I am a jedi!", (50, 100)), ("No, I am your father.", (40, 300))]:
        text_img = synthesize_text_img(content, background_color=(255, 255, 255), text_color=(0, 0, 0))
        page = Image.new(text_img.mode, (1240, 1754), (255, 255, 255))
        page.paste(text_img, offset)
        pages.append(page)

    # Save both pages into a single PDF
    fn = tmpdir_factory.mktemp("data").join("mock_pdf_file.pdf")
    pages[0].save(str(fn), "PDF", save_all=True, append_images=pages[1:])
    return str(fn)
@pytest.fixture(scope="session")
def mock_payslip(tmpdir_factory):
    """Download a sample payslip image and store it as a session-scoped JPEG file."""
    url = "https://3.bp.blogspot.com/-Es0oHTCrVEk/UnYA-iW9rYI/AAAAAAAAAFI/hWExrXFbo9U/s1600/003.jpg"
    target = str(tmpdir_factory.mktemp("data").join("mock_payslip.jpeg"))
    with open(target, "wb") as f:
        f.write(requests.get(url).content)
    return target
@pytest.fixture(scope="session")
def mock_tilted_payslip(mock_payslip, tmpdir_factory):
    """30°-rotated (expanded-canvas) version of the payslip image, saved as JPEG."""
    rotated = geometry.rotate_image(reader.read_img_as_numpy(mock_payslip), 30, expand=True)
    out_path = str(tmpdir_factory.mktemp("data").join("mock_tilted_payslip.jpg"))
    cv2.imwrite(out_path, rotated)
    return out_path
@pytest.fixture(scope="session")
def mock_text_box_stream():
    """Raw bytes of a word-crop image hosted on the doctr static server."""
    return requests.get("https://doctr-static.mindee.com/models?id=v0.5.1/word-crop.png&src=0").content
@pytest.fixture(scope="session")
def mock_text_box(mock_text_box_stream, tmpdir_factory):
    """Write the word-crop bytes to a temporary PNG and return its path."""
    fn = tmpdir_factory.mktemp("data").join("mock_text_box_file.png")
    with open(fn, "wb") as f:
        f.write(mock_text_box_stream)
    return str(fn)
@pytest.fixture(scope="session")
def mock_artefact_image_stream():
    """Raw JPEG bytes of a dummy artefact image fetched from the doctr releases."""
    url = "https://github.com/mindee/doctr/releases/download/v0.8.1/artefact_dummy.jpg"
    return requests.get(url).content