Repository: felixdittrich92/OnnxTR
Branch: main
Commit: b10318c76097
Files: 126
Total size: 480.5 KB
Directory structure:
gitextract_7yglu2_f/
├── .conda/
│ └── meta.yaml
├── .github/
│ ├── CODEOWNERS
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yml
│ │ ├── config.yml
│ │ └── feature_request.yml
│ ├── dependabot.yml
│ ├── release.yml
│ └── workflows/
│ ├── builds.yml
│ ├── clear_caches.yml
│ ├── demo.yml
│ ├── docker.yml
│ ├── main.yml
│ ├── publish.yml
│ └── style.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.md
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── demo/
│ ├── README.md
│ ├── app.py
│ ├── packages.txt
│ └── requirements.txt
├── onnxtr/
│ ├── __init__.py
│ ├── contrib/
│ │ ├── __init__.py
│ │ ├── artefacts.py
│ │ └── base.py
│ ├── file_utils.py
│ ├── io/
│ │ ├── __init__.py
│ │ ├── elements.py
│ │ ├── html.py
│ │ ├── image.py
│ │ ├── pdf.py
│ │ └── reader.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── _utils.py
│ │ ├── builder.py
│ │ ├── classification/
│ │ │ ├── __init__.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ └── mobilenet.py
│ │ │ ├── predictor/
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ └── zoo.py
│ │ ├── detection/
│ │ │ ├── __init__.py
│ │ │ ├── _utils/
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ ├── core.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── differentiable_binarization.py
│ │ │ │ ├── fast.py
│ │ │ │ └── linknet.py
│ │ │ ├── postprocessor/
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ ├── predictor/
│ │ │ │ ├── __init__.py
│ │ │ │ └── base.py
│ │ │ └── zoo.py
│ │ ├── engine.py
│ │ ├── factory/
│ │ │ ├── __init__.py
│ │ │ └── hub.py
│ │ ├── predictor/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ └── predictor.py
│ │ ├── preprocessor/
│ │ │ ├── __init__.py
│ │ │ └── base.py
│ │ ├── recognition/
│ │ │ ├── __init__.py
│ │ │ ├── core.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── crnn.py
│ │ │ │ ├── master.py
│ │ │ │ ├── parseq.py
│ │ │ │ ├── sar.py
│ │ │ │ ├── viptr.py
│ │ │ │ └── vitstr.py
│ │ │ ├── predictor/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── _utils.py
│ │ │ │ └── base.py
│ │ │ ├── utils.py
│ │ │ └── zoo.py
│ │ └── zoo.py
│ ├── py.typed
│ ├── transforms/
│ │ ├── __init__.py
│ │ └── base.py
│ └── utils/
│ ├── __init__.py
│ ├── common_types.py
│ ├── data.py
│ ├── fonts.py
│ ├── geometry.py
│ ├── multithreading.py
│ ├── reconstitution.py
│ ├── repr.py
│ ├── visualization.py
│ └── vocabs.py
├── pyproject.toml
├── scripts/
│ ├── convert_to_float16.py
│ ├── evaluate.py
│ ├── latency.py
│ └── quantize.py
├── setup.py
└── tests/
├── common/
│ ├── test_contrib.py
│ ├── test_core.py
│ ├── test_engine_cfg.py
│ ├── test_headers.py
│ ├── test_io.py
│ ├── test_io_elements.py
│ ├── test_models.py
│ ├── test_models_builder.py
│ ├── test_models_classification.py
│ ├── test_models_detection.py
│ ├── test_models_detection_utils.py
│ ├── test_models_factory.py
│ ├── test_models_preprocessor.py
│ ├── test_models_recognition.py
│ ├── test_models_recognition_utils.py
│ ├── test_models_zoo.py
│ ├── test_transforms.py
│ ├── test_utils_data.py
│ ├── test_utils_fonts.py
│ ├── test_utils_geometry.py
│ ├── test_utils_multithreading.py
│ ├── test_utils_reconstitution.py
│ ├── test_utils_visualization.py
│ └── test_utils_vocabs.py
└── conftest.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .conda/meta.yaml
================================================
{% set pyproject = load_file_data('../pyproject.toml', from_recipe_dir=True) %}
{% set project = pyproject.get('project') %}
{% set urls = pyproject.get('project', {}).get('urls') %}
{% set version = environ.get('BUILD_VERSION', '0.8.2a0') %}
package:
name: onnxtr
version: {{ version }}
source:
fn: onnxtr-{{ version }}.tar.gz
url: ../dist/onnxtr-{{ version }}.tar.gz
build:
script: python setup.py install --single-version-externally-managed --record=record.txt
requirements:
host:
- python>=3.10, <3.12
- setuptools
run:
- numpy >=1.16.0, <3.0.0
- scipy >=1.4.0, <2.0.0
- pillow >=9.2.0
- opencv >=4.5.0, <5.0.0
- pypdfium2-team::pypdfium2_helpers >=4.11.0, <5.0.0
- pyclipper >=1.2.0, <2.0.0
- langdetect >=1.0.9, <2.0.0
- rapidfuzz >=3.0.0, <4.0.0
- huggingface_hub >=0.20.0, <1.0.0
- defusedxml >=0.7.0
- anyascii >=0.3.2
- tqdm >=4.30.0
test:
requires:
- pip
- onnxruntime
imports:
- onnxtr
about:
home: {{ urls.get('repository') }}
license: Apache-2.0
license_file: {{ project.get('license', {}).get('file') }}
summary: {{ project.get('description') | replace(":", " -")}}
dev_url: {{ urls.get('repository') }}
================================================
FILE: .github/CODEOWNERS
================================================
* @felixdittrich92
================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
github: felixdittrich92
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
polar: # Replace with a single Polar username
buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
thanks_dev: # Replace with a single thanks.dev username
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================
name: 🐛 Bug report
description: Create a report to help us improve the library
labels: 'type: bug'
body:
- type: markdown
attributes:
value: >
#### Before reporting a bug, please check that the issue hasn't already been addressed in [the existing and past issues](https://github.com/felixdittrich92/onnxtr/issues).
- type: textarea
attributes:
label: Bug description
description: |
A clear and concise description of what the bug is.
Please explain the result you observed and the behavior you were expecting.
placeholder: |
A clear and concise description of what the bug is.
validations:
required: true
- type: textarea
attributes:
label: Code snippet to reproduce the bug
description: |
Sample code to reproduce the problem.
Please wrap your code snippet with ```` ```triple quotes blocks``` ```` for readability.
placeholder: |
```python
Sample code to reproduce the problem
```
validations:
required: true
- type: textarea
attributes:
label: Error traceback
description: |
The error message you received running the code snippet, with the full traceback.
Please wrap your error message with ```` ```triple quotes blocks``` ```` for readability.
placeholder: |
```
The error message you got, with the full traceback.
```
validations:
required: true
- type: textarea
attributes:
label: Environment
description: |
Please describe your environment:
OS:
Python version:
Library version:
Onnxruntime version:
validations:
required: true
- type: markdown
attributes:
value: >
Thanks for helping us improve the library!
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: true
contact_links:
- name: Usage questions
url: https://github.com/felixdittrich92/OnnxTR/discussions
about: Ask questions and discuss with other OnnxTR community members
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yml
================================================
name: 🚀 Feature request
description: >
Submit a proposal/request for a new feature for OnnxTR. Please search for existing issues before creating a new one.
For non-onnx related features please use the [main repository](https://github.com/mindee/doctr/issues).
labels: 'type: enhancement'
body:
- type: textarea
attributes:
label: 🚀 The feature
description: >
A clear and concise description of the feature proposal
validations:
required: true
- type: textarea
attributes:
label: Additional context
description: >
Add any other context or screenshots about the feature request.
- type: markdown
attributes:
value: >
Thanks for contributing 🎉
================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
open-pull-requests-limit: 10
target-branch: "main"
labels: ["topic: build"]
schedule:
interval: weekly
day: sunday
- package-ecosystem: "github-actions"
directory: "/"
open-pull-requests-limit: 10
target-branch: "main"
labels: ["topic: CI/CD"]
schedule:
interval: weekly
day: sunday
groups:
github-actions:
patterns:
- "*"
================================================
FILE: .github/release.yml
================================================
changelog:
exclude:
labels:
- ignore-for-release
categories:
- title: Breaking Changes 🛠
labels:
- "type: breaking change"
# NEW FEATURES
- title: New Features
labels:
- "type: new feature"
# BUG FIXES
- title: Bug Fixes
labels:
- "type: bug"
# IMPROVEMENTS
- title: Improvements
labels:
- "type: enhancement"
# MISC
- title: Miscellaneous
labels:
- "type: misc"
================================================
FILE: .github/workflows/builds.yml
================================================
name: builds
on:
push:
branches: main
pull_request:
branches: main
schedule:
# Runs every 2 weeks on Monday at 03:00 UTC
- cron: '0 3 * * 1'
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python: ["3.10", "3.11", "3.12", "3.13"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
# MacOS issue ref.: https://github.com/actions/setup-python/issues/855 & https://github.com/actions/setup-python/issues/865
python-version: ${{ matrix.os == 'macos-latest' && matrix.python == '3.10' && '3.11' || matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
- name: Install package
run: |
python -m pip install --upgrade pip
pip install -e .[cpu-headless,viz] --upgrade
- name: Import package
run: python -c "import onnxtr; print(onnxtr.__version__)"
conda:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: conda-incubator/setup-miniconda@v4
with:
auto-update-conda: true
python-version: "3.10"
channels: pypdfium2-team,bblanchon,defaults,conda-forge
channel-priority: strict
- name: Install dependencies
shell: bash -el {0}
run: conda install -y conda-build conda-verify anaconda-client
- name: Install libEGL
run: sudo apt-get update && sudo apt-get install -y libegl1
- name: Build and verify
shell: bash -el {0}
run: |
python setup.py sdist
mkdir conda-dist
conda build .conda/ --output-folder conda-dist
conda-verify conda-dist/linux-64/*conda --ignore=C1115
================================================
FILE: .github/workflows/clear_caches.yml
================================================
name: Clear GitHub runner caches
on:
workflow_dispatch:
schedule:
- cron: '0 0 * * *' # Runs once a day
jobs:
clear:
name: Clear caches
runs-on: ubuntu-latest
steps:
- uses: MyAlbum/purge-cache@v2
with:
max-age: 172800 # Caches older than 2 days are deleted
================================================
FILE: .github/workflows/demo.yml
================================================
name: Sync Hugging Face demo
on:
# Run 'test-demo' on every pull request to the main branch
pull_request:
branches: [main]
# Run 'sync-to-hub' on push when tagging (e.g., 'v*') and on a scheduled cron job
push:
tags:
- 'v*'
schedule:
- cron: '0 2 10 * *' # At 02:00 on day-of-month 10 (every month)
# Allow manual triggering of the workflow
workflow_dispatch:
jobs:
# This job runs on every pull request to main
test-demo:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python: ["3.10"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('demo/requirements.txt') }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r demo/requirements.txt --upgrade
- name: Start Gradio demo
run: |
nohup python demo/app.py &
sleep 10 # Allow some time for the Gradio server to start
- name: Check demo build
run: |
curl --fail http://127.0.0.1:7860/ || exit 1
# This job only runs when a new version tag is pushed or during the cron job
sync-to-hub:
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
needs: test-demo
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.10"
- name: Install huggingface_hub
run: pip install huggingface-hub
- name: Upload folder to Hugging Face
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
python -c "
import os
from huggingface_hub import HfApi
# Read the token from the step's env block instead of interpolating the
# secret into the script source (avoids script-injection/leak pitfalls).
api = HfApi(token=os.environ['HF_TOKEN'])
repo_id = 'Felix92/OnnxTR-OCR'
api.upload_folder(repo_id=repo_id, repo_type='space', folder_path='demo/')
api.restart_space(repo_id=repo_id, factory_reboot=True)
"
================================================
FILE: .github/workflows/docker.yml
================================================
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
#
name: Docker image on ghcr.io
on:
push:
tags:
- 'v*'
pull_request:
branches: main
schedule:
- cron: '0 2 1 6 *' # At 02:00 on day-of-month 1 in June (once a year actually)
env:
REGISTRY: ghcr.io
jobs:
build-and-push-image:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
image:
- "ubuntu:24.04" # Base image for CPU variants
- "nvidia/cuda:12.6.2-base-ubuntu24.04" # Base image for GPU
variant:
- "cpu-headless" # CPU variant 1
- "openvino-headless" # CPU variant 2
- "gpu-headless" # GPU variant
python: [3.10.13]
# Exclude invalid combinations
exclude:
- image: "nvidia/cuda:12.6.2-base-ubuntu24.04"
variant: "cpu-headless"
- image: "nvidia/cuda:12.6.2-base-ubuntu24.04"
variant: "openvino-headless"
- image: "ubuntu:24.04"
variant: "gpu-headless"
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v6
- name: Log in to the Container registry
uses: docker/login-action@v4
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Sanitize docker tag
run: |
# Start with the base prefix
PREFIX_DOCKER_TAG="OnnxTR-${{ matrix.variant }}-py${{ matrix.python }}"
# Replace any commas with hyphens (if needed)
PREFIX_DOCKER_TAG=$(echo "$PREFIX_DOCKER_TAG" | sed 's/,/-/g')
# Determine suffix based on image
IMAGE="${{ matrix.image }}"
case "$IMAGE" in
"nvidia/cuda:"*)
SUFFIX=$(echo "$IMAGE" | sed -E 's|.*/cuda:([0-9]+\.[0-9]+\.[0-9]+)-base-(ubuntu[0-9]+\.[0-9]+)|-\2-cuda\1|')
;;
"ubuntu:"*)
SUFFIX=$(echo "$IMAGE" | sed -E 's|ubuntu:([0-9]+\.[0-9]+)|-ubuntu\1|')
;;
*)
SUFFIX=""
;;
esac
# Combine the prefix, suffix, and ensure ending hyphen
PREFIX_DOCKER_TAG="${PREFIX_DOCKER_TAG}${SUFFIX}-"
# Export to environment
echo "PREFIX_DOCKER_TAG=${PREFIX_DOCKER_TAG}" >> $GITHUB_ENV
# Debugging output
echo "Final Docker Tag: $PREFIX_DOCKER_TAG"
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v6
with:
images: ${{ env.REGISTRY }}/${{ github.repository }}
tags: |
# used only on schedule event
type=schedule,pattern={{date 'YYYY-MM'}},prefix=${{ env.PREFIX_DOCKER_TAG }}
# used only if a tag following semver is published
type=semver,pattern={{raw}},prefix=${{ env.PREFIX_DOCKER_TAG }}
- name: Build Docker image
id: build
uses: docker/build-push-action@v7
with:
context: .
build-args: |
BASE_IMAGE=${{ matrix.image }}
SYSTEM=${{ matrix.variant }}
PYTHON_VERSION=${{ matrix.python }}
ONNXTR_REPO=${{ github.repository }}
ONNXTR_VERSION=${{ github.sha }}
push: false # push only if `import onnxtr` works
tags: ${{ steps.meta.outputs.tags }}
- name: Check if `import onnxtr` works
run: docker run ${{ steps.build.outputs.imageid }} python3 -c 'import onnxtr; print(onnxtr.__version__)'
- name: Push Docker image
if: ${{ (github.ref == 'refs/heads/main' && github.event_name != 'pull_request') || (startsWith(github.ref, 'refs/tags') && github.event_name == 'push') }}
uses: docker/build-push-action@v7
with:
context: .
build-args: |
BASE_IMAGE=${{ matrix.image }}
SYSTEM=${{ matrix.variant }}
PYTHON_VERSION=${{ matrix.python }}
ONNXTR_REPO=${{ github.repository }}
ONNXTR_VERSION=${{ github.sha }}
push: true
tags: ${{ steps.meta.outputs.tags }}
================================================
FILE: .github/workflows/main.yml
================================================
name: tests
on:
push:
branches: main
pull_request:
branches: main
schedule:
# Runs every 2 weeks on Monday at 03:00 UTC
- cron: '0 3 * * 1'
jobs:
pytest-common:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: ["3.10", "3.11", "3.12"]
backend: ["cpu-headless", "openvino-headless"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-tests
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[${{ matrix.backend }},viz,html,testing] --upgrade
- name: Run unittests
run: |
coverage run -m pytest tests/common/ -rs --memray
coverage xml -o coverage-common-${{ matrix.backend }}-${{ matrix.python }}.xml
- uses: actions/upload-artifact@v7
with:
name: coverage-common-${{ matrix.backend }}-${{ matrix.python }}
path: ./coverage-common-${{ matrix.backend }}-${{ matrix.python }}.xml
if-no-files-found: error
codecov-upload:
runs-on: ubuntu-latest
needs: [ pytest-common ]
steps:
- uses: actions/checkout@v6
- uses: actions/download-artifact@v8
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v6
with:
flags: unittests
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }}
================================================
FILE: .github/workflows/publish.yml
================================================
name: publish
on:
release:
types: [published]
jobs:
pypi:
if: "!github.event.release.prerelease"
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python: ["3.10"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine --upgrade
- name: Get release tag
id: release_tag
run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV
- name: Build and publish
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
VERSION: ${{ env.VERSION }}
run: |
BUILD_VERSION=$VERSION python setup.py sdist bdist_wheel
twine check dist/*
twine upload dist/*
pypi-check:
needs: pypi
if: "!github.event.release.prerelease"
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python: ["3.10"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Install package
run: |
python -m pip install --upgrade pip
pip install onnxtr[cpu] --upgrade
python -c "from importlib.metadata import version; print(version('onnxtr'))"
conda:
if: "!github.event.release.prerelease"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: conda-incubator/setup-miniconda@v4
with:
auto-update-conda: true
python-version: "3.10"
channels: pypdfium2-team,bblanchon,defaults,conda-forge
channel-priority: strict
- name: Install dependencies
shell: bash -el {0}
run: conda install -y conda-build conda-verify anaconda-client
- name: Install libEGL
run: sudo apt-get update && sudo apt-get install -y libegl1
- name: Get release tag
id: release_tag
run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV
- name: Build and publish
shell: bash -el {0}
env:
ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_TOKEN }}
VERSION: ${{ env.VERSION }}
run: |
# Export in the current shell: $GITHUB_ENV only takes effect in *subsequent*
# steps, so the sdist/conda-build commands below would not see BUILD_VERSION.
export BUILD_VERSION="${VERSION}"
python setup.py sdist
mkdir conda-dist
conda build .conda/ --output-folder conda-dist
conda-verify conda-dist/linux-64/*conda --ignore=C1115
anaconda upload conda-dist/linux-64/*conda
conda-check:
if: "!github.event.release.prerelease"
runs-on: ubuntu-latest
needs: conda
steps:
- uses: conda-incubator/setup-miniconda@v4
with:
auto-update-conda: true
python-version: "3.10"
- name: Install package
shell: bash -el {0}
run: |
conda config --set channel_priority strict
conda install -c conda-forge onnxruntime
conda install -c felix92 -c pypdfium2-team -c bblanchon -c defaults -c conda-forge onnxtr
python -c "from importlib.metadata import version; print(version('onnxtr'))"
================================================
FILE: .github/workflows/style.yml
================================================
name: style
on:
push:
branches: main
pull_request:
branches: main
jobs:
ruff:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: ["3.10"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Run ruff
run: |
pip install ruff --upgrade
ruff --version
ruff check --diff .
mypy:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
python: ["3.10"]
steps:
- uses: actions/checkout@v6
- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python }}
architecture: x64
- name: Cache python modules
uses: actions/cache@v5
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[dev] --upgrade
pip install mypy --upgrade
- name: Run mypy
run: |
mypy --version
mypy
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Temp files
onnxtr/version.py
logs/
wandb/
.idea/
# Model files
*.onnx
.qodo
# Profile files
yappi_profile.stats
memray_profile.bin
memray_flamegraph.html
================================================
FILE: .pre-commit-config.yaml
================================================
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-ast
- id: check-yaml
exclude: .conda
- id: check-toml
- id: check-json
- id: check-added-large-files
exclude: docs/images/
- id: end-of-file-fixer
- id: trailing-whitespace
- id: debug-statements
- id: check-merge-conflict
- id: no-commit-to-branch
args: ['--branch', 'main']
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.0
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
contact@mindee.com.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
================================================
FILE: Dockerfile
================================================
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

ARG SYSTEM
ARG PYTHON_VERSION

# System dependencies in a single layer; apt caches are cleaned to keep the image small.
# FIX: removed a stray `fi` (leftover from a deleted shell `if`) that was being
# passed as a literal extra argument to the final `rm -rf` command.
RUN apt-get update && apt-get install -y --no-install-recommends \
    # - Other packages
    build-essential \
    pkg-config \
    curl \
    wget \
    software-properties-common \
    unzip \
    git \
    # - Packages to build Python
    tar make gcc zlib1g-dev libffi-dev libssl-dev liblzma-dev libbz2-dev libsqlite3-dev \
    # - Packages for docTR
    libgl1-mesa-dev libsm6 libxext6 libxrender-dev libpangocairo-1.0-0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Python from source into /opt/python
# FIX: download over https instead of http
RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
    tar -zxf Python-$PYTHON_VERSION.tgz && \
    cd Python-$PYTHON_VERSION && \
    mkdir /opt/python/ && \
    ./configure --prefix=/opt/python && \
    make && \
    make install && \
    cd .. && \
    rm Python-$PYTHON_VERSION.tgz && \
    rm -r Python-$PYTHON_VERSION

ENV PATH=/opt/python/bin:$PATH

# Install OnnxTR from the given repo/ref, with the extras selected via SYSTEM (e.g. cpu, gpu)
ARG ONNXTR_REPO='felixdittrich92/onnxtr'
ARG ONNXTR_VERSION=main
RUN pip3 install -U pip setuptools wheel && \
    pip3 install "onnxtr[$SYSTEM,html]@git+https://github.com/$ONNXTR_REPO.git@$ONNXTR_VERSION"
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: Makefile
================================================
.PHONY: quality style test docs-single-version docs
# this target runs lint (ruff) and static type checks (mypy) on all files; it modifies nothing
quality:
ruff check .
mypy onnxtr/
# this target reformats all files and auto-fixes lint issues, potentially modifying some of them
style:
ruff format .
ruff check --fix .
# Run the common test suite under coverage (with pytest-memray); fails below 80% line coverage
test:
coverage run -m pytest tests/common/ -rs --memray
coverage report --fail-under=80 --show-missing
# Check that docs can build as a single version with sphinx-build
docs-single-version:
sphinx-build docs/source docs/_build -a
# Check that the full docs can build via the docs build script
docs:
cd docs && bash build.sh
================================================
FILE: README.md
================================================
[](LICENSE)

[](https://codecov.io/gh/felixdittrich92/OnnxTR)
[](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
[](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr)
[](https://socket.dev/pypi/package/onnxtr/overview/0.8.1/tar-gz)
[](https://pypi.org/project/OnnxTR/)
[](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr)
[](https://huggingface.co/spaces/Felix92/OnnxTR-OCR)

> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library to provide an Onnx pipeline for docTR. For feature requests, which are not directly related to the Onnx pipeline, please refer to the base project.
**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**
What you can expect from this repository:
- efficient ways to parse textual information (localize and identify each word) from your documents
- a Onnx pipeline for docTR, a wrapper around the [doctr](https://github.com/mindee/doctr) library - no PyTorch or TensorFlow dependencies
- more lightweight package with faster inference latency and less required resources
- 8-Bit quantized models for faster inference on CPU

## Installation
### Prerequisites
Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.
### Latest release
You can then install the latest release of the package using [pypi](https://pypi.org/project/OnnxTR/) as follows:
**NOTE:**
Currently supported execution providers by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU), CoreML (Apple Silicon).
For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/getting-started).
- **Prerequisites:** CUDA & cuDNN needs to be installed before [Version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
```shell
# standard cpu support
pip install "onnxtr[cpu]"
pip install "onnxtr[cpu-headless]" # same as cpu but with opencv-headless
# with gpu support
pip install "onnxtr[gpu]"
pip install "onnxtr[gpu-headless]" # same as gpu but with opencv-headless
# OpenVINO cpu | gpu support for Intel CPUs | GPUs
pip install "onnxtr[openvino]"
pip install "onnxtr[openvino-headless]" # same as openvino but with opencv-headless
# with HTML support
pip install "onnxtr[html]"
# with support for visualization
pip install "onnxtr[viz]"
# with support for all dependencies
pip install "onnxtr[html, gpu, viz]"
```
**Recommendation:**
If you have:
- a NVIDIA GPU, use one of the `gpu` variants
- an Intel CPU or GPU, use one of the `openvino` variants
- an Apple Silicon Mac, use one of the `cpu` variants (CoreML is auto-detected)
- otherwise, use one of the `cpu` variants
**OpenVINO:**
By default, OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision. To change the device or for further configuration, please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).
### Reading files
Documents can be interpreted from PDF / Images / Webpages / Multiple page images using the following code snippet:
```python
from onnxtr.io import DocumentFile
# PDF
pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
# Image
single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
# Webpage (requires `weasyprint` to be installed)
webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
# Multiple page images
multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
```
### Putting it together
Let's use the default `ocr_predictor` model for an example:
```python
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor, EngineConfig
model = ocr_predictor(
det_arch="fast_base", # detection architecture
reco_arch="vitstr_base", # recognition architecture
det_bs=2, # detection batch size
reco_bs=512, # recognition batch size
# Document related parameters
assume_straight_pages=True, # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
straighten_pages=False, # set to `True` if the pages should be straightened before final processing (default: False)
export_as_straight_boxes=False, # set to `True` if the boxes should be exported as if the pages were straight (default: False)
# Preprocessing related parameters
preserve_aspect_ratio=True, # set to `False` if the aspect ratio should not be preserved (default: True)
symmetric_pad=True, # set to `False` to disable symmetric padding (default: True)
# Additional parameters - meta information
detect_orientation=False, # set to `True` if the orientation of the pages should be detected (default: False)
detect_language=False, # set to `True` if the language of the pages should be detected (default: False)
# Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
disable_crop_orientation=False, # set to `True` if the crop orientation classification should be disabled (default: False)
disable_page_orientation=False, # set to `True` if the general page orientation classification should be disabled (default: False)
# DocumentBuilder specific parameters
resolve_lines=True, # whether words should be automatically grouped into lines (default: True)
resolve_blocks=False, # whether lines should be automatically grouped into blocks (default: False)
paragraph_break=0.035, # relative length of the minimum space separating paragraphs (default: 0.035)
# OnnxTR specific parameters
# NOTE: 8-Bit quantized models are not available for FAST detection models and can in general lead to poorer accuracy
load_in_8_bit=False, # set to `True` to load 8-bit quantized models instead of the full precision ones (default: False)
# Advanced engine configuration options
det_engine_cfg=EngineConfig(), # detection model engine configuration (default: internal predefined configuration)
reco_engine_cfg=EngineConfig(), # recognition model engine configuration (default: internal predefined configuration)
clf_engine_cfg=EngineConfig(), # classification (orientation) model engine configuration (default: internal predefined configuration)
)
# PDF
doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
# Analyze
result = model(doc)
# Display the result (requires matplotlib & mplcursors to be installed)
result.show()
```

Or even rebuild the original document from its predictions:
```python
import matplotlib.pyplot as plt
synthetic_pages = result.synthesize()
plt.imshow(synthetic_pages[0])
plt.axis("off")
plt.show()
```

The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`).
To get a better understanding of the document model, check out [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure):
You can also export them as a nested dict, more appropriate for JSON format / render it or export as XML (hocr format):
```python
json_output = result.export() # nested dict
text_output = result.render() # human-readable text
xml_output = result.export_as_xml() # hocr format
for output in xml_output:
xml_bytes_string = output[0]
xml_element = output[1]
```
Advanced engine configuration options
You can also define advanced engine configurations for the models / predictors:
```python
from onnxruntime import SessionOptions
from onnxtr.models import ocr_predictor, EngineConfig
general_options = (
SessionOptions()
) # For configuration options see: https://onnxruntime.ai/docs/api/python/api_summary.html#sessionoptions
general_options.enable_cpu_mem_arena = False
# NOTE: The following would force to run only on the GPU if no GPU is available it will raise an error
# List of strings e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] or a list of tuples with the provider and its options e.g.
# [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
providers = [
("CUDAExecutionProvider", {"device_id": 0, "cudnn_conv_algo_search": "DEFAULT"})
] # For available providers see: https://onnxruntime.ai/docs/execution-providers/
engine_config = EngineConfig(session_options=general_options, providers=providers)
# We use the default predictor with the custom engine configuration
# NOTE: You can define different engine configurations for detection, recognition and classification depending on your needs
predictor = ocr_predictor(det_engine_cfg=engine_config, reco_engine_cfg=engine_config, clf_engine_cfg=engine_config)
```
You can also dynamically configure whether the memory arena should shrink:
```python
from random import random
from onnxruntime import RunOptions, SessionOptions
from onnxtr.models import ocr_predictor, EngineConfig
def arena_shrinkage_handler(run_options: RunOptions) -> RunOptions:
"""
Shrink the memory arena on 10% of inference runs.
"""
if random() < 0.1:
run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu:0")
return run_options
engine_config = EngineConfig(run_options_provider=arena_shrinkage_handler)
engine_config.session_options.enable_mem_pattern = False
predictor = ocr_predictor(det_engine_cfg=engine_config, reco_engine_cfg=engine_config, clf_engine_cfg=engine_config)
```
## Loading custom exported models
You can also load docTR custom exported models:
For exporting please take a look at the [doctr documentation](https://mindee.github.io/doctr/using_doctr/using_model_export.html#export-to-onnx).
```python
from onnxtr.models import ocr_predictor, linknet_resnet18, parseq
reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
det_model = linknet_resnet18("path_to_custom_model.onnx")
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
```
## Loading models from HuggingFace Hub
You can also load models from the HuggingFace Hub:
```python
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor, from_hub
img = DocumentFile.from_images([""])
# Load your model from the hub
model = from_hub("onnxtr/my-model")
# Pass it to the predictor
# If your model is a recognition model:
predictor = ocr_predictor(det_arch="db_mobilenet_v3_large", reco_arch=model)
# If your model is a detection model:
predictor = ocr_predictor(det_arch=model, reco_arch="crnn_mobilenet_v3_small")
# Get your predictions
res = predictor(img)
```
HF Hub search: [here](https://huggingface.co/models?search=onnxtr).
Collection: [here](https://huggingface.co/collections/Felix92/onnxtr-66bf213a9f88f7346c90e842)
Or push your own models to the hub:
```python
from onnxtr.models import parseq, push_to_hf_hub, login_to_hub
from onnxtr.utils.vocabs import VOCABS
# Login to the hub
login_to_hub()
# Recognition model
model = parseq("~/onnxtr-parseq-multilingual-v1.onnx", vocab=VOCABS["multilingual"])
push_to_hf_hub(
model,
model_name="onnxtr-parseq-multilingual-v1",
task="recognition", # The task for which the model is intended [detection, recognition, classification]
arch="parseq", # The name of the model architecture
override=False, # Set to `True` if you want to override an existing model / repository
)
# Detection model
model = linknet_resnet18("~/onnxtr-linknet-resnet18.onnx")
push_to_hf_hub(model, model_name="onnxtr-linknet-resnet18", task="detection", arch="linknet_resnet18", override=True)
```
## Models architectures
Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:
### Text Detection
- DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf).
- LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf)
- FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf)
### Text Recognition
- CRNN: [An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/pdf/1507.05717.pdf).
- SAR: [Show, Attend and Read:A Simple and Strong Baseline for Irregular Text Recognition](https://arxiv.org/pdf/1811.00751.pdf).
- MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
- ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
- PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
```python
predictor = ocr_predictor()
predictor.list_archs()
{
"detection archs": [
"db_resnet34",
"db_resnet50",
"db_mobilenet_v3_large",
"linknet_resnet18",
"linknet_resnet34",
"linknet_resnet50",
"fast_tiny", # No 8-bit support
"fast_small", # No 8-bit support
"fast_base", # No 8-bit support
],
"recognition archs": [
"crnn_vgg16_bn",
"crnn_mobilenet_v3_small",
"crnn_mobilenet_v3_large",
"sar_resnet31",
"master",
"vitstr_small",
"vitstr_base",
"parseqviptr_tiny", # No 8-bit support
],
}
```
### Documentation
This repository is in sync with the [doctr](https://github.com/mindee/doctr) library, which provides a high-level API to perform OCR on documents.
This repository stays up-to-date with the latest features and improvements from the base project.
So we can refer to the [doctr documentation](https://mindee.github.io/doctr/) for more detailed information.
NOTE:
- `pretrained` is the default in OnnxTR, and not available as a parameter.
- docTR specific environment variables (e.g.: DOCTR_CACHE_DIR -> ONNXTR_CACHE_DIR) needs to be replaced with `ONNXTR_` prefix.
### Benchmarks
The CPU benchmarks were measured on a `i7-14700K Intel CPU`.
The GPU benchmarks were measured on a `RTX 4080 Nvidia GPU`.
Benchmarking performed on the FUNSD dataset and CORD dataset.
docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.
The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes as comparison `~0.17s / Page` on the FUNSD dataset and `~0.12s / Page` on the CORD dataset in **full precision** on CPU.
- CPU benchmarks:
|Library |FUNSD (199 pages) |CORD (900 pages) |
|------------------------------------|-------------------------------|-------------------------------|
|docTR (CPU) - v0.8.1 | ~1.29s / Page | ~0.60s / Page |
|**OnnxTR (CPU)** - v0.6.0 | ~0.57s / Page | **~0.25s / Page** |
|**OnnxTR (CPU) 8-bit** - v0.6.0 | **~0.38s / Page** | **~0.14s / Page** |
|**OnnxTR (CPU-OpenVINO)** - v0.6.0 | **~0.15s / Page** | **~0.14s / Page** |
|EasyOCR (CPU) - v1.7.1 | ~1.96s / Page | ~1.75s / Page |
|**PyTesseract (CPU)** - v0.3.10 | **~0.50s / Page** | ~0.52s / Page |
|Surya (line) (CPU) - v0.4.4 | ~48.76s / Page | ~35.49s / Page |
|PaddleOCR (CPU) - no cls - v2.7.3 | ~1.27s / Page | ~0.38s / Page |
- GPU benchmarks:
|Library |FUNSD (199 pages) |CORD (900 pages) |
|-------------------------------------|-------------------------------|-------------------------------|
|docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page |
|**docTR (GPU) float16** - v0.8.1 | **~0.06s / Page** | **~0.03s / Page** |
|OnnxTR (GPU) - v0.6.0 | **~0.06s / Page** | ~0.04s / Page |
|**OnnxTR (GPU) float16 - v0.6.0** | **~0.05s / Page** | **~0.03s / Page** |
|EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page |
|Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page |
|**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page | **~0.03s / Page** |
## Citation
If you wish to cite please refer to the base project citation, feel free to use this [BibTeX](http://www.bibtex.org/) reference:
```bibtex
@misc{doctr2021,
title={docTR: Document Text Recognition},
author={Mindee},
year={2021},
publisher = {GitHub},
howpublished = {\url{https://github.com/mindee/doctr}}
}
```
```bibtex
@misc{onnxtr2024,
title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
author={Felix Dittrich},
year={2024},
publisher = {GitHub},
howpublished = {\url{https://github.com/felixdittrich92/OnnxTR}}
}
```
## License
Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information.
================================================
FILE: demo/README.md
================================================
---
title: OnnxTR OCR
emoji: 🔥
colorFrom: red
colorTo: purple
sdk: gradio
sdk_version: 5.34.2
app_file: app.py
pinned: false
license: apache-2.0
---
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
## Run the demo locally
```bash
cd demo
pip install -r requirements.txt
python3 app.py
```
================================================
FILE: demo/app.py
================================================
import io
import os
from typing import Any
# NOTE: This is a fix to run the demo on the HuggingFace Zero GPU or CPU spaces
if os.environ.get("SPACES_ZERO_GPU") is not None:
    import spaces
else:

    class spaces:  # noqa: N801
        # Minimal stand-in for the HF `spaces` package on hosts without ZeroGPU:
        # `GPU` decorates a function without changing its behavior.
        @staticmethod
        def GPU(func):  # noqa: N802
            def _passthrough(*args, **kwargs):
                return func(*args, **kwargs)

            return _passthrough
import cv2
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.figure import Figure
from PIL import Image
from onnxtr.io import DocumentFile
from onnxtr.models import EngineConfig, from_hub, ocr_predictor
from onnxtr.models.predictor import OCRPredictor
from onnxtr.utils.visualization import visualize_page
# Text detection architectures selectable in the demo UI.
DET_ARCHS: list[str] = [
    "fast_base",
    "fast_small",
    "fast_tiny",
    "db_resnet50",
    "db_resnet34",
    "db_mobilenet_v3_large",
    "linknet_resnet18",
    "linknet_resnet34",
    "linknet_resnet50",
]
# Text recognition architectures selectable in the demo UI.
RECO_ARCHS: list[str] = [
    "crnn_vgg16_bn",
    "crnn_mobilenet_v3_small",
    "crnn_mobilenet_v3_large",
    "master",
    "sar_resnet31",
    "vitstr_small",
    "vitstr_base",
    "parseq",
    "viptr_tiny",
]
# Recognition models hosted on the HuggingFace Hub, loaded via `from_hub`
# instead of being resolved by architecture name.
CUSTOM_RECO_ARCHS: list[str] = [
    "Felix92/onnxtr-parseq-multilingual-v1",
]
def load_predictor(
    det_arch: str,
    reco_arch: str,
    use_gpu: bool,
    assume_straight_pages: bool,
    straighten_pages: bool,
    export_as_straight_boxes: bool,
    detect_language: bool,
    load_in_8_bit: bool,
    bin_thresh: float,
    box_thresh: float,
    disable_crop_orientation: bool = False,
    disable_page_orientation: bool = False,
) -> OCRPredictor:
    """Build an OnnxTR OCR predictor configured from the demo's UI settings.

    Args:
        det_arch: detection architecture name
        reco_arch: recognition architecture name, or a hub repo id listed in CUSTOM_RECO_ARCHS
        use_gpu: whether to use the default (GPU-capable) execution providers
        assume_straight_pages: whether to assume straight pages or not
        straighten_pages: whether to straighten rotated pages or not
        export_as_straight_boxes: whether to export straight boxes
        detect_language: whether to detect the language of the text
        load_in_8_bit: whether to load 8-bit quantized models
        bin_thresh: binarization threshold for the segmentation map
        box_thresh: minimal objectness score to consider a box
        disable_crop_orientation: whether to disable crop orientation or not
        disable_page_orientation: whether to disable page orientation or not

    Returns:
    -------
        instance of OCRPredictor
    """
    if use_gpu:
        engine_cfg = EngineConfig()
    else:
        # Force CPU execution with a conservative memory-arena strategy.
        engine_cfg = EngineConfig(
            providers=[("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
        )

    # Hub-hosted recognition models must be materialized through `from_hub`.
    recognition = from_hub(reco_arch) if reco_arch in CUSTOM_RECO_ARCHS else reco_arch

    predictor = ocr_predictor(
        det_arch=det_arch,
        reco_arch=recognition,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        detect_language=detect_language,
        load_in_8_bit=load_in_8_bit,
        export_as_straight_boxes=export_as_straight_boxes,
        detect_orientation=not assume_straight_pages,
        disable_crop_orientation=disable_crop_orientation,
        disable_page_orientation=disable_page_orientation,
        det_engine_cfg=engine_cfg,
        reco_engine_cfg=engine_cfg,
        clf_engine_cfg=engine_cfg,
    )
    # Apply the UI-tunable thresholds on the detection post-processor.
    predictor.det_predictor.model.postprocessor.bin_thresh = bin_thresh
    predictor.det_predictor.model.postprocessor.box_thresh = box_thresh
    return predictor
def forward_image(predictor: OCRPredictor, image: np.ndarray) -> np.ndarray:
    """Run only the detection stage of the predictor and return its raw output.

    Args:
        predictor: instance of OCRPredictor
        image: image to process

    Returns:
        the segmentation map produced by the detection model
    """
    batches = predictor.det_predictor.pre_processor([image])
    model_output = predictor.det_predictor.model(batches[0], return_model_output=True)
    return model_output["out_map"]
def matplotlib_to_pil(fig: Figure | np.ndarray) -> Image.Image:
    """Render a matplotlib figure (or a raw image array) into a PIL image.

    Args:
        fig: matplotlib figure or numpy array

    Returns:
        PIL image
    """
    buffer = io.BytesIO()
    # Figures know how to serialize themselves; raw arrays go through imsave.
    if isinstance(fig, Figure):
        fig.savefig(buffer)
    else:
        plt.imsave(buffer, fig)
    buffer.seek(0)
    return Image.open(buffer)
@spaces.GPU
def analyze_page(
    uploaded_file: Any,
    page_idx: int,
    det_arch: str,
    reco_arch: str,
    use_gpu: bool,
    assume_straight_pages: bool,
    disable_crop_orientation: bool,
    disable_page_orientation: bool,
    straighten_pages: bool,
    export_as_straight_boxes: bool,
    detect_language: bool,
    load_in_8_bit: bool,
    bin_thresh: float,
    box_thresh: float,
):
    """Run the full OCR pipeline on one page of the uploaded document.

    Args:
        uploaded_file: file to analyze
        page_idx: 1-based index of the page to analyze
        det_arch: detection architecture
        reco_arch: recognition architecture
        use_gpu: whether to use the GPU or not
        assume_straight_pages: whether to assume straight pages or not
        disable_crop_orientation: whether to disable crop orientation or not
        disable_page_orientation: whether to disable page orientation or not
        straighten_pages: whether to straighten rotated pages or not
        export_as_straight_boxes: whether to export straight boxes
        detect_language: whether to detect the language of the text
        load_in_8_bit: whether to load 8-bit quantized models
        bin_thresh: binarization threshold for the segmentation map
        box_thresh: minimal objectness score to consider a box

    Returns:
        input image, segmentation heatmap, output image, OCR output, synthesized page
    """
    if uploaded_file is None:
        return None, "Please upload a document", None, None, None

    # Decode the upload: PDFs are rasterized, images are read directly.
    if uploaded_file.name.endswith(".pdf"):
        doc = DocumentFile.from_pdf(uploaded_file)
    else:
        doc = DocumentFile.from_images(uploaded_file)

    # Clamp the page selection to the last available page.
    try:
        page = doc[page_idx - 1]
    except IndexError:
        page = doc[-1]
    img = page

    predictor = load_predictor(
        det_arch=det_arch,
        reco_arch=reco_arch,
        use_gpu=use_gpu,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        export_as_straight_boxes=export_as_straight_boxes,
        detect_language=detect_language,
        load_in_8_bit=load_in_8_bit,
        bin_thresh=bin_thresh,
        box_thresh=box_thresh,
        disable_crop_orientation=disable_crop_orientation,
        disable_page_orientation=disable_page_orientation,
    )

    # Detection-only forward pass, rescaled to the page for the heatmap view.
    seg_map = np.squeeze(forward_image(predictor, page))
    seg_map = cv2.resize(seg_map, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_LINEAR)
    seg_heatmap = matplotlib_to_pil(seg_map)

    out = predictor([page])
    page_export = out.pages[0].export()
    fig = visualize_page(out.pages[0].export(), out.pages[0].page, interactive=False, add_labels=False)
    out_img = matplotlib_to_pil(fig)

    # Synthesis needs axis-aligned boxes: either straight pages were assumed,
    # or rotated pages were straightened beforehand.
    synthesized_page = out.pages[0].synthesize() if (assume_straight_pages or straighten_pages) else None

    return img, seg_heatmap, out_img, page_export, synthesized_page
# Gradio UI: the left column holds the upload widget and every predictor
# setting; the right column displays the pipeline inputs/outputs.
with gr.Blocks(fill_height=True) as demo:
    gr.HTML(
        """
        To use this interactive demo for OnnxTR:
        1. Upload a document (PDF, JPG, or PNG)
        2. Select the model architectures for text detection and recognition you want to use
        3. Press the "Analyze page" button to process the uploaded document
        """
    )
    with gr.Row():
        # Configuration column
        with gr.Column(scale=1):
            upload = gr.File(label="Upload File [JPG | PNG | PDF]", file_types=[".pdf", ".jpg", ".png"])
            page_selection = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Page selection")
            det_model = gr.Dropdown(choices=DET_ARCHS, value=DET_ARCHS[0], label="Text detection model")
            reco_model = gr.Dropdown(
                choices=RECO_ARCHS + CUSTOM_RECO_ARCHS, value=RECO_ARCHS[0], label="Text recognition model"
            )
            use_gpu = gr.Checkbox(value=True, label="Use GPU")
            assume_straight = gr.Checkbox(value=True, label="Assume straight pages")
            disable_crop_orientation = gr.Checkbox(value=False, label="Disable crop orientation")
            disable_page_orientation = gr.Checkbox(value=False, label="Disable page orientation")
            straighten = gr.Checkbox(value=False, label="Straighten pages")
            export_as_straight_boxes = gr.Checkbox(value=False, label="Export as straight boxes")
            det_language = gr.Checkbox(value=False, label="Detect language")
            load_in_8_bit = gr.Checkbox(value=False, label="Load 8-bit quantized models")
            binarization_threshold = gr.Slider(
                minimum=0.1, maximum=0.9, value=0.3, step=0.1, label="Binarization threshold"
            )
            box_threshold = gr.Slider(minimum=0.1, maximum=0.9, value=0.1, step=0.1, label="Box threshold")
            analyze_button = gr.Button("Analyze page")
        # Results column
        with gr.Column(scale=3):
            with gr.Row():
                input_image = gr.Image(label="Input page", width=700, height=500)
                segmentation_heatmap = gr.Image(label="Segmentation heatmap", width=700, height=500)
                output_image = gr.Image(label="Output page", width=700, height=500)
            with gr.Row():
                with gr.Column(scale=3):
                    ocr_output = gr.JSON(label="OCR output", render=True, scale=1, height=500)
                with gr.Column(scale=3):
                    synthesized_page = gr.Image(label="Synthesized page", width=700, height=500)

    # Wire the button to the pipeline; the input order must match
    # `analyze_page`'s positional parameters exactly.
    analyze_button.click(
        analyze_page,
        inputs=[
            upload,
            page_selection,
            det_model,
            reco_model,
            use_gpu,
            assume_straight,
            disable_crop_orientation,
            disable_page_orientation,
            straighten,
            export_as_straight_boxes,
            det_language,
            load_in_8_bit,
            binarization_threshold,
            box_threshold,
        ],
        outputs=[input_image, segmentation_heatmap, output_image, ocr_output, synthesized_page],
    )

demo.launch(inbrowser=True, allowed_paths=["./data/logo.jpg"])
================================================
FILE: demo/packages.txt
================================================
python3-opencv
fonts-freefont-ttf
================================================
FILE: demo/requirements.txt
================================================
-e "onnxtr[gpu-headless,viz] @ git+https://github.com/felixdittrich92/OnnxTR.git"
gradio>=5.30.0,<7.0.0
spaces>=0.37.0
# Quick fix to avoid HuggingFace Spaces cudnn9.x Cuda12.x issue
# NOTE: outdated
# onnxruntime-gpu==1.19.0
================================================
FILE: onnxtr/__init__.py
================================================
from . import io, models, contrib, transforms, utils
from .version import __version__ # noqa: F401
================================================
FILE: onnxtr/contrib/__init__.py
================================================
from .artefacts import ArtefactDetector
================================================
FILE: onnxtr/contrib/artefacts.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import cv2
import numpy as np
from onnxtr.file_utils import requires_package
from .base import _BasePredictor
__all__ = ["ArtefactDetector"]
# Configuration registry: per-architecture input shape (CHW), class labels,
# and the URL of the pretrained ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "yolov8_artefact": {
        "input_shape": (3, 1024, 1024),
        "labels": ["bar_code", "qr_code", "logo", "photo"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/yolo_artefact-f9d66f14.onnx",
    },
}
class ArtefactDetector(_BasePredictor):
    """
    A class to detect artefacts in images

    >>> from onnxtr.io import DocumentFile
    >>> from onnxtr.contrib.artefacts import ArtefactDetector
    >>> doc = DocumentFile.from_images(["path/to/image.jpg"])
    >>> detector = ArtefactDetector()
    >>> results = detector(doc)

    Args:
        arch: the architecture to use
        batch_size: the batch size to use
        model_path: the path to the model to use
        labels: the labels to use
        input_shape: the input shape to use
        mask_labels: the mask labels to use
        conf_threshold: the confidence threshold to use
        iou_threshold: the intersection over union threshold to use
        **kwargs: additional arguments to be passed to `download_from_url`
    """

    def __init__(
        self,
        arch: str = "yolov8_artefact",
        batch_size: int = 2,
        model_path: str | None = None,
        labels: list[str] | None = None,
        input_shape: tuple[int, int, int] | None = None,
        conf_threshold: float = 0.5,
        iou_threshold: float = 0.5,
        **kwargs: Any,
    ) -> None:
        super().__init__(batch_size=batch_size, url=default_cfgs[arch]["url"], model_path=model_path, **kwargs)
        # Fall back to the registry defaults when the caller does not override.
        self.labels = labels or default_cfgs[arch]["labels"]
        self.input_shape = input_shape or default_cfgs[arch]["input_shape"]
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

    def preprocess(self, img: np.ndarray) -> np.ndarray:
        # Resize to the model's expected (W, H), transpose HWC -> CHW, and
        # scale pixel values into [0, 1].
        return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0)

    def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]:
        results = []

        # Pair each batch of raw model outputs with the images that produced it.
        for batch in zip(output, input_images):
            for out, img in zip(batch[0], batch[1]):
                org_height, org_width = img.shape[:2]
                # Scale factors mapping model-space coordinates back to the original image.
                width_scale, height_scale = org_width / self.input_shape[2], org_height / self.input_shape[1]
                for res in out:
                    sample_results = []
                    # Each transposed row is one candidate: [cx, cy, w, h, class scores...].
                    for row in np.transpose(np.squeeze(res)):
                        classes_scores = row[4:]
                        max_score = np.amax(classes_scores)
                        if max_score >= self.conf_threshold:
                            class_id = np.argmax(classes_scores)
                            x, y, w, h = row[0], row[1], row[2], row[3]
                            # to rescaled xmin, ymin, xmax, ymax
                            xmin = int((x - w / 2) * width_scale)
                            ymin = int((y - h / 2) * height_scale)
                            xmax = int((x + w / 2) * width_scale)
                            ymax = int((y + h / 2) * height_scale)
                            sample_results.append({
                                "label": self.labels[class_id],
                                "confidence": float(max_score),
                                "box": [xmin, ymin, xmax, ymax],
                            })

                    # Filter out overlapping boxes
                    boxes = [res["box"] for res in sample_results]
                    scores = [res["confidence"] for res in sample_results]
                    keep_indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf_threshold, self.iou_threshold)  # type: ignore[arg-type]
                    sample_results = [sample_results[i] for i in keep_indices]

                    results.append(sample_results)

        # Cache results so `show()` can render them later.
        self._results = results
        return results

    def show(self, **kwargs: Any) -> None:
        """
        Display the results

        Args:
            **kwargs: additional keyword arguments to be passed to `plt.show`
        """
        requires_package("matplotlib", "`.show()` requires matplotlib installed")
        import matplotlib.pyplot as plt
        from matplotlib.patches import Rectangle

        # visualize the results with matplotlib
        if self._results and self._inputs:
            for img, res in zip(self._inputs, self._results):
                plt.figure(figsize=(10, 10))
                plt.imshow(img)
                for obj in res:
                    xmin, ymin, xmax, ymax = obj["box"]
                    label = obj["label"]
                    plt.text(xmin, ymin, f"{label} {obj['confidence']:.2f}", color="red")
                    plt.gca().add_patch(
                        Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor="red", linewidth=2)
                    )
                # NOTE(review): show() is called per image here — confirm this matches
                # the intended (blocking) per-figure display behavior.
                plt.show(**kwargs)
================================================
FILE: onnxtr/contrib/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
import onnxruntime as ort
from onnxtr.utils.data import download_from_url
class _BasePredictor:
"""
Base class for all predictors
Args:
batch_size: the batch size to use
url: the url to use to download a model if needed
model_path: the path to the model to use
**kwargs: additional arguments to be passed to `download_from_url`
"""
def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None:
self.batch_size = batch_size
self.session = self._init_model(url, model_path, **kwargs)
self._inputs: list[np.ndarray] = []
self._results: list[Any] = []
def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any:
"""
Download the model from the given url if needed
Args:
url: the url to use
model_path: the path to the model to use
**kwargs: additional arguments to be passed to `download_from_url`
Returns:
Any: the ONNX loaded model
"""
if not url and not model_path:
raise ValueError("You must provide either a url or a model_path")
onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs)) # type: ignore[arg-type]
return ort.InferenceSession(onnx_model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
def preprocess(self, img: np.ndarray) -> np.ndarray:
"""
Preprocess the input image
Args:
img: the input image to preprocess
Returns:
np.ndarray: the preprocessed image
"""
raise NotImplementedError
def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any:
"""
Postprocess the model output
Args:
output: the model output to postprocess
input_images: the input images used to generate the output
Returns:
Any: the postprocessed output
"""
raise NotImplementedError
def __call__(self, inputs: list[np.ndarray]) -> Any:
"""
Call the model on the given inputs
Args:
inputs: the inputs to use
Returns:
Any: the postprocessed output
"""
self._inputs = inputs
model_inputs = self.session.get_inputs()
batched_inputs = [inputs[i : i + self.batch_size] for i in range(0, len(inputs), self.batch_size)]
processed_batches = [
np.array([self.preprocess(img) for img in batch], dtype=np.float32) for batch in batched_inputs
]
outputs = [self.session.run(None, {model_inputs[0].name: batch}) for batch in processed_batches]
return self.postprocess(outputs, batched_inputs)
================================================
FILE: onnxtr/file_utils.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import importlib.metadata
import logging
__all__ = ["requires_package"]

# Accepted truthy spellings for boolean-like environment variables.
ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
# Same as above, plus "AUTO" for settings that may defer the decision.
ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
def requires_package(name: str, extra_message: str | None = None) -> None: # pragma: no cover
"""
package requirement helper
Args:
name: name of the package
extra_message: additional message to display if the package is not found
"""
try:
_pkg_version = importlib.metadata.version(name)
logging.info(f"{name} version {_pkg_version} available.")
except importlib.metadata.PackageNotFoundError:
raise ImportError(
f"\n\n{extra_message if extra_message is not None else ''} "
f"\nPlease install it with the following command: pip install {name}\n"
)
================================================
FILE: onnxtr/io/__init__.py
================================================
from .elements import *
from .html import *
from .image import *
from .pdf import *
from .reader import *
================================================
FILE: onnxtr/io/elements.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from defusedxml import defuse_stdlib
defuse_stdlib()
from xml.etree import ElementTree as ET
from xml.etree.ElementTree import Element as ETElement
from xml.etree.ElementTree import SubElement
import numpy as np
import onnxtr
from onnxtr.file_utils import requires_package
from onnxtr.utils.common_types import BoundingBox
from onnxtr.utils.geometry import resolve_enclosing_bbox, resolve_enclosing_rbbox
from onnxtr.utils.reconstitution import synthesize_page
from onnxtr.utils.repr import NestedObject
try: # optional dependency for visualization
from onnxtr.utils.visualization import visualize_page
except ModuleNotFoundError: # pragma: no cover
pass
__all__ = ["Element", "Word", "Artefact", "Line", "Block", "Page", "Document"]
class Element(NestedObject):
    """Implements an abstract document element with exporting and text rendering capabilities"""

    _children_names: list[str] = []
    _exported_keys: list[str] = []

    def __init__(self, **kwargs: Any) -> None:
        # Only declared child collections may be set through the constructor.
        for key, value in kwargs.items():
            if key not in self._children_names:
                raise KeyError(f"{self.__class__.__name__} object does not have any attribute named '{key}'")
            setattr(self, key, value)

    def export(self) -> dict[str, Any]:
        """Exports the object into a nested dict format"""
        exported = {key: getattr(self, key) for key in self._exported_keys}
        for child_name in self._children_names:
            exported[child_name] = [child.export() for child in getattr(self, child_name)]
        return exported

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        raise NotImplementedError

    def render(self) -> str:
        raise NotImplementedError
class Word(Element):
    """Implements a word element

    Args:
        value: the text string of the word
        confidence: the confidence associated with the text prediction
        geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
            the page's size
        objectness_score: the objectness score of the detection
        crop_orientation: the general orientation of the crop in degrees and its confidence
    """

    _exported_keys: list[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"]
    _children_names: list[str] = []

    def __init__(
        self,
        value: str,
        confidence: float,
        geometry: BoundingBox | np.ndarray,
        objectness_score: float,
        crop_orientation: dict[str, Any],
    ) -> None:
        super().__init__()
        self.value = value
        self.confidence = confidence
        self.geometry = geometry
        self.objectness_score = objectness_score
        self.crop_orientation = crop_orientation

    def render(self) -> str:
        """Renders the full text of the element"""
        return self.value

    def extra_repr(self) -> str:
        return f"value='{self.value}', confidence={self.confidence:.2}"

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        # All constructor arguments map 1:1 onto the exported keys.
        return cls(**{key: save_dict[key] for key in cls._exported_keys})
class Artefact(Element):
    """Implements a non-textual element

    Args:
        artefact_type: the type of artefact
        confidence: the confidence of the type prediction
        geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
            the page's size.
    """

    _exported_keys: list[str] = ["geometry", "type", "confidence"]
    _children_names: list[str] = []

    def __init__(self, artefact_type: str, confidence: float, geometry: BoundingBox) -> None:
        super().__init__()
        self.geometry = geometry
        # Stored under `type` so it appears as "type" in export().
        self.type = artefact_type
        self.confidence = confidence

    def render(self) -> str:
        """Renders the full text of the element"""
        return f"[{self.type.upper()}]"

    def extra_repr(self) -> str:
        return f"type='{self.type}', confidence={self.confidence:.2}"

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        # BUGFIX: the exported key is "type" but the constructor parameter is
        # "artefact_type"; passing the exported keys verbatim raised a
        # TypeError (unexpected keyword argument 'type') on any round-trip.
        return cls(
            artefact_type=save_dict["type"],
            confidence=save_dict["confidence"],
            geometry=save_dict["geometry"],
        )
class Line(Element):
    """Implements a line element as a collection of words

    Args:
        words: list of word elements
        geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
            the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
            all words in it.
    """

    _exported_keys: list[str] = ["geometry", "objectness_score"]
    _children_names: list[str] = ["words"]
    words: list[Word] = []

    def __init__(
        self,
        words: list[Word],
        geometry: BoundingBox | np.ndarray | None = None,
        objectness_score: float | None = None,
    ) -> None:
        # Aggregate the word-level objectness scores when none is given.
        if objectness_score is None:
            objectness_score = float(np.mean([w.objectness_score for w in words]))
        # Resolve the geometry from the words when not provided explicitly.
        if geometry is None:
            word_boxes = [w.geometry for w in words]
            # Rotated boxes carry 4 points, straight ones only 2 corners.
            if len(words[0].geometry) == 4:
                geometry = resolve_enclosing_rbbox(word_boxes)  # type: ignore[misc]
            else:
                geometry = resolve_enclosing_bbox(word_boxes)  # type: ignore[misc]
        super().__init__(words=words)
        self.geometry = geometry
        self.objectness_score = objectness_score

    def render(self) -> str:
        """Renders the full text of the element"""
        return " ".join(word.render() for word in self.words)

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        kwargs = {key: save_dict[key] for key in cls._exported_keys}
        kwargs["words"] = [Word.from_dict(word_dict) for word_dict in save_dict["words"]]
        return cls(**kwargs)
class Block(Element):
    """Implements a block element as a collection of lines and artefacts

    Args:
        lines: list of line elements
        artefacts: list of artefacts
        geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to
            the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing
            all lines and artefacts in it.
        objectness_score: the objectness score of the block; computed from the words if not provided
    """

    _exported_keys: list[str] = ["geometry", "objectness_score"]
    _children_names: list[str] = ["lines", "artefacts"]
    lines: list[Line] = []
    artefacts: list[Artefact] = []

    def __init__(
        self,
        lines: list[Line] | None = None,
        artefacts: list[Artefact] | None = None,
        geometry: BoundingBox | np.ndarray | None = None,
        objectness_score: float | None = None,
    ) -> None:
        # BUGFIX: the defaults used to be shared mutable `[]` literals — any
        # later in-place mutation (e.g. `self.lines.append(...)`) would leak
        # across every Block constructed with the default. None sentinels keep
        # the interface backward-compatible.
        lines = [] if lines is None else lines
        artefacts = [] if artefacts is None else artefacts
        # Compute the objectness score of the block from its words when not given
        if objectness_score is None:
            objectness_score = float(np.mean([w.objectness_score for line in lines for w in line.words]))
        # Resolve the geometry using the smallest enclosing bounding box
        if geometry is None:
            line_boxes = [word.geometry for line in lines for word in line.words]
            artefact_boxes = [artefact.geometry for artefact in artefacts]
            # Rotated geometries are numpy arrays; straight boxes are corner tuples
            box_resolution_fn = (
                resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox
            )
            geometry = box_resolution_fn(line_boxes + artefact_boxes)  # type: ignore
        super().__init__(lines=lines, artefacts=artefacts)
        self.geometry = geometry
        self.objectness_score = objectness_score

    def render(self, line_break: str = "\n") -> str:
        """Renders the full text of the element"""
        return line_break.join(line.render() for line in self.lines)

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        kwargs = {k: save_dict[k] for k in cls._exported_keys}
        kwargs.update({
            "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]],
            "artefacts": [Artefact.from_dict(_dict) for _dict in save_dict["artefacts"]],
        })
        return cls(**kwargs)
class Page(Element):
    """Implements a page element as a collection of blocks

    Args:
        page: image encoded as a numpy array in uint8
        blocks: list of block elements
        page_idx: the index of the page in the input raw document
        dimensions: the page size in pixels in format (height, width)
        orientation: a dictionary with the value of the rotation angle in degrees and confidence of the prediction
        language: a dictionary with the language value and confidence of the prediction
    """

    _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"]
    _children_names: list[str] = ["blocks"]
    blocks: list[Block] = []

    def __init__(
        self,
        page: np.ndarray,
        blocks: list[Block],
        page_idx: int,
        dimensions: tuple[int, int],
        orientation: dict[str, Any] | None = None,
        language: dict[str, Any] | None = None,
    ) -> None:
        super().__init__(blocks=blocks)
        self.page = page
        self.page_idx = page_idx
        self.dimensions = dimensions
        # Any non-dict value (including None) falls back to an "unknown" payload.
        self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None)
        self.language = language if isinstance(language, dict) else dict(value=None, confidence=None)

    def render(self, block_break: str = "\n\n") -> str:
        """Renders the full text of the element"""
        return block_break.join(b.render() for b in self.blocks)

    def extra_repr(self) -> str:
        return f"dimensions={self.dimensions}"

    def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None:
        """Overlay the result on a given image

        Args:
            interactive: whether the display should be interactive
            preserve_aspect_ratio: pass True if you passed True to the predictor
            **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method
        """
        requires_package("matplotlib", "`.show()` requires matplotlib & mplcursors installed")
        requires_package("mplcursors", "`.show()` requires matplotlib & mplcursors installed")
        import matplotlib.pyplot as plt

        visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio)
        plt.show(**kwargs)

    def synthesize(self, **kwargs) -> np.ndarray:
        """Synthesize the page from the predictions

        Args:
            **kwargs: keyword arguments passed to the `synthesize_page` method

        Returns:
            synthesized page
        """
        return synthesize_page(self.export(), **kwargs)

    def export_as_xml(self, file_title: str = "OnnxTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]:
        """Export the page as XML (hOCR-format)
        convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md

        Args:
            file_title: the title of the XML file

        Returns:
            a tuple of the XML byte string, and its ElementTree
        """
        p_idx = self.page_idx
        # hOCR element ids are 1-based and global across the page.
        block_count: int = 1
        line_count: int = 1
        word_count: int = 1
        height, width = self.dimensions
        # NOTE(review): self.language holds "value"/"confidence" keys, so this
        # membership check always falls back to "en" — confirm intended lookup.
        language = self.language if "language" in self.language.keys() else "en"
        # Create the XML root element
        page_hocr = ETElement("html", attrib={"xmlns": "http://www.w3.org/1999/xhtml", "xml:lang": str(language)})
        # Create the header / SubElements of the root element
        head = SubElement(page_hocr, "head")
        SubElement(head, "title").text = file_title
        SubElement(head, "meta", attrib={"http-equiv": "Content-Type", "content": "text/html; charset=utf-8"})
        SubElement(
            head,
            "meta",
            attrib={"name": "ocr-system", "content": f"onnxtr {onnxtr.__version__}"},  # type: ignore[attr-defined]
        )
        SubElement(
            head,
            "meta",
            attrib={"name": "ocr-capabilities", "content": "ocr_page ocr_carea ocr_par ocr_line ocrx_word"},
        )
        # Create the body
        body = SubElement(page_hocr, "body")
        page_div = SubElement(
            body,
            "div",
            attrib={
                "class": "ocr_page",
                "id": f"page_{p_idx + 1}",
                "title": f"image; bbox 0 0 {width} {height}; ppageno 0",
            },
        )
        # iterate over the blocks / lines / words and create the XML elements in body line by line with the attributes
        for block in self.blocks:
            # Rotated geometries (4-point polygons) are not representable in hOCR bboxes.
            if len(block.geometry) != 2:
                raise TypeError("XML export is only available for straight bounding boxes for now.")
            (xmin, ymin), (xmax, ymax) = block.geometry
            block_div = SubElement(
                page_div,
                "div",
                attrib={
                    "class": "ocr_carea",
                    "id": f"block_{block_count}",
                    "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
                    {int(round(xmax * width))} {int(round(ymax * height))}",
                },
            )
            paragraph = SubElement(
                block_div,
                "p",
                attrib={
                    "class": "ocr_par",
                    "id": f"par_{block_count}",
                    "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
                    {int(round(xmax * width))} {int(round(ymax * height))}",
                },
            )
            block_count += 1
            for line in block.lines:
                (xmin, ymin), (xmax, ymax) = line.geometry
                # NOTE: baseline, x_size, x_descenders, x_ascenders is currently initalized to 0
                line_span = SubElement(
                    paragraph,
                    "span",
                    attrib={
                        "class": "ocr_line",
                        "id": f"line_{line_count}",
                        "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
                        {int(round(xmax * width))} {int(round(ymax * height))}; \
                        baseline 0 0; x_size 0; x_descenders 0; x_ascenders 0",
                    },
                )
                line_count += 1
                for word in line.words:
                    (xmin, ymin), (xmax, ymax) = word.geometry
                    conf = word.confidence
                    word_div = SubElement(
                        line_span,
                        "span",
                        attrib={
                            "class": "ocrx_word",
                            "id": f"word_{word_count}",
                            "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \
                            {int(round(xmax * width))} {int(round(ymax * height))}; \
                            x_wconf {int(round(conf * 100))}",
                        },
                    )
                    # set the text
                    word_div.text = word.value
                    word_count += 1
        return (ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr))

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        kwargs = {k: save_dict[k] for k in cls._exported_keys}
        kwargs.update({"blocks": [Block.from_dict(block_dict) for block_dict in save_dict["blocks"]]})
        return cls(**kwargs)
class Document(Element):
    """Implements a document element as a collection of pages

    Args:
        pages: list of page elements
    """

    _children_names: list[str] = ["pages"]
    pages: list[Page] = []

    def __init__(self, pages: list[Page]) -> None:
        super().__init__(pages=pages)

    def render(self, page_break: str = "\n\n\n\n") -> str:
        """Renders the full text of the element"""
        return page_break.join(page.render() for page in self.pages)

    def show(self, **kwargs) -> None:
        """Overlay the result on a given image"""
        for page in self.pages:
            page.show(**kwargs)

    def synthesize(self, **kwargs) -> list[np.ndarray]:
        """Synthesize all pages from their predictions

        Args:
            **kwargs: keyword arguments passed to the `Page.synthesize` method

        Returns:
            list of synthesized pages
        """
        return [page.synthesize(**kwargs) for page in self.pages]

    def export_as_xml(self, **kwargs) -> list[tuple[bytes, ET.ElementTree]]:
        """Export the document as XML (hOCR-format)

        Args:
            **kwargs: additional keyword arguments passed to the Page.export_as_xml method

        Returns:
            list of tuple of (bytes, ElementTree)
        """
        return [page.export_as_xml(**kwargs) for page in self.pages]

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        kwargs = {key: save_dict[key] for key in cls._exported_keys}
        kwargs["pages"] = [Page.from_dict(page_dict) for page_dict in save_dict["pages"]]
        return cls(**kwargs)
================================================
FILE: onnxtr/io/html.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
__all__ = ["read_html"]
def read_html(url: str, **kwargs: Any) -> bytes:
    """Render a web page and convert it into a PDF byte stream

    >>> from onnxtr.io import read_html
    >>> doc = read_html("https://www.yoursite.com")

    Args:
        url: URL of the target web page
        **kwargs: keyword arguments from `weasyprint.HTML`

    Returns:
        the rendered PDF as a bytes stream
    """
    # Imported lazily: weasyprint is an optional dependency.
    from weasyprint import HTML

    document = HTML(url, **kwargs)
    return document.write_pdf()
================================================
FILE: onnxtr/io/image.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from pathlib import Path
import cv2
import numpy as np
from onnxtr.utils.common_types import AbstractFile
__all__ = ["read_img_as_numpy"]
def read_img_as_numpy(
    file: AbstractFile,
    output_size: tuple[int, int] | None = None,
    rgb_output: bool = True,
) -> np.ndarray:
    """Read an image file into numpy format

    >>> from onnxtr.io import read_img_as_numpy
    >>> page = read_img_as_numpy("path/to/your/doc.jpg")

    Args:
        file: the path to the image file
        output_size: the expected output size of each page in format H x W
        rgb_output: whether the output ndarray channel order should be RGB instead of BGR.

    Returns:
        the page decoded as numpy ndarray of shape H x W x 3
    """
    if isinstance(file, (str, Path)):
        if not Path(file).is_file():
            raise FileNotFoundError(f"unable to access {file}")
        img = cv2.imread(str(file), cv2.IMREAD_COLOR)
    elif isinstance(file, bytes):
        # Decode the in-memory buffer instead of touching the filesystem
        buffer: np.ndarray = np.frombuffer(file, np.uint8)
        img = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
    else:
        raise TypeError("unsupported object type for argument 'file'")

    # cv2 returns None (rather than raising) on undecodable data
    if img is None:
        raise ValueError("unable to read file.")

    if isinstance(output_size, tuple):
        # cv2.resize expects (W, H) while output_size is given as (H, W)
        img = cv2.resize(img, output_size[::-1], interpolation=cv2.INTER_LINEAR)

    # cv2 decodes as BGR; convert when the caller wants RGB
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) if rgb_output else img
================================================
FILE: onnxtr/io/pdf.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
import pypdfium2 as pdfium
from onnxtr.utils.common_types import AbstractFile
__all__ = ["read_pdf"]
def read_pdf(
    file: AbstractFile,
    scale: int = 2,
    rgb_mode: bool = True,
    password: str | None = None,
    **kwargs: Any,
) -> list[np.ndarray]:
    """Read a PDF file and convert it into an image in numpy format

    >>> from onnxtr.io import read_pdf
    >>> doc = read_pdf("path/to/your/doc.pdf")

    Args:
        file: the path to the PDF file
        scale: rendering scale (1 corresponds to 72dpi)
        rgb_mode: if True, the output will be RGB, otherwise BGR
        password: a password to unlock the document, if encrypted
        **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`

    Returns:
        the list of pages decoded as numpy ndarray of shape H x W x C
    """
    # Rasterise pages to numpy ndarrays with pypdfium2
    document = pdfium.PdfDocument(file, password=password)
    try:
        rendered = []
        for page in document:
            bitmap = page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs)
            rendered.append(bitmap.to_numpy())
        return rendered
    finally:
        # Always release the native document handle, even on render errors
        document.close()
================================================
FILE: onnxtr/io/reader.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from collections.abc import Sequence
from pathlib import Path
import numpy as np
from onnxtr.file_utils import requires_package
from onnxtr.utils.common_types import AbstractFile
from .html import read_html
from .image import read_img_as_numpy
from .pdf import read_pdf
__all__ = ["DocumentFile"]
class DocumentFile:
    """Read a document from multiple extensions"""

    @classmethod
    def from_pdf(cls, file: AbstractFile, **kwargs) -> list[np.ndarray]:
        """Read a PDF file

        >>> from onnxtr.io import DocumentFile
        >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf")

        Args:
            file: the path to the PDF file or a binary stream
            **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`

        Returns:
            the list of pages decoded as numpy ndarray of shape H x W x 3
        """
        return read_pdf(file, **kwargs)

    @classmethod
    def from_url(cls, url: str, **kwargs) -> list[np.ndarray]:
        """Interpret a web page as a PDF document

        >>> from onnxtr.io import DocumentFile
        >>> doc = DocumentFile.from_url("https://www.yoursite.com")

        Args:
            url: the URL of the target web page
            **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render`

        Returns:
            the list of pages decoded as numpy ndarray of shape H x W x 3
        """
        # weasyprint is optional: fail early with install instructions
        requires_package(
            "weasyprint",
            "`.from_url` requires weasyprint installed.\n"
            + "Installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html#installation",
        )
        # Render the page to a PDF stream, then reuse the PDF pathway
        return cls.from_pdf(read_html(url), **kwargs)

    @classmethod
    def from_images(cls, files: Sequence[AbstractFile] | AbstractFile, **kwargs) -> list[np.ndarray]:
        """Read an image file (or a collection of image files) and convert it into an image in numpy format

        >>> from onnxtr.io import DocumentFile
        >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"])

        Args:
            files: the path to the image file or a binary stream, or a collection of those
            **kwargs: additional parameters to :meth:`onnxtr.io.image.read_img_as_numpy`

        Returns:
            the list of pages decoded as numpy ndarray of shape H x W x 3
        """
        # Promote a single source to a one-element collection
        sources = [files] if isinstance(files, (str, Path, bytes)) else files
        return [read_img_as_numpy(source, **kwargs) for source in sources]
================================================
FILE: onnxtr/models/__init__.py
================================================
from .engine import EngineConfig
from .classification import *
from .detection import *
from .recognition import *
from .zoo import *
from .factory import *
================================================
FILE: onnxtr/models/_utils.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from math import floor
from statistics import median_low
import cv2
import numpy as np
from langdetect import LangDetectException, detect_langs
from onnxtr.utils.geometry import rotate_image
__all__ = ["estimate_orientation", "get_language"]
def get_max_width_length_ratio(contour: np.ndarray) -> float:
    """Get the maximum shape ratio of a contour.

    Args:
        contour: the contour from cv2.findContour

    Returns:
        the maximum shape ratio (0.0 for degenerate rectangles)
    """
    _, (width, height), _ = cv2.minAreaRect(contour)
    # A zero-sized side means the min-area rect is degenerate: no meaningful ratio
    if width == 0 or height == 0:
        return 0.0
    return max(width / height, height / width)
def estimate_orientation(
    img: np.ndarray,
    general_page_orientation: tuple[int, float] | None = None,
    n_ct: int = 70,
    ratio_threshold_for_lines: float = 3,
    min_confidence: float = 0.2,
    lower_area: int = 100,
) -> int:
    """Estimate the angle of the general document orientation based on the
    lines of the document and the assumption that they should be horizontal.

    Args:
        img: the img or bitmap to analyze (H, W, C)
        general_page_orientation: the general orientation of the page (angle [0, 90, 180, 270 (-90)], confidence)
            estimated by a model
        n_ct: the number of contours used for the orientation estimation
        ratio_threshold_for_lines: this is the ratio w/h used to discriminate lines
        min_confidence: the minimum confidence to consider the general_page_orientation
        lower_area: the minimum area of a contour to be considered

    Returns:
        the estimated angle of the page (clockwise, negative for left side rotation, positive for right side rotation)
    """
    assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported"
    # Convert image to grayscale if necessary, then binarize with Otsu
    if img.shape[-1] == 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        gray_img = cv2.medianBlur(gray_img, 5)
        thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    else:
        # Single-channel input is assumed to already be a binary map
        thresh = img.astype(np.uint8)

    page_orientation, orientation_confidence = general_page_orientation or (0, 0.0)
    # Only trust the model-provided orientation above the confidence floor
    is_confident = page_orientation is not None and orientation_confidence >= min_confidence
    base_angle = page_orientation if is_confident else 0

    if is_confident:
        # We rotate the image to the general orientation which improves the detection
        # No expand needed bitmap is already padded
        thresh = rotate_image(thresh, -base_angle)
    else:  # That's only required if we do not work on the detection models bin map
        # try to merge words in lines via a dilation sized relative to the page
        (h, w) = img.shape[:2]
        k_x = max(1, (floor(w / 100)))
        k_y = max(1, (floor(h / 100)))
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (k_x, k_y))
        thresh = cv2.dilate(thresh, kernel, iterations=1)

    # extract contours
    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # Filter out small contours, then sort most line-like (elongated) first
    contours = sorted(
        [contour for contour in contours if cv2.contourArea(contour) > lower_area],
        key=get_max_width_length_ratio,
        reverse=True,
    )

    angles = []
    for contour in contours[:n_ct]:
        _, (w, h), angle = cv2.minAreaRect(contour)
        # OpenCV version-proof normalization: force 'w' to be the long side
        # so the angle is consistently relative to the major axis.
        # https://github.com/opencv/opencv/pull/28051/changes
        if w < h:
            w, h = h, w
            angle -= 90
        # Normalize angle to be within [-90, 90]
        while angle <= -90:
            angle += 180
        while angle > 90:
            angle -= 180
        if h > 0:
            if w / h > ratio_threshold_for_lines:  # select only contours with ratio like lines
                angles.append(angle)
            elif w / h < 1 / ratio_threshold_for_lines:  # if lines are vertical, subtract 90 degrees
                angles.append(angle - 90)

    if len(angles) == 0:
        skew_angle = 0  # in case no angle is found
    else:
        # median_low picks a value from the data to avoid outliers
        median = -median_low(angles)
        skew_angle = -round(median) if abs(median) != 0 else 0

    # Resolve the 90-degree flip ambiguity.
    # If the estimation is exactly 90/-90, it's usually a vertical detection of horizontal lines.
    if abs(skew_angle) == 90:
        skew_angle = 0

    # combine with the general orientation and the estimated angle
    # Apply the detected skew to our base orientation
    final_angle = base_angle + skew_angle
    # Standardize result to [-179, 180] range to handle wrap-around cases (e.g., 180 + -31)
    while final_angle > 180:
        final_angle -= 360
    while final_angle <= -180:
        final_angle += 360

    if is_confident:
        # If the estimated angle is perpendicular, treat it as 0 to avoid wrong flips
        if abs(skew_angle) % 90 == 0:
            return page_orientation
        # special case where the estimated angle is mostly wrong:
        # case 1: - and + swapped
        # case 2: estimated angle is completely wrong
        # so in this case we prefer the general page orientation
        if abs(skew_angle) == abs(page_orientation) and page_orientation != 0:
            return page_orientation

    return int(
        final_angle
    )  # return the clockwise angle (negative - left side rotation, positive - right side rotation)
def rectify_crops(
    crops: list[np.ndarray],
    orientations: list[int],
) -> list[np.ndarray]:
    """Rotate each crop of the list back to a readable orientation.

    The orientation class maps to counter-clockwise quarter turns:
        0: already straight, no rotation
        1: 90 ccw, rotate 3 times ccw
        2: 180, rotate 2 times ccw
        3: 270 ccw, rotate 1 time ccw
    """
    if not orientations:
        return []
    # Invert predictions: a detected +90 rotation is undone with -90 (i.e. 3 ccw turns)
    turns = [0 if pred == 0 else 4 - pred for pred in orientations]
    return [np.rot90(crop, k) if k != 0 else crop for k, crop in zip(turns, crops)]
def rectify_loc_preds(
page_loc_preds: np.ndarray,
orientations: list[int],
) -> np.ndarray | None:
"""Orient the quadrangle (Polygon4P) according to the predicted orientation,
so that the points are in this order: top L, top R, bot R, bot L if the crop is readable
"""
return (
np.stack(
[
np.roll(page_loc_pred, orientation, axis=0)
for orientation, page_loc_pred in zip(orientations, page_loc_preds)
],
axis=0,
)
if len(orientations) > 0
else None
)
def get_language(text: str) -> tuple[str, float]:
    """Get languages of a text using langdetect model.
    Get the language with the highest probability or no language if only a few words or a low probability

    Args:
        text (str): text

    Returns:
        The detected language in ISO 639 code and confidence score
    """
    try:
        best = detect_langs(text.lower())[0]
    except LangDetectException:
        return "unknown", 0.0
    # Discard detections on tiny inputs or short low-confidence inputs
    too_short = len(text) <= 1
    low_signal = len(text) <= 5 and best.prob <= 0.2
    if too_short or low_signal:
        return "unknown", 0.0
    return best.lang, best.prob
================================================
FILE: onnxtr/models/builder.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from scipy.cluster.hierarchy import fclusterdata
from onnxtr.io.elements import Block, Document, Line, Page, Word
from onnxtr.utils.geometry import estimate_page_angle, resolve_enclosing_bbox, resolve_enclosing_rbbox, rotate_boxes
from onnxtr.utils.repr import NestedObject
__all__ = ["DocumentBuilder"]
class DocumentBuilder(NestedObject):
    """Implements a document builder

    Args:
        resolve_lines: whether words should be automatically grouped into lines
        resolve_blocks: whether lines should be automatically grouped into blocks
        paragraph_break: relative length of the minimum space separating paragraphs
        export_as_straight_boxes: if True, force straight boxes in the export (fit a rectangle
            box to all rotated boxes). Else, keep the boxes format unchanged, no matter what it is.
    """

    def __init__(
        self,
        resolve_lines: bool = True,
        resolve_blocks: bool = False,
        paragraph_break: float = 0.035,
        export_as_straight_boxes: bool = False,
    ) -> None:
        self.resolve_lines = resolve_lines
        self.resolve_blocks = resolve_blocks
        self.paragraph_break = paragraph_break
        self.export_as_straight_boxes = export_as_straight_boxes

    @staticmethod
    def _sort_boxes(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Sort bounding boxes from top to bottom, left to right

        Args:
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2) (in case of rotated bbox)

        Returns:
            tuple: indices of ordered boxes of shape (N,), boxes
                If straight boxes are passed to the function, boxes are unchanged
                else: boxes returned are straight boxes fitted to the straightened rotated boxes
                so that we fit the lines afterwards to the straightened page
        """
        if boxes.ndim == 3:
            # Straighten the rotated boxes before fitting axis-aligned rectangles
            boxes = rotate_boxes(
                loc_preds=boxes,
                angle=-estimate_page_angle(boxes),
                orig_shape=(1024, 1024),
                min_angle=5.0,
            )
            boxes = np.concatenate((boxes.min(1), boxes.max(1)), -1)
        # Sort primarily by row (y, scaled by median box height), then by x
        return (boxes[:, 0] + 2 * boxes[:, 3] / np.median(boxes[:, 3] - boxes[:, 1])).argsort(), boxes

    def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: list[int]) -> list[list[int]]:
        """Split a line in sub_lines

        Args:
            boxes: bounding boxes of shape (N, 4)
            word_idcs: list of indexes for the words of the line

        Returns:
            A list of (sub-)lines computed from the original line (words)
        """
        lines = []
        # Sort words horizontally
        word_idcs = [word_idcs[idx] for idx in boxes[word_idcs, 0].argsort().tolist()]

        # Eventually split line horizontally
        if len(word_idcs) < 2:
            lines.append(word_idcs)
        else:
            sub_line = [word_idcs[0]]
            for i in word_idcs[1:]:
                horiz_break = True
                prev_box = boxes[sub_line[-1]]
                # Compute distance between boxes
                dist = boxes[i, 0] - prev_box[2]
                # If distance between boxes is lower than paragraph break, same sub-line
                if dist < self.paragraph_break:
                    horiz_break = False
                if horiz_break:
                    lines.append(sub_line)
                    sub_line = []
                sub_line.append(i)
            lines.append(sub_line)
        return lines

    def _resolve_lines(self, boxes: np.ndarray) -> list[list[int]]:
        """Order boxes to group them in lines

        Args:
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2) in case of rotated bbox

        Returns:
            nested list of box indices
        """
        # Sort boxes, and straighten the boxes if they are rotated
        idxs, boxes = self._sort_boxes(boxes)
        # Compute median for boxes heights
        y_med = np.median(boxes[:, 3] - boxes[:, 1])

        lines = []
        words = [idxs[0]]  # Assign the top-left word to the first line
        # Define a mean y-center for the line
        y_center_sum = boxes[idxs[0]][[1, 3]].mean()

        for idx in idxs[1:]:
            vert_break = True

            # Compute y_dist between the candidate box and the running line mean
            y_dist = abs(boxes[idx][[1, 3]].mean() - y_center_sum / len(words))
            # If y-center of the box is close enough to mean y-center of the line, same line
            if y_dist < y_med / 2:
                vert_break = False

            if vert_break:
                # Compute sub-lines (horizontal split)
                lines.extend(self._resolve_sub_lines(boxes, words))
                words = []
                y_center_sum = 0

            words.append(idx)
            y_center_sum += boxes[idx][[1, 3]].mean()

        # Use the remaining words to form the last(s) line(s)
        if len(words) > 0:
            # Compute sub-lines (horizontal split)
            lines.extend(self._resolve_sub_lines(boxes, words))

        return lines

    @staticmethod
    def _resolve_blocks(boxes: np.ndarray, lines: list[list[int]]) -> list[list[list[int]]]:
        """Order lines to group them in blocks

        Args:
            boxes: bounding boxes of shape (N, 4) or (N, 4, 2)
            lines: list of lines, each line is a list of idx

        Returns:
            nested list of box indices
        """
        # Resolve enclosing boxes of lines
        if boxes.ndim == 3:
            box_lines: np.ndarray = np.asarray([
                resolve_enclosing_rbbox([tuple(boxes[idx, :, :]) for idx in line])  # type: ignore[misc]
                for line in lines
            ])
        else:
            _box_lines = [
                resolve_enclosing_bbox([(tuple(boxes[idx, :2]), tuple(boxes[idx, 2:])) for idx in line])
                for line in lines
            ]
            box_lines = np.asarray([(x1, y1, x2, y2) for ((x1, y1), (x2, y2)) in _box_lines])

        # Compute geometrical features of lines to cluster
        # Clustering only with box centers yields poor results for complex documents
        if boxes.ndim == 3:
            box_features: np.ndarray = np.stack(
                (
                    (box_lines[:, 0, 0] + box_lines[:, 0, 1]) / 2,
                    (box_lines[:, 0, 0] + box_lines[:, 2, 0]) / 2,
                    (box_lines[:, 0, 0] + box_lines[:, 2, 1]) / 2,
                    (box_lines[:, 0, 1] + box_lines[:, 2, 1]) / 2,
                    (box_lines[:, 0, 1] + box_lines[:, 2, 0]) / 2,
                    (box_lines[:, 2, 0] + box_lines[:, 2, 1]) / 2,
                ),
                axis=-1,
            )
        else:
            box_features = np.stack(
                (
                    (box_lines[:, 0] + box_lines[:, 3]) / 2,
                    (box_lines[:, 1] + box_lines[:, 2]) / 2,
                    (box_lines[:, 0] + box_lines[:, 2]) / 2,
                    (box_lines[:, 1] + box_lines[:, 3]) / 2,
                    box_lines[:, 0],
                    box_lines[:, 1],
                ),
                axis=-1,
            )
        # Compute clusters via hierarchical clustering on the line features
        clusters = fclusterdata(box_features, t=0.1, depth=4, criterion="distance", metric="euclidean")

        _blocks: dict[int, list[int]] = {}
        # Form clusters
        for line_idx, cluster_idx in enumerate(clusters):
            if cluster_idx in _blocks.keys():
                _blocks[cluster_idx].append(line_idx)
            else:
                _blocks[cluster_idx] = [line_idx]

        # Retrieve word-box level to return a fully nested structure
        blocks = [[lines[idx] for idx in block] for block in _blocks.values()]

        return blocks

    def _build_blocks(
        self,
        boxes: np.ndarray,
        objectness_scores: np.ndarray,
        word_preds: list[tuple[str, float]],
        crop_orientations: list[dict[str, Any]],
    ) -> list[Block]:
        """Gather independent words in structured blocks

        Args:
            boxes: bounding boxes of all detected words of the page, of shape (N, 4) or (N, 4, 2)
            objectness_scores: objectness scores of all detected words of the page, of shape N
            word_preds: list of all detected words of the page, of shape N
            crop_orientations: list of dictionaries containing
                the general orientation (orientations + confidences) of the crops

        Returns:
            list of block elements
        """
        if boxes.shape[0] != len(word_preds):
            raise ValueError(f"Incompatible argument lengths: {boxes.shape[0]}, {len(word_preds)}")

        if boxes.shape[0] == 0:
            return []

        # Decide whether we try to form lines
        _boxes = boxes
        if self.resolve_lines:
            lines = self._resolve_lines(_boxes if _boxes.ndim == 3 else _boxes[:, :4])
            # Decide whether we try to form blocks
            if self.resolve_blocks and len(lines) > 1:
                _blocks = self._resolve_blocks(_boxes if _boxes.ndim == 3 else _boxes[:, :4], lines)
            else:
                _blocks = [lines]
        else:
            # Sort bounding boxes, one line for all boxes, one block for the line
            lines = [self._sort_boxes(_boxes if _boxes.ndim == 3 else _boxes[:, :4])[0]]  # type: ignore[list-item]
            _blocks = [lines]

        blocks = [
            Block([
                Line([
                    Word(
                        *word_preds[idx],
                        tuple(tuple(pt) for pt in boxes[idx].tolist()),  # type: ignore[arg-type]
                        float(objectness_scores[idx]),
                        crop_orientations[idx],
                    )
                    if boxes.ndim == 3
                    else Word(
                        *word_preds[idx],
                        ((boxes[idx, 0], boxes[idx, 1]), (boxes[idx, 2], boxes[idx, 3])),
                        float(objectness_scores[idx]),
                        crop_orientations[idx],
                    )
                    for idx in line
                ])
                for line in lines
            ])
            for lines in _blocks
        ]

        return blocks

    def extra_repr(self) -> str:
        return (
            f"resolve_lines={self.resolve_lines}, resolve_blocks={self.resolve_blocks}, "
            f"paragraph_break={self.paragraph_break}, "
            f"export_as_straight_boxes={self.export_as_straight_boxes}"
        )

    def __call__(
        self,
        pages: list[np.ndarray],
        boxes: list[np.ndarray],
        objectness_scores: list[np.ndarray],
        text_preds: list[list[tuple[str, float]]],
        page_shapes: list[tuple[int, int]],
        crop_orientations: list[dict[str, Any]],
        orientations: list[dict[str, Any]] | None = None,
        languages: list[dict[str, Any]] | None = None,
    ) -> Document:
        """Re-arrange detected words into structured blocks

        Args:
            pages: list of N elements, where each element represents the page image
            boxes: list of N elements, where each element represents the localization predictions, of shape (*, 4)
                or (*, 4, 2) for all words for a given page
            objectness_scores: list of N elements, where each element represents the objectness scores
            text_preds: list of N elements, where each element is the list of all word prediction (text + confidence)
            page_shapes: shape of each page, of size N
            crop_orientations: list of N elements, where each element is
                a dictionary containing the general orientation (orientations + confidences) of the crops
            orientations: optional, list of N elements,
                where each element is a dictionary containing the orientation (orientation + confidence)
            languages: optional, list of N elements,
                where each element is a dictionary containing the language (language + confidence)

        Returns:
            document object
        """
        # NOTE(review): Python chains `!=` pairwise (a != b and b != c), so this
        # only compares adjacent lengths — TODO confirm this is the intended check
        if len(boxes) != len(text_preds) != len(crop_orientations) != len(objectness_scores) or len(boxes) != len(
            page_shapes
        ) != len(crop_orientations) != len(objectness_scores):
            raise ValueError("All arguments are expected to be lists of the same size")

        _orientations = orientations if isinstance(orientations, list) else [None] * len(boxes)
        _languages = languages if isinstance(languages, list) else [None] * len(boxes)
        if self.export_as_straight_boxes and len(boxes) > 0:
            # If boxes are already straight OK, else fit a bounding rect
            if boxes[0].ndim == 3:
                # Iterate over pages and boxes
                boxes = [np.concatenate((p_boxes.min(1), p_boxes.max(1)), 1) for p_boxes in boxes]

        _pages = [
            Page(
                page,
                self._build_blocks(
                    page_boxes,
                    loc_scores,
                    word_preds,
                    word_crop_orientations,
                ),
                _idx,
                shape,
                orientation,
                language,
            )
            for page, _idx, shape, page_boxes, loc_scores, word_preds, word_crop_orientations, orientation, language in zip(  # noqa: E501
                pages,
                range(len(boxes)),
                page_shapes,
                boxes,
                objectness_scores,
                text_preds,
                crop_orientations,
                _orientations,
                _languages,
            )
        ]

        return Document(_pages)
================================================
FILE: onnxtr/models/classification/__init__.py
================================================
from .models import *
from .zoo import *
================================================
FILE: onnxtr/models/classification/models/__init__.py
================================================
from .mobilenet import *
================================================
FILE: onnxtr/models/classification/models/mobilenet.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
# Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py
from copy import deepcopy
from typing import Any
import numpy as np
from ...engine import Engine, EngineConfig
__all__ = [
    "MobileNetV3",
    "mobilenet_v3_small_crop_orientation",
    "mobilenet_v3_small_page_orientation",
]

# Default configuration for each supported orientation classifier:
# normalization stats ("mean"/"std"), expected input shape (C, H, W),
# the class labels ("classes", rotation angles in degrees), and download
# URLs for the fp32 ("url") and statically-quantized 8-bit ("url_8_bit") models.
default_cfgs: dict[str, dict[str, Any]] = {
    "mobilenet_v3_small_crop_orientation": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 256, 256),
        "classes": [0, -90, 180, 90],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_crop_orientation-4fde60a1.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_crop_orientation_static_8_bit-c32c7721.onnx",
    },
    "mobilenet_v3_small_page_orientation": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 512, 512),
        "classes": [0, -90, 180, 90],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_page_orientation-60606ce4.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_page_orientation_static_8_bit-13b5b014.onnx",
    },
}
class MobileNetV3(Engine):
    """MobileNetV3 Onnx loader

    Args:
        model_path: path or url to onnx model file
        engine_cfg: configuration for the inference engine
        cfg: configuration dictionary
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        # Keep the architecture configuration around for downstream predictors
        # (e.g. mean/std, input_shape, class labels)
        self.cfg = cfg

    def __call__(
        self,
        x: np.ndarray,
    ) -> np.ndarray:
        # Forward the batch through the underlying ONNX session
        return self.run(x)
def _mobilenet_v3(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MobileNetV3:
    """Instantiate a MobileNetV3 classifier from its default configuration.

    Args:
        arch: key into `default_cfgs`
        model_path: path or url to the onnx model file
        load_in_8_bit: whether to load the 8-bit quantized model
        engine_cfg: configuration for the inference engine
        **kwargs: additional arguments passed to `MobileNetV3`

    Returns:
        the loaded MobileNetV3 model
    """
    # Swap in the quantized checkpoint only when the path is a remote (default) URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    return MobileNetV3(model_path, cfg=deepcopy(default_cfgs[arch]), engine_cfg=engine_cfg, **kwargs)
def mobilenet_v3_small_crop_orientation(
    model_path: str = default_cfgs["mobilenet_v3_small_crop_orientation"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MobileNetV3:
    """MobileNetV3-Small architecture as described in
    `"Searching for MobileNetV3", <https://arxiv.org/pdf/1905.02244.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import mobilenet_v3_small_crop_orientation
    >>> model = mobilenet_v3_small_crop_orientation()
    >>> input_tensor = np.random.rand((1, 3, 256, 256))
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the MobileNetV3 architecture

    Returns:
        MobileNetV3
    """
    return _mobilenet_v3(
        "mobilenet_v3_small_crop_orientation",
        model_path,
        load_in_8_bit,
        engine_cfg,
        **kwargs,
    )
def mobilenet_v3_small_page_orientation(
    model_path: str = default_cfgs["mobilenet_v3_small_page_orientation"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MobileNetV3:
    """MobileNetV3-Small architecture as described in
    `"Searching for MobileNetV3", <https://arxiv.org/pdf/1905.02244.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import mobilenet_v3_small_page_orientation
    >>> model = mobilenet_v3_small_page_orientation()
    >>> input_tensor = np.random.rand((1, 3, 512, 512))
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the MobileNetV3 architecture

    Returns:
        MobileNetV3
    """
    return _mobilenet_v3(
        "mobilenet_v3_small_page_orientation",
        model_path,
        load_in_8_bit,
        engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/models/classification/predictor/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/models/classification/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.repr import NestedObject
__all__ = ["OrientationPredictor"]
class OrientationPredictor(NestedObject):
    """Implements an object able to detect the reading direction of a text box or a page.
    4 possible orientations: 0, 90, 180, 270 (-90) degrees counter clockwise.

    Args:
        pre_processor: transform inputs for easier batched model inference
        model: core classification architecture (backbone + classification head)
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
    """

    _children_names: list[str] = ["pre_processor", "model"]

    def __init__(
        self,
        pre_processor: PreProcessor | None,
        model: Any | None,
    ) -> None:
        # Anything that is not a real PreProcessor disables preprocessing
        self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
        self.model = model

    def __call__(
        self,
        inputs: list[np.ndarray],
    ) -> list[list[int] | list[float]]:
        # Dimension check: every input must be a multi-channel 2D image
        if any(sample.ndim != 3 for sample in inputs):
            raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")

        if self.model is None or self.pre_processor is None:
            # Predictor disabled: neutral orientation (class 0) with full confidence
            return [[0] * len(inputs), [0] * len(inputs), [1.0] * len(inputs)]

        batches = self.pre_processor(inputs)
        logits = [self.model(batch) for batch in batches]
        # Per-sample confidence: max of the softmax over classes
        probs = [np.max(softmax(batch, axis=1), axis=1) for batch in logits]
        # Predicted class index per sample
        argmaxes = [np.argmax(batch, axis=1) for batch in logits]
        class_idxs = [int(pred) for batch in argmaxes for pred in batch]
        # Map class indices to their angle labels from the model config
        classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs]
        confs = [round(float(p), 2) for prob in probs for p in prob]
        return [class_idxs, classes, confs]
================================================
FILE: onnxtr/models/classification/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from onnxtr.models.engine import EngineConfig
from .. import classification
from ..preprocessor import PreProcessor
from .predictor import OrientationPredictor
__all__ = ["crop_orientation_predictor", "page_orientation_predictor"]

# Architectures accepted (by name) when building an OrientationPredictor
ORIENTATION_ARCHS: list[str] = ["mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation"]
def _orientation_predictor(
    arch: Any,
    model_type: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    disabled: bool = False,
    **kwargs: Any,
) -> OrientationPredictor:
    """Build an OrientationPredictor from an architecture name or a model instance.

    Args:
        arch: architecture name (str) or an already-instantiated MobileNetV3
        model_type: either "crop" or "page"; selects the default batch size
        load_in_8_bit: whether to load the 8-bit quantized model
        engine_cfg: configuration for the inference engine
        disabled: if True, return a no-op predictor
        **kwargs: forwarded to the PreProcessor

    Returns:
        OrientationPredictor
    """
    if disabled:
        # Case where the orientation predictor is disabled
        return OrientationPredictor(None, None)

    if isinstance(arch, str):
        if arch not in ORIENTATION_ARCHS:
            raise ValueError(f"unknown architecture '{arch}'")
        # Load directly classifier from backbone
        _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
    elif isinstance(arch, classification.MobileNetV3):
        _model = arch
    else:
        raise ValueError(f"unknown architecture: {type(arch)}")

    # Fill preprocessing defaults from the model configuration
    kwargs.setdefault("mean", _model.cfg["mean"])
    kwargs.setdefault("std", _model.cfg["std"])
    kwargs.setdefault("batch_size", 512 if model_type == "crop" else 2)
    input_shape = _model.cfg["input_shape"][1:]
    return OrientationPredictor(
        PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs),
        _model,
    )
def crop_orientation_predictor(
    arch: Any = "mobilenet_v3_small_crop_orientation",
    batch_size: int = 512,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> OrientationPredictor:
    """Crop orientation classification architecture.

    >>> import numpy as np
    >>> from onnxtr.models import crop_orientation_predictor
    >>> model = crop_orientation_predictor(arch='mobilenet_v3_small_crop_orientation')
    >>> input_crop = (255 * np.random.rand(256, 256, 3)).astype(np.uint8)
    >>> out = model([input_crop])

    Args:
        arch: name of the architecture to use (e.g. 'mobilenet_v3_small_crop_orientation')
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: load the 8-bit quantized version of the model
        engine_cfg: configuration of inference engine
        **kwargs: keyword arguments to be passed to the OrientationPredictor

    Returns:
        OrientationPredictor
    """
    return _orientation_predictor(
        arch=arch,
        batch_size=batch_size,
        model_type="crop",
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )
def page_orientation_predictor(
    arch: Any = "mobilenet_v3_small_page_orientation",
    batch_size: int = 2,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> OrientationPredictor:
    """Build a predictor that classifies the orientation of full pages.

    >>> import numpy as np
    >>> from onnxtr.models import page_orientation_predictor
    >>> model = page_orientation_predictor(arch='mobilenet_v3_small_page_orientation')
    >>> input_page = (255 * np.random.rand(512, 512, 3)).astype(np.uint8)
    >>> out = model([input_page])

    Args:
        arch: name of the architecture to use (e.g. 'mobilenet_v3_small_page_orientation')
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments to be passed to the OrientationPredictor

    Returns:
        OrientationPredictor
    """
    # Delegate to the shared builder with the page-specific model type
    return _orientation_predictor(
        arch=arch,
        model_type="page",
        batch_size=batch_size,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/models/detection/__init__.py
================================================
from .models import *
from .zoo import *
================================================
FILE: onnxtr/models/detection/_utils/__init__.py
================================================
from . base import *
================================================
FILE: onnxtr/models/detection/_utils/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import numpy as np
__all__ = ["_remove_padding"]
def _remove_padding(
pages: list[np.ndarray],
loc_preds: list[np.ndarray],
preserve_aspect_ratio: bool,
symmetric_pad: bool,
assume_straight_pages: bool,
) -> list[np.ndarray]:
"""Remove padding from the localization predictions
Args:
pages: list of pages
loc_preds: list of localization predictions
preserve_aspect_ratio: whether the aspect ratio was preserved during padding
symmetric_pad: whether the padding was symmetric
assume_straight_pages: whether the pages are assumed to be straight
Returns:
list of unpaded localization predictions
"""
if preserve_aspect_ratio:
# Rectify loc_preds to remove padding
rectified_preds = []
for page, loc_pred in zip(pages, loc_preds):
h, w = page.shape[0], page.shape[1]
if h > w:
# y unchanged, dilate x coord
if symmetric_pad:
if assume_straight_pages:
loc_pred[:, [0, 2]] = (loc_pred[:, [0, 2]] - 0.5) * h / w + 0.5
else:
loc_pred[:, :, 0] = (loc_pred[:, :, 0] - 0.5) * h / w + 0.5
else:
if assume_straight_pages:
loc_pred[:, [0, 2]] *= h / w
else:
loc_pred[:, :, 0] *= h / w
elif w > h:
# x unchanged, dilate y coord
if symmetric_pad:
if assume_straight_pages:
loc_pred[:, [1, 3]] = (loc_pred[:, [1, 3]] - 0.5) * w / h + 0.5
else:
loc_pred[:, :, 1] = (loc_pred[:, :, 1] - 0.5) * w / h + 0.5
else:
if assume_straight_pages:
loc_pred[:, [1, 3]] *= w / h
else:
loc_pred[:, :, 1] *= w / h
rectified_preds.append(np.clip(loc_pred, 0, 1))
return rectified_preds
return loc_preds
================================================
FILE: onnxtr/models/detection/core.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import cv2
import numpy as np
from onnxtr.utils.repr import NestedObject
__all__ = ["DetectionPostProcessor"]
class DetectionPostProcessor(NestedObject):
    """Abstract base turning raw model heatmaps into box predictions.

    Args:
        box_thresh (float): minimal objectness score to consider a box
        bin_thresh (float): threshold to apply to segmentation raw heatmap
        assume_straight_pages (bool): if True, fit straight boxes only
    """

    def __init__(self, box_thresh: float = 0.5, bin_thresh: float = 0.5, assume_straight_pages: bool = True) -> None:
        self.box_thresh = box_thresh
        self.bin_thresh = bin_thresh
        self.assume_straight_pages = assume_straight_pages
        # 3x3 structuring element used for the morphological opening in __call__
        self._opening_kernel: np.ndarray = np.ones((3, 3), dtype=np.uint8)

    def extra_repr(self) -> str:
        return f"bin_thresh={self.bin_thresh}, box_thresh={self.box_thresh}"

    @staticmethod
    def box_score(pred: np.ndarray, points: np.ndarray, assume_straight_pages: bool = True) -> float:
        """Compute the confidence score for a polygon: mean of the p values on the polygon

        Args:
            pred (np.ndarray): p map returned by the model
            points: coordinates of the polygon
            assume_straight_pages: if True, fit straight boxes only

        Returns:
            polygon objectness
        """
        h, w = pred.shape[:2]
        if not assume_straight_pages:
            # Rasterize the polygon and average the heatmap over its support
            mask: np.ndarray = np.zeros((h, w), np.int32)
            cv2.fillPoly(mask, [points.astype(np.int32)], 1.0)
            product = pred * mask
            return np.sum(product) / np.count_nonzero(product)
        # Straight boxes: average over the clipped axis-aligned bounding rectangle
        x_min = np.clip(np.floor(points[:, 0].min()).astype(np.int32), 0, w - 1)
        x_max = np.clip(np.ceil(points[:, 0].max()).astype(np.int32), 0, w - 1)
        y_min = np.clip(np.floor(points[:, 1].min()).astype(np.int32), 0, h - 1)
        y_max = np.clip(np.ceil(points[:, 1].max()).astype(np.int32), 0, h - 1)
        return pred[y_min : y_max + 1, x_min : x_max + 1].mean()

    def bitmap_to_boxes(
        self,
        pred: np.ndarray,
        bitmap: np.ndarray,
    ) -> np.ndarray:
        # Concrete post-processors must implement the box extraction
        raise NotImplementedError

    def __call__(
        self,
        proba_map,
    ) -> list[list[np.ndarray]]:
        """Performs postprocessing for a list of model outputs

        Args:
            proba_map: probability map of shape (N, H, W, C)

        Returns:
            list of N class predictions (for each input sample), where each class predictions is a list of C tensors
            of shape (*, 5) or (*, 6)
        """
        if proba_map.ndim != 4:
            raise AssertionError(f"arg `proba_map` is expected to be 4-dimensional, got {proba_map.ndim}.")

        num_classes = proba_map.shape[-1]
        # Binarize, then clean every class map with a morphological opening
        bin_map = []
        for bmap in (proba_map >= self.bin_thresh).astype(np.uint8):
            bin_map.append([
                cv2.morphologyEx(bmap[..., idx], cv2.MORPH_OPEN, self._opening_kernel)
                for idx in range(num_classes)
            ])

        results = []
        for pmaps, bmaps in zip(proba_map, bin_map):
            results.append([self.bitmap_to_boxes(pmaps[..., idx], bmaps[idx]) for idx in range(num_classes)])
        return results
================================================
FILE: onnxtr/models/detection/models/__init__.py
================================================
from .fast import *
from .differentiable_binarization import *
from .linknet import *
================================================
FILE: onnxtr/models/detection/models/differentiable_binarization.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from scipy.special import expit
from ...engine import Engine, EngineConfig
from ..postprocessor.base import GeneralDetectionPostProcessor
__all__ = ["DBNet", "db_resnet50", "db_resnet34", "db_mobilenet_v3_large"]
# Per-architecture defaults for the DBNet variants: expected input shape (C, H, W),
# normalization statistics consumed as mean/std by the preprocessor, and download
# URLs for both the full-precision and the 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "db_resnet50": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet50-69ba0015.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet50_static_8_bit-09a6104f.onnx",
    },
    "db_resnet34": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet34-b4873198.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet34_static_8_bit-027e2c7f.onnx",
    },
    "db_mobilenet_v3_large": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.2.0/db_mobilenet_v3_large-4987e7bd.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.2.0/db_mobilenet_v3_large_static_8_bit-535a6f25.onnx",
    },
}
class DBNet(Engine):
    """ONNX runtime wrapper around a DBNet text-detection model.

    Args:
        model_path: path or url to onnx model file
        engine_cfg: configuration for the inference engine
        bin_thresh: threshold for binarization of the output feature map
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: if True, fit straight bounding boxes only
        cfg: the configuration dict of the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        engine_cfg: EngineConfig | None = None,
        bin_thresh: float = 0.3,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg
        self.assume_straight_pages = assume_straight_pages
        # Shared post-processor turning probability maps into boxes/polygons
        self.postprocessor = GeneralDetectionPostProcessor(
            assume_straight_pages=assume_straight_pages,
            bin_thresh=bin_thresh,
            box_thresh=box_thresh,
        )

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Run inference and post-process into localization predictions.

        Args:
            x: preprocessed input batch
            return_model_output: also expose the raw probability map under "out_map"
            **kwargs: unused, kept for interface compatibility

        Returns:
            dict with "preds" (post-processed boxes) and optionally "out_map"
        """
        # Sigmoid over the raw logits to obtain a probability map
        prob_map = expit(self.run(x))
        out: dict[str, Any] = {}
        if return_model_output:
            out["out_map"] = prob_map
        out["preds"] = self.postprocessor(prob_map)
        return out
def _dbnet(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """Instantiate a DBNet variant, optionally substituting the quantized weights URL."""
    # Only the remote default URLs have an 8-bit counterpart to swap in
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    return DBNet(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
def db_resnet34(
    model_path: str = default_cfgs["db_resnet34"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """DBNet with a ResNet-34 backbone, as described in
    `"Real-time Scene Text Detection with Differentiable Binarization"
    <https://arxiv.org/pdf/1911.08947.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import db_resnet34
    >>> model = db_resnet34()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the DBNet architecture

    Returns:
        text detection architecture
    """
    return _dbnet("db_resnet34", model_path, load_in_8_bit, engine_cfg, **kwargs)
def db_resnet50(
    model_path: str = default_cfgs["db_resnet50"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """DBNet with a ResNet-50 backbone, as described in
    `"Real-time Scene Text Detection with Differentiable Binarization"
    <https://arxiv.org/pdf/1911.08947.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import db_resnet50
    >>> model = db_resnet50()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the DBNet architecture

    Returns:
        text detection architecture
    """
    return _dbnet("db_resnet50", model_path, load_in_8_bit, engine_cfg, **kwargs)
def db_mobilenet_v3_large(
    model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """DBNet with a MobileNet V3 Large backbone, as described in
    `"Real-time Scene Text Detection with Differentiable Binarization"
    <https://arxiv.org/pdf/1911.08947.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import db_mobilenet_v3_large
    >>> model = db_mobilenet_v3_large()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the DBNet architecture

    Returns:
        text detection architecture
    """
    return _dbnet("db_mobilenet_v3_large", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/detection/models/fast.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import logging
from typing import Any
import numpy as np
from scipy.special import expit
from ...engine import Engine, EngineConfig
from ..postprocessor.base import GeneralDetectionPostProcessor
__all__ = ["FAST", "fast_tiny", "fast_small", "fast_base"]
# Per-architecture defaults for the FAST variants: expected input shape (C, H, W),
# normalization statistics used as mean/std by the preprocessor, and the download
# URL of the full-precision ONNX weights.
# NOTE: unlike the DBNet/LinkNet configs, there are no `url_8_bit` entries --
# `_fast` warns and falls back to full precision when 8-bit loading is requested.
default_cfgs: dict[str, dict[str, Any]] = {
    "fast_tiny": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_tiny-28867779.onnx",
    },
    "fast_small": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_small-10428b70.onnx",
    },
    "fast_base": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_base-1b89ebf9.onnx",
    },
}
class FAST(Engine):
    """ONNX runtime wrapper around a FAST text-detection model.

    Args:
        model_path: path or url to onnx model file
        engine_cfg: configuration for the inference engine
        bin_thresh: threshold for binarization of the output feature map
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: if True, fit straight bounding boxes only
        cfg: the configuration dict of the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        engine_cfg: EngineConfig | None = None,
        bin_thresh: float = 0.1,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg
        self.assume_straight_pages = assume_straight_pages
        # Shared post-processor turning probability maps into boxes/polygons
        self.postprocessor = GeneralDetectionPostProcessor(
            assume_straight_pages=assume_straight_pages,
            bin_thresh=bin_thresh,
            box_thresh=box_thresh,
        )

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Run inference and post-process into localization predictions.

        Args:
            x: preprocessed input batch
            return_model_output: also expose the raw probability map under "out_map"
            **kwargs: unused, kept for interface compatibility

        Returns:
            dict with "preds" (post-processed boxes) and optionally "out_map"
        """
        # Sigmoid over the raw logits to obtain a probability map
        prob_map = expit(self.run(x))
        out: dict[str, Any] = {}
        if return_model_output:
            out["out_map"] = prob_map
        out["preds"] = self.postprocessor(prob_map)
        return out
def _fast(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """Instantiate a FAST variant; 8-bit weights are not available for FAST."""
    if load_in_8_bit:
        # No quantized release exists yet, so we keep the full-precision weights
        logging.warning("FAST models do not support 8-bit quantization yet. Loading full precision model...")
    return FAST(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
def fast_tiny(
    model_path: str = default_cfgs["fast_tiny"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
    <https://arxiv.org/pdf/2111.02394.pdf>`_, using a tiny TextNet backbone.

    >>> import numpy as np
    >>> from onnxtr.models import fast_tiny
    >>> model = fast_tiny()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the FAST architecture

    Returns:
        text detection architecture
    """
    return _fast("fast_tiny", model_path, load_in_8_bit, engine_cfg, **kwargs)
def fast_small(
    model_path: str = default_cfgs["fast_small"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
    <https://arxiv.org/pdf/2111.02394.pdf>`_, using a small TextNet backbone.

    >>> import numpy as np
    >>> from onnxtr.models import fast_small
    >>> model = fast_small()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the FAST architecture

    Returns:
        text detection architecture
    """
    return _fast("fast_small", model_path, load_in_8_bit, engine_cfg, **kwargs)
def fast_base(
    model_path: str = default_cfgs["fast_base"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
    <https://arxiv.org/pdf/2111.02394.pdf>`_, using a base TextNet backbone.

    >>> import numpy as np
    >>> from onnxtr.models import fast_base
    >>> model = fast_base()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the FAST architecture

    Returns:
        text detection architecture
    """
    return _fast("fast_base", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/detection/models/linknet.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from scipy.special import expit
from ...engine import Engine, EngineConfig
from ..postprocessor.base import GeneralDetectionPostProcessor
__all__ = ["LinkNet", "linknet_resnet18", "linknet_resnet34", "linknet_resnet50"]
# Per-architecture defaults for the LinkNet variants: expected input shape (C, H, W),
# normalization statistics consumed as mean/std by the preprocessor, and download
# URLs for both the full-precision and the 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "linknet_resnet18": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet18-e0e0b9dc.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet18_static_8_bit-3b3a37dd.onnx",
    },
    "linknet_resnet34": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet34-93e39a39.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet34_static_8_bit-2824329d.onnx",
    },
    "linknet_resnet50": {
        "input_shape": (3, 1024, 1024),
        "mean": (0.798, 0.785, 0.772),
        "std": (0.264, 0.2749, 0.287),
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet50-15d8c4ec.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet50_static_8_bit-65d6b0b8.onnx",
    },
}
class LinkNet(Engine):
    """ONNX runtime wrapper around a LinkNet text-detection model.

    Args:
        model_path: path or url to onnx model file
        engine_cfg: configuration for the inference engine
        bin_thresh: threshold for binarization of the output feature map
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: if True, fit straight bounding boxes only
        cfg: the configuration dict of the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        engine_cfg: EngineConfig | None = None,
        bin_thresh: float = 0.1,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg
        self.assume_straight_pages = assume_straight_pages
        # Shared post-processor turning probability maps into boxes/polygons
        self.postprocessor = GeneralDetectionPostProcessor(
            assume_straight_pages=assume_straight_pages,
            bin_thresh=bin_thresh,
            box_thresh=box_thresh,
        )

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Run inference and post-process into localization predictions.

        Args:
            x: preprocessed input batch
            return_model_output: also expose the raw probability map under "out_map"
            **kwargs: unused, kept for interface compatibility

        Returns:
            dict with "preds" (post-processed boxes) and optionally "out_map"
        """
        # Sigmoid over the raw logits to obtain a probability map
        prob_map = expit(self.run(x))
        out: dict[str, Any] = {}
        if return_model_output:
            out["out_map"] = prob_map
        out["preds"] = self.postprocessor(prob_map)
        return out
def _linknet(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """Instantiate a LinkNet variant, optionally substituting the quantized weights URL."""
    # Only the remote default URLs have an 8-bit counterpart to swap in
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    return LinkNet(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs)
def linknet_resnet18(
    model_path: str = default_cfgs["linknet_resnet18"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """LinkNet with a ResNet-18 backbone, as described in
    `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
    <https://arxiv.org/pdf/1707.03718.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import linknet_resnet18
    >>> model = linknet_resnet18()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the LinkNet architecture

    Returns:
        text detection architecture
    """
    return _linknet("linknet_resnet18", model_path, load_in_8_bit, engine_cfg, **kwargs)
def linknet_resnet34(
    model_path: str = default_cfgs["linknet_resnet34"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """LinkNet with a ResNet-34 backbone, as described in
    `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
    <https://arxiv.org/pdf/1707.03718.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import linknet_resnet34
    >>> model = linknet_resnet34()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the LinkNet architecture

    Returns:
        text detection architecture
    """
    return _linknet("linknet_resnet34", model_path, load_in_8_bit, engine_cfg, **kwargs)
def linknet_resnet50(
    model_path: str = default_cfgs["linknet_resnet50"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """LinkNet with a ResNet-50 backbone, as described in
    `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
    <https://arxiv.org/pdf/1707.03718.pdf>`_.

    >>> import numpy as np
    >>> from onnxtr.models import linknet_resnet50
    >>> model = linknet_resnet50()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the LinkNet architecture

    Returns:
        text detection architecture
    """
    return _linknet("linknet_resnet50", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/detection/postprocessor/__init__.py
================================================
================================================
FILE: onnxtr/models/detection/postprocessor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
# Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization
import cv2
import numpy as np
import pyclipper
from onnxtr.utils import order_points
from ..core import DetectionPostProcessor
__all__ = ["GeneralDetectionPostProcessor"]
class GeneralDetectionPostProcessor(DetectionPostProcessor):
    """Generic detection post-processor, shared by the DBNet, LinkNet and FAST
    wrappers: extracts connected components from a binarized probability map and
    converts each one into a (possibly rotated) box with an objectness score.

    Args:
        bin_thresh: threshold used to binarize the probability map at inference time
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: whether the inputs were expected to have horizontal text elements
    """
    def __init__(
        self,
        bin_thresh: float = 0.1,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
    ) -> None:
        # Parent signature is (box_thresh, bin_thresh, assume_straight_pages)
        super().__init__(box_thresh, bin_thresh, assume_straight_pages)
        # Expansion factor applied when unclipping detected polygons (see polygon_to_box)
        self.unclip_ratio = 1.5
    def polygon_to_box(
        self,
        points: np.ndarray,
    ) -> np.ndarray:
        """Expand a polygon (points) by a factor unclip_ratio and return the enclosing box

        Args:
            points: polygon vertices, in absolute (pixel) coordinates

        Returns:
            a box in absolute coordinates -- an (x, y, w, h) bounding rectangle when
            straight pages are assumed, otherwise a (4, 2) quadrangle; None when the
            expansion yields no polygon
        """
        if not self.assume_straight_pages:
            # Compute the rectangle polygon enclosing the raw polygon
            rect = cv2.minAreaRect(points)
            points = cv2.boxPoints(rect)
            # Add 1 pixel to correct cv2 approx
            area = (rect[1][0] + 1) * (1 + rect[1][1])
            length = 2 * (rect[1][0] + rect[1][1]) + 2
        else:
            area = cv2.contourArea(points)
            length = cv2.arcLength(points, closed=True)
        distance = area * self.unclip_ratio / length  # compute distance to expand polygon
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        _points = offset.Execute(distance)
        # Take biggest stack of points (offsetting can split the polygon)
        idx = 0
        if len(_points) > 1:
            max_size = 0
            for _idx, p in enumerate(_points):
                if len(p) > max_size:
                    idx = _idx
                    max_size = len(p)
            # We ensure that _points can be correctly casted to a ndarray
            _points = [_points[idx]]
        expanded_points: np.ndarray = np.asarray(_points)  # expand polygon
        if len(expanded_points) < 1:
            return None  # type: ignore[return-value]
        return (
            cv2.boundingRect(expanded_points)  # type: ignore[return-value]
            if self.assume_straight_pages
            else order_points(cv2.boxPoints(cv2.minAreaRect(expanded_points)))
        )
    def bitmap_to_boxes(
        self,
        pred: np.ndarray,
        bitmap: np.ndarray,
    ) -> np.ndarray:
        """Compute boxes from a bitmap/pred_map: find connected components then filter boxes

        Args:
            pred: probability map the bitmap was derived from
            bitmap: binarized map computed from pred

        Returns:
            np tensor of relative boxes: rows of (xmin, ymin, xmax, ymax, score) when
            straight pages are assumed, otherwise one (5, 2) array per box whose first
            4 rows are the quadrangle vertices and whose last row is (0, score)
        """
        height, width = bitmap.shape[:2]
        boxes: list[np.ndarray | list[float]] = []
        # get contours from connected components on the bitmap
        contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            # Check whether smallest enclosing bounding box is not too small
            if np.any(contour[:, 0].max(axis=0) - contour[:, 0].min(axis=0) < 2):
                continue
            # Compute objectness
            if self.assume_straight_pages:
                x, y, w, h = cv2.boundingRect(contour)
                points: np.ndarray = np.array([[x, y], [x, y + h], [x + w, y + h], [x + w, y]])
                score = self.box_score(pred, points, assume_straight_pages=True)
            else:
                score = self.box_score(pred, contour, assume_straight_pages=False)
            if score < self.box_thresh:  # remove polygons with a weak objectness
                continue
            # NOTE(review): polygon_to_box may return None (empty expansion); the
            # unpacking/indexing below would then fail -- presumably rare, confirm upstream.
            if self.assume_straight_pages:
                _box = self.polygon_to_box(points)
            else:
                _box = self.polygon_to_box(np.squeeze(contour))
            if self.assume_straight_pages:
                # compute relative polygon to get rid of img shape
                x, y, w, h = _box
                xmin, ymin, xmax, ymax = x / width, y / height, (x + w) / width, (y + h) / height
                boxes.append([xmin, ymin, xmax, ymax, score])
            else:
                # compute relative box to get rid of img shape
                _box[:, 0] /= width
                _box[:, 1] /= height
                # Add score to box as (0, score)
                boxes.append(np.vstack([_box, np.array([0.0, score])]))
        if not self.assume_straight_pages:
            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5, 2), dtype=pred.dtype)
        else:
            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5), dtype=pred.dtype)
================================================
FILE: onnxtr/models/detection/predictor/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/models/detection/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
import numpy as np
from onnxtr.models.detection._utils import _remove_padding
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.repr import NestedObject
__all__ = ["DetectionPredictor"]
class DetectionPredictor(NestedObject):
    """Localizes text elements in documents.

    Args:
        pre_processor: transform inputs for easier batched model inference
        model: core detection architecture
    """

    _children_names: list[str] = ["pre_processor", "model"]

    def __init__(
        self,
        pre_processor: PreProcessor,
        model: Any,
    ) -> None:
        self.pre_processor = pre_processor
        self.model = model

    def __call__(
        self,
        pages: list[np.ndarray],
        return_maps: bool = False,
        **kwargs: Any,
    ) -> list[np.ndarray] | tuple[list[np.ndarray], list[np.ndarray]]:
        """Run detection on a list of pages.

        Args:
            pages: list of multi-channel 2D page images
            return_maps: also return the raw segmentation maps
            **kwargs: forwarded to the underlying model

        Returns:
            localization predictions per page, optionally paired with segmentation maps
        """
        # Padding parameters come from the preprocessor, orientation handling from the model
        preserve_aspect_ratio = self.pre_processor.resize.preserve_aspect_ratio
        symmetric_pad = self.pre_processor.resize.symmetric_pad
        assume_straight_pages = self.model.assume_straight_pages

        if any(page.ndim != 3 for page in pages):
            raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")

        batches = self.pre_processor(pages)
        outputs = [self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in batches]

        # Flatten per-batch predictions, keeping only the first class of each sample
        loc_preds: list[np.ndarray] = []
        for output in outputs:
            loc_preds.extend(pred[0] for pred in output["preds"])

        # Rescale coordinates to undo aspect-ratio padding
        preds = _remove_padding(
            pages,
            loc_preds,
            preserve_aspect_ratio=preserve_aspect_ratio,
            symmetric_pad=symmetric_pad,
            assume_straight_pages=assume_straight_pages,
        )

        if return_maps:
            seg_maps = [seg_map for output in outputs for seg_map in output["out_map"]]
            return preds, seg_maps
        return preds
================================================
FILE: onnxtr/models/detection/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from .. import detection
from ..engine import EngineConfig
from ..preprocessor import PreProcessor
from .predictor import DetectionPredictor
__all__ = ["detection_predictor"]
# Names of all text-detection architectures this zoo can instantiate by string.
ARCHS = [
    "db_resnet34",
    "db_resnet50",
    "db_mobilenet_v3_large",
    "linknet_resnet18",
    "linknet_resnet34",
    "linknet_resnet50",
    "fast_tiny",
    "fast_small",
    "fast_base",
]
def _predictor(
    arch: Any,
    assume_straight_pages: bool = True,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DetectionPredictor:
    """Build a DetectionPredictor from an architecture name or model instance."""
    if isinstance(arch, str):
        if arch not in ARCHS:
            raise ValueError(f"unknown architecture '{arch}'")
        # Instantiate the requested architecture by name
        _model = detection.__dict__[arch](
            assume_straight_pages=assume_straight_pages, load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg
        )
    else:
        if not isinstance(arch, (detection.DBNet, detection.LinkNet, detection.FAST)):
            raise ValueError(f"unknown architecture: {type(arch)}")
        # Reuse the provided model, aligning its orientation handling
        _model = arch
        _model.assume_straight_pages = assume_straight_pages
        _model.postprocessor.assume_straight_pages = assume_straight_pages

    # Fill preprocessor defaults from the model configuration
    kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
    kwargs["std"] = kwargs.get("std", _model.cfg["std"])
    kwargs["batch_size"] = kwargs.get("batch_size", 2)
    return DetectionPredictor(
        PreProcessor(_model.cfg["input_shape"][1:], **kwargs),
        _model,
    )
def detection_predictor(
    arch: Any = "fast_base",
    assume_straight_pages: bool = True,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    batch_size: int = 2,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DetectionPredictor:
    """Text detection architecture.

    >>> import numpy as np
    >>> from onnxtr.models import detection_predictor
    >>> model = detection_predictor(arch='db_resnet50')
    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
    >>> out = model([input_page])

    Args:
        arch: name of the architecture or model itself to use (e.g. 'db_resnet50')
        assume_straight_pages: If True, fit straight boxes to the page
        preserve_aspect_ratio: If True, pad the input document image to preserve the aspect ratio before
            running the detection model on it
        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: optional keyword arguments passed to the architecture

    Returns:
        Detection predictor
    """
    # Preprocessing options travel through **kwargs down to the PreProcessor
    kwargs.update(
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        batch_size=batch_size,
    )
    return _predictor(
        arch=arch,
        assume_straight_pages=assume_straight_pages,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/models/engine.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
import logging
import os
from collections.abc import Callable
from typing import Any, TypeAlias
import numpy as np
from onnxruntime import (
ExecutionMode,
GraphOptimizationLevel,
InferenceSession,
RunOptions,
SessionOptions,
get_available_providers,
get_device,
)
from onnxruntime.capi._pybind_state import set_default_logger_severity
# Reduce onnxruntime's default log verbosity; overridable through the
# ORT_LOG_SEVERITY_LEVEL environment variable. NOTE(review): level 4 appears to
# be the quietest (fatal-only) setting — confirm against onnxruntime docs.
set_default_logger_severity(int(os.getenv("ORT_LOG_SEVERITY_LEVEL", 4)))
from onnxtr.utils.data import download_from_url
from onnxtr.utils.geometry import shape_translate
__all__ = ["EngineConfig", "RunOptionsProvider"]
# User hook signature: receives the RunOptions for one inference call and
# returns the (possibly adjusted) RunOptions to actually use.
RunOptionsProvider: TypeAlias = Callable[[RunOptions], RunOptions]
class EngineConfig:
    """Configuration holder for the inference engine of a model.

    Args:
        providers: list of providers to use for inference ref.: https://onnxruntime.ai/docs/execution-providers/
        session_options: configuration for the inference session ref.: https://onnxruntime.ai/docs/api/python/api_summary.html#sessionoptions
        run_options_provider: optional hook invoked before each run to customize the ``RunOptions``
    """

    def __init__(
        self,
        providers: list[tuple[str, dict[str, Any]]] | list[str] | None = None,
        session_options: SessionOptions | None = None,
        run_options_provider: RunOptionsProvider | None = None,
    ):
        # Fall back to auto-detected defaults when nothing was supplied
        self._providers = providers if providers else self._init_providers()
        self._session_options = session_options if session_options else self._init_sess_opts()
        self.run_options_provider = run_options_provider

    def _init_providers(self) -> list[tuple[str, dict[str, Any]]]:
        # CPU is always present as the fallback provider
        selected: Any = [("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
        available = get_available_providers()
        logging.info(f"Available providers: {available}")
        if "CUDAExecutionProvider" in available and get_device() == "GPU":  # pragma: no cover
            cuda_options = {
                "device_id": 0,
                "arena_extend_strategy": "kNextPowerOfTwo",
                "cudnn_conv_algo_search": "DEFAULT",
                "do_copy_in_default_stream": True,
            }
            selected.insert(0, ("CUDAExecutionProvider", cuda_options))
        elif "CoreMLExecutionProvider" in available:  # pragma: no cover
            selected.insert(0, ("CoreMLExecutionProvider", {}))
        return selected

    def _init_sess_opts(self) -> SessionOptions:
        # Defaults: sequential execution with full graph optimization enabled
        opts = SessionOptions()
        opts.enable_cpu_mem_arena = True
        opts.execution_mode = ExecutionMode.ORT_SEQUENTIAL
        opts.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
        opts.intra_op_num_threads = -1
        opts.inter_op_num_threads = -1
        return opts

    @property
    def providers(self) -> list[tuple[str, dict[str, Any]]] | list[str]:
        return self._providers

    @property
    def session_options(self) -> SessionOptions:
        return self._session_options

    def __repr__(self) -> str:
        return f"EngineConfig(providers={self.providers})"
class Engine:
    """Implements an abstract class for the engine of a model

    Args:
        url: the url to use to download a model if needed
        engine_cfg: the configuration of the engine
        **kwargs: additional arguments to be passed to `download_from_url`
    """

    def __init__(self, url: str, engine_cfg: EngineConfig | None = None, **kwargs: Any) -> None:
        # Anything that is not an EngineConfig instance falls back to the defaults
        engine_cfg = engine_cfg if isinstance(engine_cfg, EngineConfig) else EngineConfig()
        # `url` may also be a local file path; only download for URL-looking inputs
        archive_path = download_from_url(url, cache_subdir="models", **kwargs) if "http" in url else url
        # NOTE: older onnxruntime versions require a string path for windows
        archive_path = rf"{archive_path}"
        # Store model path for each model
        self.model_path = archive_path
        self.session_options = engine_cfg.session_options
        self.providers = engine_cfg.providers
        self.run_options_provider = engine_cfg.run_options_provider
        self.runtime = InferenceSession(archive_path, providers=self.providers, sess_options=self.session_options)
        # First graph input defines the expected layout / batch dimension
        self.runtime_inputs = self.runtime.get_inputs()[0]
        # A trailing dimension of 3 (channels-last) indicates a tensorflow-exported graph
        self.tf_exported = int(self.runtime_inputs.shape[-1]) == 3
        self.fixed_batch_size: int | str = self.runtime_inputs.shape[
            0
        ]  # mostly possible with tensorflow exported models
        self.output_name = [output.name for output in self.runtime.get_outputs()]

    def run(self, inputs: np.ndarray) -> np.ndarray:
        """Run inference on a batch and return the logits in BHWC layout.

        Args:
            inputs: batched input array; layout is normalized below to match the graph

        Returns:
            model output translated to BHWC layout
        """
        run_options = RunOptions()
        if self.run_options_provider is not None:
            # Give the user hook a chance to customize per-call run options
            run_options = self.run_options_provider(run_options)
        if self.tf_exported:
            inputs = shape_translate(inputs, format="BHWC")  # sanity check
        else:
            inputs = shape_translate(inputs, format="BCHW")
        if isinstance(self.fixed_batch_size, int) and self.fixed_batch_size != 0:  # dynamic batch size is a string
            # NOTE(review): broadcasts the whole batch up to the graph's static
            # batch size and feeds each broadcast slice as one session run —
            # assumes the model was exported with a fixed batch dimension;
            # confirm against the exporter used.
            inputs = np.broadcast_to(inputs, (self.fixed_batch_size, *inputs.shape))
            # combine the results
            logits = np.concatenate(
                [
                    self.runtime.run(self.output_name, {self.runtime_inputs.name: batch}, run_options=run_options)[0]
                    for batch in inputs
                ],
                axis=0,
            )
        else:
            logits = self.runtime.run(self.output_name, {self.runtime_inputs.name: inputs}, run_options=run_options)[0]
        return shape_translate(logits, format="BHWC")
================================================
FILE: onnxtr/models/factory/__init__.py
================================================
from .hub import *
================================================
FILE: onnxtr/models/factory/hub.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
# Inspired by: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/hub.py
import json
import logging
import shutil
import subprocess
import tempfile
import textwrap
from pathlib import Path
from typing import Any
from huggingface_hub import (
HfApi,
get_token,
hf_hub_download,
login,
)
from onnxtr import models
from onnxtr.models.engine import EngineConfig
__all__ = ["login_to_hub", "push_to_hf_hub", "from_hub", "_save_model_and_config_for_hf_hub"]
# Map each supported task to the architecture names its zoo exposes; used to
# validate the `arch`/`task` pair before pushing a model to the hub.
AVAILABLE_ARCHS = {
    "classification": models.classification.zoo.ORIENTATION_ARCHS,
    "detection": models.detection.zoo.ARCHS,
    "recognition": models.recognition.zoo.ARCHS,
}
def login_to_hub() -> None:  # pragma: no cover
    """Login to huggingface hub"""
    access_token = get_token()
    if access_token is None:
        # No cached credentials: fall back to an interactive login
        login()
    else:
        logging.info("Huggingface Hub token found and valid")
        login(token=access_token)
    # check if git lfs is installed
    try:
        subprocess.call(["git", "lfs", "version"])
    except FileNotFoundError:
        raise OSError(
            "Looks like you do not have git-lfs installed, please install. \
            You can install from https://git-lfs.github.com/. \
            Then run `git lfs install` (you only have to do this once)."
        )
def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task: str) -> None:
"""Save model and config to disk for pushing to huggingface hub
Args:
model: Onnx model to be saved
save_dir: directory to save model and config
arch: architecture name
task: task name
"""
save_directory = Path(save_dir)
shutil.copy2(model.model_path, save_directory / "model.onnx")
config_path = save_directory / "config.json"
# add model configuration
model_config = model.cfg
model_config["arch"] = arch
model_config["task"] = task
with config_path.open("w") as f:
json.dump(model_config, f, indent=2, ensure_ascii=False)
def push_to_hf_hub(
    model: Any, model_name: str, task: str, override: bool = False, **kwargs
) -> None:  # pragma: no cover
    """Save model and its configuration on HF hub

    >>> from onnxtr.models import login_to_hub, push_to_hf_hub
    >>> from onnxtr.models.recognition import crnn_mobilenet_v3_small
    >>> login_to_hub()
    >>> model = crnn_mobilenet_v3_small()
    >>> push_to_hf_hub(model, 'my-model', 'recognition', arch='crnn_mobilenet_v3_small')

    Args:
        model: Onnx model to be saved
        model_name: name of the model which is also the repository name
        task: task name
        override: whether to override the existing model / repo on HF hub
        **kwargs: keyword arguments for push_to_hf_hub (`run_config` or `arch`)
    """
    run_config = kwargs.get("run_config", None)
    arch = kwargs.get("arch", None)
    if run_config is None and arch is None:
        raise ValueError("run_config or arch must be specified")
    if task not in ["classification", "detection", "recognition"]:
        raise ValueError("task must be one of classification, detection, recognition")
    # default readme
    readme = textwrap.dedent(
        f"""
        ---
        language:
        - en
        - fr
        license: apache-2.0
        ---
        **Optical Character Recognition made seamless & accessible to anyone, powered by Onnxruntime**
        ## Task: {task}
        https://github.com/felixdittrich92/OnnxTR
        ### Example usage:
        ```python
        >>> from onnxtr.io import DocumentFile
        >>> from onnxtr.models import ocr_predictor, from_hub
        >>> img = DocumentFile.from_images([''])
        >>> # Load your model from the hub
        >>> model = from_hub('onnxtr/my-model')
        >>> # Pass it to the predictor
        >>> # If your model is a recognition model:
        >>> predictor = ocr_predictor(det_arch='db_mobilenet_v3_large',
        >>> reco_arch=model)
        >>> # If your model is a detection model:
        >>> predictor = ocr_predictor(det_arch=model,
        >>> reco_arch='crnn_mobilenet_v3_small')
        >>> # Get your predictions
        >>> res = predictor(img)
        ```
        """
    )
    # add run configuration to readme if available
    if run_config is not None:
        arch = run_config.arch
        readme += textwrap.dedent(
            f"""### Run Configuration
            \n{json.dumps(vars(run_config), indent=2, ensure_ascii=False)}"""
        )
    # Validate the (possibly run_config-derived) architecture before any upload
    if arch not in AVAILABLE_ARCHS[task]:
        raise ValueError(
            f"Architecture: {arch} for task: {task} not found.\
            \nAvailable architectures: {AVAILABLE_ARCHS}"
        )
    commit_message = f"Add {model_name} model"
    # Create repository — honor `override`: pushing to an existing repo is only
    # allowed when explicitly requested (previously exist_ok was hard-coded to
    # False, so the documented `override` flag had no effect)
    api = HfApi()
    api.create_repo(model_name, token=get_token(), exist_ok=override)
    # Save model files to a temporary directory
    with tempfile.TemporaryDirectory() as tmp_dir:
        _save_model_and_config_for_hf_hub(model, tmp_dir, arch=arch, task=task)
        readme_path = Path(tmp_dir) / "README.md"
        readme_path.write_text(readme)
        # Upload all files to the hub
        api.upload_folder(
            folder_path=tmp_dir,
            repo_id=model_name,
            commit_message=commit_message,
            token=get_token(),
        )
def from_hub(repo_id: str, engine_cfg: EngineConfig | None = None, **kwargs: Any):
    """Instantiate & load a pretrained model from HF hub.

    >>> from onnxtr.models import from_hub
    >>> model = from_hub("onnxtr/my-model")

    Args:
        repo_id: HuggingFace model hub repo
        engine_cfg: configuration for the inference engine (optional)
        **kwargs: kwargs of `hf_hub_download`

    Returns:
        Model loaded with the checkpoint

    Raises:
        ValueError: if the downloaded config declares an unknown task
    """
    # Get the config
    with open(hf_hub_download(repo_id, filename="config.json", **kwargs), "rb") as f:
        cfg = json.load(f)
    model_path = hf_hub_download(repo_id, filename="model.onnx", **kwargs)
    # `arch`/`task` are export metadata, not model configuration — strip them
    arch = cfg.pop("arch")
    task = cfg.pop("task")
    if task == "classification":
        model = models.classification.__dict__[arch](model_path, classes=cfg["classes"], engine_cfg=engine_cfg)
    elif task == "detection":
        model = models.detection.__dict__[arch](model_path, engine_cfg=engine_cfg)
    elif task == "recognition":
        model = models.recognition.__dict__[arch](
            model_path, input_shape=cfg["input_shape"], vocab=cfg["vocab"], engine_cfg=engine_cfg
        )
    else:
        # Fail loudly on a malformed config instead of hitting an unbound
        # local variable (NameError) further down
        raise ValueError(f"unknown task: {task}")
    # convert all values which are lists to tuples
    for key, value in cfg.items():
        if isinstance(value, list):
            cfg[key] = tuple(value)
    # update model cfg
    model.cfg = cfg
    return model
================================================
FILE: onnxtr/models/predictor/__init__.py
================================================
from .predictor import *
================================================
FILE: onnxtr/models/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
from collections.abc import Callable
from typing import Any
import numpy as np
from onnxtr.models.builder import DocumentBuilder
from onnxtr.models.engine import EngineConfig
from onnxtr.utils.geometry import extract_crops, extract_rcrops, remove_image_padding, rotate_image
from .._utils import estimate_orientation, rectify_crops, rectify_loc_preds
from ..classification import crop_orientation_predictor, page_orientation_predictor
from ..classification.predictor import OrientationPredictor
from ..detection.zoo import ARCHS as DETECTION_ARCHS
from ..recognition.zoo import ARCHS as RECOGNITION_ARCHS
__all__ = ["_OCRPredictor"]
class _OCRPredictor:
    """Implements an object able to localize and identify text elements in a set of documents

    Args:
        assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
            without rotated textual elements.
        straighten_pages: if True, estimates the page general orientation based on the median line orientation.
            Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
            accordingly. Doing so will improve performances for documents with page-uniform rotations.
        preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding)
        symmetric_pad: if True and preserve_aspect_ratio is True, pad the image symmetrically.
        detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
            page. Doing so will slightly deteriorate the overall latency.
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        clf_engine_cfg: configuration of the orientation classification engine
        **kwargs: keyword args of `DocumentBuilder` (plus the `disable_page_orientation` /
            `disable_crop_orientation` switches popped below)
    """

    crop_orientation_predictor: OrientationPredictor | None
    page_orientation_predictor: OrientationPredictor | None

    def __init__(
        self,
        assume_straight_pages: bool = True,
        straighten_pages: bool = False,
        preserve_aspect_ratio: bool = True,
        symmetric_pad: bool = True,
        detect_orientation: bool = False,
        load_in_8_bit: bool = False,
        clf_engine_cfg: EngineConfig | None = None,
        **kwargs: Any,
    ) -> None:
        self.assume_straight_pages = assume_straight_pages
        self.straighten_pages = straighten_pages
        # Pop the orientation switches before kwargs reaches DocumentBuilder
        self._page_orientation_disabled = kwargs.pop("disable_page_orientation", False)
        self._crop_orientation_disabled = kwargs.pop("disable_crop_orientation", False)
        # Crop-level orientation model is only needed for possibly-rotated words
        self.crop_orientation_predictor = (
            None
            if assume_straight_pages
            else crop_orientation_predictor(
                load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled
            )
        )
        # BUGFIX: wire the page orientation predictor to the page-level disable
        # flag (it was previously passed the crop-level flag by copy-paste,
        # leaving `disable_page_orientation` without effect)
        self.page_orientation_predictor = (
            page_orientation_predictor(
                load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._page_orientation_disabled
            )
            if detect_orientation or straighten_pages or not assume_straight_pages
            else None
        )
        self.doc_builder = DocumentBuilder(**kwargs)
        self.preserve_aspect_ratio = preserve_aspect_ratio
        self.symmetric_pad = symmetric_pad
        self.hooks: list[Callable] = []

    def _general_page_orientations(
        self,
        pages: list[np.ndarray],
    ) -> list[tuple[int, float]]:
        """Estimate the general orientation (value, confidence) of each page."""
        _, classes, probs = zip(self.page_orientation_predictor(pages))  # type: ignore[misc]
        # Flatten to list of tuples with (value, confidence)
        page_orientations = [
            (orientation, prob)
            for page_classes, page_probs in zip(classes, probs)
            for orientation, prob in zip(page_classes, page_probs)
        ]
        return page_orientations

    def _get_orientations(
        self, pages: list[np.ndarray], seg_maps: list[np.ndarray]
    ) -> tuple[list[tuple[int, float]], list[int]]:
        """Compute general page orientations and the per-page rotation angles."""
        general_pages_orientations = self._general_page_orientations(pages)
        origin_page_orientations = [
            estimate_orientation(seq_map, general_orientation)
            for seq_map, general_orientation in zip(seg_maps, general_pages_orientations)
        ]
        return general_pages_orientations, origin_page_orientations

    def _straighten_pages(
        self,
        pages: list[np.ndarray],
        seg_maps: list[np.ndarray],
        general_pages_orientations: list[tuple[int, float]] | None = None,
        origin_pages_orientations: list[int] | None = None,
    ) -> list[np.ndarray]:
        """Rotate each page by its estimated angle; orientations are recomputed when not supplied."""
        general_pages_orientations = (
            general_pages_orientations if general_pages_orientations else self._general_page_orientations(pages)
        )
        origin_pages_orientations = (
            origin_pages_orientations
            if origin_pages_orientations
            else [
                estimate_orientation(seq_map, general_orientation)
                for seq_map, general_orientation in zip(seg_maps, general_pages_orientations)
            ]
        )
        return [
            # expand if height and width are not equal, afterwards remove padding
            remove_image_padding(rotate_image(page, angle, expand=page.shape[0] != page.shape[1]))
            for page, angle in zip(pages, origin_pages_orientations)
        ]

    @staticmethod
    def _generate_crops(
        pages: list[np.ndarray],
        loc_preds: list[np.ndarray],
        channels_last: bool,
        assume_straight_pages: bool = False,
        assume_horizontal: bool = False,
    ) -> list[list[np.ndarray]]:
        """Extract one image crop per localized box, page by page."""
        if assume_straight_pages:
            crops = [
                extract_crops(page, _boxes[:, :4], channels_last=channels_last)
                for page, _boxes in zip(pages, loc_preds)
            ]
        else:
            # Rotated boxes need the rotated-crop extraction path
            crops = [
                extract_rcrops(page, _boxes[:, :4], channels_last=channels_last, assume_horizontal=assume_horizontal)
                for page, _boxes in zip(pages, loc_preds)
            ]
        return crops

    @staticmethod
    def _prepare_crops(
        pages: list[np.ndarray],
        loc_preds: list[np.ndarray],
        channels_last: bool,
        assume_straight_pages: bool = False,
        assume_horizontal: bool = False,
    ) -> tuple[list[list[np.ndarray]], list[np.ndarray]]:
        """Generate crops and filter out degenerate (zero-sized) ones, along with their boxes."""
        crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal)
        # Avoid sending zero-sized crops
        is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops]
        crops = [
            [crop for crop, _kept in zip(page_crops, page_kept) if _kept]
            for page_crops, page_kept in zip(crops, is_kept)
        ]
        loc_preds = [_boxes[_kept] for _boxes, _kept in zip(loc_preds, is_kept)]
        return crops, loc_preds

    def _rectify_crops(
        self,
        crops: list[list[np.ndarray]],
        loc_preds: list[np.ndarray],
    ) -> tuple[list[list[np.ndarray]], list[np.ndarray], list[tuple[int, float]]]:
        """Rotate crops (and their location predictions) back to reading orientation."""
        # Work at a page level
        orientations, classes, probs = zip(*[self.crop_orientation_predictor(page_crops) for page_crops in crops])  # type: ignore[misc]
        rect_crops = [rectify_crops(page_crops, orientation) for page_crops, orientation in zip(crops, orientations)]
        rect_loc_preds = [
            rectify_loc_preds(page_loc_preds, orientation) if len(page_loc_preds) > 0 else page_loc_preds
            for page_loc_preds, orientation in zip(loc_preds, orientations)
        ]
        # Flatten to list of tuples with (value, confidence)
        crop_orientations = [
            (orientation, prob)
            for page_classes, page_probs in zip(classes, probs)
            for orientation, prob in zip(page_classes, page_probs)
        ]
        return rect_crops, rect_loc_preds, crop_orientations  # type: ignore[return-value]

    @staticmethod
    def _process_predictions(
        loc_preds: list[np.ndarray],
        word_preds: list[tuple[str, float]],
        crop_orientations: list[dict[str, Any]],
    ) -> tuple[list[np.ndarray], list[list[tuple[str, float]]], list[list[dict[str, Any]]]]:
        """Regroup the flat word/orientation prediction lists back per page."""
        text_preds = []
        crop_orientation_preds = []
        if len(loc_preds) > 0:
            # Text & crop orientation predictions at page level
            _idx = 0
            for page_boxes in loc_preds:
                text_preds.append(word_preds[_idx : _idx + page_boxes.shape[0]])
                crop_orientation_preds.append(crop_orientations[_idx : _idx + page_boxes.shape[0]])
                _idx += page_boxes.shape[0]
        return loc_preds, text_preds, crop_orientation_preds

    def add_hook(self, hook: Callable) -> None:
        """Add a hook to the predictor

        Args:
            hook: a callable that takes as input the `loc_preds` and returns the modified `loc_preds`
        """
        self.hooks.append(hook)

    def list_archs(self) -> dict[str, list[str]]:
        """List the detection and recognition architectures available in the zoos."""
        return {"detection_archs": DETECTION_ARCHS, "recognition_archs": RECOGNITION_ARCHS}
================================================
FILE: onnxtr/models/predictor/predictor.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
from typing import Any
import numpy as np
from onnxtr.io.elements import Document
from onnxtr.models._utils import get_language
from onnxtr.models.detection.predictor import DetectionPredictor
from onnxtr.models.engine import EngineConfig
from onnxtr.models.recognition.predictor import RecognitionPredictor
from onnxtr.utils.geometry import detach_scores
from onnxtr.utils.repr import NestedObject
from .base import _OCRPredictor
__all__ = ["OCRPredictor"]
class OCRPredictor(NestedObject, _OCRPredictor):
    """Implements an object able to localize and identify text elements in a set of documents

    Args:
        det_predictor: detection module
        reco_predictor: recognition module
        assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
            without rotated textual elements.
        straighten_pages: if True, estimates the page general orientation based on the median line orientation.
            Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped
            accordingly. Doing so will improve performances for documents with page-uniform rotations.
        preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding)
        symmetric_pad: if True and preserve_aspect_ratio is True, pad the image symmetrically
        detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
            page. Doing so will slightly deteriorate the overall latency.
        detect_language: if True, the language prediction will be added to the predictions for each
            page. Doing so will slightly deteriorate the overall latency.
        clf_engine_cfg: configuration of the orientation classification engine
        **kwargs: keyword args of `DocumentBuilder`
    """

    _children_names = ["det_predictor", "reco_predictor", "doc_builder"]

    def __init__(
        self,
        det_predictor: DetectionPredictor,
        reco_predictor: RecognitionPredictor,
        assume_straight_pages: bool = True,
        straighten_pages: bool = False,
        preserve_aspect_ratio: bool = True,
        symmetric_pad: bool = True,
        detect_orientation: bool = False,
        detect_language: bool = False,
        clf_engine_cfg: EngineConfig | None = None,
        **kwargs: Any,
    ) -> None:
        self.det_predictor = det_predictor
        self.reco_predictor = reco_predictor
        # Shared OCR state (orientation predictors, document builder, hooks)
        _OCRPredictor.__init__(
            self,
            assume_straight_pages,
            straighten_pages,
            preserve_aspect_ratio,
            symmetric_pad,
            detect_orientation,
            clf_engine_cfg=clf_engine_cfg,
            **kwargs,
        )
        self.detect_orientation = detect_orientation
        self.detect_language = detect_language

    def __call__(
        self,
        pages: list[np.ndarray],
        **kwargs: Any,
    ) -> Document:
        """Run the full OCR pipeline: detection, optional rectification, recognition, document building.

        Args:
            pages: list of pages as 3-dimensional (multi-channel) arrays
            **kwargs: forwarded to the detection and recognition predictors

        Returns:
            the structured `Document` assembled by the document builder
        """
        # Dimension check
        if any(page.ndim != 3 for page in pages):
            raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")
        origin_page_shapes = [page.shape[:2] for page in pages]
        # Localize text elements
        loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs)
        # Detect document rotation and rotate pages
        # (binarize the raw detection maps with the postprocessor's threshold)
        seg_maps = [
            np.where(out_map > getattr(self.det_predictor.model.postprocessor, "bin_thresh"), 255, 0).astype(np.uint8)
            for out_map in out_maps
        ]
        if self.detect_orientation:
            general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps)
            orientations = [
                {"value": orientation_page, "confidence": None} for orientation_page in origin_pages_orientations
            ]
        else:
            orientations = None
            general_pages_orientations = None
            origin_pages_orientations = None
        if self.straighten_pages:
            pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations)
            # update page shapes after straightening
            origin_page_shapes = [page.shape[:2] for page in pages]
            # forward again to get predictions on straight pages
            loc_preds = self.det_predictor(pages, **kwargs)  # type: ignore[assignment]
        # Detach objectness scores from loc_preds
        loc_preds, objectness_scores = detach_scores(loc_preds)  # type: ignore[arg-type]
        # Apply hooks to loc_preds if any
        for hook in self.hooks:
            loc_preds = hook(loc_preds)
        # Crop images
        crops, loc_preds = self._prepare_crops(
            pages,
            loc_preds,
            channels_last=True,
            assume_straight_pages=self.assume_straight_pages,
            assume_horizontal=self._page_orientation_disabled,
        )
        # Rectify crop orientation and get crop orientation predictions
        crop_orientations: Any = []
        if not self.assume_straight_pages:
            crops, loc_preds, _crop_orientations = self._rectify_crops(crops, loc_preds)
            crop_orientations = [
                {"value": orientation[0], "confidence": orientation[1]} for orientation in _crop_orientations
            ]
        # Identify character sequences
        word_preds = self.reco_predictor([crop for page_crops in crops for crop in page_crops], **kwargs)
        if not crop_orientations:
            # Straight pages: every crop is considered upright by assumption
            crop_orientations = [{"value": 0, "confidence": None} for _ in word_preds]
        # Regroup the flat prediction lists back per page
        boxes, text_preds, crop_orientations = self._process_predictions(loc_preds, word_preds, crop_orientations)
        if self.detect_language:
            languages = [get_language(" ".join([item[0] for item in text_pred])) for text_pred in text_preds]
            languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages]
        else:
            languages_dict = None
        out = self.doc_builder(
            pages,
            boxes,
            objectness_scores,
            text_preds,
            origin_page_shapes,
            crop_orientations,
            orientations,
            languages_dict,
        )
        return out
================================================
FILE: onnxtr/models/preprocessor/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/models/preprocessor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
import math
from typing import Any
import numpy as np
from onnxtr.transforms import Normalize, Resize
from onnxtr.utils.geometry import shape_translate
from onnxtr.utils.multithreading import multithread_exec
from onnxtr.utils.repr import NestedObject
__all__ = ["PreProcessor"]
class PreProcessor(NestedObject):
    """Implements an abstract preprocessor object which performs casting, resizing, batching and normalization.

    Args:
        output_size: expected size of each page in format (H, W)
        batch_size: the size of page batches
        mean: mean value of the training distribution by channel
        std: standard deviation of the training distribution by channel
        **kwargs: additional arguments for the resizing operation
    """

    _children_names: list[str] = ["resize", "normalize"]

    def __init__(
        self,
        output_size: tuple[int, int],
        batch_size: int,
        mean: tuple[float, float, float] = (0.5, 0.5, 0.5),
        std: tuple[float, float, float] = (1.0, 1.0, 1.0),
        **kwargs: Any,
    ) -> None:
        self.batch_size = batch_size
        self.resize = Resize(output_size, **kwargs)
        self.normalize = Normalize(mean, std)

    def batch_inputs(self, samples: list[np.ndarray]) -> list[np.ndarray]:
        """Gather samples into batches for inference purposes

        Args:
            samples: list of samples (np.ndarray)

        Returns:
            list of batched samples
        """
        # Stride through the samples batch_size at a time; the final slice may
        # be shorter than batch_size (slicing clamps at the end of the list)
        return [
            np.stack(samples[start : start + self.batch_size], axis=0)
            for start in range(0, len(samples), self.batch_size)
        ]

    def sample_transforms(self, x: np.ndarray) -> np.ndarray:
        if x.ndim != 3:
            raise AssertionError("expected list of 3D Tensors")
        if isinstance(x, np.ndarray) and x.dtype not in (np.uint8, np.float32):
            raise TypeError("unsupported data type for numpy.ndarray")
        x = shape_translate(x, "HWC")
        # Resize to the target spatial size
        x = self.resize(x)
        # Cast uint8 images to float in [0, 1]
        if x.dtype == np.uint8:
            x = x.astype(np.float32) / 255.0
        return x

    def __call__(self, x: np.ndarray | list[np.ndarray]) -> list[np.ndarray]:
        """Prepare document data for model forwarding

        Args:
            x: list of images (np.array) or tensors (already resized and batched)

        Returns:
            list of page batches
        """
        if isinstance(x, np.ndarray):
            # Already batched: validate, then resize/cast the whole batch at once
            if x.ndim != 4:
                raise AssertionError("expected 4D Tensor")
            if x.dtype not in (np.uint8, np.float32):
                raise TypeError("unsupported data type for numpy.ndarray")
            x = shape_translate(x, "BHWC")
            # Only resize when the spatial dims do not already match the target
            if (x.shape[1], x.shape[2]) != self.resize.output_size:
                x = np.array([self.resize(sample) for sample in x])
            if x.dtype == np.uint8:
                x = x.astype(np.float32) / 255.0
            batches = [x]
        elif isinstance(x, list) and all(isinstance(sample, np.ndarray) for sample in x):
            # Individual pages: transform each sample, then group into batches
            batches = self.batch_inputs(list(multithread_exec(self.sample_transforms, x)))
        else:
            raise TypeError(f"invalid input type: {type(x)}")
        # Normalize each batch before handing it to the model
        return list(multithread_exec(self.normalize, batches))
================================================
FILE: onnxtr/models/recognition/__init__.py
================================================
from .models import *
from .zoo import *
================================================
FILE: onnxtr/models/recognition/core.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
from onnxtr.utils.repr import NestedObject
__all__ = ["RecognitionPostProcessor"]
class RecognitionPostProcessor(NestedObject):
    """Abstract class to postprocess the raw output of the model

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        # Keep the raw vocab plus an embedding table extended with a trailing
        # blank entry (used by subclasses for decoding)
        self.vocab = vocab
        self._embedding = [*self.vocab, ""]

    def extra_repr(self) -> str:
        return f"vocab_size={len(self.vocab)}"
================================================
FILE: onnxtr/models/recognition/models/__init__.py
================================================
from .crnn import *
from .sar import *
from .master import *
from .vitstr import *
from .parseq import *
from .viptr import *
================================================
FILE: onnxtr/models/recognition/models/crnn.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
from copy import deepcopy
from itertools import groupby
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["CRNN", "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large"]
# Default configuration for each CRNN variant: normalization statistics
# (mean/std), expected input shape (C, H, W), recognition vocabulary, and the
# download URLs for the float and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "crnn_vgg16_bn": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.7.1/crnn_vgg16_bn-743599aa.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.7.1/crnn_vgg16_bn_static_8_bit-df1b594d.onnx",
    },
    "crnn_mobilenet_v3_small": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_small-bded4d49.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_small_static_8_bit-4949006f.onnx",
    },
    "crnn_mobilenet_v3_large": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_large-d42e8185.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_large_static_8_bit-459e856d.onnx",
    },
}
class CRNNPostProcessor(RecognitionPostProcessor):
    """Convert raw CRNN logits into words using CTC best-path decoding.

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(self, vocab):
        self.vocab = vocab

    def decode_sequence(self, sequence, vocab):
        """Map a sequence of vocabulary indices to the corresponding string."""
        return "".join(vocab[int(idx)] for idx in sequence)

    def ctc_best_path(
        self,
        logits,
        vocab,
        blank=0,
    ):
        """Greedy (best-path) CTC decoding, as shown by Graves (Dissertation, p63).

        Args:
            logits: model output, shape: N x T x C
            vocab: vocabulary to use
            blank: index of blank label

        Returns:
            A list of tuples: (word, confidence)
        """
        # Sequence confidence = the weakest per-timestep max probability
        char_probs = softmax(logits, axis=-1).max(axis=-1)
        seq_probs = char_probs.min(axis=1).astype(float).tolist()
        # Best path: argmax per timestep, collapse repeats, drop blanks
        words = []
        for path in np.argmax(logits, axis=-1):
            collapsed = [idx for idx, _ in groupby(path.tolist()) if idx != blank]
            words.append(self.decode_sequence(collapsed, vocab))
        return list(zip(words, seq_probs))

    def __call__(self, logits):
        """Decode the raw model output with CTC best-path decoding.

        Args:
            logits: raw output of the model, shape (N, C + 1, seq_len)

        Returns:
            A list of (word, confidence) tuples
        """
        # The blank label is appended after the vocabulary
        return self.ctc_best_path(logits=logits, vocab=self.vocab, blank=len(self.vocab))
class CRNN(Engine):
    """CRNN Onnx loader

    Args:
        model_path: path or url to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: configuration dictionary
        **kwargs: additional arguments to be passed to `Engine`
    """

    _children_names: list[str] = ["postprocessor"]

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = CRNNPostProcessor(self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        # Post-process
        out["preds"] = self.postprocessor(logits)
        return out
def _crnn(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> CRNN:
    """Configure and instantiate a CRNN model for the given architecture."""
    vocab = kwargs.setdefault("vocab", default_cfgs[arch]["vocab"])
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = vocab
    cfg["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return CRNN(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def crnn_vgg16_bn(
    model_path: str = default_cfgs["crnn_vgg16_bn"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> CRNN:
    """CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based
    Sequence Recognition and Its Application to Scene Text Recognition"
    <https://arxiv.org/abs/1507.05717>`_.

    >>> import numpy as np
    >>> from onnxtr.models import crnn_vgg16_bn
    >>> model = crnn_vgg16_bn()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the CRNN architecture

    Returns:
        text recognition architecture
    """
    return _crnn("crnn_vgg16_bn", model_path, load_in_8_bit, engine_cfg, **kwargs)
def crnn_mobilenet_v3_small(
    model_path: str = default_cfgs["crnn_mobilenet_v3_small"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> CRNN:
    """CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for
    Image-based Sequence Recognition and Its Application to Scene Text Recognition"
    <https://arxiv.org/abs/1507.05717>`_.

    >>> import numpy as np
    >>> from onnxtr.models import crnn_mobilenet_v3_small
    >>> model = crnn_mobilenet_v3_small()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the CRNN architecture

    Returns:
        text recognition architecture
    """
    return _crnn("crnn_mobilenet_v3_small", model_path, load_in_8_bit, engine_cfg, **kwargs)
def crnn_mobilenet_v3_large(
    model_path: str = default_cfgs["crnn_mobilenet_v3_large"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> CRNN:
    """CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for
    Image-based Sequence Recognition and Its Application to Scene Text Recognition"
    <https://arxiv.org/abs/1507.05717>`_.

    >>> import numpy as np
    >>> from onnxtr.models import crnn_mobilenet_v3_large
    >>> model = crnn_mobilenet_v3_large()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the CRNN architecture

    Returns:
        text recognition architecture
    """
    return _crnn("crnn_mobilenet_v3_large", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/master.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["MASTER", "master"]
# Default MASTER configuration: normalization statistics, expected input
# shape (C, H, W), decoding vocabulary, and download URLs for the
# full-precision and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "master": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/master-b1287fcd.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/master_dynamic_8_bit-d8bd8206.onnx",
    },
}
class MASTER(Engine):
    """MASTER Onnx loader

    Args:
        model_path: path or url to onnx model file
        vocab: vocabulary, (without EOS, SOS, PAD)
        engine_cfg: configuration for the inference engine
        cfg: dictionary containing information about the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = MASTERPostProcessor(vocab=self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference on a batch of crops.

        Args:
            x: images
            return_model_output: if True, also expose the raw logits

        Returns:
            A dictionary containing the predictions and, optionally, the logits.
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        out["preds"] = self.postprocessor(logits)
        return out
class MASTERPostProcessor(RecognitionPostProcessor):
    """Post-processor for the MASTER model

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        super().__init__(vocab)
        # Special tokens appended after the vocabulary: end-of-sequence,
        # start-of-sequence and padding (in that order). These were empty
        # strings before, which made ``str.split("")`` raise ValueError.
        self._embedding = list(vocab) + ["<eos>"] + ["<sos>"] + ["<pad>"]

    def __call__(self, logits: np.ndarray) -> list[tuple[str, float]]:
        # compute pred with argmax for attention models
        out_idxs = np.argmax(logits, axis=-1)
        # N x L probabilities of the selected classes
        probs = np.take_along_axis(softmax(logits, axis=-1), out_idxs[..., None], axis=-1).squeeze(-1)
        # Take the minimum confidence of the sequence
        probs = np.min(probs, axis=1)
        # Truncate each decoded string at the first end-of-sequence token
        word_values = [
            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
        ]
        return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist()))
def _master(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MASTER:
    """Configure and instantiate a MASTER model for the given architecture."""
    # Patch the config
    cfg = deepcopy(default_cfgs[arch])
    cfg["input_shape"] = kwargs.get("input_shape", cfg["input_shape"])
    cfg["vocab"] = kwargs.setdefault("vocab", cfg["vocab"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    return MASTER(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def master(
    model_path: str = default_cfgs["master"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MASTER:
    """MASTER as described in `"MASTER: Multi-Aspect Non-local Network for Scene Text Recognition"
    <https://arxiv.org/abs/1910.02562>`_.

    >>> import numpy as np
    >>> from onnxtr.models import master
    >>> model = master()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments passed to the MASTER architecture

    Returns:
        text recognition architecture
    """
    return _master("master", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/parseq.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["PARSeq", "parseq"]
# Default PARSeq configuration: normalization statistics, expected input
# shape (C, H, W), decoding vocabulary, and download URLs for the
# full-precision and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "parseq": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/parseq-00b40714.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/parseq_dynamic_8_bit-5b04d9f7.onnx",
    },
}
class PARSeq(Engine):
    """PARSeq Onnx loader

    Args:
        model_path: path to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: dictionary containing information about the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = PARSeqPostProcessor(vocab=self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        out["preds"] = self.postprocessor(logits)
        return out
class PARSeqPostProcessor(RecognitionPostProcessor):
    """Post processor for PARSeq architecture

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        super().__init__(vocab)
        # Special tokens appended after the vocabulary: end-of-sequence,
        # start-of-sequence and padding. These were empty strings before,
        # which made ``str.split("")`` raise ValueError.
        self._embedding = list(vocab) + ["<eos>", "<sos>", "<pad>"]

    def __call__(self, logits):
        # compute pred with argmax for attention models
        out_idxs = np.argmax(logits, axis=-1)
        preds_prob = softmax(logits, axis=-1).max(axis=-1)
        # Truncate each decoded string at the first end-of-sequence token
        word_values = [
            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
        ]
        # compute probabilities for each word up to the EOS token
        probs = [
            preds_prob[i, : len(word)].clip(0, 1).mean().astype(float) if word else 0.0
            for i, word in enumerate(word_values)
        ]
        return list(zip(word_values, probs))
def _parseq(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> PARSeq:
    """Configure and instantiate a PARSeq model for the given architecture."""
    # Patch the config
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = kwargs.setdefault("vocab", cfg["vocab"])
    cfg["input_shape"] = kwargs.get("input_shape", cfg["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return PARSeq(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def parseq(
    model_path: str = default_cfgs["parseq"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> PARSeq:
    """PARSeq architecture from
    `"Scene Text Recognition with Permuted Autoregressive Sequence Models"
    <https://arxiv.org/abs/2207.06966>`_.

    >>> import numpy as np
    >>> from onnxtr.models import parseq
    >>> model = parseq()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the PARSeq architecture

    Returns:
        text recognition architecture
    """
    return _parseq("parseq", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/sar.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["SAR", "sar_resnet31"]
# Default SAR configuration: normalization statistics, expected input shape
# (C, H, W), decoding vocabulary, and download URLs for the full-precision
# and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "sar_resnet31": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/sar_resnet31-395f8005.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/sar_resnet31_static_8_bit-c07316bc.onnx",
    },
}
class SAR(Engine):
    """SAR Onnx loader

    Args:
        model_path: path to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: dictionary containing information about the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = SARPostProcessor(self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        out["preds"] = self.postprocessor(logits)
        return out
class SARPostProcessor(RecognitionPostProcessor):
    """Post processor for SAR architectures

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        super().__init__(vocab)
        # End-of-sequence token appended after the vocabulary. This was an
        # empty string before, which made ``str.split("")`` raise ValueError.
        self._embedding = list(self.vocab) + ["<eos>"]

    def __call__(self, logits):
        # compute pred with argmax for attention models
        out_idxs = np.argmax(logits, axis=-1)
        # N x L probabilities of the selected classes
        probs = np.take_along_axis(softmax(logits, axis=-1), out_idxs[..., None], axis=-1).squeeze(-1)
        # Take the minimum confidence of the sequence
        probs = np.min(probs, axis=1)
        # Truncate each decoded string at the first end-of-sequence token
        word_values = [
            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
        ]
        return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist()))
def _sar(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> SAR:
    """Configure and instantiate a SAR model for the given architecture."""
    # Patch the config
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = kwargs.setdefault("vocab", cfg["vocab"])
    cfg["input_shape"] = kwargs.get("input_shape", cfg["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return SAR(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def sar_resnet31(
    model_path: str = default_cfgs["sar_resnet31"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> SAR:
    """SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read: A Simple and Strong
    Baseline for Irregular Text Recognition" <https://arxiv.org/abs/1811.00751>`_.

    >>> import numpy as np
    >>> from onnxtr.models import sar_resnet31
    >>> model = sar_resnet31()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the SAR architecture

    Returns:
        text recognition architecture
    """
    return _sar("sar_resnet31", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/viptr.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import logging
from copy import deepcopy
from itertools import groupby
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["VIPTR", "viptr_tiny"]
# Default VIPTR configuration: normalization statistics, expected input
# shape (C, H, W), decoding vocabulary and download URLs.
# NOTE: no quantized weights are published yet, so "url_8_bit" mirrors the
# full-precision file (see the warning emitted in `_viptr`).
default_cfgs: dict[str, dict[str, Any]] = {
    "viptr_tiny": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.3/viptr_tiny-499b8015.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.3/viptr_tiny-499b8015.onnx",
    },
}
class VIPTRPostProcessor(RecognitionPostProcessor):
    """Convert raw VIPTR logits into words using CTC best-path decoding.

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(self, vocab):
        self.vocab = vocab

    def decode_sequence(self, sequence, vocab):
        """Map a sequence of vocabulary indices to the corresponding string."""
        return "".join(vocab[int(idx)] for idx in sequence)

    def ctc_best_path(
        self,
        logits,
        vocab,
        blank=0,
    ):
        """Greedy (best-path) CTC decoding, as shown by Graves (Dissertation, p63).

        Args:
            logits: model output, shape: N x T x C
            vocab: vocabulary to use
            blank: index of blank label

        Returns:
            A list of tuples: (word, confidence)
        """
        # Sequence confidence = the weakest per-timestep max probability
        char_probs = softmax(logits, axis=-1).max(axis=-1)
        seq_probs = char_probs.min(axis=1).astype(float).tolist()
        # Best path: argmax per timestep, collapse repeats, drop blanks
        words = []
        for path in np.argmax(logits, axis=-1):
            collapsed = [idx for idx, _ in groupby(path.tolist()) if idx != blank]
            words.append(self.decode_sequence(collapsed, vocab))
        return list(zip(words, seq_probs))

    def __call__(self, logits):
        """Decode the raw model output with CTC best-path decoding.

        Args:
            logits: raw output of the model, shape (N, C + 1, seq_len)

        Returns:
            A list of (word, confidence) tuples
        """
        # The blank label is appended after the vocabulary
        return self.ctc_best_path(logits=logits, vocab=self.vocab, blank=len(self.vocab))
class VIPTR(Engine):
    """VIPTR Onnx loader

    Args:
        model_path: path or url to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: configuration dictionary
        **kwargs: additional arguments to be passed to `Engine`
    """

    _children_names: list[str] = ["postprocessor"]

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = VIPTRPostProcessor(self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        # Post-process
        out["preds"] = self.postprocessor(logits)
        return out
def _viptr(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> VIPTR:
    """Configure and instantiate a VIPTR model for the given architecture."""
    if load_in_8_bit:
        logging.warning("VIPTR models do not support 8-bit quantization yet. Loading full precision model...")
    vocab = kwargs.setdefault("vocab", default_cfgs[arch]["vocab"])
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = vocab
    cfg["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return VIPTR(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def viptr_tiny(
    model_path: str = default_cfgs["viptr_tiny"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> VIPTR:
    """VIPTR as described in `"VIPTR: A Vision Permutable Extractor for Fast and Efficient
    Scene Text Recognition" <https://arxiv.org/abs/2401.10110>`_.

    >>> import numpy as np
    >>> from onnxtr.models import viptr_tiny
    >>> model = viptr_tiny()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the VIPTR architecture

    Returns:
        text recognition architecture
    """
    return _viptr("viptr_tiny", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/models/vitstr.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import numpy as np
from scipy.special import softmax
from onnxtr.utils import VOCABS
from ...engine import Engine, EngineConfig
from ..core import RecognitionPostProcessor
__all__ = ["ViTSTR", "vitstr_small", "vitstr_base"]
# Default ViTSTR configurations: normalization statistics, expected input
# shape (C, H, W), decoding vocabulary, and download URLs for the
# full-precision and 8-bit quantized ONNX weights.
default_cfgs: dict[str, dict[str, Any]] = {
    "vitstr_small": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_small-3ff9c500.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_small_dynamic_8_bit-bec6c796.onnx",
    },
    "vitstr_base": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 32, 128),
        "vocab": VOCABS["french"],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_base-ff62f5be.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_base_dynamic_8_bit-976c7cd6.onnx",
    },
}
class ViTSTR(Engine):
    """ViTSTR Onnx loader

    Args:
        model_path: path to onnx model file
        vocab: vocabulary used for encoding
        engine_cfg: configuration for the inference engine
        cfg: dictionary containing information about the model
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        vocab: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.vocab = vocab
        self.cfg = cfg
        self.postprocessor = ViTSTRPostProcessor(vocab=self.vocab)

    def __call__(
        self,
        x: np.ndarray,
        return_model_output: bool = False,
    ) -> dict[str, Any]:
        """Run inference and decode the predictions.

        Args:
            x: preprocessed image batch
            return_model_output: if True, also expose raw logits under "out_map"

        Returns:
            A dict containing decoded "preds" (and optionally "out_map")
        """
        logits = self.run(x)
        out: dict[str, Any] = {"out_map": logits} if return_model_output else {}
        out["preds"] = self.postprocessor(logits)
        return out
class ViTSTRPostProcessor(RecognitionPostProcessor):
    """Post processor for ViTSTR architecture

    Args:
        vocab: string containing the ordered sequence of supported characters
    """

    def __init__(
        self,
        vocab: str,
    ) -> None:
        super().__init__(vocab)
        # Special tokens appended after the vocabulary: end- and
        # start-of-sequence. These were empty strings before, which made
        # ``str.split("")`` raise ValueError.
        self._embedding = list(vocab) + ["<eos>", "<sos>"]

    def __call__(self, logits):
        # compute pred with argmax for attention models
        out_idxs = np.argmax(logits, axis=-1)
        preds_prob = softmax(logits, axis=-1).max(axis=-1)
        # Truncate each decoded string at the first end-of-sequence token
        word_values = [
            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs
        ]
        # compute probabilities for each word up to the EOS token
        probs = [
            preds_prob[i, : len(word)].clip(0, 1).mean().astype(float) if word else 0.0
            for i, word in enumerate(word_values)
        ]
        return list(zip(word_values, probs))
def _vitstr(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> ViTSTR:
    """Configure and instantiate a ViTSTR model for the given architecture."""
    # Patch the config
    cfg = deepcopy(default_cfgs[arch])
    cfg["vocab"] = kwargs.setdefault("vocab", cfg["vocab"])
    cfg["input_shape"] = kwargs.get("input_shape", cfg["input_shape"])
    # Point at the quantized weights when requested and the path is a remote URL
    if load_in_8_bit and "http" in model_path:
        model_path = default_cfgs[arch]["url_8_bit"]
    # Build the model
    return ViTSTR(model_path, cfg=cfg, engine_cfg=engine_cfg, **kwargs)
def vitstr_small(
    model_path: str = default_cfgs["vitstr_small"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> ViTSTR:
    """ViTSTR-Small as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
    <https://arxiv.org/abs/2105.08582>`_.

    >>> import numpy as np
    >>> from onnxtr.models import vitstr_small
    >>> model = vitstr_small()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the ViTSTR architecture

    Returns:
        text recognition architecture
    """
    return _vitstr("vitstr_small", model_path, load_in_8_bit, engine_cfg, **kwargs)
def vitstr_base(
    model_path: str = default_cfgs["vitstr_base"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> ViTSTR:
    """ViTSTR-Base as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
    <https://arxiv.org/abs/2105.08582>`_.

    >>> import numpy as np
    >>> from onnxtr.models import vitstr_base
    >>> model = vitstr_base()
    >>> input_tensor = np.random.rand(1, 3, 32, 128)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the ViTSTR architecture

    Returns:
        text recognition architecture
    """
    return _vitstr("vitstr_base", model_path, load_in_8_bit, engine_cfg, **kwargs)
================================================
FILE: onnxtr/models/recognition/predictor/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/models/recognition/predictor/_utils.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import math
import numpy as np
from ..utils import merge_multi_strings
__all__ = ["split_crops", "remap_preds"]
def split_crops(
crops: list[np.ndarray],
max_ratio: float,
target_ratio: int,
split_overlap_ratio: float,
channels_last: bool = True,
) -> tuple[list[np.ndarray], list[int | tuple[int, int, float]], bool]:
"""
Split crops horizontally if they exceed a given aspect ratio.
Args:
crops: List of image crops (H, W, C) if channels_last else (C, H, W).
max_ratio: Aspect ratio threshold above which crops are split.
target_ratio: Target aspect ratio after splitting (e.g., 4 for 128x32).
split_overlap_ratio: Desired overlap between splits (as a fraction of split width).
channels_last: Whether the crops are in channels-last format.
Returns:
A tuple containing:
- The new list of crops (possibly with splits),
- A mapping indicating how to reassemble predictions,
- A boolean indicating whether remapping is required.
"""
if split_overlap_ratio <= 0.0 or split_overlap_ratio >= 1.0:
raise ValueError(f"Valid range for split_overlap_ratio is (0.0, 1.0), but is: {split_overlap_ratio}")
remap_required = False
new_crops: list[np.ndarray] = []
crop_map: list[int | tuple[int, int, float]] = []
for crop in crops:
h, w = crop.shape[:2] if channels_last else crop.shape[-2:]
aspect_ratio = w / h
if aspect_ratio > max_ratio:
split_width = max(1, math.ceil(h * target_ratio))
overlap_width = max(0, math.floor(split_width * split_overlap_ratio))
splits, last_overlap = _split_horizontally(crop, split_width, overlap_width, channels_last)
# Remove any empty splits
splits = [s for s in splits if all(dim > 0 for dim in s.shape)]
if splits:
crop_map.append((len(new_crops), len(new_crops) + len(splits), last_overlap))
new_crops.extend(splits)
remap_required = True
else:
# Fallback: treat it as a single crop
crop_map.append(len(new_crops))
new_crops.append(crop)
else:
crop_map.append(len(new_crops))
new_crops.append(crop)
return new_crops, crop_map, remap_required
def _split_horizontally(
image: np.ndarray, split_width: int, overlap_width: int, channels_last: bool
) -> tuple[list[np.ndarray], float]:
"""
Horizontally split a single image with overlapping regions.
Args:
image: The image to split (H, W, C) if channels_last else (C, H, W).
split_width: Width of each split.
overlap_width: Width of the overlapping region.
channels_last: Whether the image is in channels-last format.
Returns:
- A list of horizontal image slices.
- The actual overlap ratio of the last split.
"""
image_width = image.shape[1] if channels_last else image.shape[-1]
if image_width <= split_width:
return [image], 0.0
# Compute start columns for each split
step = split_width - overlap_width
starts = list(range(0, image_width - split_width + 1, step))
# Ensure the last patch reaches the end of the image
if starts[-1] + split_width < image_width:
starts.append(image_width - split_width)
splits = []
for start_col in starts:
end_col = start_col + split_width
if channels_last:
split = image[:, start_col:end_col, :]
else:
split = image[:, :, start_col:end_col]
splits.append(split)
# Calculate the last overlap ratio, if only one split no overlap
last_overlap = 0
if len(starts) > 1:
last_overlap = (starts[-2] + split_width) - starts[-1]
last_overlap_ratio = last_overlap / split_width if split_width else 0.0
return splits, last_overlap_ratio
def remap_preds(
preds: list[tuple[str, float]],
crop_map: list[int | tuple[int, int, float]],
overlap_ratio: float,
) -> list[tuple[str, float]]:
"""
Reconstruct predictions from possibly split crops.
Args:
preds: List of (text, confidence) tuples from each crop.
crop_map: Map returned by `split_crops`.
overlap_ratio: Overlap ratio used during splitting.
Returns:
List of merged (text, confidence) tuples corresponding to original crops.
"""
remapped = []
for item in crop_map:
if isinstance(item, int):
remapped.append(preds[item])
else:
start_idx, end_idx, last_overlap = item
text_parts, confidences = zip(*preds[start_idx:end_idx])
merged_text = merge_multi_strings(list(text_parts), overlap_ratio, last_overlap)
merged_conf = sum(confidences) / len(confidences) # average confidence
remapped.append((merged_text, merged_conf))
return remapped
================================================
FILE: onnxtr/models/recognition/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from collections.abc import Sequence
from typing import Any
import numpy as np
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.repr import NestedObject
from ._utils import remap_preds, split_crops
__all__ = ["RecognitionPredictor"]
class RecognitionPredictor(NestedObject):
    """Implements an object able to identify character sequences in images

    Args:
        pre_processor: transform inputs for easier batched model inference
        model: core recognition architecture
        split_wide_crops: whether to use crop splitting for high aspect ratio crops
    """

    def __init__(
        self,
        pre_processor: PreProcessor,
        model: Any,
        split_wide_crops: bool = True,
    ) -> None:
        super().__init__()
        self.pre_processor = pre_processor
        self.model = model
        self.split_wide_crops = split_wide_crops
        self.critical_ar = 8  # Aspect ratio above which a crop gets split
        self.overlap_ratio = 0.5  # Overlap between neighboring split pieces
        self.target_ar = 6  # Aspect ratio targeted for each split piece

    def __call__(
        self,
        crops: Sequence[np.ndarray],
        **kwargs: Any,
    ) -> list[tuple[str, float]]:
        if len(crops) == 0:
            return []
        # Every crop must be a multi-channel 2D image
        if any(crop.ndim != 3 for crop in crops):
            raise ValueError("incorrect input shape: all crops are expected to be multi-channel 2D images.")

        # Optionally split crops whose aspect ratio exceeds the critical threshold
        remapped = False
        if self.split_wide_crops:
            new_crops, crop_map, remapped = split_crops(
                crops,  # type: ignore[arg-type]
                self.critical_ar,
                self.target_ar,
                self.overlap_ratio,
                True,
            )
            if remapped:
                crops = new_crops

        # Resize & batch the crops, then run the model batch by batch
        processed_batches = self.pre_processor(crops)  # type: ignore[arg-type]
        out: list[tuple[str, float]] = []
        for batch in processed_batches:
            out.extend(self.model(batch, **kwargs)["preds"])

        # Merge predictions of split crops back into one prediction per input crop
        if self.split_wide_crops and remapped:
            out = remap_preds(out, crop_map, self.overlap_ratio)
        return out
================================================
FILE: onnxtr/models/recognition/utils.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from rapidfuzz.distance import Hamming
__all__ = ["merge_strings", "merge_multi_strings"]
def merge_strings(a: str, b: str, overlap_ratio: float) -> str:
    """Merges 2 character sequences in the best way to maximize the alignment of their overlapping characters.

    Args:
        a: first char seq, suffix should be similar to b's prefix.
        b: second char seq, prefix should be similar to a's suffix.
        overlap_ratio: estimated ratio of overlapping characters.

    Returns:
        A merged character sequence.

    Example::
        >>> from onnxtr.models.recognition.utils import merge_strings
        >>> merge_strings('abcd', 'cdefgh', 0.5)
        'abcdefgh'
        >>> merge_strings('abcdi', 'cdefgh', 0.5)
        'abcdefgh'
    """
    seq_len = min(len(a), len(b))
    if seq_len <= 1:  # One sequence is empty or will be after cropping in next step, return both to keep data
        return a + b
    a_crop, b_crop = a[:-1], b[1:]  # Remove last letter of "a" and first of "b", because they might be cut off
    max_overlap = min(len(a_crop), len(b_crop))
    # Hamming distance between a's suffix and b's prefix for every candidate overlap length (index i = overlap i+1)
    scores = [Hamming.distance(a_crop[-i:], b_crop[:i], processor=None) for i in range(1, max_overlap + 1)]
    # Candidate overlaps that align perfectly (zero mismatches)
    zero_matches = [i for i, score in enumerate(scores) if score == 0]
    expected_overlap = round(len(b) * overlap_ratio) - 3  # adjust for cropping and index
    # Case 1: One perfect match - exactly one zero score - just merge there
    if len(zero_matches) == 1:
        i = zero_matches[0]
        return a_crop + b_crop[i + 1 :]
    # Case 2: Multiple perfect matches - likely due to repeated characters.
    # Use the estimated overlap length to choose the match closest to the expected alignment.
    elif len(zero_matches) > 1:
        best_i = min(zero_matches, key=lambda x: abs(x - expected_overlap))
        return a_crop + b_crop[best_i + 1 :]
    # Case 3: Absence of zero scores indicates that the same character in the image was recognized differently OR
    # that the overlap was too small and we just need to merge the crops fully
    if expected_overlap < -1:
        return a + b
    elif expected_overlap < 0:
        return a_crop + b_crop
    # Find best overlap by minimizing Hamming distance + distance from expected overlap size
    combined_scores = [score + abs(i - expected_overlap) for i, score in enumerate(scores)]
    best_i = combined_scores.index(min(combined_scores))
    return a_crop + b_crop[best_i + 1 :]
def merge_multi_strings(seq_list: list[str], overlap_ratio: float, last_overlap_ratio: float) -> str:
    """
    Fold a left-to-right list of overlapping character sequences into a single string.

    Args:
        seq_list: list of sequences to merge. Sequences need to be ordered from left to right.
        overlap_ratio: Estimated ratio of overlapping letters between neighboring strings.
        last_overlap_ratio: Estimated ratio of overlapping letters for the last element in seq_list.

    Returns:
        A merged character sequence

    Example::
        >>> from onnxtr.models.recognition.utils import merge_multi_strings
        >>> merge_multi_strings(['abc', 'bcdef', 'difghi', 'aijkl'], 0.5, 0.1)
        'abcdefghijkl'
    """
    if not seq_list:
        return ""
    merged, *remainder = seq_list
    last_idx = len(remainder) - 1
    for idx, piece in enumerate(remainder):
        # The trailing split piece uses its own (usually smaller) overlap estimate
        ratio = last_overlap_ratio if idx == last_idx else overlap_ratio
        merged = merge_strings(merged, piece, ratio)
    return merged
================================================
FILE: onnxtr/models/recognition/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from .. import recognition
from ..engine import EngineConfig
from ..preprocessor import PreProcessor
from .predictor import RecognitionPredictor
__all__ = ["recognition_predictor"]
# Names of the supported recognition architectures; each entry matches a factory
# function exposed by the `recognition` module (resolved via `recognition.__dict__`)
ARCHS: list[str] = [
    "crnn_vgg16_bn",
    "crnn_mobilenet_v3_small",
    "crnn_mobilenet_v3_large",
    "sar_resnet31",
    "master",
    "vitstr_small",
    "vitstr_base",
    "parseq",
    "viptr_tiny",
]
def _predictor(
    arch: Any, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any
) -> RecognitionPredictor:
    """Build a ``RecognitionPredictor`` from an architecture name or an instantiated model.

    Args:
        arch: architecture name (must be in ``ARCHS``) or a pre-built recognition model
        load_in_8_bit: whether to load the 8-bit quantized variant (name-based path only)
        engine_cfg: inference engine configuration (name-based path only)
        **kwargs: forwarded to the ``PreProcessor``

    Returns:
        the assembled recognition predictor

    Raises:
        ValueError: if the architecture name or the model type is unknown
    """
    if isinstance(arch, str):
        if arch not in ARCHS:
            raise ValueError(f"unknown architecture '{arch}'")
        # Resolve the factory function by name and instantiate the model
        _model = recognition.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg)
    else:
        supported_models = (
            recognition.CRNN,
            recognition.SAR,
            recognition.MASTER,
            recognition.ViTSTR,
            recognition.PARSeq,
            recognition.VIPTR,
        )
        if not isinstance(arch, supported_models):
            raise ValueError(f"unknown architecture: {type(arch)}")
        _model = arch

    # Default to the model's own normalization stats and a large inference batch
    kwargs.setdefault("mean", _model.cfg["mean"])
    kwargs.setdefault("std", _model.cfg["std"])
    kwargs.setdefault("batch_size", 1024)
    input_shape = _model.cfg["input_shape"][1:]
    return RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **kwargs), _model)
def recognition_predictor(
    arch: Any = "crnn_vgg16_bn",
    symmetric_pad: bool = False,
    batch_size: int = 128,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> RecognitionPredictor:
    """Text recognition architecture.

    Example::
        >>> import numpy as np
        >>> from onnxtr.models import recognition_predictor
        >>> model = recognition_predictor()
        >>> input_page = (255 * np.random.rand(32, 128, 3)).astype(np.uint8)
        >>> out = model([input_page])

    Args:
        arch: name of the architecture or model itself to use (e.g. 'crnn_vgg16_bn')
        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration of inference engine
        **kwargs: optional parameters to be passed to the architecture

    Returns:
        Recognition predictor
    """
    # Thin public wrapper: all the assembly work happens in `_predictor`
    return _predictor(
        arch,
        symmetric_pad=symmetric_pad,
        batch_size=batch_size,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/models/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from typing import Any
from .detection.zoo import detection_predictor
from .engine import EngineConfig
from .predictor import OCRPredictor
from .recognition.zoo import recognition_predictor
__all__ = ["ocr_predictor"]
def _predictor(
    det_arch: Any,
    reco_arch: Any,
    assume_straight_pages: bool = True,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    det_bs: int = 2,
    reco_bs: int = 512,
    detect_orientation: bool = False,
    straighten_pages: bool = False,
    detect_language: bool = False,
    load_in_8_bit: bool = False,
    det_engine_cfg: EngineConfig | None = None,
    reco_engine_cfg: EngineConfig | None = None,
    clf_engine_cfg: EngineConfig | None = None,
    **kwargs,
) -> OCRPredictor:
    """Assemble an ``OCRPredictor`` from a detection stage and a recognition stage."""
    # Text detection stage
    det = detection_predictor(
        det_arch,
        batch_size=det_bs,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=det_engine_cfg,
    )
    # Text recognition stage
    reco = recognition_predictor(
        reco_arch,
        batch_size=reco_bs,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=reco_engine_cfg,
    )
    # Combine both stages into the end-to-end predictor
    return OCRPredictor(
        det,
        reco,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        detect_orientation=detect_orientation,
        straighten_pages=straighten_pages,
        detect_language=detect_language,
        clf_engine_cfg=clf_engine_cfg,
        **kwargs,
    )
def ocr_predictor(
    det_arch: Any = "fast_base",
    reco_arch: Any = "crnn_vgg16_bn",
    assume_straight_pages: bool = True,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    export_as_straight_boxes: bool = False,
    detect_orientation: bool = False,
    straighten_pages: bool = False,
    detect_language: bool = False,
    load_in_8_bit: bool = False,
    det_engine_cfg: EngineConfig | None = None,
    reco_engine_cfg: EngineConfig | None = None,
    clf_engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> OCRPredictor:
    """End-to-end OCR architecture using one model for localization, and another for text recognition.

    >>> import numpy as np
    >>> from onnxtr.models import ocr_predictor
    >>> model = ocr_predictor('db_resnet50', 'crnn_vgg16_bn')
    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
    >>> out = model([input_page])

    Args:
        det_arch: name of the detection architecture or the model itself to use
            (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
        reco_arch: name of the recognition architecture or the model itself to use
            (e.g. 'crnn_vgg16_bn', 'sar_resnet31')
        assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
            without rotated textual elements.
        preserve_aspect_ratio: If True, pad the input document image to preserve the aspect ratio before
            running the detection model on it.
        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right.
        export_as_straight_boxes: when assume_straight_pages is set to False, export final predictions
            (potentially rotated) as straight bounding boxes.
        detect_orientation: if True, the estimated general page orientation will be added to the predictions
            for each page. Doing so will slightly deteriorate the overall latency.
        straighten_pages: if True, estimates the page general orientation based on the segmentation map
            median line orientation. Then, rotates page before passing it again to the deep learning
            detection module. Doing so will improve performances for documents with page-uniform rotations.
        detect_language: if True, the language prediction will be added to the predictions for each
            page. Doing so will slightly deteriorate the overall latency.
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        det_engine_cfg: configuration of the detection engine
        reco_engine_cfg: configuration of the recognition engine
        clf_engine_cfg: configuration of the orientation classification engine
        kwargs: keyword args of `OCRPredictor`

    Returns:
        OCR predictor
    """
    # Delegate the assembly of both stages to the private builder
    return _predictor(
        det_arch,
        reco_arch,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        export_as_straight_boxes=export_as_straight_boxes,
        detect_orientation=detect_orientation,
        straighten_pages=straighten_pages,
        detect_language=detect_language,
        load_in_8_bit=load_in_8_bit,
        det_engine_cfg=det_engine_cfg,
        reco_engine_cfg=reco_engine_cfg,
        clf_engine_cfg=clf_engine_cfg,
        **kwargs,
    )
================================================
FILE: onnxtr/py.typed
================================================
================================================
FILE: onnxtr/transforms/__init__.py
================================================
from .base import *
================================================
FILE: onnxtr/transforms/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import math
import numpy as np
from PIL import Image, ImageOps
__all__ = ["Resize", "Normalize"]
class Resize:
    """Resize the input image to the given size

    Args:
        size: target size as (height, width), or a single int for a square output
        interpolation: the PIL resampling filter to use
        preserve_aspect_ratio: whether to preserve the aspect ratio of the image
        symmetric_pad: whether to symmetrically pad the image (instead of bottom-right)
    """

    def __init__(
        self,
        size: int | tuple[int, int],
        interpolation=Image.Resampling.BILINEAR,
        preserve_aspect_ratio: bool = False,
        symmetric_pad: bool = False,
    ) -> None:
        # Validate the raw argument BEFORE the int -> tuple conversion: the previous
        # check ran on self.size, which is always a tuple at that point, so it could
        # never fail (e.g. a list slipped through and broke later at unpack time)
        if not isinstance(size, (tuple, int)):
            raise AssertionError("size should be either a tuple or an int")
        self.size = size if isinstance(size, tuple) else (size, size)
        self.interpolation = interpolation
        self.preserve_aspect_ratio = preserve_aspect_ratio
        self.symmetric_pad = symmetric_pad
        self.output_size = self.size  # kept as an alias for backward compatibility

    def __call__(self, img: np.ndarray) -> np.ndarray:
        """Resize (and optionally pad) a HWC numpy image to `self.size`."""
        # PIL expects uint8; float inputs are assumed to lie in [0, 1]
        if img.dtype != np.uint8:
            img_pil = Image.fromarray((img * 255).clip(0, 255).astype(np.uint8))
        else:
            img_pil = Image.fromarray(img)
        sh, sw = self.size
        w, h = img_pil.size  # PIL reports (width, height)

        if not self.preserve_aspect_ratio:
            # Plain resize to the exact target shape
            return np.array(img_pil.resize((sw, sh), resample=self.interpolation))

        # Scale the image so it fits inside the target box, keeping its aspect ratio
        actual_ratio = h / w
        target_ratio = sh / sw
        if actual_ratio > target_ratio:
            new_h = sh
            new_w = max(int(sh / actual_ratio), 1)
        else:
            new_w = sw
            new_h = max(int(sw * actual_ratio), 1)
        img_resized_pil = img_pil.resize((new_w, new_h), resample=self.interpolation)

        # Fill the remaining area with zeros
        delta_w = sw - new_w
        delta_h = sh - new_h
        if self.symmetric_pad:
            # Symmetric padding: odd deltas put the extra pixel on the left/top
            pad_left = math.ceil(delta_w / 2)
            pad_right = math.floor(delta_w / 2)
            pad_top = math.ceil(delta_h / 2)
            pad_bottom = math.floor(delta_h / 2)
        else:
            # Asymmetric padding: everything goes to the right/bottom
            pad_left, pad_top = 0, 0
            pad_right, pad_bottom = delta_w, delta_h
        img_padded_pil = ImageOps.expand(
            img_resized_pil,
            border=(pad_left, pad_top, pad_right, pad_bottom),
            fill=0,
        )
        return np.array(img_padded_pil)

    def __repr__(self) -> str:
        _repr = f"output_size={self.size}, interpolation='{self.interpolation}'"
        if self.preserve_aspect_ratio:
            _repr += f", preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}"
        return f"{self.__class__.__name__}({_repr})"
class Normalize:
"""Normalize the input image
Args:
mean: mean values to subtract
std: standard deviation values to divide
"""
def __init__(
self,
mean: float | tuple[float, float, float] = (0.485, 0.456, 0.406),
std: float | tuple[float, float, float] = (0.229, 0.224, 0.225),
) -> None:
self.mean = mean
self.std = std
if not isinstance(self.mean, (float, tuple, list)):
raise AssertionError("mean should be either a tuple, a list or a float")
if not isinstance(self.std, (float, tuple, list)):
raise AssertionError("std should be either a tuple, a list or a float")
def __call__(
self,
img: np.ndarray,
) -> np.ndarray:
# Normalize image
return (img - np.array(self.mean).astype(img.dtype)) / np.array(self.std).astype(img.dtype)
def __repr__(self) -> str:
_repr = f"mean={self.mean}, std={self.std}"
return f"{self.__class__.__name__}({_repr})"
================================================
FILE: onnxtr/utils/__init__.py
================================================
from .common_types import *
from .data import *
from .geometry import *
from .vocabs import *
================================================
FILE: onnxtr/utils/common_types.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from pathlib import Path
__all__ = ["Point2D", "BoundingBox", "Polygon4P", "Polygon", "Bbox"]

# (x, y) coordinate pair
Point2D = tuple[float, float]
# ((xmin, ymin), (xmax, ymax)) corner pair
BoundingBox = tuple[Point2D, Point2D]
# Quadrangle described by its 4 corner points
Polygon4P = tuple[Point2D, Point2D, Point2D, Point2D]
# Arbitrary-length polygon
Polygon = list[Point2D]
# Filesystem path, as str or pathlib.Path (internal: not exported via __all__)
AbstractPath = str | Path
# A file reference: a path or raw bytes (internal: not exported via __all__)
AbstractFile = AbstractPath | bytes
# Flat (xmin, ymin, xmax, ymax) box
Bbox = tuple[float, float, float, float]
================================================
FILE: onnxtr/utils/data.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
# Adapted from https://github.com/pytorch/vision/blob/master/torchvision/datasets/utils.py
import hashlib
import logging
import os
import re
import urllib.error
import urllib.request
from pathlib import Path
from tqdm.auto import tqdm
__all__ = ["download_from_url"]
# Extracts the hash fragment from a checkpoint file name,
# e.g. matches bfd8deac from resnet18-bfd8deac.ckpt
HASH_REGEX = re.compile(r"-([a-f0-9]*)\.")
# User-Agent header sent with every download request
USER_AGENT = "felixdittrich92/OnnxTR"
def _urlretrieve(url: str, filename: Path | str, chunk_size: int = 1024) -> None:
    """Stream a URL to disk in chunks, showing a tqdm progress bar.

    Args:
        url: the URL to download
        filename: destination file path
        chunk_size: number of bytes to read per iteration
    """
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with open(filename, "wb") as fh:
        with urllib.request.urlopen(request) as response:
            with tqdm(total=response.length) as pbar:
                # response.read() returns b"" at EOF, so the sentinel must be bytes:
                # the previous "" sentinel never matched and relied on a manual break
                for chunk in iter(lambda: response.read(chunk_size), b""):
                    # Count the bytes actually read so the bar does not overshoot
                    # on a short final chunk
                    pbar.update(len(chunk))
                    fh.write(chunk)
def _check_integrity(file_path: str | Path, hash_prefix: str) -> bool:
with open(file_path, "rb") as f:
sha_hash = hashlib.sha256(f.read()).hexdigest()
return sha_hash[: len(hash_prefix)] == hash_prefix
def download_from_url(
    url: str,
    file_name: str | None = None,
    hash_prefix: str | None = None,
    cache_dir: str | None = None,
    cache_subdir: str | None = None,
) -> Path:
    """Download a file using its URL

    >>> from onnxtr.models import download_from_url
    >>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip")

    Args:
        url: the URL of the file to download
        file_name: optional name of the file once downloaded
        hash_prefix: optional expected SHA256 hash of the file
        cache_dir: cache directory
        cache_subdir: subfolder to use in the cache

    Returns:
        the location of the downloaded file

    Note:
        You can change cache directory location by using `ONNXTR_CACHE_DIR` environment variable.
    """
    # Default file name: last URL segment, stripped of query parameters
    if not isinstance(file_name, str):
        file_name = url.rpartition("/")[-1].split("&")[0]

    cache_dir = (
        str(os.environ.get("ONNXTR_CACHE_DIR", os.path.join(os.path.expanduser("~"), ".cache", "onnxtr")))
        if cache_dir is None
        else cache_dir
    )

    # Check hash in file name (e.g. resnet18-bfd8deac.ckpt -> bfd8deac)
    if hash_prefix is None:
        r = HASH_REGEX.search(file_name)
        hash_prefix = r.group(1) if r else None

    folder_path = Path(cache_dir) if cache_subdir is None else Path(cache_dir, cache_subdir)
    file_path = folder_path.joinpath(file_name)
    # Reuse a previous download when it exists and its hash checks out
    if file_path.is_file() and (hash_prefix is None or _check_integrity(file_path, hash_prefix)):
        logging.info(f"Using downloaded & verified file: {file_path}")
        return file_path

    try:
        # Create folder hierarchy
        folder_path.mkdir(parents=True, exist_ok=True)
    except OSError:
        error_message = f"Failed creating cache directory at {folder_path}"
        if os.environ.get("ONNXTR_CACHE_DIR", ""):
            error_message += " using path from 'ONNXTR_CACHE_DIR' environment variable."
        else:
            error_message += (
                ". You can change default cache directory using 'ONNXTR_CACHE_DIR' environment variable if needed."
            )
        logging.error(error_message)
        raise

    # Download the file
    try:
        print(f"Downloading {url} to {file_path}")
        _urlretrieve(url, file_path)
    except (urllib.error.URLError, IOError) as e:  # pragma: no cover
        if url.startswith("https"):
            # Retry once over plain HTTP before giving up
            url = url.replace("https:", "http:")
            print(f"Failed download. Trying https -> http instead. Downloading {url} to {file_path}")
            _urlretrieve(url, file_path)
        else:
            raise e

    # Remove corrupted files
    if isinstance(hash_prefix, str) and not _check_integrity(file_path, hash_prefix):  # pragma: no cover
        os.remove(file_path)
        raise ValueError(f"corrupted download, the hash of {url} does not match its expected value")
    return file_path
================================================
FILE: onnxtr/utils/fonts.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import logging
import platform
from PIL import ImageFont
__all__ = ["get_font"]
def get_font(font_family: str | None = None, font_size: int = 13) -> ImageFont.FreeTypeFont | ImageFont.ImageFont:
    """Resolves a compatible ImageFont for the system

    Args:
        font_family: the font family to use (None to pick a platform default)
        font_size: the size of the font upon rendering

    Returns:
        the Pillow font
    """
    # Font selection: fall back to a platform-specific default when none is given
    if font_family is None:
        try:
            font = ImageFont.truetype("FreeMono.ttf" if platform.system() == "Linux" else "Arial.ttf", font_size)
        except OSError:  # pragma: no cover
            font = ImageFont.load_default()  # type: ignore[assignment]
            # Trailing spaces added so the implicitly concatenated fragments do not
            # run together ("...font,font size..." previously)
            logging.warning(
                "unable to load recommended font family. Loading default PIL font, "
                "font size issues may be expected. "
                "To prevent this, it is recommended to specify the value of 'font_family'."
            )
    else:  # pragma: no cover
        font = ImageFont.truetype(font_family, font_size)
    return font
================================================
FILE: onnxtr/utils/geometry.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from math import ceil
import cv2
import numpy as np
from .common_types import BoundingBox, Polygon4P
# Public API of the geometry helpers (internal utilities such as
# rotate_abs_points / remap_boxes / remove_image_padding stay unexported)
__all__ = [
    "bbox_to_polygon",
    "polygon_to_bbox",
    "order_points",
    "resolve_enclosing_bbox",
    "resolve_enclosing_rbbox",
    "rotate_boxes",
    "compute_expanded_shape",
    "rotate_image",
    "estimate_page_angle",
    "convert_to_relative_coords",
    "rotate_abs_geoms",
    "extract_crops",
    "extract_rcrops",
    "shape_translate",
    "detach_scores",
]
def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P:
    """Expand a 2-point bounding box into its 4 corners.

    Args:
        bbox: ((xmin, ymin), (xmax, ymax)) bounding box

    Returns:
        the corners as (xmin, ymin), (xmax, ymin), (xmin, ymax), (xmax, ymax)
    """
    (xmin, ymin), (xmax, ymax) = bbox
    return (xmin, ymin), (xmax, ymin), (xmin, ymax), (xmax, ymax)
def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
    """Compute the axis-aligned bounding box enclosing a 4-point polygon.

    Args:
        polygon: the 4 (x, y) corner points

    Returns:
        ((xmin, ymin), (xmax, ymax))
    """
    xs, ys = zip(*polygon)
    return (min(xs), min(ys)), (max(xs), max(ys))
def order_points(pts: np.ndarray) -> np.ndarray:
    """Order points in the following order: top-left, top-right, bottom-right, bottom-left

    Args:
        pts: array of shape (4, 2) with point coordinates, or (4,) as (xmin, ymin, xmax, ymax)

    Returns:
        (4, 2) array ordered top-left, top-right, bottom-right, bottom-left
    """
    pts = np.asarray(pts)

    # Flat (xmin, ymin, xmax, ymax) box: the corners are known directly
    if pts.shape == (4,):
        xmin, ymin, xmax, ymax = pts
        corners = [
            [xmin, ymin],  # top-left
            [xmax, ymin],  # top-right
            [xmax, ymax],  # bottom-right
            [xmin, ymax],  # bottom-left
        ]
        return np.array(corners, dtype=pts.dtype)

    # Quadrangle: order by angle around the centroid
    if pts.shape == (4, 2):
        centroid = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centroid[1], pts[:, 0] - centroid[0])
        ordered = pts[np.argsort(angles)]
        # Start at the point with the smallest x + y (top-left in image coords)
        ordered = np.roll(ordered, -np.argmin(ordered.sum(axis=1)), axis=0)

        # Signed shoelace area; its sign tells the ring's winding direction
        def _signed_area(poly: np.ndarray) -> float:
            return 0.5 * np.sum(poly[:, 0] * np.roll(poly[:, 1], -1) - poly[:, 1] * np.roll(poly[:, 0], -1))

        # Negative area: reverse the traversal while keeping the starting corner,
        # so the result always reads TL, TR, BR, BL
        if _signed_area(ordered) < 0:
            ordered = np.roll(ordered[::-1], 1, axis=0)
        return ordered.astype(ordered.dtype)

    raise ValueError(f"Unsupported shape {pts.shape}, expected (4,) or (4,2)")
def detach_scores(boxes: list[np.ndarray]) -> tuple[list[np.ndarray], list[np.ndarray]]:
    """Detach the objectness scores from box predictions

    Args:
        boxes: list of arrays with boxes of shape (N, 5) — straight boxes with a trailing
            score column — or (N, 5, 2) — rotated boxes with the score stored in the last point

    Returns:
        a tuple of two lists: the first one contains the boxes without the objectness scores,
        the second one contains the objectness scores
    """
    loc_preds: list[np.ndarray] = []
    obj_scores: list[np.ndarray] = []
    # Explicit loop instead of `zip(*(...))`: the unpack crashed with a
    # ValueError on an empty input list, where ([], []) is the sane result
    for box in boxes:
        if box.ndim == 2:
            # (N, 5): last column holds the score
            loc_preds.append(box[:, :-1])
            obj_scores.append(box[:, -1])
        else:
            # (N, 5, 2): score stored in the last coordinate of the last point
            loc_preds.append(box[:, :-1])
            obj_scores.append(box[:, -1, -1])
    return loc_preds, obj_scores
def shape_translate(data: np.ndarray, format: str) -> np.ndarray:
    """Translate the shape of the input data to the desired format

    Args:
        data: input data in shape (B, C, H, W) or (B, H, W, C) or (C, H, W) or (H, W, C)
        format: target format ('BCHW', 'BHWC', 'CHW', or 'HWC')

    Returns:
        the reshaped data (returned unchanged when the rank does not match the
        format, the format is unknown, or the data already fits)
    """
    # Rank must match the requested format, otherwise leave the data untouched
    if data.ndim != len(format):
        return data

    # Where the channel axis sits in each target layout
    channel_axis = {"BCHW": 1, "BHWC": -1, "CHW": 0, "HWC": -1}.get(format)
    if channel_axis is None:
        return data
    # Heuristic: an axis of size 1 or 3 is assumed to already be the channel axis
    if data.shape[channel_axis] in [1, 3]:
        return data
    # Channel axis sits at the opposite end: move it into place
    if format == "BCHW":
        return np.moveaxis(data, -1, 1)
    if format == "BHWC":
        return np.moveaxis(data, 1, -1)
    if format == "CHW":
        return np.moveaxis(data, -1, 0)
    return np.moveaxis(data, 0, -1)  # HWC
def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBox | np.ndarray:
    """Compute the smallest bbox enclosing all input boxes.

    Args:
        bboxes: boxes in one of the following formats:
            - an array of boxes: (*, 4), where boxes have this shape:
            (xmin, ymin, xmax, ymax)
            - a list of BoundingBox

    Returns:
        a 4-element array (enclosing box), or a BoundingBox, matching the input kind
    """
    if isinstance(bboxes, np.ndarray):
        xmin, ymin, xmax, ymax = np.split(bboxes, 4, axis=1)
        return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max()])
    # List of ((xmin, ymin), (xmax, ymax)) tuples: gather every corner coordinate
    xs, ys = zip(*[pt for box in bboxes for pt in box])
    return (min(xs), min(ys)), (max(xs), max(ys))
def resolve_enclosing_rbbox(rbboxes: list[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
    """Compute the minimum-area rotated bbox enclosing all input rotated boxes.

    Args:
        rbboxes: boxes in one of the following formats:
            - an array of boxes: (*, 4, 2), where boxes have this shape:
            (x1, y1), (x2, y2), (x3, y3), (x4, y4)
            - a list of BoundingBox
        intermed_size: scale used to convert relative coordinates to a pixel-like
            range for cv2.minAreaRect

    Returns:
        a (4, 2) array (enclosing rotated box)
    """
    # Gather every corner of every box into one point cloud
    cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
    # Convert to absolute for minAreaRect
    rect = cv2.minAreaRect(cloud.astype(np.float32) * intermed_size)
    # Back to relative coordinates, ordered TL, TR, BR, BL by order_points
    return order_points(cv2.boxPoints(rect) / intermed_size)
def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
    """Rotate points counter-clockwise around the origin.

    Args:
        points: array of size (N, 2)
        angle: angle between -90 and +90 degrees

    Returns:
        Rotated points, same shape as the input
    """
    theta = angle * np.pi / 180.0  # degrees -> radians
    rotation = np.array(
        [[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]], dtype=points.dtype
    )
    # points @ R.T applies the counter-clockwise rotation row-wise
    return np.matmul(points, rotation.T)
def compute_expanded_shape(img_shape: tuple[int, int], angle: float) -> tuple[int, int]:
    """Compute the shape of an expanded rotated image

    Args:
        img_shape: the height and width of the image
        angle: angle between -90 and +90 degrees

    Returns:
        the height and width of the rotated image
    """
    height, width = img_shape
    # Two adjacent corners, expressed relative to the image center
    corners: np.ndarray = np.array([
        [width / 2, height / 2],
        [-width / 2, height / 2],
    ])
    rotated = rotate_abs_points(corners, angle)
    # The expanded (w, h) is twice the largest absolute coordinate per axis
    expanded_wh = 2 * np.abs(rotated).max(axis=0)
    return expanded_wh[1], expanded_wh[0]
def rotate_abs_geoms(
    geoms: np.ndarray,
    angle: float,
    img_shape: tuple[int, int],
    expand: bool = True,
) -> np.ndarray:
    """Rotate a batch of bounding boxes or polygons by an angle around the
    image center.

    Args:
        geoms: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes
        angle: anti-clockwise rotation angle in degrees
        img_shape: the height and width of the image
        expand: whether the image should be padded to avoid information loss

    Returns:
        A batch of rotated polygons (N, 4, 2)
    """
    # Switch to polygons: expand (xmin, ymin, xmax, ymax) rows into their 4 corners
    polys = (
        np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1)
        if geoms.ndim == 2
        else geoms
    )
    polys = polys.astype(np.float32)
    # Switch to image center as referential (y axis flipped to point upwards)
    polys[..., 0] -= img_shape[1] / 2
    polys[..., 1] = img_shape[0] / 2 - polys[..., 1]
    # Rotated them around image center
    rotated_polys = rotate_abs_points(polys.reshape(-1, 2), angle).reshape(-1, 4, 2)
    # Switch back to top-left corner as referential, using the expanded canvas when requested
    target_shape = compute_expanded_shape(img_shape, angle) if expand else img_shape
    # Clip coords to fit since there is no expansion
    rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(0, target_shape[1])
    rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(0, target_shape[0])
    return rotated_polys
def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int]) -> np.ndarray:
    """Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape.

    This does not impact the absolute shape of the boxes, but allows to calculate the new
    relative box coordinates after a resizing of the image (both canvases assumed centered).

    Args:
        loc_preds: (N, 4, 2) array of RELATIVE loc_preds
        orig_shape: shape of the origin image
        dest_shape: shape of the destination image

    Returns:
        A batch of rotated loc_preds (N, 4, 2) expressed in the destination referencial

    Raises:
        ValueError: if either shape does not have exactly 2 entries
    """
    if len(dest_shape) != 2:
        raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
    if len(orig_shape) != 2:
        raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}")
    orig_height, orig_width = orig_shape
    dest_height, dest_width = dest_shape
    remapped = loc_preds.copy()
    # Absolute coordinate plus half the padding difference, renormalized by the destination size
    remapped[:, :, 0] = ((loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width
    remapped[:, :, 1] = ((loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height
    return remapped
def rotate_boxes(
loc_preds: np.ndarray,
angle: float,
orig_shape: tuple[int, int],
min_angle: float = 1.0,
target_shape: tuple[int, int] | None = None,
) -> np.ndarray:
"""Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes
(4, 2) of an angle, if angle > min_angle, around the center of the page.
If target_shape is specified, the boxes are remapped to the target shape after the rotation. This
is done to remove the padding that is created by rotate_page(expand=True)
Args:
loc_preds: (N, 4) or (N, 4, 2) array of RELATIVE boxes
angle: angle between -90 and +90 degrees
orig_shape: shape of the origin image
min_angle: minimum angle to rotate boxes
target_shape: shape of the destination image
Returns:
A batch of rotated boxes (N, 4, 2): or a batch of straight bounding boxes
"""
# Change format of the boxes to rotated boxes
_boxes = loc_preds.copy()
if _boxes.ndim == 2:
_boxes = np.stack(
[
_boxes[:, [0, 1]],
_boxes[:, [2, 1]],
_boxes[:, [2, 3]],
_boxes[:, [0, 3]],
],
axis=1,
)
# If small angle, return boxes (no rotation)
if abs(angle) < min_angle or abs(angle) > 90 - min_angle:
return _boxes
# Compute rotation matrix
angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions
rotation_mat = np.array(
[[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=_boxes.dtype
)
# Rotate absolute points
points: np.ndarray = np.stack((_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1)
image_center = (orig_shape[1] / 2, orig_shape[0] / 2)
rotated_points = image_center + np.matmul(points - image_center, rotation_mat)
rotated_boxes: np.ndarray = np.stack(
(rotated_points[:, :, 0] / orig_shape[1], rotated_points[:, :, 1] / orig_shape[0]), axis=-1
)
# Apply a mask if requested
if target_shape is not None:
rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape)
return rotated_boxes
def rotate_image(
    image: np.ndarray,
    angle: float,
    expand: bool = False,
    preserve_origin_shape: bool = False,
) -> np.ndarray:
    """Rotate an image counterclockwise by a given angle.

    Args:
        image: numpy tensor to rotate (H, W, C — np.pad below uses 3 axes)
        angle: rotation angle in degrees, between -90 and +90
        expand: whether the image should be padded before the rotation
        preserve_origin_shape: if expand is set to True, resizes the final output to the original image size

    Returns:
        Rotated array, padded by 0 by default.
    """
    # Compute the expanded padding
    exp_img: np.ndarray
    if expand:
        # Grow the canvas so the rotated content is not clipped
        exp_shape = compute_expanded_shape(image.shape[:2], angle)
        h_pad, w_pad = (
            int(max(0, ceil(exp_shape[0] - image.shape[0]))),
            int(max(0, ceil(exp_shape[1] - image.shape[1]))),
        )
        # Split padding evenly on both sides (odd remainder goes to the trailing side)
        exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
    else:
        exp_img = image
    height, width = exp_img.shape[:2]
    # Rotate around the (possibly padded) image center, keeping the same output size
    rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0)
    rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height))
    if expand:
        # Pad to get the same aspect ratio
        if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]):
            # Pad width
            if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]):
                h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1])
            # Pad height
            else:
                h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
            rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
        if preserve_origin_shape:
            # rescale back to the original (H, W); cv2.resize takes (W, H)
            rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)
    return rot_img
def remove_image_padding(image: np.ndarray) -> np.ndarray:
    """Remove black (zero-valued) border padding from an image

    Args:
        image: numpy tensor to remove padding from

    Returns:
        Image with padding removed; the input is returned unchanged when it
        contains no non-zero pixel at all
    """
    # Rows / columns that contain at least one non-zero value
    rows = np.any(image, axis=1)
    cols = np.any(image, axis=0)
    row_idx = np.where(rows)[0]
    col_idx = np.where(cols)[0]
    # Fully-black input: nothing to crop (previously raised an IndexError on the empty index array)
    if row_idx.size == 0 or col_idx.size == 0:
        return image
    rmin, rmax = row_idx[[0, -1]]
    cmin, cmax = col_idx[[0, -1]]
    return image[rmin : rmax + 1, cmin : cmax + 1]
def estimate_page_angle(polys: np.ndarray) -> float:
    """Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier)
    and return the estimated angle ccw in degrees
    """
    # Sum the two left-hand corners (points 0, 3) and the two right-hand corners (points 1, 2)
    # w.r.t. the reading direction; the shared factor of 2 cancels in the slope ratio below
    left_x = polys[:, 0, 0] + polys[:, 3, 0]
    left_y = polys[:, 0, 1] + polys[:, 3, 1]
    right_x = polys[:, 1, 0] + polys[:, 2, 0]
    right_y = polys[:, 1, 1] + polys[:, 2, 1]
    with np.errstate(divide="raise", invalid="raise"):
        try:
            # Y axis goes from top to bottom, hence (left_y - right_y)
            angles_deg = np.arctan((left_y - right_y) / (right_x - left_x)) * 180 / np.pi
            return float(np.median(angles_deg))
        except FloatingPointError:
            # Degenerate polygons with no horizontal extent: fall back to "no skew"
            return 0.0
def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) -> np.ndarray:
    """Convert a geometry to relative coordinates

    Args:
        geoms: a set of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)
        img_shape: the height and width of the image

    Returns:
        the updated geometry
    """
    height, width = img_shape
    # Polygons (N, 4, 2): x lives in the last-dim slot 0, y in slot 1
    if geoms.ndim == 3 and geoms.shape[1:] == (4, 2):
        rel_polygons: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
        rel_polygons[..., 0] = geoms[..., 0] / width
        rel_polygons[..., 1] = geoms[..., 1] / height
        return rel_polygons.clip(0, 1)
    # Straight boxes (N, 4): (xmin, ymin, xmax, ymax)
    if geoms.ndim == 2 and geoms.shape[1] == 4:
        rel_boxes: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
        rel_boxes[:, ::2] = geoms[:, ::2] / width
        rel_boxes[:, 1::2] = geoms[:, 1::2] / height
        return rel_boxes.clip(0, 1)
    raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}")
def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> list[np.ndarray]:
    """Created cropped images from list of bounding boxes

    Args:
        img: input image
        boxes: bounding boxes of shape (N, 4) where N is the number of boxes, and the relative
            coordinates (xmin, ymin, xmax, ymax); absolute pixel coordinates are accepted when
            the array has an integer dtype
        channels_last: whether the channel dimensions is the last one instead of the first one

    Returns:
        list of cropped images
    """
    if boxes.shape[0] == 0:
        return []
    if boxes.shape[1] != 4:
        raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)")
    # Project relative coordinates to absolute pixel coordinates
    _boxes = boxes.copy()
    h, w = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(_boxes.dtype, np.integer):
        _boxes[:, [0, 2]] *= w
        _boxes[:, [1, 3]] *= h
        _boxes = _boxes.round().astype(int)
    # Make the max coordinates inclusive for slicing. NOTE: the previous `_boxes[2:] += 1`
    # incremented every coordinate of box rows 2..N instead of the (xmax, ymax) columns.
    _boxes[:, 2:] += 1
    if channels_last:
        return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes])
    return deepcopy([img[:, box[1] : box[3], box[0] : box[2]] for box in _boxes])
def extract_rcrops(
    img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False
) -> list[np.ndarray]:
    """Created cropped images from list of rotated bounding boxes

    Args:
        img: input image
        polys: bounding boxes of shape (N, 4, 2)
        dtype: target data type of bounding boxes
        channels_last: whether the channel dimensions is the last one instead of the first one
        assume_horizontal: whether the boxes are assumed to be only horizontally oriented

    Returns:
        list of cropped images
    """
    if polys.shape[0] == 0:
        return []
    if polys.shape[1:] != (4, 2):
        raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)")
    # Project relative coordinates to absolute pixel coordinates
    # (integer-typed inputs are assumed to already be absolute)
    _boxes = polys.copy()
    height, width = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(_boxes.dtype, np.integer):
        _boxes[:, :, 0] *= width
        _boxes[:, :, 1] *= height
    # Warping below works on HWC images
    src_img = img if channels_last else img.transpose(1, 2, 0)
    # Handle only horizontal oriented boxes
    if assume_horizontal:
        crops = []
        for box in _boxes:
            # Calculate the centroid of the quadrilateral
            centroid = np.mean(box, axis=0)
            # Divide the points into left and right
            # NOTE(review): assumes exactly 2 points fall on each side of the centroid;
            # degenerate quads may split 1/3 — verify upstream guarantees
            left_points = box[box[:, 0] < centroid[0]]
            right_points = box[box[:, 0] >= centroid[0]]
            # Sort the left points according to the y-axis
            left_points = left_points[np.argsort(left_points[:, 1])]
            top_left_pt = left_points[0]
            bottom_left_pt = left_points[-1]
            # Sort the right points according to the y-axis
            right_points = right_points[np.argsort(right_points[:, 1])]
            top_right_pt = right_points[0]
            bottom_right_pt = right_points[-1]
            box_points = np.array(
                [top_left_pt, bottom_left_pt, top_right_pt, bottom_right_pt],
                dtype=dtype,
            )
            # Get the width and height of the rectangle that will contain the warped quadrilateral
            width_upper = np.linalg.norm(top_right_pt - top_left_pt)
            width_lower = np.linalg.norm(bottom_right_pt - bottom_left_pt)
            height_left = np.linalg.norm(bottom_left_pt - top_left_pt)
            height_right = np.linalg.norm(bottom_right_pt - top_right_pt)
            # Get the maximum width and height
            rect_width = max(int(width_upper), int(width_lower))
            rect_height = max(int(height_left), int(height_right))
            # Destination corners, in the same (tl, bl, tr, br) order as box_points
            dst_pts = np.array(
                [
                    [0, 0],  # top-left
                    # bottom-left
                    [0, rect_height - 1],
                    # top-right
                    [rect_width - 1, 0],
                    # bottom-right
                    [rect_width - 1, rect_height - 1],
                ],
                dtype=dtype,
            )
            # Get the perspective transform matrix using the box points
            affine_mat = cv2.getPerspectiveTransform(box_points, dst_pts)
            # Perform the perspective warp to get the rectified crop
            crop = cv2.warpPerspective(
                src_img,
                affine_mat,
                (rect_width, rect_height),
            )
            # Add the crop to the list of crops
            crops.append(crop)
    # Handle any oriented boxes
    else:
        # Only 3 corners are needed to define an affine transform
        src_pts = _boxes[:, :3].astype(np.float32)
        # Preserve size: d1 = |corner0 - corner1|, d2 = |corner1 - corner2|
        # NOTE(review): treating d1 as crop width and d2 as height assumes corners
        # are ordered (tl, tr, br, ...) — confirm against the detector's output order
        d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
        d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)
        # (N, 3, 2): corner0 -> (0, 0), corner1 -> (d1-1, 0), corner2 -> (d1-1, d2-1)
        dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype)
        dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1
        dst_pts[:, 2, 1] = d2 - 1
        # Use a warp transformation to extract the crop
        crops = [
            cv2.warpAffine(
                src_img,
                # Transformation matrix
                cv2.getAffineTransform(src_pts[idx], dst_pts[idx]),
                (int(d1[idx]), int(d2[idx])),
            )
            for idx in range(_boxes.shape[0])
        ]
    return crops
================================================
FILE: onnxtr/utils/multithreading.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import multiprocessing as mp
import os
from collections.abc import Callable, Iterable, Iterator
from multiprocessing.pool import ThreadPool
from typing import Any
from onnxtr.file_utils import ENV_VARS_TRUE_VALUES
__all__ = ["multithread_exec"]
def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: int | None = None) -> Iterator[Any]:
"""Execute a given function in parallel for each element of a given sequence
>>> from onnxtr.utils.multithreading import multithread_exec
>>> entries = [1, 4, 8]
>>> results = multithread_exec(lambda x: x ** 2, entries)
Args:
func: function to be executed on each element of the iterable
seq: iterable
threads: number of workers to be used for multiprocessing
Returns:
iterator of the function's results using the iterable as inputs
Notes:
This function uses ThreadPool from multiprocessing package, which uses `/dev/shm` directory for shared memory.
If you do not have write permissions for this directory (if you run `onnxtr` on AWS Lambda for instance),
you might want to disable multiprocessing. To achieve that, set 'ONNXTR_MULTIPROCESSING_DISABLE' to 'TRUE'.
"""
threads = threads if isinstance(threads, int) else min(16, mp.cpu_count())
# Single-thread
if threads < 2 or os.environ.get("ONNXTR_MULTIPROCESSING_DISABLE", "").upper() in ENV_VARS_TRUE_VALUES:
results = map(func, seq)
# Multi-threading
else:
with ThreadPool(threads) as tp:
# ThreadPool's map function returns a list, but seq could be of a different type
# That's why wrapping result in map to return iterator
results = map(lambda x: x, tp.map(func, seq)) # noqa: C417
return results
================================================
FILE: onnxtr/utils/reconstitution.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import logging
from typing import Any
import numpy as np
from anyascii import anyascii
from PIL import Image, ImageDraw
from .fonts import get_font
__all__ = ["synthesize_page"]
# Global variable to avoid multiple warnings
ROTATION_WARNING = False
def _warn_rotation(entry: dict[str, Any]) -> None: # pragma: no cover
global ROTATION_WARNING
if not ROTATION_WARNING and len(entry["geometry"]) == 4:
logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
ROTATION_WARNING = True
def _synthesize(
    response: Image.Image,
    entry: dict[str, Any],
    w: int,
    h: int,
    draw_proba: bool = False,
    font_family: str | None = None,
    smoothing_factor: float = 0.75,
    min_font_size: int = 6,
    max_font_size: int = 50,
) -> Image.Image:
    """Draw a single word or line entry onto the synthesized page.

    Args:
        response: PIL image the text is drawn on (drawn in place and returned)
        entry: exported word/line dict with a "geometry" key (2 points or a polygon)
            and either a "value" (word) or a "words" list (line)
        w: page width in pixels
        h: page height in pixels
        draw_proba: if True, also draw a colored confidence box and score
        font_family: family of the font
        smoothing_factor: shrink factor applied while searching for a fitting font size
        min_font_size: minimum font size
        max_font_size: maximum font size

    Returns:
        the updated PIL image
    """
    # Normalize the geometry to a 4-point polygon
    if len(entry["geometry"]) == 2:
        (xmin, ymin), (xmax, ymax) = entry["geometry"]
        polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
    else:
        polygon = entry["geometry"]
    # Calculate the bounding box of the word (absolute pixel coordinates)
    x_coords, y_coords = zip(*polygon)
    xmin, ymin, xmax, ymax = (
        int(round(w * min(x_coords))),
        int(round(h * min(y_coords))),
        int(round(w * max(x_coords))),
        int(round(h * max(y_coords))),
    )
    word_width = xmax - xmin
    word_height = ymax - ymin
    # If lines are provided instead of words, concatenate the word entries
    if "words" in entry:
        word_text = " ".join(word["value"] for word in entry["words"])
    else:
        word_text = entry["value"]
    # Find the optimal font size: start from the box height and shrink until the
    # rendered text fits inside the word box (or the minimum size is reached)
    try:
        font_size = min(word_height, max_font_size)
        font = get_font(font_family, font_size)
        text_width, text_height = font.getbbox(word_text)[2:4]
        while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
            font_size = max(int(font_size * smoothing_factor), min_font_size)
            font = get_font(font_family, font_size)
            text_width, text_height = font.getbbox(word_text)[2:4]
    except ValueError:  # pragma: no cover
        font = get_font(font_family, min_font_size)
    # Create a mask for the word
    # NOTE(review): this mask is never used afterwards — candidate for removal
    mask = Image.new("L", (w, h), 0)
    ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
    # Draw the word text, falling back to an ASCII transliteration when needed
    d = ImageDraw.Draw(response)
    try:
        try:
            d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
        except UnicodeEncodeError:  # pragma: no cover
            d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
    # Catch generic exceptions to avoid crashing the whole rendering
    except Exception:  # pragma: no cover
        logging.warning(f"Could not render word: {word_text}")
    if draw_proba:
        # Lines without their own confidence get the mean of their words'
        # (note: `w` in the generator shadows the page-width parameter)
        confidence = (
            entry["confidence"]
            if "confidence" in entry
            else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
        )
        p = int(255 * confidence)
        color = (255 - p, 0, p)  # Red to blue gradient based on probability
        d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
        prob_font = get_font(font_family, 20)
        prob_text = f"{confidence:.2f}"
        prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
        # Position the probability slightly above the bounding box
        prob_x_offset = (word_width - prob_text_width) // 2
        prob_y_offset = ymin - prob_text_height - 2
        prob_y_offset = max(0, prob_y_offset)
        d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
    return response
def synthesize_page(
    page: dict[str, Any],
    draw_proba: bool = False,
    font_family: str | None = None,
    smoothing_factor: float = 0.95,
    min_font_size: int = 8,
    max_font_size: int = 50,
) -> np.ndarray:
    """Draw a the content of the element page (OCR response) on a blank page.

    Args:
        page: exported Page object to represent
        draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0
        font_family: family of the font
        smoothing_factor: factor to smooth the font size
        min_font_size: minimum font size
        max_font_size: maximum font size

    Returns:
        the synthesized page
    """
    # Blank white canvas matching the page dimensions
    h, w = page["dimensions"]
    canvas = Image.new("RGB", (w, h), color=(255, 255, 255))

    # Shared rendering options forwarded to every _synthesize call
    render_kwargs = {
        "w": w,
        "h": h,
        "draw_proba": draw_proba,
        "font_family": font_family,
        "smoothing_factor": smoothing_factor,
        "min_font_size": min_font_size,
        "max_font_size": max_font_size,
    }

    for block in page["blocks"]:
        # If several lines are provided, rendering them as lines gives better results;
        # otherwise fall back to rendering each word individually
        render_lines = len(block["lines"]) > 1
        for line in block["lines"]:
            _warn_rotation(block)  # pragma: no cover
            if render_lines:
                canvas = _synthesize(response=canvas, entry=line, **render_kwargs)
            else:
                for word in line["words"]:
                    canvas = _synthesize(response=canvas, entry=word, **render_kwargs)

    return np.array(canvas, dtype=np.uint8)
================================================
FILE: onnxtr/utils/repr.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
# Adapted from https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/module.py
__all__ = ["NestedObject"]
def _addindent(s_, num_spaces):
s = s_.split("\n")
# don't do anything for single-line stuff
if len(s) == 1:
return s_
first = s.pop(0)
s = [(num_spaces * " ") + line for line in s]
s = "\n".join(s)
s = first + "\n" + s
return s
class NestedObject:
    """Base class for all nested objects in onnxtr"""

    _children_names: list[str]

    def extra_repr(self) -> str:
        # Subclasses override this to contribute a one-line summary to __repr__
        return ""

    def __repr__(self):
        # We treat the extra repr like the sub-objects, one item per line
        def format_child(value):
            # Non-empty lists of children are rendered between brackets
            if isinstance(value, list) and len(value) > 0:
                joined = ",\n".join(repr(item) for item in value)
                if len(value) > 1:
                    joined = _addindent(f"\n{joined},", 2) + "\n"
                return f"[{joined}]"
            return repr(value)

        extra_repr = self.extra_repr()
        # empty string would otherwise split into ['']
        extra_lines = extra_repr.split("\n") if extra_repr else []

        child_lines = []
        for key in getattr(self, "_children_names", []):
            child_str = _addindent(format_child(getattr(self, key)), 2)
            child_lines.append(f"({key}): {child_str}")

        lines = extra_lines + child_lines
        main_str = self.__class__.__name__ + "("
        if lines:
            # simple one-liner info, which most builtin Modules will use
            if len(extra_lines) == 1 and not child_lines:
                main_str += extra_lines[0]
            else:
                main_str += "\n  " + "\n  ".join(lines) + "\n"
        return main_str + ")"
================================================
FILE: onnxtr/utils/visualization.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
from copy import deepcopy
from typing import Any
import cv2
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.figure import Figure
from .common_types import BoundingBox, Polygon4P
__all__ = ["visualize_page", "draw_boxes"]
def rect_patch(
    geometry: BoundingBox,
    page_dimensions: tuple[int, int],
    label: str | None = None,
    color: tuple[float, float, float] = (0, 0, 0),
    alpha: float = 0.3,
    linewidth: int = 2,
    fill: bool = True,
    preserve_aspect_ratio: bool = False,
) -> patches.Rectangle:
    """Create a matplotlib rectangular patch for the element

    Args:
        geometry: bounding box of the element
        page_dimensions: dimensions of the Page in format (height, width)
        label: label to display when hovered
        color: color to draw box
        alpha: opacity parameter to fill the boxes, 0 = transparent
        linewidth: line width
        fill: whether the patch should be filled
        preserve_aspect_ratio: pass True if you passed True to the predictor

    Returns:
        a rectangular Patch
    """
    # A valid geometry is exactly two (x, y) tuples
    is_valid = len(geometry) == 2 and all(isinstance(elt, tuple) and len(elt) == 2 for elt in geometry)
    if not is_valid:
        raise ValueError("invalid geometry format")
    height, width = page_dimensions
    (xmin, ymin), (xmax, ymax) = geometry
    # Switch to absolute coordinates
    if preserve_aspect_ratio:
        width = height = max(height, width)
    x0 = xmin * width
    y0 = ymin * height
    box_w = (xmax - xmin) * width
    box_h = (ymax - ymin) * height
    return patches.Rectangle(
        (x0, y0),
        box_w,
        box_h,
        fill=fill,
        linewidth=linewidth,
        edgecolor=(*color, alpha),
        facecolor=(*color, alpha),
        label=label,
    )
def polygon_patch(
    geometry: np.ndarray,
    page_dimensions: tuple[int, int],
    label: str | None = None,
    color: tuple[float, float, float] = (0, 0, 0),
    alpha: float = 0.3,
    linewidth: int = 2,
    fill: bool = True,
    preserve_aspect_ratio: bool = False,
) -> patches.Polygon:
    """Create a matplotlib polygon patch for the element

    Args:
        geometry: bounding box of the element, relative coordinates of shape (4, 2)
        page_dimensions: dimensions of the Page in format (height, width)
        label: label to display when hovered
        color: color to draw box
        alpha: opacity parameter to fill the boxes, 0 = transparent
        linewidth: line width
        fill: whether the patch should be filled
        preserve_aspect_ratio: pass True if you passed True to the predictor

    Returns:
        a polygon Patch
    """
    if not geometry.shape == (4, 2):
        raise ValueError("invalid geometry format")
    # Unpack
    height, width = page_dimensions
    # Scale a float copy: the previous implementation wrote the absolute coordinates
    # back into `geometry`, mutating the caller's array in place (and truncating
    # values for integer-typed inputs)
    points = geometry.astype(float)
    points[:, 0] *= max(width, height) if preserve_aspect_ratio else width
    points[:, 1] *= max(width, height) if preserve_aspect_ratio else height
    return patches.Polygon(
        points,
        fill=fill,
        linewidth=linewidth,
        edgecolor=(*color, alpha),
        facecolor=(*color, alpha),
        label=label,
    )
def create_obj_patch(
    geometry: BoundingBox | Polygon4P | np.ndarray,
    page_dimensions: tuple[int, int],
    **kwargs: Any,
) -> patches.Patch:
    """Create a matplotlib patch for the element

    Args:
        geometry: bounding box (straight or rotated) of the element
        page_dimensions: dimensions of the page in format (height, width)
        **kwargs: keyword arguments for the patch

    Returns:
        a matplotlib Patch
    """
    if isinstance(geometry, tuple):
        if len(geometry) == 2:
            # straight word BB (2 pts)
            return rect_patch(geometry, page_dimensions, **kwargs)
        if len(geometry) == 4:
            # rotated word BB (4 pts)
            return polygon_patch(np.asarray(geometry), page_dimensions, **kwargs)
    elif isinstance(geometry, np.ndarray) and geometry.shape == (4, 2):
        # rotated line
        return polygon_patch(geometry, page_dimensions, **kwargs)
    raise ValueError("invalid geometry format")
def visualize_page(
    page: dict[str, Any],
    image: np.ndarray,
    words_only: bool = True,
    display_artefacts: bool = True,
    scale: float = 10,
    interactive: bool = True,
    add_labels: bool = True,
    **kwargs: Any,
) -> Figure:
    """Visualize a full page with predicted blocks, lines and words

    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> from onnxtr.utils.visualization import visualize_page
    >>> from onnxtr.models import ocr_db_crnn
    >>> model = ocr_db_crnn()
    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
    >>> out = model([[input_page]])
    >>> visualize_page(out[0].pages[0].export(), input_page)
    >>> plt.show()

    Args:
        page: the exported Page of a Document
        image: np array of the page, needs to have the same shape than page['dimensions']
        words_only: whether only words should be displayed
        display_artefacts: whether artefacts should be displayed
        scale: figsize of the largest windows side
        interactive: whether the plot should be interactive
        add_labels: for static plot, adds text labels on top of bounding box
        **kwargs: keyword arguments for the polygon patch

    Returns:
        the matplotlib figure
    """
    # Get proper scale and aspect ratio
    h, w = image.shape[:2]
    size = (scale * w / h, scale) if h > w else (scale, h / w * scale)
    fig, ax = plt.subplots(figsize=size)
    # Display the image
    ax.imshow(image)
    # hide both axis
    ax.axis("off")
    if interactive:
        artists: list[patches.Patch] = []  # instantiate an empty list of patches (to be drawn on the page)
    for block in page["blocks"]:
        # Block-level boxes (green) are only drawn when more than words are requested
        if not words_only:
            rect = create_obj_patch(
                block["geometry"], page["dimensions"], label="block", color=(0, 1, 0), linewidth=1, **kwargs
            )
            # add patch on figure
            ax.add_patch(rect)
            if interactive:
                # add patch to cursor's artists
                artists.append(rect)
        for line in block["lines"]:
            # Line-level boxes (red)
            if not words_only:
                rect = create_obj_patch(
                    line["geometry"], page["dimensions"], label="line", color=(1, 0, 0), linewidth=1, **kwargs
                )
                ax.add_patch(rect)
                if interactive:
                    artists.append(rect)
            for word in line["words"]:
                # Word-level boxes (blue) carry the recognized value and confidence
                rect = create_obj_patch(
                    word["geometry"],
                    page["dimensions"],
                    label=f"{word['value']} (confidence: {word['confidence']:.2%})",
                    color=(0, 0, 1),
                    **kwargs,
                )
                ax.add_patch(rect)
                if interactive:
                    artists.append(rect)
                elif add_labels:
                    # Static plot: compute an absolute anchor point for the text label
                    if len(word["geometry"]) == 5:
                        # (xcenter, ycenter, width, height, alpha) format
                        text_loc = (
                            int(page["dimensions"][1] * (word["geometry"][0] - word["geometry"][2] / 2)),
                            int(page["dimensions"][0] * (word["geometry"][1] - word["geometry"][3] / 2)),
                        )
                    else:
                        # first point of the geometry
                        text_loc = (
                            int(page["dimensions"][1] * word["geometry"][0][0]),
                            int(page["dimensions"][0] * word["geometry"][0][1]),
                        )
                    if len(word["geometry"]) == 2:
                        # We draw only if boxes are in straight format
                        ax.text(
                            *text_loc,
                            word["value"],
                            size=10,
                            alpha=0.5,
                            color=(0, 0, 1),
                        )
        if display_artefacts:
            # Artefact boxes (gray)
            for artefact in block["artefacts"]:
                rect = create_obj_patch(
                    artefact["geometry"],
                    page["dimensions"],
                    label="artefact",
                    color=(0.5, 0.5, 0.5),
                    linewidth=1,
                    **kwargs,
                )
                ax.add_patch(rect)
                if interactive:
                    artists.append(rect)
    if interactive:
        import mplcursors

        # Create mlp Cursor to hover patches in artists
        mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label()))
    fig.tight_layout(pad=0.0)
    return fig
def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int] | None = None, **kwargs) -> None:
    """Draw an array of relative straight boxes on an image

    Args:
        boxes: array of relative boxes, of shape (*, 4)
        image: np array, float32 or uint8
        color: color to use for bounding box edges
        **kwargs: keyword arguments from `matplotlib.pyplot.plot`
    """
    height, width = image.shape[:2]
    # Convert boxes to absolute coords (on a copy, leaving the input untouched)
    abs_boxes = deepcopy(boxes)
    abs_boxes[:, [0, 2]] *= width
    abs_boxes[:, [1, 3]] *= height
    abs_boxes = abs_boxes.astype(np.int32)
    # Default to blue edges (BGR) when no color is provided
    edge_color = color if isinstance(color, tuple) else (0, 0, 255)
    for xmin, ymin, xmax, ymax in abs_boxes.tolist():
        image = cv2.rectangle(
            image,
            (xmin, ymin),
            (xmax, ymax),
            color=edge_color,
            thickness=2,
        )
    plt.imshow(image)
    plt.plot(**kwargs)
================================================
FILE: onnxtr/utils/vocabs.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
import re
import string
__all__ = ["VOCABS"]
# Script-level character sets (digits, letters, diacritics, punctuation, ...)
# combined further down into the per-language VOCABS entries.
_BASE_VOCABS = {
    # Latin
    "digits": string.digits,
    "ascii_letters": string.ascii_letters,
    "punctuation": string.punctuation,
    "currency": "£€¥¢฿",
    # Cyrillic
    "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ",
    "russian_cyrillic_letters": "ёыэЁЫЭ",
    "russian_signs": "ъЪ",
    # Greek
    "ancient_greek": "αβγδεζηθικλμνξοπρστςυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ",
    # Arabic & Persian
    # combining marks are kept in explicit lists to stay readable/editable
    "arabic_diacritics": "".join(["ً", "ٌ", "ٍ", "َ", "ُ", "ِ", "ّ", "ْ", "ٕ", "ٓ", "ٔ", "ٚ"]),
    "arabic_digits": "٠١٢٣٤٥٦٧٨٩",
    "arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىيٱ",
    "arabic_punctuation": "؟؛«»—،",
    "persian_letters": "پچژڢڤگکی",
    # Bengali
    "bengali_consonants": "কখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহড়ঢ়য়ৰৱৼ",
    "bengali_vowels": "অআইঈউঊঋঌএঐওঔৠৡ",
    "bengali_digits": "০১২৩৪৫৬৭৮৯",
    "bengali_matras": "".join(["া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "ৗ"]),
    "bengali_virama": "্",
    "bengali_punctuation": "ঽৎ৽৺৻",
    "bengali_signs": "".join(["ঁ", "ং", "ঃ", "়"]),
    # Gujarati
    "gujarati_consonants": "કખગઘઙચછજઝઞટઠડઢણતથદધનપફબભમયરલળવશષસહ",
    "gujarati_vowels": "અઆઇઈઉઊઋઌઍએઐઑઓઔ",
    "gujarati_digits": "૦૧૨૩૪૫૬૭૮૯",
    "gujarati_matras": "".join([
        "ઁ",
        "ં",
        "ઃ",
        "઼",
        "ા",
        "િ",
        "ી",
        "ુ",
        "ૂ",
        "ૃ",
        "ૄ",
        "ૅ",
        "ે",
        "ૈ",
        "ૉ",
        "ો",
        "ૌ",
        "ૢ",
        "ૣ",
        "ૺ",
        "ૻ",
        "ૼ",
        "૽",
        "૾",
        "૿",
    ]),
    "gujarati_virama": "્",
    "gujarati_punctuation": "ઽ॥",
    "gujarati_signs": "ૐ૰",
    # Devanagari
    "devanagari_consonants": "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहऴऩळक़ख़ग़ज़ड़ढ़फ़य़ऱॺॻॼॽॾ",
    "devanagari_vowels": "अआइईउऊऋऌऍऎएऐऑऒओऔॠॡॲऄॵॶॳॴॷॸॹ",
    "devanagari_digits": "०१२३४५६७८९",
    "devanagari_matras": "".join([
        "़",
        "ं",
        "ँ",
        "ः",
        "॑",
        "॒",
        "ा",
        "ि",
        "ी",
        "ु",
        "ू",
        "ृ",
        "ॄ",
        "ॅ",
        "ॆ",
        "े",
        "ै",
        "ॉ",
        "ॊ",
        "ो",
        "ौ",
        "ॢ",
        "ॣ",
        "ॏ",
        "ॎ",
    ]),
    "devanagari_virama": "्",
    "devanagari_punctuation": "।॥॰ऽꣲ",
    "devanagari_signs": "ॐ",
    # Punjabi (Gurmukhi script)
    "punjabi_consonants": "ਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵਸ਼ਸਹਖ਼ਗ਼ਜ਼ਫ਼ੜਲ਼",
    "punjabi_vowels": "ਅਆਇਈਉਊਏਐਓਔੲੳ",
    "punjabi_digits": "੦੧੨੩੪੫੬੭੮੯",
    "punjabi_matras": "".join(["ਂ", "਼", "ਾ", "ਿ", "ੀ", "ੁ", "ੂ", "ੇ", "ੈ", "ੋ", "ੌ", "ੑ", "ੰ", "ੱ", "ੵ"]),
    "punjabi_virama": "੍",
    "punjabi_punctuation": "।॥",
    "punjabi_signs": "ੴ",
    # Tamil
    "tamil_consonants": "கஙசஞடணதநபமயரலவழளறன",
    "tamil_vowels": "அஆஇஈஉஊஎஏஐஒஓஔ",
    "tamil_digits": "௦௧௨௩௪௫௬௭௮௯",
    "tamil_matras": "".join(["ா", "ி", "ீ", "ு", "ூ", "ெ", "ே", "ை", "ொ", "ோ", "ௌ"]),
    "tamil_virama": "்",
    "tamil_punctuation": "௰௱௲",
    "tamil_signs": "ஃௐ",
    "tamil_fractions": "௳௴௵௶௷௸௹௺",
    # Telugu
    "telugu_consonants": "కఖగఘఙచఛజఝఞటఠడఢణతథదధనపఫబభమయరఱలళవశషసహఴ",
    "telugu_digits": "౦౧౨౩౪౫౬౭౮౯" + "౸౹౺౻",  # Telugu digits and fractional digits
    "telugu_vowels": "అఆఇఈఉఊఋఌఎఏఐఒఓఔౠౡ",
    "telugu_matras": "".join(["ా", "ి", "ీ", "ు", "ూ", "ృ", "ౄ", "ె", "ే", "ై", "ొ", "ో", "ౌ", "ౢ", "ౣ"]),
    "telugu_virama": "్",
    "telugu_punctuation": "ఽ",
    "telugu_signs": "".join(["ఁ", "ం", "ః"]),
    # Kannada
    "kannada_consonants": "ಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಲವಶಷಸಹಳ",
    "kannada_vowels": "ಅಆಇಈಉಊಋॠಌೡಎಏಐಒಓಔ",
    "kannada_digits": "೦೧೨೩೪೫೬೭೮೯",
    "kannada_matras": "".join(["ಾ", "ಿ", "ೀ", "ು", "ೂ", "ೃ", "ೄ", "ೆ", "ೇ", "ೈ", "ೊ", "ೋ", "ೌ"]),
    "kannada_virama": "್",
    "kannada_punctuation": "।॥ೱೲ",
    "kannada_signs": "".join(["ಂ", "ಃ", "ಁ"]),
    # Sinhala
    "sinhala_consonants": "කඛගඝඞචඡජඣඤටඨඩඪණතථදධනපඵබභමයරලවශෂසහළෆ",
    "sinhala_vowels": "අආඇඈඉඊඋඌඍඎඏඐඑඒඓඔඕඖ",
    "sinhala_digits": "෦෧෨෩෪෫෬෭෮෯",
    "sinhala_matras": "".join(["ා", "ැ", "ෑ", "ි", "ී", "ු", "ූ", "ෙ", "ේ", "ෛ", "ො", "ෝ", "ෞ"]),
    "sinhala_virama": "්",
    "sinhala_punctuation": "෴",
    "sinhala_signs": "".join(["ං", "ඃ"]),
    # Malayalam
    "malayalam_consonants": "കഖഗഘങചഛജഝഞടഠഡഢണതഥദധനപഫബഭമയരറലളഴവശഷസഹ",
    "malayalam_vowels": "അആഇഈഉഊഋൠഌൡഎഏഐഒഓഔ",
    "malayalam_digits": "൦൧൨൩൪൫൬൭൮൯",
    "malayalam_matras": "".join(["ാ", "ി", "ീ", "ു", "ൂ", "ൃ", "ൄ", "ൢ", "ൣ", "െ", "േ", "ൈ", "ൊ", "ോ", "ൌ"]),
    "malayalam_virama": "്",
    "malayalam_signs": "".join(["ഃ", "൹", "ഽ", "൏", "ം"]),
    # Odia (Oriya)
    "odia_consonants": "କଖଗଘଙଚଛଜଝଞଟଠଡଢଣତଥଦଧନପଫବଭମଯରଲଳଵଶଷସହୟୱଡ଼ଢ଼",
    "odia_vowels": "ଅଆଇଈଉଊଋଌଏଐଓଔୡୠ",
    "odia_digits": "୦୧୨୩୪୫୬୭୮୯" + "୲୳୴୵୶୷",  # Odia digits and fractional digits
    "odia_matras": "".join(["ା", "ି", "ୀ", "ୁ", "ୂ", "ୃ", "ୄ", "େ", "ୈ", "ୋ", "ୌ", "ୢ", "ୣ"]),
    "odia_virama": "୍",
    "odia_punctuation": "ଽ",
    "odia_signs": "".join(["ଂ", "ଃ", "ଁ", "଼", "୰"]),
    # Khmer
    "khmer_consonants": "កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ",
    "khmer_vowels": "ឣឤឥឦឧឨឩឪឫឬឭឮឯឰឱឲឳ",
    "khmer_digits": "០១២៣៤៥៦៧៨៩",
    "khmer_matras": "".join(["ា", "ិ", "ី", "ឹ", "ឺ", "ុ", "ូ", "ួ", "ើ", "ឿ", "ៀ", "េ", "ែ", "ៃ", "ោ", "ៅ"]),
    "khmer_diacritics": "".join(["ំ", "ះ", "ៈ", "៉", "៊", "់", "៌", "៍", "៎", "៏", "័", "៑", "៓", "៝"]),
    "khmer_virama": "្",
    "khmer_punctuation": "។៕៖៘៙៚ៗៜ",
    # Burmese
    "burmese_consonants": "ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအၐၑၒၓၔၕၚၛၜၝၡၥၦၮၯၰၵၶၷၸၹၺၻၼၽၾၿႀႁႎ",
    "burmese_vowels": "ဣဤဥဦဧဩဪဿ",
    "burmese_digits": "၀၁၂၃၄၅၆၇၈၉" + "႐႑႒႓႔႕႖႗႘႙",  # Burmese digits and Shan digits
    "burmese_diacritics": "".join(["့", "း", "ံ", "ါ", "ာ", "ိ", "ီ", "ု", "ူ", "ေ", "ဲ", "ဳ", "ဴ", "ဵ", "ျြွှ"]),  # းံါာိီုူေဲံ့းှျြွှ
    # ္ (virama) and ် (final consonant) - the first is used to stack consonants, the second is used for final consonants
    "burmese_virama": "".join([
        "္",
        "်",
    ]),
    "burmese_punctuation": "၊။၌၍၎၏" + "ၤ" + "ၗ",  # Includes ၗ and ၤ
    # Javanese
    "javanese_consonants": "ꦏꦐꦑꦒꦓꦔꦕꦖꦗꦘꦙꦚꦛꦜꦝꦞꦟꦠꦡꦢꦣꦤꦥꦦꦧꦨꦩꦪꦫꦬꦭꦮꦯꦰꦱꦲ",
    "javanese_vowels": "ꦄꦅꦆꦇꦈꦉꦊꦋꦌꦍꦎ" + "ꦴꦵꦶꦷꦸꦹꦺꦻꦼ",  # sec: Dependent vowels ꦴꦵꦶꦷꦸꦹꦺꦻꦼ
    "javanese_digits": "꧐꧑꧒꧓꧔꧕꧖꧗꧘꧙",
    "javanese_diacritics": "".join(["ꦀ", "ꦁ", "ꦂ", "ꦃ", "꦳", "ꦽ", "ꦾ", "ꦿ"]),  # ꦀꦁꦂꦃ꦳ꦽꦾꦿ
    "javanese_virama": "꧀",
    "javanese_punctuation": "".join(["꧈", "꧉", "꧊", "꧋", "꧌", "꧍", "ꧏ"]),
    # Sudanese
    "sudanese_consonants": "ᮊᮋᮌᮍᮎᮏᮐᮑᮒᮓᮔᮕᮖᮗᮘᮙᮚᮛᮜᮝᮞᮟᮠᮮᮯᮺᮻᮼᮽᮾᮿ",
    "sudanese_vowels": "ᮃᮄᮅᮆᮇᮈᮉ",
    "sudanese_digits": "᮰᮱᮲᮳᮴᮵᮶᮷᮸᮹",
    "sudanese_diacritics": "".join(["ᮀ", "ᮁ", "ᮂ", "ᮡ", "ᮢ", "ᮣ", "ᮤ", "ᮥ", "ᮦ", "ᮧ", "ᮨ", "ᮩ", "᮪", "᮫", "ᮬ", "ᮭ"]),  # "ᮀᮁᮂᮡᮢᮣᮤᮥᮦᮧᮨᮩ᮪᮫ᮬᮭ"
    # Hebrew
    "hebrew_cantillations": "".join([
        "֑",
        "֒",
        "֓",
        "֔",
        "֕",
        "֖",
        "֗",
        "֘",
        "֙",
        "֚",
        "֛",
        "֜",
        "֝",
        "֞",
        "֟",
        "֠",
        "֡",
        "֢",
        "֣",
        "֤",
        "֥",
        "֦",
        "֧",
        "֨",
        "֩",
        "֪",
        "֫",
        "֬",
        "֭",
        "֮",
        "֯",
    ]),
    "hebrew_consonants": "אבגדהוזחטיךכלםמןנסעףפץצקרשת",
    "hebrew_specials": "ׯװױײיִﬞײַﬠﬡﬢﬣﬤﬥﬦﬧﬨ﬩שׁשׂשּׁשּׂאַאָאּבּגּדּהּוּזּטּיּךּכּלּמּנּסּףּפּצּקּרּשּתּוֹבֿכֿפֿﭏ",
    "hebrew_punctuation": "".join(["ֽ", "־", "ֿ", "׀", "ׁ", "ׂ", "׃", "ׄ", "ׅ", "׆", "׳", "״"]),
    "hebrew_vowels": "".join(["ְ", "ֱ", "ֲ", "ֳ", "ִ", "ֵ", "ֶ", "ַ", "ָ", "ֹ", "ֺ", "ֻ", "ׇ"]),
}
VOCABS: dict[str, str] = {}
for key, value in _BASE_VOCABS.items():
VOCABS[key] = value
# Latin & latin-dependent alphabets
# Each language vocab derives from the English base: letters the language
# does not use are stripped with re.sub, and language-specific letters
# (plus a currency symbol, where relevant) are appended.
VOCABS["latin"] = _BASE_VOCABS["digits"] + _BASE_VOCABS["ascii_letters"] + _BASE_VOCABS["punctuation"]
VOCABS["english"] = VOCABS["latin"] + "°" + _BASE_VOCABS["currency"]
VOCABS["albanian"] = VOCABS["english"] + "çëÇË"
VOCABS["afrikaans"] = VOCABS["english"] + "èëïîôûêÈËÏÎÔÛÊ"
VOCABS["azerbaijani"] = re.sub(r"[Ww]", "", VOCABS["english"]) + "çəğöşüÇƏĞÖŞÜ" + "₼"
VOCABS["basque"] = VOCABS["english"] + "ñçÑÇ"
VOCABS["bosnian"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "čćđšžČĆĐŠŽ"
VOCABS["catalan"] = VOCABS["english"] + "àèéíïòóúüçÀÈÉÍÏÒÓÚÜÇ"
VOCABS["croatian"] = VOCABS["english"] + "ČčĆćĐ𩹮ž"
VOCABS["czech"] = VOCABS["english"] + "áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ"
VOCABS["danish"] = VOCABS["english"] + "æøåÆØÅ"
VOCABS["dutch"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ"
VOCABS["estonian"] = VOCABS["english"] + "šžõäöüŠŽÕÄÖÜ"
VOCABS["esperanto"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "ĉĝĥĵŝŭĈĜĤĴŜŬ" + "₷"
VOCABS["french"] = VOCABS["english"] + "àâéèêëîïôùûüçÀÂÉÈÊËÎÏÔÙÛÜÇ"
VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ"
VOCABS["frisian"] = re.sub(r"[QqXx]", "", VOCABS["english"]) + "âêôûúÂÊÔÛÚ" + "ƒƑ"
VOCABS["galician"] = re.sub(r"[JjKkWw]", "", VOCABS["english"]) + "ñÑçÇ"
VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ"
VOCABS["hausa"] = re.sub(r"[PpQqVvXx]", "", VOCABS["english"]) + "ɓɗƙƴƁƊƘƳ" + "₦"
VOCABS["hungarian"] = VOCABS["english"] + "áéíóöúüÁÉÍÓÖÚÜ"
VOCABS["icelandic"] = re.sub(r"[CcQqWw]", "", VOCABS["english"]) + "ðáéíóúýþæöÐÁÉÍÓÚÝÞÆÖ"
VOCABS["indonesian"] = VOCABS["english"]
VOCABS["irish"] = VOCABS["english"] + "áéíóúÁÉÍÓÚ"
VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ"
VOCABS["latvian"] = re.sub(r"[QqWwXx]", "", VOCABS["english"]) + "āčēģīķļņšūžĀČĒĢĪĶĻŅŠŪŽ"
VOCABS["lithuanian"] = re.sub(r"[QqWwXx]", "", VOCABS["english"]) + "ąčęėįšųūžĄČĘĖĮŠŲŪŽ"
VOCABS["luxembourgish"] = VOCABS["english"] + "äöüéëÄÖÜÉË"
VOCABS["malagasy"] = re.sub(r"[CcQqUuWwXx]", "", VOCABS["english"]) + "ôñÔÑ"
VOCABS["malay"] = VOCABS["english"]
VOCABS["maltese"] = re.sub(r"[CcYy]", "", VOCABS["english"]) + "ċġħżĊĠĦŻ"
VOCABS["maori"] = re.sub(r"[BbCcDdFfJjLlOoQqSsVvXxYyZz]", "", VOCABS["english"]) + "āēīōūĀĒĪŌŪ"
# Montenegrin Latin drops q/w/x/y and adds ś/ź alongside the shared č/ć/š/ž.
# Fix: the lowercase "ś" was missing even though uppercase "Ś" was present.
VOCABS["montenegrin"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "čćšśžźČĆŠŚŽŹ"
# Latin-script languages (continued): Nordic, Slavic-Latin, Iberian, etc.
VOCABS["norwegian"] = VOCABS["english"] + "æøåÆØÅ"
VOCABS["polish"] = VOCABS["english"] + "ąćęłńóśźżĄĆĘŁŃÓŚŹŻ"
VOCABS["portuguese"] = VOCABS["english"] + "áàâãéêíïóôõúüçÁÀÂÃÉÊÍÏÓÔÕÚÜÇ"
VOCABS["quechua"] = re.sub(r"[BbDdFfGgJjVvXxZz]", "", VOCABS["english"]) + "ñÑĉĈçÇ"
VOCABS["romanian"] = VOCABS["english"] + "ăâîșțĂÂÎȘȚ"
VOCABS["scottish_gaelic"] = re.sub(r"[JjKkQqVvWwXxYyZz]", "", VOCABS["english"]) + "àèìòùÀÈÌÒÙ"
VOCABS["serbian_latin"] = VOCABS["english"] + "čćđžšČĆĐŽŠ"
VOCABS["slovak"] = VOCABS["english"] + "ôäčďľňšťžáéíĺóŕúýÔÄČĎĽŇŠŤŽÁÉÍĹÓŔÚÝ"
VOCABS["slovene"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "čćđšžČĆĐŠŽ"
VOCABS["somali"] = re.sub(r"[PpVvZz]", "", VOCABS["english"])
VOCABS["spanish"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ" + "¡¿"
VOCABS["swahili"] = re.sub(r"[QqXx]", "", VOCABS["english"])
VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ"
VOCABS["tagalog"] = re.sub(r"[CcQqWwXx]", "", VOCABS["english"]) + "ñÑ" + "₱"
VOCABS["turkish"] = re.sub(r"[QqWwXx]", "", VOCABS["english"]) + "çğıöşüâîûÇĞİÖŞÜÂÎÛ" + "₺"
VOCABS["uzbek_latin"] = re.sub(r"[Ww]", "", VOCABS["english"]) + "çğɉñöşÇĞɈÑÖŞ"
# Vietnamese needs the full set of precomposed tone/vowel letters.
VOCABS["vietnamese"] = (
    VOCABS["english"]
    + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựíìỉĩịýỳỷỹỵ"
    + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰÍÌỈĨỊÝỲỶỸỴ"
    + "₫" # currency
)
VOCABS["welsh"] = re.sub(r"[KkQqVvXxZz]", "", VOCABS["english"]) + "âêîôŵŷÂÊÎÔŴŶ"
VOCABS["yoruba"] = re.sub(r"[CcQqVvXxZz]", "", VOCABS["english"]) + "ẹọṣẸỌṢ" + "₦"
VOCABS["zulu"] = VOCABS["english"]
# Non-latin alphabets.
# Cyrillic
# Cyrillic vocabs are assembled from the shared base letter sets plus
# language-specific letters and the national currency sign.
VOCABS["russian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "₽"
)
VOCABS["belarusian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ўiЎI"
    + "₽"
)
VOCABS["ukrainian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ґіїєҐІЇЄ"
    + "₴"
)
# Languages based on the Russian vocab; .replace strips the rouble sign
# before appending the language's own currency (if any).
VOCABS["tatar"] = VOCABS["russian"] + "ӘәҖҗҢңӨөҮү"
VOCABS["tajik"] = VOCABS["russian"].replace("₽", "") + "ҒғҚқҲҳҶҷӢӣӮӯ"
VOCABS["kazakh"] = VOCABS["russian"].replace("₽", "") + "ӘәҒғҚқҢңӨөҰұҮүҺһІі" + "₸"
VOCABS["kyrgyz"] = VOCABS["russian"].replace("₽", "") + "ҢңӨөҮү"
VOCABS["bulgarian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
)
VOCABS["macedonian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ЃѓЅѕЈјЉљЊњЌќЏџ"
)
VOCABS["mongolian"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ӨөҮү"
    + "᠐᠑᠒᠓᠔᠕᠖᠗᠘᠙" # Mongolian digits
    + "₮"
)
VOCABS["yakut"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ҔҕҤҥӨөҺһҮү"
    + "₽"
)
VOCABS["serbian_cyrillic"] = (
    "абвгдежзиклмнопрстуфхцчшАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШ" # limited cyrillic
    + "JjЂђЉљЊњЋћЏџ" # Serbian specials
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
)
VOCABS["uzbek_cyrillic"] = (
    _BASE_VOCABS["generic_cyrillic_letters"]
    + _BASE_VOCABS["russian_cyrillic_letters"]
    + _BASE_VOCABS["russian_signs"]
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["currency"]
    + "ЎўҚқҒғҲҳ"  # Uzbek-specific Cyrillic letters
)
# NOTE: a second, redundant assignment to VOCABS["ukrainian"] previously
# followed here; it rebuilt the exact same string as the earlier definition
# above and has been removed as dead code.
# Greek
VOCABS["greek"] = (
    _BASE_VOCABS["punctuation"] + _BASE_VOCABS["ancient_greek"] + _BASE_VOCABS["currency"] + "άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ"
)
# Polytonic Greek: adds the precomposed breathing/accent code points.
VOCABS["greek_extended"] = (
    VOCABS["greek"]
    + "ͶͷϜϝἀἁἂἃἄἅἆἇἈἉἊἋἌἍἎἏἐἑἒἓἔἕἘἙἚἛἜἝἠἡἢἣἤἥἦἧἨἩἪἫἬἭἮἯἰἱἲἳἴἵἶἷἸἹἺἻἼἽἾἿ"
    + "ὀὁὂὃὄὅὈὉὊὋὌὍὐὑὒὓὔὕὖὗὙὛὝὟὠὡὢὣὤὥὦὧὨὩὪὫὬὭὮὯὰὲὴὶὸὺὼᾀᾁᾂᾃᾄᾅᾆᾇᾈᾉᾊᾋᾌᾍᾎᾏᾐ"
    + "ᾑᾒᾓᾔᾕᾖᾗᾘᾙᾚᾛᾜᾝᾞᾟᾠᾡᾢᾣᾤᾥᾦᾧᾨᾩᾪᾫᾬᾭᾮᾯᾲᾳᾴᾶᾷᾺᾼῂῃῄῆῇῈῊῌῒΐῖῗῚῢΰῤῥῦῧῪῬῲῳῴῶῷῸῺῼ"
)
# Hebrew
VOCABS["hebrew"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + _BASE_VOCABS["hebrew_consonants"]
    + _BASE_VOCABS["hebrew_vowels"]
    + _BASE_VOCABS["hebrew_punctuation"]
    + _BASE_VOCABS["hebrew_cantillations"]
    + _BASE_VOCABS["hebrew_specials"]
    + "₪"
)
# Arabic
VOCABS["arabic"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["arabic_digits"]
    + _BASE_VOCABS["arabic_letters"]
    + _BASE_VOCABS["persian_letters"]
    + _BASE_VOCABS["arabic_diacritics"]
    + _BASE_VOCABS["arabic_punctuation"]
    + _BASE_VOCABS["punctuation"]
)
# Arabic-script languages: reuse the Arabic base plus their extra letters.
VOCABS["persian"] = VOCABS["arabic"]
VOCABS["urdu"] = VOCABS["persian"] + "ٹڈڑںھےہۃ"
VOCABS["pashto"] = VOCABS["persian"] + "ټډړږښځڅڼېۍ"
VOCABS["kurdish"] = VOCABS["persian"] + "ڵڕۆێە"
VOCABS["uyghur"] = VOCABS["persian"] + "ەېۆۇۈڭھ"
VOCABS["sindhi"] = VOCABS["persian"] + "ڀٿٺٽڦڄڃڇڏڌڊڍڙڳڱڻھ"
# Indic scripts
# Rules:
# Any consonant can be "combined" with any matra
# The virama is used to create consonant clusters - so C + Virama + C = CC
# Each vocab follows the same recipe: consonants + independent vowels +
# script digits + matras (dependent vowel signs) + virama + script-specific
# punctuation/signs + western punctuation + the currency sign.
# Devanagari based
VOCABS["devanagari"] = (
    _BASE_VOCABS["devanagari_consonants"]
    + _BASE_VOCABS["devanagari_vowels"]
    + _BASE_VOCABS["devanagari_digits"]
    + _BASE_VOCABS["devanagari_matras"]
    + _BASE_VOCABS["devanagari_virama"]
    + _BASE_VOCABS["devanagari_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Devanagari
    + "₹" # currency
)
# Languages written in Devanagari share the same character set.
VOCABS["hindi"] = VOCABS["devanagari"]
VOCABS["sanskrit"] = VOCABS["devanagari"]
VOCABS["marathi"] = VOCABS["devanagari"]
VOCABS["nepali"] = VOCABS["devanagari"]
# Gujarati
VOCABS["gujarati"] = (
    _BASE_VOCABS["gujarati_consonants"]
    + _BASE_VOCABS["gujarati_vowels"]
    + _BASE_VOCABS["gujarati_digits"]
    + _BASE_VOCABS["gujarati_matras"]
    + _BASE_VOCABS["gujarati_virama"]
    + _BASE_VOCABS["gujarati_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Gujarati
    + _BASE_VOCABS["gujarati_signs"]
    + "૱" # currency
)
# Bengali
VOCABS["bengali"] = (
    _BASE_VOCABS["bengali_consonants"]
    + _BASE_VOCABS["bengali_vowels"]
    + _BASE_VOCABS["bengali_digits"]
    + _BASE_VOCABS["bengali_matras"]
    + _BASE_VOCABS["bengali_virama"]
    + _BASE_VOCABS["bengali_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Bengali
    + _BASE_VOCABS["bengali_signs"]
    + "৳" # currency
)
# Brahmic scripts
VOCABS["tamil"] = (
    _BASE_VOCABS["tamil_consonants"]
    + _BASE_VOCABS["tamil_vowels"]
    + _BASE_VOCABS["tamil_digits"]
    + _BASE_VOCABS["tamil_matras"]
    + _BASE_VOCABS["tamil_virama"]
    + _BASE_VOCABS["tamil_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Tamil
    + _BASE_VOCABS["tamil_fractions"] # This is a Tamil-specific addition
    + _BASE_VOCABS["tamil_signs"]
    + "₹" # currency
)
VOCABS["telugu"] = (
    _BASE_VOCABS["telugu_consonants"]
    + _BASE_VOCABS["telugu_vowels"]
    + _BASE_VOCABS["telugu_digits"]
    + _BASE_VOCABS["telugu_matras"]
    + _BASE_VOCABS["telugu_virama"]
    + _BASE_VOCABS["telugu_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Telugu
    + _BASE_VOCABS["telugu_signs"]
    + "₹" # currency
)
VOCABS["kannada"] = (
    _BASE_VOCABS["kannada_consonants"]
    + _BASE_VOCABS["kannada_vowels"]
    + _BASE_VOCABS["kannada_digits"]
    + _BASE_VOCABS["kannada_matras"]
    + _BASE_VOCABS["kannada_virama"]
    + _BASE_VOCABS["kannada_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Kannada
    + _BASE_VOCABS["kannada_signs"]
    + "₹" # currency
)
VOCABS["sinhala"] = (
    _BASE_VOCABS["sinhala_consonants"]
    + _BASE_VOCABS["sinhala_vowels"]
    + _BASE_VOCABS["sinhala_digits"]
    + _BASE_VOCABS["sinhala_matras"]
    + _BASE_VOCABS["sinhala_virama"]
    + _BASE_VOCABS["sinhala_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Sinhala
    + _BASE_VOCABS["sinhala_signs"]
    + "₹" # currency
)
VOCABS["malayalam"] = (
    _BASE_VOCABS["malayalam_consonants"]
    + _BASE_VOCABS["malayalam_vowels"]
    + _BASE_VOCABS["malayalam_digits"]
    + _BASE_VOCABS["malayalam_matras"]
    + _BASE_VOCABS["malayalam_virama"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Malayalam
    + _BASE_VOCABS["malayalam_signs"]
    + "₹" # currency
)
VOCABS["punjabi"] = (
    _BASE_VOCABS["punjabi_consonants"]
    + _BASE_VOCABS["punjabi_vowels"]
    + _BASE_VOCABS["punjabi_digits"]
    + _BASE_VOCABS["punjabi_matras"]
    + _BASE_VOCABS["punjabi_virama"]
    + _BASE_VOCABS["punjabi_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Punjabi
    + _BASE_VOCABS["punjabi_signs"]
    + "₹" # currency
)
VOCABS["odia"] = (
    _BASE_VOCABS["odia_consonants"]
    + _BASE_VOCABS["odia_vowels"]
    + _BASE_VOCABS["odia_digits"]
    + _BASE_VOCABS["odia_matras"]
    + _BASE_VOCABS["odia_virama"]
    + _BASE_VOCABS["odia_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Odia
    + _BASE_VOCABS["odia_signs"]
    + "₹" # currency
)
VOCABS["khmer"] = (
    _BASE_VOCABS["khmer_consonants"]
    + _BASE_VOCABS["khmer_vowels"]
    + _BASE_VOCABS["khmer_digits"]
    + _BASE_VOCABS["khmer_matras"]
    + _BASE_VOCABS["khmer_virama"]
    + _BASE_VOCABS["khmer_diacritics"] # This is a Khmer-specific addition
    + _BASE_VOCABS["khmer_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Khmer
    + "៛" # Cambodian currency
)
# Armenian
VOCABS["armenian"] = (
    "ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖՙՠաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆևֈ"
    + _BASE_VOCABS["digits"]
    + _BASE_VOCABS["punctuation"]
    + "՚՛՜՝՞՟։֊" # Armenian punctuation
    + "֏" # currency (dram)
)
# Sudanese
VOCABS["sudanese"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["sudanese_digits"]
    + _BASE_VOCABS["sudanese_consonants"]
    + _BASE_VOCABS["sudanese_vowels"]
    + _BASE_VOCABS["sudanese_diacritics"]
    + _BASE_VOCABS["punctuation"]
)
# Thai
# Rules:
# Diacritics are used to modify the consonants and vowels
VOCABS["thai"] = (
    _BASE_VOCABS["digits"]
    + "๐๑๒๓๔๕๖๗๘๙" # Thai digits
    + _BASE_VOCABS["punctuation"]
    + "๏๚๛ๆฯ" # Thai punctuation / repetition marks
    + "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮ" # Thai consonants
    + "ะาำเแโใไๅ" # Thai vowels
    + " ัิีึืฺุู็่้๊๋์ํ๎".replace(" ", "") # combining marks; leading space stripped
    + "฿" # currency (baht)
)
VOCABS["lao"] = (
    _BASE_VOCABS["digits"]
    + "໐໑໒໓໔໕໖໗໘໙" # Lao digits
    + _BASE_VOCABS["punctuation"]
    + "ໆໞໟຯ" # Lao repetition / ellipsis marks
    + "ກຂຄຆງຈຉຊຌຍຎຏຐຑຒຓດຕຖທຘນບປຜຝພຟຠມຢຣລວຨຩສຫຬອຮ" # Lao consonants
    + "ະາຳຽເແໂໃໄ" # Lao vowels
    + "ໜໝ" # Lao ligature
    + "".join(["ັ", "ິ", "ີ", "ຶ", "ື", "ຸ", "ູ", "຺", "ົ", "ຼ", "່", "້", "໊", "໋", "໌", "ໍ"])
)
# Burmese & Javanese
# Rules:
# - A syllable usually starts with a base consonant.
# - Diacritics (sandhangan), which represent vowels and consonant modifications, are attached to the base consonant:
#   - Vowel signs (ꦴꦵꦶꦷꦸꦹꦺꦻꦼ) follow the consonant and determine the syllable's vowel sound.
#   - Medial signs like ꦿ (ra), ꦾ (ya), and ꦽ (vocalic r) modify the consonant cluster.
# - The virama (꧀, called *pangkon*) suppresses the inherent vowel,
#   creating consonant clusters.
# - Special signs like ꦀ (cecak), ꦁ (layar), ꦂ (cakra), and ꦃ (wignyan)
#   can appear before or after syllables to represent nasal or glottal finals.
# - Independent vowels (ꦄꦅꦆꦇꦈꦉꦊꦋꦌꦍꦎ) can occur without a base consonant, especially at word/sentence starts.
# - Use Unicode NFC normalization to ensure composed syllables render correctly.
VOCABS["burmese"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["burmese_digits"]
    + _BASE_VOCABS["burmese_consonants"]
    + _BASE_VOCABS["burmese_vowels"]
    + _BASE_VOCABS["burmese_diacritics"]
    + _BASE_VOCABS["burmese_virama"]
    + _BASE_VOCABS["burmese_punctuation"]
)
VOCABS["javanese"] = (
    _BASE_VOCABS["digits"]
    + _BASE_VOCABS["javanese_digits"]
    + _BASE_VOCABS["javanese_consonants"]
    + _BASE_VOCABS["javanese_vowels"]
    + _BASE_VOCABS["javanese_diacritics"]
    + _BASE_VOCABS["javanese_virama"]
    + _BASE_VOCABS["javanese_punctuation"]
    + _BASE_VOCABS["punctuation"] # western punctuation used in Javanese
)
# Georgian (Mkhedruli - modern)
VOCABS["georgian"] = (
    _BASE_VOCABS["digits"]
    + "ႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅჇჍაბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶჷჸჹჺჼჽჾჿ"
    + _BASE_VOCABS["punctuation"]
    + "჻" # Georgian paragraph separator
    + "₾" # currency
)
# Ethiopic
# Ge'ez-script syllabary (used for Amharic, Tigrinya, etc.) plus the
# Ethiopic numeral signs.
VOCABS["ethiopic"] = (
    "ሀሁሂሃሄህሆሇለሉሊላሌልሎሏሐሑሒሓሔሕሖሗመሙሚማሜምሞሟሠሡሢሣሤሥሦሧረሩሪራሬርሮሯሰሱሲሳሴስሶሷሸሹሺሻሼሽሾሿቀቁቂቃቄቅቆቇቈቊቋ"
    + "ቌቍቐቑቒቓቔቕቖቘቚቛቜቝበቡቢባቤብቦቧቨቩቪቫቬቭቮቯተቱቲታቴትቶቷቸቹቺቻቼችቾቿኀኁኂኃኄኅኆኇኈኊኋኌኍነኑኒናኔንኖኗኘኙኚኛኜኝኞኟአኡኢኣኤእኦኧ"
    + "ከኩኪካኬክኮኯኰኲኳኴኵኸኹኺኻኼኽኾዀዂዃዄዅወዉዊዋዌውዎዏዐዑዒዓዔዕዖዘዙዚዛዜዝዞዟዠዡዢዣዤዥዦዧየዩዪያዬይዮዯደዱዲዳዴድዶዷዸዹዺ"
    + "ዻዼዽዾዿጀጁጂጃጄጅጆጇገጉጊጋጌግጎጏጐጒጓጔጕጘጙጚጛጜጝጞጟጠጡጢጣጤጥጦጧጨጩጪጫጬጭጮጯጰጱጲጳጴጵጶጷጸጹጺጻጼጽጾጿፀፁፂፃፄፅፆ"
    + "ፇፈፉፊፋፌፍፎፏፐፑፒፓፔፕፖፗፘፙፚᎀᎁᎂᎃᎄᎅᎆᎇᎈᎉᎊᎋᎌᎍᎎᎏ"
    + "፩፪፫፬፭፮፯፰፱፲፳፴፵፶፷፸፹፺፻፼" # digits
)
# East Asian
# Japanese: full hiragana + katakana ranges, the jōyō kanji list,
# CJK punctuation, and the shared currency symbols.
VOCABS["japanese"] = (
    _BASE_VOCABS["digits"]
    + "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづ"
    + "てでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめ"
    + "もゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" # Hiragana
    + "ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダ"
    + "チヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメ"
    + "モャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺーヽヾヿ" # Katakana
    # Kanji jōyō (incl. numerals)
    + "亜哀挨愛曖悪握圧扱宛嵐安案暗以衣位囲医依委威為畏胃尉異移萎偉椅彙意違維慰遺緯域育一壱逸茨芋引印因咽姻員院淫陰飲隠韻右宇羽雨唄鬱畝浦運雲" # noqa: E501
    + "永泳英映栄営詠影鋭衛易疫益液駅悦越謁閲円延沿炎怨宴媛援園煙猿遠鉛塩演縁艶汚王凹央応往押旺欧殴桜翁奥横岡屋億憶臆虞乙俺卸音恩温穏下化火加" # noqa: E501
    + "可仮何花佳価果河苛科架夏家荷華菓貨渦過嫁暇禍靴寡歌箇稼課蚊牙瓦我画芽賀雅餓介回灰会快戒改怪拐悔海界皆械絵開階塊楷解潰壊懐諧貝外劾害崖涯" # noqa: E501
    + "街慨蓋該概骸垣柿各角拡革格核殻郭覚較隔閣確獲嚇穫学岳楽額顎掛潟括活喝渇割葛滑褐轄且株釜鎌刈干刊甘汗缶完肝官冠巻看陥乾勘患貫寒喚堪換敢棺" # noqa: E501
    + "款間閑勧寛幹感漢慣管関歓監緩憾還館環簡観韓艦鑑丸含岸岩玩眼頑顔願企伎危机気岐希忌汽奇祈季紀軌既記起飢鬼帰基寄規亀喜幾揮期棋貴棄毀旗器畿" # noqa: E501
    + "輝機騎技宜偽欺義疑儀戯擬犠議菊吉喫詰却客脚逆虐九久及弓丘旧休吸朽臼求究泣急級糾宮救球給嗅窮牛去巨居拒拠挙虚許距魚御漁凶共叫狂京享供協況" # noqa: E501
    + "峡挟狭恐恭胸脅強教郷境橋矯鏡競響驚仰暁業凝曲局極玉巾斤均近金菌勤琴筋僅禁緊錦謹襟吟銀区句苦駆具惧愚空偶遇隅串屈掘窟熊繰君訓勲薫軍郡群兄" # noqa: E501
    + "刑形系径茎係型契計恵啓掲渓経蛍敬景軽傾携継詣慶憬稽憩警鶏芸迎鯨隙劇撃激桁欠穴血決結傑潔月犬件見券肩建研県倹兼剣拳軒健険圏堅検嫌献絹遣権" # noqa: E501
    + "憲賢謙鍵繭顕験懸元幻玄言弦限原現舷減源厳己戸古呼固股虎孤弧故枯個庫湖雇誇鼓錮顧五互午呉後娯悟碁語誤護口工公勾孔功巧広甲交光向后好江考行" # noqa: E501
    + "坑孝抗攻更効幸拘肯侯厚恒洪皇紅荒郊香候校耕航貢降高康控梗黄喉慌港硬絞項溝鉱構綱酵稿興衡鋼講購乞号合拷剛傲豪克告谷刻国黒穀酷獄骨駒込頃今" # noqa: E501
    + "困昆恨根婚混痕紺魂墾懇左佐沙査砂唆差詐鎖座挫才再災妻采砕宰栽彩採済祭斎細菜最裁債催塞歳載際埼在材剤財罪崎作削昨柵索策酢搾錯咲冊札刷刹拶" # noqa: E501
    + "殺察撮擦雑皿三山参桟蚕惨産傘散算酸賛残斬暫士子支止氏仕史司四市矢旨死糸至伺志私使刺始姉枝祉肢姿思指施師恣紙脂視紫詞歯嗣試詩資飼誌雌摯賜" # noqa: E501
    + "諮示字寺次耳自似児事侍治持時滋慈辞磁餌璽鹿式識軸七叱失室疾執湿嫉漆質実芝写社車舎者射捨赦斜煮遮謝邪蛇尺借酌釈爵若弱寂手主守朱取狩首殊珠" # noqa: E501
    + "酒腫種趣寿受呪授需儒樹収囚州舟秀周宗拾秋臭修袖終羞習週就衆集愁酬醜蹴襲十汁充住柔重従渋銃獣縦叔祝宿淑粛縮塾熟出述術俊春瞬旬巡盾准殉純循" # noqa: E501
    + "順準潤遵処初所書庶暑署緒諸女如助序叙徐除小升少召匠床抄肖尚招承昇松沼昭宵将消症祥称笑唱商渉章紹訟勝掌晶焼焦硝粧詔証象傷奨照詳彰障憧衝賞" # noqa: E501
    + "償礁鐘上丈冗条状乗城浄剰常情場畳蒸縄壌嬢錠譲醸色拭食植殖飾触嘱織職辱尻心申伸臣芯身辛侵信津神唇娠振浸真針深紳進森診寝慎新審震薪親人刃仁" # noqa: E501
    + "尽迅甚陣尋腎須図水吹垂炊帥粋衰推酔遂睡穂随髄枢崇数据杉裾寸瀬是井世正生成西声制姓征性青斉政星牲省凄逝清盛婿晴勢聖誠精製誓静請整醒税夕斥" # noqa: E501
    + "石赤昔析席脊隻惜戚責跡積績籍切折拙窃接設雪摂節説舌絶千川仙占先宣専泉浅洗染扇栓旋船戦煎羨腺詮践箋銭潜線遷選薦繊鮮全前善然禅漸膳繕狙阻祖" # noqa: E501
    + "租素措粗組疎訴塑遡礎双壮早争走奏相荘草送倉捜挿桑巣掃曹曽爽窓創喪痩葬装僧想層総遭槽踪操燥霜騒藻造像増憎蔵贈臓即束足促則息捉速側測俗族属" # noqa: E501
    + "賊続卒率存村孫尊損遜他多汰打妥唾堕惰駄太対体耐待怠胎退帯泰堆袋逮替貸隊滞態戴大代台第題滝宅択沢卓拓託濯諾濁但達脱奪棚誰丹旦担単炭胆探淡" # noqa: E501
    + "短嘆端綻誕鍛団男段断弾暖談壇地池知値恥致遅痴稚置緻竹畜逐蓄築秩窒茶着嫡中仲虫沖宙忠抽注昼柱衷酎鋳駐著貯丁弔庁兆町長挑帳張彫眺釣頂鳥朝貼" # noqa: E501
    + "超腸跳徴嘲潮澄調聴懲直勅捗沈珍朕陳賃鎮追椎墜通痛塚漬坪爪鶴低呈廷弟定底抵邸亭貞帝訂庭逓停偵堤提程艇締諦泥的笛摘滴適敵溺迭哲鉄徹撤天典店" # noqa: E501
    + "点展添転塡田伝殿電斗吐妬徒途都渡塗賭土奴努度怒刀冬灯当投豆東到逃倒凍唐島桃討透党悼盗陶塔搭棟湯痘登答等筒統稲踏糖頭謄藤闘騰同洞胴動堂童" # noqa: E501
    + "道働銅導瞳峠匿特得督徳篤毒独読栃凸突届屯豚頓貪鈍曇丼那奈内梨謎鍋南軟難二尼弐匂肉虹日入乳尿任妊忍認寧熱年念捻粘燃悩納能脳農濃把波派破覇" # noqa: E501
    + "馬婆罵拝杯背肺俳配排敗廃輩売倍梅培陪媒買賠白伯拍泊迫剝舶博薄麦漠縛爆箱箸畑肌八鉢発髪伐抜罰閥反半氾犯帆汎伴判坂阪板版班畔般販斑飯搬煩頒" # noqa: E501
    + "範繁藩晩番蛮盤比皮妃否批彼披肥非卑飛疲秘被悲扉費碑罷避尾眉美備微鼻膝肘匹必泌筆姫百氷表俵票評漂標苗秒病描猫品浜貧賓頻敏瓶不夫父付布扶府" # noqa: E501
    + "怖阜附訃負赴浮婦符富普腐敷膚賦譜侮武部舞封風伏服副幅復福腹複覆払沸仏物粉紛雰噴墳憤奮分文聞丙平兵併並柄陛閉塀幣弊蔽餅米壁璧癖別蔑片辺返" # noqa: E501
    + "変偏遍編弁便勉歩保哺捕補舗母募墓慕暮簿方包芳邦奉宝抱放法泡胞俸倣峰砲崩訪報蜂豊飽褒縫亡乏忙坊妨忘防房肪某冒剖紡望傍帽棒貿貌暴膨謀頰北木" # noqa: E501
    + "朴牧睦僕墨撲没勃堀本奔翻凡盆麻摩磨魔毎妹枚昧埋幕膜枕又末抹万満慢漫未味魅岬密蜜脈妙民眠矛務無夢霧娘名命明迷冥盟銘鳴滅免面綿麺茂模毛妄盲" # noqa: E501
    + "耗猛網目黙門紋問冶夜野弥厄役約訳薬躍闇由油喩愉諭輸癒唯友有勇幽悠郵湧猶裕遊雄誘憂融優与予余誉預幼用羊妖洋要容庸揚揺葉陽溶腰様瘍踊窯養擁" # noqa: E501
    + "謡曜抑沃浴欲翌翼拉裸羅来雷頼絡落酪辣乱卵覧濫藍欄吏利里理痢裏履璃離陸立律慄略柳流留竜粒隆硫侶旅虜慮了両良料涼猟陵量僚領寮療瞭糧力緑林厘" # noqa: E501
    + "倫輪隣臨瑠涙累塁類令礼冷励戻例鈴零霊隷齢麗暦歴列劣烈裂恋連廉練錬呂炉賂路露老労弄郎朗浪廊楼漏籠六録麓論和話賄脇惑枠湾腕" # noqa: E501
    + _BASE_VOCABS["punctuation"]
    + "。・〜°—、「」『』【】゛》《〉〈" # Japanese punctuation
    + _BASE_VOCABS["currency"]
)
# Korean: the full set of precomposed Hangul syllable blocks (U+AC00..),
# plus digits, western + CJK punctuation, and the won currency sign.
VOCABS["korean"] = (
    _BASE_VOCABS["digits"]
    + "가각갂갃간갅갆갇갈갉갊갋갌갍갎갏감갑값갓갔강갖갗갘같갚갛개객갞갟갠갡갢갣갤갥갦갧갨갩갪갫갬갭갮갯갰갱갲갳갴갵갶갷갸갹갺갻갼갽갾갿걀걁걂걃걄걅걆걇걈" # noqa: E501
    + "걉걊걋걌걍걎걏걐걑걒걓걔걕걖걗걘걙걚걛걜걝걞걟걠걡걢걣걤걥걦걧걨걩걪걫걬걭걮걯거걱걲걳건걵걶걷걸걹걺걻걼걽걾걿검겁겂것겄겅겆겇겈겉겊겋게겍겎겏겐겑" # noqa: E501
    + "겒겓겔겕겖겗겘겙겚겛겜겝겞겟겠겡겢겣겤겥겦겧겨격겪겫견겭겮겯결겱겲겳겴겵겶겷겸겹겺겻겼경겾겿곀곁곂곃계곅곆곇곈곉곊곋곌곍곎곏곐곑곒곓곔곕곖곗곘곙곚" # noqa: E501
    + "곛곜곝곞곟고곡곢곣곤곥곦곧골곩곪곫곬곭곮곯곰곱곲곳곴공곶곷곸곹곺곻과곽곾곿관괁괂괃괄괅괆괇괈괉괊괋괌괍괎괏괐광괒괓괔괕괖괗괘괙괚괛괜괝괞괟괠괡괢괣" # noqa: E501
    + "괤괥괦괧괨괩괪괫괬괭괮괯괰괱괲괳괴괵괶괷괸괹괺괻괼괽괾괿굀굁굂굃굄굅굆굇굈굉굊굋굌굍굎굏교굑굒굓굔굕굖굗굘굙굚굛굜굝굞굟굠굡굢굣굤굥굦굧굨굩굪굫구" # noqa: E501
    + "국굮굯군굱굲굳굴굵굶굷굸굹굺굻굼굽굾굿궀궁궂궃궄궅궆궇궈궉궊궋권궍궎궏궐궑궒궓궔궕궖궗궘궙궚궛궜궝궞궟궠궡궢궣궤궥궦궧궨궩궪궫궬궭궮궯궰궱궲궳궴궵" # noqa: E501
    + "궶궷궸궹궺궻궼궽궾궿귀귁귂귃귄귅귆귇귈귉귊귋귌귍귎귏귐귑귒귓귔귕귖귗귘귙귚귛규귝귞귟균귡귢귣귤귥귦귧귨귩귪귫귬귭귮귯귰귱귲귳귴귵귶귷그극귺귻근귽귾" # noqa: E501
    + "귿글긁긂긃긄긅긆긇금급긊긋긌긍긎긏긐긑긒긓긔긕긖긗긘긙긚긛긜긝긞긟긠긡긢긣긤긥긦긧긨긩긪긫긬긭긮긯기긱긲긳긴긵긶긷길긹긺긻긼긽긾긿김깁깂깃깄깅깆깇" # noqa: E501
    + "깈깉깊깋까깍깎깏깐깑깒깓깔깕깖깗깘깙깚깛깜깝깞깟깠깡깢깣깤깥깦깧깨깩깪깫깬깭깮깯깰깱깲깳깴깵깶깷깸깹깺깻깼깽깾깿꺀꺁꺂꺃꺄꺅꺆꺇꺈꺉꺊꺋꺌꺍꺎꺏꺐" # noqa: E501
    + "꺑꺒꺓꺔꺕꺖꺗꺘꺙꺚꺛꺜꺝꺞꺟꺠꺡꺢꺣꺤꺥꺦꺧꺨꺩꺪꺫꺬꺭꺮꺯꺰꺱꺲꺳꺴꺵꺶꺷꺸꺹꺺꺻꺼꺽꺾꺿껀껁껂껃껄껅껆껇껈껉껊껋껌껍껎껏껐껑껒껓껔껕껖껗께껙" # noqa: E501
    + "껚껛껜껝껞껟껠껡껢껣껤껥껦껧껨껩껪껫껬껭껮껯껰껱껲껳껴껵껶껷껸껹껺껻껼껽껾껿꼀꼁꼂꼃꼄꼅꼆꼇꼈꼉꼊꼋꼌꼍꼎꼏꼐꼑꼒꼓꼔꼕꼖꼗꼘꼙꼚꼛꼜꼝꼞꼟꼠꼡꼢" # noqa: E501
    + "꼣꼤꼥꼦꼧꼨꼩꼪꼫꼬꼭꼮꼯꼰꼱꼲꼳꼴꼵꼶꼷꼸꼹꼺꼻꼼꼽꼾꼿꽀꽁꽂꽃꽄꽅꽆꽇꽈꽉꽊꽋꽌꽍꽎꽏꽐꽑꽒꽓꽔꽕꽖꽗꽘꽙꽚꽛꽜꽝꽞꽟꽠꽡꽢꽣꽤꽥꽦꽧꽨꽩꽪꽫" # noqa: E501
    + "꽬꽭꽮꽯꽰꽱꽲꽳꽴꽵꽶꽷꽸꽹꽺꽻꽼꽽꽾꽿꾀꾁꾂꾃꾄꾅꾆꾇꾈꾉꾊꾋꾌꾍꾎꾏꾐꾑꾒꾓꾔꾕꾖꾗꾘꾙꾚꾛꾜꾝꾞꾟꾠꾡꾢꾣꾤꾥꾦꾧꾨꾩꾪꾫꾬꾭꾮꾯꾰꾱꾲꾳꾴" # noqa: E501
    + "꾵꾶꾷꾸꾹꾺꾻꾼꾽꾾꾿꿀꿁꿂꿃꿄꿅꿆꿇꿈꿉꿊꿋꿌꿍꿎꿏꿐꿑꿒꿓꿔꿕꿖꿗꿘꿙꿚꿛꿜꿝꿞꿟꿠꿡꿢꿣꿤꿥꿦꿧꿨꿩꿪꿫꿬꿭꿮꿯꿰꿱꿲꿳꿴꿵꿶꿷꿸꿹꿺꿻꿼꿽" # noqa: E501
    + "꿾꿿뀀뀁뀂뀃뀄뀅뀆뀇뀈뀉뀊뀋뀌뀍뀎뀏뀐뀑뀒뀓뀔뀕뀖뀗뀘뀙뀚뀛뀜뀝뀞뀟뀠뀡뀢뀣뀤뀥뀦뀧뀨뀩뀪뀫뀬뀭뀮뀯뀰뀱뀲뀳뀴뀵뀶뀷뀸뀹뀺뀻뀼뀽뀾뀿끀끁끂끃끄끅끆" # noqa: E501
    + "끇끈끉끊끋끌끍끎끏끐끑끒끓끔끕끖끗끘끙끚끛끜끝끞끟끠끡끢끣끤끥끦끧끨끩끪끫끬끭끮끯끰끱끲끳끴끵끶끷끸끹끺끻끼끽끾끿낀낁낂낃낄낅낆낇낈낉낊낋낌낍낎낏" # noqa: E501
    + "낐낑낒낓낔낕낖낗나낙낚낛난낝낞낟날낡낢낣낤낥낦낧남납낪낫났낭낮낯낰낱낲낳내낵낶낷낸낹낺낻낼낽낾낿냀냁냂냃냄냅냆냇냈냉냊냋냌냍냎냏냐냑냒냓냔냕냖냗냘" # noqa: E501
    + "냙냚냛냜냝냞냟냠냡냢냣냤냥냦냧냨냩냪냫냬냭냮냯냰냱냲냳냴냵냶냷냸냹냺냻냼냽냾냿넀넁넂넃넄넅넆넇너넉넊넋넌넍넎넏널넑넒넓넔넕넖넗넘넙넚넛넜넝넞넟넠넡" # noqa: E501
    + "넢넣네넥넦넧넨넩넪넫넬넭넮넯넰넱넲넳넴넵넶넷넸넹넺넻넼넽넾넿녀녁녂녃년녅녆녇녈녉녊녋녌녍녎녏념녑녒녓녔녕녖녗녘녙녚녛녜녝녞녟녠녡녢녣녤녥녦녧녨녩녪" # noqa: E501
    + "녫녬녭녮녯녰녱녲녳녴녵녶녷노녹녺녻논녽녾녿놀놁놂놃놄놅놆놇놈놉놊놋놌농놎놏놐놑높놓놔놕놖놗놘놙놚놛놜놝놞놟놠놡놢놣놤놥놦놧놨놩놪놫놬놭놮놯놰놱놲놳" # noqa: E501
    + "놴놵놶놷놸놹놺놻놼놽놾놿뇀뇁뇂뇃뇄뇅뇆뇇뇈뇉뇊뇋뇌뇍뇎뇏뇐뇑뇒뇓뇔뇕뇖뇗뇘뇙뇚뇛뇜뇝뇞뇟뇠뇡뇢뇣뇤뇥뇦뇧뇨뇩뇪뇫뇬뇭뇮뇯뇰뇱뇲뇳뇴뇵뇶뇷뇸뇹뇺뇻뇼" # noqa: E501
    + "뇽뇾뇿눀눁눂눃누눅눆눇눈눉눊눋눌눍눎눏눐눑눒눓눔눕눖눗눘눙눚눛눜눝눞눟눠눡눢눣눤눥눦눧눨눩눪눫눬눭눮눯눰눱눲눳눴눵눶눷눸눹눺눻눼눽눾눿뉀뉁뉂뉃뉄뉅" # noqa: E501
    + "뉆뉇뉈뉉뉊뉋뉌뉍뉎뉏뉐뉑뉒뉓뉔뉕뉖뉗뉘뉙뉚뉛뉜뉝뉞뉟뉠뉡뉢뉣뉤뉥뉦뉧뉨뉩뉪뉫뉬뉭뉮뉯뉰뉱뉲뉳뉴뉵뉶뉷뉸뉹뉺뉻뉼뉽뉾뉿늀늁늂늃늄늅늆늇늈늉늊늋늌늍늎" # noqa: E501
    + "늏느늑늒늓는늕늖늗늘늙늚늛늜늝늞늟늠늡늢늣늤능늦늧늨늩늪늫늬늭늮늯늰늱늲늳늴늵늶늷늸늹늺늻늼늽늾늿닀닁닂닃닄닅닆닇니닉닊닋닌닍닎닏닐닑닒닓닔닕닖닗" # noqa: E501
    + "님닙닚닛닜닝닞닟닠닡닢닣다닥닦닧단닩닪닫달닭닮닯닰닱닲닳담답닶닷닸당닺닻닼닽닾닿대댁댂댃댄댅댆댇댈댉댊댋댌댍댎댏댐댑댒댓댔댕댖댗댘댙댚댛댜댝댞댟댠" # noqa: E501
    + "댡댢댣댤댥댦댧댨댩댪댫댬댭댮댯댰댱댲댳댴댵댶댷댸댹댺댻댼댽댾댿덀덁덂덃덄덅덆덇덈덉덊덋덌덍덎덏덐덑덒덓더덕덖덗던덙덚덛덜덝덞덟덠덡덢덣덤덥덦덧덨덩" # noqa: E501
    + "덪덫덬덭덮덯데덱덲덳덴덵덶덷델덹덺덻덼덽덾덿뎀뎁뎂뎃뎄뎅뎆뎇뎈뎉뎊뎋뎌뎍뎎뎏뎐뎑뎒뎓뎔뎕뎖뎗뎘뎙뎚뎛뎜뎝뎞뎟뎠뎡뎢뎣뎤뎥뎦뎧뎨뎩뎪뎫뎬뎭뎮뎯뎰뎱뎲" # noqa: E501
    + "뎳뎴뎵뎶뎷뎸뎹뎺뎻뎼뎽뎾뎿돀돁돂돃도독돆돇돈돉돊돋돌돍돎돏돐돑돒돓돔돕돖돗돘동돚돛돜돝돞돟돠돡돢돣돤돥돦돧돨돩돪돫돬돭돮돯돰돱돲돳돴돵돶돷돸돹돺돻" # noqa: E501
    + "돼돽돾돿됀됁됂됃됄됅됆됇됈됉됊됋됌됍됎됏됐됑됒됓됔됕됖됗되됙됚됛된됝됞됟될됡됢됣됤됥됦됧됨됩됪됫됬됭됮됯됰됱됲됳됴됵됶됷됸됹됺됻됼됽됾됿둀둁둂둃둄" # noqa: E501
    + "둅둆둇둈둉둊둋둌둍둎둏두둑둒둓둔둕둖둗둘둙둚둛둜둝둞둟둠둡둢둣둤둥둦둧둨둩둪둫둬둭둮둯둰둱둲둳둴둵둶둷둸둹둺둻둼둽둾둿뒀뒁뒂뒃뒄뒅뒆뒇뒈뒉뒊뒋뒌뒍" # noqa: E501
    + "뒎뒏뒐뒑뒒뒓뒔뒕뒖뒗뒘뒙뒚뒛뒜뒝뒞뒟뒠뒡뒢뒣뒤뒥뒦뒧뒨뒩뒪뒫뒬뒭뒮뒯뒰뒱뒲뒳뒴뒵뒶뒷뒸뒹뒺뒻뒼뒽뒾뒿듀듁듂듃듄듅듆듇듈듉듊듋듌듍듎듏듐듑듒듓듔듕듖" # noqa: E501
    + "듗듘듙듚듛드득듞듟든듡듢듣들듥듦듧듨듩듪듫듬듭듮듯듰등듲듳듴듵듶듷듸듹듺듻듼듽듾듿딀딁딂딃딄딅딆딇딈딉딊딋딌딍딎딏딐딑딒딓디딕딖딗딘딙딚딛딜딝딞딟" # noqa: E501
    + "딠딡딢딣딤딥딦딧딨딩딪딫딬딭딮딯따딱딲딳딴딵딶딷딸딹딺딻딼딽딾딿땀땁땂땃땄땅땆땇땈땉땊땋때땍땎땏땐땑땒땓땔땕땖땗땘땙땚땛땜땝땞땟땠땡땢땣땤땥땦땧땨" # noqa: E501
    + "땩땪땫땬땭땮땯땰땱땲땳땴땵땶땷땸땹땺땻땼땽땾땿떀떁떂떃떄떅떆떇떈떉떊떋떌떍떎떏떐떑떒떓떔떕떖떗떘떙떚떛떜떝떞떟떠떡떢떣떤떥떦떧떨떩떪떫떬떭떮떯떰떱" # noqa: E501
    + "떲떳떴떵떶떷떸떹떺떻떼떽떾떿뗀뗁뗂뗃뗄뗅뗆뗇뗈뗉뗊뗋뗌뗍뗎뗏뗐뗑뗒뗓뗔뗕뗖뗗뗘뗙뗚뗛뗜뗝뗞뗟뗠뗡뗢뗣뗤뗥뗦뗧뗨뗩뗪뗫뗬뗭뗮뗯뗰뗱뗲뗳뗴뗵뗶뗷뗸뗹뗺" # noqa: E501
    + "뗻뗼뗽뗾뗿똀똁똂똃똄똅똆똇똈똉똊똋똌똍똎똏또똑똒똓똔똕똖똗똘똙똚똛똜똝똞똟똠똡똢똣똤똥똦똧똨똩똪똫똬똭똮똯똰똱똲똳똴똵똶똷똸똹똺똻똼똽똾똿뙀뙁뙂뙃" # noqa: E501
    + "뙄뙅뙆뙇뙈뙉뙊뙋뙌뙍뙎뙏뙐뙑뙒뙓뙔뙕뙖뙗뙘뙙뙚뙛뙜뙝뙞뙟뙠뙡뙢뙣뙤뙥뙦뙧뙨뙩뙪뙫뙬뙭뙮뙯뙰뙱뙲뙳뙴뙵뙶뙷뙸뙹뙺뙻뙼뙽뙾뙿뚀뚁뚂뚃뚄뚅뚆뚇뚈뚉뚊뚋뚌" # noqa: E501
    + "뚍뚎뚏뚐뚑뚒뚓뚔뚕뚖뚗뚘뚙뚚뚛뚜뚝뚞뚟뚠뚡뚢뚣뚤뚥뚦뚧뚨뚩뚪뚫뚬뚭뚮뚯뚰뚱뚲뚳뚴뚵뚶뚷뚸뚹뚺뚻뚼뚽뚾뚿뛀뛁뛂뛃뛄뛅뛆뛇뛈뛉뛊뛋뛌뛍뛎뛏뛐뛑뛒뛓뛔뛕" # noqa: E501
    + "뛖뛗뛘뛙뛚뛛뛜뛝뛞뛟뛠뛡뛢뛣뛤뛥뛦뛧뛨뛩뛪뛫뛬뛭뛮뛯뛰뛱뛲뛳뛴뛵뛶뛷뛸뛹뛺뛻뛼뛽뛾뛿뜀뜁뜂뜃뜄뜅뜆뜇뜈뜉뜊뜋뜌뜍뜎뜏뜐뜑뜒뜓뜔뜕뜖뜗뜘뜙뜚뜛뜜뜝뜞" # noqa: E501
    + "뜟뜠뜡뜢뜣뜤뜥뜦뜧뜨뜩뜪뜫뜬뜭뜮뜯뜰뜱뜲뜳뜴뜵뜶뜷뜸뜹뜺뜻뜼뜽뜾뜿띀띁띂띃띄띅띆띇띈띉띊띋띌띍띎띏띐띑띒띓띔띕띖띗띘띙띚띛띜띝띞띟띠띡띢띣띤띥띦띧" # noqa: E501
    + "띨띩띪띫띬띭띮띯띰띱띲띳띴띵띶띷띸띹띺띻라락띾띿란랁랂랃랄랅랆랇랈랉랊랋람랍랎랏랐랑랒랓랔랕랖랗래랙랚랛랜랝랞랟랠랡랢랣랤랥랦랧램랩랪랫랬랭랮랯랰" # noqa: E501
    + "랱랲랳랴략랶랷랸랹랺랻랼랽랾랿럀럁럂럃럄럅럆럇럈량럊럋럌럍럎럏럐럑럒럓럔럕럖럗럘럙럚럛럜럝럞럟럠럡럢럣럤럥럦럧럨럩럪럫러럭럮럯런럱럲럳럴럵럶럷럸럹" # noqa: E501
    + "럺럻럼럽럾럿렀렁렂렃렄렅렆렇레렉렊렋렌렍렎렏렐렑렒렓렔렕렖렗렘렙렚렛렜렝렞렟렠렡렢렣려력렦렧련렩렪렫렬렭렮렯렰렱렲렳렴렵렶렷렸령렺렻렼렽렾렿례롁롂" # noqa: E501
    + "롃롄롅롆롇롈롉롊롋롌롍롎롏롐롑롒롓롔롕롖롗롘롙롚롛로록롞롟론롡롢롣롤롥롦롧롨롩롪롫롬롭롮롯롰롱롲롳롴롵롶롷롸롹롺롻롼롽롾롿뢀뢁뢂뢃뢄뢅뢆뢇뢈뢉뢊뢋" # noqa: E501
    + "뢌뢍뢎뢏뢐뢑뢒뢓뢔뢕뢖뢗뢘뢙뢚뢛뢜뢝뢞뢟뢠뢡뢢뢣뢤뢥뢦뢧뢨뢩뢪뢫뢬뢭뢮뢯뢰뢱뢲뢳뢴뢵뢶뢷뢸뢹뢺뢻뢼뢽뢾뢿룀룁룂룃룄룅룆룇룈룉룊룋료룍룎룏룐룑룒룓룔" # noqa: E501
    + "룕룖룗룘룙룚룛룜룝룞룟룠룡룢룣룤룥룦룧루룩룪룫룬룭룮룯룰룱룲룳룴룵룶룷룸룹룺룻룼룽룾룿뤀뤁뤂뤃뤄뤅뤆뤇뤈뤉뤊뤋뤌뤍뤎뤏뤐뤑뤒뤓뤔뤕뤖뤗뤘뤙뤚뤛뤜뤝" # noqa: E501
    + "뤞뤟뤠뤡뤢뤣뤤뤥뤦뤧뤨뤩뤪뤫뤬뤭뤮뤯뤰뤱뤲뤳뤴뤵뤶뤷뤸뤹뤺뤻뤼뤽뤾뤿륀륁륂륃륄륅륆륇륈륉륊륋륌륍륎륏륐륑륒륓륔륕륖륗류륙륚륛륜륝륞륟률륡륢륣륤륥륦" # noqa: E501
    + "륧륨륩륪륫륬륭륮륯륰륱륲륳르륵륶륷른륹륺륻를륽륾륿릀릁릂릃름릅릆릇릈릉릊릋릌릍릎릏릐릑릒릓릔릕릖릗릘릙릚릛릜릝릞릟릠릡릢릣릤릥릦릧릨릩릪릫리릭릮릯" # noqa: E501
    + "린릱릲릳릴릵릶릷릸릹릺릻림립릾릿맀링맂맃맄맅맆맇마막맊맋만맍많맏말맑맒맓맔맕맖맗맘맙맚맛맜망맞맟맠맡맢맣매맥맦맧맨맩맪맫맬맭맮맯맰맱맲맳맴맵맶맷맸" # noqa: E501
    + "맹맺맻맼맽맾맿먀먁먂먃먄먅먆먇먈먉먊먋먌먍먎먏먐먑먒먓먔먕먖먗먘먙먚먛먜먝먞먟먠먡먢먣먤먥먦먧먨먩먪먫먬먭먮먯먰먱먲먳먴먵먶먷머먹먺먻먼먽먾먿멀멁" # noqa: E501
    + "멂멃멄멅멆멇멈멉멊멋멌멍멎멏멐멑멒멓메멕멖멗멘멙멚멛멜멝멞멟멠멡멢멣멤멥멦멧멨멩멪멫멬멭멮멯며멱멲멳면멵멶멷멸멹멺멻멼멽멾멿몀몁몂몃몄명몆몇몈몉몊" # noqa: E501
    + "몋몌몍몎몏몐몑몒몓몔몕몖몗몘몙몚몛몜몝몞몟몠몡몢몣몤몥몦몧모목몪몫몬몭몮몯몰몱몲몳몴몵몶몷몸몹몺못몼몽몾몿뫀뫁뫂뫃뫄뫅뫆뫇뫈뫉뫊뫋뫌뫍뫎뫏뫐뫑뫒뫓" # noqa: E501
    + "뫔뫕뫖뫗뫘뫙뫚뫛뫜뫝뫞뫟뫠뫡뫢뫣뫤뫥뫦뫧뫨뫩뫪뫫뫬뫭뫮뫯뫰뫱뫲뫳뫴뫵뫶뫷뫸뫹뫺뫻뫼뫽뫾뫿묀묁묂묃묄묅묆묇묈묉묊묋묌묍묎묏묐묑묒묓묔묕묖묗묘묙묚묛묜" # noqa: E501
    + "묝묞묟묠묡묢묣묤묥묦묧묨묩묪묫묬묭묮묯묰묱묲묳무묵묶묷문묹묺묻물묽묾묿뭀뭁뭂뭃뭄뭅뭆뭇뭈뭉뭊뭋뭌뭍뭎뭏뭐뭑뭒뭓뭔뭕뭖뭗뭘뭙뭚뭛뭜뭝뭞뭟뭠뭡뭢뭣뭤뭥" # noqa: E501
    + "뭦뭧뭨뭩뭪뭫뭬뭭뭮뭯뭰뭱뭲뭳뭴뭵뭶뭷뭸뭹뭺뭻뭼뭽뭾뭿뮀뮁뮂뮃뮄뮅뮆뮇뮈뮉뮊뮋뮌뮍뮎뮏뮐뮑뮒뮓뮔뮕뮖뮗뮘뮙뮚뮛뮜뮝뮞뮟뮠뮡뮢뮣뮤뮥뮦뮧뮨뮩뮪뮫뮬뮭뮮" # noqa: E501
    + "뮯뮰뮱뮲뮳뮴뮵뮶뮷뮸뮹뮺뮻뮼뮽뮾뮿므믁믂믃믄믅믆믇믈믉믊믋믌믍믎믏믐믑믒믓믔믕믖믗믘믙믚믛믜믝믞믟믠믡믢믣믤믥믦믧믨믩믪믫믬믭믮믯믰믱믲믳믴믵믶믷" # noqa: E501
    + "미믹믺믻민믽믾믿밀밁밂밃밄밅밆밇밈밉밊밋밌밍밎및밐밑밒밓바박밖밗반밙밚받발밝밞밟밠밡밢밣밤밥밦밧밨방밪밫밬밭밮밯배백밲밳밴밵밶밷밸밹밺밻밼밽밾밿뱀" # noqa: E501
    + "뱁뱂뱃뱄뱅뱆뱇뱈뱉뱊뱋뱌뱍뱎뱏뱐뱑뱒뱓뱔뱕뱖뱗뱘뱙뱚뱛뱜뱝뱞뱟뱠뱡뱢뱣뱤뱥뱦뱧뱨뱩뱪뱫뱬뱭뱮뱯뱰뱱뱲뱳뱴뱵뱶뱷뱸뱹뱺뱻뱼뱽뱾뱿벀벁벂벃버벅벆벇번벉" # noqa: E501
    + "벊벋벌벍벎벏벐벑벒벓범법벖벗벘벙벚벛벜벝벞벟베벡벢벣벤벥벦벧벨벩벪벫벬벭벮벯벰벱벲벳벴벵벶벷벸벹벺벻벼벽벾벿변볁볂볃별볅볆볇볈볉볊볋볌볍볎볏볐병볒" # noqa: E501
    + "볓볔볕볖볗볘볙볚볛볜볝볞볟볠볡볢볣볤볥볦볧볨볩볪볫볬볭볮볯볰볱볲볳보복볶볷본볹볺볻볼볽볾볿봀봁봂봃봄봅봆봇봈봉봊봋봌봍봎봏봐봑봒봓봔봕봖봗봘봙봚봛" # noqa: E501
    + "봜봝봞봟봠봡봢봣봤봥봦봧봨봩봪봫봬봭봮봯봰봱봲봳봴봵봶봷봸봹봺봻봼봽봾봿뵀뵁뵂뵃뵄뵅뵆뵇뵈뵉뵊뵋뵌뵍뵎뵏뵐뵑뵒뵓뵔뵕뵖뵗뵘뵙뵚뵛뵜뵝뵞뵟뵠뵡뵢뵣뵤" # noqa: E501
    + "뵥뵦뵧뵨뵩뵪뵫뵬뵭뵮뵯뵰뵱뵲뵳뵴뵵뵶뵷뵸뵹뵺뵻뵼뵽뵾뵿부북붂붃분붅붆붇불붉붊붋붌붍붎붏붐붑붒붓붔붕붖붗붘붙붚붛붜붝붞붟붠붡붢붣붤붥붦붧붨붩붪붫붬붭" # noqa: E501
    + "붮붯붰붱붲붳붴붵붶붷붸붹붺붻붼붽붾붿뷀뷁뷂뷃뷄뷅뷆뷇뷈뷉뷊뷋뷌뷍뷎뷏뷐뷑뷒뷓뷔뷕뷖뷗뷘뷙뷚뷛뷜뷝뷞뷟뷠뷡뷢뷣뷤뷥뷦뷧뷨뷩뷪뷫뷬뷭뷮뷯뷰뷱뷲뷳뷴뷵뷶" # noqa: E501
    + "뷷뷸뷹뷺뷻뷼뷽뷾뷿븀븁븂븃븄븅븆븇븈븉븊븋브븍븎븏븐븑븒븓블븕븖븗븘븙븚븛븜븝븞븟븠븡븢븣븤븥븦븧븨븩븪븫븬븭븮븯븰븱븲븳븴븵븶븷븸븹븺븻븼븽븾븿" # noqa: E501
    + "빀빁빂빃비빅빆빇빈빉빊빋빌빍빎빏빐빑빒빓빔빕빖빗빘빙빚빛빜빝빞빟빠빡빢빣빤빥빦빧빨빩빪빫빬빭빮빯빰빱빲빳빴빵빶빷빸빹빺빻빼빽빾빿뺀뺁뺂뺃뺄뺅뺆뺇뺈" # noqa: E501
    + "뺉뺊뺋뺌뺍뺎뺏뺐뺑뺒뺓뺔뺕뺖뺗뺘뺙뺚뺛뺜뺝뺞뺟뺠뺡뺢뺣뺤뺥뺦뺧뺨뺩뺪뺫뺬뺭뺮뺯뺰뺱뺲뺳뺴뺵뺶뺷뺸뺹뺺뺻뺼뺽뺾뺿뻀뻁뻂뻃뻄뻅뻆뻇뻈뻉뻊뻋뻌뻍뻎뻏뻐뻑" # noqa: E501
    + "뻒뻓뻔뻕뻖뻗뻘뻙뻚뻛뻜뻝뻞뻟뻠뻡뻢뻣뻤뻥뻦뻧뻨뻩뻪뻫뻬뻭뻮뻯뻰뻱뻲뻳뻴뻵뻶뻷뻸뻹뻺뻻뻼뻽뻾뻿뼀뼁뼂뼃뼄뼅뼆뼇뼈뼉뼊뼋뼌뼍뼎뼏뼐뼑뼒뼓뼔뼕뼖뼗뼘뼙뼚" # noqa: E501
    + "뼛뼜뼝뼞뼟뼠뼡뼢뼣뼤뼥뼦뼧뼨뼩뼪뼫뼬뼭뼮뼯뼰뼱뼲뼳뼴뼵뼶뼷뼸뼹뼺뼻뼼뼽뼾뼿뽀뽁뽂뽃뽄뽅뽆뽇뽈뽉뽊뽋뽌뽍뽎뽏뽐뽑뽒뽓뽔뽕뽖뽗뽘뽙뽚뽛뽜뽝뽞뽟뽠뽡뽢뽣" # noqa: E501
    + "뽤뽥뽦뽧뽨뽩뽪뽫뽬뽭뽮뽯뽰뽱뽲뽳뽴뽵뽶뽷뽸뽹뽺뽻뽼뽽뽾뽿뾀뾁뾂뾃뾄뾅뾆뾇뾈뾉뾊뾋뾌뾍뾎뾏뾐뾑뾒뾓뾔뾕뾖뾗뾘뾙뾚뾛뾜뾝뾞뾟뾠뾡뾢뾣뾤뾥뾦뾧뾨뾩뾪뾫뾬" # noqa: E501
    + "뾭뾮뾯뾰뾱뾲뾳뾴뾵뾶뾷뾸뾹뾺뾻뾼뾽뾾뾿뿀뿁뿂뿃뿄뿅뿆뿇뿈뿉뿊뿋뿌뿍뿎뿏뿐뿑뿒뿓뿔뿕뿖뿗뿘뿙뿚뿛뿜뿝뿞뿟뿠뿡뿢뿣뿤뿥뿦뿧뿨뿩뿪뿫뿬뿭뿮뿯뿰뿱뿲뿳뿴뿵" # noqa: E501
    + "뿶뿷뿸뿹뿺뿻뿼뿽뿾뿿쀀쀁쀂쀃쀄쀅쀆쀇쀈쀉쀊쀋쀌쀍쀎쀏쀐쀑쀒쀓쀔쀕쀖쀗쀘쀙쀚쀛쀜쀝쀞쀟쀠쀡쀢쀣쀤쀥쀦쀧쀨쀩쀪쀫쀬쀭쀮쀯쀰쀱쀲쀳쀴쀵쀶쀷쀸쀹쀺쀻쀼쀽쀾" # noqa: E501
    + "쀿쁀쁁쁂쁃쁄쁅쁆쁇쁈쁉쁊쁋쁌쁍쁎쁏쁐쁑쁒쁓쁔쁕쁖쁗쁘쁙쁚쁛쁜쁝쁞쁟쁠쁡쁢쁣쁤쁥쁦쁧쁨쁩쁪쁫쁬쁭쁮쁯쁰쁱쁲쁳쁴쁵쁶쁷쁸쁹쁺쁻쁼쁽쁾쁿삀삁삂삃삄삅삆삇" # noqa: E501
    + "삈삉삊삋삌삍삎삏삐삑삒삓삔삕삖삗삘삙삚삛삜삝삞삟삠삡삢삣삤삥삦삧삨삩삪삫사삭삮삯산삱삲삳살삵삶삷삸삹삺삻삼삽삾삿샀상샂샃샄샅샆샇새색샊샋샌샍샎샏샐" # noqa: E501
    + "샑샒샓샔샕샖샗샘샙샚샛샜생샞샟샠샡샢샣샤샥샦샧샨샩샪샫샬샭샮샯샰샱샲샳샴샵샶샷샸샹샺샻샼샽샾샿섀섁섂섃섄섅섆섇섈섉섊섋섌섍섎섏섐섑섒섓섔섕섖섗섘섙" # noqa: E501
    + "섚섛서석섞섟선섡섢섣설섥섦섧섨섩섪섫섬섭섮섯섰성섲섳섴섵섶섷세섹섺섻센섽섾섿셀셁셂셃셄셅셆셇셈셉셊셋셌셍셎셏셐셑셒셓셔셕셖셗션셙셚셛셜셝셞셟셠셡셢" # noqa: E501
    + "셣셤셥셦셧셨셩셪셫셬셭셮셯셰셱셲셳셴셵셶셷셸셹셺셻셼셽셾셿솀솁솂솃솄솅솆솇솈솉솊솋소속솎솏손솑솒솓솔솕솖솗솘솙솚솛솜솝솞솟솠송솢솣솤솥솦솧솨솩솪솫" # noqa: E501
    + "솬솭솮솯솰솱솲솳솴솵솶솷솸솹솺솻솼솽솾솿쇀쇁쇂쇃쇄쇅쇆쇇쇈쇉쇊쇋쇌쇍쇎쇏쇐쇑쇒쇓쇔쇕쇖쇗쇘쇙쇚쇛쇜쇝쇞쇟쇠쇡쇢쇣쇤쇥쇦쇧쇨쇩쇪쇫쇬쇭쇮쇯쇰쇱쇲쇳쇴" # noqa: E501
    + "쇵쇶쇷쇸쇹쇺쇻쇼쇽쇾쇿숀숁숂숃숄숅숆숇숈숉숊숋숌숍숎숏숐숑숒숓숔숕숖숗수숙숚숛순숝숞숟술숡숢숣숤숥숦숧숨숩숪숫숬숭숮숯숰숱숲숳숴숵숶숷숸숹숺숻숼숽" # noqa: E501
    + "숾숿쉀쉁쉂쉃쉄쉅쉆쉇쉈쉉쉊쉋쉌쉍쉎쉏쉐쉑쉒쉓쉔쉕쉖쉗쉘쉙쉚쉛쉜쉝쉞쉟쉠쉡쉢쉣쉤쉥쉦쉧쉨쉩쉪쉫쉬쉭쉮쉯쉰쉱쉲쉳쉴쉵쉶쉷쉸쉹쉺쉻쉼쉽쉾쉿슀슁슂슃슄슅슆" # noqa: E501
    + "슇슈슉슊슋슌슍슎슏슐슑슒슓슔슕슖슗슘슙슚슛슜슝슞슟슠슡슢슣스슥슦슧슨슩슪슫슬슭슮슯슰슱슲슳슴습슶슷슸승슺슻슼슽슾슿싀싁싂싃싄싅싆싇싈싉싊싋싌싍싎싏" # noqa: E501
    + "싐싑싒싓싔싕싖싗싘싙싚싛시식싞싟신싡싢싣실싥싦싧싨싩싪싫심십싮싯싰싱싲싳싴싵싶싷싸싹싺싻싼싽싾싿쌀쌁쌂쌃쌄쌅쌆쌇쌈쌉쌊쌋쌌쌍쌎쌏쌐쌑쌒쌓쌔쌕쌖쌗쌘" # noqa: E501
    + "쌙쌚쌛쌜쌝쌞쌟쌠쌡쌢쌣쌤쌥쌦쌧쌨쌩쌪쌫쌬쌭쌮쌯쌰쌱쌲쌳쌴쌵쌶쌷쌸쌹쌺쌻쌼쌽쌾쌿썀썁썂썃썄썅썆썇썈썉썊썋썌썍썎썏썐썑썒썓썔썕썖썗썘썙썚썛썜썝썞썟썠썡" # noqa: E501
    + "썢썣썤썥썦썧써썩썪썫썬썭썮썯썰썱썲썳썴썵썶썷썸썹썺썻썼썽썾썿쎀쎁쎂쎃쎄쎅쎆쎇쎈쎉쎊쎋쎌쎍쎎쎏쎐쎑쎒쎓쎔쎕쎖쎗쎘쎙쎚쎛쎜쎝쎞쎟쎠쎡쎢쎣쎤쎥쎦쎧쎨쎩쎪" # noqa: E501
    + "쎫쎬쎭쎮쎯쎰쎱쎲쎳쎴쎵쎶쎷쎸쎹쎺쎻쎼쎽쎾쎿쏀쏁쏂쏃쏄쏅쏆쏇쏈쏉쏊쏋쏌쏍쏎쏏쏐쏑쏒쏓쏔쏕쏖쏗쏘쏙쏚쏛쏜쏝쏞쏟쏠쏡쏢쏣쏤쏥쏦쏧쏨쏩쏪쏫쏬쏭쏮쏯쏰쏱쏲쏳" # noqa: E501
    + "쏴쏵쏶쏷쏸쏹쏺쏻쏼쏽쏾쏿쐀쐁쐂쐃쐄쐅쐆쐇쐈쐉쐊쐋쐌쐍쐎쐏쐐쐑쐒쐓쐔쐕쐖쐗쐘쐙쐚쐛쐜쐝쐞쐟쐠쐡쐢쐣쐤쐥쐦쐧쐨쐩쐪쐫쐬쐭쐮쐯쐰쐱쐲쐳쐴쐵쐶쐷쐸쐹쐺쐻쐼" # noqa: E501
    + "쐽쐾쐿쑀쑁쑂쑃쑄쑅쑆쑇쑈쑉쑊쑋쑌쑍쑎쑏쑐쑑쑒쑓쑔쑕쑖쑗쑘쑙쑚쑛쑜쑝쑞쑟쑠쑡쑢쑣쑤쑥쑦쑧쑨쑩쑪쑫쑬쑭쑮쑯쑰쑱쑲쑳쑴쑵쑶쑷쑸쑹쑺쑻쑼쑽쑾쑿쒀쒁쒂쒃쒄쒅" # noqa: E501
    + "쒆쒇쒈쒉쒊쒋쒌쒍쒎쒏쒐쒑쒒쒓쒔쒕쒖쒗쒘쒙쒚쒛쒜쒝쒞쒟쒠쒡쒢쒣쒤쒥쒦쒧쒨쒩쒪쒫쒬쒭쒮쒯쒰쒱쒲쒳쒴쒵쒶쒷쒸쒹쒺쒻쒼쒽쒾쒿쓀쓁쓂쓃쓄쓅쓆쓇쓈쓉쓊쓋쓌쓍쓎" # noqa: E501
    + "쓏쓐쓑쓒쓓쓔쓕쓖쓗쓘쓙쓚쓛쓜쓝쓞쓟쓠쓡쓢쓣쓤쓥쓦쓧쓨쓩쓪쓫쓬쓭쓮쓯쓰쓱쓲쓳쓴쓵쓶쓷쓸쓹쓺쓻쓼쓽쓾쓿씀씁씂씃씄씅씆씇씈씉씊씋씌씍씎씏씐씑씒씓씔씕씖씗" # noqa: E501
    + "씘씙씚씛씜씝씞씟씠씡씢씣씤씥씦씧씨씩씪씫씬씭씮씯씰씱씲씳씴씵씶씷씸씹씺씻씼씽씾씿앀앁앂앃아악앆앇안앉않앋알앍앎앏앐앑앒앓암압앖앗았앙앚앛앜앝앞앟애" # noqa: E501
    + "액앢앣앤앥앦앧앨앩앪앫앬앭앮앯앰앱앲앳앴앵앶앷앸앹앺앻야약앾앿얀얁얂얃얄얅얆얇얈얉얊얋얌얍얎얏얐양얒얓얔얕얖얗얘얙얚얛얜얝얞얟얠얡얢얣얤얥얦얧얨얩" # noqa: E501
    + "얪얫얬얭얮얯얰얱얲얳어억얶얷언얹얺얻얼얽얾얿엀엁엂엃엄업없엇었엉엊엋엌엍엎엏에엑엒엓엔엕엖엗엘엙엚엛엜엝엞엟엠엡엢엣엤엥엦엧엨엩엪엫여역엮엯연엱엲" # noqa: E501
    + "엳열엵엶엷엸엹엺엻염엽엾엿였영옂옃옄옅옆옇예옉옊옋옌옍옎옏옐옑옒옓옔옕옖옗옘옙옚옛옜옝옞옟옠옡옢옣오옥옦옧온옩옪옫올옭옮옯옰옱옲옳옴옵옶옷옸옹옺옻" # noqa: E501
    + "옼옽옾옿와왁왂왃완왅왆왇왈왉왊왋왌왍왎왏왐왑왒왓왔왕왖왗왘왙왚왛왜왝왞왟왠왡왢왣왤왥왦왧왨왩왪왫왬왭왮왯왰왱왲왳왴왵왶왷외왹왺왻왼왽왾왿욀욁욂욃욄" # noqa: E501
    + "욅욆욇욈욉욊욋욌욍욎욏욐욑욒욓요욕욖욗욘욙욚욛욜욝욞욟욠욡욢욣욤욥욦욧욨용욪욫욬욭욮욯우욱욲욳운욵욶욷울욹욺욻욼욽욾욿움웁웂웃웄웅웆웇웈웉웊웋워웍" # noqa: E501
    + "웎웏원웑웒웓월웕웖웗웘웙웚웛웜웝웞웟웠웡웢웣웤웥웦웧웨웩웪웫웬웭웮웯웰웱웲웳웴웵웶웷웸웹웺웻웼웽웾웿윀윁윂윃위윅윆윇윈윉윊윋윌윍윎윏윐윑윒윓윔윕윖" # noqa: E501
    + "윗윘윙윚윛윜윝윞윟유육윢윣윤윥윦윧율윩윪윫윬윭윮윯윰윱윲윳윴융윶윷윸윹윺윻으윽윾윿은읁읂읃을읅읆읇읈읉읊읋음읍읎읏읐응읒읓읔읕읖읗의읙읚읛읜읝읞읟" # noqa: E501
    + "읠읡읢읣읤읥읦읧읨읩읪읫읬읭읮읯읰읱읲읳이익읶읷인읹읺읻일읽읾읿잀잁잂잃임입잆잇있잉잊잋잌잍잎잏자작잒잓잔잕잖잗잘잙잚잛잜잝잞잟잠잡잢잣잤장잦잧잨" # noqa: E501
    + "잩잪잫재잭잮잯잰잱잲잳잴잵잶잷잸잹잺잻잼잽잾잿쟀쟁쟂쟃쟄쟅쟆쟇쟈쟉쟊쟋쟌쟍쟎쟏쟐쟑쟒쟓쟔쟕쟖쟗쟘쟙쟚쟛쟜쟝쟞쟟쟠쟡쟢쟣쟤쟥쟦쟧쟨쟩쟪쟫쟬쟭쟮쟯쟰쟱" # noqa: E501
    + "쟲쟳쟴쟵쟶쟷쟸쟹쟺쟻쟼쟽쟾쟿저적젂젃전젅젆젇절젉젊젋젌젍젎젏점접젒젓젔정젖젗젘젙젚젛제젝젞젟젠젡젢젣젤젥젦젧젨젩젪젫젬젭젮젯젰젱젲젳젴젵젶젷져젹젺" # noqa: E501
    + "젻젼젽젾젿졀졁졂졃졄졅졆졇졈졉졊졋졌졍졎졏졐졑졒졓졔졕졖졗졘졙졚졛졜졝졞졟졠졡졢졣졤졥졦졧졨졩졪졫졬졭졮졯조족졲졳존졵졶졷졸졹졺졻졼졽졾졿좀좁좂좃" # noqa: E501
    + "좄종좆좇좈좉좊좋좌좍좎좏좐좑좒좓좔좕좖좗좘좙좚좛좜좝좞좟좠좡좢좣좤좥좦좧좨좩좪좫좬좭좮좯좰좱좲좳좴좵좶좷좸좹좺좻좼좽좾좿죀죁죂죃죄죅죆죇죈죉죊죋죌" # noqa: E501
    + "죍죎죏죐죑죒죓죔죕죖죗죘죙죚죛죜죝죞죟죠죡죢죣죤죥죦죧죨죩죪죫죬죭죮죯죰죱죲죳죴죵죶죷죸죹죺죻주죽죾죿준줁줂줃줄줅줆줇줈줉줊줋줌줍줎줏줐중줒줓줔줕" # noqa: E501
    + "줖줗줘줙줚줛줜줝줞줟줠줡줢줣줤줥줦줧줨줩줪줫줬줭줮줯줰줱줲줳줴줵줶줷줸줹줺줻줼줽줾줿쥀쥁쥂쥃쥄쥅쥆쥇쥈쥉쥊쥋쥌쥍쥎쥏쥐쥑쥒쥓쥔쥕쥖쥗쥘쥙쥚쥛쥜쥝쥞" # noqa: E501
    + "쥟쥠쥡쥢쥣쥤쥥쥦쥧쥨쥩쥪쥫쥬쥭쥮쥯쥰쥱쥲쥳쥴쥵쥶쥷쥸쥹쥺쥻쥼쥽쥾쥿즀즁즂즃즄즅즆즇즈즉즊즋즌즍즎즏즐즑즒즓즔즕즖즗즘즙즚즛즜증즞즟즠즡즢즣즤즥즦즧" # noqa: E501
    + "즨즩즪즫즬즭즮즯즰즱즲즳즴즵즶즷즸즹즺즻즼즽즾즿지직짂짃진짅짆짇질짉짊짋짌짍짎짏짐집짒짓짔징짖짗짘짙짚짛짜짝짞짟짠짡짢짣짤짥짦짧짨짩짪짫짬짭짮짯짰" # noqa: E501
    + "짱짲짳짴짵짶짷째짹짺짻짼짽짾짿쨀쨁쨂쨃쨄쨅쨆쨇쨈쨉쨊쨋쨌쨍쨎쨏쨐쨑쨒쨓쨔쨕쨖쨗쨘쨙쨚쨛쨜쨝쨞쨟쨠쨡쨢쨣쨤쨥쨦쨧쨨쨩쨪쨫쨬쨭쨮쨯쨰쨱쨲쨳쨴쨵쨶쨷쨸쨹" # noqa: E501
    + "쨺쨻쨼쨽쨾쨿쩀쩁쩂쩃쩄쩅쩆쩇쩈쩉쩊쩋쩌쩍쩎쩏쩐쩑쩒쩓쩔쩕쩖쩗쩘쩙쩚쩛쩜쩝쩞쩟쩠쩡쩢쩣쩤쩥쩦쩧쩨쩩쩪쩫쩬쩭쩮쩯쩰쩱쩲쩳쩴쩵쩶쩷쩸쩹쩺쩻쩼쩽쩾쩿쪀쪁쪂" # noqa: E501
    + "쪃쪄쪅쪆쪇쪈쪉쪊쪋쪌쪍쪎쪏쪐쪑쪒쪓쪔쪕쪖쪗쪘쪙쪚쪛쪜쪝쪞쪟쪠쪡쪢쪣쪤쪥쪦쪧쪨쪩쪪쪫쪬쪭쪮쪯쪰쪱쪲쪳쪴쪵쪶쪷쪸쪹쪺쪻쪼쪽쪾쪿쫀쫁쫂쫃쫄쫅쫆쫇쫈쫉쫊쫋" # noqa: E501
    + "쫌쫍쫎쫏쫐쫑쫒쫓쫔쫕쫖쫗쫘쫙쫚쫛쫜쫝쫞쫟쫠쫡쫢쫣쫤쫥쫦쫧쫨쫩쫪쫫쫬쫭쫮쫯쫰쫱쫲쫳쫴쫵쫶쫷쫸쫹쫺쫻쫼쫽쫾쫿쬀쬁쬂쬃쬄쬅쬆쬇쬈쬉쬊쬋쬌쬍쬎쬏쬐쬑쬒쬓쬔" # noqa: E501
    + "쬕쬖쬗쬘쬙쬚쬛쬜쬝쬞쬟쬠쬡쬢쬣쬤쬥쬦쬧쬨쬩쬪쬫쬬쬭쬮쬯쬰쬱쬲쬳쬴쬵쬶쬷쬸쬹쬺쬻쬼쬽쬾쬿쭀쭁쭂쭃쭄쭅쭆쭇쭈쭉쭊쭋쭌쭍쭎쭏쭐쭑쭒쭓쭔쭕쭖쭗쭘쭙쭚쭛쭜쭝" # noqa: E501
    + "쭞쭟쭠쭡쭢쭣쭤쭥쭦쭧쭨쭩쭪쭫쭬쭭쭮쭯쭰쭱쭲쭳쭴쭵쭶쭷쭸쭹쭺쭻쭼쭽쭾쭿쮀쮁쮂쮃쮄쮅쮆쮇쮈쮉쮊쮋쮌쮍쮎쮏쮐쮑쮒쮓쮔쮕쮖쮗쮘쮙쮚쮛쮜쮝쮞쮟쮠쮡쮢쮣쮤쮥쮦" # noqa: E501
    + "쮧쮨쮩쮪쮫쮬쮭쮮쮯쮰쮱쮲쮳쮴쮵쮶쮷쮸쮹쮺쮻쮼쮽쮾쮿쯀쯁쯂쯃쯄쯅쯆쯇쯈쯉쯊쯋쯌쯍쯎쯏쯐쯑쯒쯓쯔쯕쯖쯗쯘쯙쯚쯛쯜쯝쯞쯟쯠쯡쯢쯣쯤쯥쯦쯧쯨쯩쯪쯫쯬쯭쯮쯯" # noqa: E501
    + "쯰쯱쯲쯳쯴쯵쯶쯷쯸쯹쯺쯻쯼쯽쯾쯿찀찁찂찃찄찅찆찇찈찉찊찋찌찍찎찏찐찑찒찓찔찕찖찗찘찙찚찛찜찝찞찟찠찡찢찣찤찥찦찧차착찪찫찬찭찮찯찰찱찲찳찴찵찶찷참" # noqa: E501
    + "찹찺찻찼창찾찿챀챁챂챃채책챆챇챈챉챊챋챌챍챎챏챐챑챒챓챔챕챖챗챘챙챚챛챜챝챞챟챠챡챢챣챤챥챦챧챨챩챪챫챬챭챮챯챰챱챲챳챴챵챶챷챸챹챺챻챼챽챾챿첀첁" # noqa: E501
    + "첂첃첄첅첆첇첈첉첊첋첌첍첎첏첐첑첒첓첔첕첖첗처척첚첛천첝첞첟철첡첢첣첤첥첦첧첨첩첪첫첬청첮첯첰첱첲첳체첵첶첷첸첹첺첻첼첽첾첿쳀쳁쳂쳃쳄쳅쳆쳇쳈쳉쳊" # noqa: E501
    + "쳋쳌쳍쳎쳏쳐쳑쳒쳓쳔쳕쳖쳗쳘쳙쳚쳛쳜쳝쳞쳟쳠쳡쳢쳣쳤쳥쳦쳧쳨쳩쳪쳫쳬쳭쳮쳯쳰쳱쳲쳳쳴쳵쳶쳷쳸쳹쳺쳻쳼쳽쳾쳿촀촁촂촃촄촅촆촇초촉촊촋촌촍촎촏촐촑촒촓" # noqa: E501
    + "촔촕촖촗촘촙촚촛촜총촞촟촠촡촢촣촤촥촦촧촨촩촪촫촬촭촮촯촰촱촲촳촴촵촶촷촸촹촺촻촼촽촾촿쵀쵁쵂쵃쵄쵅쵆쵇쵈쵉쵊쵋쵌쵍쵎쵏쵐쵑쵒쵓쵔쵕쵖쵗쵘쵙쵚쵛최" # noqa: E501
    + "쵝쵞쵟쵠쵡쵢쵣쵤쵥쵦쵧쵨쵩쵪쵫쵬쵭쵮쵯쵰쵱쵲쵳쵴쵵쵶쵷쵸쵹쵺쵻쵼쵽쵾쵿춀춁춂춃춄춅춆춇춈춉춊춋춌춍춎춏춐춑춒춓추축춖춗춘춙춚춛출춝춞춟춠춡춢춣춤춥" # noqa: E501
    + "춦춧춨충춪춫춬춭춮춯춰춱춲춳춴춵춶춷춸춹춺춻춼춽춾춿췀췁췂췃췄췅췆췇췈췉췊췋췌췍췎췏췐췑췒췓췔췕췖췗췘췙췚췛췜췝췞췟췠췡췢췣췤췥췦췧취췩췪췫췬췭췮" # noqa: E501
    + "췯췰췱췲췳췴췵췶췷췸췹췺췻췼췽췾췿츀츁츂츃츄츅츆츇츈츉츊츋츌츍츎츏츐츑츒츓츔츕츖츗츘츙츚츛츜츝츞츟츠측츢츣츤츥츦츧츨츩츪츫츬츭츮츯츰츱츲츳츴층츶츷" # noqa: E501
    + "츸츹츺츻츼츽츾츿칀칁칂칃칄칅칆칇칈칉칊칋칌칍칎칏칐칑칒칓칔칕칖칗치칙칚칛친칝칞칟칠칡칢칣칤칥칦칧침칩칪칫칬칭칮칯칰칱칲칳카칵칶칷칸칹칺칻칼칽칾칿캀" # noqa: E501
    + "캁캂캃캄캅캆캇캈캉캊캋캌캍캎캏캐캑캒캓캔캕캖캗캘캙캚캛캜캝캞캟캠캡캢캣캤캥캦캧캨캩캪캫캬캭캮캯캰캱캲캳캴캵캶캷캸캹캺캻캼캽캾캿컀컁컂컃컄컅컆컇컈컉" # noqa: E501
    + "컊컋컌컍컎컏컐컑컒컓컔컕컖컗컘컙컚컛컜컝컞컟컠컡컢컣커컥컦컧컨컩컪컫컬컭컮컯컰컱컲컳컴컵컶컷컸컹컺컻컼컽컾컿케켁켂켃켄켅켆켇켈켉켊켋켌켍켎켏켐켑켒" # noqa: E501
    + "켓켔켕켖켗켘켙켚켛켜켝켞켟켠켡켢켣켤켥켦켧켨켩켪켫켬켭켮켯켰켱켲켳켴켵켶켷켸켹켺켻켼켽켾켿콀콁콂콃콄콅콆콇콈콉콊콋콌콍콎콏콐콑콒콓코콕콖콗콘콙콚콛" # noqa: E501
    + "콜콝콞콟콠콡콢콣콤콥콦콧콨콩콪콫콬콭콮콯콰콱콲콳콴콵콶콷콸콹콺콻콼콽콾콿쾀쾁쾂쾃쾄쾅쾆쾇쾈쾉쾊쾋쾌쾍쾎쾏쾐쾑쾒쾓쾔쾕쾖쾗쾘쾙쾚쾛쾜쾝쾞쾟쾠쾡쾢쾣쾤" # noqa: E501
    + "쾥쾦쾧쾨쾩쾪쾫쾬쾭쾮쾯쾰쾱쾲쾳쾴쾵쾶쾷쾸쾹쾺쾻쾼쾽쾾쾿쿀쿁쿂쿃쿄쿅쿆쿇쿈쿉쿊쿋쿌쿍쿎쿏쿐쿑쿒쿓쿔쿕쿖쿗쿘쿙쿚쿛쿜쿝쿞쿟쿠쿡쿢쿣쿤쿥쿦쿧쿨쿩쿪쿫쿬쿭" # noqa: E501
    + "쿮쿯쿰쿱쿲쿳쿴쿵쿶쿷쿸쿹쿺쿻쿼쿽쿾쿿퀀퀁퀂퀃퀄퀅퀆퀇퀈퀉퀊퀋퀌퀍퀎퀏퀐퀑퀒퀓퀔퀕퀖퀗퀘퀙퀚퀛퀜퀝퀞퀟퀠퀡퀢퀣퀤퀥퀦퀧퀨퀩퀪퀫퀬퀭퀮퀯퀰퀱퀲퀳퀴퀵퀶" # noqa: E501
    + "퀷퀸퀹퀺퀻퀼퀽퀾퀿큀큁큂큃큄큅큆큇큈큉큊큋큌큍큎큏큐큑큒큓큔큕큖큗큘큙큚큛큜큝큞큟큠큡큢큣큤큥큦큧큨큩큪큫크큭큮큯큰큱큲큳클큵큶큷큸큹큺큻큼큽큾큿" # noqa: E501
    + "킀킁킂킃킄킅킆킇킈킉킊킋킌킍킎킏킐킑킒킓킔킕킖킗킘킙킚킛킜킝킞킟킠킡킢킣키킥킦킧킨킩킪킫킬킭킮킯킰킱킲킳킴킵킶킷킸킹킺킻킼킽킾킿타탁탂탃탄탅탆탇탈" # noqa: E501
    + "탉탊탋탌탍탎탏탐탑탒탓탔탕탖탗탘탙탚탛태택탞탟탠탡탢탣탤탥탦탧탨탩탪탫탬탭탮탯탰탱탲탳탴탵탶탷탸탹탺탻탼탽탾탿턀턁턂턃턄턅턆턇턈턉턊턋턌턍턎턏턐턑" # noqa: E501
    + "턒턓턔턕턖턗턘턙턚턛턜턝턞턟턠턡턢턣턤턥턦턧턨턩턪턫턬턭턮턯터턱턲턳턴턵턶턷털턹턺턻턼턽턾턿텀텁텂텃텄텅텆텇텈텉텊텋테텍텎텏텐텑텒텓텔텕텖텗텘텙텚" # noqa: E501
    + "텛템텝텞텟텠텡텢텣텤텥텦텧텨텩텪텫텬텭텮텯텰텱텲텳텴텵텶텷텸텹텺텻텼텽텾텿톀톁톂톃톄톅톆톇톈톉톊톋톌톍톎톏톐톑톒톓톔톕톖톗톘톙톚톛톜톝톞톟토톡톢톣" # noqa: E501
    + "톤톥톦톧톨톩톪톫톬톭톮톯톰톱톲톳톴통톶톷톸톹톺톻톼톽톾톿퇀퇁퇂퇃퇄퇅퇆퇇퇈퇉퇊퇋퇌퇍퇎퇏퇐퇑퇒퇓퇔퇕퇖퇗퇘퇙퇚퇛퇜퇝퇞퇟퇠퇡퇢퇣퇤퇥퇦퇧퇨퇩퇪퇫퇬" # noqa: E501
    + "퇭퇮퇯퇰퇱퇲퇳퇴퇵퇶퇷퇸퇹퇺퇻퇼퇽퇾퇿툀툁툂툃툄툅툆툇툈툉툊툋툌툍툎툏툐툑툒툓툔툕툖툗툘툙툚툛툜툝툞툟툠툡툢툣툤툥툦툧툨툩툪툫투툭툮툯툰툱툲툳툴툵" # noqa: E501
    + "툶툷툸툹툺툻툼툽툾툿퉀퉁퉂퉃퉄퉅퉆퉇퉈퉉퉊퉋퉌퉍퉎퉏퉐퉑퉒퉓퉔퉕퉖퉗퉘퉙퉚퉛퉜퉝퉞퉟퉠퉡퉢퉣퉤퉥퉦퉧퉨퉩퉪퉫퉬퉭퉮퉯퉰퉱퉲퉳퉴퉵퉶퉷퉸퉹퉺퉻퉼퉽퉾" # noqa: E501
    + "퉿튀튁튂튃튄튅튆튇튈튉튊튋튌튍튎튏튐튑튒튓튔튕튖튗튘튙튚튛튜튝튞튟튠튡튢튣튤튥튦튧튨튩튪튫튬튭튮튯튰튱튲튳튴튵튶튷트특튺튻튼튽튾튿틀틁틂틃틄틅틆틇" # noqa: E501
    + "틈틉틊틋틌틍틎틏틐틑틒틓틔틕틖틗틘틙틚틛틜틝틞틟틠틡틢틣틤틥틦틧틨틩틪틫틬틭틮틯티틱틲틳틴틵틶틷틸틹틺틻틼틽틾틿팀팁팂팃팄팅팆팇팈팉팊팋파팍팎팏판" # noqa: E501
    + "팑팒팓팔팕팖팗팘팙팚팛팜팝팞팟팠팡팢팣팤팥팦팧패팩팪팫팬팭팮팯팰팱팲팳팴팵팶팷팸팹팺팻팼팽팾팿퍀퍁퍂퍃퍄퍅퍆퍇퍈퍉퍊퍋퍌퍍퍎퍏퍐퍑퍒퍓퍔퍕퍖퍗퍘퍙" # noqa: E501
    + "퍚퍛퍜퍝퍞퍟퍠퍡퍢퍣퍤퍥퍦퍧퍨퍩퍪퍫퍬퍭퍮퍯퍰퍱퍲퍳퍴퍵퍶퍷퍸퍹퍺퍻퍼퍽퍾퍿펀펁펂펃펄펅펆펇펈펉펊펋펌펍펎펏펐펑펒펓펔펕펖펗페펙펚펛펜펝펞펟펠펡펢" # noqa: E501
    + "펣펤펥펦펧펨펩펪펫펬펭펮펯펰펱펲펳펴펵펶펷편펹펺펻펼펽펾펿폀폁폂폃폄폅폆폇폈평폊폋폌폍폎폏폐폑폒폓폔폕폖폗폘폙폚폛폜폝폞폟폠폡폢폣폤폥폦폧폨폩폪폫" # noqa: E501
    + "포폭폮폯폰폱폲폳폴폵폶폷폸폹폺폻폼폽폾폿퐀퐁퐂퐃퐄퐅퐆퐇퐈퐉퐊퐋퐌퐍퐎퐏퐐퐑퐒퐓퐔퐕퐖퐗퐘퐙퐚퐛퐜퐝퐞퐟퐠퐡퐢퐣퐤퐥퐦퐧퐨퐩퐪퐫퐬퐭퐮퐯퐰퐱퐲퐳퐴" # noqa: E501
    + "퐵퐶퐷퐸퐹퐺퐻퐼퐽퐾퐿푀푁푂푃푄푅푆푇푈푉푊푋푌푍푎푏푐푑푒푓푔푕푖푗푘푙푚푛표푝푞푟푠푡푢푣푤푥푦푧푨푩푪푫푬푭푮푯푰푱푲푳푴푵푶푷푸푹푺푻푼푽" # noqa: E501
    + "푾푿풀풁풂풃풄풅풆풇품풉풊풋풌풍풎풏풐풑풒풓풔풕풖풗풘풙풚풛풜풝풞풟풠풡풢풣풤풥풦풧풨풩풪풫풬풭풮풯풰풱풲풳풴풵풶풷풸풹풺풻풼풽풾풿퓀퓁퓂퓃퓄퓅퓆" # noqa: E501
    + "퓇퓈퓉퓊퓋퓌퓍퓎퓏퓐퓑퓒퓓퓔퓕퓖퓗퓘퓙퓚퓛퓜퓝퓞퓟퓠퓡퓢퓣퓤퓥퓦퓧퓨퓩퓪퓫퓬퓭퓮퓯퓰퓱퓲퓳퓴퓵퓶퓷퓸퓹퓺퓻퓼퓽퓾퓿픀픁픂픃프픅픆픇픈픉픊픋플픍픎픏" # noqa: E501
    + "픐픑픒픓픔픕픖픗픘픙픚픛픜픝픞픟픠픡픢픣픤픥픦픧픨픩픪픫픬픭픮픯픰픱픲픳픴픵픶픷픸픹픺픻피픽픾픿핀핁핂핃필핅핆핇핈핉핊핋핌핍핎핏핐핑핒핓핔핕핖핗하" # noqa: E501
    + "학핚핛한핝핞핟할핡핢핣핤핥핦핧함합핪핫핬항핮핯핰핱핲핳해핵핶핷핸핹핺핻핼핽핾핿햀햁햂햃햄햅햆햇했행햊햋햌햍햎햏햐햑햒햓햔햕햖햗햘햙햚햛햜햝햞햟햠햡" # noqa: E501
    + "햢햣햤향햦햧햨햩햪햫햬햭햮햯햰햱햲햳햴햵햶햷햸햹햺햻햼햽햾햿헀헁헂헃헄헅헆헇허헉헊헋헌헍헎헏헐헑헒헓헔헕헖헗험헙헚헛헜헝헞헟헠헡헢헣헤헥헦헧헨헩헪" # noqa: E501
    + "헫헬헭헮헯헰헱헲헳헴헵헶헷헸헹헺헻헼헽헾헿혀혁혂혃현혅혆혇혈혉혊혋혌혍혎혏혐협혒혓혔형혖혗혘혙혚혛혜혝혞혟혠혡혢혣혤혥혦혧혨혩혪혫혬혭혮혯혰혱혲혳" # noqa: E501
    + "혴혵혶혷호혹혺혻혼혽혾혿홀홁홂홃홄홅홆홇홈홉홊홋홌홍홎홏홐홑홒홓화확홖홗환홙홚홛활홝홞홟홠홡홢홣홤홥홦홧홨황홪홫홬홭홮홯홰홱홲홳홴홵홶홷홸홹홺홻홼" # noqa: E501
    + "홽홾홿횀횁횂횃횄횅횆횇횈횉횊횋회획횎횏횐횑횒횓횔횕횖횗횘횙횚횛횜횝횞횟횠횡횢횣횤횥횦횧효횩횪횫횬횭횮횯횰횱횲횳횴횵횶횷횸횹횺횻횼횽횾횿훀훁훂훃후훅" # noqa: E501
    + "훆훇훈훉훊훋훌훍훎훏훐훑훒훓훔훕훖훗훘훙훚훛훜훝훞훟훠훡훢훣훤훥훦훧훨훩훪훫훬훭훮훯훰훱훲훳훴훵훶훷훸훹훺훻훼훽훾훿휀휁휂휃휄휅휆휇휈휉휊휋휌휍휎" # noqa: E501
    + "휏휐휑휒휓휔휕휖휗휘휙휚휛휜휝휞휟휠휡휢휣휤휥휦휧휨휩휪휫휬휭휮휯휰휱휲휳휴휵휶휷휸휹휺휻휼휽휾휿흀흁흂흃흄흅흆흇흈흉흊흋흌흍흎흏흐흑흒흓흔흕흖흗" # noqa: E501
    + "흘흙흚흛흜흝흞흟흠흡흢흣흤흥흦흧흨흩흪흫희흭흮흯흰흱흲흳흴흵흶흷흸흹흺흻흼흽흾흿힀힁힂힃힄힅힆힇히힉힊힋힌힍힎힏힐힑힒힓힔힕힖힗힘힙힚힛힜힝힞힟힠" # noqa: E501
    + "힡힢힣"
    + _BASE_VOCABS["punctuation"]
    + "。・〜°—、「」『』【】゛》《〉〈" # punctuation
    + _BASE_VOCABS["currency"]
    + "₩" # currency (won)
)
VOCABS["simplified_chinese"] = (
_BASE_VOCABS["digits"]
+ "㐀㐁㐂㐃㐄㐅㐆㐇㐈㐉㐊㐋㐌㐍㐎㐏㐐㐑㐒㐓㐔㐕㐖㐗㐘㐙㐚㐛㐜㐝㐞㐟㐠㐡㐢㐣㐤㐥㐦㐧㐨㐩㐪㐫㐬㐭㐮㐯㐰㐱㐲㐳㐴㐵㐶㐷㐸㐹㐺㐻㐼㐽㐾㐿㑀㑁㑂" # noqa: E501
+ "㑄㑅㑆㑇㑈㑉㑊㑋㑌㑍㑎㑏㑐㑑㑒㑓㑔㑕㑖㑗㑘㑙㑚㑛㑜㑝㑞㑟㑠㑡㑢㑣㑤㑥㑦㑧㑨㑩㑪㑫㑬㑭㑮㑯㑰㑱㑲㑳㑴㑵㑶㑷㑸㑹㑺㑻㑼㑽㑾㑿㒀㒁㒂㒃㒄㒅㒆" # noqa: E501
+ "㒇㒈㒉㒊㒋㒌㒍㒎㒏㒐㒑㒒㒓㒔㒕㒖㒗㒘㒙㒚㒛㒜㒝㒞㒟㒠㒡㒢㒣㒤㒥㒦㒧㒨㒩㒪㒫㒬㒭㒮㒯㒰㒱㒲㒳㒴㒵㒶㒷㒸㒹㒺㒻㒼㒽㒾㒿㓀㓁㓂㓃㓄㓅㓆㓇㓈㓉" # noqa: E501
+ "㓊㓋㓌㓍㓎㓏㓐㓑㓒㓓㓔㓕㓖㓗㓘㓙㓚㓛㓜㓝㓞㓟㓠㓡㓢㓣㓤㓥㓦㓧㓨㓩㓪㓫㓬㓭㓮㓯㓰㓱㓲㓳㓴㓵㓶㓷㓸㓹㓺㓻㓼㓽㓾㓿㔀㔁㔂㔃㔄㔅㔆㔇㔈㔉㔊㔋㔌" # noqa: E501
+ "㔍㔎㔏㔐㔑㔒㔓㔔㔕㔖㔗㔘㔙㔚㔛㔜㔝㔞㔟㔠㔡㔢㔣㔤㔥㔦㔧㔨㔩㔪㔫㔬㔭㔮㔯㔰㔱㔲㔳㔴㔵㔶㔷㔸㔹㔺㔻㔼㔽㔾㔿㕀㕁㕂㕃㕄㕅㕆㕇㕈㕉㕊㕋㕌㕍㕎㕏" # noqa: E501
+ "㕐㕑㕒㕓㕔㕕㕖㕗㕘㕙㕚㕛㕜㕝㕞㕟㕠㕡㕢㕣㕤㕥㕦㕧㕨㕩㕪㕫㕬㕭㕮㕯㕰㕱㕲㕳㕴㕵㕶㕷㕸㕹㕺㕻㕼㕽㕾㕿㖀㖁㖂㖃㖄㖅㖆㖇㖈㖉㖊㖋㖌㖍㖎㖏㖐㖑㖒" # noqa: E501
+ "㖓㖔㖕㖖㖗㖘㖙㖚㖛㖜㖝㖞㖟㖠㖡㖢㖣㖤㖥㖦㖧㖨㖩㖪㖫㖬㖭㖮㖯㖰㖱㖲㖳㖴㖵㖶㖷㖸㖹㖺㖻㖼㖽㖾㖿㗀㗁㗂㗃㗄㗅㗆㗇㗈㗉㗊㗋㗌㗍㗎㗏㗐㗑㗒㗓㗔㗕" # noqa: E501
+ "㗖㗗㗘㗙㗚㗛㗜㗝㗞㗟㗠㗡㗢㗣㗤㗥㗦㗧㗨㗩㗪㗫㗬㗭㗮㗯㗰㗱㗲㗳㗴㗵㗶㗷㗸㗹㗺㗻㗼㗽㗾㗿㘀㘁㘂㘃㘄㘅㘆㘇㘈㘉㘊㘋㘌㘍㘎㘏㘐㘑㘒㘓㘔㘕㘖㘗㘘" # noqa: E501
+ "㘙㘚㘛㘜㘝㘞㘟㘠㘡㘢㘣㘤㘥㘦㘧㘨㘩㘪㘫㘬㘭㘮㘯㘰㘱㘲㘳㘴㘵㘶㘷㘸㘹㘺㘻㘼㘽㘾㘿㙀㙁㙂㙃㙄㙅㙆㙇㙈㙉㙊㙋㙌㙍㙎㙏㙐㙑㙒㙓㙔㙕㙖㙗㙘㙙㙚㙛" # noqa: E501
+ "㙜㙝㙞㙟㙠㙡㙢㙣㙤㙥㙦㙧㙨㙩㙪㙫㙬㙭㙮㙯㙰㙱㙲㙳㙴㙵㙶㙷㙸㙹㙺㙻㙼㙽㙾㙿㚀㚁㚂㚃㚄㚅㚆㚇㚈㚉㚊㚋㚌㚍㚎㚏㚐㚑㚒㚓㚔㚕㚖㚗㚘㚙㚚㚛㚜㚝㚞" # noqa: E501
+ "㚟㚠㚡㚢㚣㚤㚥㚦㚧㚨㚩㚪㚫㚬㚭㚮㚯㚰㚱㚲㚳㚴㚵㚶㚷㚸㚹㚺㚻㚼㚽㚾㚿㛀㛁㛂㛃㛄㛅㛆㛇㛈㛉㛊㛋㛌㛍㛎㛏㛐㛑㛒㛓㛔㛕㛖㛗㛘㛙㛚㛛㛜㛝㛞㛟㛠㛡" # noqa: E501
+ "㛢㛣㛤㛥㛦㛧㛨㛩㛪㛫㛬㛭㛮㛯㛰㛱㛲㛳㛴㛵㛶㛷㛸㛹㛺㛻㛼㛽㛾㛿㜀㜁㜂㜃㜄㜅㜆㜇㜈㜉㜊㜋㜌㜍㜎㜏㜐㜑㜒㜓㜔㜕㜖㜗㜘㜙㜚㜛㜜㜝㜞㜟㜠㜡㜢㜣㜤" # noqa: E501
+ "㜥㜦㜧㜨㜩㜪㜫㜬㜭㜮㜯㜰㜱㜲㜳㜴㜵㜶㜷㜸㜹㜺㜻㜼㜽㜾㜿㝀㝁㝂㝃㝄㝅㝆㝇㝈㝉㝊㝋㝌㝍㝎㝏㝐㝑㝒㝓㝔㝕㝖㝗㝘㝙㝚㝛㝜㝝㝞㝟㝠㝡㝢㝣㝤㝥㝦㝧" # noqa: E501
+ "㝨㝩㝪㝫㝬㝭㝮㝯㝰㝱㝲㝳㝴㝵㝶㝷㝸㝹㝺㝻㝼㝽㝾㝿㞀㞁㞂㞃㞄㞅㞆㞇㞈㞉㞊㞋㞌㞍㞎㞏㞐㞑㞒㞓㞔㞕㞖㞗㞘㞙㞚㞛㞜㞝㞞㞟㞠㞡㞢㞣㞤㞥㞦㞧㞨㞩㞪" # noqa: E501
+ "㞫㞬㞭㞮㞯㞰㞱㞲㞳㞴㞵㞶㞷㞸㞹㞺㞻㞼㞽㞾㞿㟀㟁㟂㟃㟄㟅㟆㟇㟈㟉㟊㟋㟌㟍㟎㟏㟐㟑㟒㟓㟔㟕㟖㟗㟘㟙㟚㟛㟜㟝㟞㟟㟠㟡㟢㟣㟤㟥㟦㟧㟨㟩㟪㟫㟬㟭" # noqa: E501
+ "㟮㟯㟰㟱㟲㟳㟴㟵㟶㟷㟸㟹㟺㟻㟼㟽㟾㟿㠀㠁㠂㠃㠄㠅㠆㠇㠈㠉㠊㠋㠌㠍㠎㠏㠐㠑㠒㠓㠔㠕㠖㠗㠘㠙㠚㠛㠜㠝㠞㠟㠠㠡㠢㠣㠤㠥㠦㠧㠨㠩㠪㠫㠬㠭㠮㠯㠰" # noqa: E501
+ "㠱㠲㠳㠴㠵㠶㠷㠸㠹㠺㠻㠼㠽㠾㠿㡀㡁㡂㡃㡄㡅㡆㡇㡈㡉㡊㡋㡌㡍㡎㡏㡐㡑㡒㡓㡔㡕㡖㡗㡘㡙㡚㡛㡜㡝㡞㡟㡠㡡㡢㡣㡤㡥㡦㡧㡨㡩㡪㡫㡬㡭㡮㡯㡰㡱㡲㡳" # noqa: E501
+ "㡴㡵㡶㡷㡸㡹㡺㡻㡼㡽㡾㡿㢀㢁㢂㢃㢄㢅㢆㢇㢈㢉㢊㢋㢌㢍㢎㢏㢐㢑㢒㢓㢔㢕㢖㢗㢘㢙㢚㢛㢜㢝㢞㢟㢠㢡㢢㢣㢤㢥㢦㢧㢨㢩㢪㢫㢬㢭㢮㢯㢰㢱㢲㢳㢴㢵㢶" # noqa: E501
+ "㢷㢸㢹㢺㢻㢼㢽㢾㢿㣀㣁㣂㣃㣄㣅㣆㣇㣈㣉㣊㣋㣌㣍㣎㣏㣐㣑㣒㣓㣔㣕㣖㣗㣘㣙㣚㣛㣜㣝㣞㣟㣠㣡㣢㣣㣤㣥㣦㣧㣨㣩㣪㣫㣬㣭㣮㣯㣰㣱㣲㣳㣴㣵㣶㣷㣸㣹" # noqa: E501
+ "㣺㣻㣼㣽㣾㣿㤀㤁㤂㤃㤄㤅㤆㤇㤈㤉㤊㤋㤌㤍㤎㤏㤐㤑㤒㤓㤔㤕㤖㤗㤘㤙㤚㤛㤜㤝㤞㤟㤠㤡㤢㤣㤤㤥㤦㤧㤨㤩㤪㤫㤬㤭㤮㤯㤰㤱㤲㤳㤴㤵㤶㤷㤸㤹㤺㤻㤼" # noqa: E501
+ "㤽㤾㤿㥀㥁㥂㥃㥄㥅㥆㥇㥈㥉㥊㥋㥌㥍㥎㥏㥐㥑㥒㥓㥔㥕㥖㥗㥘㥙㥚㥛㥜㥝㥞㥟㥠㥡㥢㥣㥤㥥㥦㥧㥨㥩㥪㥫㥬㥭㥮㥯㥰㥱㥲㥳㥴㥵㥶㥷㥸㥹㥺㥻㥼㥽㥾㥿" # noqa: E501
+ "㦀㦁㦂㦃㦄㦅㦆㦇㦈㦉㦊㦋㦌㦍㦎㦏㦐㦑㦒㦓㦔㦕㦖㦗㦘㦙㦚㦛㦜㦝㦞㦟㦠㦡㦢㦣㦤㦥㦦㦧㦨㦩㦪㦫㦬㦭㦮㦯㦰㦱㦲㦳㦴㦵㦶㦷㦸㦹㦺㦻㦼㦽㦾㦿㧀㧁㧂" # noqa: E501
+ "㧃㧄㧅㧆㧇㧈㧉㧊㧋㧌㧍㧎㧏㧐㧑㧒㧓㧔㧕㧖㧗㧘㧙㧚㧛㧜㧝㧞㧟㧠㧡㧢㧣㧤㧥㧦㧧㧨㧩㧪㧫㧬㧭㧮㧯㧰㧱㧲㧳㧴㧵㧶㧷㧸㧹㧺㧻㧼㧽㧾㧿㨀㨁㨂㨃㨄㨅" # noqa: E501
+ "㨆㨇㨈㨉㨊㨋㨌㨍㨎㨏㨐㨑㨒㨓㨔㨕㨖㨗㨘㨙㨚㨛㨜㨝㨞㨟㨠㨡㨢㨣㨤㨥㨦㨧㨨㨩㨪㨫㨬㨭㨮㨯㨰㨱㨲㨳㨴㨵㨶㨷㨸㨹㨺㨻㨼㨽㨾㨿㩀㩁㩂㩃㩄㩅㩆㩇㩈" # noqa: E501
+ "㩉㩊㩋㩌㩍㩎㩏㩐㩑㩒㩓㩔㩕㩖㩗㩘㩙㩚㩛㩜㩝㩞㩟㩠㩡㩢㩣㩤㩥㩦㩧㩨㩩㩪㩫㩬㩭㩮㩯㩰㩱㩲㩳㩴㩵㩶㩷㩸㩹㩺㩻㩼㩽㩾㩿㪀㪁㪂㪃㪄㪅㪆㪇㪈㪉㪊㪋" # noqa: E501
+ "㪌㪍㪎㪏㪐㪑㪒㪓㪔㪕㪖㪗㪘㪙㪚㪛㪜㪝㪞㪟㪠㪡㪢㪣㪤㪥㪦㪧㪨㪩㪪㪫㪬㪭㪮㪯㪰㪱㪲㪳㪴㪵㪶㪷㪸㪹㪺㪻㪼㪽㪾㪿㫀㫁㫂㫃㫄㫅㫆㫇㫈㫉㫊㫋㫌㫍㫎" # noqa: E501
+ "㫏㫐㫑㫒㫓㫔㫕㫖㫗㫘㫙㫚㫛㫜㫝㫞㫟㫠㫡㫢㫣㫤㫥㫦㫧㫨㫩㫪㫫㫬㫭㫮㫯㫰㫱㫲㫳㫴㫵㫶㫷㫸㫹㫺㫻㫼㫽㫾㫿㬀㬁㬂㬃㬄㬅㬆㬇㬈㬉㬊㬋㬌㬍㬎㬏㬐㬑" # noqa: E501
+ "㬒㬓㬔㬕㬖㬗㬘㬙㬚㬛㬜㬝㬞㬟㬠㬡㬢㬣㬤㬥㬦㬧㬨㬩㬪㬫㬬㬭㬮㬯㬰㬱㬲㬳㬴㬵㬶㬷㬸㬹㬺㬻㬼㬽㬾㬿㭀㭁㭂㭃㭄㭅㭆㭇㭈㭉㭊㭋㭌㭍㭎㭏㭐㭑㭒㭓㭔" # noqa: E501
+ "㭕㭖㭗㭘㭙㭚㭛㭜㭝㭞㭟㭠㭡㭢㭣㭤㭥㭦㭧㭨㭩㭪㭫㭬㭭㭮㭯㭰㭱㭲㭳㭴㭵㭶㭷㭸㭹㭺㭻㭼㭽㭾㭿㮀㮁㮂㮃㮄㮅㮆㮇㮈㮉㮊㮋㮌㮍㮎㮏㮐㮑㮒㮓㮔㮕㮖㮗" # noqa: E501
+ "㮘㮙㮚㮛㮜㮝㮞㮟㮠㮡㮢㮣㮤㮥㮦㮧㮨㮩㮪㮫㮬㮭㮮㮯㮰㮱㮲㮳㮴㮵㮶㮷㮸㮹㮺㮻㮼㮽㮾㮿㯀㯁㯂㯃㯄㯅㯆㯇㯈㯉㯊㯋㯌㯍㯎㯏㯐㯑㯒㯓㯔㯕㯖㯗㯘㯙㯚" # noqa: E501
+ "㯛㯜㯝㯞㯟㯠㯡㯢㯣㯤㯥㯦㯧㯨㯩㯪㯫㯬㯭㯮㯯㯰㯱㯲㯳㯴㯵㯶㯷㯸㯹㯺㯻㯼㯽㯾㯿㰀㰁㰂㰃㰄㰅㰆㰇㰈㰉㰊㰋㰌㰍㰎㰏㰐㰑㰒㰓㰔㰕㰖㰗㰘㰙㰚㰛㰜㰝" # noqa: E501
+ "㰞㰟㰠㰡㰢㰣㰤㰥㰦㰧㰨㰩㰪㰫㰬㰭㰮㰯㰰㰱㰲㰳㰴㰵㰶㰷㰸㰹㰺㰻㰼㰽㰾㰿㱀㱁㱂㱃㱄㱅㱆㱇㱈㱉㱊㱋㱌㱍㱎㱏㱐㱑㱒㱓㱔㱕㱖㱗㱘㱙㱚㱛㱜㱝㱞㱟㱠" # noqa: E501
+ "㱡㱢㱣㱤㱥㱦㱧㱨㱩㱪㱫㱬㱭㱮㱯㱰㱱㱲㱳㱴㱵㱶㱷㱸㱹㱺㱻㱼㱽㱾㱿㲀㲁㲂㲃㲄㲅㲆㲇㲈㲉㲊㲋㲌㲍㲎㲏㲐㲑㲒㲓㲔㲕㲖㲗㲘㲙㲚㲛㲜㲝㲞㲟㲠㲡㲢㲣" # noqa: E501
+ "㲤㲥㲦㲧㲨㲩㲪㲫㲬㲭㲮㲯㲰㲱㲲㲳㲴㲵㲶㲷㲸㲹㲺㲻㲼㲽㲾㲿㳀㳁㳂㳃㳄㳅㳆㳇㳈㳉㳊㳋㳌㳍㳎㳏㳐㳑㳒㳓㳔㳕㳖㳗㳘㳙㳚㳛㳜㳝㳞㳟㳠㳡㳢㳣㳤㳥㳦" # noqa: E501
+ "㳧㳨㳩㳪㳫㳬㳭㳮㳯㳰㳱㳲㳳㳴㳵㳶㳷㳸㳹㳺㳻㳼㳽㳾㳿㴀㴁㴂㴃㴄㴅㴆㴇㴈㴉㴊㴋㴌㴍㴎㴏㴐㴑㴒㴓㴔㴕㴖㴗㴘㴙㴚㴛㴜㴝㴞㴟㴠㴡㴢㴣㴤㴥㴦㴧㴨㴩" # noqa: E501
+ "㴪㴫㴬㴭㴮㴯㴰㴱㴲㴳㴴㴵㴶㴷㴸㴹㴺㴻㴼㴽㴾㴿㵀㵁㵂㵃㵄㵅㵆㵇㵈㵉㵊㵋㵌㵍㵎㵏㵐㵑㵒㵓㵔㵕㵖㵗㵘㵙㵚㵛㵜㵝㵞㵟㵠㵡㵢㵣㵤㵥㵦㵧㵨㵩㵪㵫㵬" # noqa: E501
+ "㵭㵮㵯㵰㵱㵲㵳㵴㵵㵶㵷㵸㵹㵺㵻㵼㵽㵾㵿㶀㶁㶂㶃㶄㶅㶆㶇㶈㶉㶊㶋㶌㶍㶎㶏㶐㶑㶒㶓㶔㶕㶖㶗㶘㶙㶚㶛㶜㶝㶞㶟㶠㶡㶢㶣㶤㶥㶦㶧㶨㶩㶪㶫㶬㶭㶮㶯" # noqa: E501
+ "㶰㶱㶲㶳㶴㶵㶶㶷㶸㶹㶺㶻㶼㶽㶾㶿㷀㷁㷂㷃㷄㷅㷆㷇㷈㷉㷊㷋㷌㷍㷎㷏㷐㷑㷒㷓㷔㷕㷖㷗㷘㷙㷚㷛㷜㷝㷞㷟㷠㷡㷢㷣㷤㷥㷦㷧㷨㷩㷪㷫㷬㷭㷮㷯㷰㷱㷲" # noqa: E501
+ "㷳㷴㷵㷶㷷㷸㷹㷺㷻㷼㷽㷾㷿㸀㸁㸂㸃㸄㸅㸆㸇㸈㸉㸊㸋㸌㸍㸎㸏㸐㸑㸒㸓㸔㸕㸖㸗㸘㸙㸚㸛㸜㸝㸞㸟㸠㸡㸢㸣㸤㸥㸦㸧㸨㸩㸪㸫㸬㸭㸮㸯㸰㸱㸲㸳㸴㸵" # noqa: E501
+ "㸶㸷㸸㸹㸺㸻㸼㸽㸾㸿㹀㹁㹂㹃㹄㹅㹆㹇㹈㹉㹊㹋㹌㹍㹎㹏㹐㹑㹒㹓㹔㹕㹖㹗㹘㹙㹚㹛㹜㹝㹞㹟㹠㹡㹢㹣㹤㹥㹦㹧㹨㹩㹪㹫㹬㹭㹮㹯㹰㹱㹲㹳㹴㹵㹶㹷㹸" # noqa: E501
+ "㹹㹺㹻㹼㹽㹾㹿㺀㺁㺂㺃㺄㺅㺆㺇㺈㺉㺊㺋㺌㺍㺎㺏㺐㺑㺒㺓㺔㺕㺖㺗㺘㺙㺚㺛㺜㺝㺞㺟㺠㺡㺢㺣㺤㺥㺦㺧㺨㺩㺪㺫㺬㺭㺮㺯㺰㺱㺲㺳㺴㺵㺶㺷㺸㺹㺺㺻" # noqa: E501
+ "㺼㺽㺾㺿㻀㻁㻂㻃㻄㻅㻆㻇㻈㻉㻊㻋㻌㻍㻎㻏㻐㻑㻒㻓㻔㻕㻖㻗㻘㻙㻚㻛㻜㻝㻞㻟㻠㻡㻢㻣㻤㻥㻦㻧㻨㻩㻪㻫㻬㻭㻮㻯㻰㻱㻲㻳㻴㻵㻶㻷㻸㻹㻺㻻㻼㻽㻾" # noqa: E501
+ "㻿㼀㼁㼂㼃㼄㼅㼆㼇㼈㼉㼊㼋㼌㼍㼎㼏㼐㼑㼒㼓㼔㼕㼖㼗㼘㼙㼚㼛㼜㼝㼞㼟㼠㼡㼢㼣㼤㼥㼦㼧㼨㼩㼪㼫㼬㼭㼮㼯㼰㼱㼲㼳㼴㼵㼶㼷㼸㼹㼺㼻㼼㼽㼾㼿㽀㽁" # noqa: E501
+ "㽂㽃㽄㽅㽆㽇㽈㽉㽊㽋㽌㽍㽎㽏㽐㽑㽒㽓㽔㽕㽖㽗㽘㽙㽚㽛㽜㽝㽞㽟㽠㽡㽢㽣㽤㽥㽦㽧㽨㽩㽪㽫㽬㽭㽮㽯㽰㽱㽲㽳㽴㽵㽶㽷㽸㽹㽺㽻㽼㽽㽾㽿㾀㾁㾂㾃㾄" # noqa: E501
+ "㾅㾆㾇㾈㾉㾊㾋㾌㾍㾎㾏㾐㾑㾒㾓㾔㾕㾖㾗㾘㾙㾚㾛㾜㾝㾞㾟㾠㾡㾢㾣㾤㾥㾦㾧㾨㾩㾪㾫㾬㾭㾮㾯㾰㾱㾲㾳㾴㾵㾶㾷㾸㾹㾺㾻㾼㾽㾾㾿㿀㿁㿂㿃㿄㿅㿆㿇" # noqa: E501
+ "㿈㿉㿊㿋㿌㿍㿎㿏㿐㿑㿒㿓㿔㿕㿖㿗㿘㿙㿚㿛㿜㿝㿞㿟㿠㿡㿢㿣㿤㿥㿦㿧㿨㿩㿪㿫㿬㿭㿮㿯㿰㿱㿲㿳㿴㿵㿶㿷㿸㿹㿺㿻㿼㿽㿾㿿䀀䀁䀂䀃䀄䀅䀆䀇䀈䀉䀊" # noqa: E501
+ "䀋䀌䀍䀎䀏䀐䀑䀒䀓䀔䀕䀖䀗䀘䀙䀚䀛䀜䀝䀞䀟䀠䀡䀢䀣䀤䀥䀦䀧䀨䀩䀪䀫䀬䀭䀮䀯䀰䀱䀲䀳䀴䀵䀶䀷䀸䀹䀺䀻䀼䀽䀾䀿䁀䁁䁂䁃䁄䁅䁆䁇䁈䁉䁊䁋䁌䁍" # noqa: E501
+ "䁎䁏䁐䁑䁒䁓䁔䁕䁖䁗䁘䁙䁚䁛䁜䁝䁞䁟䁠䁡䁢䁣䁤䁥䁦䁧䁨䁩䁪䁫䁬䁭䁮䁯䁰䁱䁲䁳䁴䁵䁶䁷䁸䁹䁺䁻䁼䁽䁾䁿䂀䂁䂂䂃䂄䂅䂆䂇䂈䂉䂊䂋䂌䂍䂎䂏䂐" # noqa: E501
+ "䂑䂒䂓䂔䂕䂖䂗䂘䂙䂚䂛䂜䂝䂞䂟䂠䂡䂢䂣䂤䂥䂦䂧䂨䂩䂪䂫䂬䂭䂮䂯䂰䂱䂲䂳䂴䂵䂶䂷䂸䂹䂺䂻䂼䂽䂾䂿䃀䃁䃂䃃䃄䃅䃆䃇䃈䃉䃊䃋䃌䃍䃎䃏䃐䃑䃒䃓" # noqa: E501
+ "䃔䃕䃖䃗䃘䃙䃚䃛䃜䃝䃞䃟䃠䃡䃢䃣䃤䃥䃦䃧䃨䃩䃪䃫䃬䃭䃮䃯䃰䃱䃲䃳䃴䃵䃶䃷䃸䃹䃺䃻䃼䃽䃾䃿䄀䄁䄂䄃䄄䄅䄆䄇䄈䄉䄊䄋䄌䄍䄎䄏䄐䄑䄒䄓䄔䄕䄖" # noqa: E501
+ "䄗䄘䄙䄚䄛䄜䄝䄞䄟䄠䄡䄢䄣䄤䄥䄦䄧䄨䄩䄪䄫䄬䄭䄮䄯䄰䄱䄲䄳䄴䄵䄶䄷䄸䄹䄺䄻䄼䄽䄾䄿䅀䅁䅂䅃䅄䅅䅆䅇䅈䅉䅊䅋䅌䅍䅎䅏䅐䅑䅒䅓䅔䅕䅖䅗䅘䅙" # noqa: E501
+ "䅚䅛䅜䅝䅞䅟䅠䅡䅢䅣䅤䅥䅦䅧䅨䅩䅪䅫䅬䅭䅮䅯䅰䅱䅲䅳䅴䅵䅶䅷䅸䅹䅺䅻䅼䅽䅾䅿䆀䆁䆂䆃䆄䆅䆆䆇䆈䆉䆊䆋䆌䆍䆎䆏䆐䆑䆒䆓䆔䆕䆖䆗䆘䆙䆚䆛䆜" # noqa: E501
+ "䆝䆞䆟䆠䆡䆢䆣䆤䆥䆦䆧䆨䆩䆪䆫䆬䆭䆮䆯䆰䆱䆲䆳䆴䆵䆶䆷䆸䆹䆺䆻䆼䆽䆾䆿䇀䇁䇂䇃䇄䇅䇆䇇䇈䇉䇊䇋䇌䇍䇎䇏䇐䇑䇒䇓䇔䇕䇖䇗䇘䇙䇚䇛䇜䇝䇞䇟" # noqa: E501
+ "䇠䇡䇢䇣䇤䇥䇦䇧䇨䇩䇪䇫䇬䇭䇮䇯䇰䇱䇲䇳䇴䇵䇶䇷䇸䇹䇺䇻䇼䇽䇾䇿䈀䈁䈂䈃䈄䈅䈆䈇䈈䈉䈊䈋䈌䈍䈎䈏䈐䈑䈒䈓䈔䈕䈖䈗䈘䈙䈚䈛䈜䈝䈞䈟䈠䈡䈢" # noqa: E501
+ "䈣䈤䈥䈦䈧䈨䈩䈪䈫䈬䈭䈮䈯䈰䈱䈲䈳䈴䈵䈶䈷䈸䈹䈺䈻䈼䈽䈾䈿䉀䉁䉂䉃䉄䉅䉆䉇䉈䉉䉊䉋䉌䉍䉎䉏䉐䉑䉒䉓䉔䉕䉖䉗䉘䉙䉚䉛䉜䉝䉞䉟䉠䉡䉢䉣䉤䉥" # noqa: E501
+ "䉦䉧䉨䉩㑃䉪䉫䉬䉭䉮䉯䉰䉱䉲䉳䉴䉵䉶䉷䉸䉹䉺䉻䉼䉽䉾䉿䊀䊁䊂䊃䊄䊅䊆䊇䊈䊉䊊䊋䊌䊍䊎䊏䊐䊑䊒䊓䊔䊕䊖䊗䊘䊙䊚䊛䊜䊝䊞䊟䊠䊡䊢䊣䊤䊥䊦䊧" # noqa: E501
+ "䊨䊩䊪䊫䊬䊭䊮䊯䊰䊱䊲䊳䊴䊵䊶䊷䊸䊹䊺䊻䊼䊽䊾䊿䋀䋁䋂䋃䋄䋅䋆䋇䋈䋉䋊䋋䋌䋍䋎䋏䋐䋑䋒䋓䋔䋕䋖䋗䋘䋙䋚䋛䋜䋝䋞䋟䋠䋡䋢䋣䋤䋥䋦䋧䋨䋩䋪" # noqa: E501
+ "䋫䋬䋭䋮䋯䋰䋱䋲䋳䋴䋵䋶䋷䋸䋹䋺䋻䋼䋽䋾䋿䌀䌁䌂䌃䌄䌅䌆䌇䌈䌉䌊䌋䌌䌍䌎䌏䌐䌑䌒䌓䌔䌕䌖䌗䌘䌙䌚䌛䌜䌝䌞䌟䌠䌡䌢䌣䌤䌥䌦䌧䌨䌩䌪䌫䌬䌭" # noqa: E501
+ "䌮䌯䌰䌱䌲䌳䌴䌵䌶䌷䌸䌹䌺䌻䌼䌽䌾䌿䍀䍁䍂䍃䍄䍅䍆䍇䍈䍉䍊䍋䍌䍍䍎䍏䍐䍑䍒䍓䍔䍕䍖䍗䍘䍙䍚䍛䍜䍝䍞䍟䍠䍡䍢䍣䍤䍥䍦䍧䍨䍩䍪䍫䍬䍭䍮䍯䍰" # noqa: E501
+ "䍱䍲䍳䍴䍵䍶䍷䍸䍹䍺䍻䍼䍽䍾䍿䎀䎁䎂䎃䎄䎅䎆䎇䎈䎉䎊䎋䎌䎍䎎䎏䎐䎑䎒䎓䎔䎕䎖䎗䎘䎙䎚䎛䎜䎝䎞䎟䎠䎡䎢䎣䎤䎥䎦䎧䎨䎩䎪䎫䎬䎭䎮䎯䎰䎱䎲䎳" # noqa: E501
+ "䎴䎵䎶䎷䎸䎹䎺䎻䎼䎽䎾䎿䏀䏁䏂䏃䏄䏅䏆䏇䏈䏉䏊䏋䏌䏍䏎䏏䏐䏑䏒䏓䏔䏕䏖䏗䏘䏙䏚䏛䏜䏝䏞䏟䏠䏡䏢䏣䏤䏥䏦䏧䏨䏩䏪䏫䏬䏭䏮䏯䏰䏱䏲䏳䏴䏵䏶" # noqa: E501
+ "䏷䏸䏹䏺䏻䏼䏽䏾䏿䐀䐁䐂䐃䐄䐅䐆䐇䐈䐉䐊䐋䐌䐍䐎䐏䐐䐑䐒䐓䐔䐕䐖䐗䐘䐙䐚䐛䐜䐝䐞䐟䐠䐡䐢䐣䐤䐥䐦䐧䐨䐩䐪䐫䐬䐭䐮䐯䐰䐱䐲䐳䐴䐵䐶䐷䐸䐹" # noqa: E501
+ "䐺䐻䐼䐽䐾䐿䑀䑁䑂䑃䑄䑅䑆䑇䑈䑉䑊䑋䑌䑍䑎䑏䑐䑑䑒䑓䑔䑕䑖䑗䑘䑙䑚䑛䑜䑝䑞䑟䑠䑡䑢䑣䑤䑥䑦䑧䑨䑩䑪䑫䑬䑭䑮䑯䑰䑱䑲䑳䑴䑵䑶䑷䑸䑹䑺䑻䑼" # noqa: E501
+ "䑽䑾䑿䒀䒁䒂䒃䒄䒅䒆䒇䒈䒉䒊䒋䒌䒍䒎䒏䒐䒑䒒䒓䒔䒕䒖䒗䒘䒙䒚䒛䒜䒝䒞䒟䒠䒡䒢䒣䒤䒥䒦䒧䒨䒩䒪䒫䒬䒭䒮䒯䒰䒱䒲䒳䒴䒵䒶䒷䒸䒹䒺䒻䒼䒽䒾䒿" # noqa: E501
+ "䓀䓁䓂䓃䓄䓅䓆䓇䓈䓉䓊䓋䓌䓍䓎䓏䓐䓑䓒䓓䓔䓕䓖䓗䓘䓙䓚䓛䓜䓝䓞䓟䓠䓡䓢䓣䓤䓥䓦䓧䓨䓩䓪䓫䓬䓭䓮䓯䓰䓱䓲䓳䓴䓵䓶䓷䓸䓹䓺䓻䓼䓽䓾䓿䔀䔁䔂" # noqa: E501
+ "䔃䔄䔅䔆䔇䔈䔉䔊䔋䔌䔍䔎䔏䔐䔑䔒䔓䔔䔕䔖䔗䔘䔙䔚䔛䔜䔝䔞䔟䔠䔡䔢䔣䔤䔥䔦䔧䔨䔩䔪䔫䔬䔭䔮䔯䔰䔱䔲䔳䔴䔵䔶䔷䔸䔹䔺䔻䔼䔽䔾䔿䕀䕁䕂䕃䕄䕅" # noqa: E501
+ "䕆䕇䕈䕉䕊䕋䕌䕍䕎䕏䕐䕑䕒䕓䕔䕕䕖䕗䕘䕙䕚䕛䕜䕝䕞䕟䕠䕡䕢䕣䕤䕥䕦䕧䕨䕩䕪䕫䕬䕭䕮䕯䕰䕱䕲䕳䕴䕵䕶䕷䕸䕹䕺䕻䕼䕽䕾䕿䖀䖁䖂䖃䖄䖅䖆䖇䖈" # noqa: E501
+ "䖉䖊䖋䖌䖍䖎䖏䖐䖑䖒䖓䖔䖕䖖䖗䖘䖙䖚䖛䖜䖝䖞䖟䖠䖡䖢䖣䖤䖥䖦䖧䖨䖩䖪䖫䖬䖭䖮䖯䖰䖱䖲䖳䖴䖵䖶䖷䖸䖹䖺䖻䖼䖽䖾䖿䗀䗁䗂䗃䗄䗅䗆䗇䗈䗉䗊䗋" # noqa: E501
+ "䗌䗍䗎䗏䗐䗑䗒䗓䗔䗕䗖䗗䗘䗙䗚䗛䗜䗝䗞䗟䗠䗡䗢䗣䗤䗥䗦䗧䗨䗩䗪䗫䗬䗭䗮䗯䗰䗱䗲䗳䗴䗵䗶䗷䗸䗹䗺䗻䗼䗽䗾䗿䘀䘁䘂䘃䘄䘅䘆䘇䘈䘉䘊䘋䘌䘍䘎" # noqa: E501
+ "䘏䘐䘑䘒䘓䘔䘕䘖䘗䘘䘙䘚䘛䘜䘝䘞䘟䘠䘡䘢䘣䘤䘥䘦䘧䘨䘩䘪䘫䘬䘭䘮䘯䘰䘱䘲䘳䘴䘵䘶䘷䘸䘹䘺䘻䘼䘽䘾䘿䙀䙁䙂䙃䙄䙅䙆䙇䙈䙉䙊䙋䙌䙍䙎䙏䙐䙑" # noqa: E501
+ "䙒䙓䙔䙕䙖䙗䙘䙙䙚䙛䙜䙝䙞䙟䙠䙡䙢䙣䙤䙥䙦䙧䙨䙩䙪䙫䙬䙭䙮䙯䙰䙱䙲䙳䙴䙵䙶䙷䙸䙹䙺䙻䙼䙽䙾䙿䚀䚁䚂䚃䚄䚅䚆䚇䚈䚉䚊䚋䚌䚍䚎䚏䚐䚑䚒䚓䚔" # noqa: E501
+ "䚕䚖䚗䚘䚙䚚䚛䚜䚝䚞䚟䚠䚡䚢䚣䚤䚥䚦䚧䚨䚩䚪䚫䚬䚭䚮䚯䚰䚱䚲䚳䚴䚵䚶䚷䚸䚹䚺䚻䚼䚽䚾䚿䛀䛁䛂䛃䛄䛅䛆䛇䛈䛉䛊䛋䛌䛍䛎䛏䛐䛑䛒䛓䛔䛕䛖䛗" # noqa: E501
+ "䛘䛙䛚䛛䛜䛝䛞䛟䛠䛡䛢䛣䛤䛥䛦䛧䛨䛩䛪䛫䛬䛭䛮䛯䛰䛱䛲䛳䛴䛵䛶䛷䛸䛹䛺䛻䛼䛽䛾䛿䜀䜁䜂䜃䜄䜅䜆䜇䜈䜉䜊䜋䜌䜍䜎䜏䜐䜑䜒䜓䜔䜕䜖䜗䜘䜙䜚" # noqa: E501
+ "䜛䜜䜝䜞䜟䜠䜡䜢䜣䜤䜥䜦䜧䜨䜩䜪䜫䜬䜭䜮䜯䜰䜱䜲䜳䜴䜵䜶䜷䜸䜹䜺䜻䜼䜽䜾䜿䝀䝁䝂䝃䝄䝅䝆䝇䝈䝉䝊䝋䝌䝍䝎䝏䝐䝑䝒䝓䝔䝕䝖䝗䝘䝙䝚䝛䝜䝝" # noqa: E501
+ "䝞䝟䝠䝡䝢䝣䝤䝥䝦䝧䝨䝩䝪䝫䝬䝭䝮䝯䝰䝱䝲䝳䝴䝵䝶䝷䝸䝹䝺䝻䝼䝽䝾䝿䞀䞁䞂䞃䞄䞅䞆䞇䞈䞉䞊䞋䞌䞍䞎䞏䞐䞑䞒䞓䞔䞕䞖䞗䞘䞙䞚䞛䞜䞝䞞䞟䞠" # noqa: E501
+ "䞡䞢䞣䞤䞥䞦䞧䞨䞩䞪䞫䞬䞭䞮䞯䞰䞱䞲䞳䞴䞵䞶䞷䞸䞹䞺䞻䞼䞽䞾䞿䟀䟁䟂䟃䟄䟅䟆䟇䟈䟉䟊䟋䟌䟍䟎䟏䟐䟑䟒䟓䟔䟕䟖䟗䟘䟙䟚䟛䟜䟝䟞䟟䟠䟡䟢䟣" # noqa: E501
+ "䟤䟥䟦䟧䟨䟩䟪䟫䟬䟭䟮䟯䟰䟱䟲䟳䟴䟵䟶䟷䟸䟹䟺䟻䟼䟽䟾䟿䠀䠁䠂䠃䠄䠅䠆䠇䠈䠉䠊䠋䠌䠍䠎䠏䠐䠑䠒䠓䠔䠕䠖䠗䠘䠙䠚䠛䠜䠝䠞䠟䠠䠡䠢䠣䠤䠥䠦" # noqa: E501
+ "䠧䠨䠩䠪䠫䠬䠭䠮䠯䠰䠱䠲䠳䠴䠵䠶䠷䠸䠹䠺䠻䠼䠽䠾䠿䡀䡁䡂䡃䡄䡅䡆䡇䡈䡉䡊䡋䡌䡍䡎䡏䡐䡑䡒䡓䡔䡕䡖䡗䡘䡙䡚䡛䡜䡝䡞䡟䡠䡡䡢䡣䡤䡥䡦䡧䡨䡩" # noqa: E501
+ "䡪䡫䡬䡭䡮䡯䡰䡱䡲䡳䡴䡵䡶䡷䡸䡹䡺䡻䡼䡽䡾䡿䢀䢁䢂䢃䢄䢅䢆䢇䢈䢉䢊䢋䢌䢍䢎䢏䢐䢑䢒䢓䢔䢕䢖䢗䢘䢙䢚䢛䢜䢝䢞䢟䢠䢡䢢䢣䢤䢥䢦䢧䢨䢩䢪䢫䢬" # noqa: E501
+ "䢭䢮䢯䢰䢱䢲䢳䢴䢵䢶䢷䢸䢹䢺䢻䢼䢽䢾䢿䣀䣁䣂䣃䣄䣅䣆䣇䣈䣉䣊䣋䣌䣍䣎䣏䣐䣑䣒䣓䣔䣕䣖䣗䣘䣙䣚䣛䣜䣝䣞䣟䣠䣡䣢䣣䣤䣥䣦䣧䣨䣩䣪䣫䣬䣭䣮䣯" # noqa: E501
+ "䣰䣱䣲䣳䣴䣵䣶䣷䣸䣹䣺䣻䣼䣽䣾䣿䤀䤁䤂䤃䤄䤅䤆䤇䤈䤉䤊䤋䤌䤍䤎䤏䤐䤑䤒䤓䤔䤕䤖䤗䤘䤙䤚䤛䤜䤝䤞䤟䤠䤡䤢䤣䤤䤥䤦䤧䤨䤩䤪䤫䤬䤭䤮䤯䤰䤱䤲" # noqa: E501
+ "䤳䤴䤵䤶䤷䤸䤹䤺䤻䤼䤽䤾䤿䥀䥁䥂䥃䥄䥅䥆䥇䥈䥉䥊䥋䥌䥍䥎䥏䥐䥑䥒䥓䥔䥕䥖䥗䥘䥙䥚䥛䥜䥝䥞䥟䥠䥡䥢䥣䥤䥥䥦䥧䥨䥩䥪䥫䥬䥭䥮䥯䥰䥱䥲䥳䥴䥵" # noqa: E501
+ "䥶䥷䥸䥹䥺䥻䥼䥽䥾䥿䦀䦁䦂䦃䦄䦅䦆䦇䦈䦉䦊䦋䦌䦍䦎䦏䦐䦑䦒䦓䦔䦕䦖䦗䦘䦙䦚䦛䦜䦝䦞䦟䦠䦡䦢䦣䦤䦥䦦䦧䦨䦩䦪䦫䦬䦭䦮䦯䦰䦱䦲䦳䦴䦵䦶䦷䦸" # noqa: E501
+ "䦹䦺䦻䦼䦽䦾䦿䧀䧁䧂䧃䧄䧅䧆䧇䧈䧉䧊䧋䧌䧍䧎䧏䧐䧑䧒䧓䧔䧕䧖䧗䧘䧙䧚䧛䧜䧝䧞䧟䧠䧡䧢䧣䧤䧥䧦䧧䧨䧩䧪䧫䧬䧭䧮䧯䧰䧱䧲䧳䧴䧵䧶䧷䧸䧹䧺䧻" # noqa: E501
+ "䧼䧽䧾䧿䨀䨁䨂䨃䨄䨅䨆䨇䨈䨉䨊䨋䨌䨍䨎䨏䨐䨑䨒䨓䨔䨕䨖䨗䨘䨙䨚䨛䨜䨝䨞䨟䨠䨡䨢䨣䨤䨥䨦䨧䨨䨩䨪䨫䨬䨭䨮䨯䨰䨱䨲䨳䨴䨵䨶䨷䨸䨹䨺䨻䨼䨽䨾" # noqa: E501
+ "䨿䩀䩁䩂䩃䩄䩅䩆䩇䩈䩉䩊䩋䩌䩍䩎䩏䩐䩑䩒䩓䩔䩕䩖䩗䩘䩙䩚䩛䩜䩝䩞䩟䩠䩡䩢䩣䩤䩥䩦䩧䩨䩩䩪䩫䩬䩭䩮䩯䩰䩱䩲䩳䩴䩵䩶䩷䩸䩹䩺䩻䩼䩽䩾䩿䪀䪁" # noqa: E501
+ "䪂䪃䪄䪅䪆䪇䪈䪉䪊䪋䪌䪍䪎䪏䪐䪑䪒䪓䪔䪕䪖䪗䪘䪙䪚䪛䪜䪝䪞䪟䪠䪡䪢䪣䪤䪥䪦䪧䪨䪩䪪䪫䪬䪭䪮䪯䪰䪱䪲䪳䪴䪵䪶䪷䪸䪹䪺䪻䪼䪽䪾䪿䫀䫁䫂䫃䫄" # noqa: E501
+ "䫅䫆䫇䫈䫉䫊䫋䫌䫍䫎䫏䫐䫑䫒䫓䫔䫕䫖䫗䫘䫙䫚䫛䫜䫝䫞䫟䫠䫡䫢䫣䫤䫥䫦䫧䫨䫩䫪䫫䫬䫭䫮䫯䫰䫱䫲䫳䫴䫵䫶䫷䫸䫹䫺䫻䫼䫽䫾䫿䬀䬁䬂䬃䬄䬅䬆䬇" # noqa: E501
+ "䬈䬉䬊䬋䬌䬍䬎䬏䬐䬑䬒䬓䬔䬕䬖䬗䬘䬙䬚䬛䬜䬝䬞䬟䬠䬡䬢䬣䬤䬥䬦䬧䬨䬩䬪䬫䬬䬭䬮䬯䬰䬱䬲䬳䬴䬵䬶䬷䬸䬹䬺䬻䬼䬽䬾䬿䭀䭁䭂䭃䭄䭅䭆䭇䭈䭉䭊" # noqa: E501
+ "䭋䭌䭍䭎䭏䭐䭑䭒䭓䭔䭕䭖䭗䭘䭙䭚䭛䭜䭝䭞䭟䭠䭡䭢䭣䭤䭥䭦䭧䭨䭩䭪䭫䭬䭭䭮䭯䭰䭱䭲䭳䭴䭵䭶䭷䭸䭹䭺䭻䭼䭽䭾䭿䮀䮁䮂䮃䮄䮅䮆䮇䮈䮉䮊䮋䮌䮍" # noqa: E501
+ "䮎䮏䮐䮑䮒䮓䮔䮕䮖䮗䮘䮙䮚䮛䮜䮝䮞䮟䮠䮡䮢䮣䮤䮥䮦䮧䮨䮩䮪䮫䮬䮭䮮䮯䮰䮱䮲䮳䮴䮵䮶䮷䮸䮹䮺䮻䮼䮽䮾䮿䯀䯁䯂䯃䯄䯅䯆䯇䯈䯉䯊䯋䯌䯍䯎䯏䯐" # noqa: E501
+ "䯑䯒䯓䯔䯕䯖䯗䯘䯙䯚䯛䯜䯝䯞䯟䯠䯡䯢䯣䯤䯥䯦䯧䯨䯩䯪䯫䯬䯭䯮䯯䯰䯱䯲䯳䯴䯵䯶䯷䯸䯹䯺䯻䯼䯽䯾䯿䰀䰁䰂䰃䰄䰅䰆䰇䰈䰉䰊䰋䰌䰍䰎䰏䰐䰑䰒䰓" # noqa: E501
+ "䰔䰕䰖䰗䰘䰙䰚䰛䰜䰝䰞䰟䰠䰡䰢䰣䰤䰥䰦䰧䰨䰩䰪䰫䰬䰭䰮䰯䰰䰱䰲䰳䰴䰵䰶䰷䰸䰹䰺䰻䰼䰽䰾䰿䱀䱁䱂䱃䱄䱅䱆䱇䱈䱉䱊䱋䱌䱍䱎䱏䱐䱑䱒䱓䱔䱕䱖" # noqa: E501
+ "䱗䱘䱙䱚䱛䱜䱝䱞䱟䱠䱡䱢䱣䱤䱥䱦䱧䱨䱩䱪䱫䱬䱭䱮䱯䱰䱱䱲䱳䱴䱵䱶䱷䱸䱹䱺䱻䱼䱽䱾䱿䲀䲁䲂䲃䲄䲅䲆䲇䲈䲉䲊䲋䲌䲍䲎䲏䲐䲑䲒䲓䲔䲕䲖䲗䲘䲙" # noqa: E501
+ "䲚䲛䲜䲝䲞䲟䲠䲡䲢䲣䲤䲥䲦䲧䲨䲩䲪䲫䲬䲭䲮䲯䲰䲱䲲䲳䲴䲵䲶䲷䲸䲹䲺䲻䲼䲽䲾䲿䳀䳁䳂䳃䳄䳅䳆䳇䳈䳉䳊䳋䳌䳍䳎䳏䳐䳑䳒䳓䳔䳕䳖䳗䳘䳙䳚䳛䳜" # noqa: E501
+ "䳝䳞䳟䳠䳡䳢䳣䳤䳥䳦䳧䳨䳩䳪䳫䳬䳭䳮䳯䳰䳱䳲䳳䳴䳵䳶䳷䳸䳹䳺䳻䳼䳽䳾䳿䴀䴁䴂䴃䴄䴅䴆䴇䴈䴉䴊䴋䴌䴍䴎䴏䴐䴑䴒䴓䴔䴕䴖䴗䴘䴙䴚䴛䴜䴝䴞䴟" # noqa: E501
+ "䴠䴡䴢䴣䴤䴥䴦䴧䴨䴩䴪䴫䴬䴭䴮䴯䴰䴱䴲䴳䴴䴵䴶䴷䴸䴹䴺䴻䴼䴽䴾䴿䵀䵁䵂䵃䵄䵅䵆䵇䵈䵉䵊䵋䵌䵍䵎䵏䵐䵑䵒䵓䵔䵕䵖䵗䵘䵙䵚䵛䵜䵝䵞䵟䵠䵡䵢" # noqa: E501
+ "䵣䵤䵥䵦䵧䵨䵩䵪䵫䵬䵭䵮䵯䵰䵱䵲䵳䵴䵵䵶䵷䵸䵹䵺䵻䵼䵽䵾䵿䶀䶁䶂䶃䶄䶅䶆䶇䶈䶉䶊䶋䶌䶍䶎䶏䶐䶑䶒䶓䶔䶕䶖䶗䶘䶙䶚䶛䶜䶝䶞䶟䶠䶡䶢䶣䶤䶥" # noqa: E501
+ "䶦䶧䶨䶩䶪䶫䶬䶭䶮䶯䶰䶱䶲䶳䶴䶵䶶䶷䶸䶹䶺䶻䶼䶽䶾䶿"
+ _BASE_VOCABS["punctuation"]
+ "。・〜°—、「」『』【】゛》《〉〈" # punctuation
+ _BASE_VOCABS["currency"]
)
# Multi-lingual
# NOTE: ``dict.fromkeys`` deduplicates the concatenated characters while keeping
# first-seen insertion order (a documented dict guarantee since Python 3.7).
# Order stability matters here: the character position defines the model's
# output label index, so the composition below must not be reordered.
VOCABS["multilingual"] = "".join(
    dict.fromkeys(
        # latin_based
        VOCABS["english"]
        + VOCABS["albanian"]
        + VOCABS["afrikaans"]
        + VOCABS["azerbaijani"]
        + VOCABS["basque"]
        + VOCABS["bosnian"]
        + VOCABS["catalan"]
        + VOCABS["croatian"]
        + VOCABS["czech"]
        + VOCABS["danish"]
        + VOCABS["dutch"]
        + VOCABS["estonian"]
        + VOCABS["esperanto"]
        + VOCABS["french"]
        + VOCABS["finnish"]
        + VOCABS["frisian"]
        + VOCABS["galician"]
        + VOCABS["german"]
        + VOCABS["hausa"]
        + VOCABS["hungarian"]
        + VOCABS["icelandic"]
        + VOCABS["indonesian"]
        + VOCABS["irish"]
        + VOCABS["italian"]
        + VOCABS["latvian"]
        + VOCABS["lithuanian"]
        + VOCABS["luxembourgish"]
        + VOCABS["maori"]
        + VOCABS["malagasy"]
        + VOCABS["malay"]
        + VOCABS["maltese"]
        + VOCABS["montenegrin"]
        + VOCABS["norwegian"]
        + VOCABS["polish"]
        + VOCABS["portuguese"]
        + VOCABS["quechua"]
        + VOCABS["romanian"]
        + VOCABS["scottish_gaelic"]
        + VOCABS["serbian_latin"]
        + VOCABS["slovak"]
        + VOCABS["slovene"]
        + VOCABS["somali"]
        + VOCABS["spanish"]
        + VOCABS["swahili"]
        + VOCABS["swedish"]
        + VOCABS["tagalog"]
        + VOCABS["turkish"]
        + VOCABS["uzbek_latin"]
        + VOCABS["vietnamese"]
        + VOCABS["welsh"]
        + VOCABS["yoruba"]
        + VOCABS["zulu"]
        + "§"  # paragraph sign
        # cyrillic_based
        + VOCABS["russian"]
        + VOCABS["belarusian"]
        + VOCABS["ukrainian"]
        + VOCABS["tatar"]
        + VOCABS["tajik"]
        + VOCABS["kazakh"]
        + VOCABS["kyrgyz"]
        + VOCABS["bulgarian"]
        + VOCABS["macedonian"]
        + VOCABS["mongolian"]
        + VOCABS["yakut"]
        + VOCABS["serbian_cyrillic"]
        + VOCABS["uzbek_cyrillic"]
        # greek
        + VOCABS["greek"]
        # hebrew
        + VOCABS["hebrew"]
    )
)
================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "onnxtr"
description = "Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents."
authors = [{name = "Felix Dittrich", email = "felixdittrich92@gmail.com"}]
maintainers = [
{name = "Felix Dittrich"},
]
readme = "README.md"
requires-python = ">=3.10.0,<4"
license = {file = "LICENSE"}
keywords=["OCR", "deep learning", "computer vision", "onnx", "text detection", "text recognition", "docTR", "document analysis", "document processing", "document AI"]
classifiers=[
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dynamic = ["version"]
dependencies = [
# For proper typing, mypy needs numpy>=1.20.0 (cf. https://github.com/numpy/numpy/pull/16515)
# Additional typing support is brought by numpy>=1.22.4, but core build sticks to >=1.16.0
"numpy>=1.16.0,<3.0.0",
"scipy>=1.4.0,<2.0.0",
"pypdfium2>=4.11.0,<6.0.0",
"pyclipper>=1.2.0,<2.0.0",
"rapidfuzz>=3.0.0,<4.0.0",
"langdetect>=1.0.9,<2.0.0",
"huggingface-hub>=0.23.0,<2.0.0",
"Pillow>=9.2.0",
"defusedxml>=0.7.0",
"anyascii>=0.3.2",
"tqdm>=4.30.0",
]
[project.optional-dependencies]
cpu = [
"onnxruntime>=1.18.0",
"opencv-python>=4.5.0,<5.0.0",
]
gpu = [
"onnxruntime-gpu>=1.18.0",
"opencv-python>=4.5.0,<5.0.0",
]
openvino = [
"onnxruntime-openvino>=1.18.0",
"opencv-python>=4.5.0,<5.0.0",
]
cpu-headless = [
"onnxruntime>=1.18.0",
"opencv-python-headless>=4.5.0,<5.0.0",
]
gpu-headless = [
"onnxruntime-gpu>=1.18.0",
"opencv-python-headless>=4.5.0,<5.0.0",
]
openvino-headless = [
"onnxruntime-openvino>=1.18.0",
"opencv-python-headless>=4.5.0,<5.0.0",
]
html = [
"weasyprint>=55.0",
]
viz = [
"matplotlib>=3.1.0",
"mplcursors>=0.3",
]
testing = [
"pytest>=5.3.2",
"coverage[toml]>=4.5.4",
"requests>=2.20.0",
"pytest-memray>=1.7.0",
"psutil>=7.0.0",
]
quality = [
"ruff>=0.1.5",
"mypy>=0.812",
"pre-commit>=2.17.0",
]
dev = [
# Runtime
"onnxruntime>=1.18.0",
"opencv-python>=4.5.0,<5.0.0",
# HTML
"weasyprint>=55.0",
# Visualization
"matplotlib>=3.1.0",
"mplcursors>=0.3",
# Testing
"pytest>=5.3.2",
"coverage[toml]>=4.5.4",
"requests>=2.20.0",
"pytest-memray>=1.7.0",
"psutil>=7.0.0",
# Quality
"ruff>=0.1.5",
"mypy>=0.812",
"pre-commit>=2.17.0",
]
[project.urls]
repository = "https://github.com/felixdittrich92/OnnxTR"
tracker = "https://github.com/felixdittrich92/OnnxTR/issues"
changelog = "https://github.com/felixdittrich92/OnnxTR/releases"
[tool.setuptools]
zip-safe = true
[tool.setuptools.packages.find]
exclude = ["docs*", "tests*", "scripts*", "demo*"]
[tool.setuptools.package-data]
onnxtr = ["py.typed"]
[tool.mypy]
files = "onnxtr/"
show_error_codes = true
pretty = true
warn_unused_ignores = true
warn_redundant_casts = true
no_implicit_optional = true
check_untyped_defs = true
implicit_reexport = false
[[tool.mypy.overrides]]
module = [
"onnxruntime.*",
"PIL.*",
"scipy.*",
"cv2.*",
"matplotlib.*",
"numpy.*",
"pyclipper.*",
"mplcursors.*",
"defusedxml.*",
"weasyprint.*",
"pypdfium2.*",
"langdetect.*",
"huggingface_hub.*",
"rapidfuzz.*",
"anyascii.*",
"tqdm.*",
]
ignore_missing_imports = true
[tool.ruff]
exclude = [".git", "venv*", "build", "**/__init__.py"]
line-length = 120
target-version = "py310"
preview = true
[tool.ruff.lint]
select = [
# https://docs.astral.sh/ruff/rules/
"E", "W", "F", "I", "N", "Q", "C4", "T10", "LOG",
"D101", "D103", "D201","D202","D207","D208","D214","D215","D300","D301","D417", "D419", "D207" # pydocstyle
]
ignore = ["E402", "E203", "F403", "E731", "N812", "N817", "C408", "LOG015"]
[tool.ruff.lint.isort]
known-first-party = ["onnxtr", "utils"]
known-third-party = ["onnxruntime", "cv2"]
[tool.ruff.lint.per-file-ignores]
"onnxtr/models/**.py" = ["N806", "F841"]
"tests/**.py" = ["D"]
"scripts/**.py" = ["D"]
"demo/**.py" = ["D"]
".github/**.py" = ["D"]
[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
[tool.coverage.run]
source = ["onnxtr"]
================================================
FILE: scripts/convert_to_float16.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
try:
from onnxconverter_common import auto_convert_mixed_precision
except ImportError:
raise ImportError("Failed to import onnxconverter_common. Please install `pip install onnxconverter-common`.")
# Check GPU availability
import onnxruntime
if onnxruntime.get_device() != "GPU":
raise RuntimeError(
"Please install OnnxTR with GPU support to run this script. "
+ "`pip install onnxtr[gpu]` or `pip install -e .[gpu]`"
)
import argparse
import time
from tempfile import TemporaryDirectory
from typing import Any
import numpy as np
import onnx
from onnxtr.models import classification, detection, recognition
from onnxtr.models.classification.zoo import ORIENTATION_ARCHS
from onnxtr.models.detection.zoo import ARCHS as DETECTION_ARCHS
from onnxtr.models.recognition.zoo import ARCHS as RECOGNITION_ARCHS
def _load_model(arch: str, model_path: str | None = None) -> Any:
    """Instantiate a detection, recognition or orientation model by architecture name.

    Args:
        arch: architecture name; must be listed in one of the zoo ARCHS collections
        model_path: optional path to a custom ONNX model file; if None, the
            pretrained default weights are used

    Returns:
        The instantiated model

    Raises:
        ValueError: if ``arch`` is not a known architecture
    """
    # BUGFIX: the recognition/orientation branches previously tested the global
    # ``args.arch`` instead of the ``arch`` parameter, which made the function
    # depend on script-level state and break when called with a different arch.
    if arch in DETECTION_ARCHS:
        factory = detection.__dict__[arch]
    elif arch in RECOGNITION_ARCHS:
        factory = recognition.__dict__[arch]
    elif arch in ORIENTATION_ARCHS:
        factory = classification.__dict__[arch]
    else:
        raise ValueError(f"Unknown architecture {arch}")
    return factory() if model_path is None else factory(model_path)
def _latency_check(args: Any, size: tuple[int], model: Any, img_tensor: np.ndarray) -> None:
# Warmup
for _ in range(10):
_ = model(img_tensor)
timings = []
# Evaluation runs
for _ in range(args.it):
start_ts = time.perf_counter()
_ = model(img_tensor)
timings.append(time.perf_counter() - start_ts)
_timings = np.array(timings)
print(f"{args.arch} ({args.it} runs on ({size}) inputs)")
print(f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms")
def _validate(fp32_in: list[np.ndarray], fp16_in: list[np.ndarray]) -> bool:
assert fp32_in[0].shape == fp16_in[0].shape, "Input shapes are not the same"
# print mean difference between fp32 and fp16 inputs
if np.abs(fp32_in[0] - fp16_in[0]).mean() > 1e-3:
print(
f"Mean difference between fp32 and fp16 inputs: {np.abs(fp32_in[0] - fp16_in[0]).mean()} "
+ "-> YOU MAY EXPECT DIFFERING RESULTS"
)
return True # NOTE: Only warning, not error
def main(args):
    """Convert a pretrained OnnxTR model to mixed FP16 precision, benchmark it, and save it.

    Loads the FP32 model, runs ``auto_convert_mixed_precision`` with a random
    input feed, compares latency of both variants, then writes the FP16 model
    to ``<arch>_fp16.onnx`` in the working directory.
    """
    model_float32 = _load_model(args.arch, model_path=args.input_model if args.input_model else None)
    # Batch of 1 with the model's configured input shape, random data as feed
    size = (1, *model_float32.cfg["input_shape"])
    img_tensor = np.random.rand(*size).astype(np.float32)

    with TemporaryDirectory() as temp_dir:
        model_fp16_path = f"{temp_dir}/model_fp16.onnx"
        input_feed = {model_float32.runtime_inputs.name: img_tensor}
        model_float16 = auto_convert_mixed_precision(
            # NOTE: keep_io_types=True is required to keep the input/output type as float32
            onnx.load(str(model_float32.model_path)),
            input_feed,
            validate_fn=_validate,
            keep_io_types=True,
        )
        # Save to a temp file first so the FP16 model can be loaded for benchmarking
        onnx.save(model_float16, model_fp16_path)
        model_fp16 = _load_model(args.arch, model_fp16_path)
        # Latency check
        _latency_check(args, size, model_float32, img_tensor)
        _latency_check(args, size, model_fp16, img_tensor)

    # Persist the converted model next to the script (temp dir is gone by now;
    # the in-memory proto is saved, not the temp file)
    onnx.save(model_float16, args.arch + "_fp16.onnx")
    print(f"FP16 model saved at {args.arch}_fp16.onnx")
    print("Attention: FP16 converted models can only run on GPU devices.")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="OnnxTR FP32 to FP16 conversion",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"arch",
type=str,
choices=DETECTION_ARCHS + RECOGNITION_ARCHS + ORIENTATION_ARCHS,
help="Architecture to convert",
)
parser.add_argument("--input_model", type=str, help="Path to the input model", required=False)
parser.add_argument("--it", type=int, default=1000, help="Number of iterations to run")
args = parser.parse_args()
main(args)
================================================
FILE: scripts/evaluate.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
try:
from doctr.version import __version__
print(f"DocTR version: {__version__}")
except ImportError:
raise ImportError("Failed to import `doctr`. Please install `pip install python-doctr[torch]`.")
import os
import time
from typing import Any
import numpy as np
from doctr import datasets
from doctr import transforms as T
from doctr.utils.metrics import LocalizationConfusion, OCRMetric, TextMatch
from tqdm import tqdm
from onnxtr.models import EngineConfig, ocr_predictor
from onnxtr.utils.geometry import extract_crops, extract_rcrops
def _pct(val):
return "N/A" if val is None else f"{val:.2%}"
def main(args):
    """Run end-to-end OCR evaluation of an OnnxTR predictor against a docTR dataset.

    Computes detection (LocalizationConfusion), recognition (TextMatch) and
    end-to-end (OCRMetric) scores over the train+val splits, prints aggregated
    metrics and timing, and optionally records yappi/memray profiles.
    """
    # Straight-bbox evaluation is forced whenever rotation handling is off
    if not args.rotation:
        args.eval_straight = True

    if args.profiling:
        os.environ["ONNXTR_MULTIPROCESSING_DISABLE"] = "TRUE"
        try:
            import memray
            import yappi
        except ImportError:
            raise ImportError("Please install yappi and memray to enable profiling - `pip install yappi memray`.")
        yappi.set_clock_type("cpu")
        # Drop memray profile and flamegraph if they already exist
        if os.path.exists("memray_profile.bin"):
            os.remove("memray_profile.bin")
        if os.path.exists("memray_flamegraph.html"):
            os.remove("memray_flamegraph.html")
        # Entered manually so it can span the whole run; __exit__ is called below
        memray_tracker = memray.Tracker("memray_profile.bin")
        memray_tracker.__enter__()

    input_shape = (args.size, args.size)

    # We define a transformation function which does transform the annotation
    # to the required format for the Resize transformation
    def _transform(img, target):
        boxes = target["boxes"]
        transformed_img, transformed_boxes = T.Resize(
            input_shape, preserve_aspect_ratio=args.keep_ratio, symmetric_pad=args.symmetric_pad
        )(img, boxes)
        return transformed_img, {"boxes": transformed_boxes, "labels": target["labels"]}

    predictor = ocr_predictor(
        args.detection,
        args.recognition,
        reco_bs=args.batch_size,
        preserve_aspect_ratio=False,  # we handle the transformation directly in the dataset so this is set to False
        symmetric_pad=False,  # we handle the transformation directly in the dataset so this is set to False
        assume_straight_pages=not args.rotation,
        load_in_8_bit=args.load_8bit,
        det_engine_cfg=EngineConfig(providers=["CPUExecutionProvider"]) if args.force_cpu else None,
        reco_engine_cfg=EngineConfig(providers=["CPUExecutionProvider"]) if args.force_cpu else None,
        clf_engine_cfg=EngineConfig(providers=["CPUExecutionProvider"]) if args.force_cpu else None,
    )

    # Load the dataset
    train_set = datasets.__dict__[args.dataset](
        train=True,
        download=True,
        use_polygons=not args.eval_straight,
        sample_transforms=_transform,
    )
    val_set = datasets.__dict__[args.dataset](
        train=False,
        download=True,
        use_polygons=not args.eval_straight,
        sample_transforms=_transform,
    )
    sets = [train_set, val_set]

    reco_metric = TextMatch()
    det_metric = LocalizationConfusion(iou_thresh=args.iou, use_polygons=not args.eval_straight)
    e2e_metric = OCRMetric(iou_thresh=args.iou, use_polygons=not args.eval_straight)

    sample_idx = 0
    # Axis-aligned crops for straight eval, rotated crops otherwise
    extraction_fn = extract_crops if args.eval_straight else extract_rcrops

    timings = []
    # Warmup
    print("Warming up the model...")
    dummy_img = np.zeros((args.size, args.size, 3), dtype=np.uint8)
    for _ in range(5):
        _ = predictor([dummy_img])
    print("Warmup done.\n")

    for dataset in sets:
        for page, target in tqdm(dataset):
            # Normalize the page to HWC uint8, whatever the dataset backend yields
            if hasattr(page, "numpy"):
                page = page.numpy()
            if page.ndim == 3 and page.shape[0] in [1, 3]:
                page = np.moveaxis(page, 0, -1)
            if page.dtype != np.uint8:
                page = (page * 255).astype(np.uint8) if np.max(page) <= 1 else page.astype(np.uint8)
            # GT
            gt_boxes = target["boxes"]
            gt_labels = target["labels"]

            # Forward
            if args.profiling:
                yappi.start()
            start_ts = time.perf_counter()
            out = predictor(page[None, ...])
            timings.append(time.perf_counter() - start_ts)
            if args.profiling:
                yappi.stop()

            # Recognition is also scored on ground-truth crops, independent of detection
            crops = extraction_fn(page, gt_boxes, channels_last=True)
            reco_out = predictor.reco_predictor(crops)
            reco_words: Any = []
            if len(reco_out):
                reco_words, _ = zip(*reco_out)

            # Unpack preds
            pred_boxes: list[list[Any]] = []
            pred_labels: list[str] = []
            # NOTE(review): ``page`` is shadowed here by the predicted page object;
            # harmless since the image array is no longer needed this iteration,
            # but worth renaming at some point.
            for page in out.pages:
                height, width = page.dimensions
                for block in page.blocks:
                    for line in block.lines:
                        for word in line.words:
                            if not args.rotation:
                                (a, b), (c, d) = word.geometry
                            else:
                                (
                                    [x1, y1],
                                    [x2, y2],
                                    [x3, y3],
                                    [x4, y4],
                                ) = word.geometry
                            # Scale relative coords to pixels when GT boxes are integer
                            if np.issubdtype(gt_boxes.dtype, np.integer):
                                if not args.rotation:
                                    pred_boxes.append([
                                        int(a * width),
                                        int(b * height),
                                        int(c * width),
                                        int(d * height),
                                    ])
                                else:
                                    if args.eval_straight:
                                        # Collapse the polygon to its axis-aligned bounding box
                                        pred_boxes.append([
                                            int(width * min(x1, x2, x3, x4)),
                                            int(height * min(y1, y2, y3, y4)),
                                            int(width * max(x1, x2, x3, x4)),
                                            int(height * max(y1, y2, y3, y4)),
                                        ])
                                    else:
                                        pred_boxes.append([
                                            [int(x1 * width), int(y1 * height)],
                                            [int(x2 * width), int(y2 * height)],
                                            [int(x3 * width), int(y3 * height)],
                                            [int(x4 * width), int(y4 * height)],
                                        ])
                            else:
                                if not args.rotation:
                                    pred_boxes.append([a, b, c, d])
                                else:
                                    if args.eval_straight:
                                        pred_boxes.append([
                                            min(x1, x2, x3, x4),
                                            min(y1, y2, y3, y4),
                                            max(x1, x2, x3, x4),
                                            max(y1, y2, y3, y4),
                                        ])
                                    else:
                                        pred_boxes.append([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
                            pred_labels.append(word.value)

            # Update the metric
            det_metric.update(gt_boxes, np.asarray(pred_boxes))
            reco_metric.update(gt_labels, reco_words)
            e2e_metric.update(gt_boxes, np.asarray(pred_boxes), gt_labels, pred_labels)

            # Loop break
            sample_idx += 1
            if isinstance(args.samples, int) and args.samples == sample_idx:
                break
        if isinstance(args.samples, int) and args.samples == sample_idx:
            break

    # Unpack aggregated metrics
    print(f"Model Evaluation (model= {args.detection} + {args.recognition}, dataset={args.dataset})")
    recall, precision, mean_iou = det_metric.summary()
    print(f"Text Detection - Recall: {_pct(recall)}, Precision: {_pct(precision)}, Mean IoU: {_pct(mean_iou)}")
    acc = reco_metric.summary()
    print(f"Text Recognition - Accuracy: {_pct(acc['raw'])} (unicase: {_pct(acc['unicase'])})")
    recall, precision, mean_iou = e2e_metric.summary()
    print(
        f"OCR - Recall: {_pct(recall['raw'])} (unicase: {_pct(recall['unicase'])}), "
        f"Precision: {_pct(precision['raw'])} (unicase: {_pct(precision['unicase'])}), Mean IoU: {_pct(mean_iou)}\n"
    )
    print(f"Number of samples: {sample_idx}")
    print(f"Total inference time: {np.sum(timings):.2f} sec")
    print(f"Average inference time per sample: {np.mean(timings):.6f} sec")

    if args.profiling:
        import subprocess

        memray_tracker.__exit__(None, None, None)
        with open("yappi_profile.stats", "w") as f:
            yappi.get_func_stats().print_all(out=f)
        print("Profiling complete. Generating memray flamegraph and stats...")
        subprocess.run(["memray", "flamegraph", "memray_profile.bin", "-o", "memray_flamegraph.html"])
        subprocess.run(["memray", "stats", "memray_profile.bin"])
def parse_args():
    """Parse CLI arguments for the end-to-end evaluation script.

    Returns:
        argparse.Namespace with detection/recognition model names, dataset and
        evaluation options.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="OnnxTR end-to-end evaluation", formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("detection", type=str, help="Text detection model to use for analysis")
    parser.add_argument("recognition", type=str, help="Text recognition model to use for analysis")
    parser.add_argument("--iou", type=float, default=0.5, help="IoU threshold to match a pair of boxes")
    parser.add_argument("--dataset", type=str, default="FUNSD", help="choose a dataset: FUNSD, CORD")
    parser.add_argument("--rotation", dest="rotation", action="store_true", help="run rotated OCR + postprocessing")
    parser.add_argument("-b", "--batch_size", type=int, default=32, help="batch size for recognition")
    parser.add_argument("--size", type=int, default=1024, help="model input size, H = W")
    parser.add_argument("--keep_ratio", action="store_true", help="keep the aspect ratio of the input image")
    parser.add_argument("--symmetric_pad", action="store_true", help="pad the image symmetrically")
    parser.add_argument("--samples", type=int, default=None, help="evaluate only on the N first samples")
    parser.add_argument(
        "--eval-straight",
        action="store_true",
        help="evaluate on straight pages with straight bbox (to use the quick and light metric)",
    )
    parser.add_argument("--load_8bit", action="store_true", help="load model in 8bit mode")
    parser.add_argument("--force-cpu", action="store_true", help="force CPU execution")
    parser.add_argument("--profiling", action="store_true", help="enable profiling")
    args = parser.parse_args()
    return args
if __name__ == "__main__":
args = parse_args()
main(args)
================================================
FILE: scripts/latency.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
import argparse
import time
import numpy as np
from onnxtr.models import classification, detection, recognition
from onnxtr.models.classification.zoo import ORIENTATION_ARCHS
from onnxtr.models.detection.zoo import ARCHS as DETECTION_ARCHS
from onnxtr.models.recognition.zoo import ARCHS as RECOGNITION_ARCHS
def main(args):
    """Benchmark single-input inference latency for one OnnxTR architecture.

    Args:
        args: parsed CLI namespace; reads ``args.arch`` (architecture name),
            ``args.load8bit`` (use the 8-bit quantized weights) and ``args.it``
            (number of measured runs)

    Raises:
        ValueError: if ``args.arch`` is not found in any model zoo
    """
    # Resolve which zoo module hosts the requested architecture
    if args.arch in DETECTION_ARCHS:
        zoo = detection
    elif args.arch in RECOGNITION_ARCHS:
        zoo = recognition
    elif args.arch in ORIENTATION_ARCHS:
        zoo = classification
    else:
        raise ValueError(f"Unknown architecture {args.arch}")
    model = zoo.__dict__[args.arch](load_in_8_bit=args.load8bit)

    # Batch of 1 with the model's configured input shape, random data as feed
    size = (1, *model.cfg["input_shape"])
    img_tensor = np.random.rand(*size).astype(np.float32)

    # Warmup (not measured)
    for _ in range(10):
        _ = model(img_tensor)

    def _one_run() -> float:
        # Wall-clock duration of a single forward pass
        begin = time.perf_counter()
        _ = model(img_tensor)
        return time.perf_counter() - begin

    # Evaluation runs
    _timings = np.array([_one_run() for _ in range(args.it)])
    print(f"{args.arch} ({args.it} runs on ({size}) inputs)")
    print(f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms")
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="OnnxTR latency benchmark",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"arch",
type=str,
choices=DETECTION_ARCHS + RECOGNITION_ARCHS + ORIENTATION_ARCHS,
help="Architecture to benchmark",
)
parser.add_argument("--load8bit", action="store_true", help="Load the 8-bit quantized model")
parser.add_argument("--it", type=int, default=1000, help="Number of iterations to run")
args = parser.parse_args()
main(args)
================================================
FILE: scripts/quantize.py
================================================
import argparse
import os
import time
from enum import Enum
import numpy as np
import onnxruntime
from onnxruntime.quantization import CalibrationDataReader, QuantFormat, QuantType, quantize_dynamic, quantize_static
from onnxtr.io.image import read_img_as_numpy
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.geometry import shape_translate
class TaskShapes(Enum):
    """Enum class to define the shapes of the input tensors for different tasks"""

    # Values are (height, width) input resolutions per task
    crop_orientation = (256, 256)
    page_orientation = (512, 512)
    detection = (1024, 1024)
    recognition = (32, 128)
class CalibrationDataLoader(CalibrationDataReader):
    """Feeds preprocessed calibration images to the onnxruntime static quantizer.

    Args:
        calibration_image_folder: directory containing the calibration images
        model_path: path to the ONNX model, probed only for its input name
        task_shape: (height, width) the preprocessor resizes every image to
    """

    def __init__(self, calibration_image_folder: str, model_path: str, task_shape: tuple[int]):
        self.enum_data = None  # lazily-built iterator over input feeds
        self.preprocessor = PreProcessor(output_size=task_shape, batch_size=1)
        # Preprocess all images up-front; capped to bound memory usage
        self.dataset = [
            self.preprocessor(
                np.expand_dims(read_img_as_numpy(os.path.join(calibration_image_folder, img_file)), axis=0)
            )
            for img_file in os.listdir(calibration_image_folder)[:500]  # limit to 500 images
        ]
        # Throwaway session, used only to discover the model's input tensor name
        session = onnxruntime.InferenceSession(model_path, None)
        self.input_name = session.get_inputs()[0].name
        self.datasize = len(self.dataset)

    def get_next(self):
        """Return the next calibration input feed as {input_name: array}, or None when exhausted."""
        if self.enum_data is None:
            self.enum_data = iter([
                {self.input_name: shape_translate(input_data[0], format="BCHW")} for input_data in self.dataset
            ])
        return next(self.enum_data, None)

    def rewind(self):
        """Reset the feed iterator so calibration can restart from the first sample."""
        self.enum_data = None
def benchmark(calibration_image_folder: str, model_path: str, task_shape: tuple[int]):
    """Run a 10-iteration latency benchmark of an ONNX model and print per-run and average timings.

    Args:
        calibration_image_folder: directory of images used to build one input sample
        model_path: path to the ONNX model to benchmark
        task_shape: (height, width) preprocessing target for the sample
    """
    session = onnxruntime.InferenceSession(model_path)
    input_name = session.get_inputs()[0].name
    output_name = [output.name for output in session.get_outputs()]
    dataset = CalibrationDataLoader(calibration_image_folder, model_path, task_shape)
    sample = shape_translate(dataset.dataset[0][0], format="BCHW")  # take 1 sample for benchmarking
    runs = 10
    # Warming up
    _ = session.run(output_name, {input_name: sample})
    durations_ms = []
    for _ in range(runs):
        start = time.perf_counter()
        _ = session.run(output_name, {input_name: sample})
        elapsed_ms = (time.perf_counter() - start) * 1000
        durations_ms.append(elapsed_ms)
        print(f"{elapsed_ms:.2f}ms")
    print(f"Avg: {sum(durations_ms) / runs:.2f}ms")
def benchmark_mean_diff(
    calibration_image_folder: str, model_path: str, quantized_model_path: str, task_shape: tuple[int]
):
    """Check the mean difference between the original and quantized model

    Args:
        calibration_image_folder: directory of images used to build one input sample
        model_path: path to the original (reference) ONNX model
        quantized_model_path: path to the quantized ONNX model
        task_shape: (height, width) preprocessing target for the sample
    """
    session = onnxruntime.InferenceSession(model_path)
    quantized_session = onnxruntime.InferenceSession(quantized_model_path)
    input_name = session.get_inputs()[0].name
    output_name = [out.name for out in session.get_outputs()]
    quantized_output_name = [out.name for out in quantized_session.get_outputs()]
    loader = CalibrationDataLoader(calibration_image_folder, model_path, task_shape)
    sample = shape_translate(loader.dataset[0][0], format="BCHW")  # take 1 sample for benchmarking
    # Compare the first output head of both models on the same input
    reference_out = session.run(output_name, {input_name: sample})[0]
    quantized_out = quantized_session.run(quantized_output_name, {input_name: sample})[0]
    mean_diff = np.mean(np.abs(reference_out - quantized_out))
    print(f"Mean difference between original and quantized model: {mean_diff:.2f}")
def main(args):
    """Quantize the input model (static and/or dynamic int8) and benchmark the results."""
    input_model_path = args.input_model
    calibration_dataset_path = args.calibrate_dataset
    # Task → input shape lookup; unknown tasks fall back to the recognition shape,
    # mirroring the original if/elif chain.
    shape_by_task = {
        "crop_orientation": TaskShapes.crop_orientation.value,
        "page_orientation": TaskShapes.page_orientation.value,
        "detection": TaskShapes.detection.value,
    }
    task_shape = shape_by_task.get(args.task, TaskShapes.recognition.value)
    print(f"Task: {args.task} | Task shape: {task_shape}")
    dr = CalibrationDataLoader(calibration_dataset_path, input_model_path, task_shape)
    model_filename = input_model_path.split("/")[-1]
    base_model_name = model_filename.split("-")[0]
    static_out_name = base_model_name + "_static_8_bit.onnx"
    dynamic_out_name = base_model_name + "_dynamic_8_bit.onnx"
    print("benchmarking fp32 model...")
    benchmark(calibration_dataset_path, input_model_path, task_shape)
    # Calibrate and quantize model
    # Turn off model optimization during quantization
    if "parseq" not in input_model_path:  # Skip static quantization for Parseq
        print("Calibrating and quantizing model static...")
        try:
            quantize_static(
                input_model_path,
                static_out_name,
                dr,
                quant_format=args.quant_format,
                weight_type=QuantType.QInt8,
                activation_type=QuantType.QUInt8,
                reduce_range=True,
            )
        except Exception:
            # Some models reject signed int8 weights — retry fully unsigned
            print("Error during static quantization --> Change weight_type also to QUInt8")
            quantize_static(
                input_model_path,
                static_out_name,
                dr,
                quant_format=args.quant_format,
                weight_type=QuantType.QUInt8,
                activation_type=QuantType.QUInt8,
                reduce_range=True,
            )
        print("benchmarking static int8 model...")
        benchmark(calibration_dataset_path, static_out_name, task_shape)
        print("benchmarking mean difference between fp32 and static int8 model...")
        benchmark_mean_diff(calibration_dataset_path, input_model_path, static_out_name, task_shape)
        print("Calibrated and quantized static model saved.")
    if "sar" not in input_model_path:  # Skip dynamic quantization for SAR_ResNet31
        print("Dynamic int 8 quantization...")
        quantize_dynamic(
            input_model_path,
            dynamic_out_name,
            weight_type=QuantType.QUInt8,
        )
        print("Dynamic model saved.")
        print("benchmarking dynamic int8 model...")
        benchmark(calibration_dataset_path, dynamic_out_name, task_shape)
        print("benchmarking mean difference between fp32 and dynamic int8 model...")
        benchmark_mean_diff(calibration_dataset_path, input_model_path, dynamic_out_name, task_shape)
if __name__ == "__main__":
    # CLI: model path, task (selects the input shape), calibration data and quant format.
    arg_parser = argparse.ArgumentParser(
        description="OnnxTR script to quantize models and benchmark the quantized models",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    arg_parser.add_argument("--input_model", required=True, help="input model")
    arg_parser.add_argument(
        "--task",
        required=True,
        type=str,
        choices=["crop_orientation", "page_orientation", "detection", "recognition"],
        help="task shape",
    )
    arg_parser.add_argument(
        "--calibrate_dataset",
        type=str,
        required=True,
        help="calibration data set (word crop images for recognition, crop_orientation else page images for detection, page_orientation)",  # noqa
    )
    arg_parser.add_argument(
        "--quant_format",
        default=QuantFormat.QDQ,
        type=QuantFormat.from_string,
        choices=list(QuantFormat),
    )
    main(arg_parser.parse_args())
================================================
FILE: setup.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

import os
from pathlib import Path

from setuptools import setup

PKG_NAME = "onnxtr"
# Version can be overridden by CI via the BUILD_VERSION environment variable
VERSION = os.getenv("BUILD_VERSION", "0.8.2a0")

if __name__ == "__main__":
    print(f"Building wheel {PKG_NAME}-{VERSION}")
    # Dynamically set the __version__ attribute
    project_root = Path(__file__).parent.absolute()
    version_file = project_root.joinpath("onnxtr", "version.py")
    with open(version_file, "w", encoding="utf-8") as f:
        f.write(f"__version__ = '{VERSION}'\n")
    setup(name=PKG_NAME, version=VERSION)
================================================
FILE: tests/common/test_contrib.py
================================================
import numpy as np
import pytest
from onnxtr.contrib import artefacts
from onnxtr.contrib.base import _BasePredictor
from onnxtr.io import DocumentFile
def test_base_predictor():
    # Either a url or a model_path must be supplied
    with pytest.raises(ValueError):
        _ = _BasePredictor(batch_size=2)
    predictor = _BasePredictor(batch_size=2, url=artefacts.default_cfgs["yolov8_artefact"]["url"])
    # preprocess / postprocess are abstract hooks and must be overridden
    with pytest.raises(NotImplementedError):
        predictor.preprocess(np.zeros((10, 10, 3)))
    with pytest.raises(NotImplementedError):
        predictor.postprocess([np.zeros((10, 10, 3))], [[np.zeros((10, 10, 3))]])
def test_artefact_detector(mock_artefact_image_stream):
    doc = DocumentFile.from_images([mock_artefact_image_stream])
    detector = artefacts.ArtefactDetector(batch_size=2, conf_threshold=0.5, iou_threshold=0.5)
    results = detector(doc)
    # One result list per input page
    assert isinstance(results, list) and len(results) == 1 and isinstance(results[0], list)
    page_artefacts = results[0]
    assert all(isinstance(artefact, dict) for artefact in page_artefacts)
    # Every detection exposes label / confidence / box with the expected types
    assert all(key in page_artefacts[0] for key in ["label", "confidence", "box"])
    assert all(len(artefact["box"]) == 4 for artefact in page_artefacts)
    assert all(isinstance(coord, int) for artefact in page_artefacts for coord in artefact["box"])
    assert all(isinstance(artefact["confidence"], float) for artefact in page_artefacts)
    assert all(isinstance(artefact["label"], str) for artefact in page_artefacts)
    # The mock image is expected to contain exactly 9 artefacts
    assert len(page_artefacts) == 9
    # Non-blocking visualization so the suite does not hang
    detector.show(block=False)
================================================
FILE: tests/common/test_core.py
================================================
import pytest
import onnxtr
from onnxtr.file_utils import requires_package
def test_version():
    # Semantic versioning: exactly three dot-separated components
    version_parts = onnxtr.__version__.split(".")
    assert len(version_parts) == 3
def test_requires_package():
    requires_package("numpy")  # available
    with pytest.raises(ImportError):  # not available
        requires_package("non_existent_package")
================================================
FILE: tests/common/test_engine_cfg.py
================================================
import gc
import numpy as np
import psutil
import pytest
from onnxruntime import RunOptions, SessionOptions
from onnxtr import models
from onnxtr.io import Document
from onnxtr.models import EngineConfig, detection, recognition
from onnxtr.models.predictor import OCRPredictor
def _get_rss_mb():
    """Return the current process resident set size in MiB, after a GC pass."""
    gc.collect()
    rss_bytes = psutil.Process().memory_info().rss
    return rss_bytes / (1024 * 1024)
def _test_predictor(predictor):
    """Shared sanity checks for an OCRPredictor instance."""
    assert isinstance(predictor, OCRPredictor)
    doc = [np.zeros((1024, 1024, 3), dtype=np.uint8)]
    result = predictor(doc)
    # A single input page yields a Document with a single page
    assert isinstance(result, Document)
    assert len(result.pages) == 1
    # A 4D array is not a valid page and must be rejected
    with pytest.raises(ValueError):
        bad_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
        _ = predictor([bad_page])
@pytest.mark.parametrize(
    "det_arch, reco_arch",
    [[det_arch, reco_arch] for det_arch, reco_arch in zip(detection.zoo.ARCHS, recognition.zoo.ARCHS)],
)
def test_engine_cfg(det_arch, reco_arch):
    """EngineConfig must be propagated down to every underlying ONNX session."""
    session_options = SessionOptions()
    session_options.enable_cpu_mem_arena = False
    engine_cfg = EngineConfig(
        providers=["CPUExecutionProvider"],
        session_options=session_options,
    )
    assert repr(engine_cfg) == "EngineConfig(providers=['CPUExecutionProvider'])"

    def _check_cfg(model):
        # The config must reach the model's session: providers and session options
        assert model.providers == ["CPUExecutionProvider"]
        assert not model.session_options.enable_cpu_mem_arena

    # Predictor built from architecture names
    predictor = models.ocr_predictor(
        det_arch, reco_arch, det_engine_cfg=engine_cfg, reco_engine_cfg=engine_cfg, clf_engine_cfg=engine_cfg
    )
    _check_cfg(predictor.det_predictor.model)
    _check_cfg(predictor.reco_predictor.model)
    _test_predictor(predictor)

    # Passing model instances directly
    det_model = detection.__dict__[det_arch](engine_cfg=engine_cfg)
    _check_cfg(det_model)
    reco_model = recognition.__dict__[reco_arch](engine_cfg=engine_cfg)
    _check_cfg(reco_model)
    predictor = models.ocr_predictor(det_model, reco_model)
    _check_cfg(predictor.det_predictor.model)
    _check_cfg(predictor.reco_predictor.model)
    _test_predictor(predictor)

    # Task-specific predictors
    det_predictor = models.detection_predictor(det_arch, engine_cfg=engine_cfg)
    _check_cfg(det_predictor.model)
    reco_predictor = models.recognition_predictor(reco_arch, engine_cfg=engine_cfg)
    _check_cfg(reco_predictor.model)
def test_cpu_memory_arena_shrinkage_enabled():
    # Arena enabled with mem-pattern off, so per-run shrinkage requests take effect.
    session_options = SessionOptions()
    session_options.enable_mem_pattern = False
    session_options.enable_cpu_mem_arena = True
    # Toggled below; read by the closure on every inference run.
    enable_shrinkage = False
    providers = [("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]

    def enable_arena_shrinkage(run_options: "RunOptions") -> "RunOptions":
        # Run-options hook: when the flag is set, request CPU arena shrinkage for
        # this run and verify the entry was recorded.
        if enable_shrinkage:
            run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu:0")
            assert run_options.get_run_config_entry("memory.enable_memory_arena_shrinkage") == "cpu:0"
        return run_options

    engine_cfg = EngineConfig(
        providers=providers,
        session_options=session_options,
        run_options_provider=enable_arena_shrinkage,
    )
    predictor = models.ocr_predictor(
        det_engine_cfg=engine_cfg,
        reco_engine_cfg=engine_cfg,
        clf_engine_cfg=engine_cfg,
        detect_orientation=True,
    )
    # The config must reach both det and reco sessions
    assert predictor.det_predictor.model.providers == providers
    assert predictor.det_predictor.model.session_options.enable_cpu_mem_arena
    assert predictor.reco_predictor.model.providers == providers
    assert predictor.reco_predictor.model.session_options.enable_cpu_mem_arena
    # Fixed seed so both runs see the identical input
    rng = np.random.RandomState(seed=42)
    sample = rng.randint(0, 256, (1024, 1024, 3), dtype=np.uint8)
    # Baseline RSS, then one run without shrinkage: the arena should grow
    start_rss = _get_rss_mb()
    predictor([sample])
    increased_rss = _get_rss_mb()
    assert increased_rss > start_rss
    # Second run with shrinkage enabled: RSS should drop below the grown value
    enable_shrinkage = True
    predictor([sample])
    decreased_rss = _get_rss_mb()
    assert increased_rss > decreased_rss
================================================
FILE: tests/common/test_headers.py
================================================
"""Test for python files copyright headers."""
from datetime import datetime
from pathlib import Path
def test_copyright_header():
    # Expected header, with the year range ending at the current year
    copyright_header = "".join([
        f"# Copyright (C) {2021}-{datetime.now().year}, Mindee | Felix Dittrich.\n\n",
        "# This program is licensed under the Apache License 2.0.\n",
        "# See LICENSE or go to for full license details.\n",
    ])
    excluded_files = ["__init__.py", "version.py"]
    locations = [".github", "onnxtr"]
    repo_root = Path(__file__).parent.parent.parent
    invalid_files = [
        source_path
        for location in locations
        for source_path in repo_root.joinpath(location).rglob("*.py")
        if source_path.name not in excluded_files and copyright_header not in source_path.read_text()
    ]
    assert len(invalid_files) == 0, f"Invalid copyright header in the following files: {invalid_files}"
================================================
FILE: tests/common/test_io.py
================================================
from io import BytesIO
from pathlib import Path
import numpy as np
import pytest
import requests
from onnxtr import io
def _check_doc_content(doc_tensors, num_pages):
# 1 doc of 8 pages
assert len(doc_tensors) == num_pages
assert all(isinstance(page, np.ndarray) for page in doc_tensors)
assert all(page.dtype == np.uint8 for page in doc_tensors)
def test_read_pdf(mock_pdf):
    # str paths, Path objects and raw bytes are all accepted
    for source in (mock_pdf, Path(mock_pdf)):
        _check_doc_content(io.read_pdf(source), 2)
    with open(mock_pdf, "rb") as f:
        _check_doc_content(io.read_pdf(f.read()), 2)
    # Wrong input type
    with pytest.raises(TypeError):
        _ = io.read_pdf(123)
    # Wrong path
    with pytest.raises(FileNotFoundError):
        _ = io.read_pdf("my_imaginary_file.pdf")
def test_read_img_as_numpy(tmpdir_factory, mock_pdf):
    # Wrong input type
    with pytest.raises(TypeError):
        _ = io.read_img_as_numpy(123)
    # Non-existing file
    with pytest.raises(FileNotFoundError):
        io.read_img_as_numpy("my_imaginary_file.jpg")
    # A PDF is not decodable as an image
    with pytest.raises(ValueError):
        io.read_img_as_numpy(str(mock_pdf))
    # Fetch a reference image and write it to a temp file
    url = "https://doctr-static.mindee.com/models?id=v0.2.1/Grace_Hopper.jpg&src=0"
    payload = BytesIO(requests.get(url).content)
    tmp_path = str(tmpdir_factory.mktemp("data").join("mock_img_file.jpg"))
    with open(tmp_path, "wb") as f:
        f.write(payload.getbuffer())
    # Path & stream inputs must decode identically
    with open(tmp_path, "rb") as f:
        page_stream = io.read_img_as_numpy(f.read())
    for page in (io.read_img_as_numpy(tmp_path), page_stream):
        # Data type
        assert isinstance(page, np.ndarray)
        assert page.dtype == np.uint8
        # Shape
        assert page.shape == (606, 517, 3)
    # rgb_output=False returns the channel-reversed (BGR) image
    bgr_page = io.read_img_as_numpy(tmp_path, rgb_output=False)
    assert np.all(page == bgr_page[..., ::-1])
    # Resize to a target (height, width)
    target_size = (200, 150)
    resized_page = io.read_img_as_numpy(tmp_path, target_size)
    assert resized_page.shape[:2] == target_size
def test_read_html():
    # Rendering a web page yields raw PDF bytes
    pdf_stream = io.read_html("https://www.google.com")
    assert isinstance(pdf_stream, bytes)
def test_document_file(mock_pdf, mock_artefact_image_stream):
    # Images, PDFs and URLs are all valid DocumentFile entry points
    pages = io.DocumentFile.from_images([mock_artefact_image_stream])
    _check_doc_content(pages, 1)
    assert isinstance(io.DocumentFile.from_pdf(mock_pdf), list)
    assert isinstance(io.DocumentFile.from_url("https://www.google.com"), list)
def test_pdf(mock_pdf):
    # The 2-page mock PDF decodes into 2 image pages
    num_pages = 2
    pages = io.DocumentFile.from_pdf(mock_pdf)
    _check_doc_content(pages, num_pages)
================================================
FILE: tests/common/test_io_elements.py
================================================
from xml.etree.ElementTree import ElementTree
import numpy as np
import pytest
from onnxtr.io import elements
def _mock_words(size=(1.0, 1.0), offset=(0, 0), confidence=0.9, objectness_score=0.9, polygons=False):
    """Build two mock Word elements ('hello' / 'world') as boxes or 4-point polygons."""

    def _ori():
        # Fresh dict per word so elements never share mutable state
        return {"value": 0, "confidence": None}

    half = (size[0] / 2 + offset[0], size[1] / 2 + offset[1])
    full = (size[0] + offset[0], size[1] + offset[1])
    if polygons:
        # (x1, y1), (x2, y2), (x3, y3), (x4, y4) with shape (4, 2)
        hello_geom = np.array([
            [offset[0], offset[1]],
            [half[0], offset[1]],
            [half[0], half[1]],
            [offset[0], half[1]],
        ])
        world_geom = np.array([
            [half[0], half[1]],
            [full[0], half[1]],
            [full[0], full[1]],
            [half[0], full[1]],
        ])
    else:
        # ((xmin, ymin), (xmax, ymax)) boxes covering the two quadrants
        hello_geom = ((offset[0], offset[1]), half)
        world_geom = (half, full)
    return [
        elements.Word("hello", confidence, hello_geom, objectness_score, _ori()),
        elements.Word("world", confidence, world_geom, objectness_score, _ori()),
    ]
def _mock_artefacts(size=(1, 1), offset=(0, 0), confidence=0.8):
    """Build two mock qr_code Artefacts tiling the given area diagonally."""
    sub_size = (size[0] / 2, size[1] / 2)
    inner = (sub_size[0] + offset[0], sub_size[1] + offset[1])
    outer = (size[0] + offset[0], size[1] + offset[1])
    return [
        elements.Artefact("qr_code", confidence, ((offset[0], offset[1]), inner)),
        elements.Artefact("qr_code", confidence, (inner, outer)),
    ]
def _mock_lines(size=(1, 1), offset=(0, 0), polygons=False):
    """Build two mock Lines, the second shifted by half the area diagonally."""
    sub_size = (size[0] / 2, size[1] / 2)
    shifted = (offset[0] + sub_size[0], offset[1] + sub_size[1])
    return [
        elements.Line(_mock_words(size=sub_size, offset=offset, polygons=polygons)),
        elements.Line(_mock_words(size=sub_size, offset=shifted, polygons=polygons)),
    ]
def _mock_blocks(size=(1, 1), offset=(0, 0), polygons=False):
    """Build two mock Blocks laid out along the diagonal in quarter-size steps."""
    sub_size = (size[0] / 4, size[1] / 4)

    def _shift(steps):
        # Offset moved `steps` quarter-sizes along the diagonal
        return (offset[0] + steps * sub_size[0], offset[1] + steps * sub_size[1])

    return [
        elements.Block(
            _mock_lines(size=sub_size, offset=offset, polygons=polygons),
            _mock_artefacts(size=sub_size, offset=_shift(1)),
        ),
        elements.Block(
            _mock_lines(size=sub_size, offset=_shift(2), polygons=polygons),
            _mock_artefacts(size=sub_size, offset=_shift(3)),
        ),
    ]
def _mock_pages(block_size=(1, 1), block_offset=(0, 0), polygons=False):
    """Build two mock Pages with different sizes, orientations and languages."""
    # NOTE(review): the second page does not forward `polygons` (always boxes),
    # mirroring the original — confirm whether that asymmetry is intentional.
    first = elements.Page(
        np.random.randint(0, 255, (300, 200, 3), dtype=np.uint8),
        _mock_blocks(block_size, block_offset, polygons),
        0,
        (300, 200),
        {"value": 0.0, "confidence": 1.0},
        {"value": "EN", "confidence": 0.8},
    )
    second = elements.Page(
        np.random.randint(0, 255, (500, 1000, 3), dtype=np.uint8),
        _mock_blocks(block_size, block_offset),
        1,
        (500, 1000),
        {"value": 0.15, "confidence": 0.8},
        {"value": "FR", "confidence": 0.7},
    )
    return [first, second]
def test_element():
    # Unexpected sub-element types are rejected
    with pytest.raises(KeyError):
        elements.Element(sub_elements=[1])
def test_word():
    value = "hello"
    conf = 0.8
    geom = ((0, 0), (1, 1))
    objectness_score = 0.9
    crop_orientation = {"value": 0, "confidence": None}
    word = elements.Word(value, conf, geom, objectness_score, crop_orientation)
    # Attributes mirror the constructor arguments
    assert word.value == value
    assert word.confidence == conf
    assert word.geometry == geom
    assert word.objectness_score == objectness_score
    assert word.crop_orientation == crop_orientation
    # Rendering a word is just its text
    assert word.render() == value
    # Export round-trips every field
    assert word.export() == {
        "value": value,
        "confidence": conf,
        "geometry": geom,
        "objectness_score": objectness_score,
        "crop_orientation": crop_orientation,
    }
    # Repr
    assert repr(word) == f"Word(value='hello', confidence={conf:.2})"
    # from_dict rebuilds an equivalent element
    state_dict = {
        "value": "there",
        "confidence": 0.1,
        "geometry": ((0, 0), (0.5, 0.5)),
        "objectness_score": objectness_score,
        "crop_orientation": crop_orientation,
    }
    word = elements.Word.from_dict(state_dict)
    assert word.export() == state_dict
def test_line():
    geom = ((0, 0), (0.5, 0.5))
    objectness_score = 0.9
    words = _mock_words(size=geom[1], offset=geom[0])
    line = elements.Line(words)
    # Children and geometry
    assert len(line.words) == len(words)
    assert all(isinstance(word, elements.Word) for word in line.words)
    assert line.geometry == geom
    assert line.objectness_score == objectness_score
    # Rendering joins word values with spaces
    assert line.render() == "hello world"
    # Export
    assert line.export() == {
        "words": [w.export() for w in words],
        "geometry": geom,
        "objectness_score": objectness_score,
    }
    # Repr
    words_str = " " * 4 + ",\n ".join(repr(word) for word in words) + ","
    assert repr(line) == f"Line(\n (words): [\n{words_str}\n ]\n)"
    # Ensure that words repr does't span on several lines when there are none
    assert repr(elements.Line([], ((0, 0), (1, 1)))) == "Line(\n (words): []\n)"
    # from_dict round-trip
    state_dict = {
        "words": [
            {
                "value": "there",
                "confidence": 0.1,
                "geometry": ((0, 0), (1.0, 1.0)),
                "objectness_score": objectness_score,
                "crop_orientation": {"value": 0, "confidence": None},
            }
        ],
        "geometry": ((0, 0), (1.0, 1.0)),
        "objectness_score": objectness_score,
    }
    line = elements.Line.from_dict(state_dict)
    assert line.export() == state_dict
def test_artefact():
    artefact_type = "qr_code"
    conf = 0.8
    geom = ((0, 0), (1, 1))
    artefact = elements.Artefact(artefact_type, conf, geom)
    # Attributes mirror the constructor arguments
    assert artefact.type == artefact_type
    assert artefact.confidence == conf
    assert artefact.geometry == geom
    # Rendered as an uppercase placeholder tag
    assert artefact.render() == "[QR_CODE]"
    # Export
    assert artefact.export() == {"type": artefact_type, "confidence": conf, "geometry": geom}
    # Repr
    assert repr(artefact) == f"Artefact(type='{artefact_type}', confidence={conf:.2})"
def test_block():
    geom = ((0, 0), (1, 1))
    sub_size = (geom[1][0] / 2, geom[1][0] / 2)
    objectness_score = 0.9
    lines = _mock_lines(size=sub_size, offset=geom[0])
    artefacts = _mock_artefacts(size=sub_size, offset=sub_size)
    block = elements.Block(lines, artefacts)
    # Children are kept as typed element lists
    assert len(block.lines) == len(lines)
    assert len(block.artefacts) == len(artefacts)
    assert all(isinstance(line, elements.Line) for line in block.lines)
    assert all(isinstance(artefact, elements.Artefact) for artefact in block.artefacts)
    assert block.geometry == geom
    # Rendering joins line renders with newlines
    assert block.render() == "hello world\nhello world"
    # Export
    assert block.export() == {
        "lines": [line.export() for line in lines],
        "artefacts": [artefact.export() for artefact in artefacts],
        "geometry": geom,
        "objectness_score": objectness_score,
    }
def test_page():
    img = np.zeros((300, 200, 3), dtype=np.uint8)
    page_idx = 0
    page_size = (300, 200)
    orientation = {"value": 0.0, "confidence": 0.0}
    language = {"value": "EN", "confidence": 0.8}
    blocks = _mock_blocks()
    page = elements.Page(img, blocks, page_idx, page_size, orientation, language)
    # Attributes mirror the constructor arguments
    assert len(page.blocks) == len(blocks)
    assert all(isinstance(b, elements.Block) for b in page.blocks)
    assert isinstance(page.page, np.ndarray)
    assert page.page_idx == page_idx
    assert page.dimensions == page_size
    assert page.orientation == orientation
    assert page.language == language
    # Rendering joins block renders with blank lines
    assert page.render() == "hello world\nhello world\n\nhello world\nhello world"
    # Export
    assert page.export() == {
        "blocks": [b.export() for b in blocks],
        "page_idx": page_idx,
        "dimensions": page_size,
        "orientation": orientation,
        "language": language,
    }
    # XML export yields a (raw bytes, parsed tree) pair
    xml_output = page.export_as_xml()
    assert isinstance(xml_output, tuple)
    assert isinstance(xml_output[0], (bytes, bytearray))
    assert isinstance(xml_output[1], ElementTree)
    # Repr
    assert "\n".join(repr(page).split("\n")[:2]) == f"Page(\n dimensions={page_size!r}"
    # Non-blocking visualization
    page.show(block=False)
    # Synthesize back into an image of the page's dimensions
    synthesized = page.synthesize()
    assert isinstance(synthesized, np.ndarray)
    assert synthesized.shape == (*page_size, 3)
def test_document():
    pages = _mock_pages()
    doc = elements.Document(pages)
    # Children
    assert len(doc.pages) == len(pages)
    assert all(isinstance(p, elements.Page) for p in doc.pages)
    # Rendering joins page renders with blank lines
    page_export = "hello world\nhello world\n\nhello world\nhello world"
    assert doc.render() == f"{page_export}\n\n\n\n{page_export}"
    # Export
    assert doc.export() == {"pages": [p.export() for p in pages]}
    # XML export: one (bytes, tree) pair per page, structured as hOCR
    xml_output = doc.export_as_xml()
    assert isinstance(xml_output, list) and len(xml_output) == len(pages)
    for xml_bytes, xml_tree in xml_output:
        assert isinstance(xml_bytes, bytes)
        assert isinstance(xml_tree, ElementTree)
        root = xml_tree.getroot()
        assert root.tag == "html"
        assert root[0].tag == "head"
        body = root[1]
        assert body.tag == "body"
        ocr_page = body[0]
        assert ocr_page.tag == "div" and ocr_page.attrib["class"] == "ocr_page"
        for area in ocr_page:
            assert area.tag == "div" and area.attrib["class"] == "ocr_carea"
            paragraph = area[0]
            assert paragraph.tag == "p" and paragraph.attrib["class"] == "ocr_par"
            for line in paragraph:
                assert line.tag == "span" and line.attrib["class"] == "ocr_line"
                for word in line:
                    assert word.tag == "span" and word.attrib["class"] == "ocrx_word"
    # Non-blocking visualization
    doc.show(block=False)
    # Synthesize: one image per page
    img_list = doc.synthesize()
    assert isinstance(img_list, list) and len(img_list) == len(pages)
================================================
FILE: tests/common/test_models.py
================================================
from io import BytesIO
import cv2
import numpy as np
import pytest
import requests
from onnxtr.io import reader
from onnxtr.models._utils import estimate_orientation, get_language
from onnxtr.utils import geometry
@pytest.fixture(scope="function")
def mock_image(tmpdir_factory):
    """Download a sample bitmap, cache it on disk, and return it as a numpy image."""
    url = "https://doctr-static.mindee.com/models?id=v0.2.1/bitmap30.png&src=0"
    payload = BytesIO(requests.get(url).content)
    tmp_path = str(tmpdir_factory.mktemp("data").join("mock_bitmap.jpg"))
    with open(tmp_path, "wb") as f:
        f.write(payload.getbuffer())
    return reader.read_img_as_numpy(tmp_path)
@pytest.fixture(scope="function")
def mock_bitmap(mock_image):
    """Grayscale, [0, 1]-scaled version of mock_image with a trailing channel axis."""
    gray = np.squeeze(cv2.cvtColor(mock_image, cv2.COLOR_BGR2GRAY) / 255.0)
    return np.expand_dims(gray, axis=-1)
def test_estimate_orientation(mock_image, mock_bitmap, mock_tilted_payslip):
    # A blank (all-zero) image has no estimable orientation
    assert estimate_orientation(mock_image * 0) == 0
    # test binarized image
    # NOTE(review): these bounds are one-sided — `abs(angle) - 30 < 1.0` also holds
    # for small angles, so they only guard against over-estimation. Confirm intent.
    angle = estimate_orientation(mock_bitmap)
    assert abs(angle) - 30 < 1.0
    angle = estimate_orientation(mock_bitmap * 255)
    assert abs(angle) - 30.0 < 1.0
    angle = estimate_orientation(mock_image)
    assert abs(angle) - 30.0 < 1.0
    # Rotating by the estimated angle should level the image
    rotated = geometry.rotate_image(mock_image, angle)
    angle_rotated = estimate_orientation(rotated)
    assert abs(angle_rotated) == 0
    mock_tilted_payslip = reader.read_img_as_numpy(mock_tilted_payslip)
    assert estimate_orientation(mock_tilted_payslip) == -30
    rotated = geometry.rotate_image(mock_tilted_payslip, -30, expand=True)
    angle_rotated = estimate_orientation(rotated)
    assert abs(angle_rotated) < 1.0
    # A (10, 10, 10) array is rejected (AssertionError from the input checks)
    with pytest.raises(AssertionError):
        estimate_orientation(np.ones((10, 10, 10)))
    # test with general_page_orientation
    assert estimate_orientation(mock_bitmap, (90, 0.9)) in range(140, 160)
    rotated = geometry.rotate_image(mock_tilted_payslip, -30)
    assert estimate_orientation(rotated, (0, 0.9)) in range(-10, 10)
    assert estimate_orientation(mock_image, (0, 0.9)) - 30 < 1.0
    # Aspect Ratio Independence (Portrait vs Landscape)
    # Pad the tilted image to be very tall (Portrait)
    portrait_img = cv2.copyMakeBorder(mock_tilted_payslip, 500, 500, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    # Pad the tilted image to be very wide (Landscape)
    landscape_img = cv2.copyMakeBorder(mock_tilted_payslip, 0, 0, 500, 500, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    assert abs(estimate_orientation(portrait_img) - (-30)) <= 1.0
    assert abs(estimate_orientation(landscape_img) - (-30)) <= 1.0
    # Perpendicular Noise Test
    # A single vertical line must not bias the estimate away from 0
    vertical_noise = np.zeros((1000, 1000, 3), dtype=np.uint8)
    cv2.line(vertical_noise, (500, 100), (500, 900), (255, 255, 255), 10)
    assert estimate_orientation(vertical_noise) == 0
def test_get_lang():
    # A clear English sentence is detected with high confidence
    sentence = "This is a test sentence."
    expected_lang = "en"
    threshold_prob = 0.99
    lang = get_language(sentence)
    assert lang[0] == expected_lang
    assert lang[1] > threshold_prob
    # Too-short input cannot be classified
    lang = get_language("a")
    assert lang[0] == "unknown"
    assert lang[1] == 0.0
================================================
FILE: tests/common/test_models_builder.py
================================================
import numpy as np
import pytest
from onnxtr.io import Document
from onnxtr.models import builder
words_per_page = 10
def test_documentbuilder():
    num_pages = 2
    # Don't resolve lines
    doc_builder = builder.DocumentBuilder(resolve_lines=False, resolve_blocks=False)
    pages = [np.zeros((100, 200, 3))] * num_pages
    boxes = np.random.rand(words_per_page, 6)  # array format
    # NOTE(review): scales rows 0-1 by rows 2-3 in place — presumably to keep
    # coordinates ordered; confirm against DocumentBuilder's expected box format.
    boxes[:2] *= boxes[2:4]
    objectness_scores = np.array([0.9] * words_per_page)
    # Arg consistency check: word-value count mismatching page count must raise
    with pytest.raises(ValueError):
        doc_builder(
            pages,
            [boxes, boxes],
            [objectness_scores, objectness_scores],
            [("hello", 1.0)] * 3,
            [(100, 200), (100, 200)],
            [{"value": 0, "confidence": None}] * 3,
        )
    out = doc_builder(
        pages,
        [boxes, boxes],
        [objectness_scores, objectness_scores],
        [[("hello", 1.0)] * words_per_page] * num_pages,
        [(100, 200), (100, 200)],
        [[{"value": 0, "confidence": None}] * words_per_page] * num_pages,
    )
    assert isinstance(out, Document)
    assert len(out.pages) == num_pages
    assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all(
        page.page.shape == (100, 200, 3) for page in out.pages
    )
    # 1 Block & 1 line per page
    assert len(out.pages[0].blocks) == 1 and len(out.pages[0].blocks[0].lines) == 1
    assert len(out.pages[0].blocks[0].lines[0].words) == words_per_page
    # Resolve lines
    doc_builder = builder.DocumentBuilder(resolve_lines=True, resolve_blocks=True)
    out = doc_builder(
        pages,
        [boxes, boxes],
        [objectness_scores, objectness_scores],
        [[("hello", 1.0)] * words_per_page] * num_pages,
        [(100, 200), (100, 200)],
        [[{"value": 0, "confidence": None}] * words_per_page] * num_pages,
    )
    # No detection: empty boxes yield a page with no blocks
    boxes = np.zeros((0, 4))
    objectness_scores = np.zeros([0])
    out = doc_builder(
        pages, [boxes, boxes], [objectness_scores, objectness_scores], [[], []], [(100, 200), (100, 200)], [[]]
    )
    assert len(out.pages[0].blocks) == 0
    # Rotated boxes to export as straight boxes
    boxes = np.array([
        [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
        [[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]],
    ])
    objectness_scores = np.array([0.99, 0.99])
    doc_builder_2 = builder.DocumentBuilder(resolve_blocks=False, resolve_lines=False, export_as_straight_boxes=True)
    out = doc_builder_2(
        [np.zeros((100, 100, 3))],
        [boxes],
        [objectness_scores],
        [[("hello", 0.99), ("word", 0.99)]],
        [(100, 100)],
        [[{"value": 0, "confidence": None}] * 2],
    )
    # The exported straight box is the axis-aligned envelope of the rotated polygon
    assert out.pages[0].blocks[0].lines[0].words[-1].geometry == ((0.45, 0.5), (0.6, 0.65))
    assert out.pages[0].blocks[0].lines[0].words[-1].objectness_score == 0.99
    # Repr
    assert (
        repr(doc_builder) == "DocumentBuilder(resolve_lines=True, "
        "resolve_blocks=True, paragraph_break=0.035, export_as_straight_boxes=False)"
    )
@pytest.mark.parametrize(
    "input_boxes, sorted_idxs",
    [
        [[[0, 0.5, 0.1, 0.6], [0, 0.3, 0.2, 0.4], [0, 0, 0.1, 0.1]], [2, 1, 0]],  # vertical
        [[[0.7, 0.5, 0.85, 0.6], [0.2, 0.3, 0.4, 0.4], [0, 0, 0.1, 0.1]], [2, 1, 0]],  # diagonal
        [[[0, 0.5, 0.1, 0.6], [0.15, 0.5, 0.25, 0.6], [0.5, 0.5, 0.6, 0.6]], [0, 1, 2]],  # same line, 2p
        [[[0, 0.5, 0.1, 0.6], [0.2, 0.49, 0.35, 0.59], [0.8, 0.52, 0.9, 0.63]], [0, 1, 2]],  # ~same line
        [[[0, 0.3, 0.4, 0.45], [0.5, 0.28, 0.75, 0.42], [0, 0.45, 0.1, 0.55]], [0, 1, 2]],  # 2 lines
        [[[0, 0.3, 0.4, 0.35], [0.75, 0.28, 0.95, 0.42], [0, 0.45, 0.1, 0.55]], [0, 1, 2]],  # 2 lines
        [
            [
                [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
                [[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]],
            ],
            [0, 1],
        ],  # rot
    ],
)
def test_sort_boxes(input_boxes, sorted_idxs):
    # _sort_boxes returns the reading-order permutation as its first element
    doc_builder = builder.DocumentBuilder()
    sorted_indices = doc_builder._sort_boxes(np.asarray(input_boxes))[0]
    assert sorted_indices.tolist() == sorted_idxs
@pytest.mark.parametrize(
    "input_boxes, lines",
    [
        [[[0, 0.5, 0.1, 0.6], [0, 0.3, 0.2, 0.4], [0, 0, 0.1, 0.1]], [[2], [1], [0]]],  # vertical
        [[[0.7, 0.5, 0.85, 0.6], [0.2, 0.3, 0.4, 0.4], [0, 0, 0.1, 0.1]], [[2], [1], [0]]],  # diagonal
        [[[0, 0.5, 0.14, 0.6], [0.15, 0.5, 0.25, 0.6], [0.5, 0.5, 0.6, 0.6]], [[0, 1], [2]]],  # same line, 2p
        [[[0, 0.5, 0.18, 0.6], [0.2, 0.48, 0.35, 0.58], [0.8, 0.52, 0.9, 0.63]], [[0, 1], [2]]],  # ~same line
        [[[0, 0.3, 0.48, 0.45], [0.5, 0.28, 0.75, 0.42], [0, 0.45, 0.1, 0.55]], [[0, 1], [2]]],  # 2 lines
        [[[0, 0.3, 0.4, 0.35], [0.75, 0.28, 0.95, 0.42], [0, 0.45, 0.1, 0.55]], [[0], [1], [2]]],  # 2 lines
        [
            [
                [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
                [[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]],
            ],
            [[0], [1]],
        ],  # rot
    ],
)
def test_resolve_lines(input_boxes, lines):
    # _resolve_lines groups box indices into per-line lists
    doc_builder = builder.DocumentBuilder()
    assert doc_builder._resolve_lines(np.asarray(input_boxes)) == lines
================================================
FILE: tests/common/test_models_classification.py
================================================
import cv2
import numpy as np
import pytest
from onnxtr.models import classification, detection
from onnxtr.models.classification.predictor import OrientationPredictor
from onnxtr.models.engine import Engine
@pytest.mark.parametrize(
    "arch_name, input_shape",
    [
        ["mobilenet_v3_small_crop_orientation", (256, 256, 3)],
        ["mobilenet_v3_small_page_orientation", (512, 512, 3)],
    ],
)
def test_classification_models(arch_name, input_shape):
    # Each classification arch maps a batch to a (batch, 4) ndarray
    batch_size = 8
    model = classification.__dict__[arch_name]()
    assert isinstance(model, Engine)
    batch = np.random.rand(batch_size, *input_shape).astype(np.float32)
    predictions = model(batch)
    assert isinstance(predictions, np.ndarray)
    assert predictions.shape == (8, 4)
@pytest.mark.parametrize(
    "arch_name",
    [
        "mobilenet_v3_small_crop_orientation",
        "mobilenet_v3_small_page_orientation",
    ],
)
def test_classification_zoo(arch_name):
    # Crop and page predictors use different batch sizes and input resolutions
    if "crop" in arch_name:
        batch_size = 16
        input_array = np.random.rand(batch_size, 3, 256, 256).astype(np.float32)
        predictor = classification.zoo.crop_orientation_predictor(arch_name)
        # Unknown architectures are rejected
        with pytest.raises(ValueError):
            predictor = classification.zoo.crop_orientation_predictor(arch="wrong_model")
    else:
        batch_size = 2
        input_array = np.random.rand(batch_size, 3, 512, 512).astype(np.float32)
        predictor = classification.zoo.page_orientation_predictor(arch_name)
        with pytest.raises(ValueError):
            predictor = classification.zoo.page_orientation_predictor(arch="wrong_model")
    assert isinstance(predictor, OrientationPredictor)
    out = predictor(input_array)
    class_idxs, classes, confs = out[0], out[1], out[2]
    # One prediction triple per batch element, with the expected types/ranges
    for output in (class_idxs, classes, confs):
        assert isinstance(output, list) and len(output) == batch_size
    assert all(isinstance(pred, int) for pred in class_idxs)
    assert all(isinstance(pred, int) for pred in classes) and all(pred in [0, 90, 180, -90] for pred in classes)
    assert all(isinstance(pred, float) for pred in confs)
@pytest.mark.parametrize("quantized", [False, True])
def test_crop_orientation_model(mock_text_box, quantized):
    """Check that the crop-orientation classifier identifies 0/90/180/270-degree rotations."""
    text_box_0 = cv2.imread(mock_text_box)
    # np.rot90 rotates counter-clockwise
    text_box_270 = np.rot90(text_box_0, 1)
    text_box_180 = np.rot90(text_box_0, 2)
    text_box_90 = np.rot90(text_box_0, 3)
    classifier = classification.crop_orientation_predictor(
        "mobilenet_v3_small_crop_orientation", load_in_8_bit=quantized
    )
    # First output list: class indices in rotation order
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3]
    # 270 degrees is equivalent to -90 degrees
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90]
    # Third output list: confidences as plain floats
    assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2])
    # Test custom model loading
    classifier = classification.crop_orientation_predictor(
        classification.mobilenet_v3_small_crop_orientation(load_in_8_bit=quantized)
    )
    assert isinstance(classifier, OrientationPredictor)
    # A detection model is not a valid orientation classifier
    with pytest.raises(ValueError):
        _ = classification.crop_orientation_predictor(detection.db_resnet34())
    # Test with disabled predictor: fixed outputs (class 0, angle 0, confidence 1.0)
    classifier = classification.crop_orientation_predictor("mobilenet_v3_small_crop_orientation", disabled=True)
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90]) == [
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [1.0, 1.0, 1.0, 1.0],
    ]
@pytest.mark.parametrize("quantized", [False, True])
def test_page_orientation_model(mock_payslip, quantized):
    """Check that the page-orientation classifier identifies 0/90/180/270-degree rotations.

    Fix: build the predictor via `page_orientation_predictor` (512x512 preprocessing)
    instead of `crop_orientation_predictor` (256x256), which fed the page model
    inputs at the wrong resolution and was inconsistent with the crop test above.
    """
    text_box_0 = cv2.imread(mock_payslip)
    # np.rot90 rotates counter-clockwise
    text_box_270 = np.rot90(text_box_0, 1)
    text_box_180 = np.rot90(text_box_0, 2)
    text_box_90 = np.rot90(text_box_0, 3)
    classifier = classification.page_orientation_predictor(
        "mobilenet_v3_small_page_orientation", load_in_8_bit=quantized
    )
    # First output list: class indices in rotation order
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3]
    # 270 degrees is equivalent to -90 degrees
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90]
    # Third output list: confidences as plain floats
    assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2])
    # Test custom model loading
    classifier = classification.page_orientation_predictor(
        classification.mobilenet_v3_small_page_orientation(load_in_8_bit=quantized)
    )
    assert isinstance(classifier, OrientationPredictor)
    # A detection model is not a valid orientation classifier
    with pytest.raises(ValueError):
        _ = classification.page_orientation_predictor(detection.db_resnet34())
    # Test with disabled predictor: fixed outputs (class 0, angle 0, confidence 1.0)
    classifier = classification.page_orientation_predictor("mobilenet_v3_small_page_orientation", disabled=True)
    assert classifier([text_box_0, text_box_270, text_box_180, text_box_90]) == [
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [1.0, 1.0, 1.0, 1.0],
    ]
================================================
FILE: tests/common/test_models_detection.py
================================================
import numpy as np
import pytest
from onnxtr.models import detection
from onnxtr.models.detection.postprocessor.base import GeneralDetectionPostProcessor
from onnxtr.models.detection.predictor import DetectionPredictor
from onnxtr.models.engine import Engine
def test_postprocessor():
    """Validate GeneralDetectionPostProcessor output shapes, coordinate ranges and repr.

    Covers both the straight-page (axis-aligned boxes, shape (N, 5)) and the
    rotated-page (4-point polygons, shape (N, 4, 2) plus score) configurations,
    plus a known-tricky polygon for `polygon_to_box`.
    """
    postprocessor = GeneralDetectionPostProcessor(assume_straight_pages=True)
    r_postprocessor = GeneralDetectionPostProcessor(assume_straight_pages=False)
    # A 3D probability map (missing the channel axis) must be rejected
    with pytest.raises(AssertionError):
        postprocessor(np.random.rand(2, 512, 512).astype(np.float32))
    mock_batch = np.random.rand(2, 512, 512, 1).astype(np.float32)
    out = postprocessor(mock_batch)
    r_out = r_postprocessor(mock_batch)
    # Batch composition
    assert isinstance(out, list)
    assert len(out) == 2
    assert all(isinstance(sample, list) and all(isinstance(v, np.ndarray) for v in sample) for sample in out)
    # Straight pages: (xmin, ymin, xmax, ymax, score); rotated: 4 corner points + score
    assert all(all(v.shape[1] == 5 for v in sample) for sample in out)
    assert all(all(v.shape[1] == 5 and v.shape[2] == 2 for v in sample) for sample in r_out)
    # Relative coords
    assert all(all(np.all(np.logical_and(v[:, :4] >= 0, v[:, :4] <= 1)) for v in sample) for sample in out)
    assert all(all(np.all(np.logical_and(v[:, :4] >= 0, v[:, :4] <= 1)) for v in sample) for sample in r_out)
    # Repr
    assert repr(postprocessor) == "GeneralDetectionPostProcessor(bin_thresh=0.1, box_thresh=0.1)"
    # Edge case when the expanded points of the polygon has two lists
    issue_points = np.array(
        [
            [869, 561],
            [923, 581],
            [925, 595],
            [915, 583],
            [889, 583],
            [905, 593],
            [882, 601],
            [901, 595],
            [904, 604],
            [876, 608],
            [915, 614],
            [911, 605],
            [925, 601],
            [930, 616],
            [911, 617],
            [900, 636],
            [931, 637],
            [904, 649],
            [932, 649],
            [932, 628],
            [918, 627],
            [934, 624],
            [935, 573],
            [909, 569],
            [934, 562],
        ],
        dtype=np.int32,
    )
    out = postprocessor.polygon_to_box(issue_points)
    r_out = r_postprocessor.polygon_to_box(issue_points)
    # Straight mode collapses to a 4-tuple box; rotated mode keeps a (4, 2) polygon
    assert isinstance(out, tuple) and len(out) == 4
    assert isinstance(r_out, np.ndarray) and r_out.shape == (4, 2)
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "arch_name, input_shape, output_size, out_prob",
    [
        ["db_resnet34", (1024, 1024, 3), (1024, 1024, 1), True],
        ["db_resnet50", (1024, 1024, 3), (1024, 1024, 1), True],
        ["db_mobilenet_v3_large", (1024, 1024, 3), (1024, 1024, 1), True],
        ["linknet_resnet18", (1024, 1024, 3), (1024, 1024, 1), True],
        ["linknet_resnet34", (1024, 1024, 3), (1024, 1024, 1), True],
        ["linknet_resnet50", (1024, 1024, 3), (1024, 1024, 1), True],
        ["fast_tiny", (1024, 1024, 3), (1024, 1024, 1), True],
        ["fast_small", (1024, 1024, 3), (1024, 1024, 1), True],
        ["fast_base", (1024, 1024, 3), (1024, 1024, 1), True],
    ],
)
def test_detection_models(arch_name, input_shape, output_size, out_prob, quantized):
    """Smoke-test every detection architecture: probability map shape/range and box validity."""
    batch_size = 2
    model = detection.__dict__[arch_name](load_in_8_bit=quantized)
    assert isinstance(model, Engine)
    input_array = np.random.rand(batch_size, *input_shape).astype(np.float32)
    # return_model_output=True yields both the raw map and the decoded boxes
    out = model(input_array, return_model_output=True)
    assert isinstance(out, dict)
    assert len(out) == 2
    # Check proba map
    assert out["out_map"].shape == (batch_size, *output_size)
    assert out["out_map"].dtype == np.float32
    if out_prob:
        assert np.all(out["out_map"] >= 0) and np.all(out["out_map"] <= 1)
    # Check boxes: relative (xmin, ymin, xmax, ymax, score) with xmin < xmax, ymin < ymax
    for boxes_list in out["preds"]:
        for boxes in boxes_list:
            assert boxes.shape[1] == 5
            assert np.all(boxes[:, :2] < boxes[:, 2:4])
            assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1)
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "arch_name",
    [
        "db_resnet34",
        "db_resnet50",
        "db_mobilenet_v3_large",
        "linknet_resnet18",
        "linknet_resnet34",
        "linknet_resnet50",
        "fast_tiny",
        "fast_base",
        "fast_small",
    ],
)
def test_detection_zoo(arch_name, quantized):
    """Build each detection predictor through the zoo and validate boxes and seg maps."""
    # Model
    predictor = detection.zoo.detection_predictor(
        arch_name, load_in_8_bit=quantized, preserve_aspect_ratio=False, symmetric_pad=False
    )
    # object check
    assert isinstance(predictor, DetectionPredictor)
    input_array = np.random.rand(2, 3, 1024, 1024).astype(np.float32)
    # return_maps=True also yields the per-page segmentation maps
    out, seq_maps = predictor(input_array, return_maps=True)
    assert isinstance(out, list)
    for box in out:
        assert isinstance(box, np.ndarray)
        # Relative (xmin, ymin, xmax, ymax, score) boxes
        assert box.shape[1] == 5
        assert np.all(box[:, :2] < box[:, 2:4])
        assert np.all(box[:, :4] >= 0) and np.all(box[:, :4] <= 1)
    assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps)
    assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps)
    # check that all values in the seq_maps are between 0 and 1
    assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps)
================================================
FILE: tests/common/test_models_detection_utils.py
================================================
import numpy as np
import pytest
from onnxtr.models.detection._utils import _remove_padding
@pytest.mark.parametrize("pages", [[np.zeros((1000, 1000))], [np.zeros((1000, 2000))], [np.zeros((2000, 1000))]])
@pytest.mark.parametrize("preserve_aspect_ratio", [True, False])
@pytest.mark.parametrize("symmetric_pad", [True, False])
@pytest.mark.parametrize("assume_straight_pages", [True, False])
def test_remove_padding(pages, preserve_aspect_ratio, symmetric_pad, assume_straight_pages):
    """Check that `_remove_padding` maps predictions from padded back to original coordinates.

    Expected values depend on the page aspect ratio and the padding mode:
    square pages (or no aspect-ratio preservation) leave predictions untouched;
    otherwise coordinates are shifted/rescaled along the padded axis and clipped to [0, 1].
    """
    h, w = pages[0].shape
    # straight pages test cases
    if assume_straight_pages:
        loc_preds = [np.array([[0.7, 0.1, 0.7, 0.2]])]
        if h == w or not preserve_aspect_ratio:
            expected = loc_preds
        else:
            if symmetric_pad:
                if h > w:
                    expected = [np.array([[0.9, 0.1, 0.9, 0.2]])]
                else:
                    expected = [np.array([[0.7, 0.0, 0.7, 0.0]])]
            else:
                if h > w:
                    # Rescaled x exceeds the page and is clipped to 1.0
                    expected = [np.array([[1.0, 0.1, 1.0, 0.2]])]
                else:
                    expected = [np.array([[0.7, 0.2, 0.7, 0.4]])]
    # non-straight pages test cases
    else:
        loc_preds = [np.array([[[0.9, 0.1], [0.9, 0.2], [0.8, 0.2], [0.8, 0.2]]])]
        if h == w or not preserve_aspect_ratio:
            expected = loc_preds
        else:
            if symmetric_pad:
                if h > w:
                    expected = [np.array([[[1.0, 0.1], [1.0, 0.2], [1.0, 0.2], [1.0, 0.2]]])]
                else:
                    expected = [np.array([[[0.9, 0.0], [0.9, 0.0], [0.8, 0.0], [0.8, 0.0]]])]
            else:
                if h > w:
                    expected = [np.array([[[1.0, 0.1], [1.0, 0.2], [1.0, 0.2], [1.0, 0.2]]])]
                else:
                    expected = [np.array([[[0.9, 0.2], [0.9, 0.4], [0.8, 0.4], [0.8, 0.4]]])]
    result = _remove_padding(pages, loc_preds, preserve_aspect_ratio, symmetric_pad, assume_straight_pages)
    for res, exp in zip(result, expected):
        assert np.allclose(res, exp)
================================================
FILE: tests/common/test_models_factory.py
================================================
import json
import os
import tempfile
import pytest
from onnxtr import models
from onnxtr.models.factory import _save_model_and_config_for_hf_hub, from_hub, push_to_hf_hub
# Map each task name to the architectures exposed through its zoo;
# used below to iterate over every exportable model.
AVAILABLE_ARCHS = {
    "classification": models.classification.zoo.ORIENTATION_ARCHS,
    "detection": models.detection.zoo.ARCHS,
    "recognition": models.recognition.zoo.ARCHS,
}
def test_push_to_hf_hub():
    """push_to_hf_hub must validate its arguments before attempting any upload."""
    dummy_model = models.classification.mobilenet_v3_small_crop_orientation()

    # Neither run_config nor arch was specified
    with pytest.raises(ValueError):
        push_to_hf_hub(dummy_model, model_name="test", task="classification")

    # Task name outside {classification, detection, recognition, obj_detection}
    with pytest.raises(ValueError):
        push_to_hf_hub(dummy_model, model_name="test", task="invalid_task", arch="mobilenet_v3_small")

    # Architecture that does not belong to the requested task
    with pytest.raises(ValueError):
        push_to_hf_hub(dummy_model, model_name="test", task="detection", arch="crnn_mobilenet_v3_large")
def test_models_huggingface_hub(tmpdir):
    """Round-trip every exportable model through the HF-hub save format.

    For each (task, arch) pair: save model + config into a temp dir, check that
    exactly the two expected artifacts exist and the config matches the model,
    then reload the published counterpart from the hub.

    Fixes: close the config file deterministically (was a bare `json.load(open(...))`
    handle leak) and build paths with `os.path.join` instead of string concatenation.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        for task_name, archs in AVAILABLE_ARCHS.items():
            for arch_name in archs:
                model = models.__dict__[task_name].__dict__[arch_name]()
                _save_model_and_config_for_hf_hub(model, arch=arch_name, task=task_name, save_dir=tmp_dir)
                assert hasattr(model, "cfg")
                # Exactly the two expected artifacts: model.onnx + config.json
                assert len(os.listdir(tmp_dir)) == 2
                assert os.path.exists(os.path.join(tmp_dir, "model.onnx"))
                config_path = os.path.join(tmp_dir, "config.json")
                assert os.path.exists(config_path)
                with open(config_path) as config_file:
                    tmp_config = json.load(config_file)
                assert arch_name == tmp_config["arch"]
                assert task_name == tmp_config["task"]
                # Every saved config key must exist in the in-memory model config
                assert all(key in model.cfg.keys() for key in tmp_config.keys())
                # test from hub
                hub_model = from_hub(repo_id="Felix92/onnxtr-{}".format(arch_name).replace("_", "-"))
                assert isinstance(hub_model, type(model))
================================================
FILE: tests/common/test_models_preprocessor.py
================================================
import numpy as np
import pytest
from onnxtr.models.preprocessor import PreProcessor
@pytest.mark.parametrize(
    "batch_size, output_size, input_tensor, expected_batches, expected_value",
    [
        [2, (128, 128), np.full((3, 256, 128, 3), 255, dtype=np.uint8), 1, 0.5],  # numpy uint8
        [2, (128, 128), np.ones((3, 256, 128, 3), dtype=np.float32), 1, 0.5],  # numpy fp32
        [2, (128, 128), [np.full((256, 128, 3), 255, dtype=np.uint8)] * 3, 2, 0.5],  # list of numpy uint8
        [2, (128, 128), [np.ones((256, 128, 3), dtype=np.float32)] * 3, 2, 0.5],  # list of numpy fp32 list of tf fp32
    ],
)
def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, expected_value):
    """PreProcessor validates input types/ranks, then resizes, normalizes and batches."""
    processor = PreProcessor(output_size, batch_size)
    # Invalid input type
    with pytest.raises(TypeError):
        processor(42)
    # 4D check
    with pytest.raises(AssertionError):
        processor(np.full((256, 128, 3), 255, dtype=np.uint8))
    with pytest.raises(TypeError):
        processor(np.full((1, 256, 128, 3), 255, dtype=np.int32))
    # 3D check
    with pytest.raises(AssertionError):
        processor([np.full((3, 256, 128, 3), 255, dtype=np.uint8)])
    with pytest.raises(TypeError):
        processor([np.full((256, 128, 3), 255, dtype=np.int32)])
    out = processor(input_tensor)
    # Output is a list of float32 batches at the target size, with every pixel
    # mapped to the expected normalized value
    assert isinstance(out, list) and len(out) == expected_batches
    assert all(isinstance(b, np.ndarray) for b in out)
    assert all(b.dtype == np.float32 for b in out)
    assert all(b.shape[1:3] == output_size for b in out)
    assert all(np.all(b == expected_value) for b in out)
    # Repr spans the fixed four-line layout
    assert len(repr(processor).split("\n")) == 4
================================================
FILE: tests/common/test_models_recognition.py
================================================
import numpy as np
import pytest
from onnxtr.models import recognition
from onnxtr.models.engine import Engine
from onnxtr.models.recognition.core import RecognitionPostProcessor
from onnxtr.models.recognition.predictor import RecognitionPredictor
from onnxtr.models.recognition.predictor._utils import remap_preds, split_crops
from onnxtr.utils.vocabs import VOCABS
def test_recognition_postprocessor():
    """The base post-processor exposes its vocab and an embedding with a trailing blank entry."""
    vocab = VOCABS["french"]
    processor = RecognitionPostProcessor(vocab)
    assert processor.extra_repr() == f"vocab_size={len(vocab)}"
    assert processor.vocab == vocab
    # Embedding is every vocab character followed by the blank token
    assert processor._embedding == [*vocab, ""]
@pytest.mark.parametrize(
    "crops, max_ratio, target_ratio, target_overlap_ratio, channels_last, num_crops",
    [
        # No split required
        [[np.zeros((32, 128, 3), dtype=np.uint8)], 8, 4, 0.5, True, 1],
        [[np.zeros((3, 32, 128), dtype=np.uint8)], 8, 4, 0.5, False, 1],
        # Split required
        [[np.zeros((32, 1024, 3), dtype=np.uint8)], 8, 6, 0.5, True, 10],
        [[np.zeros((3, 32, 1024), dtype=np.uint8)], 8, 6, 0.5, False, 10],
    ],
)
def test_split_crops(crops, max_ratio, target_ratio, target_overlap_ratio, channels_last, num_crops):
    """Over-wide crops are split into `num_crops` pieces; the crop map keeps one entry per input."""
    pieces, mapping, remap_needed = split_crops(crops, max_ratio, target_ratio, target_overlap_ratio, channels_last)
    assert len(pieces) == num_crops
    assert len(mapping) == len(crops)
    # Remapping is required exactly when splitting produced extra crops
    assert remap_needed == (len(crops) != len(pieces))
@pytest.mark.parametrize(
    "preds, crop_map, split_overlap_ratio, pred",
    [
        # Nothing to remap
        ([("hello", 0.5)], [0], 0.5, [("hello", 0.5)]),
        # Merge
        ([("hellowo", 0.5), ("loworld", 0.6)], [(0, 2, 0.5)], 0.5, [("helloworld", 0.55)]),
    ],
)
def test_remap_preds(preds, crop_map, split_overlap_ratio, pred):
    """Predictions from split crops are merged back into one (text, confidence) pair per crop."""
    remapped = remap_preds(preds, crop_map, split_overlap_ratio)
    assert len(remapped) == len(pred)
    assert remapped == pred
    # Every entry is a (str, float) tuple
    assert all(isinstance(item, tuple) for item in remapped)
    assert all(isinstance(item[0], str) and isinstance(item[1], float) for item in remapped)
@pytest.mark.parametrize(
    "inputs, max_ratio, target_ratio, target_overlap_ratio, expected_remap_required, expected_len, expected_shape, "
    "expected_crop_map, channels_last",
    [
        # Don't split
        ([np.zeros((32, 32 * 4, 3))], 4, 4, 0.5, False, 1, (32, 128, 3), 0, True),
        # Split needed
        ([np.zeros((32, 32 * 4 + 1, 3))], 4, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.9921875), True),
        # Larger max ratio prevents split
        ([np.zeros((32, 32 * 8, 3))], 8, 4, 0.5, False, 1, (32, 256, 3), 0, True),
        # Half-overlap, two crops
        ([np.zeros((32, 128 + 64, 3))], 4, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.5), True),
        # Half-overlap, two crops, channels first
        ([np.zeros((3, 32, 128 + 64))], 4, 4, 0.5, True, 2, (3, 32, 128), (0, 2, 0.5), False),
        # Half-overlap with small max_ratio forces split
        ([np.zeros((32, 128 + 64, 3))], 2, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.5), True),
        # > half last overlap ratio
        ([np.zeros((32, 128 + 32, 3))], 4, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.75), True),
        # 3 crops, half last overlap
        ([np.zeros((32, 128 + 128, 3))], 4, 4, 0.5, True, 3, (32, 128, 3), (0, 3, 0.5), True),
        # 3 crops, > half last overlap
        ([np.zeros((32, 128 + 64 + 32, 3))], 4, 4, 0.5, True, 3, (32, 128, 3), (0, 3, 0.75), True),
        # Split into larger crops
        ([np.zeros((32, 192 * 2, 3))], 4, 6, 0.5, True, 3, (32, 192, 3), (0, 3, 0.5), True),
        # Test fallback for empty splits
        ([np.empty((1, 0, 3))], -1, 4, 0.5, False, 1, (1, 0, 3), (0), True),
    ],
)
def test_split_crops_cases(
    inputs,
    max_ratio,
    target_ratio,
    target_overlap_ratio,
    expected_remap_required,
    expected_len,
    expected_shape,
    expected_crop_map,
    channels_last,
):
    """Exhaustive split_crops cases: split count, piece shape and crop-map entries.

    The crop-map entry is either a plain index (no split) or a
    (start, end, last_overlap_ratio) tuple describing the split range.
    """
    new_crops, crop_map, _remap_required = split_crops(
        inputs,
        max_ratio=max_ratio,
        target_ratio=target_ratio,
        split_overlap_ratio=target_overlap_ratio,
        channels_last=channels_last,
    )
    assert _remap_required == expected_remap_required
    assert len(new_crops) == expected_len
    # One map entry per input crop
    assert len(crop_map) == 1
    if expected_remap_required:
        assert isinstance(crop_map[0], tuple)
        assert crop_map[0] == expected_crop_map
    for crop in new_crops:
        assert crop.shape == expected_shape
@pytest.mark.parametrize(
    "split_overlap_ratio",
    [
        # lower bound
        0.0,
        # upper bound
        1.0,
    ],
)
def test_invalid_split_overlap_ratio(split_overlap_ratio):
    """split_crops rejects overlap ratios at (and outside) the open interval (0, 1)."""
    dummy_crops = [np.zeros((32, 32 * 4, 3))]
    with pytest.raises(ValueError):
        split_crops(dummy_crops, max_ratio=4, target_ratio=4, split_overlap_ratio=split_overlap_ratio)
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "arch_name, input_shape",
    [
        ["crnn_vgg16_bn", (32, 128, 3)],
        ["crnn_mobilenet_v3_small", (32, 128, 3)],
        ["crnn_mobilenet_v3_large", (32, 128, 3)],
        ["sar_resnet31", (32, 128, 3)],
        ["master", (32, 128, 3)],
        ["vitstr_small", (32, 128, 3)],
        ["vitstr_base", (32, 128, 3)],
        ["parseq", (32, 128, 3)],
        ["viptr_tiny", (32, 128, 3)],
    ],
)
def test_recognition_models(arch_name, input_shape, quantized):
    """Smoke-test every recognition model: decoded predictions, raw output map, post-processor,
    and a second pass with a fixed batch size of 1."""
    mock_vocab = VOCABS["french"]
    batch_size = 4
    model = recognition.__dict__[arch_name](load_in_8_bit=quantized)
    assert isinstance(model, Engine)
    input_array = np.random.rand(batch_size, *input_shape).astype(np.float32)
    out = model(input_array, return_model_output=True)
    assert isinstance(out, dict)
    assert len(out) == 2
    # Decoded predictions: one (word, confidence in [0, 1]) pair per sample
    assert isinstance(out["preds"], list)
    assert len(out["preds"]) == batch_size
    assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
    assert isinstance(out["out_map"], np.ndarray)
    assert out["out_map"].shape[0] == 4
    # test model post processor
    post_processor = model.postprocessor
    decoded = post_processor(np.random.rand(2, len(mock_vocab), 30).astype(np.float32))
    assert isinstance(decoded, list)
    assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in decoded)
    assert len(decoded) == 2
    # Decoded characters must all come from the vocab
    assert all(char in mock_vocab for word, _ in decoded for char in word)
    # Testing with a fixed batch size: the engine must still process all samples
    model = recognition.__dict__[arch_name]()
    model.fixed_batch_size = 1
    assert isinstance(model, Engine)
    input_array = np.random.rand(batch_size, *input_shape).astype(np.float32)
    out = model(input_array, return_model_output=True)
    assert isinstance(out, dict)
    assert len(out) == 2
    assert isinstance(out["preds"], list)
    assert len(out["preds"]) == batch_size
    assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"])
    assert isinstance(out["out_map"], np.ndarray)
    assert out["out_map"].shape[0] == 4
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "input_shape",
    [
        (128, 128, 3),
        (32, 1024, 3),  # test case split wide crops
    ],
)
@pytest.mark.parametrize(
    "arch_name",
    [
        "crnn_vgg16_bn",
        "crnn_mobilenet_v3_small",
        "crnn_mobilenet_v3_large",
        "sar_resnet31",
        "master",
        "vitstr_small",
        "vitstr_base",
        "parseq",
        "viptr_tiny",
    ],
)
def test_recognition_zoo(arch_name, input_shape, quantized):
    """Build each recognition predictor through the zoo and validate its (word, conf) output."""
    batch_size = 2
    # Model
    predictor = recognition.zoo.recognition_predictor(arch_name, load_in_8_bit=quantized)
    # object check
    assert isinstance(predictor, RecognitionPredictor)
    input_array = np.random.rand(batch_size, *input_shape).astype(np.float32)
    out = predictor(input_array)
    # One (word, confidence) pair per input sample
    assert isinstance(out, list) and len(out) == batch_size
    assert all(isinstance(word, str) and isinstance(conf, float) for word, conf in out)
    # Unknown architecture names must be rejected
    with pytest.raises(ValueError):
        _ = recognition.zoo.recognition_predictor(arch="wrong_model")
================================================
FILE: tests/common/test_models_recognition_utils.py
================================================
import pytest
from onnxtr.models.recognition.utils import merge_multi_strings, merge_strings
@pytest.mark.parametrize(
    "a, b, overlap_ratio, merged",
    [
        # Last character of first string and first of last string will be cropped when merging - indicated by X
        ("abcX", "Xdef", 0.5, "abcdef"),
        ("abcdX", "Xdef", 0.75, "abcdef"),
        ("abcdeX", "Xdef", 0.9, "abcdef"),
        ("abcdefX", "Xdef", 0.9, "abcdef"),
        # Long repetition - four of seven characters in the second string are in the estimated overlap
        # X-chars will be cropped during merge, because they might be cut off during splitting of corresponding image
        ("abccccX", "Xcccccc", 4 / 7, "abcccccccc"),
        ("abc", "", 0.5, "abc"),
        ("", "abc", 0.5, "abc"),
        ("a", "b", 0.5, "ab"),
        # No overlap of input strings after crop
        ("abcdX", "Xefghi", 0.33, "abcdefghi"),
        # No overlap of input strings after crop with shorter inputs
        ("bcdX", "Xefgh", 0.4, "bcdefgh"),
        # No overlap of input strings after crop with even shorter inputs
        ("cdX", "Xefg", 0.5, "cdefg"),
        # Full overlap of input strings
        ("abcdX", "Xbcde", 1.0, "abcde"),
        # One repetition within inputs
        ("ababX", "Xabde", 0.8, "ababde"),
        # Multiple repetitions within inputs
        ("ababX", "Xabab", 0.8, "ababab"),
        # Multiple repetitions within inputs with shorter input strings
        ("abaX", "Xbab", 1.0, "abab"),
        # Longer multiple repetitions within inputs with half overlap
        ("cabababX", "Xabababc", 0.5, "cabababababc"),
        # Longer multiple repetitions within inputs with full overlap
        ("ababaX", "Xbabab", 1.0, "ababab"),
        # One different letter in overlap
        ("one_differon", "ferent_letter", 0.5, "one_differont_letter"),
        # First string empty after crop
        ("-", "test", 0.9, "-test"),
        # Second string empty after crop
        ("test", "-", 0.9, "test-"),
    ],
)
def test_merge_strings(a, b, overlap_ratio, merged):
    """Two overlapping crop transcriptions merge into the expected single string."""
    assert merged == merge_strings(a, b, overlap_ratio)
@pytest.mark.parametrize(
    "seq_list, overlap_ratio, last_overlap_ratio, merged",
    [
        # One character at each conjunction point will be cropped when merging - indicated by X
        (["abcX", "Xdef"], 0.5, 0.5, "abcdef"),
        (["abcdX", "XdefX", "XefghX", "Xijk"], 0.5, 0.5, "abcdefghijk"),
        (["abcdX", "XdefX", "XefghiX", "Xaijk"], 0.5, 0.8, "abcdefghijk"),
        (["aaaa", "aaab", "aabc"], 0.8, 0.3, "aaaabc"),
        # Handle empty input
        ([], 0.5, 0.4, ""),
    ],
)
def test_merge_multi_strings(seq_list, overlap_ratio, last_overlap_ratio, merged):
    """A chain of crop transcriptions folds into one string, honoring both overlap ratios."""
    result = merge_multi_strings(seq_list, overlap_ratio, last_overlap_ratio)
    assert result == merged
================================================
FILE: tests/common/test_models_zoo.py
================================================
import numpy as np
import pytest
from onnxtr import models
from onnxtr.io import Document, DocumentFile
from onnxtr.models import detection, recognition
from onnxtr.models.classification import mobilenet_v3_small_crop_orientation, mobilenet_v3_small_page_orientation
from onnxtr.models.classification.zoo import crop_orientation_predictor, page_orientation_predictor
from onnxtr.models.detection.predictor import DetectionPredictor
from onnxtr.models.detection.zoo import ARCHS as DET_ARCHS
from onnxtr.models.detection.zoo import detection_predictor
from onnxtr.models.predictor import OCRPredictor
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.models.recognition.predictor import RecognitionPredictor
from onnxtr.models.recognition.zoo import ARCHS as RECO_ARCHS
from onnxtr.models.recognition.zoo import recognition_predictor
from onnxtr.models.zoo import ocr_predictor
from onnxtr.utils.repr import NestedObject
# Create a dummy callback
class _DummyCallback:
def __call__(self, loc_preds):
return loc_preds
@pytest.mark.parametrize(
    "assume_straight_pages, straighten_pages, disable_page_orientation, disable_crop_orientation",
    [
        [True, False, False, False],
        [False, False, True, True],
        [True, True, False, False],
        [False, True, True, True],
        [True, False, True, False],
    ],
)
def test_ocrpredictor(
    mock_pdf, assume_straight_pages, straighten_pages, disable_page_orientation, disable_crop_orientation
):
    """End-to-end OCRPredictor assembly: orientation predictor wiring, document output,
    input validation, and swapping in custom orientation models."""
    det_bsize = 4
    det_predictor = DetectionPredictor(
        PreProcessor(output_size=(1024, 1024), batch_size=det_bsize),
        detection.db_mobilenet_v3_large(assume_straight_pages=assume_straight_pages),
    )
    reco_bsize = 16
    reco_predictor = RecognitionPredictor(
        PreProcessor(output_size=(32, 128), batch_size=reco_bsize, preserve_aspect_ratio=True),
        recognition.crnn_vgg16_bn(),
    )
    doc = DocumentFile.from_pdf(mock_pdf)
    predictor = OCRPredictor(
        det_predictor,
        reco_predictor,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        detect_orientation=True,
        detect_language=True,
        resolve_lines=True,
        resolve_blocks=True,
        disable_page_orientation=disable_page_orientation,
        disable_crop_orientation=disable_crop_orientation,
    )
    # The disable flags must be reflected on the predictor instance
    assert (
        predictor._page_orientation_disabled if disable_page_orientation else not predictor._page_orientation_disabled
    )
    assert (
        predictor._crop_orientation_disabled if disable_crop_orientation else not predictor._crop_orientation_disabled
    )
    # Orientation sub-predictors are only instantiated when needed
    if assume_straight_pages:
        assert predictor.crop_orientation_predictor is None
        if predictor.detect_orientation or predictor.straighten_pages:
            assert isinstance(predictor.page_orientation_predictor, NestedObject)
        else:
            assert predictor.page_orientation_predictor is None
    else:
        assert isinstance(predictor.crop_orientation_predictor, NestedObject)
        assert isinstance(predictor.page_orientation_predictor, NestedObject)
    out = predictor(doc)
    assert isinstance(out, Document)
    assert len(out.pages) == 2
    # Dimension check: batched 4D arrays are rejected
    with pytest.raises(ValueError):
        input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
        _ = predictor([input_page])
    assert out.pages[0].orientation["value"] in range(-2, 3)
    assert isinstance(out.pages[0].language["value"], str)
    # Rendering and export must work at document and page level
    assert isinstance(out.render(), str)
    assert isinstance(out.pages[0].render(), str)
    assert isinstance(out.export(), dict)
    assert isinstance(out.pages[0].export(), dict)
    with pytest.raises(ValueError):
        _ = ocr_predictor("unknown_arch")
    # Test with custom orientation models
    custom_crop_orientation_model = mobilenet_v3_small_crop_orientation()
    custom_page_orientation_model = mobilenet_v3_small_page_orientation()
    if assume_straight_pages:
        if predictor.detect_orientation or predictor.straighten_pages:
            # Overwrite the default orientation models
            predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
            predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
    else:
        # Overwrite the default orientation models
        predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model)
        predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model)
    out = predictor(doc)
    # The mock PDF is upright, so the detected page orientation must be 0
    orientation = 0
    assert out.pages[0].orientation["value"] == orientation
def test_trained_ocr_predictor(mock_payslip):
    """Run the full OCR pipeline on a known payslip image and pin expected words/geometries.

    The geometry values are reference boxes for the mock payslip fixture,
    compared with a 5% relative tolerance.
    """
    doc = DocumentFile.from_images(mock_payslip)
    det_predictor = detection_predictor(
        "db_resnet50",
        batch_size=2,
        assume_straight_pages=True,
        symmetric_pad=True,
        preserve_aspect_ratio=False,
    )
    reco_predictor = recognition_predictor("crnn_vgg16_bn", batch_size=128)
    predictor = OCRPredictor(
        det_predictor,
        reco_predictor,
        assume_straight_pages=True,
        straighten_pages=True,
        preserve_aspect_ratio=False,
        resolve_lines=True,
        resolve_blocks=True,
    )
    # test hooks: a no-op hook must not change the output
    predictor.add_hook(_DummyCallback())
    out = predictor(doc)
    assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr."
    geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]])
    assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05)
    assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised"
    geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]])
    assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05)
    # Re-run with aspect-ratio preservation + symmetric padding enabled
    det_predictor = detection_predictor(
        "db_resnet50",
        batch_size=2,
        assume_straight_pages=True,
        preserve_aspect_ratio=True,
        symmetric_pad=True,
    )
    predictor = OCRPredictor(
        det_predictor,
        reco_predictor,
        assume_straight_pages=True,
        straighten_pages=True,
        preserve_aspect_ratio=True,
        symmetric_pad=True,
        resolve_lines=True,
        resolve_blocks=True,
    )
    out = predictor(doc)
    assert "Mr" in out.pages[0].blocks[0].lines[0].words[0].value
    # test list archs: the predictor exposes the available architectures
    archs = predictor.list_archs()
    assert isinstance(archs, dict)
    assert archs["recognition_archs"] == RECO_ARCHS
    assert archs["detection_archs"] == DET_ARCHS
def _test_predictor(predictor):
    """Shared sanity checks for a fully built OCR predictor."""
    # Output checks
    assert isinstance(predictor, OCRPredictor)

    # A single blank page must come back as a one-page Document
    pages = [np.zeros((1024, 1024, 3), dtype=np.uint8)]
    result = predictor(pages)
    assert isinstance(result, Document)
    assert len(result.pages) == 1

    # Dimension check: batched (4D) arrays are rejected, pages must be HWC
    with pytest.raises(ValueError):
        bad_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
        _ = predictor([bad_page])
@pytest.mark.parametrize("quantized", [False, True])
@pytest.mark.parametrize(
    "det_arch, reco_arch",
    [[det_arch, reco_arch] for det_arch, reco_arch in zip(detection.zoo.ARCHS, recognition.zoo.ARCHS)],
)
def test_zoo_models(det_arch, reco_arch, quantized):
    """OCR predictor construction from architecture names and from model instances."""
    # Build from architecture names
    _test_predictor(models.ocr_predictor(det_arch, reco_arch, load_in_8_bit=quantized))

    # Build from already-instantiated models
    det_model = detection.__dict__[det_arch]()
    reco_model = recognition.__dict__[reco_arch]()
    _test_predictor(models.ocr_predictor(det_model, reco_model))

    # Passing a recognition model where a detection model is expected must fail
    with pytest.raises(ValueError):
        models.ocr_predictor(det_arch=reco_model)
    # ... and vice versa
    with pytest.raises(ValueError):
        models.ocr_predictor(reco_arch=det_model)
================================================
FILE: tests/common/test_transforms.py
================================================
import numpy as np
import pytest
from onnxtr.transforms import Normalize, Resize
def test_resize():
    """Exercise Resize: plain resize, aspect-ratio preservation, and both padding modes."""
    output_size = (32, 32)
    resize = Resize(output_size)
    square_img = np.ones((64, 64, 3), dtype=np.float32)
    resized = resize(square_img)
    # NOTE(review): input is all-ones float32 yet this compares to 255, while
    # later checks in this test compare to 1 — confirm Resize's output value
    # range against the implementation.
    assert np.all(resized == 255)
    assert resized.shape[:2] == output_size
    assert repr(resize) == f"Resize(output_size={output_size}, interpolation='2')"

    resize = Resize(output_size, preserve_aspect_ratio=True)
    wide_img = np.ones((32, 64, 3), dtype=np.float32)
    resized = resize(wide_img)
    assert resized.shape[:2] == output_size
    assert not np.all(resized == 255)
    # Asymmetric padding: content at the top, zero rows at the bottom
    assert np.all(resized[-1] == 0) and np.all(resized[0] == 255)

    # Symmetric padding spreads the zero rows over both edges
    resize = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True)
    assert repr(resize) == (
        f"Resize(output_size={output_size}, interpolation='2', preserve_aspect_ratio=True, symmetric_pad=True)"
    )
    resized = resize(wide_img)
    assert resized.shape[:2] == output_size
    assert np.all(resized[-1] == 0) and np.all(resized[0] == 0)

    # Inverse aspect ratio (tall image)
    tall_img = np.ones((64, 32, 3), dtype=np.float32)
    resized = resize(tall_img)
    assert not np.all(resized == 1)
    assert resized.shape[:2] == output_size

    # Matching aspect ratio: no padding needed
    output_size = (32, 128)
    resize = Resize(output_size, preserve_aspect_ratio=True)
    resized = resize(np.ones((16, 64, 3), dtype=np.float32))
    assert resized.shape[:2] == output_size
@pytest.mark.parametrize(
    "input_shape",
    [
        [8, 32, 32, 3],
        [32, 32, 3],
        [32, 3],
    ],
)
def test_normalize(input_shape):
    """Normalize with mean=std=0.5 maps an all-ones tensor onto itself, for several ranks."""
    mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5]
    normalize = Normalize(mean, std)
    ones = np.ones(input_shape, dtype=np.float32)
    normalized = normalize(ones)
    # (1 - 0.5) / 0.5 == 1, so values are unchanged
    assert np.all(normalized == 1)
    assert repr(normalize) == f"Normalize(mean={mean}, std={std})"

    # Non-sequence mean/std must be rejected
    with pytest.raises(AssertionError):
        Normalize(mean="32")
    with pytest.raises(AssertionError):
        Normalize(std="32")
================================================
FILE: tests/common/test_utils_data.py
================================================
import os
import tempfile
from pathlib import PosixPath
from unittest.mock import patch
import pytest
from onnxtr.utils.data import _urlretrieve, download_from_url
def test__urlretrieve():
    """Download a small release asset and verify the file lands on disk."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        target = os.path.join(tmp_dir, "crnn_mobilenet_v3_small-bded4d49.onnx")
        _urlretrieve(
            "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_small-bded4d49.onnx",
            target,
        )
        assert os.path.exists(target), f"File {target} does not exist."
@patch("onnxtr.utils.data._urlretrieve")
@patch("pathlib.Path.mkdir")
@patch.dict(os.environ, {"HOME": "/"}, clear=True)
def test_download_from_url(mkdir_mock, urlretrieve_mock):
    """Without ONNXTR_CACHE_DIR, the cache path is derived from $HOME."""
    download_from_url("test_url")
    # The retrieval helper must be called with the $HOME-based default cache path
    urlretrieve_mock.assert_called_with("test_url", PosixPath("/.cache/onnxtr/test_url"))
@patch.dict(os.environ, {"ONNXTR_CACHE_DIR": "/test"}, clear=True)
@patch("onnxtr.utils.data._urlretrieve")
@patch("pathlib.Path.mkdir")
def test_download_from_url_customizing_cache_dir(mkdir_mock, urlretrieve_mock):
    """ONNXTR_CACHE_DIR overrides the default $HOME-based cache location."""
    download_from_url("test_url")
    urlretrieve_mock.assert_called_with("test_url", PosixPath("/test/test_url"))
@patch.dict(os.environ, {"HOME": "/"}, clear=True)
@patch("pathlib.Path.mkdir", side_effect=OSError)
@patch("logging.error")
def test_download_from_url_error_creating_directory(logging_mock, mkdir_mock):
    """A failing mkdir must propagate as OSError and log a helpful message."""
    with pytest.raises(OSError):
        download_from_url("test_url")
    # NOTE(review): "direcotry" is misspelled — presumably this mirrors the
    # library's actual log string; fix it there and here together, if at all.
    logging_mock.assert_called_with(
        "Failed creating cache direcotry at /.cache/onnxtr."
        " You can change default cache directory using 'ONNXTR_CACHE_DIR' environment variable if needed."
    )
@patch.dict(os.environ, {"HOME": "/", "ONNXTR_CACHE_DIR": "/test"}, clear=True)
@patch("pathlib.Path.mkdir", side_effect=OSError)
@patch("logging.error")
def test_download_from_url_error_creating_directory_with_env_var(logging_mock, mkdir_mock):
    """When ONNXTR_CACHE_DIR is set, the mkdir-failure message references that path."""
    with pytest.raises(OSError):
        download_from_url("test_url")
    # NOTE(review): "direcotry" typo — see the non-env-var variant of this test
    logging_mock.assert_called_with(
        "Failed creating cache direcotry at /test using path from 'ONNXTR_CACHE_DIR' environment variable."
    )
================================================
FILE: tests/common/test_utils_fonts.py
================================================
from PIL.ImageFont import FreeTypeFont, ImageFont
from onnxtr.utils.fonts import get_font
def test_get_font():
    """get_font with no arguments returns a usable PIL font object."""
    # Attempts to load recommended OS font
    font = get_font()
    # Either a FreeType font (font file found) or PIL's bitmap fallback is acceptable
    assert isinstance(font, (ImageFont, FreeTypeFont))
================================================
FILE: tests/common/test_utils_geometry.py
================================================
from copy import deepcopy
from math import hypot
import numpy as np
import pytest
from onnxtr.io import DocumentFile
from onnxtr.utils import geometry
def test_bbox_to_polygon():
    """A ((xmin, ymin), (xmax, ymax)) bbox expands to its four corner points."""
    bbox = ((0, 0), (1, 1))
    expected = ((0, 0), (1, 0), (0, 1), (1, 1))
    assert geometry.bbox_to_polygon(bbox) == expected
def test_polygon_to_bbox():
    """Four corner points collapse back to the ((xmin, ymin), (xmax, ymax)) bbox."""
    polygon = ((0, 0), (1, 0), (0, 1), (1, 1))
    expected = ((0, 0), (1, 1))
    assert geometry.polygon_to_bbox(polygon) == expected
def test_order_points():
    """order_points normalizes corners to (top-left, top-right, bottom-right, bottom-left)."""
    expected = np.array([
        [1, 2],  # top-left
        [5, 2],  # top-right
        [5, 6],  # bottom-right
        [1, 6],  # bottom-left
    ])

    # Flat (xmin, ymin, xmax, ymax) bbox input
    assert np.all(geometry.order_points(np.array([1, 2, 5, 6])) == expected)

    # Shuffled quadrangle (br, tl, bl, tr)
    shuffled = np.array([[5, 6], [1, 2], [1, 6], [5, 2]])
    assert np.all(geometry.order_points(shuffled) == expected)

    # Already-ordered input is left unchanged
    assert np.all(geometry.order_points(expected.copy()) == expected)

    # Float inputs keep their dtype and values
    shuffled_f32 = shuffled.astype(np.float32)
    ordered_f32 = geometry.order_points(shuffled_f32)
    assert ordered_f32.dtype == shuffled_f32.dtype
    assert np.allclose(ordered_f32, expected)

    # Invalid shapes are rejected
    with pytest.raises(ValueError):
        geometry.order_points(np.array([1, 2, 3]))  # neither a bbox nor a quad
    with pytest.raises(ValueError):
        geometry.order_points(np.zeros((5, 2)))  # too many points
def test_detach_scores():
    """detach_scores separates geometry from the trailing confidence values."""
    expected_scores = np.array([0.9, 0.8])

    # (N, 5) boxes: the 5th column is the score
    scored_boxes = np.array([[0.1, 0.1, 0.2, 0.2, 0.9], [0.15, 0.15, 0.2, 0.2, 0.8]])
    result = geometry.detach_scores([scored_boxes])
    expected_boxes = np.array([[0.1, 0.1, 0.2, 0.2], [0.15, 0.15, 0.2, 0.2]])
    assert np.all(result[0] == expected_boxes) and np.all(result[1] == expected_scores)

    # (N, 5, 2) polygons: an extra 5th point carries the score
    scored_polys = np.array([
        [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15], [0.0, 0.9]],
        [[0.15, 0.15], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15], [0.0, 0.8]],
    ])
    result = geometry.detach_scores([scored_polys])
    expected_polys = np.array([
        [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
        [[0.15, 0.15], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]],
    ])
    assert np.all(result[0] == expected_polys) and np.all(result[1] == expected_scores)
def test_resolve_enclosing_bbox():
    """resolve_enclosing_bbox merges boxes into their smallest enclosing bbox."""
    # Tuple-based 2-point boxes
    assert geometry.resolve_enclosing_bbox([((0, 0.5), (1, 0)), ((0.5, 0), (1, 0.25))]) == ((0, 0), (1, 0.5))
    # Array-based (xmin, ymin, xmax, ymax) boxes.
    # Bug fix: the original assertion `pred.all() == np.array([...]).all()`
    # compared two truthiness reductions (both True for these inputs), so it
    # could never fail; compare the actual enclosing-box values instead.
    pred = geometry.resolve_enclosing_bbox(np.array([[0.1, 0.1, 0.2, 0.2], [0.15, 0.15, 0.2, 0.2]]))
    assert np.allclose(pred, np.array([0.1, 0.1, 0.2, 0.2]))
def test_resolve_enclosing_rbbox():
    """Two rotated boxes merge into a single enclosing rotated box."""
    quad_a = np.asarray([[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]])
    quad_b = np.asarray([[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]])
    merged = geometry.resolve_enclosing_rbbox([quad_a, quad_b])
    # Expected hull corners, normalized to the canonical point order
    expected = geometry.order_points(np.asarray([[0.05, 0.15], [0.1, 0.1], [0.6, 0.6], [0.55, 0.65]]))
    assert np.allclose(merged, expected, atol=1e-3)
def test_remap_boxes():
    """remap_boxes rescales relative polygons between page sizes, preserving geometry."""
    centered = np.asarray([[[0.25, 0.25], [0.25, 0.75], [0.75, 0.25], [0.75, 0.75]]])

    # Uniform upscale: the box shrinks relative to the larger page
    remapped = geometry.remap_boxes(centered, (10, 10), (20, 20))
    assert np.all(remapped == np.asarray([[[0.375, 0.375], [0.375, 0.625], [0.625, 0.375], [0.625, 0.625]]]))

    # Height-only upscale: only the y-coordinates are squeezed
    remapped = geometry.remap_boxes(centered, (10, 10), (20, 10))
    assert np.all(remapped == np.asarray([[[0.25, 0.375], [0.25, 0.625], [0.75, 0.375], [0.75, 0.625]]]))

    # Dimensions must be (height, width) pairs
    with pytest.raises(ValueError):
        geometry.remap_boxes(centered, (80, 40, 150), (160, 40))
    with pytest.raises(ValueError):
        geometry.remap_boxes(centered, (80, 40), (160,))

    # Remapping must preserve absolute segment length and orientation
    height_o, width_o = 100, 100
    height_d, width_d = 200, 100
    diag_box = np.asarray([[[0.25, 0.25], [0.25, 0.25], [0.75, 0.75], [0.75, 0.75]]])
    remapped = geometry.remap_boxes(diag_box, (height_o, width_o), (height_d, width_d))
    # Convert both versions to absolute coordinates
    orig_abs = np.stack((diag_box[:, :, 0] * width_o, diag_box[:, :, 1] * height_o), axis=2)[0]
    dest_abs = np.stack((remapped[:, :, 0] * width_d, remapped[:, :, 1] * height_d), axis=2)[0]
    len_orig = hypot(orig_abs[0][0] - orig_abs[2][0], orig_abs[0][1] - orig_abs[2][1])
    len_dest = hypot(dest_abs[0][0] - dest_abs[2][0], dest_abs[0][1] - dest_abs[2][1])
    assert len_orig == len_dest
    angle_orig = np.rad2deg(np.arctan((orig_abs[0][1] - orig_abs[2][1]) / (orig_abs[0][0] - orig_abs[2][0])))
    angle_dest = np.rad2deg(np.arctan((dest_abs[0][1] - dest_abs[2][1]) / (dest_abs[0][0] - dest_abs[2][0])))
    assert angle_orig == angle_dest
def test_rotate_boxes():
    """rotate_boxes turns (N, 5) scored boxes into (N, 4, 2) polygons, rotating as needed."""
    scored_boxes = np.array([[0.1, 0.1, 0.8, 0.3, 0.5]])
    expected_poly = np.array([[0.1, 0.1], [0.8, 0.1], [0.8, 0.3], [0.1, 0.3]])

    # Zero angle: plain bbox-to-polygon conversion
    assert np.all(geometry.rotate_boxes(scored_boxes, angle=0.0, orig_shape=(1, 1)) == expected_poly)
    # Angles below the minimum threshold leave the boxes untouched
    assert np.all(geometry.rotate_boxes(scored_boxes, angle=0.5, orig_shape=(1, 1)) == expected_poly)
    # A real rotation yields one 4-point polygon
    assert geometry.rotate_boxes(scored_boxes, angle=30, orig_shape=(1, 1)).shape == (1, 4, 2)

    corner_boxes = np.array([[0.0, 0.0, 0.6, 0.2, 0.5]])
    # Quarter turns in both directions (min_angle=0 forces the rotation)
    rotated = geometry.rotate_boxes(corner_boxes, angle=-90, orig_shape=(1, 1), min_angle=0)
    assert np.allclose(rotated, np.array([[[1, 0.0], [1, 0.6], [0.8, 0.6], [0.8, 0.0]]]))
    rotated = geometry.rotate_boxes(corner_boxes, angle=+90, orig_shape=(1, 1), min_angle=0)
    assert np.allclose(rotated, np.array([[[0, 1.0], [0, 0.4], [0.2, 0.4], [0.2, 1.0]]]))
@pytest.fixture
def sample_geoms():
    """Two axis-aligned 4-point polygons in absolute coordinates."""
    return np.array([
        [[10, 10], [20, 10], [20, 20], [10, 20]],
        [[30, 30], [40, 30], [40, 40], [30, 40]],
    ])
def test_rotate_abs_geoms(sample_geoms):
    """Rotating absolute polygons preserves the (N, 4, 2) array shape."""
    rotated_polys = geometry.rotate_abs_geoms(sample_geoms, 45.0, (100, 100))
    assert rotated_polys.shape == sample_geoms.shape
def test_rotate_image():
    """rotate_image rotates a page, optionally expanding the canvas or restoring the shape."""
    src = np.ones((32, 64, 3), dtype=np.float32)

    rotated = geometry.rotate_image(src, 30.0)
    assert rotated.shape[:-1] == (32, 64)
    assert rotated[0, 0, 0] == 0  # corner becomes padding
    assert rotated[0, :, 0].sum() > 1  # content still reaches the first row

    # Expanded canvas: first row is (almost) all padding
    rotated = geometry.rotate_image(src, 30.0, expand=True)
    assert rotated.shape[:-1] == (60, 120)
    assert rotated[0, :, 0].sum() <= 1

    # Expand, then restore the original shape
    rotated = geometry.rotate_image(src, 30.0, expand=True, preserve_origin_shape=True)
    assert rotated.shape[:-1] == (32, 64)
    assert rotated[0, :, 0].sum() <= 1

    # 90° rotation with expansion
    rotated = geometry.rotate_image(src, 90.0, expand=True)
    assert rotated.shape[:-1] == (64, 128)
    assert rotated[0, :, 0].sum() <= 1
def test_remove_image_padding():
    """remove_image_padding strips zero borders and is a no-op on tight images."""
    content = np.ones((32, 64, 3), dtype=np.float32)
    padded = np.pad(content, ((10, 10), (20, 20), (0, 0)))
    assert np.all(geometry.remove_image_padding(padded) == content)
    # Already-tight image passes through unchanged
    assert np.all(geometry.remove_image_padding(content) == content)
@pytest.mark.parametrize(
    "abs_geoms, img_size, rel_geoms",
    [
        # Full image (boxes)
        [np.array([[0, 0, 32, 32]]), (32, 32), np.array([[0, 0, 1, 1]], dtype=np.float32)],
        # Full image (polygons)
        [
            np.array([[[0, 0], [32, 0], [32, 32], [0, 32]]]),
            (32, 32),
            np.array([[[0, 0], [1, 0], [1, 1], [0, 1]]], dtype=np.float32),
        ],
        # Quarter image (boxes)
        [np.array([[0, 0, 16, 16]]), (32, 32), np.array([[0, 0, 0.5, 0.5]], dtype=np.float32)],
        # Quarter image (polygons)
        [
            np.array([[[0, 0], [16, 0], [16, 16], [0, 16]]]),
            (32, 32),
            np.array([[[0, 0], [0.5, 0], [0.5, 0.5], [0, 0.5]]], dtype=np.float32),
        ],
    ],
)
def test_convert_to_relative_coords(abs_geoms, img_size, rel_geoms):
    """Absolute boxes/polygons divide through by the page size; bad shapes raise."""
    assert np.all(geometry.convert_to_relative_coords(abs_geoms, img_size) == rel_geoms)
    # A (3, 5) array is neither (N, 4) boxes nor (N, 4, 2) polygons
    with pytest.raises(ValueError):
        geometry.convert_to_relative_coords(np.zeros((3, 5)), (32, 32))
def test_estimate_page_angle():
    """estimate_page_angle recovers the rotation applied to straight boxes."""
    straight_polys = np.array([
        [[0.3, 0.3], [0.4, 0.3], [0.4, 0.4], [0.3, 0.4]],
        [[0.4, 0.4], [0.5, 0.4], [0.5, 0.5], [0.4, 0.5]],
        [[0.5, 0.5], [0.6, 0.5], [0.6, 0.6], [0.5, 0.6]],
    ])
    rotated_polys = geometry.rotate_boxes(straight_polys, angle=20, orig_shape=(512, 512))
    assert np.isclose(geometry.estimate_page_angle(rotated_polys), 20)

    # Degenerate polygon (all corners identical) must not yield NaN / divide-by-zero
    collapsed = np.array([[[0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5]]])
    assert geometry.estimate_page_angle(collapsed) == 0.0
def test_extract_crops(mock_pdf):
    """extract_crops slices axis-aligned boxes (relative or absolute) out of a page."""
    doc_img = DocumentFile.from_pdf(mock_pdf)[0]
    num_crops = 2
    # Relative boxes along the page diagonal
    rel_boxes = np.array(
        [[idx / num_crops, idx / num_crops, (idx + 1) / num_crops, (idx + 1) / num_crops] for idx in range(num_crops)],
        dtype=np.float32,
    )
    # Absolute pixel boxes matching rel_boxes.
    # Bug fix: the ymin entry was `int(idx * doc_img.shape[0]) / num_crops`
    # (division outside the cast, unlike its three siblings), producing a
    # float off by up to 0.5 px for odd page heights; cast the full expression.
    abs_boxes = np.array(
        [
            [
                int(idx * doc_img.shape[1] / num_crops),
                int(idx * doc_img.shape[0] / num_crops),
                int((idx + 1) * doc_img.shape[1] / num_crops),
                int((idx + 1) * doc_img.shape[0] / num_crops),
            ]
            for idx in range(num_crops)
        ],
        dtype=np.float32,
    )

    # 5-column input is not a valid box format
    with pytest.raises(AssertionError):
        geometry.extract_crops(doc_img, np.zeros((1, 5)))

    for boxes in (rel_boxes, abs_boxes):
        crops = geometry.extract_crops(doc_img, boxes)
        # Number of crops
        assert len(crops) == num_crops
        # Data type and shape
        assert all(isinstance(crop, np.ndarray) for crop in crops)
        assert all(crop.ndim == 3 for crop in crops)

    # A full-page box reproduces the page (channels-last)
    assert np.all(
        doc_img == geometry.extract_crops(doc_img, np.array([[0, 0, 1, 1]], dtype=np.float32), channels_last=True)[0]
    )
    # channels_last=False returns the crop in CHW layout
    torch_img = np.transpose(doc_img, axes=(-1, 0, 1))
    assert np.all(
        torch_img
        == np.transpose(
            geometry.extract_crops(doc_img, np.array([[0, 0, 1, 1]], dtype=np.float32), channels_last=False)[0],
            axes=(-1, 0, 1),
        )
    )

    # An empty box set yields an empty list
    assert geometry.extract_crops(doc_img, np.zeros((0, 4))) == []
@pytest.mark.parametrize("assume_horizontal", [True, False])
def test_extract_rcrops(mock_pdf, assume_horizontal):
    """extract_rcrops slices rotated (4-point) boxes out of a page."""
    doc_img = DocumentFile.from_pdf(mock_pdf)[0]
    num_crops = 2
    # Relative quadrangles in (tl, tr, br, bl) order.
    # Bug fix: the fourth vertex duplicated the first one, making the quad a
    # degenerate triangle; use the bottom-left corner (x, y + 0.1) instead so
    # the polygon is a proper rectangle.
    rel_boxes = np.array(
        [
            [
                [idx / num_crops, idx / num_crops],
                [idx / num_crops + 0.1, idx / num_crops],
                [idx / num_crops + 0.1, idx / num_crops + 0.1],
                [idx / num_crops, idx / num_crops + 0.1],
            ]
            for idx in range(num_crops)
        ],
        dtype=np.float32,
    )
    # Absolute pixel version of the same quadrangles
    abs_boxes = deepcopy(rel_boxes)
    abs_boxes[:, :, 0] *= doc_img.shape[1]
    abs_boxes[:, :, 1] *= doc_img.shape[0]
    abs_boxes = abs_boxes.astype(np.int64)

    # Flat (N, 8) input is not a valid polygon format
    with pytest.raises(AssertionError):
        geometry.extract_rcrops(doc_img, np.zeros((1, 8)), assume_horizontal=assume_horizontal)

    for boxes in (rel_boxes, abs_boxes):
        crops = geometry.extract_rcrops(doc_img, boxes, assume_horizontal=assume_horizontal)
        # Number of crops
        assert len(crops) == num_crops
        # Data type and shape
        assert all(isinstance(crop, np.ndarray) for crop in crops)
        assert all(crop.ndim == 3 for crop in crops)

    # An empty polygon set yields an empty list
    assert geometry.extract_rcrops(doc_img, np.zeros((0, 4, 2)), assume_horizontal=assume_horizontal) == []
@pytest.mark.parametrize(
    "format,input_shape,expected_shape",
    [
        ("BCHW", (32, 3, 64, 64), (32, 3, 64, 64)),
        ("BCHW", (32, 64, 64, 3), (32, 3, 64, 64)),
        ("BHWC", (32, 64, 64, 3), (32, 64, 64, 3)),
        ("BHWC", (32, 3, 64, 64), (32, 64, 64, 3)),
        ("XYZ", (32, 3, 64, 64), (32, 3, 64, 64)),
        ("CHW", (3, 64, 64), (3, 64, 64)),
        ("CHW", (64, 64, 3), (3, 64, 64)),
        ("HWC", (64, 64, 3), (64, 64, 3)),
        ("HWC", (3, 64, 64), (64, 64, 3)),
    ],
)
def test_shape_translate(format, input_shape, expected_shape):
    """shape_translate permutes axes into the requested layout; unknown formats pass through."""
    sample = np.random.rand(*input_shape).astype(np.float32)
    translated = geometry.shape_translate(sample, format)
    assert translated.shape == expected_shape
================================================
FILE: tests/common/test_utils_multithreading.py
================================================
import os
from multiprocessing.pool import ThreadPool
from unittest.mock import patch
import pytest
from onnxtr.utils.multithreading import multithread_exec
@pytest.mark.parametrize(
    "input_seq, func, output_seq",
    [
        [[1, 2, 3], lambda x: 2 * x, [2, 4, 6]],
        [[1, 2, 3], lambda x: x**2, [1, 4, 9]],
        [
            ["this is", "show me", "I know"],
            lambda x: x + " the way",
            ["this is the way", "show me the way", "I know the way"],
        ],
    ],
)
def test_multithread_exec(input_seq, func, output_seq):
    """Mapping `func` over `input_seq` preserves element order."""
    assert list(multithread_exec(func, input_seq)) == output_seq
    # Third positional argument 0 presumably forces the sequential (no-pool)
    # path — TODO confirm against multithread_exec's signature
    assert list(multithread_exec(func, input_seq, 0)) == output_seq
@patch.dict(os.environ, {"ONNXTR_MULTIPROCESSING_DISABLE": "TRUE"}, clear=True)
def test_multithread_exec_multiprocessing_disable():
    """With the env switch set, ThreadPool.map must never be invoked."""
    with patch.object(ThreadPool, "map") as mock_tp_map:
        multithread_exec(lambda x: x, [1, 2])
        assert not mock_tp_map.called
================================================
FILE: tests/common/test_utils_reconstitution.py
================================================
import numpy as np
from test_io_elements import _mock_pages
from onnxtr.utils import reconstitution
def test_synthesize_page():
    """synthesize_page renders a page export back into an RGB image of the page size."""
    page = _mock_pages()[0]
    expected_shape = (*page.dimensions, 3)

    # With and without the probability overlay
    for draw_proba in (False, True):
        rendered = reconstitution.synthesize_page(page.export(), draw_proba=draw_proba)
        assert isinstance(rendered, np.ndarray)
        assert rendered.shape == expected_shape

    # A block reduced to a single line
    single_line_export = page.export()
    single_line_export["blocks"][0]["lines"] = [single_line_export["blocks"][0]["lines"][0]]
    rendered = reconstitution.synthesize_page(single_line_export, draw_proba=True)
    assert isinstance(rendered, np.ndarray)
    assert rendered.shape == expected_shape

    # Polygon (4-point) geometry instead of a 2-point bbox
    poly_export = page.export()
    poly_export["blocks"][0]["lines"][0]["geometry"] = [(0, 0), (0, 1), (1, 1), (1, 0)]
    rendered = reconstitution.synthesize_page(poly_export, draw_proba=True)
    assert isinstance(rendered, np.ndarray)
    assert rendered.shape == expected_shape
================================================
FILE: tests/common/test_utils_visualization.py
================================================
import numpy as np
import pytest
from test_io_elements import _mock_pages
from onnxtr.utils import visualization
def test_visualize_page():
    """Smoke-test page visualization plus create_obj_patch geometry validation."""
    image = np.ones((300, 200, 3))

    def render_all(page):
        # Exercise the main display options on a fresh export each time
        visualization.visualize_page(page.export(), image, words_only=False)
        visualization.visualize_page(page.export(), image, words_only=True, interactive=False)
        visualization.visualize_page(
            page.export(), image, words_only=True, interactive=False, preserve_aspect_ratio=True
        )

    render_all(_mock_pages()[0])

    # Invalid geometries must be rejected by create_obj_patch
    for bad_geometry in ([1, 2], (1, 2), (1, 2, 3, 4, 5)):
        with pytest.raises(ValueError):
            visualization.create_obj_patch(bad_geometry, (100, 100))

    # Same rendering checks with polygon-based pages
    image = np.ones((300, 200, 3))
    render_all(_mock_pages(polygons=True)[0])
def test_draw_boxes():
    """Smoke-test draw_boxes with pairs of overlapping boxes."""
    canvas = np.ones((256, 256, 3), dtype=np.float32)
    boxes = np.array([
        [0.1, 0.1, 0.2, 0.2],
        [0.15, 0.15, 0.19, 0.2],  # overlaps the first box
        [0.5, 0.5, 0.6, 0.55],
        [0.55, 0.5, 0.7, 0.55],  # overlaps the third box
    ])
    visualization.draw_boxes(boxes=boxes, image=canvas, block=False)
================================================
FILE: tests/common/test_utils_vocabs.py
================================================
from collections import Counter
from onnxtr.utils import VOCABS
def test_vocabs_duplicates():
    """Every registered vocabulary must be a string with no repeated characters."""
    for name, charset in VOCABS.items():
        assert isinstance(charset, str)
        # Characters appearing more than once, in first-occurrence order
        repeated = [char for char, count in Counter(charset).items() if count > 1]
        assert not repeated, f"Duplicate characters in {name} vocab: {repeated}"
================================================
FILE: tests/conftest.py
================================================
from io import BytesIO
import cv2
import pytest
import requests
from PIL import Image, ImageDraw
from onnxtr.io import reader
from onnxtr.utils import geometry
from onnxtr.utils.fonts import get_font
def synthesize_text_img(
    text: str,
    font_size: int = 32,
    font_family=None,
    background_color=None,
    text_color=None,
) -> Image.Image:
    """Render ``text`` centered on a fresh RGB image sized to fit it.

    Args:
        text: the string to draw
        font_size: font size passed to `get_font`
        font_family: optional font family passed to `get_font`
        background_color: RGB background color (defaults to black)
        text_color: RGB text color (defaults to white)

    Returns:
        A PIL image containing the rendered, centered text.
    """
    bg_color = (0, 0, 0) if background_color is None else background_color
    fg_color = (255, 255, 255) if text_color is None else text_color

    font = get_font(font_family, font_size)
    left, top, right, bottom = font.getbbox(text)
    text_w, text_h = right - left, bottom - top

    # Add a margin around the text; a single character gets a square canvas
    height, width = int(round(1.3 * text_h)), int(round(1.1 * text_w))
    if len(text) > 1:
        img_size = (height, width)
    else:
        side = max(height, width)
        img_size = (side, side)

    img = Image.new("RGB", img_size[::-1], color=bg_color)
    draw = ImageDraw.Draw(img)
    # Offset so that the text is centered on the canvas
    text_pos = (int(round((img_size[1] - text_w) / 2)), int(round((img_size[0] - text_h) / 2)))
    draw.text(text_pos, text, font=font, fill=fg_color)
    return img
@pytest.fixture(scope="session")
def mock_vocab():
    """Shuffled vocabulary string (with accented characters) shared across recognition tests."""
    return "3K}7eé;5àÎYho]QwV6qU~W\"XnbBvcADfËmy.9ÔpÛ*{CôïE%M4#ÈR:g@T$x?0î£|za1ù8,OG€P-kçHëÀÂ2É/ûIJ'j(LNÙFut[)èZs+&°Sd=Ï!<â_Ç>rêi`l"  # noqa
@pytest.fixture(scope="session")
def mock_pdf(tmpdir_factory):
    """Two-page A4-sized PDF with one dark-on-white text snippet per page."""
    pages = []
    for content, offset in [("I am a jedi!", (50, 100)), ("No, I am your father.", (40, 300))]:
        text_img = synthesize_text_img(content, background_color=(255, 255, 255), text_color=(0, 0, 0))
        page = Image.new(text_img.mode, (1240, 1754), (255, 255, 255))
        page.paste(text_img, offset)
        pages.append(page)

    # Save both pages into a single PDF
    fn = tmpdir_factory.mktemp("data").join("mock_pdf_file.pdf")
    pages[0].save(str(fn), "PDF", save_all=True, append_images=pages[1:])
    return str(fn)
@pytest.fixture(scope="session")
def mock_payslip(tmpdir_factory):
    """Download a sample payslip image and store it as a session-scoped JPEG file."""
    url = "https://3.bp.blogspot.com/-Es0oHTCrVEk/UnYA-iW9rYI/AAAAAAAAAFI/hWExrXFbo9U/s1600/003.jpg"
    target = str(tmpdir_factory.mktemp("data").join("mock_payslip.jpeg"))
    with open(target, "wb") as f:
        f.write(requests.get(url).content)
    return target
@pytest.fixture(scope="session")
def mock_tilted_payslip(mock_payslip, tmpdir_factory):
    """30°-rotated (expanded-canvas) version of the payslip image, saved as JPEG."""
    rotated = geometry.rotate_image(reader.read_img_as_numpy(mock_payslip), 30, expand=True)
    out_path = str(tmpdir_factory.mktemp("data").join("mock_tilted_payslip.jpg"))
    cv2.imwrite(out_path, rotated)
    return out_path
@pytest.fixture(scope="session")
def mock_text_box_stream():
    """Raw bytes of a word-crop image hosted on the doctr static server."""
    return requests.get("https://doctr-static.mindee.com/models?id=v0.5.1/word-crop.png&src=0").content
@pytest.fixture(scope="session")
def mock_text_box(mock_text_box_stream, tmpdir_factory):
    """Write the word-crop bytes to a temporary PNG and return its path."""
    fn = tmpdir_factory.mktemp("data").join("mock_text_box_file.png")
    with open(fn, "wb") as f:
        f.write(mock_text_box_stream)
    return str(fn)
@pytest.fixture(scope="session")
def mock_artefact_image_stream():
    """Raw JPEG bytes of a dummy artefact image fetched from the doctr releases."""
    url = "https://github.com/mindee/doctr/releases/download/v0.8.1/artefact_dummy.jpg"
    return requests.get(url).content