Repository: felixdittrich92/OnnxTR Branch: main Commit: b10318c76097 Files: 126 Total size: 480.5 KB Directory structure: gitextract_7yglu2_f/ ├── .conda/ │ └── meta.yaml ├── .github/ │ ├── CODEOWNERS │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── config.yml │ │ └── feature_request.yml │ ├── dependabot.yml │ ├── release.yml │ └── workflows/ │ ├── builds.yml │ ├── clear_caches.yml │ ├── demo.yml │ ├── docker.yml │ ├── main.yml │ ├── publish.yml │ └── style.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── demo/ │ ├── README.md │ ├── app.py │ ├── packages.txt │ └── requirements.txt ├── onnxtr/ │ ├── __init__.py │ ├── contrib/ │ │ ├── __init__.py │ │ ├── artefacts.py │ │ └── base.py │ ├── file_utils.py │ ├── io/ │ │ ├── __init__.py │ │ ├── elements.py │ │ ├── html.py │ │ ├── image.py │ │ ├── pdf.py │ │ └── reader.py │ ├── models/ │ │ ├── __init__.py │ │ ├── _utils.py │ │ ├── builder.py │ │ ├── classification/ │ │ │ ├── __init__.py │ │ │ ├── models/ │ │ │ │ ├── __init__.py │ │ │ │ └── mobilenet.py │ │ │ ├── predictor/ │ │ │ │ ├── __init__.py │ │ │ │ └── base.py │ │ │ └── zoo.py │ │ ├── detection/ │ │ │ ├── __init__.py │ │ │ ├── _utils/ │ │ │ │ ├── __init__.py │ │ │ │ └── base.py │ │ │ ├── core.py │ │ │ ├── models/ │ │ │ │ ├── __init__.py │ │ │ │ ├── differentiable_binarization.py │ │ │ │ ├── fast.py │ │ │ │ └── linknet.py │ │ │ ├── postprocessor/ │ │ │ │ ├── __init__.py │ │ │ │ └── base.py │ │ │ ├── predictor/ │ │ │ │ ├── __init__.py │ │ │ │ └── base.py │ │ │ └── zoo.py │ │ ├── engine.py │ │ ├── factory/ │ │ │ ├── __init__.py │ │ │ └── hub.py │ │ ├── predictor/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── predictor.py │ │ ├── preprocessor/ │ │ │ ├── __init__.py │ │ │ └── base.py │ │ ├── recognition/ │ │ │ ├── __init__.py │ │ │ ├── core.py │ │ │ ├── models/ │ │ │ │ ├── __init__.py │ │ │ │ ├── crnn.py │ │ │ │ ├── master.py │ │ │ │ ├── parseq.py │ │ │ │ ├── sar.py │ │ │ │ ├── viptr.py │ │ │ │ └── vitstr.py │ │ │ ├── predictor/ │ │ │ │ ├── __init__.py │ │ │ │ ├── _utils.py │ │ │ │ └── base.py │ │ │ ├── utils.py │ │ │ └── zoo.py │ │ └── zoo.py │ ├── py.typed │ ├── transforms/ │ │ ├── __init__.py │ │ └── base.py │ └── utils/ │ ├── __init__.py │ ├── common_types.py │ ├── data.py │ ├── fonts.py │ ├── geometry.py │ ├── multithreading.py │ ├── reconstitution.py │ ├── repr.py │ ├── visualization.py │ └── vocabs.py ├── pyproject.toml ├── scripts/ │ ├── convert_to_float16.py │ ├── evaluate.py │ ├── latency.py │ └── quantize.py ├── setup.py └── tests/ ├── common/ │ ├── test_contrib.py │ ├── test_core.py │ ├── test_engine_cfg.py │ ├── test_headers.py │ ├── test_io.py │ ├── test_io_elements.py │ ├── test_models.py │ ├── test_models_builder.py │ ├── test_models_classification.py │ ├── test_models_detection.py │ ├── test_models_detection_utils.py │ ├── test_models_factory.py │ ├── test_models_preprocessor.py │ ├── test_models_recognition.py │ ├── test_models_recognition_utils.py │ ├── test_models_zoo.py │ ├── test_transforms.py │ ├── test_utils_data.py │ ├── test_utils_fonts.py │ ├── test_utils_geometry.py │ ├── test_utils_multithreading.py │ ├── test_utils_reconstitution.py │ ├── test_utils_visualization.py │ └── test_utils_vocabs.py └── conftest.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .conda/meta.yaml ================================================ {% set pyproject = 
load_file_data('../pyproject.toml', from_recipe_dir=True) %} {% set project = pyproject.get('project') %} {% set urls = pyproject.get('project', {}).get('urls') %} {% set version = environ.get('BUILD_VERSION', '0.8.2a0') %} package: name: onnxtr version: {{ version }} source: fn: onnxtr-{{ version }}.tar.gz url: ../dist/onnxtr-{{ version }}.tar.gz build: script: python setup.py install --single-version-externally-managed --record=record.txt requirements: host: - python>=3.10, <3.12 - setuptools run: - numpy >=1.16.0, <3.0.0 - scipy >=1.4.0, <2.0.0 - pillow >=9.2.0 - opencv >=4.5.0, <5.0.0 - pypdfium2-team::pypdfium2_helpers >=4.11.0, <5.0.0 - pyclipper >=1.2.0, <2.0.0 - langdetect >=1.0.9, <2.0.0 - rapidfuzz >=3.0.0, <4.0.0 - huggingface_hub >=0.20.0, <1.0.0 - defusedxml >=0.7.0 - anyascii >=0.3.2 - tqdm >=4.30.0 test: requires: - pip - onnxruntime imports: - onnxtr about: home: {{ urls.get('repository') }} license: Apache-2.0 license_file: {{ project.get('license', {}).get('file') }} summary: {{ project.get('description') | replace(":", " -")}} dev_url: {{ urls.get('repository') }} ================================================ FILE: .github/CODEOWNERS ================================================ * @felixdittrich92 ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: felixdittrich92 patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry polar: # Replace with a single Polar username buy_me_a_coffee: # Replace with a single Buy Me a Coffee username thanks_dev: # Replace with a single thanks.dev username custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: 🐛 Bug report description: Create a report to help us improve the library labels: 'type: bug' body: - type: markdown attributes: value: > #### Before reporting a bug, please check that the issue hasn't already been addressed in [the existing and past issues](https://github.com/felixdittrich92/onnxtr/issues). - type: textarea attributes: label: Bug description description: | A clear and concise description of what the bug is. Please explain the result you observed and the behavior you were expecting. placeholder: | A clear and concise description of what the bug is. validations: required: true - type: textarea attributes: label: Code snippet to reproduce the bug description: | Sample code to reproduce the problem. Please wrap your code snippet with ```` ```triple quotes blocks``` ```` for readability. placeholder: | ```python Sample code to reproduce the problem ``` validations: required: true - type: textarea attributes: label: Error traceback description: | The error message you received running the code snippet, with the full traceback. Please wrap your error message with ```` ```triple quotes blocks``` ```` for readability. 
placeholder: | ``` The error message you got, with the full traceback. ``` validations: required: true - type: textarea attributes: label: Environment description: | Please describe your environment: OS: Python version: Library version: Onnxruntime version: validations: required: true - type: markdown attributes: value: > Thanks for helping us improve the library! ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: true contact_links: - name: Usage questions url: https://github.com/felixdittrich92/OnnxTR/discussions about: Ask questions and discuss with other OnnxTR community members ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: 🚀 Feature request description: > Submit a proposal/request for a new feature for OnnxTR. Please search for existing issues before creating a new one. For non-onnx related features please use the [main repository](https://github.com/mindee/doctr/issues). labels: 'type: enhancement' body: - type: textarea attributes: label: 🚀 The feature description: > A clear and concise description of the feature proposal validations: required: true - type: textarea attributes: label: Additional context description: > Add any other context or screenshots about the feature request. - type: markdown attributes: value: > Thanks for contributing 🎉 ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "pip" directory: "/" open-pull-requests-limit: 10 target-branch: "main" labels: ["topic: build"] schedule: interval: weekly day: sunday - package-ecosystem: "github-actions" directory: "/" open-pull-requests-limit: 10 target-branch: "main" labels: ["topic: CI/CD"] schedule: interval: weekly day: sunday groups: github-actions: patterns: - "*" ================================================ FILE: .github/release.yml ================================================ changelog: exclude: labels: - ignore-for-release categories: - title: Breaking Changes 🛠 labels: - "type: breaking change" # NEW FEATURES - title: New Features labels: - "type: new feature" # BUG FIXES - title: Bug Fixes labels: - "type: bug" # IMPROVEMENTS - title: Improvements labels: - "type: enhancement" # MISC - title: Miscellaneous labels: - "type: misc" ================================================ FILE: .github/workflows/builds.yml ================================================ name: builds on: push: branches: main pull_request: branches: main schedule: # Runs every 2 weeks on Monday at 03:00 UTC - cron: '0 3 * * 1' jobs: build: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] python: ["3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: # MacOS issue ref.: https://github.com/actions/setup-python/issues/855 & https://github.com/actions/setup-python/issues/865 python-version: ${{ matrix.os == 'macos-latest' && matrix.python == '3.10' && '3.11' || matrix.python }} architecture: x64 - name: Cache python modules uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - name: Install package run: | python -m pip install --upgrade pip pip install -e .[cpu-headless,viz] --upgrade - name: 
Import package run: python -c "import onnxtr; print(onnxtr.__version__)" conda: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: conda-incubator/setup-miniconda@v4 with: auto-update-conda: true python-version: "3.10" channels: pypdfium2-team,bblanchon,defaults,conda-forge channel-priority: strict - name: Install dependencies shell: bash -el {0} run: conda install -y conda-build conda-verify anaconda-client - name: Install libEGL run: sudo apt-get update && sudo apt-get install -y libegl1 - name: Build and verify shell: bash -el {0} run: | python setup.py sdist mkdir conda-dist conda build .conda/ --output-folder conda-dist conda-verify conda-dist/linux-64/*conda --ignore=C1115 ================================================ FILE: .github/workflows/clear_caches.yml ================================================ name: Clear GitHub runner caches on: workflow_dispatch: schedule: - cron: '0 0 * * *' # Runs once a day jobs: clear: name: Clear caches runs-on: ubuntu-latest steps: - uses: MyAlbum/purge-cache@v2 with: max-age: 172800 # Caches older than 2 days are deleted ================================================ FILE: .github/workflows/demo.yml ================================================ name: Sync Hugging Face demo on: # Run 'test-demo' on every pull request to the main branch pull_request: branches: [main] # Run 'sync-to-hub' on push when tagging (e.g., 'v*') and on a scheduled cron job push: tags: - 'v*' schedule: - cron: '0 2 10 * *' # At 02:00 on day-of-month 10 (every month) # Allow manual triggering of the workflow workflow_dispatch: jobs: # This job runs on every pull request to main test-demo: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] python: ["3.10"] steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} architecture: x64 - name: Cache python modules uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('demo/requirements.txt') }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install -r demo/requirements.txt --upgrade - name: Start Gradio demo run: | nohup python demo/app.py & sleep 10 # Allow some time for the Gradio server to start - name: Check demo build run: | curl --fail http://127.0.0.1:7860/ || exit 1 # This job only runs when a new version tag is pushed or during the cron job sync-to-hub: if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' needs: test-demo runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v6 with: python-version: "3.10" - name: Install huggingface_hub run: pip install huggingface-hub - name: Upload folder to Hugging Face env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | python -c " from huggingface_hub import HfApi api = HfApi(token='${{ secrets.HF_TOKEN }}') repo_id = 'Felix92/OnnxTR-OCR' api.upload_folder(repo_id=repo_id, repo_type='space', folder_path='demo/') api.restart_space(repo_id=repo_id, factory_reboot=True) " ================================================ FILE: .github/workflows/docker.yml ================================================ # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages # name: Docker image on ghcr.io on: push: tags: - 'v*' 
pull_request: branches: main schedule: - cron: '0 2 1 6 *' # At 02:00 on day-of-month 1 in June (once a year actually) env: REGISTRY: ghcr.io jobs: build-and-push-image: runs-on: ubuntu-latest strategy: fail-fast: false matrix: image: - "ubuntu:24.04" # Base image for CPU variants - "nvidia/cuda:12.6.2-base-ubuntu24.04" # Base image for GPU variant: - "cpu-headless" # CPU variant 1 - "openvino-headless" # CPU variant 2 - "gpu-headless" # GPU variant python: [3.10.13] # Exclude invalid combinations exclude: - image: "nvidia/cuda:12.6.2-base-ubuntu24.04" variant: "cpu-headless" - image: "nvidia/cuda:12.6.2-base-ubuntu24.04" variant: "openvino-headless" - image: "ubuntu:24.04" variant: "gpu-headless" permissions: contents: read packages: write steps: - name: Checkout repository uses: actions/checkout@v6 - name: Log in to the Container registry uses: docker/login-action@v4 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Sanitize docker tag run: | # Start with the base prefix PREFIX_DOCKER_TAG="OnnxTR-${{ matrix.variant }}-py${{ matrix.python }}" # Replace any commas with hyphens (if needed) PREFIX_DOCKER_TAG=$(echo "$PREFIX_DOCKER_TAG" | sed 's/,/-/g') # Determine suffix based on image IMAGE="${{ matrix.image }}" case "$IMAGE" in "nvidia/cuda:"*) SUFFIX=$(echo "$IMAGE" | sed -E 's|.*/cuda:([0-9]+\.[0-9]+\.[0-9]+)-base-(ubuntu[0-9]+\.[0-9]+)|-\2-cuda\1|') ;; "ubuntu:"*) SUFFIX=$(echo "$IMAGE" | sed -E 's|ubuntu:([0-9]+\.[0-9]+)|-ubuntu\1|') ;; *) SUFFIX="" ;; esac # Combine the prefix, suffix, and ensure ending hyphen PREFIX_DOCKER_TAG="${PREFIX_DOCKER_TAG}${SUFFIX}-" # Export to environment echo "PREFIX_DOCKER_TAG=${PREFIX_DOCKER_TAG}" >> $GITHUB_ENV # Debugging output echo "Final Docker Tag: $PREFIX_DOCKER_TAG" - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v6 with: images: ${{ env.REGISTRY }}/${{ github.repository }} tags: | # used only on schedule event type=schedule,pattern={{date 'YYYY-MM'}},prefix=${{ env.PREFIX_DOCKER_TAG }} # used only if a tag following semver is published type=semver,pattern={{raw}},prefix=${{ env.PREFIX_DOCKER_TAG }} - name: Build Docker image id: build uses: docker/build-push-action@v7 with: context: . build-args: | BASE_IMAGE=${{ matrix.image }} SYSTEM=${{ matrix.variant }} PYTHON_VERSION=${{ matrix.python }} ONNXTR_REPO=${{ github.repository }} ONNXTR_VERSION=${{ github.sha }} push: false # push only if `import onnxtr` works tags: ${{ steps.meta.outputs.tags }} - name: Check if `import onnxtr` works run: docker run ${{ steps.build.outputs.imageid }} python3 -c 'import onnxtr; print(onnxtr.__version__)' - name: Push Docker image if: ${{ (github.ref == 'refs/heads/main' && github.event_name != 'pull_request') || (startsWith(github.ref, 'refs/tags') && github.event_name == 'push') }} uses: docker/build-push-action@v7 with: context: . 
build-args: | BASE_IMAGE=${{ matrix.image }} SYSTEM=${{ matrix.variant }} PYTHON_VERSION=${{ matrix.python }} ONNXTR_REPO=${{ github.repository }} ONNXTR_VERSION=${{ github.sha }} push: true tags: ${{ steps.meta.outputs.tags }} ================================================ FILE: .github/workflows/main.yml ================================================ name: tests on: push: branches: main pull_request: branches: main schedule: # Runs every 2 weeks on Monday at 03:00 UTC - cron: '0 3 * * 1' jobs: pytest-common: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] python: ["3.10", "3.11", "3.12"] backend: ["cpu-headless", "openvino-headless"] steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} architecture: x64 - name: Cache python modules uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-tests - name: Install dependencies run: | python -m pip install --upgrade pip pip install -e .[${{ matrix.backend }},viz,html,testing] --upgrade - name: Run unittests run: | coverage run -m pytest tests/common/ -rs --memray coverage xml -o coverage-common-${{ matrix.backend }}-${{ matrix.python }}.xml - uses: actions/upload-artifact@v7 with: name: coverage-common-${{ matrix.backend }}-${{ matrix.python }} path: ./coverage-common-${{ matrix.backend }}-${{ matrix.python }}.xml if-no-files-found: error codecov-upload: runs-on: ubuntu-latest needs: [ pytest-common ] steps: - uses: actions/checkout@v6 - uses: actions/download-artifact@v8 - name: Upload coverage to Codecov uses: codecov/codecov-action@v6 with: flags: unittests fail_ci_if_error: true token: ${{ secrets.CODECOV_TOKEN }} ================================================ FILE: .github/workflows/publish.yml ================================================ name: publish on: release: types: [published] jobs: pypi: if: "!github.event.release.prerelease" strategy: fail-fast: false matrix: os: [ubuntu-latest] python: ["3.10"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} architecture: x64 - name: Cache python modules uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine --upgrade - name: Get release tag id: release_tag run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} VERSION: ${{ env.VERSION }} run: | BUILD_VERSION=$VERSION python setup.py sdist bdist_wheel twine check dist/* twine upload dist/* pypi-check: needs: pypi if: "!github.event.release.prerelease" strategy: fail-fast: false matrix: os: [ubuntu-latest] python: ["3.10"] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} architecture: x64 - name: Install package run: | python -m pip install --upgrade pip pip install onnxtr[cpu] --upgrade python -c "from importlib.metadata import version; print(version('onnxtr'))" conda: if: "!github.event.release.prerelease" runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: conda-incubator/setup-miniconda@v4 with: auto-update-conda: true 
python-version: "3.10" channels: pypdfium2-team,bblanchon,defaults,conda-forge channel-priority: strict - name: Install dependencies shell: bash -el {0} run: conda install -y conda-build conda-verify anaconda-client - name: Install libEGL run: sudo apt-get update && sudo apt-get install -y libegl1 - name: Get release tag id: release_tag run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV - name: Build and publish shell: bash -el {0} env: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_TOKEN }} VERSION: ${{ env.VERSION }} run: | echo "BUILD_VERSION=${VERSION}" >> $GITHUB_ENV python setup.py sdist mkdir conda-dist conda build .conda/ --output-folder conda-dist conda-verify conda-dist/linux-64/*conda --ignore=C1115 anaconda upload conda-dist/linux-64/*conda conda-check: if: "!github.event.release.prerelease" runs-on: ubuntu-latest needs: conda steps: - uses: conda-incubator/setup-miniconda@v4 with: auto-update-conda: true python-version: "3.10" - name: Install package shell: bash -el {0} run: | conda config --set channel_priority strict conda install -c conda-forge onnxruntime conda install -c felix92 -c pypdfium2-team -c bblanchon -c defaults -c conda-forge onnxtr python -c "from importlib.metadata import version; print(version('onnxtr'))" ================================================ FILE: .github/workflows/style.yml ================================================ name: style on: push: branches: main pull_request: branches: main jobs: ruff: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] python: ["3.10"] steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} architecture: x64 - name: Run ruff run: | pip install ruff --upgrade ruff --version ruff check --diff . mypy: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] python: ["3.10"] steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: ${{ matrix.python }} architecture: x64 - name: Cache python modules uses: actions/cache@v5 with: path: ~/.cache/pip key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - name: Install dependencies run: | python -m pip install --upgrade pip pip install -e .[dev] --upgrade pip install mypy --upgrade - name: Run mypy run: | mypy --version mypy ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # Temp files onnxtr/version.py logs/ wandb/ .idea/ # Model files *.onnx .qodo # Profile files yappi_profile.stats memray_profile.bin memray_flamegraph.html ================================================ FILE: .pre-commit-config.yaml ================================================ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: - id: check-ast - id: check-yaml exclude: .conda - id: check-toml - id: check-json - id: check-added-large-files exclude: docs/images/ - id: end-of-file-fixer - id: trailing-whitespace - id: debug-statements - id: check-merge-conflict - id: no-commit-to-branch args: ['--branch', 'main'] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.15.0 hooks: - id: ruff args: [ --fix ] - id: ruff-format ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
## Our Standards Examples of behavior that contributes to a positive environment for our community include: * Demonstrating empathy and kindness toward other people * Being respectful of differing opinions, viewpoints, and experiences * Giving and gracefully accepting constructive feedback * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience * Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: * The use of sexualized language or imagery, and sexual attention or advances of any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or email address, without their explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. ## Scope This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at contact@mindee.com. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident. ## Enforcement Guidelines Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: ### 1. Correction **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. ### 2. Warning **Community Impact**: A violation through a single incident or series of actions. **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. ### 3. Temporary Ban **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. 
No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. ### 4. Permanent Ban **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. **Consequence**: A permanent ban from any sort of public interaction within the community. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. ================================================ FILE: Dockerfile ================================================ ARG BASE_IMAGE FROM ${BASE_IMAGE} ENV DEBIAN_FRONTEND=noninteractive ENV LANG=C.UTF-8 ENV PYTHONUNBUFFERED=1 ENV PYTHONDONTWRITEBYTECODE=1 ARG SYSTEM ARG PYTHON_VERSION RUN apt-get update && apt-get install -y --no-install-recommends \ # - Other packages build-essential \ pkg-config \ curl \ wget \ software-properties-common \ unzip \ git \ # - Packages to build Python tar make gcc zlib1g-dev libffi-dev libssl-dev liblzma-dev libbz2-dev libsqlite3-dev \ # - Packages for docTR libgl1-mesa-dev libsm6 libxext6 libxrender-dev libpangocairo-1.0-0 \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* # Install Python RUN wget http://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \ tar -zxf Python-$PYTHON_VERSION.tgz && \ cd Python-$PYTHON_VERSION && \ mkdir /opt/python/ && \ ./configure --prefix=/opt/python && \ make && \ make install && \ cd .. && \ rm Python-$PYTHON_VERSION.tgz && \ rm -r Python-$PYTHON_VERSION ENV PATH=/opt/python/bin:$PATH # Install OnnxTR ARG ONNXTR_REPO='felixdittrich92/onnxtr' ARG ONNXTR_VERSION=main RUN pip3 install -U pip setuptools wheel && \ pip3 install "onnxtr[$SYSTEM,html]@git+https://github.com/$ONNXTR_REPO.git@$ONNXTR_VERSION" ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: Makefile ================================================ .PHONY: quality style test docs-single-version docs # this target runs checks on all files quality: ruff check . mypy onnxtr/ # this target runs checks on all files and potentially modifies some of them style: ruff format . ruff check --fix . # Run tests for the library test: coverage run -m pytest tests/common/ -rs --memray coverage report --fail-under=80 --show-missing # Check that docs can build docs-single-version: sphinx-build docs/source docs/_build -a # Check that docs can build docs: cd docs && bash build.sh ================================================ FILE: README.md ================================================

[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/felixdittrich92/onnxtr/workflows/builds/badge.svg) [![codecov](https://codecov.io/gh/felixdittrich92/OnnxTR/graph/badge.svg?token=WVFRCQBOLI)](https://codecov.io/gh/felixdittrich92/OnnxTR) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/4fff4d764bb14fb8b4f4afeb9587231b)](https://app.codacy.com/gh/felixdittrich92/OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade) [![CodeFactor](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr/badge)](https://www.codefactor.io/repository/github/felixdittrich92/onnxtr) [![Socket Badge](https://socket.dev/api/badge/pypi/package/onnxtr/0.8.1?artifact_id=tar-gz)](https://socket.dev/pypi/package/onnxtr/overview/0.8.1/tar-gz) [![Pypi](https://img.shields.io/badge/pypi-v0.8.1-blue.svg)](https://pypi.org/project/OnnxTR/) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/felixdittrich92/OnnxTR/pkgs/container/onnxtr) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/Felix92/OnnxTR-OCR) ![PyPI - Downloads](https://img.shields.io/pypi/dm/onnxtr)

> :warning: Please note that this is a wrapper around the [doctr](https://github.com/mindee/doctr) library that provides an Onnx pipeline for docTR. For feature requests that are not directly related to the Onnx pipeline, please refer to the base project.

**Optical Character Recognition made seamless & accessible to anyone, powered by Onnx**

What you can expect from this repository:

- efficient ways to parse textual information (localize and identify each word) from your documents
- an Onnx pipeline for docTR, a wrapper around the [doctr](https://github.com/mindee/doctr) library
- no PyTorch or TensorFlow dependencies
- a more lightweight package with lower inference latency and fewer required resources
- 8-bit quantized models for faster inference on CPU

![OCR_example](https://github.com/felixdittrich92/OnnxTR/raw/main/docs/images/ocr.png)

## Installation

### Prerequisites

Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install OnnxTR.

### Latest release

You can install the latest release of the package from [PyPI](https://pypi.org/project/OnnxTR/) as follows:

**NOTE:** The execution providers supported by default are: CPU, CUDA (NVIDIA GPU), OpenVINO (Intel CPU | GPU), and CoreML (Apple Silicon). For GPU support please take a look at [ONNX Runtime](https://onnxruntime.ai/getting-started).

- **Prerequisites:** CUDA & cuDNN need to be installed first; see the [version table](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html).
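If you are unsure which variant fits your machine, an already installed `onnxruntime` build can report the execution providers it was compiled with. A minimal sanity check (a sketch, assuming any `onnxruntime` flavor is already present):

```python
import onnxruntime as ort

# A CUDA-enabled build lists e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"];
# a CPU-only build lists just ["CPUExecutionProvider"].
print(ort.get_available_providers())
```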
```shell
# standard cpu support
pip install "onnxtr[cpu]"
pip install "onnxtr[cpu-headless]"  # same as cpu but with opencv-headless

# with gpu support
pip install "onnxtr[gpu]"
pip install "onnxtr[gpu-headless]"  # same as gpu but with opencv-headless

# OpenVINO cpu | gpu support for Intel CPUs | GPUs
pip install "onnxtr[openvino]"
pip install "onnxtr[openvino-headless]"  # same as openvino but with opencv-headless

# with HTML support
pip install "onnxtr[html]"

# with support for visualization
pip install "onnxtr[viz]"

# with support for all dependencies
pip install "onnxtr[html, gpu, viz]"
```

**Recommendation:** If you have:

- an NVIDIA GPU, use one of the `gpu` variants
- an Intel CPU or GPU, use one of the `openvino` variants
- an Apple Silicon Mac, use one of the `cpu` variants (CoreML is auto-detected)
- otherwise, use one of the `cpu` variants

**OpenVINO:** By default, OnnxTR running with the OpenVINO execution provider backend uses the `CPU` device with `FP32` precision. To change the device or for further configuration, please refer to the [ONNX Runtime OpenVINO documentation](https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options).

### Reading files

Documents can be interpreted from PDFs / images / webpages / multiple page images using the following code snippet:

```python
from onnxtr.io import DocumentFile

# PDF
pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
# Image
single_img_doc = DocumentFile.from_images("path/to/your/img.jpg")
# Webpage (requires `weasyprint` to be installed)
webpage_doc = DocumentFile.from_url("https://www.yoursite.com")
# Multiple page images
multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"])
```

### Putting it together

Let's use the default `ocr_predictor` model as an example:

```python
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor, EngineConfig

model = ocr_predictor(
    det_arch="fast_base",  # detection architecture
    reco_arch="vitstr_base",  # recognition architecture
    det_bs=2,  # detection batch size
    reco_bs=512,  # recognition batch size
    # Document related parameters
    assume_straight_pages=True,  # set to `False` if the pages are not straight (rotation, perspective, etc.) (default: True)
    straighten_pages=False,  # set to `True` if the pages should be straightened before final processing (default: False)
    export_as_straight_boxes=False,  # set to `True` if the boxes should be exported as if the pages were straight (default: False)
    # Preprocessing related parameters
    preserve_aspect_ratio=True,  # set to `False` if the aspect ratio should not be preserved (default: True)
    symmetric_pad=True,  # set to `False` to disable symmetric padding (default: True)
    # Additional parameters - meta information
    detect_orientation=False,  # set to `True` if the orientation of the pages should be detected (default: False)
    detect_language=False,  # set to `True` if the language of the pages should be detected (default: False)
    # Orientation specific parameters in combination with `assume_straight_pages=False` and/or `straighten_pages=True`
    disable_crop_orientation=False,  # set to `True` if the crop orientation classification should be disabled (default: False)
    disable_page_orientation=False,  # set to `True` if the general page orientation classification should be disabled (default: False)
    # DocumentBuilder specific parameters
    resolve_lines=True,  # whether words should be automatically grouped into lines (default: True)
    resolve_blocks=False,  # whether lines should be automatically grouped into blocks (default: False)
    paragraph_break=0.035,  # relative length of the minimum space separating paragraphs (default: 0.035)
    # OnnxTR specific parameters
    # NOTE: 8-bit quantized models are not available for FAST detection models and can in general lead to poorer accuracy
    load_in_8_bit=False,  # set to `True` to load 8-bit quantized models instead of the full precision ones (default: False)
    # Advanced engine configuration options
    det_engine_cfg=EngineConfig(),  # detection model engine configuration (default: internal predefined configuration)
    reco_engine_cfg=EngineConfig(),  # recognition model engine configuration (default: internal predefined configuration)
    clf_engine_cfg=EngineConfig(),  # classification (orientation) model engine configuration (default: internal predefined configuration)
)
# PDF
doc = DocumentFile.from_pdf("path/to/your/doc.pdf")
# Analyze
result = model(doc)
# Display the result (requires matplotlib & mplcursors to be installed)
result.show()
```

![Visualization sample](https://github.com/felixdittrich92/OnnxTR/raw/main/docs/images/doctr_example_script.gif)

Or even rebuild the original document from its predictions:

```python
import matplotlib.pyplot as plt

synthetic_pages = result.synthesize()
plt.imshow(synthetic_pages[0])
plt.axis("off")
plt.show()
```

![Synthesis sample](https://github.com/felixdittrich92/OnnxTR/raw/main/docs/images/synthesized_sample.png)

The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`). To get a better understanding of the document model, check out the [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure).

You can also export the result as a nested dict (more appropriate for JSON), render it as human-readable text, or export it as XML (hOCR format):

```python
json_output = result.export()  # nested dict
text_output = result.render()  # human-readable text
xml_output = result.export_as_xml()  # hOCR format
for output in xml_output:
    xml_bytes_string = output[0]
    xml_element = output[1]
```
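Because `export()` returns a plain nested dict, downstream processing needs no OnnxTR-specific types. A minimal sketch, assuming the docTR-style schema (`pages` → `blocks` → `lines` → `words`, each word carrying `value` and `confidence` keys), which flattens all recognized words:

```python
# Flatten the exported document into (word, confidence) pairs.
# Assumes the docTR document model: pages -> blocks -> lines -> words.
json_output = result.export()

words = [
    (word["value"], word["confidence"])
    for page in json_output["pages"]
    for block in page["blocks"]
    for line in block["lines"]
    for word in line["words"]
]
print(words[:10])  # first ten recognized words with their scores
```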
## Advanced engine configuration options

You can also define advanced engine configurations for the models / predictors:

```python
from onnxruntime import SessionOptions

from onnxtr.models import ocr_predictor, EngineConfig

general_options = SessionOptions()  # For configuration options see: https://onnxruntime.ai/docs/api/python/api_summary.html#sessionoptions
general_options.enable_cpu_mem_arena = False

# NOTE: The following forces execution on the GPU only; if no GPU is available, it will raise an error.
# List of strings e.g. ["CUDAExecutionProvider", "CPUExecutionProvider"] or a list of tuples with the provider and its options e.g.
# [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]
providers = [("CUDAExecutionProvider", {"device_id": 0, "cudnn_conv_algo_search": "DEFAULT"})]  # For available providers see: https://onnxruntime.ai/docs/execution-providers/

engine_config = EngineConfig(
    session_options=general_options,
    providers=providers,
)
# We use the default predictor with the custom engine configuration
# NOTE: You can define different engine configurations for detection, recognition and classification depending on your needs
predictor = ocr_predictor(
    det_engine_cfg=engine_config,
    reco_engine_cfg=engine_config,
    clf_engine_cfg=engine_config,
)
```

You can also dynamically configure whether the memory arena should shrink:

```python
from random import random

from onnxruntime import RunOptions, SessionOptions

from onnxtr.models import ocr_predictor, EngineConfig


def arena_shrinkage_handler(run_options: RunOptions) -> RunOptions:
    """Shrink the memory arena on 10% of inference runs."""
    if random() < 0.1:
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu:0")
    return run_options


engine_config = EngineConfig(run_options_provider=arena_shrinkage_handler)
engine_config.session_options.enable_mem_pattern = False

predictor = ocr_predictor(det_engine_cfg=engine_config, reco_engine_cfg=engine_config, clf_engine_cfg=engine_config)
```
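Building on the `EngineConfig` examples above, a common defensive pattern is to select providers based on what the installed runtime actually exposes instead of hard-coding CUDA. A sketch, relying only on `onnxruntime.get_available_providers()` from the public onnxruntime API:

```python
import onnxruntime as ort

from onnxtr.models import EngineConfig, ocr_predictor

# Prefer CUDA when the runtime exposes it, otherwise fall back to plain CPU.
if "CUDAExecutionProvider" in ort.get_available_providers():
    providers = [("CUDAExecutionProvider", {"device_id": 0}), ("CPUExecutionProvider", {})]
else:
    providers = [("CPUExecutionProvider", {})]

engine_config = EngineConfig(providers=providers)
predictor = ocr_predictor(
    det_engine_cfg=engine_config,
    reco_engine_cfg=engine_config,
    clf_engine_cfg=engine_config,
)
```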
## Loading custom exported models

You can also load custom models exported from docTR. For exporting, please take a look at the [doctr documentation](https://mindee.github.io/doctr/using_doctr/using_model_export.html#export-to-onnx).

```python
from onnxtr.models import ocr_predictor, linknet_resnet18, parseq

reco_model = parseq("path_to_custom_model.onnx", vocab="ABC")
det_model = linknet_resnet18("path_to_custom_model.onnx")
model = ocr_predictor(det_arch=det_model, reco_arch=reco_model)
```

## Loading models from HuggingFace Hub

You can also load models from the HuggingFace Hub:

```python
from onnxtr.io import DocumentFile
from onnxtr.models import ocr_predictor, from_hub

img = DocumentFile.from_images(["path/to/your/img.jpg"])
# Load your model from the hub
model = from_hub("onnxtr/my-model")

# Pass it to the predictor
# If your model is a recognition model:
predictor = ocr_predictor(det_arch="db_mobilenet_v3_large", reco_arch=model)

# If your model is a detection model:
predictor = ocr_predictor(det_arch=model, reco_arch="crnn_mobilenet_v3_small")

# Get your predictions
res = predictor(img)
```

HF Hub search: [here](https://huggingface.co/models?search=onnxtr). Collection: [here](https://huggingface.co/collections/Felix92/onnxtr-66bf213a9f88f7346c90e842)

Or push your own models to the hub:

```python
from onnxtr.models import linknet_resnet18, parseq, push_to_hf_hub, login_to_hub
from onnxtr.utils.vocabs import VOCABS

# Login to the hub
login_to_hub()

# Recognition model
model = parseq("~/onnxtr-parseq-multilingual-v1.onnx", vocab=VOCABS["multilingual"])
push_to_hf_hub(
    model,
    model_name="onnxtr-parseq-multilingual-v1",
    task="recognition",  # The task for which the model is intended [detection, recognition, classification]
    arch="parseq",  # The name of the model architecture
    override=False,  # Set to `True` if you want to override an existing model / repository
)

# Detection model
model = linknet_resnet18("~/onnxtr-linknet-resnet18.onnx")
push_to_hf_hub(model, model_name="onnxtr-linknet-resnet18", task="detection", arch="linknet_resnet18", override=True)
```

## Model architectures

Credits where it's due: this repository provides ONNX models for the following architectures, converted from the docTR models:

### Text Detection

- DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf).
- LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf)
- FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf)

### Text Recognition

- CRNN: [An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/pdf/1507.05717.pdf).
- SAR: [Show, Attend and Read: A Simple and Strong Baseline for Irregular Text Recognition](https://arxiv.org/pdf/1811.00751.pdf).
- MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf).
- ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf).
- PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966).
- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110).
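Any of the detection architectures above can be combined with any of the recognition architectures through `ocr_predictor`, simply by passing the architecture names as strings (both names used here appear in the lists below):

```python
from onnxtr.models import ocr_predictor

# Pair a lightweight detector with a transformer-based recognizer.
predictor = ocr_predictor(det_arch="db_mobilenet_v3_large", reco_arch="parseq")
```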
```python
predictor = ocr_predictor()
predictor.list_archs()
{
    "detection archs": [
        "db_resnet34",
        "db_resnet50",
        "db_mobilenet_v3_large",
        "linknet_resnet18",
        "linknet_resnet34",
        "linknet_resnet50",
        "fast_tiny",  # No 8-bit support
        "fast_small",  # No 8-bit support
        "fast_base",  # No 8-bit support
    ],
    "recognition archs": [
        "crnn_vgg16_bn",
        "crnn_mobilenet_v3_small",
        "crnn_mobilenet_v3_large",
        "sar_resnet31",
        "master",
        "vitstr_small",
        "vitstr_base",
        "parseq",
        "viptr_tiny",  # No 8-bit support
    ],
}
```

### Documentation

This repository is kept in sync with the [doctr](https://github.com/mindee/doctr) library, which provides a high-level API to perform OCR on documents, and stays up-to-date with the latest features and improvements from the base project. You can therefore refer to the [doctr documentation](https://mindee.github.io/doctr/) for more detailed information.

NOTE:

- `pretrained` is the default in OnnxTR and not available as a parameter.
- docTR-specific environment variables need to be replaced with the `ONNXTR_` prefix (e.g. `DOCTR_CACHE_DIR` -> `ONNXTR_CACHE_DIR`).

### Benchmarks

The CPU benchmarks were measured on an `i7-14700K Intel CPU`.
The GPU benchmarks were measured on an `RTX 4080 Nvidia GPU`.

Benchmarking was performed on the FUNSD and CORD datasets.

The docTR / OnnxTR models used for the benchmarks are `fast_base` (full precision) | `db_resnet50` (8-bit variant) for detection and `crnn_vgg16_bn` for recognition.

For comparison, the smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_mobilenet_v3_small` takes ~0.17s / Page on the FUNSD dataset and ~0.12s / Page on the CORD dataset in **full precision** on CPU.

- CPU benchmarks:

|Library                              |FUNSD (199 pages)              |CORD (900 pages)               |
|-------------------------------------|-------------------------------|-------------------------------|
|docTR (CPU) - v0.8.1                 | ~1.29s / Page                 | ~0.60s / Page                 |
|**OnnxTR (CPU)** - v0.6.0            | ~0.57s / Page                 | **~0.25s / Page**             |
|**OnnxTR (CPU) 8-bit** - v0.6.0      | **~0.38s / Page**             | **~0.14s / Page**             |
|**OnnxTR (CPU-OpenVINO)** - v0.6.0   | **~0.15s / Page**             | **~0.14s / Page**             |
|EasyOCR (CPU) - v1.7.1               | ~1.96s / Page                 | ~1.75s / Page                 |
|**PyTesseract (CPU)** - v0.3.10      | **~0.50s / Page**             | ~0.52s / Page                 |
|Surya (line) (CPU) - v0.4.4          | ~48.76s / Page                | ~35.49s / Page                |
|PaddleOCR (CPU) - no cls - v2.7.3    | ~1.27s / Page                 | ~0.38s / Page                 |

- GPU benchmarks:

|Library                              |FUNSD (199 pages)              |CORD (900 pages)               |
|-------------------------------------|-------------------------------|-------------------------------|
|docTR (GPU) - v0.8.1                 | ~0.07s / Page                 | ~0.05s / Page                 |
|**docTR (GPU) float16** - v0.8.1     | **~0.06s / Page**             | **~0.03s / Page**             |
|OnnxTR (GPU) - v0.6.0                | **~0.06s / Page**             | ~0.04s / Page                 |
|**OnnxTR (GPU) float16 - v0.6.0**    | **~0.05s / Page**             | **~0.03s / Page**             |
|EasyOCR (GPU) - v1.7.1               | ~0.31s / Page                 | ~0.19s / Page                 |
|Surya (GPU) float16 - v0.4.4         | ~3.70s / Page                 | ~2.81s / Page                 |
|**PaddleOCR (GPU) - no cls - v2.7.3**| ~0.08s / Page                 | **~0.03s / Page**             |

## Citation

If you wish to cite, please refer to the base project's citation; feel free to use these [BibTeX](http://www.bibtex.org/) references:

```bibtex
@misc{doctr2021,
    title={docTR: Document Text Recognition},
    author={Mindee},
    year={2021},
    publisher = {GitHub},
    howpublished = {\url{https://github.com/mindee/doctr}}
}
```

```bibtex
@misc{onnxtr2024,
    title={OnnxTR: Optical Character Recognition made seamless & accessible to anyone, powered by Onnx},
    author={Felix Dittrich},
    year={2024},
    publisher = {GitHub},
    howpublished =
{\url{https://github.com/felixdittrich92/OnnxTR}} } ``` ## License Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/felixdittrich92/OnnxTR?tab=Apache-2.0-1-ov-file#readme) for more information. ================================================ FILE: demo/README.md ================================================ --- title: OnnxTR OCR emoji: 🔥 colorFrom: red colorTo: purple sdk: gradio sdk_version: 5.34.2 app_file: app.py pinned: false license: apache-2.0 --- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference ## Run the demo locally ```bash cd demo pip install -r requirements.txt python3 app.py ``` ================================================ FILE: demo/app.py ================================================ import io import os from typing import Any # NOTE: This is a fix to run the demo on the HuggingFace Zero GPU or CPU spaces if os.environ.get("SPACES_ZERO_GPU") is not None: import spaces else: class spaces: # noqa: N801 @staticmethod def GPU(func): # noqa: N802 def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper import cv2 import gradio as gr import matplotlib.pyplot as plt import numpy as np from matplotlib.figure import Figure from PIL import Image from onnxtr.io import DocumentFile from onnxtr.models import EngineConfig, from_hub, ocr_predictor from onnxtr.models.predictor import OCRPredictor from onnxtr.utils.visualization import visualize_page DET_ARCHS: list[str] = [ "fast_base", "fast_small", "fast_tiny", "db_resnet50", "db_resnet34", "db_mobilenet_v3_large", "linknet_resnet18", "linknet_resnet34", "linknet_resnet50", ] RECO_ARCHS: list[str] = [ "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large", "master", "sar_resnet31", "vitstr_small", "vitstr_base", "parseq", "viptr_tiny", ] CUSTOM_RECO_ARCHS: list[str] = [ "Felix92/onnxtr-parseq-multilingual-v1", ] def load_predictor( det_arch: str, reco_arch: str, use_gpu: bool, assume_straight_pages: bool, straighten_pages: bool, export_as_straight_boxes: bool, detect_language: bool, load_in_8_bit: bool, bin_thresh: float, box_thresh: float, disable_crop_orientation: bool = False, disable_page_orientation: bool = False, ) -> OCRPredictor: """Load a predictor from doctr.models Args: ---- det_arch: detection architecture reco_arch: recognition architecture use_gpu: whether to use the GPU or not assume_straight_pages: whether to assume straight pages or not disable_crop_orientation: whether to disable crop orientation or not disable_page_orientation: whether to disable page orientation or not straighten_pages: whether to straighten rotated pages or not export_as_straight_boxes: whether to export straight boxes detect_language: whether to detect the language of the text load_in_8_bit: whether to load the image in 8 bit mode bin_thresh: binarization threshold for the segmentation map box_thresh: minimal objectness score to consider a box Returns: ------- instance of OCRPredictor """ engine_cfg = ( EngineConfig() if use_gpu else EngineConfig(providers=[("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})]) ) predictor = ocr_predictor( det_arch=det_arch, reco_arch=reco_arch if reco_arch not in CUSTOM_RECO_ARCHS else from_hub(reco_arch), assume_straight_pages=assume_straight_pages, straighten_pages=straighten_pages, detect_language=detect_language, load_in_8_bit=load_in_8_bit, export_as_straight_boxes=export_as_straight_boxes, detect_orientation=not assume_straight_pages, 
disable_crop_orientation=disable_crop_orientation, disable_page_orientation=disable_page_orientation, det_engine_cfg=engine_cfg, reco_engine_cfg=engine_cfg, clf_engine_cfg=engine_cfg, ) predictor.det_predictor.model.postprocessor.bin_thresh = bin_thresh predictor.det_predictor.model.postprocessor.box_thresh = box_thresh return predictor def forward_image(predictor: OCRPredictor, image: np.ndarray) -> np.ndarray: """Forward an image through the predictor Args: ---- predictor: instance of OCRPredictor image: image to process Returns: ------- segmentation map """ processed_batches = predictor.det_predictor.pre_processor([image]) out = predictor.det_predictor.model(processed_batches[0], return_model_output=True) seg_map = out["out_map"] return seg_map def matplotlib_to_pil(fig: Figure | np.ndarray) -> Image.Image: """Convert a matplotlib figure to a PIL image Args: ---- fig: matplotlib figure or numpy array Returns: ------- PIL image """ buf = io.BytesIO() if isinstance(fig, Figure): fig.savefig(buf) else: plt.imsave(buf, fig) buf.seek(0) return Image.open(buf) @spaces.GPU def analyze_page( uploaded_file: Any, page_idx: int, det_arch: str, reco_arch: str, use_gpu: bool, assume_straight_pages: bool, disable_crop_orientation: bool, disable_page_orientation: bool, straighten_pages: bool, export_as_straight_boxes: bool, detect_language: bool, load_in_8_bit: bool, bin_thresh: float, box_thresh: float, ): """Analyze a page Args: ---- uploaded_file: file to analyze page_idx: index of the page to analyze det_arch: detection architecture reco_arch: recognition architecture use_gpu: whether to use the GPU or not assume_straight_pages: whether to assume straight pages or not disable_crop_orientation: whether to disable crop orientation or not disable_page_orientation: whether to disable page orientation or not straighten_pages: whether to straighten rotated pages or not export_as_straight_boxes: whether to export straight boxes detect_language: whether to detect the language of the text load_in_8_bit: whether to load the image in 8 bit mode bin_thresh: binarization threshold for the segmentation map box_thresh: minimal objectness score to consider a box Returns: ------- input image, segmentation heatmap, output image, OCR output, synthesized page """ if uploaded_file is None: return None, "Please upload a document", None, None, None if uploaded_file.name.endswith(".pdf"): doc = DocumentFile.from_pdf(uploaded_file) else: doc = DocumentFile.from_images(uploaded_file) try: page = doc[page_idx - 1] except IndexError: page = doc[-1] img = page predictor = load_predictor( det_arch=det_arch, reco_arch=reco_arch, use_gpu=use_gpu, assume_straight_pages=assume_straight_pages, straighten_pages=straighten_pages, export_as_straight_boxes=export_as_straight_boxes, detect_language=detect_language, load_in_8_bit=load_in_8_bit, bin_thresh=bin_thresh, box_thresh=box_thresh, disable_crop_orientation=disable_crop_orientation, disable_page_orientation=disable_page_orientation, ) seg_map = forward_image(predictor, page) seg_map = np.squeeze(seg_map) seg_map = cv2.resize(seg_map, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_LINEAR) seg_heatmap = matplotlib_to_pil(seg_map) out = predictor([page]) page_export = out.pages[0].export() fig = visualize_page(out.pages[0].export(), out.pages[0].page, interactive=False, add_labels=False) out_img = matplotlib_to_pil(fig) if assume_straight_pages or (not assume_straight_pages and straighten_pages): synthesized_page = out.pages[0].synthesize() else: synthesized_page = None return 
img, seg_heatmap, out_img, page_export, synthesized_page with gr.Blocks(fill_height=True) as demo: gr.HTML( """

OnnxTR OCR Demo


To use this interactive demo for OnnxTR:

1. Upload a document (PDF, JPG, or PNG)

2. Select the model architectures for text detection and recognition you want to use

3. Press the "Analyze page" button to process the uploaded document

""" ) with gr.Row(): with gr.Column(scale=1): upload = gr.File(label="Upload File [JPG | PNG | PDF]", file_types=[".pdf", ".jpg", ".png"]) page_selection = gr.Slider(minimum=1, maximum=10, step=1, value=1, label="Page selection") det_model = gr.Dropdown(choices=DET_ARCHS, value=DET_ARCHS[0], label="Text detection model") reco_model = gr.Dropdown( choices=RECO_ARCHS + CUSTOM_RECO_ARCHS, value=RECO_ARCHS[0], label="Text recognition model" ) use_gpu = gr.Checkbox(value=True, label="Use GPU") assume_straight = gr.Checkbox(value=True, label="Assume straight pages") disable_crop_orientation = gr.Checkbox(value=False, label="Disable crop orientation") disable_page_orientation = gr.Checkbox(value=False, label="Disable page orientation") straighten = gr.Checkbox(value=False, label="Straighten pages") export_as_straight_boxes = gr.Checkbox(value=False, label="Export as straight boxes") det_language = gr.Checkbox(value=False, label="Detect language") load_in_8_bit = gr.Checkbox(value=False, label="Load 8-bit quantized models") binarization_threshold = gr.Slider( minimum=0.1, maximum=0.9, value=0.3, step=0.1, label="Binarization threshold" ) box_threshold = gr.Slider(minimum=0.1, maximum=0.9, value=0.1, step=0.1, label="Box threshold") analyze_button = gr.Button("Analyze page") with gr.Column(scale=3): with gr.Row(): input_image = gr.Image(label="Input page", width=700, height=500) segmentation_heatmap = gr.Image(label="Segmentation heatmap", width=700, height=500) output_image = gr.Image(label="Output page", width=700, height=500) with gr.Row(): with gr.Column(scale=3): ocr_output = gr.JSON(label="OCR output", render=True, scale=1, height=500) with gr.Column(scale=3): synthesized_page = gr.Image(label="Synthesized page", width=700, height=500) analyze_button.click( analyze_page, inputs=[ upload, page_selection, det_model, reco_model, use_gpu, assume_straight, disable_crop_orientation, disable_page_orientation, straighten, export_as_straight_boxes, det_language, load_in_8_bit, binarization_threshold, box_threshold, ], outputs=[input_image, segmentation_heatmap, output_image, ocr_output, synthesized_page], ) demo.launch(inbrowser=True, allowed_paths=["./data/logo.jpg"]) ================================================ FILE: demo/packages.txt ================================================ python3-opencv fonts-freefont-ttf ================================================ FILE: demo/requirements.txt ================================================ -e "onnxtr[gpu-headless,viz] @ git+https://github.com/felixdittrich92/OnnxTR.git" gradio>=5.30.0,<7.0.0 spaces>=0.37.0 # Quick fix to avoid HuggingFace Spaces cudnn9.x Cuda12.x issue # NOTE: outdated # onnxruntime-gpu==1.19.0 ================================================ FILE: onnxtr/__init__.py ================================================ from . import io, models, contrib, transforms, utils from .version import __version__ # noqa: F401 ================================================ FILE: onnxtr/contrib/__init__.py ================================================ from .artefacts import ArtefactDetector ================================================ FILE: onnxtr/contrib/artefacts.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
from typing import Any import cv2 import numpy as np from onnxtr.file_utils import requires_package from .base import _BasePredictor __all__ = ["ArtefactDetector"] default_cfgs: dict[str, dict[str, Any]] = { "yolov8_artefact": { "input_shape": (3, 1024, 1024), "labels": ["bar_code", "qr_code", "logo", "photo"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/yolo_artefact-f9d66f14.onnx", }, } class ArtefactDetector(_BasePredictor): """ A class to detect artefacts in images >>> from onnxtr.io import DocumentFile >>> from onnxtr.contrib.artefacts import ArtefactDetector >>> doc = DocumentFile.from_images(["path/to/image.jpg"]) >>> detector = ArtefactDetector() >>> results = detector(doc) Args: arch: the architecture to use batch_size: the batch size to use model_path: the path to the model to use labels: the labels to use input_shape: the input shape to use mask_labels: the mask labels to use conf_threshold: the confidence threshold to use iou_threshold: the intersection over union threshold to use **kwargs: additional arguments to be passed to `download_from_url` """ def __init__( self, arch: str = "yolov8_artefact", batch_size: int = 2, model_path: str | None = None, labels: list[str] | None = None, input_shape: tuple[int, int, int] | None = None, conf_threshold: float = 0.5, iou_threshold: float = 0.5, **kwargs: Any, ) -> None: super().__init__(batch_size=batch_size, url=default_cfgs[arch]["url"], model_path=model_path, **kwargs) self.labels = labels or default_cfgs[arch]["labels"] self.input_shape = input_shape or default_cfgs[arch]["input_shape"] self.conf_threshold = conf_threshold self.iou_threshold = iou_threshold def preprocess(self, img: np.ndarray) -> np.ndarray: return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0) def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]: results = [] for batch in zip(output, input_images): for out, img in zip(batch[0], batch[1]): org_height, org_width = img.shape[:2] width_scale, height_scale = org_width / self.input_shape[2], org_height / self.input_shape[1] for res in out: sample_results = [] for row in np.transpose(np.squeeze(res)): classes_scores = row[4:] max_score = np.amax(classes_scores) if max_score >= self.conf_threshold: class_id = np.argmax(classes_scores) x, y, w, h = row[0], row[1], row[2], row[3] # to rescaled xmin, ymin, xmax, ymax xmin = int((x - w / 2) * width_scale) ymin = int((y - h / 2) * height_scale) xmax = int((x + w / 2) * width_scale) ymax = int((y + h / 2) * height_scale) sample_results.append({ "label": self.labels[class_id], "confidence": float(max_score), "box": [xmin, ymin, xmax, ymax], }) # Filter out overlapping boxes boxes = [res["box"] for res in sample_results] scores = [res["confidence"] for res in sample_results] keep_indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf_threshold, self.iou_threshold) # type: ignore[arg-type] sample_results = [sample_results[i] for i in keep_indices] results.append(sample_results) self._results = results return results def show(self, **kwargs: Any) -> None: """ Display the results Args: **kwargs: additional keyword arguments to be passed to `plt.show` """ requires_package("matplotlib", "`.show()` requires matplotlib installed") import matplotlib.pyplot as plt from matplotlib.patches import Rectangle # visualize the results with matplotlib if self._results and self._inputs: for img, res in zip(self._inputs, self._results): 
plt.figure(figsize=(10, 10)) plt.imshow(img) for obj in res: xmin, ymin, xmax, ymax = obj["box"] label = obj["label"] plt.text(xmin, ymin, f"{label} {obj['confidence']:.2f}", color="red") plt.gca().add_patch( Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor="red", linewidth=2) ) plt.show(**kwargs) ================================================ FILE: onnxtr/contrib/base.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from typing import Any import numpy as np import onnxruntime as ort from onnxtr.utils.data import download_from_url class _BasePredictor: """ Base class for all predictors Args: batch_size: the batch size to use url: the url to use to download a model if needed model_path: the path to the model to use **kwargs: additional arguments to be passed to `download_from_url` """ def __init__(self, batch_size: int, url: str | None = None, model_path: str | None = None, **kwargs) -> None: self.batch_size = batch_size self.session = self._init_model(url, model_path, **kwargs) self._inputs: list[np.ndarray] = [] self._results: list[Any] = [] def _init_model(self, url: str | None = None, model_path: str | None = None, **kwargs: Any) -> Any: """ Download the model from the given url if needed Args: url: the url to use model_path: the path to the model to use **kwargs: additional arguments to be passed to `download_from_url` Returns: Any: the ONNX loaded model """ if not url and not model_path: raise ValueError("You must provide either a url or a model_path") onnx_model_path = model_path if model_path else str(download_from_url(url, cache_subdir="models", **kwargs)) # type: ignore[arg-type] return ort.InferenceSession(onnx_model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]) def preprocess(self, img: np.ndarray) -> np.ndarray: """ Preprocess the input image Args: img: the input image to preprocess Returns: np.ndarray: the preprocessed image """ raise NotImplementedError def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> Any: """ Postprocess the model output Args: output: the model output to postprocess input_images: the input images used to generate the output Returns: Any: the postprocessed output """ raise NotImplementedError def __call__(self, inputs: list[np.ndarray]) -> Any: """ Call the model on the given inputs Args: inputs: the inputs to use Returns: Any: the postprocessed output """ self._inputs = inputs model_inputs = self.session.get_inputs() batched_inputs = [inputs[i : i + self.batch_size] for i in range(0, len(inputs), self.batch_size)] processed_batches = [ np.array([self.preprocess(img) for img in batch], dtype=np.float32) for batch in batched_inputs ] outputs = [self.session.run(None, {model_inputs[0].name: batch}) for batch in processed_batches] return self.postprocess(outputs, batched_inputs) ================================================ FILE: onnxtr/file_utils.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
import importlib.metadata import logging __all__ = ["requires_package"] ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"} ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"}) def requires_package(name: str, extra_message: str | None = None) -> None: # pragma: no cover """ package requirement helper Args: name: name of the package extra_message: additional message to display if the package is not found """ try: _pkg_version = importlib.metadata.version(name) logging.info(f"{name} version {_pkg_version} available.") except importlib.metadata.PackageNotFoundError: raise ImportError( f"\n\n{extra_message if extra_message is not None else ''} " f"\nPlease install it with the following command: pip install {name}\n" ) ================================================ FILE: onnxtr/io/__init__.py ================================================ from .elements import * from .html import * from .image import * from .pdf import * from .reader import * ================================================ FILE: onnxtr/io/elements.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from typing import Any from defusedxml import defuse_stdlib defuse_stdlib() from xml.etree import ElementTree as ET from xml.etree.ElementTree import Element as ETElement from xml.etree.ElementTree import SubElement import numpy as np import onnxtr from onnxtr.file_utils import requires_package from onnxtr.utils.common_types import BoundingBox from onnxtr.utils.geometry import resolve_enclosing_bbox, resolve_enclosing_rbbox from onnxtr.utils.reconstitution import synthesize_page from onnxtr.utils.repr import NestedObject try: # optional dependency for visualization from onnxtr.utils.visualization import visualize_page except ModuleNotFoundError: # pragma: no cover pass __all__ = ["Element", "Word", "Artefact", "Line", "Block", "Page", "Document"] class Element(NestedObject): """Implements an abstract document element with exporting and text rendering capabilities""" _children_names: list[str] = [] _exported_keys: list[str] = [] def __init__(self, **kwargs: Any) -> None: for k, v in kwargs.items(): if k in self._children_names: setattr(self, k, v) else: raise KeyError(f"{self.__class__.__name__} object does not have any attribute named '{k}'") def export(self) -> dict[str, Any]: """Exports the object into a nested dict format""" export_dict = {k: getattr(self, k) for k in self._exported_keys} for children_name in self._children_names: export_dict[children_name] = [c.export() for c in getattr(self, children_name)] return export_dict @classmethod def from_dict(cls, save_dict: dict[str, Any], **kwargs): raise NotImplementedError def render(self) -> str: raise NotImplementedError class Word(Element): """Implements a word element Args: value: the text string of the word confidence: the confidence associated with the text prediction geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to the page's size objectness_score: the objectness score of the detection crop_orientation: the general orientation of the crop in degrees and its confidence """ _exported_keys: list[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"] _children_names: list[str] = [] def __init__( self, value: str, confidence: float, geometry: BoundingBox | np.ndarray, objectness_score: float, crop_orientation: dict[str, 
Any], ) -> None: super().__init__() self.value = value self.confidence = confidence self.geometry = geometry self.objectness_score = objectness_score self.crop_orientation = crop_orientation def render(self) -> str: """Renders the full text of the element""" return self.value def extra_repr(self) -> str: return f"value='{self.value}', confidence={self.confidence:.2}" @classmethod def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} return cls(**kwargs) class Artefact(Element): """Implements a non-textual element Args: artefact_type: the type of artefact confidence: the confidence of the type prediction geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to the page's size. """ _exported_keys: list[str] = ["geometry", "type", "confidence"] _children_names: list[str] = [] def __init__(self, artefact_type: str, confidence: float, geometry: BoundingBox) -> None: super().__init__() self.geometry = geometry self.type = artefact_type self.confidence = confidence def render(self) -> str: """Renders the full text of the element""" return f"[{self.type.upper()}]" def extra_repr(self) -> str: return f"type='{self.type}', confidence={self.confidence:.2}" @classmethod def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} return cls(**kwargs) class Line(Element): """Implements a line element as a collection of words Args: words: list of word elements geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing all words in it. """ _exported_keys: list[str] = ["geometry", "objectness_score"] _children_names: list[str] = ["words"] words: list[Word] = [] def __init__( self, words: list[Word], geometry: BoundingBox | np.ndarray | None = None, objectness_score: float | None = None, ) -> None: # Compute the objectness score of the line if objectness_score is None: objectness_score = float(np.mean([w.objectness_score for w in words])) # Resolve the geometry using the smallest enclosing bounding box if geometry is None: # Check whether this is a rotated or straight box box_resolution_fn = resolve_enclosing_rbbox if len(words[0].geometry) == 4 else resolve_enclosing_bbox geometry = box_resolution_fn([w.geometry for w in words]) # type: ignore[misc] super().__init__(words=words) self.geometry = geometry self.objectness_score = objectness_score def render(self) -> str: """Renders the full text of the element""" return " ".join(w.render() for w in self.words) @classmethod def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} kwargs.update({ "words": [Word.from_dict(_dict) for _dict in save_dict["words"]], }) return cls(**kwargs) class Block(Element): """Implements a block element as a collection of lines and artefacts Args: lines: list of line elements artefacts: list of artefacts geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing all lines and artefacts in it. 
""" _exported_keys: list[str] = ["geometry", "objectness_score"] _children_names: list[str] = ["lines", "artefacts"] lines: list[Line] = [] artefacts: list[Artefact] = [] def __init__( self, lines: list[Line] = [], artefacts: list[Artefact] = [], geometry: BoundingBox | np.ndarray | None = None, objectness_score: float | None = None, ) -> None: # Compute the objectness score of the line if objectness_score is None: objectness_score = float(np.mean([w.objectness_score for line in lines for w in line.words])) # Resolve the geometry using the smallest enclosing bounding box if geometry is None: line_boxes = [word.geometry for line in lines for word in line.words] artefact_boxes = [artefact.geometry for artefact in artefacts] box_resolution_fn = ( resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox ) geometry = box_resolution_fn(line_boxes + artefact_boxes) # type: ignore super().__init__(lines=lines, artefacts=artefacts) self.geometry = geometry self.objectness_score = objectness_score def render(self, line_break: str = "\n") -> str: """Renders the full text of the element""" return line_break.join(line.render() for line in self.lines) @classmethod def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} kwargs.update({ "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]], "artefacts": [Artefact.from_dict(_dict) for _dict in save_dict["artefacts"]], }) return cls(**kwargs) class Page(Element): """Implements a page element as a collection of blocks Args: page: image encoded as a numpy array in uint8 blocks: list of block elements page_idx: the index of the page in the input raw document dimensions: the page size in pixels in format (height, width) orientation: a dictionary with the value of the rotation angle in degress and confidence of the prediction language: a dictionary with the language value and confidence of the prediction """ _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"] _children_names: list[str] = ["blocks"] blocks: list[Block] = [] def __init__( self, page: np.ndarray, blocks: list[Block], page_idx: int, dimensions: tuple[int, int], orientation: dict[str, Any] | None = None, language: dict[str, Any] | None = None, ) -> None: super().__init__(blocks=blocks) self.page = page self.page_idx = page_idx self.dimensions = dimensions self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None) self.language = language if isinstance(language, dict) else dict(value=None, confidence=None) def render(self, block_break: str = "\n\n") -> str: """Renders the full text of the element""" return block_break.join(b.render() for b in self.blocks) def extra_repr(self) -> str: return f"dimensions={self.dimensions}" def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None: """Overlay the result on a given image Args: interactive: whether the display should be interactive preserve_aspect_ratio: pass True if you passed True to the predictor **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method """ requires_package("matplotlib", "`.show()` requires matplotlib & mplcursors installed") requires_package("mplcursors", "`.show()` requires matplotlib & mplcursors installed") import matplotlib.pyplot as plt visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio) plt.show(**kwargs) def synthesize(self, 
**kwargs) -> np.ndarray: """Synthesize the page from the predictions Args: **kwargs: keyword arguments passed to the `synthesize_page` method Returns synthesized page """ return synthesize_page(self.export(), **kwargs) def export_as_xml(self, file_title: str = "OnnxTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]: """Export the page as XML (hOCR-format) convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md Args: file_title: the title of the XML file Returns: a tuple of the XML byte string, and its ElementTree """ p_idx = self.page_idx block_count: int = 1 line_count: int = 1 word_count: int = 1 height, width = self.dimensions language = self.language if "language" in self.language.keys() else "en" # Create the XML root element page_hocr = ETElement("html", attrib={"xmlns": "http://www.w3.org/1999/xhtml", "xml:lang": str(language)}) # Create the header / SubElements of the root element head = SubElement(page_hocr, "head") SubElement(head, "title").text = file_title SubElement(head, "meta", attrib={"http-equiv": "Content-Type", "content": "text/html; charset=utf-8"}) SubElement( head, "meta", attrib={"name": "ocr-system", "content": f"onnxtr {onnxtr.__version__}"}, # type: ignore[attr-defined] ) SubElement( head, "meta", attrib={"name": "ocr-capabilities", "content": "ocr_page ocr_carea ocr_par ocr_line ocrx_word"}, ) # Create the body body = SubElement(page_hocr, "body") page_div = SubElement( body, "div", attrib={ "class": "ocr_page", "id": f"page_{p_idx + 1}", "title": f"image; bbox 0 0 {width} {height}; ppageno 0", }, ) # iterate over the blocks / lines / words and create the XML elements in body line by line with the attributes for block in self.blocks: if len(block.geometry) != 2: raise TypeError("XML export is only available for straight bounding boxes for now.") (xmin, ymin), (xmax, ymax) = block.geometry block_div = SubElement( page_div, "div", attrib={ "class": "ocr_carea", "id": f"block_{block_count}", "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ {int(round(xmax * width))} {int(round(ymax * height))}", }, ) paragraph = SubElement( block_div, "p", attrib={ "class": "ocr_par", "id": f"par_{block_count}", "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ {int(round(xmax * width))} {int(round(ymax * height))}", }, ) block_count += 1 for line in block.lines: (xmin, ymin), (xmax, ymax) = line.geometry # NOTE: baseline, x_size, x_descenders, x_ascenders is currently initalized to 0 line_span = SubElement( paragraph, "span", attrib={ "class": "ocr_line", "id": f"line_{line_count}", "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ {int(round(xmax * width))} {int(round(ymax * height))}; \ baseline 0 0; x_size 0; x_descenders 0; x_ascenders 0", }, ) line_count += 1 for word in line.words: (xmin, ymin), (xmax, ymax) = word.geometry conf = word.confidence word_div = SubElement( line_span, "span", attrib={ "class": "ocrx_word", "id": f"word_{word_count}", "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ {int(round(xmax * width))} {int(round(ymax * height))}; \ x_wconf {int(round(conf * 100))}", }, ) # set the text word_div.text = word.value word_count += 1 return (ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr)) @classmethod def from_dict(cls, save_dict: dict[str, Any], **kwargs): kwargs = {k: save_dict[k] for k in cls._exported_keys} kwargs.update({"blocks": [Block.from_dict(block_dict) for block_dict in save_dict["blocks"]]}) return 
cls(**kwargs)


class Document(Element):
    """Implements a document element as a collection of pages

    Args:
        pages: list of page elements
    """

    _children_names: list[str] = ["pages"]
    pages: list[Page] = []

    def __init__(
        self,
        pages: list[Page],
    ) -> None:
        super().__init__(pages=pages)

    def render(self, page_break: str = "\n\n\n\n") -> str:
        """Renders the full text of the element"""
        return page_break.join(p.render() for p in self.pages)

    def show(self, **kwargs) -> None:
        """Overlay the result on a given image"""
        for result in self.pages:
            result.show(**kwargs)

    def synthesize(self, **kwargs) -> list[np.ndarray]:
        """Synthesize all pages from their predictions

        Args:
            **kwargs: keyword arguments passed to the `Page.synthesize` method

        Returns:
            list of synthesized pages
        """
        return [page.synthesize(**kwargs) for page in self.pages]

    def export_as_xml(self, **kwargs) -> list[tuple[bytes, ET.ElementTree]]:
        """Export the document as XML (hOCR-format)

        Args:
            **kwargs: additional keyword arguments passed to the Page.export_as_xml method

        Returns:
            list of tuple of (bytes, ElementTree)
        """
        return [page.export_as_xml(**kwargs) for page in self.pages]

    @classmethod
    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
        kwargs = {k: save_dict[k] for k in cls._exported_keys}
        kwargs.update({"pages": [Page.from_dict(page_dict) for page_dict in save_dict["pages"]]})
        return cls(**kwargs)


================================================
FILE: onnxtr/io/html.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

from typing import Any

__all__ = ["read_html"]


def read_html(url: str, **kwargs: Any) -> bytes:
    """Read a web page and convert it into a PDF file as a bytes stream

    >>> from onnxtr.io import read_html
    >>> doc = read_html("https://www.yoursite.com")

    Args:
        url: URL of the target web page
        **kwargs: keyword arguments from `weasyprint.HTML`

    Returns:
        decoded PDF file as a bytes stream
    """
    from weasyprint import HTML

    return HTML(url, **kwargs).write_pdf()


================================================
FILE: onnxtr/io/image.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

from pathlib import Path

import cv2
import numpy as np

from onnxtr.utils.common_types import AbstractFile

__all__ = ["read_img_as_numpy"]


def read_img_as_numpy(
    file: AbstractFile,
    output_size: tuple[int, int] | None = None,
    rgb_output: bool = True,
) -> np.ndarray:
    """Read an image file into numpy format

    >>> from onnxtr.io import read_img_as_numpy
    >>> page = read_img_as_numpy("path/to/your/doc.jpg")

    Args:
        file: the path to the image file
        output_size: the expected output size of each page in format H x W
        rgb_output: whether the output ndarray channel order should be RGB instead of BGR.
Returns: the page decoded as numpy ndarray of shape H x W x 3 """ if isinstance(file, (str, Path)): if not Path(file).is_file(): raise FileNotFoundError(f"unable to access {file}") img = cv2.imread(str(file), cv2.IMREAD_COLOR) elif isinstance(file, bytes): _file: np.ndarray = np.frombuffer(file, np.uint8) img = cv2.imdecode(_file, cv2.IMREAD_COLOR) else: raise TypeError("unsupported object type for argument 'file'") # Validity check if img is None: raise ValueError("unable to read file.") # Resizing if isinstance(output_size, tuple): img = cv2.resize(img, output_size[::-1], interpolation=cv2.INTER_LINEAR) # Switch the channel order if rgb_output: img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) return img ================================================ FILE: onnxtr/io/pdf.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from typing import Any import numpy as np import pypdfium2 as pdfium from onnxtr.utils.common_types import AbstractFile __all__ = ["read_pdf"] def read_pdf( file: AbstractFile, scale: int = 2, rgb_mode: bool = True, password: str | None = None, **kwargs: Any, ) -> list[np.ndarray]: """Read a PDF file and convert it into an image in numpy format >>> from onnxtr.io import read_pdf >>> doc = read_pdf("path/to/your/doc.pdf") Args: file: the path to the PDF file scale: rendering scale (1 corresponds to 72dpi) rgb_mode: if True, the output will be RGB, otherwise BGR password: a password to unlock the document, if encrypted **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render` Returns: the list of pages decoded as numpy ndarray of shape H x W x C """ # Rasterise pages to numpy ndarrays with pypdfium2 pdf = pdfium.PdfDocument(file, password=password) try: return [page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs).to_numpy() for page in pdf] finally: pdf.close() ================================================ FILE: onnxtr/io/reader.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
from collections.abc import Sequence from pathlib import Path import numpy as np from onnxtr.file_utils import requires_package from onnxtr.utils.common_types import AbstractFile from .html import read_html from .image import read_img_as_numpy from .pdf import read_pdf __all__ = ["DocumentFile"] class DocumentFile: """Read a document from multiple extensions""" @classmethod def from_pdf(cls, file: AbstractFile, **kwargs) -> list[np.ndarray]: """Read a PDF file >>> from onnxtr.io import DocumentFile >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf") Args: file: the path to the PDF file or a binary stream **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render` Returns: the list of pages decoded as numpy ndarray of shape H x W x 3 """ return read_pdf(file, **kwargs) @classmethod def from_url(cls, url: str, **kwargs) -> list[np.ndarray]: """Interpret a web page as a PDF document >>> from onnxtr.io import DocumentFile >>> doc = DocumentFile.from_url("https://www.yoursite.com") Args: url: the URL of the target web page **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render` Returns: the list of pages decoded as numpy ndarray of shape H x W x 3 """ requires_package( "weasyprint", "`.from_url` requires weasyprint installed.\n" + "Installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html#installation", ) pdf_stream = read_html(url) return cls.from_pdf(pdf_stream, **kwargs) @classmethod def from_images(cls, files: Sequence[AbstractFile] | AbstractFile, **kwargs) -> list[np.ndarray]: """Read an image file (or a collection of image files) and convert it into an image in numpy format >>> from onnxtr.io import DocumentFile >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"]) Args: files: the path to the image file or a binary stream, or a collection of those **kwargs: additional parameters to :meth:`onnxtr.io.image.read_img_as_numpy` Returns: the list of pages decoded as numpy ndarray of shape H x W x 3 """ if isinstance(files, (str, Path, bytes)): files = [files] return [read_img_as_numpy(file, **kwargs) for file in files] ================================================ FILE: onnxtr/models/__init__.py ================================================ from .engine import EngineConfig from .classification import * from .detection import * from .recognition import * from .zoo import * from .factory import * ================================================ FILE: onnxtr/models/_utils.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from math import floor from statistics import median_low import cv2 import numpy as np from langdetect import LangDetectException, detect_langs from onnxtr.utils.geometry import rotate_image __all__ = ["estimate_orientation", "get_language"] def get_max_width_length_ratio(contour: np.ndarray) -> float: """Get the maximum shape ratio of a contour. 
Args: contour: the contour from cv2.findContour Returns: the maximum shape ratio """ _, (w, h), _ = cv2.minAreaRect(contour) if w == 0 or h == 0: return 0.0 return max(w / h, h / w) def estimate_orientation( img: np.ndarray, general_page_orientation: tuple[int, float] | None = None, n_ct: int = 70, ratio_threshold_for_lines: float = 3, min_confidence: float = 0.2, lower_area: int = 100, ) -> int: """Estimate the angle of the general document orientation based on the lines of the document and the assumption that they should be horizontal. Args: img: the img or bitmap to analyze (H, W, C) general_page_orientation: the general orientation of the page (angle [0, 90, 180, 270 (-90)], confidence) estimated by a model n_ct: the number of contours used for the orientation estimation ratio_threshold_for_lines: this is the ratio w/h used to discriminates lines min_confidence: the minimum confidence to consider the general_page_orientation lower_area: the minimum area of a contour to be considered Returns: the estimated angle of the page (clockwise, negative for left side rotation, positive for right side rotation) """ assert len(img.shape) == 3 and img.shape[-1] in [1, 3], f"Image shape {img.shape} not supported" # Convert image to grayscale if necessary if img.shape[-1] == 3: gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) gray_img = cv2.medianBlur(gray_img, 5) thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] else: thresh = img.astype(np.uint8) page_orientation, orientation_confidence = general_page_orientation or (0, 0.0) is_confident = page_orientation is not None and orientation_confidence >= min_confidence base_angle = page_orientation if is_confident else 0 if is_confident: # We rotate the image to the general orientation which improves the detection # No expand needed bitmap is already padded thresh = rotate_image(thresh, -base_angle) else: # That's only required if we do not work on the detection models bin map # try to merge words in lines (h, w) = img.shape[:2] k_x = max(1, (floor(w / 100))) k_y = max(1, (floor(h / 100))) kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (k_x, k_y)) thresh = cv2.dilate(thresh, kernel, iterations=1) # extract contours contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) # Filter & Sort contours contours = sorted( [contour for contour in contours if cv2.contourArea(contour) > lower_area], key=get_max_width_length_ratio, reverse=True, ) angles = [] for contour in contours[:n_ct]: _, (w, h), angle = cv2.minAreaRect(contour) # OpenCV version-proof normalization: force 'w' to be the long side # so the angle is consistently relative to the major axis. # https://github.com/opencv/opencv/pull/28051/changes if w < h: w, h = h, w angle -= 90 # Normalize angle to be within [-90, 90] while angle <= -90: angle += 180 while angle > 90: angle -= 180 if h > 0: if w / h > ratio_threshold_for_lines: # select only contours with ratio like lines angles.append(angle) elif w / h < 1 / ratio_threshold_for_lines: # if lines are vertical, substract 90 degree angles.append(angle - 90) if len(angles) == 0: skew_angle = 0 # in case no angles is found else: # median_low picks a value from the data to avoid outliers median = -median_low(angles) skew_angle = -round(median) if abs(median) != 0 else 0 # Resolve the 90-degree flip ambiguity. # If the estimation is exactly 90/-90, it's usually a vertical detection of horizontal lines. 
if abs(skew_angle) == 90: skew_angle = 0 # combine with the general orientation and the estimated angle # Apply the detected skew to our base orientation final_angle = base_angle + skew_angle # Standardize result to [-179, 180] range to handle wrap-around cases (e.g., 180 + -31) while final_angle > 180: final_angle -= 360 while final_angle <= -180: final_angle += 360 if is_confident: # If the estimated angle is perpendicular, treat it as 0 to avoid wrong flips if abs(skew_angle) % 90 == 0: return page_orientation # special case where the estimated angle is mostly wrong: # case 1: - and + swapped # case 2: estimated angle is completely wrong # so in this case we prefer the general page orientation if abs(skew_angle) == abs(page_orientation) and page_orientation != 0: return page_orientation return int( final_angle ) # return the clockwise angle (negative - left side rotation, positive - right side rotation) def rectify_crops( crops: list[np.ndarray], orientations: list[int], ) -> list[np.ndarray]: """Rotate each crop of the list according to the predicted orientation: 0: already straight, no rotation 1: 90 ccw, rotate 3 times ccw 2: 180, rotate 2 times ccw 3: 270 ccw, rotate 1 time ccw """ # Inverse predictions (if angle of +90 is detected, rotate by -90) orientations = [4 - pred if pred != 0 else 0 for pred in orientations] return ( [crop if orientation == 0 else np.rot90(crop, orientation) for orientation, crop in zip(orientations, crops)] if len(orientations) > 0 else [] ) def rectify_loc_preds( page_loc_preds: np.ndarray, orientations: list[int], ) -> np.ndarray | None: """Orient the quadrangle (Polygon4P) according to the predicted orientation, so that the points are in this order: top L, top R, bot R, bot L if the crop is readable """ return ( np.stack( [ np.roll(page_loc_pred, orientation, axis=0) for orientation, page_loc_pred in zip(orientations, page_loc_preds) ], axis=0, ) if len(orientations) > 0 else None ) def get_language(text: str) -> tuple[str, float]: """Get languages of a text using langdetect model. Get the language with the highest probability or no language if only a few words or a low probability Args: text (str): text Returns: The detected language in ISO 639 code and confidence score """ try: lang = detect_langs(text.lower())[0] except LangDetectException: return "unknown", 0.0 if len(text) <= 1 or (len(text) <= 5 and lang.prob <= 0.2): return "unknown", 0.0 return lang.lang, lang.prob ================================================ FILE: onnxtr/models/builder.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from typing import Any import numpy as np from scipy.cluster.hierarchy import fclusterdata from onnxtr.io.elements import Block, Document, Line, Page, Word from onnxtr.utils.geometry import estimate_page_angle, resolve_enclosing_bbox, resolve_enclosing_rbbox, rotate_boxes from onnxtr.utils.repr import NestedObject __all__ = ["DocumentBuilder"] class DocumentBuilder(NestedObject): """Implements a document builder Args: resolve_lines: whether words should be automatically grouped into lines resolve_blocks: whether lines should be automatically grouped into blocks paragraph_break: relative length of the minimum space separating paragraphs export_as_straight_boxes: if True, force straight boxes in the export (fit a rectangle box to all rotated boxes). 
Else, keep the boxes format unchanged, no matter what it is. """ def __init__( self, resolve_lines: bool = True, resolve_blocks: bool = False, paragraph_break: float = 0.035, export_as_straight_boxes: bool = False, ) -> None: self.resolve_lines = resolve_lines self.resolve_blocks = resolve_blocks self.paragraph_break = paragraph_break self.export_as_straight_boxes = export_as_straight_boxes @staticmethod def _sort_boxes(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """Sort bounding boxes from top to bottom, left to right Args: boxes: bounding boxes of shape (N, 4) or (N, 4, 2) (in case of rotated bbox) Returns: tuple: indices of ordered boxes of shape (N,), boxes If straight boxes are passed tpo the function, boxes are unchanged else: boxes returned are straight boxes fitted to the straightened rotated boxes so that we fit the lines afterwards to the straigthened page """ if boxes.ndim == 3: boxes = rotate_boxes( loc_preds=boxes, angle=-estimate_page_angle(boxes), orig_shape=(1024, 1024), min_angle=5.0, ) boxes = np.concatenate((boxes.min(1), boxes.max(1)), -1) return (boxes[:, 0] + 2 * boxes[:, 3] / np.median(boxes[:, 3] - boxes[:, 1])).argsort(), boxes def _resolve_sub_lines(self, boxes: np.ndarray, word_idcs: list[int]) -> list[list[int]]: """Split a line in sub_lines Args: boxes: bounding boxes of shape (N, 4) word_idcs: list of indexes for the words of the line Returns: A list of (sub-)lines computed from the original line (words) """ lines = [] # Sort words horizontally word_idcs = [word_idcs[idx] for idx in boxes[word_idcs, 0].argsort().tolist()] # Eventually split line horizontally if len(word_idcs) < 2: lines.append(word_idcs) else: sub_line = [word_idcs[0]] for i in word_idcs[1:]: horiz_break = True prev_box = boxes[sub_line[-1]] # Compute distance between boxes dist = boxes[i, 0] - prev_box[2] # If distance between boxes is lower than paragraph break, same sub-line if dist < self.paragraph_break: horiz_break = False if horiz_break: lines.append(sub_line) sub_line = [] sub_line.append(i) lines.append(sub_line) return lines def _resolve_lines(self, boxes: np.ndarray) -> list[list[int]]: """Order boxes to group them in lines Args: boxes: bounding boxes of shape (N, 4) or (N, 4, 2) in case of rotated bbox Returns: nested list of box indices """ # Sort boxes, and straighten the boxes if they are rotated idxs, boxes = self._sort_boxes(boxes) # Compute median for boxes heights y_med = np.median(boxes[:, 3] - boxes[:, 1]) lines = [] words = [idxs[0]] # Assign the top-left word to the first line # Define a mean y-center for the line y_center_sum = boxes[idxs[0]][[1, 3]].mean() for idx in idxs[1:]: vert_break = True # Compute y_dist y_dist = abs(boxes[idx][[1, 3]].mean() - y_center_sum / len(words)) # If y-center of the box is close enough to mean y-center of the line, same line if y_dist < y_med / 2: vert_break = False if vert_break: # Compute sub-lines (horizontal split) lines.extend(self._resolve_sub_lines(boxes, words)) words = [] y_center_sum = 0 words.append(idx) y_center_sum += boxes[idx][[1, 3]].mean() # Use the remaining words to form the last(s) line(s) if len(words) > 0: # Compute sub-lines (horizontal split) lines.extend(self._resolve_sub_lines(boxes, words)) return lines @staticmethod def _resolve_blocks(boxes: np.ndarray, lines: list[list[int]]) -> list[list[list[int]]]: """Order lines to group them in blocks Args: boxes: bounding boxes of shape (N, 4) or (N, 4, 2) lines: list of lines, each line is a list of idx Returns: nested list of box indices """ # Resolve 
enclosing boxes of lines if boxes.ndim == 3: box_lines: np.ndarray = np.asarray([ resolve_enclosing_rbbox([tuple(boxes[idx, :, :]) for idx in line]) # type: ignore[misc] for line in lines ]) else: _box_lines = [ resolve_enclosing_bbox([(tuple(boxes[idx, :2]), tuple(boxes[idx, 2:])) for idx in line]) for line in lines ] box_lines = np.asarray([(x1, y1, x2, y2) for ((x1, y1), (x2, y2)) in _box_lines]) # Compute geometrical features of lines to clusterize # Clusterizing only with box centers yield to poor results for complex documents if boxes.ndim == 3: box_features: np.ndarray = np.stack( ( (box_lines[:, 0, 0] + box_lines[:, 0, 1]) / 2, (box_lines[:, 0, 0] + box_lines[:, 2, 0]) / 2, (box_lines[:, 0, 0] + box_lines[:, 2, 1]) / 2, (box_lines[:, 0, 1] + box_lines[:, 2, 1]) / 2, (box_lines[:, 0, 1] + box_lines[:, 2, 0]) / 2, (box_lines[:, 2, 0] + box_lines[:, 2, 1]) / 2, ), axis=-1, ) else: box_features = np.stack( ( (box_lines[:, 0] + box_lines[:, 3]) / 2, (box_lines[:, 1] + box_lines[:, 2]) / 2, (box_lines[:, 0] + box_lines[:, 2]) / 2, (box_lines[:, 1] + box_lines[:, 3]) / 2, box_lines[:, 0], box_lines[:, 1], ), axis=-1, ) # Compute clusters clusters = fclusterdata(box_features, t=0.1, depth=4, criterion="distance", metric="euclidean") _blocks: dict[int, list[int]] = {} # Form clusters for line_idx, cluster_idx in enumerate(clusters): if cluster_idx in _blocks.keys(): _blocks[cluster_idx].append(line_idx) else: _blocks[cluster_idx] = [line_idx] # Retrieve word-box level to return a fully nested structure blocks = [[lines[idx] for idx in block] for block in _blocks.values()] return blocks def _build_blocks( self, boxes: np.ndarray, objectness_scores: np.ndarray, word_preds: list[tuple[str, float]], crop_orientations: list[dict[str, Any]], ) -> list[Block]: """Gather independent words in structured blocks Args: boxes: bounding boxes of all detected words of the page, of shape (N, 4) or (N, 4, 2) objectness_scores: objectness scores of all detected words of the page, of shape N word_preds: list of all detected words of the page, of shape N crop_orientations: list of dictoinaries containing the general orientation (orientations + confidences) of the crops Returns: list of block elements """ if boxes.shape[0] != len(word_preds): raise ValueError(f"Incompatible argument lengths: {boxes.shape[0]}, {len(word_preds)}") if boxes.shape[0] == 0: return [] # Decide whether we try to form lines _boxes = boxes if self.resolve_lines: lines = self._resolve_lines(_boxes if _boxes.ndim == 3 else _boxes[:, :4]) # Decide whether we try to form blocks if self.resolve_blocks and len(lines) > 1: _blocks = self._resolve_blocks(_boxes if _boxes.ndim == 3 else _boxes[:, :4], lines) else: _blocks = [lines] else: # Sort bounding boxes, one line for all boxes, one block for the line lines = [self._sort_boxes(_boxes if _boxes.ndim == 3 else _boxes[:, :4])[0]] # type: ignore[list-item] _blocks = [lines] blocks = [ Block([ Line([ Word( *word_preds[idx], tuple(tuple(pt) for pt in boxes[idx].tolist()), # type: ignore[arg-type] float(objectness_scores[idx]), crop_orientations[idx], ) if boxes.ndim == 3 else Word( *word_preds[idx], ((boxes[idx, 0], boxes[idx, 1]), (boxes[idx, 2], boxes[idx, 3])), float(objectness_scores[idx]), crop_orientations[idx], ) for idx in line ]) for line in lines ]) for lines in _blocks ] return blocks def extra_repr(self) -> str: return ( f"resolve_lines={self.resolve_lines}, resolve_blocks={self.resolve_blocks}, " f"paragraph_break={self.paragraph_break}, " 
    def extra_repr(self) -> str:
        return (
            f"resolve_lines={self.resolve_lines}, resolve_blocks={self.resolve_blocks}, "
            f"paragraph_break={self.paragraph_break}, "
            f"export_as_straight_boxes={self.export_as_straight_boxes}"
        )

    def __call__(
        self,
        pages: list[np.ndarray],
        boxes: list[np.ndarray],
        objectness_scores: list[np.ndarray],
        text_preds: list[list[tuple[str, float]]],
        page_shapes: list[tuple[int, int]],
        crop_orientations: list[dict[str, Any]],
        orientations: list[dict[str, Any]] | None = None,
        languages: list[dict[str, Any]] | None = None,
    ) -> Document:
        """Re-arrange detected words into structured blocks

        Args:
            pages: list of N elements, where each element represents the page image
            boxes: list of N elements, where each element represents the localization predictions,
                of shape (*, 4) or (*, 4, 2) for all words for a given page
            objectness_scores: list of N elements, where each element represents the objectness scores
            text_preds: list of N elements, where each element is the list of all word predictions (text + confidence)
            page_shapes: shape of each page, of size N
            crop_orientations: list of N elements, where each element is a dictionary containing the general
                orientation (orientations + confidences) of the crops
            orientations: optional, list of N elements, where each element is a dictionary containing the
                orientation (orientation + confidence)
            languages: optional, list of N elements, where each element is a dictionary containing the
                language (language + confidence)

        Returns:
            document object
        """
        if len(boxes) != len(text_preds) != len(crop_orientations) != len(objectness_scores) or len(boxes) != len(
            page_shapes
        ) != len(crop_orientations) != len(objectness_scores):
            raise ValueError("All arguments are expected to be lists of the same size")

        _orientations = orientations if isinstance(orientations, list) else [None] * len(boxes)
        _languages = languages if isinstance(languages, list) else [None] * len(boxes)
        if self.export_as_straight_boxes and len(boxes) > 0:
            # If boxes are already straight OK, else fit a bounding rect
            if boxes[0].ndim == 3:
                # Iterate over pages and boxes
                boxes = [np.concatenate((p_boxes.min(1), p_boxes.max(1)), 1) for p_boxes in boxes]

        _pages = [
            Page(
                page,
                self._build_blocks(
                    page_boxes,
                    loc_scores,
                    word_preds,
                    word_crop_orientations,
                ),
                _idx,
                shape,
                orientation,
                language,
            )
            for page, _idx, shape, page_boxes, loc_scores, word_preds, word_crop_orientations, orientation, language in zip(  # noqa: E501
                pages,
                range(len(boxes)),
                page_shapes,
                boxes,
                objectness_scores,
                text_preds,
                crop_orientations,
                _orientations,
                _languages,
            )
        ]

        return Document(_pages)


================================================
FILE: onnxtr/models/classification/__init__.py
================================================
from .models import *
from .zoo import *


================================================
FILE: onnxtr/models/classification/models/__init__.py
================================================
from .mobilenet import *


================================================
FILE: onnxtr/models/classification/models/mobilenet.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
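# A hedged sketch (not part of the library) of the `load_in_8_bit` URL patching used by
# the `_mobilenet_v3` factory below: the quantized weights are only substituted when the
# default remote URL is in use, so an explicit local `model_path` always wins.
#
# >>> cfgs = {"demo_arch": {"url": "https://example.com/fp32.onnx", "url_8_bit": "https://example.com/int8.onnx"}}
# >>> model_path = cfgs["demo_arch"]["url"]
# >>> load_in_8_bit = True
# >>> cfgs["demo_arch"]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
# 'https://example.com/int8.onnx'
# >>> local_path = "/tmp/my_model.onnx"  # hypothetical local file: never patched
# >>> cfgs["demo_arch"]["url_8_bit"] if load_in_8_bit and "http" in local_path else local_path
# '/tmp/my_model.onnx'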
# Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py

from copy import deepcopy
from typing import Any

import numpy as np

from ...engine import Engine, EngineConfig

__all__ = [
    "MobileNetV3",
    "mobilenet_v3_small_crop_orientation",
    "mobilenet_v3_small_page_orientation",
]

default_cfgs: dict[str, dict[str, Any]] = {
    "mobilenet_v3_small_crop_orientation": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 256, 256),
        "classes": [0, -90, 180, 90],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_crop_orientation-4fde60a1.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_crop_orientation_static_8_bit-c32c7721.onnx",
    },
    "mobilenet_v3_small_page_orientation": {
        "mean": (0.694, 0.695, 0.693),
        "std": (0.299, 0.296, 0.301),
        "input_shape": (3, 512, 512),
        "classes": [0, -90, 180, 90],
        "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_page_orientation-60606ce4.onnx",
        "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.0/mobilenet_v3_small_page_orientation_static_8_bit-13b5b014.onnx",
    },
}


class MobileNetV3(Engine):
    """MobileNetV3 Onnx loader

    Args:
        model_path: path or url to onnx model file
        engine_cfg: configuration for the inference engine
        cfg: configuration dictionary
        **kwargs: additional arguments to be passed to `Engine`
    """

    def __init__(
        self,
        model_path: str,
        engine_cfg: EngineConfig | None = None,
        cfg: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs)
        self.cfg = cfg

    def __call__(
        self,
        x: np.ndarray,
    ) -> np.ndarray:
        return self.run(x)


def _mobilenet_v3(
    arch: str,
    model_path: str,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MobileNetV3:
    # Patch the url
    model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path
    _cfg = deepcopy(default_cfgs[arch])
    return MobileNetV3(model_path, cfg=_cfg, engine_cfg=engine_cfg, **kwargs)


def mobilenet_v3_small_crop_orientation(
    model_path: str = default_cfgs["mobilenet_v3_small_crop_orientation"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MobileNetV3:
    """MobileNetV3-Small architecture as described in `"Searching for MobileNetV3", `_.

    >>> import numpy as np
    >>> from onnxtr.models import mobilenet_v3_small_crop_orientation
    >>> model = mobilenet_v3_small_crop_orientation()
    >>> input_tensor = np.random.rand(1, 3, 256, 256)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the MobileNetV3 architecture

    Returns:
        MobileNetV3
    """
    return _mobilenet_v3("mobilenet_v3_small_crop_orientation", model_path, load_in_8_bit, engine_cfg, **kwargs)
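# A hedged sketch (illustrative, not library code) of how the `classes` entry of
# `default_cfgs` maps a classifier's argmax index to an orientation in degrees,
# mirroring what `OrientationPredictor` does with `model.cfg["classes"]`:
#
# >>> import numpy as np
# >>> classes = [0, -90, 180, 90]
# >>> logits = np.array([[0.1, 2.3, 0.4, 0.2]])  # made-up model output for one crop
# >>> classes[int(np.argmax(logits, axis=1)[0])]
# -90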
def mobilenet_v3_small_page_orientation(
    model_path: str = default_cfgs["mobilenet_v3_small_page_orientation"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> MobileNetV3:
    """MobileNetV3-Small architecture as described in `"Searching for MobileNetV3", `_.

    >>> import numpy as np
    >>> from onnxtr.models import mobilenet_v3_small_page_orientation
    >>> model = mobilenet_v3_small_page_orientation()
    >>> input_tensor = np.random.rand(1, 3, 512, 512)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the MobileNetV3 architecture

    Returns:
        MobileNetV3
    """
    return _mobilenet_v3("mobilenet_v3_small_page_orientation", model_path, load_in_8_bit, engine_cfg, **kwargs)


================================================
FILE: onnxtr/models/classification/predictor/__init__.py
================================================
from .base import *


================================================
FILE: onnxtr/models/classification/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

from typing import Any

import numpy as np
from scipy.special import softmax

from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.repr import NestedObject

__all__ = ["OrientationPredictor"]


class OrientationPredictor(NestedObject):
    """Implements an object able to detect the reading direction of a text box or a page.
    4 possible orientations: 0, 90, 180, 270 (-90) degrees counter-clockwise.

    Args:
        pre_processor: transform inputs for easier batched model inference
        model: core classification architecture (backbone + classification head)
    """

    _children_names: list[str] = ["pre_processor", "model"]

    def __init__(
        self,
        pre_processor: PreProcessor | None,
        model: Any | None,
    ) -> None:
        self.pre_processor = pre_processor if isinstance(pre_processor, PreProcessor) else None
        self.model = model

    def __call__(
        self,
        inputs: list[np.ndarray],
    ) -> list[list[int] | list[float]]:
        # Dimension check
        if any(input.ndim != 3 for input in inputs):
            raise ValueError("incorrect input shape: all inputs are expected to be multi-channel 2D images.")

        if self.model is None or self.pre_processor is None:
            # predictor is disabled
            return [[0] * len(inputs), [0] * len(inputs), [1.0] * len(inputs)]

        processed_batches = self.pre_processor(inputs)
        predicted_batches = [self.model(batch) for batch in processed_batches]
        # confidence
        probs = [np.max(softmax(batch, axis=1), axis=1) for batch in predicted_batches]
        # Postprocess predictions
        predicted_batches = [np.argmax(out_batch, axis=1) for out_batch in predicted_batches]

        class_idxs = [int(pred) for batch in predicted_batches for pred in batch]
        classes = [int(self.model.cfg["classes"][idx]) for idx in class_idxs]
        confs = [round(float(p), 2) for prob in probs for p in prob]

        return [class_idxs, classes, confs]


================================================
FILE: onnxtr/models/classification/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

from typing import Any

from onnxtr.models.engine import EngineConfig

from ..
import classification from ..preprocessor import PreProcessor from .predictor import OrientationPredictor __all__ = ["crop_orientation_predictor", "page_orientation_predictor"] ORIENTATION_ARCHS: list[str] = ["mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation"] def _orientation_predictor( arch: Any, model_type: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, disabled: bool = False, **kwargs: Any, ) -> OrientationPredictor: if disabled: # Case where the orientation predictor is disabled return OrientationPredictor(None, None) if isinstance(arch, str): if arch not in ORIENTATION_ARCHS: raise ValueError(f"unknown architecture '{arch}'") # Load directly classifier from backbone _model = classification.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg) else: if not isinstance(arch, classification.MobileNetV3): raise ValueError(f"unknown architecture: {type(arch)}") _model = arch kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"]) kwargs["std"] = kwargs.get("std", _model.cfg["std"]) kwargs["batch_size"] = kwargs.get("batch_size", 512 if model_type == "crop" else 2) input_shape = _model.cfg["input_shape"][1:] predictor = OrientationPredictor( PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs), _model, ) return predictor def crop_orientation_predictor( arch: Any = "mobilenet_v3_small_crop_orientation", batch_size: int = 512, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> OrientationPredictor: """Crop orientation classification architecture. >>> import numpy as np >>> from onnxtr.models import crop_orientation_predictor >>> model = crop_orientation_predictor(arch='mobilenet_v3_small_crop_orientation') >>> input_crop = (255 * np.random.rand(256, 256, 3)).astype(np.uint8) >>> out = model([input_crop]) Args: arch: name of the architecture to use (e.g. 'mobilenet_v3_small_crop_orientation') batch_size: number of samples the model processes in parallel load_in_8_bit: load the 8-bit quantized version of the model engine_cfg: configuration of inference engine **kwargs: keyword arguments to be passed to the OrientationPredictor Returns: OrientationPredictor """ model_type = "crop" return _orientation_predictor( arch=arch, batch_size=batch_size, model_type=model_type, load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg, **kwargs, ) def page_orientation_predictor( arch: Any = "mobilenet_v3_small_page_orientation", batch_size: int = 2, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> OrientationPredictor: """Page orientation classification architecture. >>> import numpy as np >>> from onnxtr.models import page_orientation_predictor >>> model = page_orientation_predictor(arch='mobilenet_v3_small_page_orientation') >>> input_page = (255 * np.random.rand(512, 512, 3)).astype(np.uint8) >>> out = model([input_page]) Args: arch: name of the architecture to use (e.g. 
'mobilenet_v3_small_page_orientation')
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments to be passed to the OrientationPredictor

    Returns:
        OrientationPredictor
    """
    model_type = "page"
    return _orientation_predictor(
        arch=arch,
        batch_size=batch_size,
        model_type=model_type,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )


================================================
FILE: onnxtr/models/detection/__init__.py
================================================
from .models import *
from .zoo import *


================================================
FILE: onnxtr/models/detection/_utils/__init__.py
================================================
from .base import *


================================================
FILE: onnxtr/models/detection/_utils/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

import numpy as np

__all__ = ["_remove_padding"]


def _remove_padding(
    pages: list[np.ndarray],
    loc_preds: list[np.ndarray],
    preserve_aspect_ratio: bool,
    symmetric_pad: bool,
    assume_straight_pages: bool,
) -> list[np.ndarray]:
    """Remove padding from the localization predictions

    Args:
        pages: list of pages
        loc_preds: list of localization predictions
        preserve_aspect_ratio: whether the aspect ratio was preserved during padding
        symmetric_pad: whether the padding was symmetric
        assume_straight_pages: whether the pages are assumed to be straight

    Returns:
        list of unpadded localization predictions
    """
    if preserve_aspect_ratio:
        # Rectify loc_preds to remove padding
        rectified_preds = []
        for page, loc_pred in zip(pages, loc_preds):
            h, w = page.shape[0], page.shape[1]
            if h > w:
                # y unchanged, dilate x coord
                if symmetric_pad:
                    if assume_straight_pages:
                        loc_pred[:, [0, 2]] = (loc_pred[:, [0, 2]] - 0.5) * h / w + 0.5
                    else:
                        loc_pred[:, :, 0] = (loc_pred[:, :, 0] - 0.5) * h / w + 0.5
                else:
                    if assume_straight_pages:
                        loc_pred[:, [0, 2]] *= h / w
                    else:
                        loc_pred[:, :, 0] *= h / w
            elif w > h:
                # x unchanged, dilate y coord
                if symmetric_pad:
                    if assume_straight_pages:
                        loc_pred[:, [1, 3]] = (loc_pred[:, [1, 3]] - 0.5) * w / h + 0.5
                    else:
                        loc_pred[:, :, 1] = (loc_pred[:, :, 1] - 0.5) * w / h + 0.5
                else:
                    if assume_straight_pages:
                        loc_pred[:, [1, 3]] *= w / h
                    else:
                        loc_pred[:, :, 1] *= w / h
            rectified_preds.append(np.clip(loc_pred, 0, 1))
        return rectified_preds
    return loc_preds


================================================
FILE: onnxtr/models/detection/core.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
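# A hedged numeric sketch (not library code) of the symmetric-padding inverse mapping in
# `_remove_padding` above: for a tall page (h > w) padded symmetrically along x, a
# relative x coordinate is re-centred around 0.5 and dilated by h / w.
#
# >>> h, w = 1024, 512
# >>> x_padded = 0.375  # made-up relative x coordinate on the padded square input
# >>> (x_padded - 0.5) * h / w + 0.5
# 0.25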
import cv2
import numpy as np

from onnxtr.utils.repr import NestedObject

__all__ = ["DetectionPostProcessor"]


class DetectionPostProcessor(NestedObject):
    """Abstract class to postprocess the raw output of the model

    Args:
        box_thresh (float): minimal objectness score to consider a box
        bin_thresh (float): threshold to apply to segmentation raw heatmap
        assume_straight_pages (bool): if True, fit straight boxes only
    """

    def __init__(self, box_thresh: float = 0.5, bin_thresh: float = 0.5, assume_straight_pages: bool = True) -> None:
        self.box_thresh = box_thresh
        self.bin_thresh = bin_thresh
        self.assume_straight_pages = assume_straight_pages
        self._opening_kernel: np.ndarray = np.ones((3, 3), dtype=np.uint8)

    def extra_repr(self) -> str:
        return f"bin_thresh={self.bin_thresh}, box_thresh={self.box_thresh}"

    @staticmethod
    def box_score(pred: np.ndarray, points: np.ndarray, assume_straight_pages: bool = True) -> float:
        """Compute the confidence score for a polygon: mean of the p values on the polygon

        Args:
            pred (np.ndarray): p map returned by the model
            points: coordinates of the polygon
            assume_straight_pages: if True, fit straight boxes only

        Returns:
            polygon objectness
        """
        h, w = pred.shape[:2]

        if assume_straight_pages:
            xmin = np.clip(np.floor(points[:, 0].min()).astype(np.int32), 0, w - 1)
            xmax = np.clip(np.ceil(points[:, 0].max()).astype(np.int32), 0, w - 1)
            ymin = np.clip(np.floor(points[:, 1].min()).astype(np.int32), 0, h - 1)
            ymax = np.clip(np.ceil(points[:, 1].max()).astype(np.int32), 0, h - 1)
            return pred[ymin : ymax + 1, xmin : xmax + 1].mean()
        else:
            mask: np.ndarray = np.zeros((h, w), np.int32)
            cv2.fillPoly(mask, [points.astype(np.int32)], 1.0)
            product = pred * mask
            return np.sum(product) / np.count_nonzero(product)

    def bitmap_to_boxes(
        self,
        pred: np.ndarray,
        bitmap: np.ndarray,
    ) -> np.ndarray:
        raise NotImplementedError

    def __call__(
        self,
        proba_map,
    ) -> list[list[np.ndarray]]:
        """Performs postprocessing for a list of model outputs

        Args:
            proba_map: probability map of shape (N, H, W, C)

        Returns:
            list of N class predictions (for each input sample), where each class predictions
            is a list of C tensors of shape (*, 5) or (*, 6)
        """
        if proba_map.ndim != 4:
            raise AssertionError(f"arg `proba_map` is expected to be 4-dimensional, got {proba_map.ndim}.")

        # Erosion + dilation on the binary map
        bin_map = [
            [
                cv2.morphologyEx(bmap[..., idx], cv2.MORPH_OPEN, self._opening_kernel)
                for idx in range(proba_map.shape[-1])
            ]
            for bmap in (proba_map >= self.bin_thresh).astype(np.uint8)
        ]

        return [
            [self.bitmap_to_boxes(pmaps[..., idx], bmaps[idx]) for idx in range(proba_map.shape[-1])]
            for pmaps, bmaps in zip(proba_map, bin_map)
        ]


================================================
FILE: onnxtr/models/detection/models/__init__.py
================================================
from .fast import *
from .differentiable_binarization import *
from .linknet import *


================================================
FILE: onnxtr/models/detection/models/differentiable_binarization.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
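# A hedged sketch (not library code) of `DetectionPostProcessor.box_score` above for the
# straight-page case: the objectness of a box is simply the mean probability inside it.
#
# >>> import numpy as np
# >>> pred = np.zeros((4, 4)); pred[1:3, 1:3] = 0.8  # made-up 4x4 probability map
# >>> points = np.array([[1, 1], [2, 1], [2, 2], [1, 2]])  # box covering the hot region
# >>> pred[1:3, 1:3].mean()  # what box_score computes for these points
# 0.8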
from typing import Any import numpy as np from scipy.special import expit from ...engine import Engine, EngineConfig from ..postprocessor.base import GeneralDetectionPostProcessor __all__ = ["DBNet", "db_resnet50", "db_resnet34", "db_mobilenet_v3_large"] default_cfgs: dict[str, dict[str, Any]] = { "db_resnet50": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet50-69ba0015.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet50_static_8_bit-09a6104f.onnx", }, "db_resnet34": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/db_resnet34-b4873198.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/db_resnet34_static_8_bit-027e2c7f.onnx", }, "db_mobilenet_v3_large": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.2.0/db_mobilenet_v3_large-4987e7bd.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.2.0/db_mobilenet_v3_large_static_8_bit-535a6f25.onnx", }, } class DBNet(Engine): """DBNet Onnx loader Args: model_path: path or url to onnx model file engine_cfg: configuration for the inference engine bin_thresh: threshold for binarization of the output feature map box_thresh: minimal objectness score to consider a box assume_straight_pages: if True, fit straight bounding boxes only cfg: the configuration dict of the model **kwargs: additional arguments to be passed to `Engine` """ def __init__( self, model_path: str, engine_cfg: EngineConfig | None = None, bin_thresh: float = 0.3, box_thresh: float = 0.1, assume_straight_pages: bool = True, cfg: dict[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs) self.cfg = cfg self.assume_straight_pages = assume_straight_pages self.postprocessor = GeneralDetectionPostProcessor( assume_straight_pages=self.assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh ) def __call__( self, x: np.ndarray, return_model_output: bool = False, **kwargs: Any, ) -> dict[str, Any]: logits = self.run(x) out: dict[str, Any] = {} prob_map = expit(logits) if return_model_output: out["out_map"] = prob_map out["preds"] = self.postprocessor(prob_map) return out def _dbnet( arch: str, model_path: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> DBNet: # Patch the url model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path # Build the model return DBNet(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs) def db_resnet34( model_path: str = default_cfgs["db_resnet34"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> DBNet: """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization" `_, using a ResNet-34 backbone. 
    >>> import numpy as np
    >>> from onnxtr.models import db_resnet34
    >>> model = db_resnet34()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the DBNet architecture

    Returns:
        text detection architecture
    """
    return _dbnet("db_resnet34", model_path, load_in_8_bit, engine_cfg, **kwargs)


def db_resnet50(
    model_path: str = default_cfgs["db_resnet50"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization" `_,
    using a ResNet-50 backbone.

    >>> import numpy as np
    >>> from onnxtr.models import db_resnet50
    >>> model = db_resnet50()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the DBNet architecture

    Returns:
        text detection architecture
    """
    return _dbnet("db_resnet50", model_path, load_in_8_bit, engine_cfg, **kwargs)


def db_mobilenet_v3_large(
    model_path: str = default_cfgs["db_mobilenet_v3_large"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DBNet:
    """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization" `_,
    using a MobileNet V3 Large backbone.

    >>> import numpy as np
    >>> from onnxtr.models import db_mobilenet_v3_large
    >>> model = db_mobilenet_v3_large()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the DBNet architecture

    Returns:
        text detection architecture
    """
    return _dbnet("db_mobilenet_v3_large", model_path, load_in_8_bit, engine_cfg, **kwargs)


================================================
FILE: onnxtr/models/detection/models/fast.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
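# A hedged sketch (not library code) of the probability-map step shared by the detection
# models in this package: raw logits are squashed with the sigmoid (`scipy.special.expit`),
# then binarized at `bin_thresh` before box extraction.
#
# >>> import numpy as np
# >>> from scipy.special import expit
# >>> logits = np.array([-2.0, 0.0, 3.0])  # made-up raw model outputs
# >>> prob_map = expit(logits)             # ~ [0.12, 0.5, 0.95]
# >>> (prob_map >= 0.3).astype(np.uint8)   # bin_thresh=0.3 as in DBNet above
# array([0, 1, 1], dtype=uint8)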
import logging from typing import Any import numpy as np from scipy.special import expit from ...engine import Engine, EngineConfig from ..postprocessor.base import GeneralDetectionPostProcessor __all__ = ["FAST", "fast_tiny", "fast_small", "fast_base"] default_cfgs: dict[str, dict[str, Any]] = { "fast_tiny": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_tiny-28867779.onnx", }, "fast_small": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_small-10428b70.onnx", }, "fast_base": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/rep_fast_base-1b89ebf9.onnx", }, } class FAST(Engine): """FAST Onnx loader Args: model_path: path or url to onnx model file engine_cfg: configuration for the inference engine bin_thresh: threshold for binarization of the output feature map box_thresh: minimal objectness score to consider a box assume_straight_pages: if True, fit straight bounding boxes only cfg: the configuration dict of the model **kwargs: additional arguments to be passed to `Engine` """ def __init__( self, model_path: str, engine_cfg: EngineConfig | None = None, bin_thresh: float = 0.1, box_thresh: float = 0.1, assume_straight_pages: bool = True, cfg: dict[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs) self.cfg = cfg self.assume_straight_pages = assume_straight_pages self.postprocessor = GeneralDetectionPostProcessor( assume_straight_pages=self.assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh ) def __call__( self, x: np.ndarray, return_model_output: bool = False, **kwargs: Any, ) -> dict[str, Any]: logits = self.run(x) out: dict[str, Any] = {} prob_map = expit(logits) if return_model_output: out["out_map"] = prob_map out["preds"] = self.postprocessor(prob_map) return out def _fast( arch: str, model_path: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> FAST: if load_in_8_bit: logging.warning("FAST models do not support 8-bit quantization yet. Loading full precision model...") # Build the model return FAST(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs) def fast_tiny( model_path: str = default_cfgs["fast_tiny"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> FAST: """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation" `_, using a tiny TextNet backbone. 
    >>> import numpy as np
    >>> from onnxtr.models import fast_tiny
    >>> model = fast_tiny()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the FAST architecture

    Returns:
        text detection architecture
    """
    return _fast("fast_tiny", model_path, load_in_8_bit, engine_cfg, **kwargs)


def fast_small(
    model_path: str = default_cfgs["fast_small"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with
    Minimalist Kernel Representation" `_, using a small TextNet backbone.

    >>> import numpy as np
    >>> from onnxtr.models import fast_small
    >>> model = fast_small()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the FAST architecture

    Returns:
        text detection architecture
    """
    return _fast("fast_small", model_path, load_in_8_bit, engine_cfg, **kwargs)


def fast_base(
    model_path: str = default_cfgs["fast_base"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> FAST:
    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with
    Minimalist Kernel Representation" `_, using a base TextNet backbone.

    >>> import numpy as np
    >>> from onnxtr.models import fast_base
    >>> model = fast_base()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the FAST architecture

    Returns:
        text detection architecture
    """
    return _fast("fast_base", model_path, load_in_8_bit, engine_cfg, **kwargs)


================================================
FILE: onnxtr/models/detection/models/linknet.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
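# A hedged sketch (not library code) of the binary-map cleanup applied in
# `DetectionPostProcessor.__call__`: a 3x3 morphological opening (erosion followed by
# dilation) removes isolated noise pixels before contours are extracted.
#
# >>> import cv2
# >>> import numpy as np
# >>> bin_map = np.zeros((5, 5), dtype=np.uint8)
# >>> bin_map[1:4, 1:4] = 1  # a solid 3x3 text blob
# >>> bin_map[0, 4] = 1      # a single noise pixel
# >>> opened = cv2.morphologyEx(bin_map, cv2.MORPH_OPEN, np.ones((3, 3), dtype=np.uint8))
# >>> int(opened[0, 4]), int(opened[2, 2])  # noise removed, blob centre kept
# (0, 1)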
from typing import Any import numpy as np from scipy.special import expit from ...engine import Engine, EngineConfig from ..postprocessor.base import GeneralDetectionPostProcessor __all__ = ["LinkNet", "linknet_resnet18", "linknet_resnet34", "linknet_resnet50"] default_cfgs: dict[str, dict[str, Any]] = { "linknet_resnet18": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet18-e0e0b9dc.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet18_static_8_bit-3b3a37dd.onnx", }, "linknet_resnet34": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet34-93e39a39.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet34_static_8_bit-2824329d.onnx", }, "linknet_resnet50": { "input_shape": (3, 1024, 1024), "mean": (0.798, 0.785, 0.772), "std": (0.264, 0.2749, 0.287), "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/linknet_resnet50-15d8c4ec.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/linknet_resnet50_static_8_bit-65d6b0b8.onnx", }, } class LinkNet(Engine): """LinkNet Onnx loader Args: model_path: path or url to onnx model file engine_cfg: configuration for the inference engine bin_thresh: threshold for binarization of the output feature map box_thresh: minimal objectness score to consider a box assume_straight_pages: if True, fit straight bounding boxes only cfg: the configuration dict of the model **kwargs: additional arguments to be passed to `Engine` """ def __init__( self, model_path: str, engine_cfg: EngineConfig | None = None, bin_thresh: float = 0.1, box_thresh: float = 0.1, assume_straight_pages: bool = True, cfg: dict[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs) self.cfg = cfg self.assume_straight_pages = assume_straight_pages self.postprocessor = GeneralDetectionPostProcessor( assume_straight_pages=self.assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh ) def __call__( self, x: np.ndarray, return_model_output: bool = False, **kwargs: Any, ) -> dict[str, Any]: logits = self.run(x) out: dict[str, Any] = {} prob_map = expit(logits) if return_model_output: out["out_map"] = prob_map out["preds"] = self.postprocessor(prob_map) return out def _linknet( arch: str, model_path: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> LinkNet: # Patch the url model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path # Build the model return LinkNet(model_path, cfg=default_cfgs[arch], engine_cfg=engine_cfg, **kwargs) def linknet_resnet18( model_path: str = default_cfgs["linknet_resnet18"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> LinkNet: """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation" `_. 
    >>> import numpy as np
    >>> from onnxtr.models import linknet_resnet18
    >>> model = linknet_resnet18()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the LinkNet architecture

    Returns:
        text detection architecture
    """
    return _linknet("linknet_resnet18", model_path, load_in_8_bit, engine_cfg, **kwargs)


def linknet_resnet34(
    model_path: str = default_cfgs["linknet_resnet34"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for
    Efficient Semantic Segmentation" `_.

    >>> import numpy as np
    >>> from onnxtr.models import linknet_resnet34
    >>> model = linknet_resnet34()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the LinkNet architecture

    Returns:
        text detection architecture
    """
    return _linknet("linknet_resnet34", model_path, load_in_8_bit, engine_cfg, **kwargs)


def linknet_resnet50(
    model_path: str = default_cfgs["linknet_resnet50"]["url"],
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> LinkNet:
    """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for
    Efficient Semantic Segmentation" `_.

    >>> import numpy as np
    >>> from onnxtr.models import linknet_resnet50
    >>> model = linknet_resnet50()
    >>> input_tensor = np.random.rand(1, 3, 1024, 1024)
    >>> out = model(input_tensor)

    Args:
        model_path: path to onnx model file, defaults to url in default_cfgs
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: keyword arguments of the LinkNet architecture

    Returns:
        text detection architecture
    """
    return _linknet("linknet_resnet50", model_path, load_in_8_bit, engine_cfg, **kwargs)


================================================
FILE: onnxtr/models/detection/postprocessor/__init__.py
================================================


================================================
FILE: onnxtr/models/detection/postprocessor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

# Credits: post-processing adapted from https://github.com/xuannianz/DifferentiableBinarization

import cv2
import numpy as np
import pyclipper

from onnxtr.utils import order_points

from ..core import DetectionPostProcessor

__all__ = ["GeneralDetectionPostProcessor"]
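# A hedged sketch (not library code) of the polygon expansion used by
# `GeneralDetectionPostProcessor.polygon_to_box` below: the offset distance follows the
# DB unclipping formula, distance = area * unclip_ratio / perimeter.
#
# >>> import numpy as np
# >>> import pyclipper
# >>> points = np.array([[0, 0], [10, 0], [10, 4], [0, 4]])  # made-up 10x4 text region
# >>> distance = (10 * 4) * 1.5 / (2 * (10 + 4))  # ~2.14 pixels of outward offset
# >>> offset = pyclipper.PyclipperOffset()
# >>> offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
# >>> expanded = np.asarray(offset.Execute(distance)[0])  # the grown polygon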
class GeneralDetectionPostProcessor(DetectionPostProcessor):
    """Implements a generic post-processor for text detection models

    Args:
        bin_thresh: threshold used to binarize the p_map at inference time
        box_thresh: minimal objectness score to consider a box
        assume_straight_pages: whether the inputs were expected to have horizontal text elements
    """

    def __init__(
        self,
        bin_thresh: float = 0.1,
        box_thresh: float = 0.1,
        assume_straight_pages: bool = True,
    ) -> None:
        super().__init__(box_thresh, bin_thresh, assume_straight_pages)
        self.unclip_ratio = 1.5

    def polygon_to_box(
        self,
        points: np.ndarray,
    ) -> np.ndarray:
        """Expand a polygon (points) by a factor unclip_ratio, and returns a polygon

        Args:
            points: the polygon to expand

        Returns:
            a box in absolute coordinates (xmin, ymin, xmax, ymax) or (4, 2) array (quadrangle)
        """
        if not self.assume_straight_pages:
            # Compute the rectangle polygon enclosing the raw polygon
            rect = cv2.minAreaRect(points)
            points = cv2.boxPoints(rect)
            # Add 1 pixel to correct cv2 approx
            area = (rect[1][0] + 1) * (1 + rect[1][1])
            length = 2 * (rect[1][0] + rect[1][1]) + 2
        else:
            area = cv2.contourArea(points)
            length = cv2.arcLength(points, closed=True)
        distance = area * self.unclip_ratio / length  # compute distance to expand polygon
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(points, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        _points = offset.Execute(distance)
        # Take the biggest stack of points
        idx = 0
        if len(_points) > 1:
            max_size = 0
            for _idx, p in enumerate(_points):
                if len(p) > max_size:
                    idx = _idx
                    max_size = len(p)
            # We ensure that _points can be correctly cast to a ndarray
            _points = [_points[idx]]
        expanded_points: np.ndarray = np.asarray(_points)  # expand polygon
        if len(expanded_points) < 1:
            return None  # type: ignore[return-value]

        return (
            cv2.boundingRect(expanded_points)  # type: ignore[return-value]
            if self.assume_straight_pages
            else order_points(cv2.boxPoints(cv2.minAreaRect(expanded_points)))
        )
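    # A hedged sketch (not library code) of the contour-based box extraction performed by
    # `bitmap_to_boxes` below: connected components are found on the binary map, then each
    # contour is turned into a (possibly expanded) box and scored against the probability map.
    #
    # >>> import cv2
    # >>> import numpy as np
    # >>> bitmap = np.zeros((8, 8), dtype=np.uint8)
    # >>> bitmap[2:5, 1:7] = 1  # made-up 6x3 text component
    # >>> contours, _ = cv2.findContours(bitmap, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # >>> cv2.boundingRect(contours[0])  # absolute (x, y, w, h), later normalized by W and H
    # (1, 2, 6, 3)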
    def bitmap_to_boxes(
        self,
        pred: np.ndarray,
        bitmap: np.ndarray,
    ) -> np.ndarray:
        """Compute boxes from a bitmap/pred_map: find connected components then filter boxes

        Args:
            pred: probability map returned by the detection model
            bitmap: Bitmap map computed from pred (binarized)

        Returns:
            np tensor boxes for the bitmap, each box is a 6-element list
            containing x, y, w, h, alpha, score for the box
        """
        height, width = bitmap.shape[:2]
        boxes: list[np.ndarray | list[float]] = []
        # get contours from connected components on the bitmap
        contours, _ = cv2.findContours(bitmap.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours:
            # Check whether smallest enclosing bounding box is not too small
            if np.any(contour[:, 0].max(axis=0) - contour[:, 0].min(axis=0) < 2):
                continue
            # Compute objectness
            if self.assume_straight_pages:
                x, y, w, h = cv2.boundingRect(contour)
                points: np.ndarray = np.array([[x, y], [x, y + h], [x + w, y + h], [x + w, y]])
                score = self.box_score(pred, points, assume_straight_pages=True)
            else:
                score = self.box_score(pred, contour, assume_straight_pages=False)

            if score < self.box_thresh:  # remove polygons with a weak objectness
                continue

            if self.assume_straight_pages:
                _box = self.polygon_to_box(points)
            else:
                _box = self.polygon_to_box(np.squeeze(contour))

            if self.assume_straight_pages:
                # compute relative polygon to get rid of img shape
                x, y, w, h = _box
                xmin, ymin, xmax, ymax = x / width, y / height, (x + w) / width, (y + h) / height
                boxes.append([xmin, ymin, xmax, ymax, score])
            else:
                # compute relative box to get rid of img shape
                _box[:, 0] /= width
                _box[:, 1] /= height
                # Add score to box as (0, score)
                boxes.append(np.vstack([_box, np.array([0.0, score])]))

        if not self.assume_straight_pages:
            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5, 2), dtype=pred.dtype)
        else:
            return np.clip(np.asarray(boxes), 0, 1) if len(boxes) > 0 else np.zeros((0, 5), dtype=pred.dtype)


================================================
FILE: onnxtr/models/detection/predictor/__init__.py
================================================
from .base import *


================================================
FILE: onnxtr/models/detection/predictor/base.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

from typing import Any

import numpy as np

from onnxtr.models.detection._utils import _remove_padding
from onnxtr.models.preprocessor import PreProcessor
from onnxtr.utils.repr import NestedObject

__all__ = ["DetectionPredictor"]


class DetectionPredictor(NestedObject):
    """Implements an object able to localize text elements in a document

    Args:
        pre_processor: transform inputs for easier batched model inference
        model: core detection architecture
    """

    _children_names: list[str] = ["pre_processor", "model"]

    def __init__(
        self,
        pre_processor: PreProcessor,
        model: Any,
    ) -> None:
        self.pre_processor = pre_processor
        self.model = model

    def __call__(
        self,
        pages: list[np.ndarray],
        return_maps: bool = False,
        **kwargs: Any,
    ) -> list[np.ndarray] | tuple[list[np.ndarray], list[np.ndarray]]:
        # Extract parameters from the preprocessor
        preserve_aspect_ratio = self.pre_processor.resize.preserve_aspect_ratio
        symmetric_pad = self.pre_processor.resize.symmetric_pad
        assume_straight_pages = self.model.assume_straight_pages

        # Dimension check
        if any(page.ndim != 3 for page in pages):
            raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.")

        processed_batches = self.pre_processor(pages)
        predicted_batches = [
            self.model(batch, return_preds=True, return_model_output=True, **kwargs) for batch in processed_batches
        ]
        # Remove padding from loc predictions
        preds = _remove_padding(
            pages,
            [pred[0] for batch in predicted_batches for pred in batch["preds"]],
            preserve_aspect_ratio=preserve_aspect_ratio,
            symmetric_pad=symmetric_pad,
            assume_straight_pages=assume_straight_pages,
        )

        if return_maps:
            seg_maps = [pred for batch in predicted_batches for pred in batch["out_map"]]
            return preds, seg_maps
        return preds


================================================
FILE: onnxtr/models/detection/zoo.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.

from typing import Any
from .. import detection
from ..engine import EngineConfig
from ..preprocessor import PreProcessor
from .predictor import DetectionPredictor

__all__ = ["detection_predictor"]

ARCHS = [
    "db_resnet34",
    "db_resnet50",
    "db_mobilenet_v3_large",
    "linknet_resnet18",
    "linknet_resnet34",
    "linknet_resnet50",
    "fast_tiny",
    "fast_small",
    "fast_base",
]


def _predictor(
    arch: Any,
    assume_straight_pages: bool = True,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DetectionPredictor:
    if isinstance(arch, str):
        if arch not in ARCHS:
            raise ValueError(f"unknown architecture '{arch}'")

        _model = detection.__dict__[arch](
            assume_straight_pages=assume_straight_pages, load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg
        )
    else:
        if not isinstance(arch, (detection.DBNet, detection.LinkNet, detection.FAST)):
            raise ValueError(f"unknown architecture: {type(arch)}")

        _model = arch
        _model.assume_straight_pages = assume_straight_pages
        _model.postprocessor.assume_straight_pages = assume_straight_pages

    kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
    kwargs["std"] = kwargs.get("std", _model.cfg["std"])
    kwargs["batch_size"] = kwargs.get("batch_size", 2)

    predictor = DetectionPredictor(
        PreProcessor(_model.cfg["input_shape"][1:], **kwargs),
        _model,
    )
    return predictor


def detection_predictor(
    arch: Any = "fast_base",
    assume_straight_pages: bool = True,
    preserve_aspect_ratio: bool = True,
    symmetric_pad: bool = True,
    batch_size: int = 2,
    load_in_8_bit: bool = False,
    engine_cfg: EngineConfig | None = None,
    **kwargs: Any,
) -> DetectionPredictor:
    """Text detection architecture.

    >>> import numpy as np
    >>> from onnxtr.models import detection_predictor
    >>> model = detection_predictor(arch='db_resnet50')
    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
    >>> out = model([input_page])

    Args:
        arch: name of the architecture or model itself to use (e.g. 'db_resnet50')
        assume_straight_pages: If True, fit straight boxes to the page
        preserve_aspect_ratio: If True, pad the input document image to preserve the aspect ratio before
            running the detection model on it
        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right
        batch_size: number of samples the model processes in parallel
        load_in_8_bit: whether to load the 8-bit quantized model, defaults to False
        engine_cfg: configuration for the inference engine
        **kwargs: optional keyword arguments passed to the architecture

    Returns:
        Detection predictor
    """
    return _predictor(
        arch=arch,
        assume_straight_pages=assume_straight_pages,
        preserve_aspect_ratio=preserve_aspect_ratio,
        symmetric_pad=symmetric_pad,
        batch_size=batch_size,
        load_in_8_bit=load_in_8_bit,
        engine_cfg=engine_cfg,
        **kwargs,
    )


================================================
FILE: onnxtr/models/engine.py
================================================
# Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
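# A hedged usage sketch (not library code) for the EngineConfig defined below: callers can
# pin the execution providers and session options passed to onnxruntime instead of relying
# on the defaults auto-detected by `_init_providers`.
#
# >>> from onnxruntime import SessionOptions
# >>> from onnxtr.models.engine import EngineConfig
# >>> opts = SessionOptions()
# >>> opts.intra_op_num_threads = 4  # made-up thread budget
# >>> cfg = EngineConfig(providers=["CPUExecutionProvider"], session_options=opts)
# >>> cfg.providers
# ['CPUExecutionProvider']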
import logging import os from collections.abc import Callable from typing import Any, TypeAlias import numpy as np from onnxruntime import ( ExecutionMode, GraphOptimizationLevel, InferenceSession, RunOptions, SessionOptions, get_available_providers, get_device, ) from onnxruntime.capi._pybind_state import set_default_logger_severity set_default_logger_severity(int(os.getenv("ORT_LOG_SEVERITY_LEVEL", 4))) from onnxtr.utils.data import download_from_url from onnxtr.utils.geometry import shape_translate __all__ = ["EngineConfig", "RunOptionsProvider"] RunOptionsProvider: TypeAlias = Callable[[RunOptions], RunOptions] class EngineConfig: """Implements a configuration class for the engine of a model Args: providers: list of providers to use for inference ref.: https://onnxruntime.ai/docs/execution-providers/ session_options: configuration for the inference session ref.: https://onnxruntime.ai/docs/api/python/api_summary.html#sessionoptions """ def __init__( self, providers: list[tuple[str, dict[str, Any]]] | list[str] | None = None, session_options: SessionOptions | None = None, run_options_provider: RunOptionsProvider | None = None, ): self._providers = providers or self._init_providers() self._session_options = session_options or self._init_sess_opts() self.run_options_provider = run_options_provider def _init_providers(self) -> list[tuple[str, dict[str, Any]]]: providers: Any = [("CPUExecutionProvider", {"arena_extend_strategy": "kSameAsRequested"})] available_providers = get_available_providers() logging.info(f"Available providers: {available_providers}") if "CUDAExecutionProvider" in available_providers and get_device() == "GPU": # pragma: no cover providers.insert( 0, ( "CUDAExecutionProvider", { "device_id": 0, "arena_extend_strategy": "kNextPowerOfTwo", "cudnn_conv_algo_search": "DEFAULT", "do_copy_in_default_stream": True, }, ), ) elif "CoreMLExecutionProvider" in available_providers: # pragma: no cover providers.insert(0, ("CoreMLExecutionProvider", {})) return providers def _init_sess_opts(self) -> SessionOptions: session_options = SessionOptions() session_options.enable_cpu_mem_arena = True session_options.execution_mode = ExecutionMode.ORT_SEQUENTIAL session_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL session_options.intra_op_num_threads = -1 session_options.inter_op_num_threads = -1 return session_options @property def providers(self) -> list[tuple[str, dict[str, Any]]] | list[str]: return self._providers @property def session_options(self) -> SessionOptions: return self._session_options def __repr__(self) -> str: return f"EngineConfig(providers={self.providers})" class Engine: """Implements an abstract class for the engine of a model Args: url: the url to use to download a model if needed engine_cfg: the configuration of the engine **kwargs: additional arguments to be passed to `download_from_url` """ def __init__(self, url: str, engine_cfg: EngineConfig | None = None, **kwargs: Any) -> None: engine_cfg = engine_cfg if isinstance(engine_cfg, EngineConfig) else EngineConfig() archive_path = download_from_url(url, cache_subdir="models", **kwargs) if "http" in url else url # NOTE: older onnxruntime versions require a string path for windows archive_path = rf"{archive_path}" # Store model path for each model self.model_path = archive_path self.session_options = engine_cfg.session_options self.providers = engine_cfg.providers self.run_options_provider = engine_cfg.run_options_provider self.runtime = InferenceSession(archive_path, providers=self.providers, 
sess_options=self.session_options) self.runtime_inputs = self.runtime.get_inputs()[0] self.tf_exported = int(self.runtime_inputs.shape[-1]) == 3 self.fixed_batch_size: int | str = self.runtime_inputs.shape[ 0 ] # mostly possible with tensorflow exported models self.output_name = [output.name for output in self.runtime.get_outputs()] def run(self, inputs: np.ndarray) -> np.ndarray: run_options = RunOptions() if self.run_options_provider is not None: run_options = self.run_options_provider(run_options) if self.tf_exported: inputs = shape_translate(inputs, format="BHWC") # sanity check else: inputs = shape_translate(inputs, format="BCHW") if isinstance(self.fixed_batch_size, int) and self.fixed_batch_size != 0: # dynamic batch size is a string inputs = np.broadcast_to(inputs, (self.fixed_batch_size, *inputs.shape)) # combine the results logits = np.concatenate( [ self.runtime.run(self.output_name, {self.runtime_inputs.name: batch}, run_options=run_options)[0] for batch in inputs ], axis=0, ) else: logits = self.runtime.run(self.output_name, {self.runtime_inputs.name: inputs}, run_options=run_options)[0] return shape_translate(logits, format="BHWC") ================================================ FILE: onnxtr/models/factory/__init__.py ================================================ from .hub import * ================================================ FILE: onnxtr/models/factory/hub.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. # Inspired by: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/hub.py import json import logging import shutil import subprocess import tempfile import textwrap from pathlib import Path from typing import Any from huggingface_hub import ( HfApi, get_token, hf_hub_download, login, ) from onnxtr import models from onnxtr.models.engine import EngineConfig __all__ = ["login_to_hub", "push_to_hf_hub", "from_hub", "_save_model_and_config_for_hf_hub"] AVAILABLE_ARCHS = { "classification": models.classification.zoo.ORIENTATION_ARCHS, "detection": models.detection.zoo.ARCHS, "recognition": models.recognition.zoo.ARCHS, } def login_to_hub() -> None: # pragma: no cover """Login to huggingface hub""" access_token = get_token() if access_token is not None: logging.info("Huggingface Hub token found and valid") login(token=access_token) else: login() # check if git lfs is installed try: subprocess.call(["git", "lfs", "version"]) except FileNotFoundError: raise OSError( "Looks like you do not have git-lfs installed, please install. \ You can install from https://git-lfs.github.com/. \ Then run `git lfs install` (you only have to do this once)." 
) def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task: str) -> None: """Save model and config to disk for pushing to huggingface hub Args: model: Onnx model to be saved save_dir: directory to save model and config arch: architecture name task: task name """ save_directory = Path(save_dir) shutil.copy2(model.model_path, save_directory / "model.onnx") config_path = save_directory / "config.json" # add model configuration model_config = model.cfg model_config["arch"] = arch model_config["task"] = task with config_path.open("w") as f: json.dump(model_config, f, indent=2, ensure_ascii=False) def push_to_hf_hub( model: Any, model_name: str, task: str, override: bool = False, **kwargs ) -> None: # pragma: no cover """Save model and its configuration on HF hub >>> from onnxtr.models import login_to_hub, push_to_hf_hub >>> from onnxtr.models.recognition import crnn_mobilenet_v3_small >>> login_to_hub() >>> model = crnn_mobilenet_v3_small() >>> push_to_hf_hub(model, 'my-model', 'recognition', arch='crnn_mobilenet_v3_small') Args: model: Onnx model to be saved model_name: name of the model which is also the repository name task: task name override: whether to override the existing model / repo on HF hub **kwargs: keyword arguments for push_to_hf_hub """ run_config = kwargs.get("run_config", None) arch = kwargs.get("arch", None) if run_config is None and arch is None: raise ValueError("run_config or arch must be specified") if task not in ["classification", "detection", "recognition"]: raise ValueError("task must be one of classification, detection, recognition") # default readme readme = textwrap.dedent( f""" --- language: - en - fr license: apache-2.0 ---

**Optical Character Recognition made seamless & accessible to anyone, powered by Onnxruntime** ## Task: {task} https://github.com/felixdittrich92/OnnxTR ### Example usage: ```python >>> from onnxtr.io import DocumentFile >>> from onnxtr.models import ocr_predictor, from_hub >>> img = DocumentFile.from_images(['']) >>> # Load your model from the hub >>> model = from_hub('onnxtr/my-model') >>> # Pass it to the predictor >>> # If your model is a recognition model: >>> predictor = ocr_predictor(det_arch='db_mobilenet_v3_large', >>> reco_arch=model) >>> # If your model is a detection model: >>> predictor = ocr_predictor(det_arch=model, >>> reco_arch='crnn_mobilenet_v3_small') >>> # Get your predictions >>> res = predictor(img) ``` """ ) # add run configuration to readme if available if run_config is not None: arch = run_config.arch readme += textwrap.dedent( f"""### Run Configuration \n{json.dumps(vars(run_config), indent=2, ensure_ascii=False)}""" ) if arch not in AVAILABLE_ARCHS[task]: raise ValueError( f"Architecture: {arch} for task: {task} not found.\ \nAvailable architectures: {AVAILABLE_ARCHS}" ) commit_message = f"Add {model_name} model" # Create repository api = HfApi() api.create_repo(model_name, token=get_token(), exist_ok=False) # Save model files to a temporary directory with tempfile.TemporaryDirectory() as tmp_dir: _save_model_and_config_for_hf_hub(model, tmp_dir, arch=arch, task=task) readme_path = Path(tmp_dir) / "README.md" readme_path.write_text(readme) # Upload all files to the hub api.upload_folder( folder_path=tmp_dir, repo_id=model_name, commit_message=commit_message, token=get_token(), ) def from_hub(repo_id: str, engine_cfg: EngineConfig | None = None, **kwargs: Any): """Instantiate & load a pretrained model from HF hub. >>> from onnxtr.models import from_hub >>> model = from_hub("onnxtr/my-model") Args: repo_id: HuggingFace model hub repo engine_cfg: configuration for the inference engine (optional) **kwargs: kwargs of `hf_hub_download` Returns: Model loaded with the checkpoint """ # Get the config with open(hf_hub_download(repo_id, filename="config.json", **kwargs), "rb") as f: cfg = json.load(f) model_path = hf_hub_download(repo_id, filename="model.onnx", **kwargs) arch = cfg["arch"] task = cfg["task"] cfg.pop("arch") cfg.pop("task") if task == "classification": model = models.classification.__dict__[arch](model_path, classes=cfg["classes"], engine_cfg=engine_cfg) elif task == "detection": model = models.detection.__dict__[arch](model_path, engine_cfg=engine_cfg) elif task == "recognition": model = models.recognition.__dict__[arch]( model_path, input_shape=cfg["input_shape"], vocab=cfg["vocab"], engine_cfg=engine_cfg ) # convert all values which are lists to tuples for key, value in cfg.items(): if isinstance(value, list): cfg[key] = tuple(value) # update model cfg model.cfg = cfg return model ================================================ FILE: onnxtr/models/predictor/__init__.py ================================================ from .predictor import * ================================================ FILE: onnxtr/models/predictor/base.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
from collections.abc import Callable from typing import Any import numpy as np from onnxtr.models.builder import DocumentBuilder from onnxtr.models.engine import EngineConfig from onnxtr.utils.geometry import extract_crops, extract_rcrops, remove_image_padding, rotate_image from .._utils import estimate_orientation, rectify_crops, rectify_loc_preds from ..classification import crop_orientation_predictor, page_orientation_predictor from ..classification.predictor import OrientationPredictor from ..detection.zoo import ARCHS as DETECTION_ARCHS from ..recognition.zoo import ARCHS as RECOGNITION_ARCHS __all__ = ["_OCRPredictor"] class _OCRPredictor: """Implements an object able to localize and identify text elements in a set of documents Args: assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages without rotated textual elements. straighten_pages: if True, estimates the page general orientation based on the median line orientation. Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped accordingly. Doing so will improve performance for documents with page-uniform rotations. preserve_aspect_ratio: if True, resize preserving the aspect ratio (with padding) symmetric_pad: if True and preserve_aspect_ratio is True, pad the image symmetrically. detect_orientation: if True, the estimated general page orientation will be added to the predictions for each page. Doing so will slightly deteriorate the overall latency. load_in_8_bit: whether to load the 8-bit quantized model, defaults to False clf_engine_cfg: configuration of the orientation classification engine **kwargs: keyword args of `DocumentBuilder` """ crop_orientation_predictor: OrientationPredictor | None page_orientation_predictor: OrientationPredictor | None def __init__( self, assume_straight_pages: bool = True, straighten_pages: bool = False, preserve_aspect_ratio: bool = True, symmetric_pad: bool = True, detect_orientation: bool = False, load_in_8_bit: bool = False, clf_engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> None: self.assume_straight_pages = assume_straight_pages self.straighten_pages = straighten_pages self._page_orientation_disabled = kwargs.pop("disable_page_orientation", False) self._crop_orientation_disabled = kwargs.pop("disable_crop_orientation", False) self.crop_orientation_predictor = ( None if assume_straight_pages else crop_orientation_predictor( load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._crop_orientation_disabled ) ) self.page_orientation_predictor = ( page_orientation_predictor( load_in_8_bit=load_in_8_bit, engine_cfg=clf_engine_cfg, disabled=self._page_orientation_disabled ) if detect_orientation or straighten_pages or not assume_straight_pages else None ) self.doc_builder = DocumentBuilder(**kwargs) self.preserve_aspect_ratio = preserve_aspect_ratio self.symmetric_pad = symmetric_pad self.hooks: list[Callable] = [] def _general_page_orientations( self, pages: list[np.ndarray], ) -> list[tuple[int, float]]: _, classes, probs = zip(self.page_orientation_predictor(pages)) # type: ignore[misc] # Flatten to list of tuples with (value, confidence) page_orientations = [ (orientation, prob) for page_classes, page_probs in zip(classes, probs) for orientation, prob in zip(page_classes, page_probs) ] return page_orientations def _get_orientations( self, pages: list[np.ndarray], seg_maps: list[np.ndarray] ) -> tuple[list[tuple[int, float]], list[int]]: general_pages_orientations =
self._general_page_orientations(pages) origin_page_orientations = [ estimate_orientation(seq_map, general_orientation) for seq_map, general_orientation in zip(seg_maps, general_pages_orientations) ] return general_pages_orientations, origin_page_orientations def _straighten_pages( self, pages: list[np.ndarray], seg_maps: list[np.ndarray], general_pages_orientations: list[tuple[int, float]] | None = None, origin_pages_orientations: list[int] | None = None, ) -> list[np.ndarray]: general_pages_orientations = ( general_pages_orientations if general_pages_orientations else self._general_page_orientations(pages) ) origin_pages_orientations = ( origin_pages_orientations if origin_pages_orientations else [ estimate_orientation(seq_map, general_orientation) for seq_map, general_orientation in zip(seg_maps, general_pages_orientations) ] ) return [ # expand if height and width are not equal, afterwards remove padding remove_image_padding(rotate_image(page, angle, expand=page.shape[0] != page.shape[1])) for page, angle in zip(pages, origin_pages_orientations) ] @staticmethod def _generate_crops( pages: list[np.ndarray], loc_preds: list[np.ndarray], channels_last: bool, assume_straight_pages: bool = False, assume_horizontal: bool = False, ) -> list[list[np.ndarray]]: if assume_straight_pages: crops = [ extract_crops(page, _boxes[:, :4], channels_last=channels_last) for page, _boxes in zip(pages, loc_preds) ] else: crops = [ extract_rcrops(page, _boxes[:, :4], channels_last=channels_last, assume_horizontal=assume_horizontal) for page, _boxes in zip(pages, loc_preds) ] return crops @staticmethod def _prepare_crops( pages: list[np.ndarray], loc_preds: list[np.ndarray], channels_last: bool, assume_straight_pages: bool = False, assume_horizontal: bool = False, ) -> tuple[list[list[np.ndarray]], list[np.ndarray]]: crops = _OCRPredictor._generate_crops(pages, loc_preds, channels_last, assume_straight_pages, assume_horizontal) # Avoid sending zero-sized crops is_kept = [[all(s > 0 for s in crop.shape) for crop in page_crops] for page_crops in crops] crops = [ [crop for crop, _kept in zip(page_crops, page_kept) if _kept] for page_crops, page_kept in zip(crops, is_kept) ] loc_preds = [_boxes[_kept] for _boxes, _kept in zip(loc_preds, is_kept)] return crops, loc_preds def _rectify_crops( self, crops: list[list[np.ndarray]], loc_preds: list[np.ndarray], ) -> tuple[list[list[np.ndarray]], list[np.ndarray], list[tuple[int, float]]]: # Work at a page level orientations, classes, probs = zip(*[self.crop_orientation_predictor(page_crops) for page_crops in crops]) # type: ignore[misc] rect_crops = [rectify_crops(page_crops, orientation) for page_crops, orientation in zip(crops, orientations)] rect_loc_preds = [ rectify_loc_preds(page_loc_preds, orientation) if len(page_loc_preds) > 0 else page_loc_preds for page_loc_preds, orientation in zip(loc_preds, orientations) ] # Flatten to list of tuples with (value, confidence) crop_orientations = [ (orientation, prob) for page_classes, page_probs in zip(classes, probs) for orientation, prob in zip(page_classes, page_probs) ] return rect_crops, rect_loc_preds, crop_orientations # type: ignore[return-value] @staticmethod def _process_predictions( loc_preds: list[np.ndarray], word_preds: list[tuple[str, float]], crop_orientations: list[dict[str, Any]], ) -> tuple[list[np.ndarray], list[list[tuple[str, float]]], list[list[dict[str, Any]]]]: text_preds = [] crop_orientation_preds = [] if len(loc_preds) > 0: # Text & crop orientation predictions at page level _idx = 0 for 
page_boxes in loc_preds: text_preds.append(word_preds[_idx : _idx + page_boxes.shape[0]]) crop_orientation_preds.append(crop_orientations[_idx : _idx + page_boxes.shape[0]]) _idx += page_boxes.shape[0] return loc_preds, text_preds, crop_orientation_preds def add_hook(self, hook: Callable) -> None: """Add a hook to the predictor Args: hook: a callable that takes as input the `loc_preds` and returns the modified `loc_preds` """ self.hooks.append(hook) def list_archs(self) -> dict[str, list[str]]: return {"detection_archs": DETECTION_ARCHS, "recognition_archs": RECOGNITION_ARCHS} ================================================ FILE: onnxtr/models/predictor/predictor.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from typing import Any import numpy as np from onnxtr.io.elements import Document from onnxtr.models._utils import get_language from onnxtr.models.detection.predictor import DetectionPredictor from onnxtr.models.engine import EngineConfig from onnxtr.models.recognition.predictor import RecognitionPredictor from onnxtr.utils.geometry import detach_scores from onnxtr.utils.repr import NestedObject from .base import _OCRPredictor __all__ = ["OCRPredictor"] class OCRPredictor(NestedObject, _OCRPredictor): """Implements an object able to localize and identify text elements in a set of documents Args: det_predictor: detection module reco_predictor: recognition module assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages without rotated textual elements. straighten_pages: if True, estimates the page general orientation based on the median line orientation. Then, rotates page before passing it to the deep learning modules. The final predictions will be remapped accordingly. Doing so will improve performances for documents with page-uniform rotations. detect_orientation: if True, the estimated general page orientation will be added to the predictions for each page. Doing so will slightly deteriorate the overall latency. detect_language: if True, the language prediction will be added to the predictions for each page. Doing so will slightly deteriorate the overall latency. 
clf_engine_cfg: configuration of the orientation classification engine **kwargs: keyword args of `DocumentBuilder` """ _children_names = ["det_predictor", "reco_predictor", "doc_builder"] def __init__( self, det_predictor: DetectionPredictor, reco_predictor: RecognitionPredictor, assume_straight_pages: bool = True, straighten_pages: bool = False, preserve_aspect_ratio: bool = True, symmetric_pad: bool = True, detect_orientation: bool = False, detect_language: bool = False, clf_engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> None: self.det_predictor = det_predictor self.reco_predictor = reco_predictor _OCRPredictor.__init__( self, assume_straight_pages, straighten_pages, preserve_aspect_ratio, symmetric_pad, detect_orientation, clf_engine_cfg=clf_engine_cfg, **kwargs, ) self.detect_orientation = detect_orientation self.detect_language = detect_language def __call__( self, pages: list[np.ndarray], **kwargs: Any, ) -> Document: # Dimension check if any(page.ndim != 3 for page in pages): raise ValueError("incorrect input shape: all pages are expected to be multi-channel 2D images.") origin_page_shapes = [page.shape[:2] for page in pages] # Localize text elements loc_preds, out_maps = self.det_predictor(pages, return_maps=True, **kwargs) # Detect document rotation and rotate pages seg_maps = [ np.where(out_map > getattr(self.det_predictor.model.postprocessor, "bin_thresh"), 255, 0).astype(np.uint8) for out_map in out_maps ] if self.detect_orientation: general_pages_orientations, origin_pages_orientations = self._get_orientations(pages, seg_maps) orientations = [ {"value": orientation_page, "confidence": None} for orientation_page in origin_pages_orientations ] else: orientations = None general_pages_orientations = None origin_pages_orientations = None if self.straighten_pages: pages = self._straighten_pages(pages, seg_maps, general_pages_orientations, origin_pages_orientations) # update page shapes after straightening origin_page_shapes = [page.shape[:2] for page in pages] # forward again to get predictions on straight pages loc_preds = self.det_predictor(pages, **kwargs) # type: ignore[assignment] # Detach objectness scores from loc_preds loc_preds, objectness_scores = detach_scores(loc_preds) # type: ignore[arg-type] # Apply hooks to loc_preds if any for hook in self.hooks: loc_preds = hook(loc_preds) # Crop images crops, loc_preds = self._prepare_crops( pages, loc_preds, channels_last=True, assume_straight_pages=self.assume_straight_pages, assume_horizontal=self._page_orientation_disabled, ) # Rectify crop orientation and get crop orientation predictions crop_orientations: Any = [] if not self.assume_straight_pages: crops, loc_preds, _crop_orientations = self._rectify_crops(crops, loc_preds) crop_orientations = [ {"value": orientation[0], "confidence": orientation[1]} for orientation in _crop_orientations ] # Identify character sequences word_preds = self.reco_predictor([crop for page_crops in crops for crop in page_crops], **kwargs) if not crop_orientations: crop_orientations = [{"value": 0, "confidence": None} for _ in word_preds] boxes, text_preds, crop_orientations = self._process_predictions(loc_preds, word_preds, crop_orientations) if self.detect_language: languages = [get_language(" ".join([item[0] for item in text_pred])) for text_pred in text_preds] languages_dict = [{"value": lang[0], "confidence": lang[1]} for lang in languages] else: languages_dict = None out = self.doc_builder( pages, boxes, objectness_scores, text_preds, origin_page_shapes, crop_orientations, 
orientations, languages_dict, ) return out ================================================ FILE: onnxtr/models/preprocessor/__init__.py ================================================ from .base import * ================================================ FILE: onnxtr/models/preprocessor/base.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. import math from typing import Any import numpy as np from onnxtr.transforms import Normalize, Resize from onnxtr.utils.geometry import shape_translate from onnxtr.utils.multithreading import multithread_exec from onnxtr.utils.repr import NestedObject __all__ = ["PreProcessor"] class PreProcessor(NestedObject): """Implements an abstract preprocessor object which performs casting, resizing, batching and normalization. Args: output_size: expected size of each page in format (H, W) batch_size: the size of page batches mean: mean value of the training distribution by channel std: standard deviation of the training distribution by channel **kwargs: additional arguments for the resizing operation """ _children_names: list[str] = ["resize", "normalize"] def __init__( self, output_size: tuple[int, int], batch_size: int, mean: tuple[float, float, float] = (0.5, 0.5, 0.5), std: tuple[float, float, float] = (1.0, 1.0, 1.0), **kwargs: Any, ) -> None: self.batch_size = batch_size self.resize = Resize(output_size, **kwargs) self.normalize = Normalize(mean, std) def batch_inputs(self, samples: list[np.ndarray]) -> list[np.ndarray]: """Gather samples into batches for inference purposes Args: samples: list of samples (tf.Tensor) Returns: list of batched samples """ num_batches = int(math.ceil(len(samples) / self.batch_size)) batches = [ np.stack(samples[idx * self.batch_size : min((idx + 1) * self.batch_size, len(samples))], axis=0) for idx in range(int(num_batches)) ] return batches def sample_transforms(self, x: np.ndarray) -> np.ndarray: if x.ndim != 3: raise AssertionError("expected list of 3D Tensors") if isinstance(x, np.ndarray): if x.dtype not in (np.uint8, np.float32): raise TypeError("unsupported data type for numpy.ndarray") x = shape_translate(x, "HWC") # Resizing x = self.resize(x) # Data type & 255 division if x.dtype == np.uint8: x = x.astype(np.float32) / 255.0 return x def __call__(self, x: np.ndarray | list[np.ndarray]) -> list[np.ndarray]: """Prepare document data for model forwarding Args: x: list of images (np.array) or tensors (already resized and batched) Returns: list of page batches """ # Input type check if isinstance(x, np.ndarray): if x.ndim != 4: raise AssertionError("expected 4D Tensor") if isinstance(x, np.ndarray): if x.dtype not in (np.uint8, np.float32): raise TypeError("unsupported data type for numpy.ndarray") x = shape_translate(x, "BHWC") # Resizing if (x.shape[1], x.shape[2]) != self.resize.output_size: x = np.array([self.resize(sample) for sample in x]) # Data type & 255 division if x.dtype == np.uint8: x = x.astype(np.float32) / 255.0 batches = [x] elif isinstance(x, list) and all(isinstance(sample, np.ndarray) for sample in x): # Sample transform (to tensor, resize) samples = list(multithread_exec(self.sample_transforms, x)) # Batching batches = self.batch_inputs(samples) else: raise TypeError(f"invalid input type: {type(x)}") # Batch transforms (normalize) batches = list(multithread_exec(self.normalize, batches)) return batches 
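The `PreProcessor` above handles casting, resizing, batching and normalization in one call. A small sketch of that contract, with arbitrarily chosen shapes: five 64x256 uint8 images and `batch_size=2` come back as three float32 batches at the target size:

```python
import numpy as np

from onnxtr.models.preprocessor import PreProcessor

# Recognition-style target size (H, W) and small batches
pre = PreProcessor(output_size=(32, 128), batch_size=2, mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

images = [np.random.randint(0, 255, (64, 256, 3), dtype=np.uint8) for _ in range(5)]
batches = pre(images)

print(len(batches))      # 3 -> batches of 2, 2 and 1 samples
print(batches[0].shape)  # (2, 32, 128, 3): resized, channels-last
print(batches[0].dtype)  # float32: cast, divided by 255, then normalized
```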
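The CRNN and VIPTR post-processors in the recognition files below decode with CTC best-path: take the argmax at each time step, collapse repeated labels, drop blanks, and score the word with the weakest per-step confidence. A toy, self-contained illustration of that collapse step (the vocab and logits are invented for the example):

```python
from itertools import groupby

import numpy as np
from scipy.special import softmax

vocab = "ab"          # blank index = len(vocab) = 2, as in CRNNPostProcessor
logits = np.array([[  # 1 sequence, 5 time steps, 3 classes (a, b, blank)
    [5.0, 0.0, 0.0],  # a
    [5.0, 0.0, 0.0],  # a (repeat -> collapsed)
    [0.0, 0.0, 5.0],  # blank (dropped)
    [0.0, 5.0, 0.0],  # b
    [0.0, 5.0, 0.0],  # b (repeat -> collapsed)
]])

best_path = np.argmax(logits, axis=-1)  # (1, 5)
words = ["".join(vocab[k] for k, _ in groupby(seq) if k != len(vocab)) for seq in best_path.tolist()]
probs = softmax(logits, axis=-1).max(axis=-1).min(axis=1)  # weakest per-step confidence
print(list(zip(words, probs.round(3).tolist())))  # [('ab', 0.987)]
```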
================================================ FILE: onnxtr/models/recognition/__init__.py ================================================ from .models import * from .zoo import * ================================================ FILE: onnxtr/models/recognition/core.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from onnxtr.utils.repr import NestedObject __all__ = ["RecognitionPostProcessor"] class RecognitionPostProcessor(NestedObject): """Abstract class to postprocess the raw output of the model Args: vocab: string containing the ordered sequence of supported characters """ def __init__( self, vocab: str, ) -> None: self.vocab = vocab self._embedding = list(self.vocab) + [""] def extra_repr(self) -> str: return f"vocab_size={len(self.vocab)}" ================================================ FILE: onnxtr/models/recognition/models/__init__.py ================================================ from .crnn import * from .sar import * from .master import * from .vitstr import * from .parseq import * from .viptr import * ================================================ FILE: onnxtr/models/recognition/models/crnn.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from copy import deepcopy from itertools import groupby from typing import Any import numpy as np from scipy.special import softmax from onnxtr.utils import VOCABS from ...engine import Engine, EngineConfig from ..core import RecognitionPostProcessor __all__ = ["CRNN", "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large"] default_cfgs: dict[str, dict[str, Any]] = { "crnn_vgg16_bn": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), "input_shape": (3, 32, 128), "vocab": VOCABS["french"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.7.1/crnn_vgg16_bn-743599aa.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.7.1/crnn_vgg16_bn_static_8_bit-df1b594d.onnx", }, "crnn_mobilenet_v3_small": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), "input_shape": (3, 32, 128), "vocab": VOCABS["french"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_small-bded4d49.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_small_static_8_bit-4949006f.onnx", }, "crnn_mobilenet_v3_large": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), "input_shape": (3, 32, 128), "vocab": VOCABS["french"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_large-d42e8185.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/crnn_mobilenet_v3_large_static_8_bit-459e856d.onnx", }, } class CRNNPostProcessor(RecognitionPostProcessor): """Postprocess raw prediction of the model (logits) to a list of words using CTC decoding Args: vocab: string containing the ordered sequence of supported characters """ def __init__(self, vocab): self.vocab = vocab def decode_sequence(self, sequence, vocab): return "".join([vocab[int(char)] for char in sequence]) def ctc_best_path( self, logits, vocab, blank=0, ): """Implements best path decoding as shown by Graves (Dissertation, p63), highly 
inspired from `<https://github.com/githubharald/CTCDecoder>`_. Args: logits: model output, shape: N x T x C vocab: vocabulary to use blank: index of blank label Returns: A list of tuples: (word, confidence) """ # Gather the most confident characters, and assign the smallest conf among those to the sequence prob probs = softmax(logits, axis=-1).max(axis=-1).min(axis=1) # collapse best path (using itertools.groupby), map to chars, join char list to string words = [ self.decode_sequence([k for k, _ in groupby(seq.tolist()) if k != blank], vocab) for seq in np.argmax(logits, axis=-1) ] return list(zip(words, probs.astype(float).tolist())) def __call__(self, logits): """Performs decoding of raw output with CTC and decoding of CTC predictions with label_to_idx mapping dictionary Args: logits: raw output of the model, shape (N, seq_len, C + 1) Returns: A list of tuples: (word, confidence) """ # Decode CTC return self.ctc_best_path(logits=logits, vocab=self.vocab, blank=len(self.vocab)) class CRNN(Engine): """CRNN Onnx loader Args: model_path: path or url to onnx model file vocab: vocabulary used for encoding engine_cfg: configuration for the inference engine cfg: configuration dictionary **kwargs: additional arguments to be passed to `Engine` """ _children_names: list[str] = ["postprocessor"] def __init__( self, model_path: str, vocab: str, engine_cfg: EngineConfig | None = None, cfg: dict[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs) self.vocab = vocab self.cfg = cfg self.postprocessor = CRNNPostProcessor(self.vocab) def __call__( self, x: np.ndarray, return_model_output: bool = False, ) -> dict[str, Any]: logits = self.run(x) out: dict[str, Any] = {} if return_model_output: out["out_map"] = logits # Post-process out["preds"] = self.postprocessor(logits) return out def _crnn( arch: str, model_path: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> CRNN: kwargs["vocab"] = kwargs.get("vocab", default_cfgs[arch]["vocab"]) _cfg = deepcopy(default_cfgs[arch]) _cfg["vocab"] = kwargs["vocab"] _cfg["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"]) # Patch the url model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path # Build the model return CRNN(model_path, cfg=_cfg, engine_cfg=engine_cfg, **kwargs) def crnn_vgg16_bn( model_path: str = default_cfgs["crnn_vgg16_bn"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> CRNN: """CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.
>>> import numpy as np >>> from onnxtr.models import crnn_vgg16_bn >>> model = crnn_vgg16_bn() >>> input_tensor = np.random.rand(1, 3, 32, 128) >>> out = model(input_tensor) Args: model_path: path to onnx model file, defaults to url in default_cfgs load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration for the inference engine **kwargs: keyword arguments of the CRNN architecture Returns: text recognition architecture """ return _crnn("crnn_vgg16_bn", model_path, load_in_8_bit, engine_cfg, **kwargs) def crnn_mobilenet_v3_small( model_path: str = default_cfgs["crnn_mobilenet_v3_small"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> CRNN: """CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_. >>> import numpy as np >>> from onnxtr.models import crnn_mobilenet_v3_small >>> model = crnn_mobilenet_v3_small() >>> input_tensor = np.random.rand(1, 3, 32, 128) >>> out = model(input_tensor) Args: model_path: path to onnx model file, defaults to url in default_cfgs load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration for the inference engine **kwargs: keyword arguments of the CRNN architecture Returns: text recognition architecture """ return _crnn("crnn_mobilenet_v3_small", model_path, load_in_8_bit, engine_cfg, **kwargs) def crnn_mobilenet_v3_large( model_path: str = default_cfgs["crnn_mobilenet_v3_large"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> CRNN: """CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_. >>> import numpy as np >>> from onnxtr.models import crnn_mobilenet_v3_large >>> model = crnn_mobilenet_v3_large() >>> input_tensor = np.random.rand(1, 3, 32, 128) >>> out = model(input_tensor) Args: model_path: path to onnx model file, defaults to url in default_cfgs load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration for the inference engine **kwargs: keyword arguments of the CRNN architecture Returns: text recognition architecture """ return _crnn("crnn_mobilenet_v3_large", model_path, load_in_8_bit, engine_cfg, **kwargs) ================================================ FILE: onnxtr/models/recognition/models/master.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich.
from copy import deepcopy from typing import Any import numpy as np from scipy.special import softmax from onnxtr.utils import VOCABS from ...engine import Engine, EngineConfig from ..core import RecognitionPostProcessor __all__ = ["MASTER", "master"] default_cfgs: dict[str, dict[str, Any]] = { "master": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), "input_shape": (3, 32, 128), "vocab": VOCABS["french"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/master-b1287fcd.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/master_dynamic_8_bit-d8bd8206.onnx", }, } class MASTER(Engine): """MASTER Onnx loader Args: model_path: path or url to onnx model file vocab: vocabulary (without EOS, SOS, PAD) engine_cfg: configuration for the inference engine cfg: dictionary containing information about the model **kwargs: additional arguments to be passed to `Engine` """ def __init__( self, model_path: str, vocab: str, engine_cfg: EngineConfig | None = None, cfg: dict[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs) self.vocab = vocab self.cfg = cfg self.postprocessor = MASTERPostProcessor(vocab=self.vocab) def __call__( self, x: np.ndarray, return_model_output: bool = False, ) -> dict[str, Any]: """Call function Args: x: images return_model_output: if True, return logits Returns: A dictionary containing the predictions and optionally the logits. """ logits = self.run(x) out: dict[str, Any] = {} if return_model_output: out["out_map"] = logits out["preds"] = self.postprocessor(logits) return out class MASTERPostProcessor(RecognitionPostProcessor): """Post-processor for the MASTER model Args: vocab: string containing the ordered sequence of supported characters """ def __init__( self, vocab: str, ) -> None: super().__init__(vocab) self._embedding = list(vocab) + ["<eos>"] + ["<sos>"] + ["<pad>"] def __call__(self, logits: np.ndarray) -> list[tuple[str, float]]: # compute pred with argmax for attention models out_idxs = np.argmax(logits, axis=-1) # N x L probs = np.take_along_axis(softmax(logits, axis=-1), out_idxs[..., None], axis=-1).squeeze(-1) # Take the minimum confidence of the sequence probs = np.min(probs, axis=1) word_values = [ "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs ] return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist())) def _master( arch: str, model_path: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> MASTER: # Patch the config _cfg = deepcopy(default_cfgs[arch]) _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"]) _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"]) kwargs["vocab"] = _cfg["vocab"] # Patch the url model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path return MASTER(model_path, cfg=_cfg, engine_cfg=engine_cfg, **kwargs) def master( model_path: str = default_cfgs["master"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> MASTER: """MASTER as described in `"MASTER: Multi-Aspect Non-local Network for Scene Text Recognition" <https://arxiv.org/pdf/1910.02562.pdf>`_.
>>> import numpy as np >>> from onnxtr.models import master >>> model = master() >>> input_tensor = np.random.rand(1, 3, 32, 128) >>> out = model(input_tensor) Args: model_path: path to onnx model file, defaults to url in default_cfgs load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration for the inference engine **kwargs: keyword arguments passed to the MASTER architecture Returns: text recognition architecture """ return _master("master", model_path, load_in_8_bit, engine_cfg, **kwargs) ================================================ FILE: onnxtr/models/recognition/models/parseq.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from copy import deepcopy from typing import Any import numpy as np from scipy.special import softmax from onnxtr.utils import VOCABS from ...engine import Engine, EngineConfig from ..core import RecognitionPostProcessor __all__ = ["PARSeq", "parseq"] default_cfgs: dict[str, dict[str, Any]] = { "parseq": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), "input_shape": (3, 32, 128), "vocab": VOCABS["french"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/parseq-00b40714.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/parseq_dynamic_8_bit-5b04d9f7.onnx", }, } class PARSeq(Engine): """PARSeq Onnx loader Args: model_path: path to onnx model file vocab: vocabulary used for encoding engine_cfg: configuration for the inference engine cfg: dictionary containing information about the model **kwargs: additional arguments to be passed to `Engine` """ def __init__( self, model_path: str, vocab: str, engine_cfg: EngineConfig | None = None, cfg: dict[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs) self.vocab = vocab self.cfg = cfg self.postprocessor = PARSeqPostProcessor(vocab=self.vocab) def __call__( self, x: np.ndarray, return_model_output: bool = False, ) -> dict[str, Any]: logits = self.run(x) out: dict[str, Any] = {} if return_model_output: out["out_map"] = logits out["preds"] = self.postprocessor(logits) return out class PARSeqPostProcessor(RecognitionPostProcessor): """Post processor for PARSeq architecture Args: vocab: string containing the ordered sequence of supported characters """ def __init__( self, vocab: str, ) -> None: super().__init__(vocab) self._embedding = list(vocab) + ["<eos>", "<sos>", "<pad>"] def __call__(self, logits): # compute pred with argmax for attention models out_idxs = np.argmax(logits, axis=-1) preds_prob = softmax(logits, axis=-1).max(axis=-1) word_values = [ "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs ] # compute probabilities for each word up to the EOS token probs = [ preds_prob[i, : len(word)].clip(0, 1).mean().astype(float) if word else 0.0 for i, word in enumerate(word_values) ] return list(zip(word_values, probs)) def _parseq( arch: str, model_path: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> PARSeq: # Patch the config _cfg = deepcopy(default_cfgs[arch]) _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"]) _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"]) kwargs["vocab"] = _cfg["vocab"] # Patch the url model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in
model_path else model_path # Build the model return PARSeq(model_path, cfg=_cfg, engine_cfg=engine_cfg, **kwargs) def parseq( model_path: str = default_cfgs["parseq"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> PARSeq: """PARSeq architecture from `"Scene Text Recognition with Permuted Autoregressive Sequence Models" <https://arxiv.org/pdf/2207.06966.pdf>`_. >>> import numpy as np >>> from onnxtr.models import parseq >>> model = parseq() >>> input_tensor = np.random.rand(1, 3, 32, 128) >>> out = model(input_tensor) Args: model_path: path to onnx model file, defaults to url in default_cfgs load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration for the inference engine **kwargs: keyword arguments of the PARSeq architecture Returns: text recognition architecture """ return _parseq("parseq", model_path, load_in_8_bit, engine_cfg, **kwargs) ================================================ FILE: onnxtr/models/recognition/models/sar.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from copy import deepcopy from typing import Any import numpy as np from scipy.special import softmax from onnxtr.utils import VOCABS from ...engine import Engine, EngineConfig from ..core import RecognitionPostProcessor __all__ = ["SAR", "sar_resnet31"] default_cfgs: dict[str, dict[str, Any]] = { "sar_resnet31": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), "input_shape": (3, 32, 128), "vocab": VOCABS["french"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/sar_resnet31-395f8005.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/sar_resnet31_static_8_bit-c07316bc.onnx", }, } class SAR(Engine): """SAR Onnx loader Args: model_path: path to onnx model file vocab: vocabulary used for encoding engine_cfg: configuration for the inference engine cfg: dictionary containing information about the model **kwargs: additional arguments to be passed to `Engine` """ def __init__( self, model_path: str, vocab: str, engine_cfg: EngineConfig | None = None, cfg: dict[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs) self.vocab = vocab self.cfg = cfg self.postprocessor = SARPostProcessor(self.vocab) def __call__( self, x: np.ndarray, return_model_output: bool = False, ) -> dict[str, Any]: logits = self.run(x) out: dict[str, Any] = {} if return_model_output: out["out_map"] = logits out["preds"] = self.postprocessor(logits) return out class SARPostProcessor(RecognitionPostProcessor): """Post processor for SAR architectures Args: vocab: string containing the ordered sequence of supported characters """ def __init__( self, vocab: str, ) -> None: super().__init__(vocab) self._embedding = list(self.vocab) + ["<eos>"] def __call__(self, logits): # compute pred with argmax for attention models out_idxs = np.argmax(logits, axis=-1) # N x L probs = np.take_along_axis(softmax(logits, axis=-1), out_idxs[..., None], axis=-1).squeeze(-1) # Take the minimum confidence of the sequence probs = np.min(probs, axis=1) word_values = [ "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs ] return list(zip(word_values, np.clip(probs, 0, 1).astype(float).tolist())) def _sar( arch: str, model_path: str, load_in_8_bit: bool = False, engine_cfg:
EngineConfig | None = None, **kwargs: Any, ) -> SAR: # Patch the config _cfg = deepcopy(default_cfgs[arch]) _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"]) _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"]) kwargs["vocab"] = _cfg["vocab"] # Patch the url model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path # Build the model return SAR(model_path, cfg=_cfg, engine_cfg=engine_cfg, **kwargs) def sar_resnet31( model_path: str = default_cfgs["sar_resnet31"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> SAR: """SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read: A Simple and Strong Baseline for Irregular Text Recognition" <https://arxiv.org/pdf/1811.00751.pdf>`_. >>> import numpy as np >>> from onnxtr.models import sar_resnet31 >>> model = sar_resnet31() >>> input_tensor = np.random.rand(1, 3, 32, 128) >>> out = model(input_tensor) Args: model_path: path to onnx model file, defaults to url in default_cfgs load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration for the inference engine **kwargs: keyword arguments of the SAR architecture Returns: text recognition architecture """ return _sar("sar_resnet31", model_path, load_in_8_bit, engine_cfg, **kwargs) ================================================ FILE: onnxtr/models/recognition/models/viptr.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. import logging from copy import deepcopy from itertools import groupby from typing import Any import numpy as np from scipy.special import softmax from onnxtr.utils import VOCABS from ...engine import Engine, EngineConfig from ..core import RecognitionPostProcessor __all__ = ["VIPTR", "viptr_tiny"] default_cfgs: dict[str, dict[str, Any]] = { "viptr_tiny": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), "input_shape": (3, 32, 128), "vocab": VOCABS["french"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.3/viptr_tiny-499b8015.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.6.3/viptr_tiny-499b8015.onnx", }, } class VIPTRPostProcessor(RecognitionPostProcessor): """Postprocess raw prediction of the model (logits) to a list of words using CTC decoding Args: vocab: string containing the ordered sequence of supported characters """ def __init__(self, vocab): self.vocab = vocab def decode_sequence(self, sequence, vocab): return "".join([vocab[int(char)] for char in sequence]) def ctc_best_path( self, logits, vocab, blank=0, ): """Implements best path decoding as shown by Graves (Dissertation, p63), highly inspired from `<https://github.com/githubharald/CTCDecoder>`_.
Args: logits: model output, shape: N x T x C vocab: vocabulary to use blank: index of blank label Returns: A list of tuples: (word, confidence) """ # Gather the most confident characters, and assign the smallest conf among those to the sequence prob probs = softmax(logits, axis=-1).max(axis=-1).min(axis=1) # collapse best path (using itertools.groupby), map to chars, join char list to string words = [ self.decode_sequence([k for k, _ in groupby(seq.tolist()) if k != blank], vocab) for seq in np.argmax(logits, axis=-1) ] return list(zip(words, probs.astype(float).tolist())) def __call__(self, logits): """Performs decoding of raw output with CTC and decoding of CTC predictions with label_to_idx mapping dictionary Args: logits: raw output of the model, shape (N, seq_len, C + 1) Returns: A list of tuples: (word, confidence) """ # Decode CTC return self.ctc_best_path(logits=logits, vocab=self.vocab, blank=len(self.vocab)) class VIPTR(Engine): """VIPTR Onnx loader Args: model_path: path or url to onnx model file vocab: vocabulary used for encoding engine_cfg: configuration for the inference engine cfg: configuration dictionary **kwargs: additional arguments to be passed to `Engine` """ _children_names: list[str] = ["postprocessor"] def __init__( self, model_path: str, vocab: str, engine_cfg: EngineConfig | None = None, cfg: dict[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs) self.vocab = vocab self.cfg = cfg self.postprocessor = VIPTRPostProcessor(self.vocab) def __call__( self, x: np.ndarray, return_model_output: bool = False, ) -> dict[str, Any]: logits = self.run(x) out: dict[str, Any] = {} if return_model_output: out["out_map"] = logits # Post-process out["preds"] = self.postprocessor(logits) return out def _viptr( arch: str, model_path: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> VIPTR: if load_in_8_bit: logging.warning("VIPTR models do not support 8-bit quantization yet. Loading full precision model...") kwargs["vocab"] = kwargs.get("vocab", default_cfgs[arch]["vocab"]) _cfg = deepcopy(default_cfgs[arch]) _cfg["vocab"] = kwargs["vocab"] _cfg["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"]) # Patch the url model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path # Build the model return VIPTR(model_path, cfg=_cfg, engine_cfg=engine_cfg, **kwargs) def viptr_tiny( model_path: str = default_cfgs["viptr_tiny"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> VIPTR: """VIPTR as described in `"VIPTR: A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition" <https://arxiv.org/abs/2401.10110>`_. >>> import numpy as np >>> from onnxtr.models import viptr_tiny >>> model = viptr_tiny() >>> input_tensor = np.random.rand(1, 3, 32, 128) >>> out = model(input_tensor) Args: model_path: path to onnx model file, defaults to url in default_cfgs load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration for the inference engine **kwargs: keyword arguments of the VIPTR architecture Returns: text recognition architecture """ return _viptr("viptr_tiny", model_path, load_in_8_bit, engine_cfg, **kwargs) ================================================ FILE: onnxtr/models/recognition/models/vitstr.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich.
# This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from copy import deepcopy from typing import Any import numpy as np from scipy.special import softmax from onnxtr.utils import VOCABS from ...engine import Engine, EngineConfig from ..core import RecognitionPostProcessor __all__ = ["ViTSTR", "vitstr_small", "vitstr_base"] default_cfgs: dict[str, dict[str, Any]] = { "vitstr_small": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), "input_shape": (3, 32, 128), "vocab": VOCABS["french"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_small-3ff9c500.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_small_dynamic_8_bit-bec6c796.onnx", }, "vitstr_base": { "mean": (0.694, 0.695, 0.693), "std": (0.299, 0.296, 0.301), "input_shape": (3, 32, 128), "vocab": VOCABS["french"], "url": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/vitstr_base-ff62f5be.onnx", "url_8_bit": "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.1.2/vitstr_base_dynamic_8_bit-976c7cd6.onnx", }, } class ViTSTR(Engine): """ViTSTR Onnx loader Args: model_path: path to onnx model file vocab: vocabulary used for encoding engine_cfg: configuration for the inference engine cfg: dictionary containing information about the model **kwargs: additional arguments to be passed to `Engine` """ def __init__( self, model_path: str, vocab: str, engine_cfg: EngineConfig | None = None, cfg: dict[str, Any] | None = None, **kwargs: Any, ) -> None: super().__init__(url=model_path, engine_cfg=engine_cfg, **kwargs) self.vocab = vocab self.cfg = cfg self.postprocessor = ViTSTRPostProcessor(vocab=self.vocab) def __call__( self, x: np.ndarray, return_model_output: bool = False, ) -> dict[str, Any]: logits = self.run(x) out: dict[str, Any] = {} if return_model_output: out["out_map"] = logits out["preds"] = self.postprocessor(logits) return out class ViTSTRPostProcessor(RecognitionPostProcessor): """Post processor for ViTSTR architecture Args: vocab: string containing the ordered sequence of supported characters """ def __init__( self, vocab: str, ) -> None: super().__init__(vocab) self._embedding = list(vocab) + ["<eos>", "<sos>"] def __call__(self, logits): # compute pred with argmax for attention models out_idxs = np.argmax(logits, axis=-1) preds_prob = softmax(logits, axis=-1).max(axis=-1) word_values = [ "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0] for encoded_seq in out_idxs ] # compute probabilities for each word up to the EOS token probs = [ preds_prob[i, : len(word)].clip(0, 1).mean().astype(float) if word else 0.0 for i, word in enumerate(word_values) ] return list(zip(word_values, probs)) def _vitstr( arch: str, model_path: str, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> ViTSTR: # Patch the config _cfg = deepcopy(default_cfgs[arch]) _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"]) _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"]) kwargs["vocab"] = _cfg["vocab"] # Patch the url model_path = default_cfgs[arch]["url_8_bit"] if load_in_8_bit and "http" in model_path else model_path # Build the model return ViTSTR(model_path, cfg=_cfg, engine_cfg=engine_cfg, **kwargs) def vitstr_small( model_path: str = default_cfgs["vitstr_small"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> ViTSTR: """ViTSTR-Small as described in `"Vision Transformer for Fast
and Efficient Scene Text Recognition" <https://arxiv.org/pdf/2105.08582.pdf>`_. >>> import numpy as np >>> from onnxtr.models import vitstr_small >>> model = vitstr_small() >>> input_tensor = np.random.rand(1, 3, 32, 128) >>> out = model(input_tensor) Args: model_path: path to onnx model file, defaults to url in default_cfgs load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration for the inference engine **kwargs: keyword arguments of the ViTSTR architecture Returns: text recognition architecture """ return _vitstr("vitstr_small", model_path, load_in_8_bit, engine_cfg, **kwargs) def vitstr_base( model_path: str = default_cfgs["vitstr_base"]["url"], load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> ViTSTR: """ViTSTR-Base as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition" <https://arxiv.org/pdf/2105.08582.pdf>`_. >>> import numpy as np >>> from onnxtr.models import vitstr_base >>> model = vitstr_base() >>> input_tensor = np.random.rand(1, 3, 32, 128) >>> out = model(input_tensor) Args: model_path: path to onnx model file, defaults to url in default_cfgs load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration for the inference engine **kwargs: keyword arguments of the ViTSTR architecture Returns: text recognition architecture """ return _vitstr("vitstr_base", model_path, load_in_8_bit, engine_cfg, **kwargs) ================================================ FILE: onnxtr/models/recognition/predictor/__init__.py ================================================ from .base import * ================================================ FILE: onnxtr/models/recognition/predictor/_utils.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. import math import numpy as np from ..utils import merge_multi_strings __all__ = ["split_crops", "remap_preds"] def split_crops( crops: list[np.ndarray], max_ratio: float, target_ratio: int, split_overlap_ratio: float, channels_last: bool = True, ) -> tuple[list[np.ndarray], list[int | tuple[int, int, float]], bool]: """ Split crops horizontally if they exceed a given aspect ratio. Args: crops: List of image crops (H, W, C) if channels_last else (C, H, W). max_ratio: Aspect ratio threshold above which crops are split. target_ratio: Target aspect ratio after splitting (e.g., 4 for 128x32). split_overlap_ratio: Desired overlap between splits (as a fraction of split width). channels_last: Whether the crops are in channels-last format. Returns: A tuple containing: - The new list of crops (possibly with splits), - A mapping indicating how to reassemble predictions, - A boolean indicating whether remapping is required.
""" if split_overlap_ratio <= 0.0 or split_overlap_ratio >= 1.0: raise ValueError(f"Valid range for split_overlap_ratio is (0.0, 1.0), but is: {split_overlap_ratio}") remap_required = False new_crops: list[np.ndarray] = [] crop_map: list[int | tuple[int, int, float]] = [] for crop in crops: h, w = crop.shape[:2] if channels_last else crop.shape[-2:] aspect_ratio = w / h if aspect_ratio > max_ratio: split_width = max(1, math.ceil(h * target_ratio)) overlap_width = max(0, math.floor(split_width * split_overlap_ratio)) splits, last_overlap = _split_horizontally(crop, split_width, overlap_width, channels_last) # Remove any empty splits splits = [s for s in splits if all(dim > 0 for dim in s.shape)] if splits: crop_map.append((len(new_crops), len(new_crops) + len(splits), last_overlap)) new_crops.extend(splits) remap_required = True else: # Fallback: treat it as a single crop crop_map.append(len(new_crops)) new_crops.append(crop) else: crop_map.append(len(new_crops)) new_crops.append(crop) return new_crops, crop_map, remap_required def _split_horizontally( image: np.ndarray, split_width: int, overlap_width: int, channels_last: bool ) -> tuple[list[np.ndarray], float]: """ Horizontally split a single image with overlapping regions. Args: image: The image to split (H, W, C) if channels_last else (C, H, W). split_width: Width of each split. overlap_width: Width of the overlapping region. channels_last: Whether the image is in channels-last format. Returns: - A list of horizontal image slices. - The actual overlap ratio of the last split. """ image_width = image.shape[1] if channels_last else image.shape[-1] if image_width <= split_width: return [image], 0.0 # Compute start columns for each split step = split_width - overlap_width starts = list(range(0, image_width - split_width + 1, step)) # Ensure the last patch reaches the end of the image if starts[-1] + split_width < image_width: starts.append(image_width - split_width) splits = [] for start_col in starts: end_col = start_col + split_width if channels_last: split = image[:, start_col:end_col, :] else: split = image[:, :, start_col:end_col] splits.append(split) # Calculate the last overlap ratio, if only one split no overlap last_overlap = 0 if len(starts) > 1: last_overlap = (starts[-2] + split_width) - starts[-1] last_overlap_ratio = last_overlap / split_width if split_width else 0.0 return splits, last_overlap_ratio def remap_preds( preds: list[tuple[str, float]], crop_map: list[int | tuple[int, int, float]], overlap_ratio: float, ) -> list[tuple[str, float]]: """ Reconstruct predictions from possibly split crops. Args: preds: List of (text, confidence) tuples from each crop. crop_map: Map returned by `split_crops`. overlap_ratio: Overlap ratio used during splitting. Returns: List of merged (text, confidence) tuples corresponding to original crops. """ remapped = [] for item in crop_map: if isinstance(item, int): remapped.append(preds[item]) else: start_idx, end_idx, last_overlap = item text_parts, confidences = zip(*preds[start_idx:end_idx]) merged_text = merge_multi_strings(list(text_parts), overlap_ratio, last_overlap) merged_conf = sum(confidences) / len(confidences) # average confidence remapped.append((merged_text, merged_conf)) return remapped ================================================ FILE: onnxtr/models/recognition/predictor/base.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. 
# See LICENSE or go to for full license details. from collections.abc import Sequence from typing import Any import numpy as np from onnxtr.models.preprocessor import PreProcessor from onnxtr.utils.repr import NestedObject from ._utils import remap_preds, split_crops __all__ = ["RecognitionPredictor"] class RecognitionPredictor(NestedObject): """Implements an object able to identify character sequences in images Args: pre_processor: transform inputs for easier batched model inference model: core recognition architecture split_wide_crops: wether to use crop splitting for high aspect ratio crops """ def __init__( self, pre_processor: PreProcessor, model: Any, split_wide_crops: bool = True, ) -> None: super().__init__() self.pre_processor = pre_processor self.model = model self.split_wide_crops = split_wide_crops self.critical_ar = 8 # Critical aspect ratio self.overlap_ratio = 0.5 # Ratio of overlap between neighboring crops self.target_ar = 6 # Target aspect ratio def __call__( self, crops: Sequence[np.ndarray], **kwargs: Any, ) -> list[tuple[str, float]]: if len(crops) == 0: return [] # Dimension check if any(crop.ndim != 3 for crop in crops): raise ValueError("incorrect input shape: all crops are expected to be multi-channel 2D images.") # Split crops that are too wide remapped = False if self.split_wide_crops: new_crops, crop_map, remapped = split_crops( crops, # type: ignore[arg-type] self.critical_ar, self.target_ar, self.overlap_ratio, True, ) if remapped: crops = new_crops # Resize & batch them processed_batches = self.pre_processor(crops) # type: ignore[arg-type] # Forward it raw = [self.model(batch, **kwargs)["preds"] for batch in processed_batches] # Process outputs out = [charseq for batch in raw for charseq in batch] # Remap crops if self.split_wide_crops and remapped: out = remap_preds(out, crop_map, self.overlap_ratio) return out ================================================ FILE: onnxtr/models/recognition/utils.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from rapidfuzz.distance import Hamming __all__ = ["merge_strings", "merge_multi_strings"] def merge_strings(a: str, b: str, overlap_ratio: float) -> str: """Merges 2 character sequences in the best way to maximize the alignment of their overlapping characters. Args: a: first char seq, suffix should be similar to b's prefix. b: second char seq, prefix should be similar to a's suffix. overlap_ratio: estimated ratio of overlapping characters. Returns: A merged character sequence. 
Example:: >>> from doctr.models.recognition.utils import merge_strings >>> merge_strings('abcd', 'cdefgh', 0.5) 'abcdefgh' >>> merge_strings('abcdi', 'cdefgh', 0.5) 'abcdefgh' """ seq_len = min(len(a), len(b)) if seq_len <= 1: # One sequence is empty or will be after cropping in next step, return both to keep data return a + b a_crop, b_crop = a[:-1], b[1:] # Remove last letter of "a" and first of "b", because they might be cut off max_overlap = min(len(a_crop), len(b_crop)) # Compute Hamming distances for all possible overlaps scores = [Hamming.distance(a_crop[-i:], b_crop[:i], processor=None) for i in range(1, max_overlap + 1)] # Find zero-score matches zero_matches = [i for i, score in enumerate(scores) if score == 0] expected_overlap = round(len(b) * overlap_ratio) - 3 # adjust for cropping and index # Case 1: One perfect match - exactly one zero score - just merge there if len(zero_matches) == 1: i = zero_matches[0] return a_crop + b_crop[i + 1 :] # Case 2: Multiple perfect matches - likely due to repeated characters. # Use the estimated overlap length to choose the match closest to the expected alignment. elif len(zero_matches) > 1: best_i = min(zero_matches, key=lambda x: abs(x - expected_overlap)) return a_crop + b_crop[best_i + 1 :] # Case 3: Absence of zero scores indicates that the same character in the image was recognized differently OR that # the overlap was too small and we just need to merge the crops fully if expected_overlap < -1: return a + b elif expected_overlap < 0: return a_crop + b_crop # Find best overlap by minimizing Hamming distance + distance from expected overlap size combined_scores = [score + abs(i - expected_overlap) for i, score in enumerate(scores)] best_i = combined_scores.index(min(combined_scores)) return a_crop + b_crop[best_i + 1 :] def merge_multi_strings(seq_list: list[str], overlap_ratio: float, last_overlap_ratio: float) -> str: """ Merges consecutive string sequences with overlapping characters. Args: seq_list: list of sequences to merge. Sequences need to be ordered from left to right. overlap_ratio: Estimated ratio of overlapping letters between neighboring strings. last_overlap_ratio: Estimated ratio of overlapping letters for the last element in seq_list. Returns: A merged character sequence Example:: >>> from doctr.models.recognition.utils import merge_multi_strings >>> merge_multi_strings(['abc', 'bcdef', 'difghi', 'aijkl'], 0.5, 0.1) 'abcdefghijkl' """ if not seq_list: return "" result = seq_list[0] for i in range(1, len(seq_list)): text_b = seq_list[i] ratio = last_overlap_ratio if i == len(seq_list) - 1 else overlap_ratio result = merge_strings(result, text_b, ratio) return result ================================================ FILE: onnxtr/models/recognition/zoo.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from typing import Any from .. 
import recognition from ..engine import EngineConfig from ..preprocessor import PreProcessor from .predictor import RecognitionPredictor __all__ = ["recognition_predictor"] ARCHS: list[str] = [ "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large", "sar_resnet31", "master", "vitstr_small", "vitstr_base", "parseq", "viptr_tiny", ] def _predictor( arch: Any, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any ) -> RecognitionPredictor: if isinstance(arch, str): if arch not in ARCHS: raise ValueError(f"unknown architecture '{arch}'") _model = recognition.__dict__[arch](load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg) else: if not isinstance( arch, ( recognition.CRNN, recognition.SAR, recognition.MASTER, recognition.ViTSTR, recognition.PARSeq, recognition.VIPTR, ), ): raise ValueError(f"unknown architecture: {type(arch)}") _model = arch kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"]) kwargs["std"] = kwargs.get("std", _model.cfg["std"]) kwargs["batch_size"] = kwargs.get("batch_size", 1024) input_shape = _model.cfg["input_shape"][1:] predictor = RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **kwargs), _model) return predictor def recognition_predictor( arch: Any = "crnn_vgg16_bn", symmetric_pad: bool = False, batch_size: int = 128, load_in_8_bit: bool = False, engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> RecognitionPredictor: """Text recognition architecture. Example:: >>> import numpy as np >>> from onnxtr.models import recognition_predictor >>> model = recognition_predictor() >>> input_page = (255 * np.random.rand(32, 128, 3)).astype(np.uint8) >>> out = model([input_page]) Args: arch: name of the architecture or model itself to use (e.g. 'crnn_vgg16_bn') symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right batch_size: number of samples the model processes in parallel load_in_8_bit: whether to load the 8-bit quantized model, defaults to False engine_cfg: configuration of inference engine **kwargs: optional parameters to be passed to the architecture Returns: Recognition predictor """ return _predictor( arch=arch, symmetric_pad=symmetric_pad, batch_size=batch_size, load_in_8_bit=load_in_8_bit, engine_cfg=engine_cfg, **kwargs, ) ================================================ FILE: onnxtr/models/zoo.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details.
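# A minimal usage sketch tying together the recognition zoo in
# onnxtr/models/recognition/zoo.py above (hedged: `EngineConfig` lives in
# `onnxtr.models.engine` per the relative import above, and the
# default-constructed config is an assumption for illustration only):
# >>> import numpy as np
# >>> from onnxtr.models import recognition_predictor
# >>> from onnxtr.models.engine import EngineConfig
# >>> predictor = recognition_predictor("parseq", batch_size=256, engine_cfg=EngineConfig())
# >>> crop = (255 * np.random.rand(32, 128, 3)).astype(np.uint8)
# >>> predictor([crop])  # -> list of (value, confidence) tuples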
from typing import Any from .detection.zoo import detection_predictor from .engine import EngineConfig from .predictor import OCRPredictor from .recognition.zoo import recognition_predictor __all__ = ["ocr_predictor"] def _predictor( det_arch: Any, reco_arch: Any, assume_straight_pages: bool = True, preserve_aspect_ratio: bool = True, symmetric_pad: bool = True, det_bs: int = 2, reco_bs: int = 512, detect_orientation: bool = False, straighten_pages: bool = False, detect_language: bool = False, load_in_8_bit: bool = False, det_engine_cfg: EngineConfig | None = None, reco_engine_cfg: EngineConfig | None = None, clf_engine_cfg: EngineConfig | None = None, **kwargs, ) -> OCRPredictor: # Detection det_predictor = detection_predictor( det_arch, batch_size=det_bs, assume_straight_pages=assume_straight_pages, preserve_aspect_ratio=preserve_aspect_ratio, symmetric_pad=symmetric_pad, load_in_8_bit=load_in_8_bit, engine_cfg=det_engine_cfg, ) # Recognition reco_predictor = recognition_predictor( reco_arch, batch_size=reco_bs, load_in_8_bit=load_in_8_bit, engine_cfg=reco_engine_cfg, ) return OCRPredictor( det_predictor, reco_predictor, assume_straight_pages=assume_straight_pages, preserve_aspect_ratio=preserve_aspect_ratio, symmetric_pad=symmetric_pad, detect_orientation=detect_orientation, straighten_pages=straighten_pages, detect_language=detect_language, clf_engine_cfg=clf_engine_cfg, **kwargs, ) def ocr_predictor( det_arch: Any = "fast_base", reco_arch: Any = "crnn_vgg16_bn", assume_straight_pages: bool = True, preserve_aspect_ratio: bool = True, symmetric_pad: bool = True, export_as_straight_boxes: bool = False, detect_orientation: bool = False, straighten_pages: bool = False, detect_language: bool = False, load_in_8_bit: bool = False, det_engine_cfg: EngineConfig | None = None, reco_engine_cfg: EngineConfig | None = None, clf_engine_cfg: EngineConfig | None = None, **kwargs: Any, ) -> OCRPredictor: """End-to-end OCR architecture using one model for localization, and another for text recognition. >>> import numpy as np >>> from onnxtr.models import ocr_predictor >>> model = ocr_predictor('db_resnet50', 'crnn_vgg16_bn') >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8) >>> out = model([input_page]) Args: det_arch: name of the detection architecture or the model itself to use (e.g. 'db_resnet50', 'db_mobilenet_v3_large') reco_arch: name of the recognition architecture or the model itself to use (e.g. 'crnn_vgg16_bn', 'sar_resnet31') assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages without rotated textual elements. preserve_aspect_ratio: If True, pad the input document image to preserve the aspect ratio before running the detection model on it. symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right. export_as_straight_boxes: when assume_straight_pages is set to False, export final predictions (potentially rotated) as straight bounding boxes. detect_orientation: if True, the estimated general page orientation will be added to the predictions for each page. Doing so will slightly deteriorate the overall latency. straighten_pages: if True, estimates the page general orientation based on the segmentation map median line orientation. Then, rotates the page before passing it again to the deep learning detection module. Doing so will improve performance for documents with page-uniform rotations. detect_language: if True, the language prediction will be added to the predictions for each page.
Doing so will slightly deteriorate the overall latency. load_in_8_bit: whether to load the 8-bit quantized model, defaults to False det_engine_cfg: configuration of the detection engine reco_engine_cfg: configuration of the recognition engine clf_engine_cfg: configuration of the orientation classification engine kwargs: keyword args of `OCRPredictor` Returns: OCR predictor """ return _predictor( det_arch, reco_arch, assume_straight_pages=assume_straight_pages, preserve_aspect_ratio=preserve_aspect_ratio, symmetric_pad=symmetric_pad, export_as_straight_boxes=export_as_straight_boxes, detect_orientation=detect_orientation, straighten_pages=straighten_pages, detect_language=detect_language, load_in_8_bit=load_in_8_bit, det_engine_cfg=det_engine_cfg, reco_engine_cfg=reco_engine_cfg, clf_engine_cfg=clf_engine_cfg, **kwargs, ) ================================================ FILE: onnxtr/py.typed ================================================ ================================================ FILE: onnxtr/transforms/__init__.py ================================================ from .base import * ================================================ FILE: onnxtr/transforms/base.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. import math import numpy as np from PIL import Image, ImageOps __all__ = ["Resize", "Normalize"] class Resize: """Resize the input image to the given size Args: size: the target size of the image interpolation: the interpolation method to use preserve_aspect_ratio: whether to preserve the aspect ratio of the image symmetric_pad: whether to symmetrically pad the image """ def __init__( self, size: int | tuple[int, int], interpolation=Image.Resampling.BILINEAR, preserve_aspect_ratio: bool = False, symmetric_pad: bool = False, ) -> None: self.size = size if isinstance(size, tuple) else (size, size) self.interpolation = interpolation self.preserve_aspect_ratio = preserve_aspect_ratio self.symmetric_pad = symmetric_pad self.output_size = size if isinstance(size, tuple) else (size, size) if not isinstance(self.size, (tuple, int)): raise AssertionError("size should be either a tuple or an int") def __call__(self, img: np.ndarray) -> np.ndarray: if img.dtype != np.uint8: img_pil = Image.fromarray((img * 255).clip(0, 255).astype(np.uint8)) else: img_pil = Image.fromarray(img) sh, sw = self.size w, h = img_pil.size if not self.preserve_aspect_ratio: img_resized_pil = img_pil.resize((sw, sh), resample=self.interpolation) return np.array(img_resized_pil) actual_ratio = h / w target_ratio = sh / sw if actual_ratio > target_ratio: new_h = sh new_w = max(int(sh / actual_ratio), 1) else: new_w = sw new_h = max(int(sw * actual_ratio), 1) img_resized_pil = img_pil.resize((new_w, new_h), resample=self.interpolation) delta_w = sw - new_w delta_h = sh - new_h if self.symmetric_pad: # Symmetric padding pad_left = math.ceil(delta_w / 2) pad_right = math.floor(delta_w / 2) pad_top = math.ceil(delta_h / 2) pad_bottom = math.floor(delta_h / 2) else: # Asymmetric padding pad_left, pad_top = 0, 0 pad_right, pad_bottom = delta_w, delta_h img_padded_pil = ImageOps.expand( img_resized_pil, border=(pad_left, pad_top, pad_right, pad_bottom), fill=0, ) return np.array(img_padded_pil) def __repr__(self) -> str: interpolate_str = self.interpolation _repr = f"output_size={self.size}, interpolation='{interpolate_str}'" if self.preserve_aspect_ratio: _repr
+= f", preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}" return f"{self.__class__.__name__}({_repr})" class Normalize: """Normalize the input image Args: mean: mean values to subtract std: standard deviation values to divide """ def __init__( self, mean: float | tuple[float, float, float] = (0.485, 0.456, 0.406), std: float | tuple[float, float, float] = (0.229, 0.224, 0.225), ) -> None: self.mean = mean self.std = std if not isinstance(self.mean, (float, tuple, list)): raise AssertionError("mean should be either a tuple, a list or a float") if not isinstance(self.std, (float, tuple, list)): raise AssertionError("std should be either a tuple, a list or a float") def __call__( self, img: np.ndarray, ) -> np.ndarray: # Normalize image return (img - np.array(self.mean).astype(img.dtype)) / np.array(self.std).astype(img.dtype) def __repr__(self) -> str: _repr = f"mean={self.mean}, std={self.std}" return f"{self.__class__.__name__}({_repr})" ================================================ FILE: onnxtr/utils/__init__.py ================================================ from .common_types import * from .data import * from .geometry import * from .vocabs import * ================================================ FILE: onnxtr/utils/common_types.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from pathlib import Path __all__ = ["Point2D", "BoundingBox", "Polygon4P", "Polygon", "Bbox"] Point2D = tuple[float, float] BoundingBox = tuple[Point2D, Point2D] Polygon4P = tuple[Point2D, Point2D, Point2D, Point2D] Polygon = list[Point2D] AbstractPath = str | Path AbstractFile = AbstractPath | bytes Bbox = tuple[float, float, float, float] ================================================ FILE: onnxtr/utils/data.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
# Adapted from https://github.com/pytorch/vision/blob/master/torchvision/datasets/utils.py import hashlib import logging import os import re import urllib.error import urllib.request from pathlib import Path from tqdm.auto import tqdm __all__ = ["download_from_url"] # matches bfd8deac from resnet18-bfd8deac.ckpt HASH_REGEX = re.compile(r"-([a-f0-9]*)\.") USER_AGENT = "felixdittrich92/OnnxTR" def _urlretrieve(url: str, filename: Path | str, chunk_size: int = 1024) -> None: with open(filename, "wb") as fh: with urllib.request.urlopen(urllib.request.Request(url, headers={"User-Agent": USER_AGENT})) as response: with tqdm(total=response.length) as pbar: for chunk in iter(lambda: response.read(chunk_size), b""): if not chunk: break pbar.update(chunk_size) fh.write(chunk) def _check_integrity(file_path: str | Path, hash_prefix: str) -> bool: with open(file_path, "rb") as f: sha_hash = hashlib.sha256(f.read()).hexdigest() return sha_hash[: len(hash_prefix)] == hash_prefix def download_from_url( url: str, file_name: str | None = None, hash_prefix: str | None = None, cache_dir: str | None = None, cache_subdir: str | None = None, ) -> Path: """Download a file using its URL >>> from onnxtr.models import download_from_url >>> download_from_url("https://yoursource.com/yourcheckpoint-yourhash.zip") Args: url: the URL of the file to download file_name: optional name of the file once downloaded hash_prefix: optional expected SHA256 hash of the file cache_dir: cache directory cache_subdir: subfolder to use in the cache Returns: the location of the downloaded file Note: You can change the cache directory location by using the `ONNXTR_CACHE_DIR` environment variable. """ if not isinstance(file_name, str): file_name = url.rpartition("/")[-1].split("&")[0] cache_dir = ( str(os.environ.get("ONNXTR_CACHE_DIR", os.path.join(os.path.expanduser("~"), ".cache", "onnxtr"))) if cache_dir is None else cache_dir ) # Check hash in file name if hash_prefix is None: r = HASH_REGEX.search(file_name) hash_prefix = r.group(1) if r else None folder_path = Path(cache_dir) if cache_subdir is None else Path(cache_dir, cache_subdir) file_path = folder_path.joinpath(file_name) # Check file existence if file_path.is_file() and (hash_prefix is None or _check_integrity(file_path, hash_prefix)): logging.info(f"Using downloaded & verified file: {file_path}") return file_path try: # Create folder hierarchy folder_path.mkdir(parents=True, exist_ok=True) except OSError: error_message = f"Failed creating cache directory at {folder_path}" if os.environ.get("ONNXTR_CACHE_DIR", ""): error_message += " using path from 'ONNXTR_CACHE_DIR' environment variable." else: error_message += ( ". You can change default cache directory using 'ONNXTR_CACHE_DIR' environment variable if needed." ) logging.error(error_message) raise # Download the file try: print(f"Downloading {url} to {file_path}") _urlretrieve(url, file_path) except (urllib.error.URLError, IOError) as e: # pragma: no cover if url[:5] == "https": url = url.replace("https:", "http:") print(f"Failed download. Trying https -> http instead.
Downloading {url} to {file_path}") _urlretrieve(url, file_path) else: raise e # Remove corrupted files if isinstance(hash_prefix, str) and not _check_integrity(file_path, hash_prefix): # pragma: no cover # Remove file os.remove(file_path) raise ValueError(f"corrupted download, the hash of {url} does not match its expected value") return file_path ================================================ FILE: onnxtr/utils/fonts.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. import logging import platform from PIL import ImageFont __all__ = ["get_font"] def get_font(font_family: str | None = None, font_size: int = 13) -> ImageFont.FreeTypeFont | ImageFont.ImageFont: """Resolves a compatible ImageFont for the system Args: font_family: the font family to use font_size: the size of the font upon rendering Returns: the Pillow font """ # Font selection if font_family is None: try: font = ImageFont.truetype("FreeMono.ttf" if platform.system() == "Linux" else "Arial.ttf", font_size) except OSError: # pragma: no cover font = ImageFont.load_default() # type: ignore[assignment] logging.warning( "unable to load recommended font family. Loading default PIL font, " "font size issues may be expected. " "To prevent this, it is recommended to specify the value of 'font_family'." ) else: # pragma: no cover font = ImageFont.truetype(font_family, font_size) return font ================================================ FILE: onnxtr/utils/geometry.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details.
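# An illustrative sketch of the caching contract implemented by
# `download_from_url` in onnxtr/utils/data.py above (URL and file name are
# hypothetical; the hash prefix is parsed from the name via HASH_REGEX and
# checked against the file's sha256 on subsequent calls):
# >>> import os
# >>> os.environ["ONNXTR_CACHE_DIR"] = "/tmp/onnxtr-cache"  # optional override
# >>> from onnxtr.utils.data import download_from_url
# >>> path = download_from_url("https://yoursource.com/model-bfd8deac.onnx")
# >>> # a second call returns the cached path as long as the "bfd8deac" prefix matches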
from copy import deepcopy from math import ceil import cv2 import numpy as np from .common_types import BoundingBox, Polygon4P __all__ = [ "bbox_to_polygon", "polygon_to_bbox", "order_points", "resolve_enclosing_bbox", "resolve_enclosing_rbbox", "rotate_boxes", "compute_expanded_shape", "rotate_image", "estimate_page_angle", "convert_to_relative_coords", "rotate_abs_geoms", "extract_crops", "extract_rcrops", "shape_translate", "detach_scores", ] def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P: """Convert a bounding box to a polygon Args: bbox: a bounding box Returns: a polygon """ return bbox[0], (bbox[1][0], bbox[0][1]), (bbox[0][0], bbox[1][1]), bbox[1] def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox: """Convert a polygon to a bounding box Args: polygon: a polygon Returns: a bounding box """ x, y = zip(*polygon) return (min(x), min(y)), (max(x), max(y)) def order_points(pts: np.ndarray) -> np.ndarray: """Order points in the following order: top-left, top-right, bottom-right, bottom-left Args: pts: array of shape (4, 2) or (4,) with the coordinates of the points Returns: ordered points in the following order: top-left, top-right, bottom-right, bottom-left """ pts = np.asarray(pts) # (xmin, ymin, xmax, ymax) if pts.shape == (4,): xmin, ymin, xmax, ymax = pts return np.array( [ [xmin, ymin], # top-left [xmax, ymin], # top-right [xmax, ymax], # bottom-right [xmin, ymax], # bottom-left ], dtype=pts.dtype, ) # (4, 2) quadrangle if pts.shape == (4, 2): c = pts.mean(axis=0) # compute angle of each point around centroid angles = np.arctan2(pts[:, 1] - c[1], pts[:, 0] - c[0]) # sort by angle (counter-clockwise ordering) pts = pts[np.argsort(angles)] # ensure consistent starting point (top-left) start_idx = np.argmin(pts.sum(axis=1)) pts = np.roll(pts, -start_idx, axis=0) # ensure order is TL, TR, BR, BL (clockwise) def area(poly): return 0.5 * np.sum(poly[:, 0] * np.roll(poly[:, 1], -1) - poly[:, 1] * np.roll(poly[:, 0], -1)) if area(pts) < 0: pts = np.roll(pts[::-1], 1, axis=0) return pts.astype(pts.dtype) raise ValueError(f"Unsupported shape {pts.shape}, expected (4,) or (4,2)") def detach_scores(boxes: list[np.ndarray]) -> tuple[list[np.ndarray], list[np.ndarray]]: """Detach the objectness scores from box predictions Args: boxes: list of arrays with boxes of shape (N, 5) or (N, 5, 2) Returns: a tuple of two lists: the first one contains the boxes without the objectness scores, the second one contains the objectness scores """ def _detach(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]: if boxes.ndim == 2: return boxes[:, :-1], boxes[:, -1] return boxes[:, :-1], boxes[:, -1, -1] loc_preds, obj_scores = zip(*(_detach(box) for box in boxes)) return list(loc_preds), list(obj_scores) def shape_translate(data: np.ndarray, format: str) -> np.ndarray: """Translate the shape of the input data to the desired format Args: data: input data in shape (B, C, H, W) or (B, H, W, C) or (C, H, W) or (H, W, C) format: target format ('BCHW', 'BHWC', 'CHW', or 'HWC') Returns: the reshaped data """ # Get the current shape current_shape = data.shape # Check the number of dimensions num_dims = len(current_shape) if num_dims != len(format): return data if format == "BCHW" and data.shape[1] in [1, 3]: return data elif format == "BHWC" and data.shape[-1] in [1, 3]: return data elif format == "CHW" and data.shape[0] in [1, 3]: return data elif format == "HWC" and data.shape[-1] in [1, 3]: return data elif format == "BCHW" and data.shape[1] not in [1, 3]: return np.moveaxis(data, -1, 1) elif format == "BHWC" 
and data.shape[-1] not in [1, 3]: return np.moveaxis(data, 1, -1) elif format == "CHW" and data.shape[0] not in [1, 3]: return np.moveaxis(data, -1, 0) elif format == "HWC" and data.shape[-1] not in [1, 3]: return np.moveaxis(data, 0, -1) else: return data def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBox | np.ndarray: """Compute enclosing bbox either from: Args: bboxes: boxes in one of the following formats: - an array of boxes: (*, 4), where boxes have this shape: (xmin, ymin, xmax, ymax) - a list of BoundingBox Returns: a (1, 4) array (enclosing boxarray), or a BoundingBox """ if isinstance(bboxes, np.ndarray): xmin, ymin, xmax, ymax = np.split(bboxes, 4, axis=1) return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max()]) else: x, y = zip(*[point for box in bboxes for point in box]) return (min(x), min(y)), (max(x), max(y)) def resolve_enclosing_rbbox(rbboxes: list[np.ndarray], intermed_size: int = 1024) -> np.ndarray: """Compute enclosing rotated bbox either from: Args: rbboxes: boxes in one of the following formats: - an array of boxes: (*, 4, 2), where boxes have this shape: (x1, y1), (x2, y2), (x3, y3), (x4, y4) - a list of BoundingBox intermed_size: size of the intermediate image Returns: a (4, 2) array (enclosing rotated box) """ cloud: np.ndarray = np.concatenate(rbboxes, axis=0) # Convert to absolute for minAreaRect rect = cv2.minAreaRect(cloud.astype(np.float32) * intermed_size) return order_points(cv2.boxPoints(rect) / intermed_size) def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray: """Rotate points counter-clockwise. Args: points: array of size (N, 2) angle: angle between -90 and +90 degrees Returns: Rotated points """ angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions rotation_mat = np.array( [[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=points.dtype ) return np.matmul(points, rotation_mat.T) def compute_expanded_shape(img_shape: tuple[int, int], angle: float) -> tuple[int, int]: """Compute the shape of an expanded rotated image Args: img_shape: the height and width of the image angle: angle between -90 and +90 degrees Returns: the height and width of the rotated image """ points: np.ndarray = np.array([ [img_shape[1] / 2, img_shape[0] / 2], [-img_shape[1] / 2, img_shape[0] / 2], ]) rotated_points = rotate_abs_points(points, angle) wh_shape = 2 * np.abs(rotated_points).max(axis=0) return wh_shape[1], wh_shape[0] def rotate_abs_geoms( geoms: np.ndarray, angle: float, img_shape: tuple[int, int], expand: bool = True, ) -> np.ndarray: """Rotate a batch of bounding boxes or polygons by an angle around the image center. 
Args: geoms: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes angle: anti-clockwise rotation angle in degrees img_shape: the height and width of the image expand: whether the image should be padded to avoid information loss Returns: A batch of rotated polygons (N, 4, 2) """ # Switch to polygons polys = ( np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1) if geoms.ndim == 2 else geoms ) polys = polys.astype(np.float32) # Switch to image center as referential polys[..., 0] -= img_shape[1] / 2 polys[..., 1] = img_shape[0] / 2 - polys[..., 1] # Rotate them around the image center rotated_polys = rotate_abs_points(polys.reshape(-1, 2), angle).reshape(-1, 4, 2) # Switch back to top-left corner as referential target_shape = compute_expanded_shape(img_shape, angle) if expand else img_shape # Clip coords to fit the (possibly expanded) target shape rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(0, target_shape[1]) rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(0, target_shape[0]) return rotated_polys def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int]) -> np.ndarray: """Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape. This does not impact the absolute shape of the boxes, but allows calculating the new relative RotatedBbox coordinates after a resizing of the image. Args: loc_preds: (N, 4, 2) array of RELATIVE loc_preds orig_shape: shape of the origin image dest_shape: shape of the destination image Returns: A batch of rotated loc_preds (N, 4, 2) expressed in the destination referential """ if len(dest_shape) != 2: raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}") if len(orig_shape) != 2: raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}") orig_height, orig_width = orig_shape dest_height, dest_width = dest_shape mboxes = loc_preds.copy() mboxes[:, :, 0] = ((loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width mboxes[:, :, 1] = ((loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height return mboxes def rotate_boxes( loc_preds: np.ndarray, angle: float, orig_shape: tuple[int, int], min_angle: float = 1.0, target_shape: tuple[int, int] | None = None, ) -> np.ndarray: """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes (4, 2) by an angle, if angle > min_angle, around the center of the page. If target_shape is specified, the boxes are remapped to the target shape after the rotation.
This is done to remove the padding that is created by rotate_page(expand=True) Args: loc_preds: (N, 4) or (N, 4, 2) array of RELATIVE boxes angle: angle between -90 and +90 degrees orig_shape: shape of the origin image min_angle: minimum angle to rotate boxes target_shape: shape of the destination image Returns: A batch of rotated boxes (N, 4, 2), or a batch of straight bounding boxes """ # Change format of the boxes to rotated boxes _boxes = loc_preds.copy() if _boxes.ndim == 2: _boxes = np.stack( [ _boxes[:, [0, 1]], _boxes[:, [2, 1]], _boxes[:, [2, 3]], _boxes[:, [0, 3]], ], axis=1, ) # If small angle, return boxes (no rotation) if abs(angle) < min_angle or abs(angle) > 90 - min_angle: return _boxes # Compute rotation matrix angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions rotation_mat = np.array( [[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=_boxes.dtype ) # Rotate absolute points points: np.ndarray = np.stack((_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1) image_center = (orig_shape[1] / 2, orig_shape[0] / 2) rotated_points = image_center + np.matmul(points - image_center, rotation_mat) rotated_boxes: np.ndarray = np.stack( (rotated_points[:, :, 0] / orig_shape[1], rotated_points[:, :, 1] / orig_shape[0]), axis=-1 ) # Apply a mask if requested if target_shape is not None: rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape) return rotated_boxes def rotate_image( image: np.ndarray, angle: float, expand: bool = False, preserve_origin_shape: bool = False, ) -> np.ndarray: """Rotate an image counterclockwise by a given angle. Args: image: numpy tensor to rotate angle: rotation angle in degrees, between -90 and +90 expand: whether the image should be padded before the rotation preserve_origin_shape: if expand is set to True, resizes the final output to the original image size Returns: Rotated array, padded by 0 by default.
""" # Compute the expanded padding exp_img: np.ndarray if expand: exp_shape = compute_expanded_shape(image.shape[:2], angle) h_pad, w_pad = ( int(max(0, ceil(exp_shape[0] - image.shape[0]))), int(max(0, ceil(exp_shape[1] - image.shape[1]))), ) exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) else: exp_img = image height, width = exp_img.shape[:2] rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height)) if expand: # Pad to get the same aspect ratio if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): # Pad width if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]): h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1]) # Pad height else: h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0 rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) if preserve_origin_shape: # rescale rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR) return rot_img def remove_image_padding(image: np.ndarray) -> np.ndarray: """Remove black border padding from an image Args: image: numpy tensor to remove padding from Returns: Image with padding removed """ # Find the bounding box of the non-black region rows = np.any(image, axis=1) cols = np.any(image, axis=0) rmin, rmax = np.where(rows)[0][[0, -1]] cmin, cmax = np.where(cols)[0][[0, -1]] return image[rmin : rmax + 1, cmin : cmax + 1] def estimate_page_angle(polys: np.ndarray) -> float: """Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier) and return the estimated angle ccw in degrees """ # Compute mean left points and mean right point with respect to the reading direction (oriented polygon) xleft = polys[:, 0, 0] + polys[:, 3, 0] yleft = polys[:, 0, 1] + polys[:, 3, 1] xright = polys[:, 1, 0] + polys[:, 2, 0] yright = polys[:, 1, 1] + polys[:, 2, 1] with np.errstate(divide="raise", invalid="raise"): try: return float( np.median(np.arctan((yleft - yright) / (xright - xleft)) * 180 / np.pi) # Y axis from top to bottom! 
) except FloatingPointError: return 0.0 def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) -> np.ndarray: """Convert a geometry to relative coordinates Args: geoms: a set of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4) img_shape: the height and width of the image Returns: the updated geometry """ # Polygon if geoms.ndim == 3 and geoms.shape[1:] == (4, 2): polygons: np.ndarray = np.empty(geoms.shape, dtype=np.float32) polygons[..., 0] = geoms[..., 0] / img_shape[1] polygons[..., 1] = geoms[..., 1] / img_shape[0] return polygons.clip(0, 1) if geoms.ndim == 2 and geoms.shape[1] == 4: boxes: np.ndarray = np.empty(geoms.shape, dtype=np.float32) boxes[:, ::2] = geoms[:, ::2] / img_shape[1] boxes[:, 1::2] = geoms[:, 1::2] / img_shape[0] return boxes.clip(0, 1) raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}") def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> list[np.ndarray]: """Create cropped images from a list of bounding boxes Args: img: input image boxes: bounding boxes of shape (N, 4) where N is the number of boxes, and the relative coordinates (xmin, ymin, xmax, ymax) channels_last: whether the channel dimension is the last one instead of the first one Returns: list of cropped images """ if boxes.shape[0] == 0: return [] if boxes.shape[1] != 4: raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)") # Project relative coordinates _boxes = boxes.copy() h, w = img.shape[:2] if channels_last else img.shape[-2:] if not np.issubdtype(_boxes.dtype, np.integer): _boxes[:, [0, 2]] *= w _boxes[:, [1, 3]] *= h _boxes = _boxes.round().astype(int) # Add 1 to (xmax, ymax) so the max coordinate is included when slicing _boxes[:, 2:] += 1 if channels_last: return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes]) return deepcopy([img[:, box[1] : box[3], box[0] : box[2]] for box in _boxes]) def extract_rcrops( img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True, assume_horizontal: bool = False ) -> list[np.ndarray]: """Create cropped images from a list of rotated bounding boxes Args: img: input image polys: bounding boxes of shape (N, 4, 2) dtype: target data type of bounding boxes channels_last: whether the channel dimension is the last one instead of the first one assume_horizontal: whether the boxes are assumed to be only horizontally oriented Returns: list of cropped images """ if polys.shape[0] == 0: return [] if polys.shape[1:] != (4, 2): raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)") # Project relative coordinates _boxes = polys.copy() height, width = img.shape[:2] if channels_last else img.shape[-2:] if not np.issubdtype(_boxes.dtype, np.integer): _boxes[:, :, 0] *= width _boxes[:, :, 1] *= height src_img = img if channels_last else img.transpose(1, 2, 0) # Handle only horizontally oriented boxes if assume_horizontal: crops = [] for box in _boxes: # Calculate the centroid of the quadrilateral centroid = np.mean(box, axis=0) # Divide the points into left and right left_points = box[box[:, 0] < centroid[0]] right_points = box[box[:, 0] >= centroid[0]] # Sort the left points according to the y-axis left_points = left_points[np.argsort(left_points[:, 1])] top_left_pt = left_points[0] bottom_left_pt = left_points[-1] # Sort the right points according to the y-axis right_points = right_points[np.argsort(right_points[:, 1])] top_right_pt = right_points[0] bottom_right_pt = right_points[-1] box_points = np.array( [top_left_pt, bottom_left_pt,
top_right_pt, bottom_right_pt], dtype=dtype, ) # Get the width and height of the rectangle that will contain the warped quadrilateral width_upper = np.linalg.norm(top_right_pt - top_left_pt) width_lower = np.linalg.norm(bottom_right_pt - bottom_left_pt) height_left = np.linalg.norm(bottom_left_pt - top_left_pt) height_right = np.linalg.norm(bottom_right_pt - top_right_pt) # Get the maximum width and height rect_width = max(int(width_upper), int(width_lower)) rect_height = max(int(height_left), int(height_right)) dst_pts = np.array( [ [0, 0], # top-left # bottom-left [0, rect_height - 1], # top-right [rect_width - 1, 0], # bottom-right [rect_width - 1, rect_height - 1], ], dtype=dtype, ) # Get the perspective transform matrix using the box points affine_mat = cv2.getPerspectiveTransform(box_points, dst_pts) # Perform the perspective warp to get the rectified crop crop = cv2.warpPerspective( src_img, affine_mat, (rect_width, rect_height), ) # Add the crop to the list of crops crops.append(crop) # Handle any oriented boxes else: src_pts = _boxes[:, :3].astype(np.float32) # Preserve size d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1) d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1) # (N, 3, 2) dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype) dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1 dst_pts[:, 2, 1] = d2 - 1 # Use a warp transformation to extract the crop crops = [ cv2.warpAffine( src_img, # Transformation matrix cv2.getAffineTransform(src_pts[idx], dst_pts[idx]), (int(d1[idx]), int(d2[idx])), ) for idx in range(_boxes.shape[0]) ] return crops ================================================ FILE: onnxtr/utils/multithreading.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. import multiprocessing as mp import os from collections.abc import Callable, Iterable, Iterator from multiprocessing.pool import ThreadPool from typing import Any from onnxtr.file_utils import ENV_VARS_TRUE_VALUES __all__ = ["multithread_exec"] def multithread_exec(func: Callable[[Any], Any], seq: Iterable[Any], threads: int | None = None) -> Iterator[Any]: """Execute a given function in parallel for each element of a given sequence >>> from onnxtr.utils.multithreading import multithread_exec >>> entries = [1, 4, 8] >>> results = multithread_exec(lambda x: x ** 2, entries) Args: func: function to be executed on each element of the iterable seq: iterable threads: number of workers to be used for multiprocessing Returns: iterator of the function's results using the iterable as inputs Notes: This function uses ThreadPool from multiprocessing package, which uses `/dev/shm` directory for shared memory. If you do not have write permissions for this directory (if you run `onnxtr` on AWS Lambda for instance), you might want to disable multiprocessing. To achieve that, set 'ONNXTR_MULTIPROCESSING_DISABLE' to 'TRUE'. 
""" threads = threads if isinstance(threads, int) else min(16, mp.cpu_count()) # Single-thread if threads < 2 or os.environ.get("ONNXTR_MULTIPROCESSING_DISABLE", "").upper() in ENV_VARS_TRUE_VALUES: results = map(func, seq) # Multi-threading else: with ThreadPool(threads) as tp: # ThreadPool's map function returns a list, but seq could be of a different type # That's why wrapping result in map to return iterator results = map(lambda x: x, tp.map(func, seq)) # noqa: C417 return results ================================================ FILE: onnxtr/utils/reconstitution.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. import logging from typing import Any import numpy as np from anyascii import anyascii from PIL import Image, ImageDraw from .fonts import get_font __all__ = ["synthesize_page"] # Global variable to avoid multiple warnings ROTATION_WARNING = False def _warn_rotation(entry: dict[str, Any]) -> None: # pragma: no cover global ROTATION_WARNING if not ROTATION_WARNING and len(entry["geometry"]) == 4: logging.warning("Polygons with larger rotations will lead to inaccurate rendering") ROTATION_WARNING = True def _synthesize( response: Image.Image, entry: dict[str, Any], w: int, h: int, draw_proba: bool = False, font_family: str | None = None, smoothing_factor: float = 0.75, min_font_size: int = 6, max_font_size: int = 50, ) -> Image.Image: if len(entry["geometry"]) == 2: (xmin, ymin), (xmax, ymax) = entry["geometry"] polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)] else: polygon = entry["geometry"] # Calculate the bounding box of the word x_coords, y_coords = zip(*polygon) xmin, ymin, xmax, ymax = ( int(round(w * min(x_coords))), int(round(h * min(y_coords))), int(round(w * max(x_coords))), int(round(h * max(y_coords))), ) word_width = xmax - xmin word_height = ymax - ymin # If lines are provided instead of words, concatenate the word entries if "words" in entry: word_text = " ".join(word["value"] for word in entry["words"]) else: word_text = entry["value"] # Find the optimal font size try: font_size = min(word_height, max_font_size) font = get_font(font_family, font_size) text_width, text_height = font.getbbox(word_text)[2:4] while (text_width > word_width or text_height > word_height) and font_size > min_font_size: font_size = max(int(font_size * smoothing_factor), min_font_size) font = get_font(font_family, font_size) text_width, text_height = font.getbbox(word_text)[2:4] except ValueError: # pragma: no cover font = get_font(font_family, min_font_size) # Create a mask for the word mask = Image.new("L", (w, h), 0) ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255) # Draw the word text d = ImageDraw.Draw(response) try: try: d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt") except UnicodeEncodeError: # pragma: no cover d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt") # Catch generic exceptions to avoid crashing the whole rendering except Exception: # pragma: no cover logging.warning(f"Could not render word: {word_text}") if draw_proba: confidence = ( entry["confidence"] if "confidence" in entry else sum(w["confidence"] for w in entry["words"]) / len(entry["words"]) ) p = int(255 * confidence) color = (255 - p, 0, p) # Red to blue gradient based on probability d.rectangle([(xmin, ymin), (xmax, ymax)], 
outline=color, width=2) prob_font = get_font(font_family, 20) prob_text = f"{confidence:.2f}" prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4] # Position the probability slightly above the bounding box prob_x_offset = (word_width - prob_text_width) // 2 prob_y_offset = ymin - prob_text_height - 2 prob_y_offset = max(0, prob_y_offset) d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt") return response def synthesize_page( page: dict[str, Any], draw_proba: bool = False, font_family: str | None = None, smoothing_factor: float = 0.95, min_font_size: int = 8, max_font_size: int = 50, ) -> np.ndarray: """Draw the content of the element page (OCR response) on a blank page. Args: page: exported Page object to represent draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 font_family: family of the font smoothing_factor: factor to smooth the font size min_font_size: minimum font size max_font_size: maximum font size Returns: the synthesized page """ # Draw template h, w = page["dimensions"] response = Image.new("RGB", (w, h), color=(255, 255, 255)) for block in page["blocks"]: # If lines are provided, use these to get better rendering results if len(block["lines"]) > 1: for line in block["lines"]: _warn_rotation(block) # pragma: no cover response = _synthesize( response=response, entry=line, w=w, h=h, draw_proba=draw_proba, font_family=font_family, smoothing_factor=smoothing_factor, min_font_size=min_font_size, max_font_size=max_font_size, ) # Otherwise, draw each word else: for line in block["lines"]: _warn_rotation(block) # pragma: no cover for word in line["words"]: response = _synthesize( response=response, entry=word, w=w, h=h, draw_proba=draw_proba, font_family=font_family, smoothing_factor=smoothing_factor, min_font_size=min_font_size, max_font_size=max_font_size, ) return np.array(response, dtype=np.uint8) ================================================ FILE: onnxtr/utils/repr.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details.
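# A minimal, hedged example of feeding `synthesize_page` in
# onnxtr/utils/reconstitution.py above: it consumes the exported dict of a Page
# (keys "dimensions" and "blocks"); an empty block list yields a blank white
# canvas of the requested size:
# >>> from onnxtr.utils.reconstitution import synthesize_page
# >>> page = {"dimensions": (64, 128), "blocks": []}  # minimal valid export
# >>> synthesize_page(page).shape
# (64, 128, 3)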
# Adapted from https://github.com/pytorch/torch/blob/master/torch/nn/modules/module.py __all__ = ["NestedObject"] def _addindent(s_, num_spaces): s = s_.split("\n") # don't do anything for single-line stuff if len(s) == 1: return s_ first = s.pop(0) s = [(num_spaces * " ") + line for line in s] s = "\n".join(s) s = first + "\n" + s return s class NestedObject: """Base class for all nested objects in onnxtr""" _children_names: list[str] def extra_repr(self) -> str: return "" def __repr__(self): # We treat the extra repr like the sub-object, one item per line extra_lines = [] extra_repr = self.extra_repr() # empty string will be split into list [''] if extra_repr: extra_lines = extra_repr.split("\n") child_lines = [] if hasattr(self, "_children_names"): for key in self._children_names: child = getattr(self, key) if isinstance(child, list) and len(child) > 0: child_str = ",\n".join([repr(subchild) for subchild in child]) if len(child) > 1: child_str = _addindent(f"\n{child_str},", 2) + "\n" child_str = f"[{child_str}]" else: child_str = repr(child) child_str = _addindent(child_str, 2) child_lines.append("(" + key + "): " + child_str) lines = extra_lines + child_lines main_str = self.__class__.__name__ + "(" if lines: # simple one-liner info, which most builtin Modules will use if len(extra_lines) == 1 and not child_lines: main_str += extra_lines[0] else: main_str += "\n " + "\n ".join(lines) + "\n" main_str += ")" return main_str ================================================ FILE: onnxtr/utils/visualization.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from copy import deepcopy from typing import Any import cv2 import matplotlib.patches as patches import matplotlib.pyplot as plt import numpy as np from matplotlib.figure import Figure from .common_types import BoundingBox, Polygon4P __all__ = ["visualize_page", "draw_boxes"] def rect_patch( geometry: BoundingBox, page_dimensions: tuple[int, int], label: str | None = None, color: tuple[float, float, float] = (0, 0, 0), alpha: float = 0.3, linewidth: int = 2, fill: bool = True, preserve_aspect_ratio: bool = False, ) -> patches.Rectangle: """Create a matplotlib rectangular patch for the element Args: geometry: bounding box of the element page_dimensions: dimensions of the Page in format (height, width) label: label to display when hovered color: color to draw box alpha: opacity parameter to fill the boxes, 0 = transparent linewidth: line width fill: whether the patch should be filled preserve_aspect_ratio: pass True if you passed True to the predictor Returns: a rectangular Patch """ if len(geometry) != 2 or any(not isinstance(elt, tuple) or len(elt) != 2 for elt in geometry): raise ValueError("invalid geometry format") # Unpack height, width = page_dimensions (xmin, ymin), (xmax, ymax) = geometry # Switch to absolute coords if preserve_aspect_ratio: width = height = max(height, width) xmin, w = xmin * width, (xmax - xmin) * width ymin, h = ymin * height, (ymax - ymin) * height return patches.Rectangle( (xmin, ymin), w, h, fill=fill, linewidth=linewidth, edgecolor=(*color, alpha), facecolor=(*color, alpha), label=label, ) def polygon_patch( geometry: np.ndarray, page_dimensions: tuple[int, int], label: str | None = None, color: tuple[float, float, float] = (0, 0, 0), alpha: float = 0.3, linewidth: int = 2, fill: bool = True, preserve_aspect_ratio: bool = False, ) -> patches.Polygon: 
"""Create a matplotlib polygon patch for the element Args: geometry: bounding box of the element page_dimensions: dimensions of the Page in format (height, width) label: label to display when hovered color: color to draw box alpha: opacity parameter to fill the boxes, 0 = transparent linewidth: line width fill: whether the patch should be filled preserve_aspect_ratio: pass True if you passed True to the predictor Returns: a polygon Patch """ if not geometry.shape == (4, 2): raise ValueError("invalid geometry format") # Unpack height, width = page_dimensions geometry[:, 0] = geometry[:, 0] * (max(width, height) if preserve_aspect_ratio else width) geometry[:, 1] = geometry[:, 1] * (max(width, height) if preserve_aspect_ratio else height) return patches.Polygon( geometry, fill=fill, linewidth=linewidth, edgecolor=(*color, alpha), facecolor=(*color, alpha), label=label, ) def create_obj_patch( geometry: BoundingBox | Polygon4P | np.ndarray, page_dimensions: tuple[int, int], **kwargs: Any, ) -> patches.Patch: """Create a matplotlib patch for the element Args: geometry: bounding box (straight or rotated) of the element page_dimensions: dimensions of the page in format (height, width) **kwargs: keyword arguments for the patch Returns: a matplotlib Patch """ if isinstance(geometry, tuple): if len(geometry) == 2: # straight word BB (2 pts) return rect_patch(geometry, page_dimensions, **kwargs) elif len(geometry) == 4: # rotated word BB (4 pts) return polygon_patch(np.asarray(geometry), page_dimensions, **kwargs) elif isinstance(geometry, np.ndarray) and geometry.shape == (4, 2): # rotated line return polygon_patch(geometry, page_dimensions, **kwargs) raise ValueError("invalid geometry format") def visualize_page( page: dict[str, Any], image: np.ndarray, words_only: bool = True, display_artefacts: bool = True, scale: float = 10, interactive: bool = True, add_labels: bool = True, **kwargs: Any, ) -> Figure: """Visualize a full page with predicted blocks, lines and words >>> import numpy as np >>> import matplotlib.pyplot as plt >>> from onnxtr.utils.visualization import visualize_page >>> from onnxtr.models import ocr_db_crnn >>> model = ocr_db_crnn() >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8) >>> out = model([[input_page]]) >>> visualize_page(out[0].pages[0].export(), input_page) >>> plt.show() Args: page: the exported Page of a Document image: np array of the page, needs to have the same shape than page['dimensions'] words_only: whether only words should be displayed display_artefacts: whether artefacts should be displayed scale: figsize of the largest windows side interactive: whether the plot should be interactive add_labels: for static plot, adds text labels on top of bounding box **kwargs: keyword arguments for the polygon patch Returns: the matplotlib figure """ # Get proper scale and aspect ratio h, w = image.shape[:2] size = (scale * w / h, scale) if h > w else (scale, h / w * scale) fig, ax = plt.subplots(figsize=size) # Display the image ax.imshow(image) # hide both axis ax.axis("off") if interactive: artists: list[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page) for block in page["blocks"]: if not words_only: rect = create_obj_patch( block["geometry"], page["dimensions"], label="block", color=(0, 1, 0), linewidth=1, **kwargs ) # add patch on figure ax.add_patch(rect) if interactive: # add patch to cursor's artists artists.append(rect) for line in block["lines"]: if not words_only: rect = create_obj_patch( line["geometry"], 
page["dimensions"], label="line", color=(1, 0, 0), linewidth=1, **kwargs ) ax.add_patch(rect) if interactive: artists.append(rect) for word in line["words"]: rect = create_obj_patch( word["geometry"], page["dimensions"], label=f"{word['value']} (confidence: {word['confidence']:.2%})", color=(0, 0, 1), **kwargs, ) ax.add_patch(rect) if interactive: artists.append(rect) elif add_labels: if len(word["geometry"]) == 5: text_loc = ( int(page["dimensions"][1] * (word["geometry"][0] - word["geometry"][2] / 2)), int(page["dimensions"][0] * (word["geometry"][1] - word["geometry"][3] / 2)), ) else: text_loc = ( int(page["dimensions"][1] * word["geometry"][0][0]), int(page["dimensions"][0] * word["geometry"][0][1]), ) if len(word["geometry"]) == 2: # We draw only if boxes are in straight format ax.text( *text_loc, word["value"], size=10, alpha=0.5, color=(0, 0, 1), ) if display_artefacts: for artefact in block["artefacts"]: rect = create_obj_patch( artefact["geometry"], page["dimensions"], label="artefact", color=(0.5, 0.5, 0.5), linewidth=1, **kwargs, ) ax.add_patch(rect) if interactive: artists.append(rect) if interactive: import mplcursors # Create mlp Cursor to hover patches in artists mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label())) fig.tight_layout(pad=0.0) return fig def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int] | None = None, **kwargs) -> None: """Draw an array of relative straight boxes on an image Args: boxes: array of relative boxes, of shape (*, 4) image: np array, float32 or uint8 color: color to use for bounding box edges **kwargs: keyword arguments from `matplotlib.pyplot.plot` """ h, w = image.shape[:2] # Convert boxes to absolute coords _boxes = deepcopy(boxes) _boxes[:, [0, 2]] *= w _boxes[:, [1, 3]] *= h _boxes = _boxes.astype(np.int32) for box in _boxes.tolist(): xmin, ymin, xmax, ymax = box image = cv2.rectangle( image, (xmin, ymin), (xmax, ymax), color=color if isinstance(color, tuple) else (0, 0, 255), thickness=2, ) plt.imshow(image) plt.plot(**kwargs) ================================================ FILE: onnxtr/utils/vocabs.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
import re import string __all__ = ["VOCABS"] _BASE_VOCABS = { # Latin "digits": string.digits, "ascii_letters": string.ascii_letters, "punctuation": string.punctuation, "currency": "£€¥¢฿", # Cyrillic "generic_cyrillic_letters": "абвгдежзийклмнопрстуфхцчшщьюяАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЮЯ", "russian_cyrillic_letters": "ёыэЁЫЭ", "russian_signs": "ъЪ", # Greek "ancient_greek": "αβγδεζηθικλμνξοπρστςυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ", # Arabic & Persian "arabic_diacritics": "".join(["ً", "ٌ", "ٍ", "َ", "ُ", "ِ", "ّ", "ْ", "ٕ", "ٓ", "ٔ", "ٚ"]), "arabic_digits": "٠١٢٣٤٥٦٧٨٩", "arabic_letters": "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىيٱ", "arabic_punctuation": "؟؛«»—،", "persian_letters": "پچژڢڤگکی", # Bengali "bengali_consonants": "কখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহড়ঢ়য়ৰৱৼ", "bengali_vowels": "অআইঈউঊঋঌএঐওঔৠৡ", "bengali_digits": "০১২৩৪৫৬৭৮৯", "bengali_matras": "".join(["া", "ি", "ী", "ু", "ূ", "ৃ", "ে", "ৈ", "ো", "ৌ", "ৗ"]), "bengali_virama": "্", "bengali_punctuation": "ঽৎ৽৺৻", "bengali_signs": "".join(["ঁ", "ং", "ঃ", "়"]), # Gujarati "gujarati_consonants": "કખગઘઙચછજઝઞટઠડઢણતથદધનપફબભમયરલળવશષસહ", "gujarati_vowels": "અઆઇઈઉઊઋઌઍએઐઑઓઔ", "gujarati_digits": "૦૧૨૩૪૫૬૭૮૯", "gujarati_matras": "".join([ "ઁ", "ં", "ઃ", "઼", "ા", "િ", "ી", "ુ", "ૂ", "ૃ", "ૄ", "ૅ", "ે", "ૈ", "ૉ", "ો", "ૌ", "ૢ", "ૣ", "ૺ", "ૻ", "ૼ", "૽", "૾", "૿", ]), "gujarati_virama": "્", "gujarati_punctuation": "ઽ॥", "gujarati_signs": "ૐ૰", # Devanagari "devanagari_consonants": "कखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहऴऩळक़ख़ग़ज़ड़ढ़फ़य़ऱॺॻॼॽॾ", "devanagari_vowels": "अआइईउऊऋऌऍऎएऐऑऒओऔॠॡॲऄॵॶॳॴॷॸॹ", "devanagari_digits": "०१२३४५६७८९", "devanagari_matras": "".join([ "़", "ं", "ँ", "ः", "॑", "॒", "ा", "ि", "ी", "ु", "ू", "ृ", "ॄ", "ॅ", "ॆ", "े", "ै", "ॉ", "ॊ", "ो", "ौ", "ॢ", "ॣ", "ॏ", "ॎ", ]), "devanagari_virama": "्", "devanagari_punctuation": "।॥॰ऽꣲ", "devanagari_signs": "ॐ", # Punjabi (Gurmukhi script) "punjabi_consonants": "ਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵਸ਼ਸਹਖ਼ਗ਼ਜ਼ਫ਼ੜਲ਼", "punjabi_vowels": "ਅਆਇਈਉਊਏਐਓਔੲੳ", "punjabi_digits": "੦੧੨੩੪੫੬੭੮੯", "punjabi_matras": "".join(["ਂ", "਼", "ਾ", "ਿ", "ੀ", "ੁ", "ੂ", "ੇ", "ੈ", "ੋ", "ੌ", "ੑ", "ੰ", "ੱ", "ੵ"]), "punjabi_virama": "੍", "punjabi_punctuation": "।॥", "punjabi_signs": "ੴ", # Tamil "tamil_consonants": "கஙசஞடணதநபமயரலவழளறன", "tamil_vowels": "அஆஇஈஉஊஎஏஐஒஓஔ", "tamil_digits": "௦௧௨௩௪௫௬௭௮௯", "tamil_matras": "".join(["ா", "ி", "ீ", "ு", "ூ", "ெ", "ே", "ை", "ொ", "ோ", "ௌ"]), "tamil_virama": "்", "tamil_punctuation": "௰௱௲", "tamil_signs": "ஃௐ", "tamil_fractions": "௳௴௵௶௷௸௹௺", # Telugu "telugu_consonants": "కఖగఘఙచఛజఝఞటఠడఢణతథదధనపఫబభమయరఱలళవశషసహఴ", "telugu_digits": "౦౧౨౩౪౫౬౭౮౯" + "౸౹౺౻", # Telugu digits and fractional digits "telugu_vowels": "అఆఇఈఉఊఋఌఎఏఐఒఓఔౠౡ", "telugu_matras": "".join(["ా", "ి", "ీ", "ు", "ూ", "ృ", "ౄ", "ె", "ే", "ై", "ొ", "ో", "ౌ", "ౢ", "ౣ"]), "telugu_virama": "్", "telugu_punctuation": "ఽ", "telugu_signs": "".join(["ఁ", "ం", "ః"]), # Kannada "kannada_consonants": "ಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಲವಶಷಸಹಳ", "kannada_vowels": "ಅಆಇಈಉಊಋॠಌೡಎಏಐಒಓಔ", "kannada_digits": "೦೧೨೩೪೫೬೭೮೯", "kannada_matras": "".join(["ಾ", "ಿ", "ೀ", "ು", "ೂ", "ೃ", "ೄ", "ೆ", "ೇ", "ೈ", "ೊ", "ೋ", "ೌ"]), "kannada_virama": "್", "kannada_punctuation": "।॥ೱೲ", "kannada_signs": "".join(["ಂ", "ಃ", "ಁ"]), # Sinhala "sinhala_consonants": "කඛගඝඞචඡජඣඤටඨඩඪණතථදධනපඵබභමයරලවශෂසහළෆ", "sinhala_vowels": "අආඇඈඉඊඋඌඍඎඏඐඑඒඓඔඕඖ", "sinhala_digits": "෦෧෨෩෪෫෬෭෮෯", "sinhala_matras": "".join(["ා", "ැ", "ෑ", "ි", "ී", "ු", "ූ", "ෙ", "ේ", "ෛ", "ො", "ෝ", "ෞ"]), "sinhala_virama": "්", "sinhala_punctuation": "෴", "sinhala_signs": "".join(["ං", "ඃ"]), # Malayalam "malayalam_consonants": 
"കഖഗഘങചഛജഝഞടഠഡഢണതഥദധനപഫബഭമയരറലളഴവശഷസഹ", "malayalam_vowels": "അആഇഈഉഊഋൠഌൡഎഏഐഒഓഔ", "malayalam_digits": "൦൧൨൩൪൫൬൭൮൯", "malayalam_matras": "".join(["ാ", "ി", "ീ", "ു", "ൂ", "ൃ", "ൄ", "ൢ", "ൣ", "െ", "േ", "ൈ", "ൊ", "ോ", "ൌ"]), "malayalam_virama": "്", "malayalam_signs": "".join(["ഃ", "൹", "ഽ", "൏", "ം"]), # Odia (Oriya) "odia_consonants": "କଖଗଘଙଚଛଜଝଞଟଠଡଢଣତଥଦଧନପଫବଭମଯରଲଳଵଶଷସହୟୱଡ଼ଢ଼", "odia_vowels": "ଅଆଇଈଉଊଋଌଏଐଓଔୡୠ", "odia_digits": "୦୧୨୩୪୫୬୭୮୯" + "୲୳୴୵୶୷", # Odia digits and fractional digits "odia_matras": "".join(["ା", "ି", "ୀ", "ୁ", "ୂ", "ୃ", "ୄ", "େ", "ୈ", "ୋ", "ୌ", "ୢ", "ୣ"]), "odia_virama": "୍", "odia_punctuation": "ଽ", "odia_signs": "".join(["ଂ", "ଃ", "ଁ", "଼", "୰"]), # Khmer "khmer_consonants": "កខគឃងចឆជឈញដឋឌឍណតថទធនបផពភមយរលវឝឞសហឡអ", "khmer_vowels": "ឣឤឥឦឧឨឩឪឫឬឭឮឯឰឱឲឳ", "khmer_digits": "០១២៣៤៥៦៧៨៩", "khmer_matras": "".join(["ា", "ិ", "ី", "ឹ", "ឺ", "ុ", "ូ", "ួ", "ើ", "ឿ", "ៀ", "េ", "ែ", "ៃ", "ោ", "ៅ"]), "khmer_diacritics": "".join(["ំ", "ះ", "ៈ", "៉", "៊", "់", "៌", "៍", "៎", "៏", "័", "៑", "៓", "៝"]), "khmer_virama": "្", "khmer_punctuation": "។៕៖៘៙៚ៗៜ", # Burmese "burmese_consonants": "ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအၐၑၒၓၔၕၚၛၜၝၡၥၦၮၯၰၵၶၷၸၹၺၻၼၽၾၿႀႁႎ", "burmese_vowels": "ဣဤဥဦဧဩဪဿ", "burmese_digits": "၀၁၂၃၄၅၆၇၈၉" + "႐႑႒႓႔႕႖႗႘႙", # Burmese digits and Shan digits "burmese_diacritics": "".join(["့", "း", "ံ", "ါ", "ာ", "ိ", "ီ", "ု", "ူ", "ေ", "ဲ", "ဳ", "ဴ", "ဵ", "ျြွှ"]), # းံါာိီုူေဲံ့းှျြွှ # ္ (virama) and ် (final consonant) - the first is used to stack consonants, the second is used for final consonants "burmese_virama": "".join([ "္", "်", ]), "burmese_punctuation": "၊။၌၍၎၏" + "ၤ" + "ၗ", # Includes ၗ and ၤ # Javanese "javanese_consonants": "ꦏꦐꦑꦒꦓꦔꦕꦖꦗꦘꦙꦚꦛꦜꦝꦞꦟꦠꦡꦢꦣꦤꦥꦦꦧꦨꦩꦪꦫꦬꦭꦮꦯꦰꦱꦲ", "javanese_vowels": "ꦄꦅꦆꦇꦈꦉꦊꦋꦌꦍꦎ" + "ꦴꦵꦶꦷꦸꦹꦺꦻꦼ", # sec: Dependent vowels ꦴꦵꦶꦷꦸꦹꦺꦻꦼ "javanese_digits": "꧐꧑꧒꧓꧔꧕꧖꧗꧘꧙", "javanese_diacritics": "".join(["ꦀ", "ꦁ", "ꦂ", "ꦃ", "꦳", "ꦽ", "ꦾ", "ꦿ"]), # ꦀꦁꦂꦃ꦳ꦽꦾꦿ "javanese_virama": "꧀", "javanese_punctuation": "".join(["꧈", "꧉", "꧊", "꧋", "꧌", "꧍", "ꧏ"]), # Sudanese "sudanese_consonants": "ᮊᮋᮌᮍᮎᮏᮐᮑᮒᮓᮔᮕᮖᮗᮘᮙᮚᮛᮜᮝᮞᮟᮠᮮᮯᮺᮻᮼᮽᮾᮿ", "sudanese_vowels": "ᮃᮄᮅᮆᮇᮈᮉ", "sudanese_digits": "᮰᮱᮲᮳᮴᮵᮶᮷᮸᮹", "sudanese_diacritics": "".join(["ᮀ", "ᮁ", "ᮂ", "ᮡ", "ᮢ", "ᮣ", "ᮤ", "ᮥ", "ᮦ", "ᮧ", "ᮨ", "ᮩ", "᮪", "᮫", "ᮬ", "ᮭ"]), # "ᮀᮁᮂᮡᮢᮣᮤᮥᮦᮧᮨᮩ᮪᮫ᮬᮭ" # Hebrew "hebrew_cantillations": "".join([ "֑", "֒", "֓", "֔", "֕", "֖", "֗", "֘", "֙", "֚", "֛", "֜", "֝", "֞", "֟", "֠", "֡", "֢", "֣", "֤", "֥", "֦", "֧", "֨", "֩", "֪", "֫", "֬", "֭", "֮", "֯", ]), "hebrew_consonants": "אבגדהוזחטיךכלםמןנסעףפץצקרשת", "hebrew_specials": "ׯװױײיִﬞײַﬠﬡﬢﬣﬤﬥﬦﬧﬨ﬩שׁשׂשּׁשּׂאַאָאּבּגּדּהּוּזּטּיּךּכּלּמּנּסּףּפּצּקּרּשּתּוֹבֿכֿפֿﭏ", "hebrew_punctuation": "".join(["ֽ", "־", "ֿ", "׀", "ׁ", "ׂ", "׃", "ׄ", "ׅ", "׆", "׳", "״"]), "hebrew_vowels": "".join(["ְ", "ֱ", "ֲ", "ֳ", "ִ", "ֵ", "ֶ", "ַ", "ָ", "ֹ", "ֺ", "ֻ", "ׇ"]), } VOCABS: dict[str, str] = {} for key, value in _BASE_VOCABS.items(): VOCABS[key] = value # Latin & latin-dependent alphabets VOCABS["latin"] = _BASE_VOCABS["digits"] + _BASE_VOCABS["ascii_letters"] + _BASE_VOCABS["punctuation"] VOCABS["english"] = VOCABS["latin"] + "°" + _BASE_VOCABS["currency"] VOCABS["albanian"] = VOCABS["english"] + "çëÇË" VOCABS["afrikaans"] = VOCABS["english"] + "èëïîôûêÈËÏÎÔÛÊ" VOCABS["azerbaijani"] = re.sub(r"[Ww]", "", VOCABS["english"]) + "çəğöşüÇƏĞÖŞÜ" + "₼" VOCABS["basque"] = VOCABS["english"] + "ñçÑÇ" VOCABS["bosnian"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "čćđšžČĆĐŠŽ" VOCABS["catalan"] = VOCABS["english"] + "àèéíïòóúüçÀÈÉÍÏÒÓÚÜÇ" VOCABS["croatian"] = VOCABS["english"] + "ČčĆćĐ𩹮ž" 
VOCABS["czech"] = VOCABS["english"] + "áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ" VOCABS["danish"] = VOCABS["english"] + "æøåÆØÅ" VOCABS["dutch"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ" VOCABS["estonian"] = VOCABS["english"] + "šžõäöüŠŽÕÄÖÜ" VOCABS["esperanto"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "ĉĝĥĵŝŭĈĜĤĴŜŬ" + "₷" VOCABS["french"] = VOCABS["english"] + "àâéèêëîïôùûüçÀÂÉÈÊËÎÏÔÙÛÜÇ" VOCABS["finnish"] = VOCABS["english"] + "äöÄÖ" VOCABS["frisian"] = re.sub(r"[QqXx]", "", VOCABS["english"]) + "âêôûúÂÊÔÛÚ" + "ƒƑ" VOCABS["galician"] = re.sub(r"[JjKkWw]", "", VOCABS["english"]) + "ñÑçÇ" VOCABS["german"] = VOCABS["english"] + "äöüßÄÖÜẞ" VOCABS["hausa"] = re.sub(r"[PpQqVvXx]", "", VOCABS["english"]) + "ɓɗƙƴƁƊƘƳ" + "₦" VOCABS["hungarian"] = VOCABS["english"] + "áéíóöúüÁÉÍÓÖÚÜ" VOCABS["icelandic"] = re.sub(r"[CcQqWw]", "", VOCABS["english"]) + "ðáéíóúýþæöÐÁÉÍÓÚÝÞÆÖ" VOCABS["indonesian"] = VOCABS["english"] VOCABS["irish"] = VOCABS["english"] + "áéíóúÁÉÍÓÚ" VOCABS["italian"] = VOCABS["english"] + "àèéìíîòóùúÀÈÉÌÍÎÒÓÙÚ" VOCABS["latvian"] = re.sub(r"[QqWwXx]", "", VOCABS["english"]) + "āčēģīķļņšūžĀČĒĢĪĶĻŅŠŪŽ" VOCABS["lithuanian"] = re.sub(r"[QqWwXx]", "", VOCABS["english"]) + "ąčęėįšųūžĄČĘĖĮŠŲŪŽ" VOCABS["luxembourgish"] = VOCABS["english"] + "äöüéëÄÖÜÉË" VOCABS["malagasy"] = re.sub(r"[CcQqUuWwXx]", "", VOCABS["english"]) + "ôñÔÑ" VOCABS["malay"] = VOCABS["english"] VOCABS["maltese"] = re.sub(r"[CcYy]", "", VOCABS["english"]) + "ċġħżĊĠĦŻ" VOCABS["maori"] = re.sub(r"[BbCcDdFfJjLlOoQqSsVvXxYyZz]", "", VOCABS["english"]) + "āēīōūĀĒĪŌŪ" VOCABS["montenegrin"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "čćšžźČĆŠŚŽŹ" VOCABS["norwegian"] = VOCABS["english"] + "æøåÆØÅ" VOCABS["polish"] = VOCABS["english"] + "ąćęłńóśźżĄĆĘŁŃÓŚŹŻ" VOCABS["portuguese"] = VOCABS["english"] + "áàâãéêíïóôõúüçÁÀÂÃÉÊÍÏÓÔÕÚÜÇ" VOCABS["quechua"] = re.sub(r"[BbDdFfGgJjVvXxZz]", "", VOCABS["english"]) + "ñÑĉĈçÇ" VOCABS["romanian"] = VOCABS["english"] + "ăâîșțĂÂÎȘȚ" VOCABS["scottish_gaelic"] = re.sub(r"[JjKkQqVvWwXxYyZz]", "", VOCABS["english"]) + "àèìòùÀÈÌÒÙ" VOCABS["serbian_latin"] = VOCABS["english"] + "čćđžšČĆĐŽŠ" VOCABS["slovak"] = VOCABS["english"] + "ôäčďľňšťžáéíĺóŕúýÔÄČĎĽŇŠŤŽÁÉÍĹÓŔÚÝ" VOCABS["slovene"] = re.sub(r"[QqWwXxYy]", "", VOCABS["english"]) + "čćđšžČĆĐŠŽ" VOCABS["somali"] = re.sub(r"[PpVvZz]", "", VOCABS["english"]) VOCABS["spanish"] = VOCABS["english"] + "áéíóúüñÁÉÍÓÚÜÑ" + "¡¿" VOCABS["swahili"] = re.sub(r"[QqXx]", "", VOCABS["english"]) VOCABS["swedish"] = VOCABS["english"] + "åäöÅÄÖ" VOCABS["tagalog"] = re.sub(r"[CcQqWwXx]", "", VOCABS["english"]) + "ñÑ" + "₱" VOCABS["turkish"] = re.sub(r"[QqWwXx]", "", VOCABS["english"]) + "çğıöşüâîûÇĞİÖŞÜÂÎÛ" + "₺" VOCABS["uzbek_latin"] = re.sub(r"[Ww]", "", VOCABS["english"]) + "çğɉñöşÇĞɈÑÖŞ" VOCABS["vietnamese"] = ( VOCABS["english"] + "áàảạãăắằẳẵặâấầẩẫậđéèẻẽẹêếềểễệóòỏõọôốồổộỗơớờởợỡúùủũụưứừửữựíìỉĩịýỳỷỹỵ" + "ÁÀẢẠÃĂẮẰẲẴẶÂẤẦẨẪẬĐÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỘỖƠỚỜỞỢỠÚÙỦŨỤƯỨỪỬỮỰÍÌỈĨỊÝỲỶỸỴ" + "₫" # currency ) VOCABS["welsh"] = re.sub(r"[KkQqVvXxZz]", "", VOCABS["english"]) + "âêîôŵŷÂÊÎÔŴŶ" VOCABS["yoruba"] = re.sub(r"[CcQqVvXxZz]", "", VOCABS["english"]) + "ẹọṣẸỌṢ" + "₦" VOCABS["zulu"] = VOCABS["english"] # Non-latin alphabets. 
# Cyrillic VOCABS["russian"] = ( _BASE_VOCABS["generic_cyrillic_letters"] + _BASE_VOCABS["russian_cyrillic_letters"] + _BASE_VOCABS["russian_signs"] + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] + "₽" ) VOCABS["belarusian"] = ( _BASE_VOCABS["generic_cyrillic_letters"] + _BASE_VOCABS["russian_cyrillic_letters"] + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] + "ўiЎI" + "₽" ) VOCABS["ukrainian"] = ( _BASE_VOCABS["generic_cyrillic_letters"] + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] + "ґіїєҐІЇЄ" + "₴" ) VOCABS["tatar"] = VOCABS["russian"] + "ӘәҖҗҢңӨөҮү" VOCABS["tajik"] = VOCABS["russian"].replace("₽", "") + "ҒғҚқҲҳҶҷӢӣӮӯ" VOCABS["kazakh"] = VOCABS["russian"].replace("₽", "") + "ӘәҒғҚқҢңӨөҰұҮүҺһІі" + "₸" VOCABS["kyrgyz"] = VOCABS["russian"].replace("₽", "") + "ҢңӨөҮү" VOCABS["bulgarian"] = ( _BASE_VOCABS["generic_cyrillic_letters"] + _BASE_VOCABS["russian_signs"] + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] ) VOCABS["macedonian"] = ( _BASE_VOCABS["generic_cyrillic_letters"] + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] + "ЃѓЅѕЈјЉљЊњЌќЏџ" ) VOCABS["mongolian"] = ( _BASE_VOCABS["generic_cyrillic_letters"] + _BASE_VOCABS["russian_cyrillic_letters"] + _BASE_VOCABS["russian_signs"] + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] + "ӨөҮү" + "᠐᠑᠒᠓᠔᠕᠖᠗᠘᠙" # Mongolian digits + "₮" ) VOCABS["yakut"] = ( _BASE_VOCABS["generic_cyrillic_letters"] + _BASE_VOCABS["russian_cyrillic_letters"] + _BASE_VOCABS["russian_signs"] + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] + "ҔҕҤҥӨөҺһҮү" + "₽" ) VOCABS["serbian_cyrillic"] = ( "абвгдежзиклмнопрстуфхцчшАБВГДЕЖЗИКЛМНОПРСТУФХЦЧШ" # limited cyrillic + "JjЂђЉљЊњЋћЏџ" # Serbian specials + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] ) VOCABS["uzbek_cyrillic"] = ( _BASE_VOCABS["generic_cyrillic_letters"] + _BASE_VOCABS["russian_cyrillic_letters"] + _BASE_VOCABS["russian_signs"] + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] + "ЎўҚқҒғҲҳ" ) VOCABS["ukrainian"] = ( _BASE_VOCABS["generic_cyrillic_letters"] + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["currency"] + "ґіїєҐІЇЄ₴" ) # Greek VOCABS["greek"] = ( _BASE_VOCABS["punctuation"] + _BASE_VOCABS["ancient_greek"] + _BASE_VOCABS["currency"] + "άέήίϊΐόύϋΰώΆΈΉΊΪΌΎΫΏ" ) VOCABS["greek_extended"] = ( VOCABS["greek"] + "ͶͷϜϝἀἁἂἃἄἅἆἇἈἉἊἋἌἍἎἏἐἑἒἓἔἕἘἙἚἛἜἝἠἡἢἣἤἥἦἧἨἩἪἫἬἭἮἯἰἱἲἳἴἵἶἷἸἹἺἻἼἽἾἿ" + "ὀὁὂὃὄὅὈὉὊὋὌὍὐὑὒὓὔὕὖὗὙὛὝὟὠὡὢὣὤὥὦὧὨὩὪὫὬὭὮὯὰὲὴὶὸὺὼᾀᾁᾂᾃᾄᾅᾆᾇᾈᾉᾊᾋᾌᾍᾎᾏᾐ" + "ᾑᾒᾓᾔᾕᾖᾗᾘᾙᾚᾛᾜᾝᾞᾟᾠᾡᾢᾣᾤᾥᾦᾧᾨᾩᾪᾫᾬᾭᾮᾯᾲᾳᾴᾶᾷᾺᾼῂῃῄῆῇῈῊῌῒΐῖῗῚῢΰῤῥῦῧῪῬῲῳῴῶῷῸῺῼ" ) # Hebrew VOCABS["hebrew"] = ( _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + _BASE_VOCABS["hebrew_consonants"] + _BASE_VOCABS["hebrew_vowels"] + _BASE_VOCABS["hebrew_punctuation"] + _BASE_VOCABS["hebrew_cantillations"] + _BASE_VOCABS["hebrew_specials"] + "₪" ) # Arabic VOCABS["arabic"] = ( _BASE_VOCABS["digits"] + _BASE_VOCABS["arabic_digits"] + _BASE_VOCABS["arabic_letters"] + _BASE_VOCABS["persian_letters"] + _BASE_VOCABS["arabic_diacritics"] + _BASE_VOCABS["arabic_punctuation"] + _BASE_VOCABS["punctuation"] ) VOCABS["persian"] = VOCABS["arabic"] VOCABS["urdu"] = VOCABS["persian"] + "ٹڈڑںھےہۃ" VOCABS["pashto"] = VOCABS["persian"] + "ټډړږښځڅڼېۍ" VOCABS["kurdish"] = VOCABS["persian"] + "ڵڕۆێە" VOCABS["uyghur"] = VOCABS["persian"] + "ەېۆۇۈڭھ" VOCABS["sindhi"] = 
VOCABS["persian"] + "ڀٿٺٽڦڄڃڇڏڌڊڍڙڳڱڻھ" # Indic scripts # Rules: # Any consonant can be "combined" with any matra # The virama is used to create consonant clusters - so C + Virama + C = CC # Devanagari based VOCABS["devanagari"] = ( _BASE_VOCABS["devanagari_consonants"] + _BASE_VOCABS["devanagari_vowels"] + _BASE_VOCABS["devanagari_digits"] + _BASE_VOCABS["devanagari_matras"] + _BASE_VOCABS["devanagari_virama"] + _BASE_VOCABS["devanagari_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Devanagari + "₹" # currency ) VOCABS["hindi"] = VOCABS["devanagari"] VOCABS["sanskrit"] = VOCABS["devanagari"] VOCABS["marathi"] = VOCABS["devanagari"] VOCABS["nepali"] = VOCABS["devanagari"] # Gujarati VOCABS["gujarati"] = ( _BASE_VOCABS["gujarati_consonants"] + _BASE_VOCABS["gujarati_vowels"] + _BASE_VOCABS["gujarati_digits"] + _BASE_VOCABS["gujarati_matras"] + _BASE_VOCABS["gujarati_virama"] + _BASE_VOCABS["gujarati_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Gujarati + _BASE_VOCABS["gujarati_signs"] + "૱" # currency ) # Bengali VOCABS["bengali"] = ( _BASE_VOCABS["bengali_consonants"] + _BASE_VOCABS["bengali_vowels"] + _BASE_VOCABS["bengali_digits"] + _BASE_VOCABS["bengali_matras"] + _BASE_VOCABS["bengali_virama"] + _BASE_VOCABS["bengali_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Bengali + _BASE_VOCABS["bengali_signs"] + "৳" # currency ) # Brahmic scripts VOCABS["tamil"] = ( _BASE_VOCABS["tamil_consonants"] + _BASE_VOCABS["tamil_vowels"] + _BASE_VOCABS["tamil_digits"] + _BASE_VOCABS["tamil_matras"] + _BASE_VOCABS["tamil_virama"] + _BASE_VOCABS["tamil_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Tamil + _BASE_VOCABS["tamil_fractions"] # This is a Tamil-specific addition + _BASE_VOCABS["tamil_signs"] + "₹" # currency ) VOCABS["telugu"] = ( _BASE_VOCABS["telugu_consonants"] + _BASE_VOCABS["telugu_vowels"] + _BASE_VOCABS["telugu_digits"] + _BASE_VOCABS["telugu_matras"] + _BASE_VOCABS["telugu_virama"] + _BASE_VOCABS["telugu_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Telugu + _BASE_VOCABS["telugu_signs"] + "₹" # currency ) VOCABS["kannada"] = ( _BASE_VOCABS["kannada_consonants"] + _BASE_VOCABS["kannada_vowels"] + _BASE_VOCABS["kannada_digits"] + _BASE_VOCABS["kannada_matras"] + _BASE_VOCABS["kannada_virama"] + _BASE_VOCABS["kannada_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Kannada + _BASE_VOCABS["kannada_signs"] + "₹" # currency ) VOCABS["sinhala"] = ( _BASE_VOCABS["sinhala_consonants"] + _BASE_VOCABS["sinhala_vowels"] + _BASE_VOCABS["sinhala_digits"] + _BASE_VOCABS["sinhala_matras"] + _BASE_VOCABS["sinhala_virama"] + _BASE_VOCABS["sinhala_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Sinhala + _BASE_VOCABS["sinhala_signs"] + "₹" # currency ) VOCABS["malayalam"] = ( _BASE_VOCABS["malayalam_consonants"] + _BASE_VOCABS["malayalam_vowels"] + _BASE_VOCABS["malayalam_digits"] + _BASE_VOCABS["malayalam_matras"] + _BASE_VOCABS["malayalam_virama"] + _BASE_VOCABS["punctuation"] # western punctuation used in Malayalam + _BASE_VOCABS["malayalam_signs"] + "₹" # currency ) VOCABS["punjabi"] = ( _BASE_VOCABS["punjabi_consonants"] + _BASE_VOCABS["punjabi_vowels"] + _BASE_VOCABS["punjabi_digits"] + _BASE_VOCABS["punjabi_matras"] + _BASE_VOCABS["punjabi_virama"] + _BASE_VOCABS["punjabi_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Punjabi + _BASE_VOCABS["punjabi_signs"] + "₹" # currency ) 
VOCABS["odia"] = ( _BASE_VOCABS["odia_consonants"] + _BASE_VOCABS["odia_vowels"] + _BASE_VOCABS["odia_digits"] + _BASE_VOCABS["odia_matras"] + _BASE_VOCABS["odia_virama"] + _BASE_VOCABS["odia_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Odia + _BASE_VOCABS["odia_signs"] + "₹" # currency ) VOCABS["khmer"] = ( _BASE_VOCABS["khmer_consonants"] + _BASE_VOCABS["khmer_vowels"] + _BASE_VOCABS["khmer_digits"] + _BASE_VOCABS["khmer_matras"] + _BASE_VOCABS["khmer_virama"] + _BASE_VOCABS["khmer_diacritics"] # This is a Khmer-specific addition + _BASE_VOCABS["khmer_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Khmer + "៛" # Cambodian currency ) # Armenian VOCABS["armenian"] = ( "ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖՙՠաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆևֈ" + _BASE_VOCABS["digits"] + _BASE_VOCABS["punctuation"] + "՚՛՜՝՞՟։֊" + "֏" ) # Sudanese VOCABS["sudanese"] = ( _BASE_VOCABS["digits"] + _BASE_VOCABS["sudanese_digits"] + _BASE_VOCABS["sudanese_consonants"] + _BASE_VOCABS["sudanese_vowels"] + _BASE_VOCABS["sudanese_diacritics"] + _BASE_VOCABS["punctuation"] ) # Thai # Rules: # Diacritics are used to modify the consonants and vowels VOCABS["thai"] = ( _BASE_VOCABS["digits"] + "๐๑๒๓๔๕๖๗๘๙" + _BASE_VOCABS["punctuation"] + "๏๚๛ๆฯ" + "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮ" # Thai consonants + "ะาำเแโใไๅ" # Thai vowels + " ัิีึืฺุู็่้๊๋์ํ๎".replace(" ", "") + "฿" ) VOCABS["lao"] = ( _BASE_VOCABS["digits"] + "໐໑໒໓໔໕໖໗໘໙" + _BASE_VOCABS["punctuation"] + "ໆໞໟຯ" + "ກຂຄຆງຈຉຊຌຍຎຏຐຑຒຓດຕຖທຘນບປຜຝພຟຠມຢຣລວຨຩສຫຬອຮ" # Lao consonants + "ະາຳຽເແໂໃໄ" # Lao vowels + "ໜໝ" # Lao ligature + "".join(["ັ", "ິ", "ີ", "ຶ", "ື", "ຸ", "ູ", "຺", "ົ", "ຼ", "່", "້", "໊", "໋", "໌", "ໍ"]) ) # Burmese & Javanese # Rules: # - A syllable usually starts with a base consonant. # - Diacritics (sandhangan), which represent vowels and consonant modifications, are attached to the base consonant: # - Vowel signs (ꦴꦵꦶꦷꦸꦹꦺꦻꦼ) follow the consonant and determine the syllable's vowel sound. # - Medial signs like ꦿ (ra), ꦾ (ya), and ꦽ (vocalic r) modify the consonant cluster. # - The virama (꧀, called *pangkon*) suppresses the inherent vowel, # creating consonant clusters. # - Special signs like ꦀ (cecak), ꦁ (layar), ꦂ (cakra), and ꦃ (wignyan) # can appear before or after syllables to represent nasal or glottal finals. # - Independent vowels (ꦄꦅꦆꦇꦈꦉꦊꦋꦌꦍꦎ) can occur without a base consonant, especially at word/sentence starts. # - Use Unicode NFC normalization to ensure composed syllables render correctly. 
VOCABS["burmese"] = ( _BASE_VOCABS["digits"] + _BASE_VOCABS["burmese_digits"] + _BASE_VOCABS["burmese_consonants"] + _BASE_VOCABS["burmese_vowels"] + _BASE_VOCABS["burmese_diacritics"] + _BASE_VOCABS["burmese_virama"] + _BASE_VOCABS["burmese_punctuation"] ) VOCABS["javanese"] = ( _BASE_VOCABS["digits"] + _BASE_VOCABS["javanese_digits"] + _BASE_VOCABS["javanese_consonants"] + _BASE_VOCABS["javanese_vowels"] + _BASE_VOCABS["javanese_diacritics"] + _BASE_VOCABS["javanese_virama"] + _BASE_VOCABS["javanese_punctuation"] + _BASE_VOCABS["punctuation"] # western punctuation used in Javanese ) # Georgian (Mkhedruli - modern) VOCABS["georgian"] = ( _BASE_VOCABS["digits"] + "ႠႡႢႣႤႥႦႧႨႩႪႫႬႭႮႯႰႱႲႳႴႵႶႷႸႹႺႻႼႽႾႿჀჁჂჃჄჅჇჍაბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰჱჲჳჴჵჶჷჸჹჺჼჽჾჿ" + _BASE_VOCABS["punctuation"] + "჻" + "₾" # currency ) # Ethiopic VOCABS["ethiopic"] = ( "ሀሁሂሃሄህሆሇለሉሊላሌልሎሏሐሑሒሓሔሕሖሗመሙሚማሜምሞሟሠሡሢሣሤሥሦሧረሩሪራሬርሮሯሰሱሲሳሴስሶሷሸሹሺሻሼሽሾሿቀቁቂቃቄቅቆቇቈቊቋ" + "ቌቍቐቑቒቓቔቕቖቘቚቛቜቝበቡቢባቤብቦቧቨቩቪቫቬቭቮቯተቱቲታቴትቶቷቸቹቺቻቼችቾቿኀኁኂኃኄኅኆኇኈኊኋኌኍነኑኒናኔንኖኗኘኙኚኛኜኝኞኟአኡኢኣኤእኦኧ" + "ከኩኪካኬክኮኯኰኲኳኴኵኸኹኺኻኼኽኾዀዂዃዄዅወዉዊዋዌውዎዏዐዑዒዓዔዕዖዘዙዚዛዜዝዞዟዠዡዢዣዤዥዦዧየዩዪያዬይዮዯደዱዲዳዴድዶዷዸዹዺ" + "ዻዼዽዾዿጀጁጂጃጄጅጆጇገጉጊጋጌግጎጏጐጒጓጔጕጘጙጚጛጜጝጞጟጠጡጢጣጤጥጦጧጨጩጪጫጬጭጮጯጰጱጲጳጴጵጶጷጸጹጺጻጼጽጾጿፀፁፂፃፄፅፆ" + "ፇፈፉፊፋፌፍፎፏፐፑፒፓፔፕፖፗፘፙፚᎀᎁᎂᎃᎄᎅᎆᎇᎈᎉᎊᎋᎌᎍᎎᎏ" + "፩፪፫፬፭፮፯፰፱፲፳፴፵፶፷፸፹፺፻፼" # digits ) # East Asian VOCABS["japanese"] = ( _BASE_VOCABS["digits"] + "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづ" + "てでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめ" + "もゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟ" # Hiragana + "ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダ" + "チヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメ" + "モャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺーヽヾヿ" # Katakana # Kanji jōyō (incl. numerals) + "亜哀挨愛曖悪握圧扱宛嵐安案暗以衣位囲医依委威為畏胃尉異移萎偉椅彙意違維慰遺緯域育一壱逸茨芋引印因咽姻員院淫陰飲隠韻右宇羽雨唄鬱畝浦運雲" # noqa: E501 + "永泳英映栄営詠影鋭衛易疫益液駅悦越謁閲円延沿炎怨宴媛援園煙猿遠鉛塩演縁艶汚王凹央応往押旺欧殴桜翁奥横岡屋億憶臆虞乙俺卸音恩温穏下化火加" # noqa: E501 + "可仮何花佳価果河苛科架夏家荷華菓貨渦過嫁暇禍靴寡歌箇稼課蚊牙瓦我画芽賀雅餓介回灰会快戒改怪拐悔海界皆械絵開階塊楷解潰壊懐諧貝外劾害崖涯" # noqa: E501 + "街慨蓋該概骸垣柿各角拡革格核殻郭覚較隔閣確獲嚇穫学岳楽額顎掛潟括活喝渇割葛滑褐轄且株釜鎌刈干刊甘汗缶完肝官冠巻看陥乾勘患貫寒喚堪換敢棺" # noqa: E501 + "款間閑勧寛幹感漢慣管関歓監緩憾還館環簡観韓艦鑑丸含岸岩玩眼頑顔願企伎危机気岐希忌汽奇祈季紀軌既記起飢鬼帰基寄規亀喜幾揮期棋貴棄毀旗器畿" # noqa: E501 + "輝機騎技宜偽欺義疑儀戯擬犠議菊吉喫詰却客脚逆虐九久及弓丘旧休吸朽臼求究泣急級糾宮救球給嗅窮牛去巨居拒拠挙虚許距魚御漁凶共叫狂京享供協況" # noqa: E501 + "峡挟狭恐恭胸脅強教郷境橋矯鏡競響驚仰暁業凝曲局極玉巾斤均近金菌勤琴筋僅禁緊錦謹襟吟銀区句苦駆具惧愚空偶遇隅串屈掘窟熊繰君訓勲薫軍郡群兄" # noqa: E501 + "刑形系径茎係型契計恵啓掲渓経蛍敬景軽傾携継詣慶憬稽憩警鶏芸迎鯨隙劇撃激桁欠穴血決結傑潔月犬件見券肩建研県倹兼剣拳軒健険圏堅検嫌献絹遣権" # noqa: E501 + "憲賢謙鍵繭顕験懸元幻玄言弦限原現舷減源厳己戸古呼固股虎孤弧故枯個庫湖雇誇鼓錮顧五互午呉後娯悟碁語誤護口工公勾孔功巧広甲交光向后好江考行" # noqa: E501 + "坑孝抗攻更効幸拘肯侯厚恒洪皇紅荒郊香候校耕航貢降高康控梗黄喉慌港硬絞項溝鉱構綱酵稿興衡鋼講購乞号合拷剛傲豪克告谷刻国黒穀酷獄骨駒込頃今" # noqa: E501 + "困昆恨根婚混痕紺魂墾懇左佐沙査砂唆差詐鎖座挫才再災妻采砕宰栽彩採済祭斎細菜最裁債催塞歳載際埼在材剤財罪崎作削昨柵索策酢搾錯咲冊札刷刹拶" # noqa: E501 + "殺察撮擦雑皿三山参桟蚕惨産傘散算酸賛残斬暫士子支止氏仕史司四市矢旨死糸至伺志私使刺始姉枝祉肢姿思指施師恣紙脂視紫詞歯嗣試詩資飼誌雌摯賜" # noqa: E501 + "諮示字寺次耳自似児事侍治持時滋慈辞磁餌璽鹿式識軸七叱失室疾執湿嫉漆質実芝写社車舎者射捨赦斜煮遮謝邪蛇尺借酌釈爵若弱寂手主守朱取狩首殊珠" # noqa: E501 + "酒腫種趣寿受呪授需儒樹収囚州舟秀周宗拾秋臭修袖終羞習週就衆集愁酬醜蹴襲十汁充住柔重従渋銃獣縦叔祝宿淑粛縮塾熟出述術俊春瞬旬巡盾准殉純循" # noqa: E501 + "順準潤遵処初所書庶暑署緒諸女如助序叙徐除小升少召匠床抄肖尚招承昇松沼昭宵将消症祥称笑唱商渉章紹訟勝掌晶焼焦硝粧詔証象傷奨照詳彰障憧衝賞" # noqa: E501 + "償礁鐘上丈冗条状乗城浄剰常情場畳蒸縄壌嬢錠譲醸色拭食植殖飾触嘱織職辱尻心申伸臣芯身辛侵信津神唇娠振浸真針深紳進森診寝慎新審震薪親人刃仁" # noqa: E501 + "尽迅甚陣尋腎須図水吹垂炊帥粋衰推酔遂睡穂随髄枢崇数据杉裾寸瀬是井世正生成西声制姓征性青斉政星牲省凄逝清盛婿晴勢聖誠精製誓静請整醒税夕斥" # noqa: E501 + "石赤昔析席脊隻惜戚責跡積績籍切折拙窃接設雪摂節説舌絶千川仙占先宣専泉浅洗染扇栓旋船戦煎羨腺詮践箋銭潜線遷選薦繊鮮全前善然禅漸膳繕狙阻祖" # noqa: E501 + "租素措粗組疎訴塑遡礎双壮早争走奏相荘草送倉捜挿桑巣掃曹曽爽窓創喪痩葬装僧想層総遭槽踪操燥霜騒藻造像増憎蔵贈臓即束足促則息捉速側測俗族属" # noqa: E501 + "賊続卒率存村孫尊損遜他多汰打妥唾堕惰駄太対体耐待怠胎退帯泰堆袋逮替貸隊滞態戴大代台第題滝宅択沢卓拓託濯諾濁但達脱奪棚誰丹旦担単炭胆探淡" # noqa: E501 + "短嘆端綻誕鍛団男段断弾暖談壇地池知値恥致遅痴稚置緻竹畜逐蓄築秩窒茶着嫡中仲虫沖宙忠抽注昼柱衷酎鋳駐著貯丁弔庁兆町長挑帳張彫眺釣頂鳥朝貼" # noqa: E501 + "超腸跳徴嘲潮澄調聴懲直勅捗沈珍朕陳賃鎮追椎墜通痛塚漬坪爪鶴低呈廷弟定底抵邸亭貞帝訂庭逓停偵堤提程艇締諦泥的笛摘滴適敵溺迭哲鉄徹撤天典店" # noqa: E501 + "点展添転塡田伝殿電斗吐妬徒途都渡塗賭土奴努度怒刀冬灯当投豆東到逃倒凍唐島桃討透党悼盗陶塔搭棟湯痘登答等筒統稲踏糖頭謄藤闘騰同洞胴動堂童" # noqa: E501 + 
"道働銅導瞳峠匿特得督徳篤毒独読栃凸突届屯豚頓貪鈍曇丼那奈内梨謎鍋南軟難二尼弐匂肉虹日入乳尿任妊忍認寧熱年念捻粘燃悩納能脳農濃把波派破覇" # noqa: E501 + "馬婆罵拝杯背肺俳配排敗廃輩売倍梅培陪媒買賠白伯拍泊迫剝舶博薄麦漠縛爆箱箸畑肌八鉢発髪伐抜罰閥反半氾犯帆汎伴判坂阪板版班畔般販斑飯搬煩頒" # noqa: E501 + "範繁藩晩番蛮盤比皮妃否批彼披肥非卑飛疲秘被悲扉費碑罷避尾眉美備微鼻膝肘匹必泌筆姫百氷表俵票評漂標苗秒病描猫品浜貧賓頻敏瓶不夫父付布扶府" # noqa: E501 + "怖阜附訃負赴浮婦符富普腐敷膚賦譜侮武部舞封風伏服副幅復福腹複覆払沸仏物粉紛雰噴墳憤奮分文聞丙平兵併並柄陛閉塀幣弊蔽餅米壁璧癖別蔑片辺返" # noqa: E501 + "変偏遍編弁便勉歩保哺捕補舗母募墓慕暮簿方包芳邦奉宝抱放法泡胞俸倣峰砲崩訪報蜂豊飽褒縫亡乏忙坊妨忘防房肪某冒剖紡望傍帽棒貿貌暴膨謀頰北木" # noqa: E501 + "朴牧睦僕墨撲没勃堀本奔翻凡盆麻摩磨魔毎妹枚昧埋幕膜枕又末抹万満慢漫未味魅岬密蜜脈妙民眠矛務無夢霧娘名命明迷冥盟銘鳴滅免面綿麺茂模毛妄盲" # noqa: E501 + "耗猛網目黙門紋問冶夜野弥厄役約訳薬躍闇由油喩愉諭輸癒唯友有勇幽悠郵湧猶裕遊雄誘憂融優与予余誉預幼用羊妖洋要容庸揚揺葉陽溶腰様瘍踊窯養擁" # noqa: E501 + "謡曜抑沃浴欲翌翼拉裸羅来雷頼絡落酪辣乱卵覧濫藍欄吏利里理痢裏履璃離陸立律慄略柳流留竜粒隆硫侶旅虜慮了両良料涼猟陵量僚領寮療瞭糧力緑林厘" # noqa: E501 + "倫輪隣臨瑠涙累塁類令礼冷励戻例鈴零霊隷齢麗暦歴列劣烈裂恋連廉練錬呂炉賂路露老労弄郎朗浪廊楼漏籠六録麓論和話賄脇惑枠湾腕" # noqa: E501 + _BASE_VOCABS["punctuation"] + "。・〜°—、「」『』【】゛》《〉〈" + _BASE_VOCABS["currency"] ) VOCABS["korean"] = ( _BASE_VOCABS["digits"] + "가각갂갃간갅갆갇갈갉갊갋갌갍갎갏감갑값갓갔강갖갗갘같갚갛개객갞갟갠갡갢갣갤갥갦갧갨갩갪갫갬갭갮갯갰갱갲갳갴갵갶갷갸갹갺갻갼갽갾갿걀걁걂걃걄걅걆걇걈" # noqa: E501 + "걉걊걋걌걍걎걏걐걑걒걓걔걕걖걗걘걙걚걛걜걝걞걟걠걡걢걣걤걥걦걧걨걩걪걫걬걭걮걯거걱걲걳건걵걶걷걸걹걺걻걼걽걾걿검겁겂것겄겅겆겇겈겉겊겋게겍겎겏겐겑" # noqa: E501 + "겒겓겔겕겖겗겘겙겚겛겜겝겞겟겠겡겢겣겤겥겦겧겨격겪겫견겭겮겯결겱겲겳겴겵겶겷겸겹겺겻겼경겾겿곀곁곂곃계곅곆곇곈곉곊곋곌곍곎곏곐곑곒곓곔곕곖곗곘곙곚" # noqa: E501 + "곛곜곝곞곟고곡곢곣곤곥곦곧골곩곪곫곬곭곮곯곰곱곲곳곴공곶곷곸곹곺곻과곽곾곿관괁괂괃괄괅괆괇괈괉괊괋괌괍괎괏괐광괒괓괔괕괖괗괘괙괚괛괜괝괞괟괠괡괢괣" # noqa: E501 + "괤괥괦괧괨괩괪괫괬괭괮괯괰괱괲괳괴괵괶괷괸괹괺괻괼괽괾괿굀굁굂굃굄굅굆굇굈굉굊굋굌굍굎굏교굑굒굓굔굕굖굗굘굙굚굛굜굝굞굟굠굡굢굣굤굥굦굧굨굩굪굫구" # noqa: E501 + "국굮굯군굱굲굳굴굵굶굷굸굹굺굻굼굽굾굿궀궁궂궃궄궅궆궇궈궉궊궋권궍궎궏궐궑궒궓궔궕궖궗궘궙궚궛궜궝궞궟궠궡궢궣궤궥궦궧궨궩궪궫궬궭궮궯궰궱궲궳궴궵" # noqa: E501 + "궶궷궸궹궺궻궼궽궾궿귀귁귂귃귄귅귆귇귈귉귊귋귌귍귎귏귐귑귒귓귔귕귖귗귘귙귚귛규귝귞귟균귡귢귣귤귥귦귧귨귩귪귫귬귭귮귯귰귱귲귳귴귵귶귷그극귺귻근귽귾" # noqa: E501 + "귿글긁긂긃긄긅긆긇금급긊긋긌긍긎긏긐긑긒긓긔긕긖긗긘긙긚긛긜긝긞긟긠긡긢긣긤긥긦긧긨긩긪긫긬긭긮긯기긱긲긳긴긵긶긷길긹긺긻긼긽긾긿김깁깂깃깄깅깆깇" # noqa: E501 + "깈깉깊깋까깍깎깏깐깑깒깓깔깕깖깗깘깙깚깛깜깝깞깟깠깡깢깣깤깥깦깧깨깩깪깫깬깭깮깯깰깱깲깳깴깵깶깷깸깹깺깻깼깽깾깿꺀꺁꺂꺃꺄꺅꺆꺇꺈꺉꺊꺋꺌꺍꺎꺏꺐" # noqa: E501 + "꺑꺒꺓꺔꺕꺖꺗꺘꺙꺚꺛꺜꺝꺞꺟꺠꺡꺢꺣꺤꺥꺦꺧꺨꺩꺪꺫꺬꺭꺮꺯꺰꺱꺲꺳꺴꺵꺶꺷꺸꺹꺺꺻꺼꺽꺾꺿껀껁껂껃껄껅껆껇껈껉껊껋껌껍껎껏껐껑껒껓껔껕껖껗께껙" # noqa: E501 + "껚껛껜껝껞껟껠껡껢껣껤껥껦껧껨껩껪껫껬껭껮껯껰껱껲껳껴껵껶껷껸껹껺껻껼껽껾껿꼀꼁꼂꼃꼄꼅꼆꼇꼈꼉꼊꼋꼌꼍꼎꼏꼐꼑꼒꼓꼔꼕꼖꼗꼘꼙꼚꼛꼜꼝꼞꼟꼠꼡꼢" # noqa: E501 + "꼣꼤꼥꼦꼧꼨꼩꼪꼫꼬꼭꼮꼯꼰꼱꼲꼳꼴꼵꼶꼷꼸꼹꼺꼻꼼꼽꼾꼿꽀꽁꽂꽃꽄꽅꽆꽇꽈꽉꽊꽋꽌꽍꽎꽏꽐꽑꽒꽓꽔꽕꽖꽗꽘꽙꽚꽛꽜꽝꽞꽟꽠꽡꽢꽣꽤꽥꽦꽧꽨꽩꽪꽫" # noqa: E501 + "꽬꽭꽮꽯꽰꽱꽲꽳꽴꽵꽶꽷꽸꽹꽺꽻꽼꽽꽾꽿꾀꾁꾂꾃꾄꾅꾆꾇꾈꾉꾊꾋꾌꾍꾎꾏꾐꾑꾒꾓꾔꾕꾖꾗꾘꾙꾚꾛꾜꾝꾞꾟꾠꾡꾢꾣꾤꾥꾦꾧꾨꾩꾪꾫꾬꾭꾮꾯꾰꾱꾲꾳꾴" # noqa: E501 + "꾵꾶꾷꾸꾹꾺꾻꾼꾽꾾꾿꿀꿁꿂꿃꿄꿅꿆꿇꿈꿉꿊꿋꿌꿍꿎꿏꿐꿑꿒꿓꿔꿕꿖꿗꿘꿙꿚꿛꿜꿝꿞꿟꿠꿡꿢꿣꿤꿥꿦꿧꿨꿩꿪꿫꿬꿭꿮꿯꿰꿱꿲꿳꿴꿵꿶꿷꿸꿹꿺꿻꿼꿽" # noqa: E501 + "꿾꿿뀀뀁뀂뀃뀄뀅뀆뀇뀈뀉뀊뀋뀌뀍뀎뀏뀐뀑뀒뀓뀔뀕뀖뀗뀘뀙뀚뀛뀜뀝뀞뀟뀠뀡뀢뀣뀤뀥뀦뀧뀨뀩뀪뀫뀬뀭뀮뀯뀰뀱뀲뀳뀴뀵뀶뀷뀸뀹뀺뀻뀼뀽뀾뀿끀끁끂끃끄끅끆" # noqa: E501 + "끇끈끉끊끋끌끍끎끏끐끑끒끓끔끕끖끗끘끙끚끛끜끝끞끟끠끡끢끣끤끥끦끧끨끩끪끫끬끭끮끯끰끱끲끳끴끵끶끷끸끹끺끻끼끽끾끿낀낁낂낃낄낅낆낇낈낉낊낋낌낍낎낏" # noqa: E501 + "낐낑낒낓낔낕낖낗나낙낚낛난낝낞낟날낡낢낣낤낥낦낧남납낪낫났낭낮낯낰낱낲낳내낵낶낷낸낹낺낻낼낽낾낿냀냁냂냃냄냅냆냇냈냉냊냋냌냍냎냏냐냑냒냓냔냕냖냗냘" # noqa: E501 + "냙냚냛냜냝냞냟냠냡냢냣냤냥냦냧냨냩냪냫냬냭냮냯냰냱냲냳냴냵냶냷냸냹냺냻냼냽냾냿넀넁넂넃넄넅넆넇너넉넊넋넌넍넎넏널넑넒넓넔넕넖넗넘넙넚넛넜넝넞넟넠넡" # noqa: E501 + "넢넣네넥넦넧넨넩넪넫넬넭넮넯넰넱넲넳넴넵넶넷넸넹넺넻넼넽넾넿녀녁녂녃년녅녆녇녈녉녊녋녌녍녎녏념녑녒녓녔녕녖녗녘녙녚녛녜녝녞녟녠녡녢녣녤녥녦녧녨녩녪" # noqa: E501 + "녫녬녭녮녯녰녱녲녳녴녵녶녷노녹녺녻논녽녾녿놀놁놂놃놄놅놆놇놈놉놊놋놌농놎놏놐놑높놓놔놕놖놗놘놙놚놛놜놝놞놟놠놡놢놣놤놥놦놧놨놩놪놫놬놭놮놯놰놱놲놳" # noqa: E501 + "놴놵놶놷놸놹놺놻놼놽놾놿뇀뇁뇂뇃뇄뇅뇆뇇뇈뇉뇊뇋뇌뇍뇎뇏뇐뇑뇒뇓뇔뇕뇖뇗뇘뇙뇚뇛뇜뇝뇞뇟뇠뇡뇢뇣뇤뇥뇦뇧뇨뇩뇪뇫뇬뇭뇮뇯뇰뇱뇲뇳뇴뇵뇶뇷뇸뇹뇺뇻뇼" # noqa: E501 + "뇽뇾뇿눀눁눂눃누눅눆눇눈눉눊눋눌눍눎눏눐눑눒눓눔눕눖눗눘눙눚눛눜눝눞눟눠눡눢눣눤눥눦눧눨눩눪눫눬눭눮눯눰눱눲눳눴눵눶눷눸눹눺눻눼눽눾눿뉀뉁뉂뉃뉄뉅" # noqa: E501 + "뉆뉇뉈뉉뉊뉋뉌뉍뉎뉏뉐뉑뉒뉓뉔뉕뉖뉗뉘뉙뉚뉛뉜뉝뉞뉟뉠뉡뉢뉣뉤뉥뉦뉧뉨뉩뉪뉫뉬뉭뉮뉯뉰뉱뉲뉳뉴뉵뉶뉷뉸뉹뉺뉻뉼뉽뉾뉿늀늁늂늃늄늅늆늇늈늉늊늋늌늍늎" # noqa: E501 + "늏느늑늒늓는늕늖늗늘늙늚늛늜늝늞늟늠늡늢늣늤능늦늧늨늩늪늫늬늭늮늯늰늱늲늳늴늵늶늷늸늹늺늻늼늽늾늿닀닁닂닃닄닅닆닇니닉닊닋닌닍닎닏닐닑닒닓닔닕닖닗" # noqa: E501 + "님닙닚닛닜닝닞닟닠닡닢닣다닥닦닧단닩닪닫달닭닮닯닰닱닲닳담답닶닷닸당닺닻닼닽닾닿대댁댂댃댄댅댆댇댈댉댊댋댌댍댎댏댐댑댒댓댔댕댖댗댘댙댚댛댜댝댞댟댠" # noqa: E501 + "댡댢댣댤댥댦댧댨댩댪댫댬댭댮댯댰댱댲댳댴댵댶댷댸댹댺댻댼댽댾댿덀덁덂덃덄덅덆덇덈덉덊덋덌덍덎덏덐덑덒덓더덕덖덗던덙덚덛덜덝덞덟덠덡덢덣덤덥덦덧덨덩" # noqa: E501 + "덪덫덬덭덮덯데덱덲덳덴덵덶덷델덹덺덻덼덽덾덿뎀뎁뎂뎃뎄뎅뎆뎇뎈뎉뎊뎋뎌뎍뎎뎏뎐뎑뎒뎓뎔뎕뎖뎗뎘뎙뎚뎛뎜뎝뎞뎟뎠뎡뎢뎣뎤뎥뎦뎧뎨뎩뎪뎫뎬뎭뎮뎯뎰뎱뎲" # noqa: E501 + "뎳뎴뎵뎶뎷뎸뎹뎺뎻뎼뎽뎾뎿돀돁돂돃도독돆돇돈돉돊돋돌돍돎돏돐돑돒돓돔돕돖돗돘동돚돛돜돝돞돟돠돡돢돣돤돥돦돧돨돩돪돫돬돭돮돯돰돱돲돳돴돵돶돷돸돹돺돻" # noqa: E501 + "돼돽돾돿됀됁됂됃됄됅됆됇됈됉됊됋됌됍됎됏됐됑됒됓됔됕됖됗되됙됚됛된됝됞됟될됡됢됣됤됥됦됧됨됩됪됫됬됭됮됯됰됱됲됳됴됵됶됷됸됹됺됻됼됽됾됿둀둁둂둃둄" # noqa: E501 + 
"둅둆둇둈둉둊둋둌둍둎둏두둑둒둓둔둕둖둗둘둙둚둛둜둝둞둟둠둡둢둣둤둥둦둧둨둩둪둫둬둭둮둯둰둱둲둳둴둵둶둷둸둹둺둻둼둽둾둿뒀뒁뒂뒃뒄뒅뒆뒇뒈뒉뒊뒋뒌뒍" # noqa: E501 + "뒎뒏뒐뒑뒒뒓뒔뒕뒖뒗뒘뒙뒚뒛뒜뒝뒞뒟뒠뒡뒢뒣뒤뒥뒦뒧뒨뒩뒪뒫뒬뒭뒮뒯뒰뒱뒲뒳뒴뒵뒶뒷뒸뒹뒺뒻뒼뒽뒾뒿듀듁듂듃듄듅듆듇듈듉듊듋듌듍듎듏듐듑듒듓듔듕듖" # noqa: E501 + "듗듘듙듚듛드득듞듟든듡듢듣들듥듦듧듨듩듪듫듬듭듮듯듰등듲듳듴듵듶듷듸듹듺듻듼듽듾듿딀딁딂딃딄딅딆딇딈딉딊딋딌딍딎딏딐딑딒딓디딕딖딗딘딙딚딛딜딝딞딟" # noqa: E501 + "딠딡딢딣딤딥딦딧딨딩딪딫딬딭딮딯따딱딲딳딴딵딶딷딸딹딺딻딼딽딾딿땀땁땂땃땄땅땆땇땈땉땊땋때땍땎땏땐땑땒땓땔땕땖땗땘땙땚땛땜땝땞땟땠땡땢땣땤땥땦땧땨" # noqa: E501 + "땩땪땫땬땭땮땯땰땱땲땳땴땵땶땷땸땹땺땻땼땽땾땿떀떁떂떃떄떅떆떇떈떉떊떋떌떍떎떏떐떑떒떓떔떕떖떗떘떙떚떛떜떝떞떟떠떡떢떣떤떥떦떧떨떩떪떫떬떭떮떯떰떱" # noqa: E501 + "떲떳떴떵떶떷떸떹떺떻떼떽떾떿뗀뗁뗂뗃뗄뗅뗆뗇뗈뗉뗊뗋뗌뗍뗎뗏뗐뗑뗒뗓뗔뗕뗖뗗뗘뗙뗚뗛뗜뗝뗞뗟뗠뗡뗢뗣뗤뗥뗦뗧뗨뗩뗪뗫뗬뗭뗮뗯뗰뗱뗲뗳뗴뗵뗶뗷뗸뗹뗺" # noqa: E501 + "뗻뗼뗽뗾뗿똀똁똂똃똄똅똆똇똈똉똊똋똌똍똎똏또똑똒똓똔똕똖똗똘똙똚똛똜똝똞똟똠똡똢똣똤똥똦똧똨똩똪똫똬똭똮똯똰똱똲똳똴똵똶똷똸똹똺똻똼똽똾똿뙀뙁뙂뙃" # noqa: E501 + "뙄뙅뙆뙇뙈뙉뙊뙋뙌뙍뙎뙏뙐뙑뙒뙓뙔뙕뙖뙗뙘뙙뙚뙛뙜뙝뙞뙟뙠뙡뙢뙣뙤뙥뙦뙧뙨뙩뙪뙫뙬뙭뙮뙯뙰뙱뙲뙳뙴뙵뙶뙷뙸뙹뙺뙻뙼뙽뙾뙿뚀뚁뚂뚃뚄뚅뚆뚇뚈뚉뚊뚋뚌" # noqa: E501 + "뚍뚎뚏뚐뚑뚒뚓뚔뚕뚖뚗뚘뚙뚚뚛뚜뚝뚞뚟뚠뚡뚢뚣뚤뚥뚦뚧뚨뚩뚪뚫뚬뚭뚮뚯뚰뚱뚲뚳뚴뚵뚶뚷뚸뚹뚺뚻뚼뚽뚾뚿뛀뛁뛂뛃뛄뛅뛆뛇뛈뛉뛊뛋뛌뛍뛎뛏뛐뛑뛒뛓뛔뛕" # noqa: E501 + "뛖뛗뛘뛙뛚뛛뛜뛝뛞뛟뛠뛡뛢뛣뛤뛥뛦뛧뛨뛩뛪뛫뛬뛭뛮뛯뛰뛱뛲뛳뛴뛵뛶뛷뛸뛹뛺뛻뛼뛽뛾뛿뜀뜁뜂뜃뜄뜅뜆뜇뜈뜉뜊뜋뜌뜍뜎뜏뜐뜑뜒뜓뜔뜕뜖뜗뜘뜙뜚뜛뜜뜝뜞" # noqa: E501 + "뜟뜠뜡뜢뜣뜤뜥뜦뜧뜨뜩뜪뜫뜬뜭뜮뜯뜰뜱뜲뜳뜴뜵뜶뜷뜸뜹뜺뜻뜼뜽뜾뜿띀띁띂띃띄띅띆띇띈띉띊띋띌띍띎띏띐띑띒띓띔띕띖띗띘띙띚띛띜띝띞띟띠띡띢띣띤띥띦띧" # noqa: E501 + "띨띩띪띫띬띭띮띯띰띱띲띳띴띵띶띷띸띹띺띻라락띾띿란랁랂랃랄랅랆랇랈랉랊랋람랍랎랏랐랑랒랓랔랕랖랗래랙랚랛랜랝랞랟랠랡랢랣랤랥랦랧램랩랪랫랬랭랮랯랰" # noqa: E501 + "랱랲랳랴략랶랷랸랹랺랻랼랽랾랿럀럁럂럃럄럅럆럇럈량럊럋럌럍럎럏럐럑럒럓럔럕럖럗럘럙럚럛럜럝럞럟럠럡럢럣럤럥럦럧럨럩럪럫러럭럮럯런럱럲럳럴럵럶럷럸럹" # noqa: E501 + "럺럻럼럽럾럿렀렁렂렃렄렅렆렇레렉렊렋렌렍렎렏렐렑렒렓렔렕렖렗렘렙렚렛렜렝렞렟렠렡렢렣려력렦렧련렩렪렫렬렭렮렯렰렱렲렳렴렵렶렷렸령렺렻렼렽렾렿례롁롂" # noqa: E501 + "롃롄롅롆롇롈롉롊롋롌롍롎롏롐롑롒롓롔롕롖롗롘롙롚롛로록롞롟론롡롢롣롤롥롦롧롨롩롪롫롬롭롮롯롰롱롲롳롴롵롶롷롸롹롺롻롼롽롾롿뢀뢁뢂뢃뢄뢅뢆뢇뢈뢉뢊뢋" # noqa: E501 + "뢌뢍뢎뢏뢐뢑뢒뢓뢔뢕뢖뢗뢘뢙뢚뢛뢜뢝뢞뢟뢠뢡뢢뢣뢤뢥뢦뢧뢨뢩뢪뢫뢬뢭뢮뢯뢰뢱뢲뢳뢴뢵뢶뢷뢸뢹뢺뢻뢼뢽뢾뢿룀룁룂룃룄룅룆룇룈룉룊룋료룍룎룏룐룑룒룓룔" # noqa: E501 + "룕룖룗룘룙룚룛룜룝룞룟룠룡룢룣룤룥룦룧루룩룪룫룬룭룮룯룰룱룲룳룴룵룶룷룸룹룺룻룼룽룾룿뤀뤁뤂뤃뤄뤅뤆뤇뤈뤉뤊뤋뤌뤍뤎뤏뤐뤑뤒뤓뤔뤕뤖뤗뤘뤙뤚뤛뤜뤝" # noqa: E501 + "뤞뤟뤠뤡뤢뤣뤤뤥뤦뤧뤨뤩뤪뤫뤬뤭뤮뤯뤰뤱뤲뤳뤴뤵뤶뤷뤸뤹뤺뤻뤼뤽뤾뤿륀륁륂륃륄륅륆륇륈륉륊륋륌륍륎륏륐륑륒륓륔륕륖륗류륙륚륛륜륝륞륟률륡륢륣륤륥륦" # noqa: E501 + "륧륨륩륪륫륬륭륮륯륰륱륲륳르륵륶륷른륹륺륻를륽륾륿릀릁릂릃름릅릆릇릈릉릊릋릌릍릎릏릐릑릒릓릔릕릖릗릘릙릚릛릜릝릞릟릠릡릢릣릤릥릦릧릨릩릪릫리릭릮릯" # noqa: E501 + "린릱릲릳릴릵릶릷릸릹릺릻림립릾릿맀링맂맃맄맅맆맇마막맊맋만맍많맏말맑맒맓맔맕맖맗맘맙맚맛맜망맞맟맠맡맢맣매맥맦맧맨맩맪맫맬맭맮맯맰맱맲맳맴맵맶맷맸" # noqa: E501 + "맹맺맻맼맽맾맿먀먁먂먃먄먅먆먇먈먉먊먋먌먍먎먏먐먑먒먓먔먕먖먗먘먙먚먛먜먝먞먟먠먡먢먣먤먥먦먧먨먩먪먫먬먭먮먯먰먱먲먳먴먵먶먷머먹먺먻먼먽먾먿멀멁" # noqa: E501 + "멂멃멄멅멆멇멈멉멊멋멌멍멎멏멐멑멒멓메멕멖멗멘멙멚멛멜멝멞멟멠멡멢멣멤멥멦멧멨멩멪멫멬멭멮멯며멱멲멳면멵멶멷멸멹멺멻멼멽멾멿몀몁몂몃몄명몆몇몈몉몊" # noqa: E501 + "몋몌몍몎몏몐몑몒몓몔몕몖몗몘몙몚몛몜몝몞몟몠몡몢몣몤몥몦몧모목몪몫몬몭몮몯몰몱몲몳몴몵몶몷몸몹몺못몼몽몾몿뫀뫁뫂뫃뫄뫅뫆뫇뫈뫉뫊뫋뫌뫍뫎뫏뫐뫑뫒뫓" # noqa: E501 + "뫔뫕뫖뫗뫘뫙뫚뫛뫜뫝뫞뫟뫠뫡뫢뫣뫤뫥뫦뫧뫨뫩뫪뫫뫬뫭뫮뫯뫰뫱뫲뫳뫴뫵뫶뫷뫸뫹뫺뫻뫼뫽뫾뫿묀묁묂묃묄묅묆묇묈묉묊묋묌묍묎묏묐묑묒묓묔묕묖묗묘묙묚묛묜" # noqa: E501 + "묝묞묟묠묡묢묣묤묥묦묧묨묩묪묫묬묭묮묯묰묱묲묳무묵묶묷문묹묺묻물묽묾묿뭀뭁뭂뭃뭄뭅뭆뭇뭈뭉뭊뭋뭌뭍뭎뭏뭐뭑뭒뭓뭔뭕뭖뭗뭘뭙뭚뭛뭜뭝뭞뭟뭠뭡뭢뭣뭤뭥" # noqa: E501 + "뭦뭧뭨뭩뭪뭫뭬뭭뭮뭯뭰뭱뭲뭳뭴뭵뭶뭷뭸뭹뭺뭻뭼뭽뭾뭿뮀뮁뮂뮃뮄뮅뮆뮇뮈뮉뮊뮋뮌뮍뮎뮏뮐뮑뮒뮓뮔뮕뮖뮗뮘뮙뮚뮛뮜뮝뮞뮟뮠뮡뮢뮣뮤뮥뮦뮧뮨뮩뮪뮫뮬뮭뮮" # noqa: E501 + "뮯뮰뮱뮲뮳뮴뮵뮶뮷뮸뮹뮺뮻뮼뮽뮾뮿므믁믂믃믄믅믆믇믈믉믊믋믌믍믎믏믐믑믒믓믔믕믖믗믘믙믚믛믜믝믞믟믠믡믢믣믤믥믦믧믨믩믪믫믬믭믮믯믰믱믲믳믴믵믶믷" # noqa: E501 + "미믹믺믻민믽믾믿밀밁밂밃밄밅밆밇밈밉밊밋밌밍밎및밐밑밒밓바박밖밗반밙밚받발밝밞밟밠밡밢밣밤밥밦밧밨방밪밫밬밭밮밯배백밲밳밴밵밶밷밸밹밺밻밼밽밾밿뱀" # noqa: E501 + "뱁뱂뱃뱄뱅뱆뱇뱈뱉뱊뱋뱌뱍뱎뱏뱐뱑뱒뱓뱔뱕뱖뱗뱘뱙뱚뱛뱜뱝뱞뱟뱠뱡뱢뱣뱤뱥뱦뱧뱨뱩뱪뱫뱬뱭뱮뱯뱰뱱뱲뱳뱴뱵뱶뱷뱸뱹뱺뱻뱼뱽뱾뱿벀벁벂벃버벅벆벇번벉" # noqa: E501 + "벊벋벌벍벎벏벐벑벒벓범법벖벗벘벙벚벛벜벝벞벟베벡벢벣벤벥벦벧벨벩벪벫벬벭벮벯벰벱벲벳벴벵벶벷벸벹벺벻벼벽벾벿변볁볂볃별볅볆볇볈볉볊볋볌볍볎볏볐병볒" # noqa: E501 + "볓볔볕볖볗볘볙볚볛볜볝볞볟볠볡볢볣볤볥볦볧볨볩볪볫볬볭볮볯볰볱볲볳보복볶볷본볹볺볻볼볽볾볿봀봁봂봃봄봅봆봇봈봉봊봋봌봍봎봏봐봑봒봓봔봕봖봗봘봙봚봛" # noqa: E501 + "봜봝봞봟봠봡봢봣봤봥봦봧봨봩봪봫봬봭봮봯봰봱봲봳봴봵봶봷봸봹봺봻봼봽봾봿뵀뵁뵂뵃뵄뵅뵆뵇뵈뵉뵊뵋뵌뵍뵎뵏뵐뵑뵒뵓뵔뵕뵖뵗뵘뵙뵚뵛뵜뵝뵞뵟뵠뵡뵢뵣뵤" # noqa: E501 + "뵥뵦뵧뵨뵩뵪뵫뵬뵭뵮뵯뵰뵱뵲뵳뵴뵵뵶뵷뵸뵹뵺뵻뵼뵽뵾뵿부북붂붃분붅붆붇불붉붊붋붌붍붎붏붐붑붒붓붔붕붖붗붘붙붚붛붜붝붞붟붠붡붢붣붤붥붦붧붨붩붪붫붬붭" # noqa: E501 + "붮붯붰붱붲붳붴붵붶붷붸붹붺붻붼붽붾붿뷀뷁뷂뷃뷄뷅뷆뷇뷈뷉뷊뷋뷌뷍뷎뷏뷐뷑뷒뷓뷔뷕뷖뷗뷘뷙뷚뷛뷜뷝뷞뷟뷠뷡뷢뷣뷤뷥뷦뷧뷨뷩뷪뷫뷬뷭뷮뷯뷰뷱뷲뷳뷴뷵뷶" # noqa: E501 + "뷷뷸뷹뷺뷻뷼뷽뷾뷿븀븁븂븃븄븅븆븇븈븉븊븋브븍븎븏븐븑븒븓블븕븖븗븘븙븚븛븜븝븞븟븠븡븢븣븤븥븦븧븨븩븪븫븬븭븮븯븰븱븲븳븴븵븶븷븸븹븺븻븼븽븾븿" # noqa: E501 + "빀빁빂빃비빅빆빇빈빉빊빋빌빍빎빏빐빑빒빓빔빕빖빗빘빙빚빛빜빝빞빟빠빡빢빣빤빥빦빧빨빩빪빫빬빭빮빯빰빱빲빳빴빵빶빷빸빹빺빻빼빽빾빿뺀뺁뺂뺃뺄뺅뺆뺇뺈" # noqa: E501 + "뺉뺊뺋뺌뺍뺎뺏뺐뺑뺒뺓뺔뺕뺖뺗뺘뺙뺚뺛뺜뺝뺞뺟뺠뺡뺢뺣뺤뺥뺦뺧뺨뺩뺪뺫뺬뺭뺮뺯뺰뺱뺲뺳뺴뺵뺶뺷뺸뺹뺺뺻뺼뺽뺾뺿뻀뻁뻂뻃뻄뻅뻆뻇뻈뻉뻊뻋뻌뻍뻎뻏뻐뻑" # noqa: E501 + "뻒뻓뻔뻕뻖뻗뻘뻙뻚뻛뻜뻝뻞뻟뻠뻡뻢뻣뻤뻥뻦뻧뻨뻩뻪뻫뻬뻭뻮뻯뻰뻱뻲뻳뻴뻵뻶뻷뻸뻹뻺뻻뻼뻽뻾뻿뼀뼁뼂뼃뼄뼅뼆뼇뼈뼉뼊뼋뼌뼍뼎뼏뼐뼑뼒뼓뼔뼕뼖뼗뼘뼙뼚" # noqa: E501 + "뼛뼜뼝뼞뼟뼠뼡뼢뼣뼤뼥뼦뼧뼨뼩뼪뼫뼬뼭뼮뼯뼰뼱뼲뼳뼴뼵뼶뼷뼸뼹뼺뼻뼼뼽뼾뼿뽀뽁뽂뽃뽄뽅뽆뽇뽈뽉뽊뽋뽌뽍뽎뽏뽐뽑뽒뽓뽔뽕뽖뽗뽘뽙뽚뽛뽜뽝뽞뽟뽠뽡뽢뽣" # noqa: E501 + 
"뽤뽥뽦뽧뽨뽩뽪뽫뽬뽭뽮뽯뽰뽱뽲뽳뽴뽵뽶뽷뽸뽹뽺뽻뽼뽽뽾뽿뾀뾁뾂뾃뾄뾅뾆뾇뾈뾉뾊뾋뾌뾍뾎뾏뾐뾑뾒뾓뾔뾕뾖뾗뾘뾙뾚뾛뾜뾝뾞뾟뾠뾡뾢뾣뾤뾥뾦뾧뾨뾩뾪뾫뾬" # noqa: E501 + "뾭뾮뾯뾰뾱뾲뾳뾴뾵뾶뾷뾸뾹뾺뾻뾼뾽뾾뾿뿀뿁뿂뿃뿄뿅뿆뿇뿈뿉뿊뿋뿌뿍뿎뿏뿐뿑뿒뿓뿔뿕뿖뿗뿘뿙뿚뿛뿜뿝뿞뿟뿠뿡뿢뿣뿤뿥뿦뿧뿨뿩뿪뿫뿬뿭뿮뿯뿰뿱뿲뿳뿴뿵" # noqa: E501 + "뿶뿷뿸뿹뿺뿻뿼뿽뿾뿿쀀쀁쀂쀃쀄쀅쀆쀇쀈쀉쀊쀋쀌쀍쀎쀏쀐쀑쀒쀓쀔쀕쀖쀗쀘쀙쀚쀛쀜쀝쀞쀟쀠쀡쀢쀣쀤쀥쀦쀧쀨쀩쀪쀫쀬쀭쀮쀯쀰쀱쀲쀳쀴쀵쀶쀷쀸쀹쀺쀻쀼쀽쀾" # noqa: E501 + "쀿쁀쁁쁂쁃쁄쁅쁆쁇쁈쁉쁊쁋쁌쁍쁎쁏쁐쁑쁒쁓쁔쁕쁖쁗쁘쁙쁚쁛쁜쁝쁞쁟쁠쁡쁢쁣쁤쁥쁦쁧쁨쁩쁪쁫쁬쁭쁮쁯쁰쁱쁲쁳쁴쁵쁶쁷쁸쁹쁺쁻쁼쁽쁾쁿삀삁삂삃삄삅삆삇" # noqa: E501 + "삈삉삊삋삌삍삎삏삐삑삒삓삔삕삖삗삘삙삚삛삜삝삞삟삠삡삢삣삤삥삦삧삨삩삪삫사삭삮삯산삱삲삳살삵삶삷삸삹삺삻삼삽삾삿샀상샂샃샄샅샆샇새색샊샋샌샍샎샏샐" # noqa: E501 + "샑샒샓샔샕샖샗샘샙샚샛샜생샞샟샠샡샢샣샤샥샦샧샨샩샪샫샬샭샮샯샰샱샲샳샴샵샶샷샸샹샺샻샼샽샾샿섀섁섂섃섄섅섆섇섈섉섊섋섌섍섎섏섐섑섒섓섔섕섖섗섘섙" # noqa: E501 + "섚섛서석섞섟선섡섢섣설섥섦섧섨섩섪섫섬섭섮섯섰성섲섳섴섵섶섷세섹섺섻센섽섾섿셀셁셂셃셄셅셆셇셈셉셊셋셌셍셎셏셐셑셒셓셔셕셖셗션셙셚셛셜셝셞셟셠셡셢" # noqa: E501 + "셣셤셥셦셧셨셩셪셫셬셭셮셯셰셱셲셳셴셵셶셷셸셹셺셻셼셽셾셿솀솁솂솃솄솅솆솇솈솉솊솋소속솎솏손솑솒솓솔솕솖솗솘솙솚솛솜솝솞솟솠송솢솣솤솥솦솧솨솩솪솫" # noqa: E501 + "솬솭솮솯솰솱솲솳솴솵솶솷솸솹솺솻솼솽솾솿쇀쇁쇂쇃쇄쇅쇆쇇쇈쇉쇊쇋쇌쇍쇎쇏쇐쇑쇒쇓쇔쇕쇖쇗쇘쇙쇚쇛쇜쇝쇞쇟쇠쇡쇢쇣쇤쇥쇦쇧쇨쇩쇪쇫쇬쇭쇮쇯쇰쇱쇲쇳쇴" # noqa: E501 + "쇵쇶쇷쇸쇹쇺쇻쇼쇽쇾쇿숀숁숂숃숄숅숆숇숈숉숊숋숌숍숎숏숐숑숒숓숔숕숖숗수숙숚숛순숝숞숟술숡숢숣숤숥숦숧숨숩숪숫숬숭숮숯숰숱숲숳숴숵숶숷숸숹숺숻숼숽" # noqa: E501 + "숾숿쉀쉁쉂쉃쉄쉅쉆쉇쉈쉉쉊쉋쉌쉍쉎쉏쉐쉑쉒쉓쉔쉕쉖쉗쉘쉙쉚쉛쉜쉝쉞쉟쉠쉡쉢쉣쉤쉥쉦쉧쉨쉩쉪쉫쉬쉭쉮쉯쉰쉱쉲쉳쉴쉵쉶쉷쉸쉹쉺쉻쉼쉽쉾쉿슀슁슂슃슄슅슆" # noqa: E501 + "슇슈슉슊슋슌슍슎슏슐슑슒슓슔슕슖슗슘슙슚슛슜슝슞슟슠슡슢슣스슥슦슧슨슩슪슫슬슭슮슯슰슱슲슳슴습슶슷슸승슺슻슼슽슾슿싀싁싂싃싄싅싆싇싈싉싊싋싌싍싎싏" # noqa: E501 + "싐싑싒싓싔싕싖싗싘싙싚싛시식싞싟신싡싢싣실싥싦싧싨싩싪싫심십싮싯싰싱싲싳싴싵싶싷싸싹싺싻싼싽싾싿쌀쌁쌂쌃쌄쌅쌆쌇쌈쌉쌊쌋쌌쌍쌎쌏쌐쌑쌒쌓쌔쌕쌖쌗쌘" # noqa: E501 + "쌙쌚쌛쌜쌝쌞쌟쌠쌡쌢쌣쌤쌥쌦쌧쌨쌩쌪쌫쌬쌭쌮쌯쌰쌱쌲쌳쌴쌵쌶쌷쌸쌹쌺쌻쌼쌽쌾쌿썀썁썂썃썄썅썆썇썈썉썊썋썌썍썎썏썐썑썒썓썔썕썖썗썘썙썚썛썜썝썞썟썠썡" # noqa: E501 + "썢썣썤썥썦썧써썩썪썫썬썭썮썯썰썱썲썳썴썵썶썷썸썹썺썻썼썽썾썿쎀쎁쎂쎃쎄쎅쎆쎇쎈쎉쎊쎋쎌쎍쎎쎏쎐쎑쎒쎓쎔쎕쎖쎗쎘쎙쎚쎛쎜쎝쎞쎟쎠쎡쎢쎣쎤쎥쎦쎧쎨쎩쎪" # noqa: E501 + "쎫쎬쎭쎮쎯쎰쎱쎲쎳쎴쎵쎶쎷쎸쎹쎺쎻쎼쎽쎾쎿쏀쏁쏂쏃쏄쏅쏆쏇쏈쏉쏊쏋쏌쏍쏎쏏쏐쏑쏒쏓쏔쏕쏖쏗쏘쏙쏚쏛쏜쏝쏞쏟쏠쏡쏢쏣쏤쏥쏦쏧쏨쏩쏪쏫쏬쏭쏮쏯쏰쏱쏲쏳" # noqa: E501 + "쏴쏵쏶쏷쏸쏹쏺쏻쏼쏽쏾쏿쐀쐁쐂쐃쐄쐅쐆쐇쐈쐉쐊쐋쐌쐍쐎쐏쐐쐑쐒쐓쐔쐕쐖쐗쐘쐙쐚쐛쐜쐝쐞쐟쐠쐡쐢쐣쐤쐥쐦쐧쐨쐩쐪쐫쐬쐭쐮쐯쐰쐱쐲쐳쐴쐵쐶쐷쐸쐹쐺쐻쐼" # noqa: E501 + "쐽쐾쐿쑀쑁쑂쑃쑄쑅쑆쑇쑈쑉쑊쑋쑌쑍쑎쑏쑐쑑쑒쑓쑔쑕쑖쑗쑘쑙쑚쑛쑜쑝쑞쑟쑠쑡쑢쑣쑤쑥쑦쑧쑨쑩쑪쑫쑬쑭쑮쑯쑰쑱쑲쑳쑴쑵쑶쑷쑸쑹쑺쑻쑼쑽쑾쑿쒀쒁쒂쒃쒄쒅" # noqa: E501 + "쒆쒇쒈쒉쒊쒋쒌쒍쒎쒏쒐쒑쒒쒓쒔쒕쒖쒗쒘쒙쒚쒛쒜쒝쒞쒟쒠쒡쒢쒣쒤쒥쒦쒧쒨쒩쒪쒫쒬쒭쒮쒯쒰쒱쒲쒳쒴쒵쒶쒷쒸쒹쒺쒻쒼쒽쒾쒿쓀쓁쓂쓃쓄쓅쓆쓇쓈쓉쓊쓋쓌쓍쓎" # noqa: E501 + "쓏쓐쓑쓒쓓쓔쓕쓖쓗쓘쓙쓚쓛쓜쓝쓞쓟쓠쓡쓢쓣쓤쓥쓦쓧쓨쓩쓪쓫쓬쓭쓮쓯쓰쓱쓲쓳쓴쓵쓶쓷쓸쓹쓺쓻쓼쓽쓾쓿씀씁씂씃씄씅씆씇씈씉씊씋씌씍씎씏씐씑씒씓씔씕씖씗" # noqa: E501 + "씘씙씚씛씜씝씞씟씠씡씢씣씤씥씦씧씨씩씪씫씬씭씮씯씰씱씲씳씴씵씶씷씸씹씺씻씼씽씾씿앀앁앂앃아악앆앇안앉않앋알앍앎앏앐앑앒앓암압앖앗았앙앚앛앜앝앞앟애" # noqa: E501 + "액앢앣앤앥앦앧앨앩앪앫앬앭앮앯앰앱앲앳앴앵앶앷앸앹앺앻야약앾앿얀얁얂얃얄얅얆얇얈얉얊얋얌얍얎얏얐양얒얓얔얕얖얗얘얙얚얛얜얝얞얟얠얡얢얣얤얥얦얧얨얩" # noqa: E501 + "얪얫얬얭얮얯얰얱얲얳어억얶얷언얹얺얻얼얽얾얿엀엁엂엃엄업없엇었엉엊엋엌엍엎엏에엑엒엓엔엕엖엗엘엙엚엛엜엝엞엟엠엡엢엣엤엥엦엧엨엩엪엫여역엮엯연엱엲" # noqa: E501 + "엳열엵엶엷엸엹엺엻염엽엾엿였영옂옃옄옅옆옇예옉옊옋옌옍옎옏옐옑옒옓옔옕옖옗옘옙옚옛옜옝옞옟옠옡옢옣오옥옦옧온옩옪옫올옭옮옯옰옱옲옳옴옵옶옷옸옹옺옻" # noqa: E501 + "옼옽옾옿와왁왂왃완왅왆왇왈왉왊왋왌왍왎왏왐왑왒왓왔왕왖왗왘왙왚왛왜왝왞왟왠왡왢왣왤왥왦왧왨왩왪왫왬왭왮왯왰왱왲왳왴왵왶왷외왹왺왻왼왽왾왿욀욁욂욃욄" # noqa: E501 + "욅욆욇욈욉욊욋욌욍욎욏욐욑욒욓요욕욖욗욘욙욚욛욜욝욞욟욠욡욢욣욤욥욦욧욨용욪욫욬욭욮욯우욱욲욳운욵욶욷울욹욺욻욼욽욾욿움웁웂웃웄웅웆웇웈웉웊웋워웍" # noqa: E501 + "웎웏원웑웒웓월웕웖웗웘웙웚웛웜웝웞웟웠웡웢웣웤웥웦웧웨웩웪웫웬웭웮웯웰웱웲웳웴웵웶웷웸웹웺웻웼웽웾웿윀윁윂윃위윅윆윇윈윉윊윋윌윍윎윏윐윑윒윓윔윕윖" # noqa: E501 + "윗윘윙윚윛윜윝윞윟유육윢윣윤윥윦윧율윩윪윫윬윭윮윯윰윱윲윳윴융윶윷윸윹윺윻으윽윾윿은읁읂읃을읅읆읇읈읉읊읋음읍읎읏읐응읒읓읔읕읖읗의읙읚읛읜읝읞읟" # noqa: E501 + "읠읡읢읣읤읥읦읧읨읩읪읫읬읭읮읯읰읱읲읳이익읶읷인읹읺읻일읽읾읿잀잁잂잃임입잆잇있잉잊잋잌잍잎잏자작잒잓잔잕잖잗잘잙잚잛잜잝잞잟잠잡잢잣잤장잦잧잨" # noqa: E501 + "잩잪잫재잭잮잯잰잱잲잳잴잵잶잷잸잹잺잻잼잽잾잿쟀쟁쟂쟃쟄쟅쟆쟇쟈쟉쟊쟋쟌쟍쟎쟏쟐쟑쟒쟓쟔쟕쟖쟗쟘쟙쟚쟛쟜쟝쟞쟟쟠쟡쟢쟣쟤쟥쟦쟧쟨쟩쟪쟫쟬쟭쟮쟯쟰쟱" # noqa: E501 + "쟲쟳쟴쟵쟶쟷쟸쟹쟺쟻쟼쟽쟾쟿저적젂젃전젅젆젇절젉젊젋젌젍젎젏점접젒젓젔정젖젗젘젙젚젛제젝젞젟젠젡젢젣젤젥젦젧젨젩젪젫젬젭젮젯젰젱젲젳젴젵젶젷져젹젺" # noqa: E501 + "젻젼젽젾젿졀졁졂졃졄졅졆졇졈졉졊졋졌졍졎졏졐졑졒졓졔졕졖졗졘졙졚졛졜졝졞졟졠졡졢졣졤졥졦졧졨졩졪졫졬졭졮졯조족졲졳존졵졶졷졸졹졺졻졼졽졾졿좀좁좂좃" # noqa: E501 + "좄종좆좇좈좉좊좋좌좍좎좏좐좑좒좓좔좕좖좗좘좙좚좛좜좝좞좟좠좡좢좣좤좥좦좧좨좩좪좫좬좭좮좯좰좱좲좳좴좵좶좷좸좹좺좻좼좽좾좿죀죁죂죃죄죅죆죇죈죉죊죋죌" # noqa: E501 + "죍죎죏죐죑죒죓죔죕죖죗죘죙죚죛죜죝죞죟죠죡죢죣죤죥죦죧죨죩죪죫죬죭죮죯죰죱죲죳죴죵죶죷죸죹죺죻주죽죾죿준줁줂줃줄줅줆줇줈줉줊줋줌줍줎줏줐중줒줓줔줕" # noqa: E501 + "줖줗줘줙줚줛줜줝줞줟줠줡줢줣줤줥줦줧줨줩줪줫줬줭줮줯줰줱줲줳줴줵줶줷줸줹줺줻줼줽줾줿쥀쥁쥂쥃쥄쥅쥆쥇쥈쥉쥊쥋쥌쥍쥎쥏쥐쥑쥒쥓쥔쥕쥖쥗쥘쥙쥚쥛쥜쥝쥞" # noqa: E501 + "쥟쥠쥡쥢쥣쥤쥥쥦쥧쥨쥩쥪쥫쥬쥭쥮쥯쥰쥱쥲쥳쥴쥵쥶쥷쥸쥹쥺쥻쥼쥽쥾쥿즀즁즂즃즄즅즆즇즈즉즊즋즌즍즎즏즐즑즒즓즔즕즖즗즘즙즚즛즜증즞즟즠즡즢즣즤즥즦즧" # noqa: E501 + "즨즩즪즫즬즭즮즯즰즱즲즳즴즵즶즷즸즹즺즻즼즽즾즿지직짂짃진짅짆짇질짉짊짋짌짍짎짏짐집짒짓짔징짖짗짘짙짚짛짜짝짞짟짠짡짢짣짤짥짦짧짨짩짪짫짬짭짮짯짰" # noqa: E501 + "짱짲짳짴짵짶짷째짹짺짻짼짽짾짿쨀쨁쨂쨃쨄쨅쨆쨇쨈쨉쨊쨋쨌쨍쨎쨏쨐쨑쨒쨓쨔쨕쨖쨗쨘쨙쨚쨛쨜쨝쨞쨟쨠쨡쨢쨣쨤쨥쨦쨧쨨쨩쨪쨫쨬쨭쨮쨯쨰쨱쨲쨳쨴쨵쨶쨷쨸쨹" # noqa: E501 + "쨺쨻쨼쨽쨾쨿쩀쩁쩂쩃쩄쩅쩆쩇쩈쩉쩊쩋쩌쩍쩎쩏쩐쩑쩒쩓쩔쩕쩖쩗쩘쩙쩚쩛쩜쩝쩞쩟쩠쩡쩢쩣쩤쩥쩦쩧쩨쩩쩪쩫쩬쩭쩮쩯쩰쩱쩲쩳쩴쩵쩶쩷쩸쩹쩺쩻쩼쩽쩾쩿쪀쪁쪂" # noqa: E501 + 
"쪃쪄쪅쪆쪇쪈쪉쪊쪋쪌쪍쪎쪏쪐쪑쪒쪓쪔쪕쪖쪗쪘쪙쪚쪛쪜쪝쪞쪟쪠쪡쪢쪣쪤쪥쪦쪧쪨쪩쪪쪫쪬쪭쪮쪯쪰쪱쪲쪳쪴쪵쪶쪷쪸쪹쪺쪻쪼쪽쪾쪿쫀쫁쫂쫃쫄쫅쫆쫇쫈쫉쫊쫋" # noqa: E501 + "쫌쫍쫎쫏쫐쫑쫒쫓쫔쫕쫖쫗쫘쫙쫚쫛쫜쫝쫞쫟쫠쫡쫢쫣쫤쫥쫦쫧쫨쫩쫪쫫쫬쫭쫮쫯쫰쫱쫲쫳쫴쫵쫶쫷쫸쫹쫺쫻쫼쫽쫾쫿쬀쬁쬂쬃쬄쬅쬆쬇쬈쬉쬊쬋쬌쬍쬎쬏쬐쬑쬒쬓쬔" # noqa: E501 + "쬕쬖쬗쬘쬙쬚쬛쬜쬝쬞쬟쬠쬡쬢쬣쬤쬥쬦쬧쬨쬩쬪쬫쬬쬭쬮쬯쬰쬱쬲쬳쬴쬵쬶쬷쬸쬹쬺쬻쬼쬽쬾쬿쭀쭁쭂쭃쭄쭅쭆쭇쭈쭉쭊쭋쭌쭍쭎쭏쭐쭑쭒쭓쭔쭕쭖쭗쭘쭙쭚쭛쭜쭝" # noqa: E501 + "쭞쭟쭠쭡쭢쭣쭤쭥쭦쭧쭨쭩쭪쭫쭬쭭쭮쭯쭰쭱쭲쭳쭴쭵쭶쭷쭸쭹쭺쭻쭼쭽쭾쭿쮀쮁쮂쮃쮄쮅쮆쮇쮈쮉쮊쮋쮌쮍쮎쮏쮐쮑쮒쮓쮔쮕쮖쮗쮘쮙쮚쮛쮜쮝쮞쮟쮠쮡쮢쮣쮤쮥쮦" # noqa: E501 + "쮧쮨쮩쮪쮫쮬쮭쮮쮯쮰쮱쮲쮳쮴쮵쮶쮷쮸쮹쮺쮻쮼쮽쮾쮿쯀쯁쯂쯃쯄쯅쯆쯇쯈쯉쯊쯋쯌쯍쯎쯏쯐쯑쯒쯓쯔쯕쯖쯗쯘쯙쯚쯛쯜쯝쯞쯟쯠쯡쯢쯣쯤쯥쯦쯧쯨쯩쯪쯫쯬쯭쯮쯯" # noqa: E501 + "쯰쯱쯲쯳쯴쯵쯶쯷쯸쯹쯺쯻쯼쯽쯾쯿찀찁찂찃찄찅찆찇찈찉찊찋찌찍찎찏찐찑찒찓찔찕찖찗찘찙찚찛찜찝찞찟찠찡찢찣찤찥찦찧차착찪찫찬찭찮찯찰찱찲찳찴찵찶찷참" # noqa: E501 + "찹찺찻찼창찾찿챀챁챂챃채책챆챇챈챉챊챋챌챍챎챏챐챑챒챓챔챕챖챗챘챙챚챛챜챝챞챟챠챡챢챣챤챥챦챧챨챩챪챫챬챭챮챯챰챱챲챳챴챵챶챷챸챹챺챻챼챽챾챿첀첁" # noqa: E501 + "첂첃첄첅첆첇첈첉첊첋첌첍첎첏첐첑첒첓첔첕첖첗처척첚첛천첝첞첟철첡첢첣첤첥첦첧첨첩첪첫첬청첮첯첰첱첲첳체첵첶첷첸첹첺첻첼첽첾첿쳀쳁쳂쳃쳄쳅쳆쳇쳈쳉쳊" # noqa: E501 + "쳋쳌쳍쳎쳏쳐쳑쳒쳓쳔쳕쳖쳗쳘쳙쳚쳛쳜쳝쳞쳟쳠쳡쳢쳣쳤쳥쳦쳧쳨쳩쳪쳫쳬쳭쳮쳯쳰쳱쳲쳳쳴쳵쳶쳷쳸쳹쳺쳻쳼쳽쳾쳿촀촁촂촃촄촅촆촇초촉촊촋촌촍촎촏촐촑촒촓" # noqa: E501 + "촔촕촖촗촘촙촚촛촜총촞촟촠촡촢촣촤촥촦촧촨촩촪촫촬촭촮촯촰촱촲촳촴촵촶촷촸촹촺촻촼촽촾촿쵀쵁쵂쵃쵄쵅쵆쵇쵈쵉쵊쵋쵌쵍쵎쵏쵐쵑쵒쵓쵔쵕쵖쵗쵘쵙쵚쵛최" # noqa: E501 + "쵝쵞쵟쵠쵡쵢쵣쵤쵥쵦쵧쵨쵩쵪쵫쵬쵭쵮쵯쵰쵱쵲쵳쵴쵵쵶쵷쵸쵹쵺쵻쵼쵽쵾쵿춀춁춂춃춄춅춆춇춈춉춊춋춌춍춎춏춐춑춒춓추축춖춗춘춙춚춛출춝춞춟춠춡춢춣춤춥" # noqa: E501 + "춦춧춨충춪춫춬춭춮춯춰춱춲춳춴춵춶춷춸춹춺춻춼춽춾춿췀췁췂췃췄췅췆췇췈췉췊췋췌췍췎췏췐췑췒췓췔췕췖췗췘췙췚췛췜췝췞췟췠췡췢췣췤췥췦췧취췩췪췫췬췭췮" # noqa: E501 + "췯췰췱췲췳췴췵췶췷췸췹췺췻췼췽췾췿츀츁츂츃츄츅츆츇츈츉츊츋츌츍츎츏츐츑츒츓츔츕츖츗츘츙츚츛츜츝츞츟츠측츢츣츤츥츦츧츨츩츪츫츬츭츮츯츰츱츲츳츴층츶츷" # noqa: E501 + "츸츹츺츻츼츽츾츿칀칁칂칃칄칅칆칇칈칉칊칋칌칍칎칏칐칑칒칓칔칕칖칗치칙칚칛친칝칞칟칠칡칢칣칤칥칦칧침칩칪칫칬칭칮칯칰칱칲칳카칵칶칷칸칹칺칻칼칽칾칿캀" # noqa: E501 + "캁캂캃캄캅캆캇캈캉캊캋캌캍캎캏캐캑캒캓캔캕캖캗캘캙캚캛캜캝캞캟캠캡캢캣캤캥캦캧캨캩캪캫캬캭캮캯캰캱캲캳캴캵캶캷캸캹캺캻캼캽캾캿컀컁컂컃컄컅컆컇컈컉" # noqa: E501 + "컊컋컌컍컎컏컐컑컒컓컔컕컖컗컘컙컚컛컜컝컞컟컠컡컢컣커컥컦컧컨컩컪컫컬컭컮컯컰컱컲컳컴컵컶컷컸컹컺컻컼컽컾컿케켁켂켃켄켅켆켇켈켉켊켋켌켍켎켏켐켑켒" # noqa: E501 + "켓켔켕켖켗켘켙켚켛켜켝켞켟켠켡켢켣켤켥켦켧켨켩켪켫켬켭켮켯켰켱켲켳켴켵켶켷켸켹켺켻켼켽켾켿콀콁콂콃콄콅콆콇콈콉콊콋콌콍콎콏콐콑콒콓코콕콖콗콘콙콚콛" # noqa: E501 + "콜콝콞콟콠콡콢콣콤콥콦콧콨콩콪콫콬콭콮콯콰콱콲콳콴콵콶콷콸콹콺콻콼콽콾콿쾀쾁쾂쾃쾄쾅쾆쾇쾈쾉쾊쾋쾌쾍쾎쾏쾐쾑쾒쾓쾔쾕쾖쾗쾘쾙쾚쾛쾜쾝쾞쾟쾠쾡쾢쾣쾤" # noqa: E501 + "쾥쾦쾧쾨쾩쾪쾫쾬쾭쾮쾯쾰쾱쾲쾳쾴쾵쾶쾷쾸쾹쾺쾻쾼쾽쾾쾿쿀쿁쿂쿃쿄쿅쿆쿇쿈쿉쿊쿋쿌쿍쿎쿏쿐쿑쿒쿓쿔쿕쿖쿗쿘쿙쿚쿛쿜쿝쿞쿟쿠쿡쿢쿣쿤쿥쿦쿧쿨쿩쿪쿫쿬쿭" # noqa: E501 + "쿮쿯쿰쿱쿲쿳쿴쿵쿶쿷쿸쿹쿺쿻쿼쿽쿾쿿퀀퀁퀂퀃퀄퀅퀆퀇퀈퀉퀊퀋퀌퀍퀎퀏퀐퀑퀒퀓퀔퀕퀖퀗퀘퀙퀚퀛퀜퀝퀞퀟퀠퀡퀢퀣퀤퀥퀦퀧퀨퀩퀪퀫퀬퀭퀮퀯퀰퀱퀲퀳퀴퀵퀶" # noqa: E501 + "퀷퀸퀹퀺퀻퀼퀽퀾퀿큀큁큂큃큄큅큆큇큈큉큊큋큌큍큎큏큐큑큒큓큔큕큖큗큘큙큚큛큜큝큞큟큠큡큢큣큤큥큦큧큨큩큪큫크큭큮큯큰큱큲큳클큵큶큷큸큹큺큻큼큽큾큿" # noqa: E501 + "킀킁킂킃킄킅킆킇킈킉킊킋킌킍킎킏킐킑킒킓킔킕킖킗킘킙킚킛킜킝킞킟킠킡킢킣키킥킦킧킨킩킪킫킬킭킮킯킰킱킲킳킴킵킶킷킸킹킺킻킼킽킾킿타탁탂탃탄탅탆탇탈" # noqa: E501 + "탉탊탋탌탍탎탏탐탑탒탓탔탕탖탗탘탙탚탛태택탞탟탠탡탢탣탤탥탦탧탨탩탪탫탬탭탮탯탰탱탲탳탴탵탶탷탸탹탺탻탼탽탾탿턀턁턂턃턄턅턆턇턈턉턊턋턌턍턎턏턐턑" # noqa: E501 + "턒턓턔턕턖턗턘턙턚턛턜턝턞턟턠턡턢턣턤턥턦턧턨턩턪턫턬턭턮턯터턱턲턳턴턵턶턷털턹턺턻턼턽턾턿텀텁텂텃텄텅텆텇텈텉텊텋테텍텎텏텐텑텒텓텔텕텖텗텘텙텚" # noqa: E501 + "텛템텝텞텟텠텡텢텣텤텥텦텧텨텩텪텫텬텭텮텯텰텱텲텳텴텵텶텷텸텹텺텻텼텽텾텿톀톁톂톃톄톅톆톇톈톉톊톋톌톍톎톏톐톑톒톓톔톕톖톗톘톙톚톛톜톝톞톟토톡톢톣" # noqa: E501 + "톤톥톦톧톨톩톪톫톬톭톮톯톰톱톲톳톴통톶톷톸톹톺톻톼톽톾톿퇀퇁퇂퇃퇄퇅퇆퇇퇈퇉퇊퇋퇌퇍퇎퇏퇐퇑퇒퇓퇔퇕퇖퇗퇘퇙퇚퇛퇜퇝퇞퇟퇠퇡퇢퇣퇤퇥퇦퇧퇨퇩퇪퇫퇬" # noqa: E501 + "퇭퇮퇯퇰퇱퇲퇳퇴퇵퇶퇷퇸퇹퇺퇻퇼퇽퇾퇿툀툁툂툃툄툅툆툇툈툉툊툋툌툍툎툏툐툑툒툓툔툕툖툗툘툙툚툛툜툝툞툟툠툡툢툣툤툥툦툧툨툩툪툫투툭툮툯툰툱툲툳툴툵" # noqa: E501 + "툶툷툸툹툺툻툼툽툾툿퉀퉁퉂퉃퉄퉅퉆퉇퉈퉉퉊퉋퉌퉍퉎퉏퉐퉑퉒퉓퉔퉕퉖퉗퉘퉙퉚퉛퉜퉝퉞퉟퉠퉡퉢퉣퉤퉥퉦퉧퉨퉩퉪퉫퉬퉭퉮퉯퉰퉱퉲퉳퉴퉵퉶퉷퉸퉹퉺퉻퉼퉽퉾" # noqa: E501 + "퉿튀튁튂튃튄튅튆튇튈튉튊튋튌튍튎튏튐튑튒튓튔튕튖튗튘튙튚튛튜튝튞튟튠튡튢튣튤튥튦튧튨튩튪튫튬튭튮튯튰튱튲튳튴튵튶튷트특튺튻튼튽튾튿틀틁틂틃틄틅틆틇" # noqa: E501 + "틈틉틊틋틌틍틎틏틐틑틒틓틔틕틖틗틘틙틚틛틜틝틞틟틠틡틢틣틤틥틦틧틨틩틪틫틬틭틮틯티틱틲틳틴틵틶틷틸틹틺틻틼틽틾틿팀팁팂팃팄팅팆팇팈팉팊팋파팍팎팏판" # noqa: E501 + "팑팒팓팔팕팖팗팘팙팚팛팜팝팞팟팠팡팢팣팤팥팦팧패팩팪팫팬팭팮팯팰팱팲팳팴팵팶팷팸팹팺팻팼팽팾팿퍀퍁퍂퍃퍄퍅퍆퍇퍈퍉퍊퍋퍌퍍퍎퍏퍐퍑퍒퍓퍔퍕퍖퍗퍘퍙" # noqa: E501 + "퍚퍛퍜퍝퍞퍟퍠퍡퍢퍣퍤퍥퍦퍧퍨퍩퍪퍫퍬퍭퍮퍯퍰퍱퍲퍳퍴퍵퍶퍷퍸퍹퍺퍻퍼퍽퍾퍿펀펁펂펃펄펅펆펇펈펉펊펋펌펍펎펏펐펑펒펓펔펕펖펗페펙펚펛펜펝펞펟펠펡펢" # noqa: E501 + "펣펤펥펦펧펨펩펪펫펬펭펮펯펰펱펲펳펴펵펶펷편펹펺펻펼펽펾펿폀폁폂폃폄폅폆폇폈평폊폋폌폍폎폏폐폑폒폓폔폕폖폗폘폙폚폛폜폝폞폟폠폡폢폣폤폥폦폧폨폩폪폫" # noqa: E501 + "포폭폮폯폰폱폲폳폴폵폶폷폸폹폺폻폼폽폾폿퐀퐁퐂퐃퐄퐅퐆퐇퐈퐉퐊퐋퐌퐍퐎퐏퐐퐑퐒퐓퐔퐕퐖퐗퐘퐙퐚퐛퐜퐝퐞퐟퐠퐡퐢퐣퐤퐥퐦퐧퐨퐩퐪퐫퐬퐭퐮퐯퐰퐱퐲퐳퐴" # noqa: E501 + "퐵퐶퐷퐸퐹퐺퐻퐼퐽퐾퐿푀푁푂푃푄푅푆푇푈푉푊푋푌푍푎푏푐푑푒푓푔푕푖푗푘푙푚푛표푝푞푟푠푡푢푣푤푥푦푧푨푩푪푫푬푭푮푯푰푱푲푳푴푵푶푷푸푹푺푻푼푽" # noqa: E501 + "푾푿풀풁풂풃풄풅풆풇품풉풊풋풌풍풎풏풐풑풒풓풔풕풖풗풘풙풚풛풜풝풞풟풠풡풢풣풤풥풦풧풨풩풪풫풬풭풮풯풰풱풲풳풴풵풶풷풸풹풺풻풼풽풾풿퓀퓁퓂퓃퓄퓅퓆" # noqa: E501 + "퓇퓈퓉퓊퓋퓌퓍퓎퓏퓐퓑퓒퓓퓔퓕퓖퓗퓘퓙퓚퓛퓜퓝퓞퓟퓠퓡퓢퓣퓤퓥퓦퓧퓨퓩퓪퓫퓬퓭퓮퓯퓰퓱퓲퓳퓴퓵퓶퓷퓸퓹퓺퓻퓼퓽퓾퓿픀픁픂픃프픅픆픇픈픉픊픋플픍픎픏" # noqa: E501 + "픐픑픒픓픔픕픖픗픘픙픚픛픜픝픞픟픠픡픢픣픤픥픦픧픨픩픪픫픬픭픮픯픰픱픲픳픴픵픶픷픸픹픺픻피픽픾픿핀핁핂핃필핅핆핇핈핉핊핋핌핍핎핏핐핑핒핓핔핕핖핗하" # noqa: E501 + "학핚핛한핝핞핟할핡핢핣핤핥핦핧함합핪핫핬항핮핯핰핱핲핳해핵핶핷핸핹핺핻핼핽핾핿햀햁햂햃햄햅햆햇했행햊햋햌햍햎햏햐햑햒햓햔햕햖햗햘햙햚햛햜햝햞햟햠햡" # noqa: E501 + 
"햢햣햤향햦햧햨햩햪햫햬햭햮햯햰햱햲햳햴햵햶햷햸햹햺햻햼햽햾햿헀헁헂헃헄헅헆헇허헉헊헋헌헍헎헏헐헑헒헓헔헕헖헗험헙헚헛헜헝헞헟헠헡헢헣헤헥헦헧헨헩헪" # noqa: E501 + "헫헬헭헮헯헰헱헲헳헴헵헶헷헸헹헺헻헼헽헾헿혀혁혂혃현혅혆혇혈혉혊혋혌혍혎혏혐협혒혓혔형혖혗혘혙혚혛혜혝혞혟혠혡혢혣혤혥혦혧혨혩혪혫혬혭혮혯혰혱혲혳" # noqa: E501 + "혴혵혶혷호혹혺혻혼혽혾혿홀홁홂홃홄홅홆홇홈홉홊홋홌홍홎홏홐홑홒홓화확홖홗환홙홚홛활홝홞홟홠홡홢홣홤홥홦홧홨황홪홫홬홭홮홯홰홱홲홳홴홵홶홷홸홹홺홻홼" # noqa: E501 + "홽홾홿횀횁횂횃횄횅횆횇횈횉횊횋회획횎횏횐횑횒횓횔횕횖횗횘횙횚횛횜횝횞횟횠횡횢횣횤횥횦횧효횩횪횫횬횭횮횯횰횱횲횳횴횵횶횷횸횹횺횻횼횽횾횿훀훁훂훃후훅" # noqa: E501 + "훆훇훈훉훊훋훌훍훎훏훐훑훒훓훔훕훖훗훘훙훚훛훜훝훞훟훠훡훢훣훤훥훦훧훨훩훪훫훬훭훮훯훰훱훲훳훴훵훶훷훸훹훺훻훼훽훾훿휀휁휂휃휄휅휆휇휈휉휊휋휌휍휎" # noqa: E501 + "휏휐휑휒휓휔휕휖휗휘휙휚휛휜휝휞휟휠휡휢휣휤휥휦휧휨휩휪휫휬휭휮휯휰휱휲휳휴휵휶휷휸휹휺휻휼휽휾휿흀흁흂흃흄흅흆흇흈흉흊흋흌흍흎흏흐흑흒흓흔흕흖흗" # noqa: E501 + "흘흙흚흛흜흝흞흟흠흡흢흣흤흥흦흧흨흩흪흫희흭흮흯흰흱흲흳흴흵흶흷흸흹흺흻흼흽흾흿힀힁힂힃힄힅힆힇히힉힊힋힌힍힎힏힐힑힒힓힔힕힖힗힘힙힚힛힜힝힞힟힠" # noqa: E501 + "힡힢힣" + _BASE_VOCABS["punctuation"] + "。・〜°—、「」『』【】゛》《〉〈" # punctuation + _BASE_VOCABS["currency"] + "₩" ) VOCABS["simplified_chinese"] = ( _BASE_VOCABS["digits"] + "㐀㐁㐂㐃㐄㐅㐆㐇㐈㐉㐊㐋㐌㐍㐎㐏㐐㐑㐒㐓㐔㐕㐖㐗㐘㐙㐚㐛㐜㐝㐞㐟㐠㐡㐢㐣㐤㐥㐦㐧㐨㐩㐪㐫㐬㐭㐮㐯㐰㐱㐲㐳㐴㐵㐶㐷㐸㐹㐺㐻㐼㐽㐾㐿㑀㑁㑂" # noqa: E501 + "㑄㑅㑆㑇㑈㑉㑊㑋㑌㑍㑎㑏㑐㑑㑒㑓㑔㑕㑖㑗㑘㑙㑚㑛㑜㑝㑞㑟㑠㑡㑢㑣㑤㑥㑦㑧㑨㑩㑪㑫㑬㑭㑮㑯㑰㑱㑲㑳㑴㑵㑶㑷㑸㑹㑺㑻㑼㑽㑾㑿㒀㒁㒂㒃㒄㒅㒆" # noqa: E501 + "㒇㒈㒉㒊㒋㒌㒍㒎㒏㒐㒑㒒㒓㒔㒕㒖㒗㒘㒙㒚㒛㒜㒝㒞㒟㒠㒡㒢㒣㒤㒥㒦㒧㒨㒩㒪㒫㒬㒭㒮㒯㒰㒱㒲㒳㒴㒵㒶㒷㒸㒹㒺㒻㒼㒽㒾㒿㓀㓁㓂㓃㓄㓅㓆㓇㓈㓉" # noqa: E501 + "㓊㓋㓌㓍㓎㓏㓐㓑㓒㓓㓔㓕㓖㓗㓘㓙㓚㓛㓜㓝㓞㓟㓠㓡㓢㓣㓤㓥㓦㓧㓨㓩㓪㓫㓬㓭㓮㓯㓰㓱㓲㓳㓴㓵㓶㓷㓸㓹㓺㓻㓼㓽㓾㓿㔀㔁㔂㔃㔄㔅㔆㔇㔈㔉㔊㔋㔌" # noqa: E501 + "㔍㔎㔏㔐㔑㔒㔓㔔㔕㔖㔗㔘㔙㔚㔛㔜㔝㔞㔟㔠㔡㔢㔣㔤㔥㔦㔧㔨㔩㔪㔫㔬㔭㔮㔯㔰㔱㔲㔳㔴㔵㔶㔷㔸㔹㔺㔻㔼㔽㔾㔿㕀㕁㕂㕃㕄㕅㕆㕇㕈㕉㕊㕋㕌㕍㕎㕏" # noqa: E501 + "㕐㕑㕒㕓㕔㕕㕖㕗㕘㕙㕚㕛㕜㕝㕞㕟㕠㕡㕢㕣㕤㕥㕦㕧㕨㕩㕪㕫㕬㕭㕮㕯㕰㕱㕲㕳㕴㕵㕶㕷㕸㕹㕺㕻㕼㕽㕾㕿㖀㖁㖂㖃㖄㖅㖆㖇㖈㖉㖊㖋㖌㖍㖎㖏㖐㖑㖒" # noqa: E501 + "㖓㖔㖕㖖㖗㖘㖙㖚㖛㖜㖝㖞㖟㖠㖡㖢㖣㖤㖥㖦㖧㖨㖩㖪㖫㖬㖭㖮㖯㖰㖱㖲㖳㖴㖵㖶㖷㖸㖹㖺㖻㖼㖽㖾㖿㗀㗁㗂㗃㗄㗅㗆㗇㗈㗉㗊㗋㗌㗍㗎㗏㗐㗑㗒㗓㗔㗕" # noqa: E501 + "㗖㗗㗘㗙㗚㗛㗜㗝㗞㗟㗠㗡㗢㗣㗤㗥㗦㗧㗨㗩㗪㗫㗬㗭㗮㗯㗰㗱㗲㗳㗴㗵㗶㗷㗸㗹㗺㗻㗼㗽㗾㗿㘀㘁㘂㘃㘄㘅㘆㘇㘈㘉㘊㘋㘌㘍㘎㘏㘐㘑㘒㘓㘔㘕㘖㘗㘘" # noqa: E501 + "㘙㘚㘛㘜㘝㘞㘟㘠㘡㘢㘣㘤㘥㘦㘧㘨㘩㘪㘫㘬㘭㘮㘯㘰㘱㘲㘳㘴㘵㘶㘷㘸㘹㘺㘻㘼㘽㘾㘿㙀㙁㙂㙃㙄㙅㙆㙇㙈㙉㙊㙋㙌㙍㙎㙏㙐㙑㙒㙓㙔㙕㙖㙗㙘㙙㙚㙛" # noqa: E501 + "㙜㙝㙞㙟㙠㙡㙢㙣㙤㙥㙦㙧㙨㙩㙪㙫㙬㙭㙮㙯㙰㙱㙲㙳㙴㙵㙶㙷㙸㙹㙺㙻㙼㙽㙾㙿㚀㚁㚂㚃㚄㚅㚆㚇㚈㚉㚊㚋㚌㚍㚎㚏㚐㚑㚒㚓㚔㚕㚖㚗㚘㚙㚚㚛㚜㚝㚞" # noqa: E501 + "㚟㚠㚡㚢㚣㚤㚥㚦㚧㚨㚩㚪㚫㚬㚭㚮㚯㚰㚱㚲㚳㚴㚵㚶㚷㚸㚹㚺㚻㚼㚽㚾㚿㛀㛁㛂㛃㛄㛅㛆㛇㛈㛉㛊㛋㛌㛍㛎㛏㛐㛑㛒㛓㛔㛕㛖㛗㛘㛙㛚㛛㛜㛝㛞㛟㛠㛡" # noqa: E501 + "㛢㛣㛤㛥㛦㛧㛨㛩㛪㛫㛬㛭㛮㛯㛰㛱㛲㛳㛴㛵㛶㛷㛸㛹㛺㛻㛼㛽㛾㛿㜀㜁㜂㜃㜄㜅㜆㜇㜈㜉㜊㜋㜌㜍㜎㜏㜐㜑㜒㜓㜔㜕㜖㜗㜘㜙㜚㜛㜜㜝㜞㜟㜠㜡㜢㜣㜤" # noqa: E501 + "㜥㜦㜧㜨㜩㜪㜫㜬㜭㜮㜯㜰㜱㜲㜳㜴㜵㜶㜷㜸㜹㜺㜻㜼㜽㜾㜿㝀㝁㝂㝃㝄㝅㝆㝇㝈㝉㝊㝋㝌㝍㝎㝏㝐㝑㝒㝓㝔㝕㝖㝗㝘㝙㝚㝛㝜㝝㝞㝟㝠㝡㝢㝣㝤㝥㝦㝧" # noqa: E501 + "㝨㝩㝪㝫㝬㝭㝮㝯㝰㝱㝲㝳㝴㝵㝶㝷㝸㝹㝺㝻㝼㝽㝾㝿㞀㞁㞂㞃㞄㞅㞆㞇㞈㞉㞊㞋㞌㞍㞎㞏㞐㞑㞒㞓㞔㞕㞖㞗㞘㞙㞚㞛㞜㞝㞞㞟㞠㞡㞢㞣㞤㞥㞦㞧㞨㞩㞪" # noqa: E501 + "㞫㞬㞭㞮㞯㞰㞱㞲㞳㞴㞵㞶㞷㞸㞹㞺㞻㞼㞽㞾㞿㟀㟁㟂㟃㟄㟅㟆㟇㟈㟉㟊㟋㟌㟍㟎㟏㟐㟑㟒㟓㟔㟕㟖㟗㟘㟙㟚㟛㟜㟝㟞㟟㟠㟡㟢㟣㟤㟥㟦㟧㟨㟩㟪㟫㟬㟭" # noqa: E501 + "㟮㟯㟰㟱㟲㟳㟴㟵㟶㟷㟸㟹㟺㟻㟼㟽㟾㟿㠀㠁㠂㠃㠄㠅㠆㠇㠈㠉㠊㠋㠌㠍㠎㠏㠐㠑㠒㠓㠔㠕㠖㠗㠘㠙㠚㠛㠜㠝㠞㠟㠠㠡㠢㠣㠤㠥㠦㠧㠨㠩㠪㠫㠬㠭㠮㠯㠰" # noqa: E501 + "㠱㠲㠳㠴㠵㠶㠷㠸㠹㠺㠻㠼㠽㠾㠿㡀㡁㡂㡃㡄㡅㡆㡇㡈㡉㡊㡋㡌㡍㡎㡏㡐㡑㡒㡓㡔㡕㡖㡗㡘㡙㡚㡛㡜㡝㡞㡟㡠㡡㡢㡣㡤㡥㡦㡧㡨㡩㡪㡫㡬㡭㡮㡯㡰㡱㡲㡳" # noqa: E501 + "㡴㡵㡶㡷㡸㡹㡺㡻㡼㡽㡾㡿㢀㢁㢂㢃㢄㢅㢆㢇㢈㢉㢊㢋㢌㢍㢎㢏㢐㢑㢒㢓㢔㢕㢖㢗㢘㢙㢚㢛㢜㢝㢞㢟㢠㢡㢢㢣㢤㢥㢦㢧㢨㢩㢪㢫㢬㢭㢮㢯㢰㢱㢲㢳㢴㢵㢶" # noqa: E501 + "㢷㢸㢹㢺㢻㢼㢽㢾㢿㣀㣁㣂㣃㣄㣅㣆㣇㣈㣉㣊㣋㣌㣍㣎㣏㣐㣑㣒㣓㣔㣕㣖㣗㣘㣙㣚㣛㣜㣝㣞㣟㣠㣡㣢㣣㣤㣥㣦㣧㣨㣩㣪㣫㣬㣭㣮㣯㣰㣱㣲㣳㣴㣵㣶㣷㣸㣹" # noqa: E501 + "㣺㣻㣼㣽㣾㣿㤀㤁㤂㤃㤄㤅㤆㤇㤈㤉㤊㤋㤌㤍㤎㤏㤐㤑㤒㤓㤔㤕㤖㤗㤘㤙㤚㤛㤜㤝㤞㤟㤠㤡㤢㤣㤤㤥㤦㤧㤨㤩㤪㤫㤬㤭㤮㤯㤰㤱㤲㤳㤴㤵㤶㤷㤸㤹㤺㤻㤼" # noqa: E501 + "㤽㤾㤿㥀㥁㥂㥃㥄㥅㥆㥇㥈㥉㥊㥋㥌㥍㥎㥏㥐㥑㥒㥓㥔㥕㥖㥗㥘㥙㥚㥛㥜㥝㥞㥟㥠㥡㥢㥣㥤㥥㥦㥧㥨㥩㥪㥫㥬㥭㥮㥯㥰㥱㥲㥳㥴㥵㥶㥷㥸㥹㥺㥻㥼㥽㥾㥿" # noqa: E501 + "㦀㦁㦂㦃㦄㦅㦆㦇㦈㦉㦊㦋㦌㦍㦎㦏㦐㦑㦒㦓㦔㦕㦖㦗㦘㦙㦚㦛㦜㦝㦞㦟㦠㦡㦢㦣㦤㦥㦦㦧㦨㦩㦪㦫㦬㦭㦮㦯㦰㦱㦲㦳㦴㦵㦶㦷㦸㦹㦺㦻㦼㦽㦾㦿㧀㧁㧂" # noqa: E501 + "㧃㧄㧅㧆㧇㧈㧉㧊㧋㧌㧍㧎㧏㧐㧑㧒㧓㧔㧕㧖㧗㧘㧙㧚㧛㧜㧝㧞㧟㧠㧡㧢㧣㧤㧥㧦㧧㧨㧩㧪㧫㧬㧭㧮㧯㧰㧱㧲㧳㧴㧵㧶㧷㧸㧹㧺㧻㧼㧽㧾㧿㨀㨁㨂㨃㨄㨅" # noqa: E501 + "㨆㨇㨈㨉㨊㨋㨌㨍㨎㨏㨐㨑㨒㨓㨔㨕㨖㨗㨘㨙㨚㨛㨜㨝㨞㨟㨠㨡㨢㨣㨤㨥㨦㨧㨨㨩㨪㨫㨬㨭㨮㨯㨰㨱㨲㨳㨴㨵㨶㨷㨸㨹㨺㨻㨼㨽㨾㨿㩀㩁㩂㩃㩄㩅㩆㩇㩈" # noqa: E501 + "㩉㩊㩋㩌㩍㩎㩏㩐㩑㩒㩓㩔㩕㩖㩗㩘㩙㩚㩛㩜㩝㩞㩟㩠㩡㩢㩣㩤㩥㩦㩧㩨㩩㩪㩫㩬㩭㩮㩯㩰㩱㩲㩳㩴㩵㩶㩷㩸㩹㩺㩻㩼㩽㩾㩿㪀㪁㪂㪃㪄㪅㪆㪇㪈㪉㪊㪋" # noqa: E501 + "㪌㪍㪎㪏㪐㪑㪒㪓㪔㪕㪖㪗㪘㪙㪚㪛㪜㪝㪞㪟㪠㪡㪢㪣㪤㪥㪦㪧㪨㪩㪪㪫㪬㪭㪮㪯㪰㪱㪲㪳㪴㪵㪶㪷㪸㪹㪺㪻㪼㪽㪾㪿㫀㫁㫂㫃㫄㫅㫆㫇㫈㫉㫊㫋㫌㫍㫎" # noqa: E501 + "㫏㫐㫑㫒㫓㫔㫕㫖㫗㫘㫙㫚㫛㫜㫝㫞㫟㫠㫡㫢㫣㫤㫥㫦㫧㫨㫩㫪㫫㫬㫭㫮㫯㫰㫱㫲㫳㫴㫵㫶㫷㫸㫹㫺㫻㫼㫽㫾㫿㬀㬁㬂㬃㬄㬅㬆㬇㬈㬉㬊㬋㬌㬍㬎㬏㬐㬑" # noqa: E501 + "㬒㬓㬔㬕㬖㬗㬘㬙㬚㬛㬜㬝㬞㬟㬠㬡㬢㬣㬤㬥㬦㬧㬨㬩㬪㬫㬬㬭㬮㬯㬰㬱㬲㬳㬴㬵㬶㬷㬸㬹㬺㬻㬼㬽㬾㬿㭀㭁㭂㭃㭄㭅㭆㭇㭈㭉㭊㭋㭌㭍㭎㭏㭐㭑㭒㭓㭔" # noqa: E501 + "㭕㭖㭗㭘㭙㭚㭛㭜㭝㭞㭟㭠㭡㭢㭣㭤㭥㭦㭧㭨㭩㭪㭫㭬㭭㭮㭯㭰㭱㭲㭳㭴㭵㭶㭷㭸㭹㭺㭻㭼㭽㭾㭿㮀㮁㮂㮃㮄㮅㮆㮇㮈㮉㮊㮋㮌㮍㮎㮏㮐㮑㮒㮓㮔㮕㮖㮗" # noqa: E501 + "㮘㮙㮚㮛㮜㮝㮞㮟㮠㮡㮢㮣㮤㮥㮦㮧㮨㮩㮪㮫㮬㮭㮮㮯㮰㮱㮲㮳㮴㮵㮶㮷㮸㮹㮺㮻㮼㮽㮾㮿㯀㯁㯂㯃㯄㯅㯆㯇㯈㯉㯊㯋㯌㯍㯎㯏㯐㯑㯒㯓㯔㯕㯖㯗㯘㯙㯚" # noqa: E501 + "㯛㯜㯝㯞㯟㯠㯡㯢㯣㯤㯥㯦㯧㯨㯩㯪㯫㯬㯭㯮㯯㯰㯱㯲㯳㯴㯵㯶㯷㯸㯹㯺㯻㯼㯽㯾㯿㰀㰁㰂㰃㰄㰅㰆㰇㰈㰉㰊㰋㰌㰍㰎㰏㰐㰑㰒㰓㰔㰕㰖㰗㰘㰙㰚㰛㰜㰝" # noqa: E501 + "㰞㰟㰠㰡㰢㰣㰤㰥㰦㰧㰨㰩㰪㰫㰬㰭㰮㰯㰰㰱㰲㰳㰴㰵㰶㰷㰸㰹㰺㰻㰼㰽㰾㰿㱀㱁㱂㱃㱄㱅㱆㱇㱈㱉㱊㱋㱌㱍㱎㱏㱐㱑㱒㱓㱔㱕㱖㱗㱘㱙㱚㱛㱜㱝㱞㱟㱠" # noqa: E501 + 
"㱡㱢㱣㱤㱥㱦㱧㱨㱩㱪㱫㱬㱭㱮㱯㱰㱱㱲㱳㱴㱵㱶㱷㱸㱹㱺㱻㱼㱽㱾㱿㲀㲁㲂㲃㲄㲅㲆㲇㲈㲉㲊㲋㲌㲍㲎㲏㲐㲑㲒㲓㲔㲕㲖㲗㲘㲙㲚㲛㲜㲝㲞㲟㲠㲡㲢㲣" # noqa: E501 + "㲤㲥㲦㲧㲨㲩㲪㲫㲬㲭㲮㲯㲰㲱㲲㲳㲴㲵㲶㲷㲸㲹㲺㲻㲼㲽㲾㲿㳀㳁㳂㳃㳄㳅㳆㳇㳈㳉㳊㳋㳌㳍㳎㳏㳐㳑㳒㳓㳔㳕㳖㳗㳘㳙㳚㳛㳜㳝㳞㳟㳠㳡㳢㳣㳤㳥㳦" # noqa: E501 + "㳧㳨㳩㳪㳫㳬㳭㳮㳯㳰㳱㳲㳳㳴㳵㳶㳷㳸㳹㳺㳻㳼㳽㳾㳿㴀㴁㴂㴃㴄㴅㴆㴇㴈㴉㴊㴋㴌㴍㴎㴏㴐㴑㴒㴓㴔㴕㴖㴗㴘㴙㴚㴛㴜㴝㴞㴟㴠㴡㴢㴣㴤㴥㴦㴧㴨㴩" # noqa: E501 + "㴪㴫㴬㴭㴮㴯㴰㴱㴲㴳㴴㴵㴶㴷㴸㴹㴺㴻㴼㴽㴾㴿㵀㵁㵂㵃㵄㵅㵆㵇㵈㵉㵊㵋㵌㵍㵎㵏㵐㵑㵒㵓㵔㵕㵖㵗㵘㵙㵚㵛㵜㵝㵞㵟㵠㵡㵢㵣㵤㵥㵦㵧㵨㵩㵪㵫㵬" # noqa: E501 + "㵭㵮㵯㵰㵱㵲㵳㵴㵵㵶㵷㵸㵹㵺㵻㵼㵽㵾㵿㶀㶁㶂㶃㶄㶅㶆㶇㶈㶉㶊㶋㶌㶍㶎㶏㶐㶑㶒㶓㶔㶕㶖㶗㶘㶙㶚㶛㶜㶝㶞㶟㶠㶡㶢㶣㶤㶥㶦㶧㶨㶩㶪㶫㶬㶭㶮㶯" # noqa: E501 + "㶰㶱㶲㶳㶴㶵㶶㶷㶸㶹㶺㶻㶼㶽㶾㶿㷀㷁㷂㷃㷄㷅㷆㷇㷈㷉㷊㷋㷌㷍㷎㷏㷐㷑㷒㷓㷔㷕㷖㷗㷘㷙㷚㷛㷜㷝㷞㷟㷠㷡㷢㷣㷤㷥㷦㷧㷨㷩㷪㷫㷬㷭㷮㷯㷰㷱㷲" # noqa: E501 + "㷳㷴㷵㷶㷷㷸㷹㷺㷻㷼㷽㷾㷿㸀㸁㸂㸃㸄㸅㸆㸇㸈㸉㸊㸋㸌㸍㸎㸏㸐㸑㸒㸓㸔㸕㸖㸗㸘㸙㸚㸛㸜㸝㸞㸟㸠㸡㸢㸣㸤㸥㸦㸧㸨㸩㸪㸫㸬㸭㸮㸯㸰㸱㸲㸳㸴㸵" # noqa: E501 + "㸶㸷㸸㸹㸺㸻㸼㸽㸾㸿㹀㹁㹂㹃㹄㹅㹆㹇㹈㹉㹊㹋㹌㹍㹎㹏㹐㹑㹒㹓㹔㹕㹖㹗㹘㹙㹚㹛㹜㹝㹞㹟㹠㹡㹢㹣㹤㹥㹦㹧㹨㹩㹪㹫㹬㹭㹮㹯㹰㹱㹲㹳㹴㹵㹶㹷㹸" # noqa: E501 + "㹹㹺㹻㹼㹽㹾㹿㺀㺁㺂㺃㺄㺅㺆㺇㺈㺉㺊㺋㺌㺍㺎㺏㺐㺑㺒㺓㺔㺕㺖㺗㺘㺙㺚㺛㺜㺝㺞㺟㺠㺡㺢㺣㺤㺥㺦㺧㺨㺩㺪㺫㺬㺭㺮㺯㺰㺱㺲㺳㺴㺵㺶㺷㺸㺹㺺㺻" # noqa: E501 + "㺼㺽㺾㺿㻀㻁㻂㻃㻄㻅㻆㻇㻈㻉㻊㻋㻌㻍㻎㻏㻐㻑㻒㻓㻔㻕㻖㻗㻘㻙㻚㻛㻜㻝㻞㻟㻠㻡㻢㻣㻤㻥㻦㻧㻨㻩㻪㻫㻬㻭㻮㻯㻰㻱㻲㻳㻴㻵㻶㻷㻸㻹㻺㻻㻼㻽㻾" # noqa: E501 + "㻿㼀㼁㼂㼃㼄㼅㼆㼇㼈㼉㼊㼋㼌㼍㼎㼏㼐㼑㼒㼓㼔㼕㼖㼗㼘㼙㼚㼛㼜㼝㼞㼟㼠㼡㼢㼣㼤㼥㼦㼧㼨㼩㼪㼫㼬㼭㼮㼯㼰㼱㼲㼳㼴㼵㼶㼷㼸㼹㼺㼻㼼㼽㼾㼿㽀㽁" # noqa: E501 + "㽂㽃㽄㽅㽆㽇㽈㽉㽊㽋㽌㽍㽎㽏㽐㽑㽒㽓㽔㽕㽖㽗㽘㽙㽚㽛㽜㽝㽞㽟㽠㽡㽢㽣㽤㽥㽦㽧㽨㽩㽪㽫㽬㽭㽮㽯㽰㽱㽲㽳㽴㽵㽶㽷㽸㽹㽺㽻㽼㽽㽾㽿㾀㾁㾂㾃㾄" # noqa: E501 + "㾅㾆㾇㾈㾉㾊㾋㾌㾍㾎㾏㾐㾑㾒㾓㾔㾕㾖㾗㾘㾙㾚㾛㾜㾝㾞㾟㾠㾡㾢㾣㾤㾥㾦㾧㾨㾩㾪㾫㾬㾭㾮㾯㾰㾱㾲㾳㾴㾵㾶㾷㾸㾹㾺㾻㾼㾽㾾㾿㿀㿁㿂㿃㿄㿅㿆㿇" # noqa: E501 + "㿈㿉㿊㿋㿌㿍㿎㿏㿐㿑㿒㿓㿔㿕㿖㿗㿘㿙㿚㿛㿜㿝㿞㿟㿠㿡㿢㿣㿤㿥㿦㿧㿨㿩㿪㿫㿬㿭㿮㿯㿰㿱㿲㿳㿴㿵㿶㿷㿸㿹㿺㿻㿼㿽㿾㿿䀀䀁䀂䀃䀄䀅䀆䀇䀈䀉䀊" # noqa: E501 + "䀋䀌䀍䀎䀏䀐䀑䀒䀓䀔䀕䀖䀗䀘䀙䀚䀛䀜䀝䀞䀟䀠䀡䀢䀣䀤䀥䀦䀧䀨䀩䀪䀫䀬䀭䀮䀯䀰䀱䀲䀳䀴䀵䀶䀷䀸䀹䀺䀻䀼䀽䀾䀿䁀䁁䁂䁃䁄䁅䁆䁇䁈䁉䁊䁋䁌䁍" # noqa: E501 + "䁎䁏䁐䁑䁒䁓䁔䁕䁖䁗䁘䁙䁚䁛䁜䁝䁞䁟䁠䁡䁢䁣䁤䁥䁦䁧䁨䁩䁪䁫䁬䁭䁮䁯䁰䁱䁲䁳䁴䁵䁶䁷䁸䁹䁺䁻䁼䁽䁾䁿䂀䂁䂂䂃䂄䂅䂆䂇䂈䂉䂊䂋䂌䂍䂎䂏䂐" # noqa: E501 + "䂑䂒䂓䂔䂕䂖䂗䂘䂙䂚䂛䂜䂝䂞䂟䂠䂡䂢䂣䂤䂥䂦䂧䂨䂩䂪䂫䂬䂭䂮䂯䂰䂱䂲䂳䂴䂵䂶䂷䂸䂹䂺䂻䂼䂽䂾䂿䃀䃁䃂䃃䃄䃅䃆䃇䃈䃉䃊䃋䃌䃍䃎䃏䃐䃑䃒䃓" # noqa: E501 + "䃔䃕䃖䃗䃘䃙䃚䃛䃜䃝䃞䃟䃠䃡䃢䃣䃤䃥䃦䃧䃨䃩䃪䃫䃬䃭䃮䃯䃰䃱䃲䃳䃴䃵䃶䃷䃸䃹䃺䃻䃼䃽䃾䃿䄀䄁䄂䄃䄄䄅䄆䄇䄈䄉䄊䄋䄌䄍䄎䄏䄐䄑䄒䄓䄔䄕䄖" # noqa: E501 + "䄗䄘䄙䄚䄛䄜䄝䄞䄟䄠䄡䄢䄣䄤䄥䄦䄧䄨䄩䄪䄫䄬䄭䄮䄯䄰䄱䄲䄳䄴䄵䄶䄷䄸䄹䄺䄻䄼䄽䄾䄿䅀䅁䅂䅃䅄䅅䅆䅇䅈䅉䅊䅋䅌䅍䅎䅏䅐䅑䅒䅓䅔䅕䅖䅗䅘䅙" # noqa: E501 + "䅚䅛䅜䅝䅞䅟䅠䅡䅢䅣䅤䅥䅦䅧䅨䅩䅪䅫䅬䅭䅮䅯䅰䅱䅲䅳䅴䅵䅶䅷䅸䅹䅺䅻䅼䅽䅾䅿䆀䆁䆂䆃䆄䆅䆆䆇䆈䆉䆊䆋䆌䆍䆎䆏䆐䆑䆒䆓䆔䆕䆖䆗䆘䆙䆚䆛䆜" # noqa: E501 + "䆝䆞䆟䆠䆡䆢䆣䆤䆥䆦䆧䆨䆩䆪䆫䆬䆭䆮䆯䆰䆱䆲䆳䆴䆵䆶䆷䆸䆹䆺䆻䆼䆽䆾䆿䇀䇁䇂䇃䇄䇅䇆䇇䇈䇉䇊䇋䇌䇍䇎䇏䇐䇑䇒䇓䇔䇕䇖䇗䇘䇙䇚䇛䇜䇝䇞䇟" # noqa: E501 + "䇠䇡䇢䇣䇤䇥䇦䇧䇨䇩䇪䇫䇬䇭䇮䇯䇰䇱䇲䇳䇴䇵䇶䇷䇸䇹䇺䇻䇼䇽䇾䇿䈀䈁䈂䈃䈄䈅䈆䈇䈈䈉䈊䈋䈌䈍䈎䈏䈐䈑䈒䈓䈔䈕䈖䈗䈘䈙䈚䈛䈜䈝䈞䈟䈠䈡䈢" # noqa: E501 + "䈣䈤䈥䈦䈧䈨䈩䈪䈫䈬䈭䈮䈯䈰䈱䈲䈳䈴䈵䈶䈷䈸䈹䈺䈻䈼䈽䈾䈿䉀䉁䉂䉃䉄䉅䉆䉇䉈䉉䉊䉋䉌䉍䉎䉏䉐䉑䉒䉓䉔䉕䉖䉗䉘䉙䉚䉛䉜䉝䉞䉟䉠䉡䉢䉣䉤䉥" # noqa: E501 + "䉦䉧䉨䉩㑃䉪䉫䉬䉭䉮䉯䉰䉱䉲䉳䉴䉵䉶䉷䉸䉹䉺䉻䉼䉽䉾䉿䊀䊁䊂䊃䊄䊅䊆䊇䊈䊉䊊䊋䊌䊍䊎䊏䊐䊑䊒䊓䊔䊕䊖䊗䊘䊙䊚䊛䊜䊝䊞䊟䊠䊡䊢䊣䊤䊥䊦䊧" # noqa: E501 + "䊨䊩䊪䊫䊬䊭䊮䊯䊰䊱䊲䊳䊴䊵䊶䊷䊸䊹䊺䊻䊼䊽䊾䊿䋀䋁䋂䋃䋄䋅䋆䋇䋈䋉䋊䋋䋌䋍䋎䋏䋐䋑䋒䋓䋔䋕䋖䋗䋘䋙䋚䋛䋜䋝䋞䋟䋠䋡䋢䋣䋤䋥䋦䋧䋨䋩䋪" # noqa: E501 + "䋫䋬䋭䋮䋯䋰䋱䋲䋳䋴䋵䋶䋷䋸䋹䋺䋻䋼䋽䋾䋿䌀䌁䌂䌃䌄䌅䌆䌇䌈䌉䌊䌋䌌䌍䌎䌏䌐䌑䌒䌓䌔䌕䌖䌗䌘䌙䌚䌛䌜䌝䌞䌟䌠䌡䌢䌣䌤䌥䌦䌧䌨䌩䌪䌫䌬䌭" # noqa: E501 + "䌮䌯䌰䌱䌲䌳䌴䌵䌶䌷䌸䌹䌺䌻䌼䌽䌾䌿䍀䍁䍂䍃䍄䍅䍆䍇䍈䍉䍊䍋䍌䍍䍎䍏䍐䍑䍒䍓䍔䍕䍖䍗䍘䍙䍚䍛䍜䍝䍞䍟䍠䍡䍢䍣䍤䍥䍦䍧䍨䍩䍪䍫䍬䍭䍮䍯䍰" # noqa: E501 + "䍱䍲䍳䍴䍵䍶䍷䍸䍹䍺䍻䍼䍽䍾䍿䎀䎁䎂䎃䎄䎅䎆䎇䎈䎉䎊䎋䎌䎍䎎䎏䎐䎑䎒䎓䎔䎕䎖䎗䎘䎙䎚䎛䎜䎝䎞䎟䎠䎡䎢䎣䎤䎥䎦䎧䎨䎩䎪䎫䎬䎭䎮䎯䎰䎱䎲䎳" # noqa: E501 + "䎴䎵䎶䎷䎸䎹䎺䎻䎼䎽䎾䎿䏀䏁䏂䏃䏄䏅䏆䏇䏈䏉䏊䏋䏌䏍䏎䏏䏐䏑䏒䏓䏔䏕䏖䏗䏘䏙䏚䏛䏜䏝䏞䏟䏠䏡䏢䏣䏤䏥䏦䏧䏨䏩䏪䏫䏬䏭䏮䏯䏰䏱䏲䏳䏴䏵䏶" # noqa: E501 + "䏷䏸䏹䏺䏻䏼䏽䏾䏿䐀䐁䐂䐃䐄䐅䐆䐇䐈䐉䐊䐋䐌䐍䐎䐏䐐䐑䐒䐓䐔䐕䐖䐗䐘䐙䐚䐛䐜䐝䐞䐟䐠䐡䐢䐣䐤䐥䐦䐧䐨䐩䐪䐫䐬䐭䐮䐯䐰䐱䐲䐳䐴䐵䐶䐷䐸䐹" # noqa: E501 + "䐺䐻䐼䐽䐾䐿䑀䑁䑂䑃䑄䑅䑆䑇䑈䑉䑊䑋䑌䑍䑎䑏䑐䑑䑒䑓䑔䑕䑖䑗䑘䑙䑚䑛䑜䑝䑞䑟䑠䑡䑢䑣䑤䑥䑦䑧䑨䑩䑪䑫䑬䑭䑮䑯䑰䑱䑲䑳䑴䑵䑶䑷䑸䑹䑺䑻䑼" # noqa: E501 + "䑽䑾䑿䒀䒁䒂䒃䒄䒅䒆䒇䒈䒉䒊䒋䒌䒍䒎䒏䒐䒑䒒䒓䒔䒕䒖䒗䒘䒙䒚䒛䒜䒝䒞䒟䒠䒡䒢䒣䒤䒥䒦䒧䒨䒩䒪䒫䒬䒭䒮䒯䒰䒱䒲䒳䒴䒵䒶䒷䒸䒹䒺䒻䒼䒽䒾䒿" # noqa: E501 + "䓀䓁䓂䓃䓄䓅䓆䓇䓈䓉䓊䓋䓌䓍䓎䓏䓐䓑䓒䓓䓔䓕䓖䓗䓘䓙䓚䓛䓜䓝䓞䓟䓠䓡䓢䓣䓤䓥䓦䓧䓨䓩䓪䓫䓬䓭䓮䓯䓰䓱䓲䓳䓴䓵䓶䓷䓸䓹䓺䓻䓼䓽䓾䓿䔀䔁䔂" # noqa: E501 + "䔃䔄䔅䔆䔇䔈䔉䔊䔋䔌䔍䔎䔏䔐䔑䔒䔓䔔䔕䔖䔗䔘䔙䔚䔛䔜䔝䔞䔟䔠䔡䔢䔣䔤䔥䔦䔧䔨䔩䔪䔫䔬䔭䔮䔯䔰䔱䔲䔳䔴䔵䔶䔷䔸䔹䔺䔻䔼䔽䔾䔿䕀䕁䕂䕃䕄䕅" # noqa: E501 + "䕆䕇䕈䕉䕊䕋䕌䕍䕎䕏䕐䕑䕒䕓䕔䕕䕖䕗䕘䕙䕚䕛䕜䕝䕞䕟䕠䕡䕢䕣䕤䕥䕦䕧䕨䕩䕪䕫䕬䕭䕮䕯䕰䕱䕲䕳䕴䕵䕶䕷䕸䕹䕺䕻䕼䕽䕾䕿䖀䖁䖂䖃䖄䖅䖆䖇䖈" # noqa: E501 + "䖉䖊䖋䖌䖍䖎䖏䖐䖑䖒䖓䖔䖕䖖䖗䖘䖙䖚䖛䖜䖝䖞䖟䖠䖡䖢䖣䖤䖥䖦䖧䖨䖩䖪䖫䖬䖭䖮䖯䖰䖱䖲䖳䖴䖵䖶䖷䖸䖹䖺䖻䖼䖽䖾䖿䗀䗁䗂䗃䗄䗅䗆䗇䗈䗉䗊䗋" # noqa: E501 + "䗌䗍䗎䗏䗐䗑䗒䗓䗔䗕䗖䗗䗘䗙䗚䗛䗜䗝䗞䗟䗠䗡䗢䗣䗤䗥䗦䗧䗨䗩䗪䗫䗬䗭䗮䗯䗰䗱䗲䗳䗴䗵䗶䗷䗸䗹䗺䗻䗼䗽䗾䗿䘀䘁䘂䘃䘄䘅䘆䘇䘈䘉䘊䘋䘌䘍䘎" # noqa: E501 + "䘏䘐䘑䘒䘓䘔䘕䘖䘗䘘䘙䘚䘛䘜䘝䘞䘟䘠䘡䘢䘣䘤䘥䘦䘧䘨䘩䘪䘫䘬䘭䘮䘯䘰䘱䘲䘳䘴䘵䘶䘷䘸䘹䘺䘻䘼䘽䘾䘿䙀䙁䙂䙃䙄䙅䙆䙇䙈䙉䙊䙋䙌䙍䙎䙏䙐䙑" # noqa: E501 + "䙒䙓䙔䙕䙖䙗䙘䙙䙚䙛䙜䙝䙞䙟䙠䙡䙢䙣䙤䙥䙦䙧䙨䙩䙪䙫䙬䙭䙮䙯䙰䙱䙲䙳䙴䙵䙶䙷䙸䙹䙺䙻䙼䙽䙾䙿䚀䚁䚂䚃䚄䚅䚆䚇䚈䚉䚊䚋䚌䚍䚎䚏䚐䚑䚒䚓䚔" # noqa: E501 + "䚕䚖䚗䚘䚙䚚䚛䚜䚝䚞䚟䚠䚡䚢䚣䚤䚥䚦䚧䚨䚩䚪䚫䚬䚭䚮䚯䚰䚱䚲䚳䚴䚵䚶䚷䚸䚹䚺䚻䚼䚽䚾䚿䛀䛁䛂䛃䛄䛅䛆䛇䛈䛉䛊䛋䛌䛍䛎䛏䛐䛑䛒䛓䛔䛕䛖䛗" # noqa: E501 + "䛘䛙䛚䛛䛜䛝䛞䛟䛠䛡䛢䛣䛤䛥䛦䛧䛨䛩䛪䛫䛬䛭䛮䛯䛰䛱䛲䛳䛴䛵䛶䛷䛸䛹䛺䛻䛼䛽䛾䛿䜀䜁䜂䜃䜄䜅䜆䜇䜈䜉䜊䜋䜌䜍䜎䜏䜐䜑䜒䜓䜔䜕䜖䜗䜘䜙䜚" # noqa: E501 + "䜛䜜䜝䜞䜟䜠䜡䜢䜣䜤䜥䜦䜧䜨䜩䜪䜫䜬䜭䜮䜯䜰䜱䜲䜳䜴䜵䜶䜷䜸䜹䜺䜻䜼䜽䜾䜿䝀䝁䝂䝃䝄䝅䝆䝇䝈䝉䝊䝋䝌䝍䝎䝏䝐䝑䝒䝓䝔䝕䝖䝗䝘䝙䝚䝛䝜䝝" 
# noqa: E501 + "䝞䝟䝠䝡䝢䝣䝤䝥䝦䝧䝨䝩䝪䝫䝬䝭䝮䝯䝰䝱䝲䝳䝴䝵䝶䝷䝸䝹䝺䝻䝼䝽䝾䝿䞀䞁䞂䞃䞄䞅䞆䞇䞈䞉䞊䞋䞌䞍䞎䞏䞐䞑䞒䞓䞔䞕䞖䞗䞘䞙䞚䞛䞜䞝䞞䞟䞠" # noqa: E501 + "䞡䞢䞣䞤䞥䞦䞧䞨䞩䞪䞫䞬䞭䞮䞯䞰䞱䞲䞳䞴䞵䞶䞷䞸䞹䞺䞻䞼䞽䞾䞿䟀䟁䟂䟃䟄䟅䟆䟇䟈䟉䟊䟋䟌䟍䟎䟏䟐䟑䟒䟓䟔䟕䟖䟗䟘䟙䟚䟛䟜䟝䟞䟟䟠䟡䟢䟣" # noqa: E501 + "䟤䟥䟦䟧䟨䟩䟪䟫䟬䟭䟮䟯䟰䟱䟲䟳䟴䟵䟶䟷䟸䟹䟺䟻䟼䟽䟾䟿䠀䠁䠂䠃䠄䠅䠆䠇䠈䠉䠊䠋䠌䠍䠎䠏䠐䠑䠒䠓䠔䠕䠖䠗䠘䠙䠚䠛䠜䠝䠞䠟䠠䠡䠢䠣䠤䠥䠦" # noqa: E501 + "䠧䠨䠩䠪䠫䠬䠭䠮䠯䠰䠱䠲䠳䠴䠵䠶䠷䠸䠹䠺䠻䠼䠽䠾䠿䡀䡁䡂䡃䡄䡅䡆䡇䡈䡉䡊䡋䡌䡍䡎䡏䡐䡑䡒䡓䡔䡕䡖䡗䡘䡙䡚䡛䡜䡝䡞䡟䡠䡡䡢䡣䡤䡥䡦䡧䡨䡩" # noqa: E501 + "䡪䡫䡬䡭䡮䡯䡰䡱䡲䡳䡴䡵䡶䡷䡸䡹䡺䡻䡼䡽䡾䡿䢀䢁䢂䢃䢄䢅䢆䢇䢈䢉䢊䢋䢌䢍䢎䢏䢐䢑䢒䢓䢔䢕䢖䢗䢘䢙䢚䢛䢜䢝䢞䢟䢠䢡䢢䢣䢤䢥䢦䢧䢨䢩䢪䢫䢬" # noqa: E501 + "䢭䢮䢯䢰䢱䢲䢳䢴䢵䢶䢷䢸䢹䢺䢻䢼䢽䢾䢿䣀䣁䣂䣃䣄䣅䣆䣇䣈䣉䣊䣋䣌䣍䣎䣏䣐䣑䣒䣓䣔䣕䣖䣗䣘䣙䣚䣛䣜䣝䣞䣟䣠䣡䣢䣣䣤䣥䣦䣧䣨䣩䣪䣫䣬䣭䣮䣯" # noqa: E501 + "䣰䣱䣲䣳䣴䣵䣶䣷䣸䣹䣺䣻䣼䣽䣾䣿䤀䤁䤂䤃䤄䤅䤆䤇䤈䤉䤊䤋䤌䤍䤎䤏䤐䤑䤒䤓䤔䤕䤖䤗䤘䤙䤚䤛䤜䤝䤞䤟䤠䤡䤢䤣䤤䤥䤦䤧䤨䤩䤪䤫䤬䤭䤮䤯䤰䤱䤲" # noqa: E501 + "䤳䤴䤵䤶䤷䤸䤹䤺䤻䤼䤽䤾䤿䥀䥁䥂䥃䥄䥅䥆䥇䥈䥉䥊䥋䥌䥍䥎䥏䥐䥑䥒䥓䥔䥕䥖䥗䥘䥙䥚䥛䥜䥝䥞䥟䥠䥡䥢䥣䥤䥥䥦䥧䥨䥩䥪䥫䥬䥭䥮䥯䥰䥱䥲䥳䥴䥵" # noqa: E501 + "䥶䥷䥸䥹䥺䥻䥼䥽䥾䥿䦀䦁䦂䦃䦄䦅䦆䦇䦈䦉䦊䦋䦌䦍䦎䦏䦐䦑䦒䦓䦔䦕䦖䦗䦘䦙䦚䦛䦜䦝䦞䦟䦠䦡䦢䦣䦤䦥䦦䦧䦨䦩䦪䦫䦬䦭䦮䦯䦰䦱䦲䦳䦴䦵䦶䦷䦸" # noqa: E501 + "䦹䦺䦻䦼䦽䦾䦿䧀䧁䧂䧃䧄䧅䧆䧇䧈䧉䧊䧋䧌䧍䧎䧏䧐䧑䧒䧓䧔䧕䧖䧗䧘䧙䧚䧛䧜䧝䧞䧟䧠䧡䧢䧣䧤䧥䧦䧧䧨䧩䧪䧫䧬䧭䧮䧯䧰䧱䧲䧳䧴䧵䧶䧷䧸䧹䧺䧻" # noqa: E501 + "䧼䧽䧾䧿䨀䨁䨂䨃䨄䨅䨆䨇䨈䨉䨊䨋䨌䨍䨎䨏䨐䨑䨒䨓䨔䨕䨖䨗䨘䨙䨚䨛䨜䨝䨞䨟䨠䨡䨢䨣䨤䨥䨦䨧䨨䨩䨪䨫䨬䨭䨮䨯䨰䨱䨲䨳䨴䨵䨶䨷䨸䨹䨺䨻䨼䨽䨾" # noqa: E501 + "䨿䩀䩁䩂䩃䩄䩅䩆䩇䩈䩉䩊䩋䩌䩍䩎䩏䩐䩑䩒䩓䩔䩕䩖䩗䩘䩙䩚䩛䩜䩝䩞䩟䩠䩡䩢䩣䩤䩥䩦䩧䩨䩩䩪䩫䩬䩭䩮䩯䩰䩱䩲䩳䩴䩵䩶䩷䩸䩹䩺䩻䩼䩽䩾䩿䪀䪁" # noqa: E501 + "䪂䪃䪄䪅䪆䪇䪈䪉䪊䪋䪌䪍䪎䪏䪐䪑䪒䪓䪔䪕䪖䪗䪘䪙䪚䪛䪜䪝䪞䪟䪠䪡䪢䪣䪤䪥䪦䪧䪨䪩䪪䪫䪬䪭䪮䪯䪰䪱䪲䪳䪴䪵䪶䪷䪸䪹䪺䪻䪼䪽䪾䪿䫀䫁䫂䫃䫄" # noqa: E501 + "䫅䫆䫇䫈䫉䫊䫋䫌䫍䫎䫏䫐䫑䫒䫓䫔䫕䫖䫗䫘䫙䫚䫛䫜䫝䫞䫟䫠䫡䫢䫣䫤䫥䫦䫧䫨䫩䫪䫫䫬䫭䫮䫯䫰䫱䫲䫳䫴䫵䫶䫷䫸䫹䫺䫻䫼䫽䫾䫿䬀䬁䬂䬃䬄䬅䬆䬇" # noqa: E501 + "䬈䬉䬊䬋䬌䬍䬎䬏䬐䬑䬒䬓䬔䬕䬖䬗䬘䬙䬚䬛䬜䬝䬞䬟䬠䬡䬢䬣䬤䬥䬦䬧䬨䬩䬪䬫䬬䬭䬮䬯䬰䬱䬲䬳䬴䬵䬶䬷䬸䬹䬺䬻䬼䬽䬾䬿䭀䭁䭂䭃䭄䭅䭆䭇䭈䭉䭊" # noqa: E501 + "䭋䭌䭍䭎䭏䭐䭑䭒䭓䭔䭕䭖䭗䭘䭙䭚䭛䭜䭝䭞䭟䭠䭡䭢䭣䭤䭥䭦䭧䭨䭩䭪䭫䭬䭭䭮䭯䭰䭱䭲䭳䭴䭵䭶䭷䭸䭹䭺䭻䭼䭽䭾䭿䮀䮁䮂䮃䮄䮅䮆䮇䮈䮉䮊䮋䮌䮍" # noqa: E501 + "䮎䮏䮐䮑䮒䮓䮔䮕䮖䮗䮘䮙䮚䮛䮜䮝䮞䮟䮠䮡䮢䮣䮤䮥䮦䮧䮨䮩䮪䮫䮬䮭䮮䮯䮰䮱䮲䮳䮴䮵䮶䮷䮸䮹䮺䮻䮼䮽䮾䮿䯀䯁䯂䯃䯄䯅䯆䯇䯈䯉䯊䯋䯌䯍䯎䯏䯐" # noqa: E501 + "䯑䯒䯓䯔䯕䯖䯗䯘䯙䯚䯛䯜䯝䯞䯟䯠䯡䯢䯣䯤䯥䯦䯧䯨䯩䯪䯫䯬䯭䯮䯯䯰䯱䯲䯳䯴䯵䯶䯷䯸䯹䯺䯻䯼䯽䯾䯿䰀䰁䰂䰃䰄䰅䰆䰇䰈䰉䰊䰋䰌䰍䰎䰏䰐䰑䰒䰓" # noqa: E501 + "䰔䰕䰖䰗䰘䰙䰚䰛䰜䰝䰞䰟䰠䰡䰢䰣䰤䰥䰦䰧䰨䰩䰪䰫䰬䰭䰮䰯䰰䰱䰲䰳䰴䰵䰶䰷䰸䰹䰺䰻䰼䰽䰾䰿䱀䱁䱂䱃䱄䱅䱆䱇䱈䱉䱊䱋䱌䱍䱎䱏䱐䱑䱒䱓䱔䱕䱖" # noqa: E501 + "䱗䱘䱙䱚䱛䱜䱝䱞䱟䱠䱡䱢䱣䱤䱥䱦䱧䱨䱩䱪䱫䱬䱭䱮䱯䱰䱱䱲䱳䱴䱵䱶䱷䱸䱹䱺䱻䱼䱽䱾䱿䲀䲁䲂䲃䲄䲅䲆䲇䲈䲉䲊䲋䲌䲍䲎䲏䲐䲑䲒䲓䲔䲕䲖䲗䲘䲙" # noqa: E501 + "䲚䲛䲜䲝䲞䲟䲠䲡䲢䲣䲤䲥䲦䲧䲨䲩䲪䲫䲬䲭䲮䲯䲰䲱䲲䲳䲴䲵䲶䲷䲸䲹䲺䲻䲼䲽䲾䲿䳀䳁䳂䳃䳄䳅䳆䳇䳈䳉䳊䳋䳌䳍䳎䳏䳐䳑䳒䳓䳔䳕䳖䳗䳘䳙䳚䳛䳜" # noqa: E501 + "䳝䳞䳟䳠䳡䳢䳣䳤䳥䳦䳧䳨䳩䳪䳫䳬䳭䳮䳯䳰䳱䳲䳳䳴䳵䳶䳷䳸䳹䳺䳻䳼䳽䳾䳿䴀䴁䴂䴃䴄䴅䴆䴇䴈䴉䴊䴋䴌䴍䴎䴏䴐䴑䴒䴓䴔䴕䴖䴗䴘䴙䴚䴛䴜䴝䴞䴟" # noqa: E501 + "䴠䴡䴢䴣䴤䴥䴦䴧䴨䴩䴪䴫䴬䴭䴮䴯䴰䴱䴲䴳䴴䴵䴶䴷䴸䴹䴺䴻䴼䴽䴾䴿䵀䵁䵂䵃䵄䵅䵆䵇䵈䵉䵊䵋䵌䵍䵎䵏䵐䵑䵒䵓䵔䵕䵖䵗䵘䵙䵚䵛䵜䵝䵞䵟䵠䵡䵢" # noqa: E501 + "䵣䵤䵥䵦䵧䵨䵩䵪䵫䵬䵭䵮䵯䵰䵱䵲䵳䵴䵵䵶䵷䵸䵹䵺䵻䵼䵽䵾䵿䶀䶁䶂䶃䶄䶅䶆䶇䶈䶉䶊䶋䶌䶍䶎䶏䶐䶑䶒䶓䶔䶕䶖䶗䶘䶙䶚䶛䶜䶝䶞䶟䶠䶡䶢䶣䶤䶥" # noqa: E501 + "䶦䶧䶨䶩䶪䶫䶬䶭䶮䶯䶰䶱䶲䶳䶴䶵䶶䶷䶸䶹䶺䶻䶼䶽䶾䶿" + _BASE_VOCABS["punctuation"] + "。・〜°—、「」『』【】゛》《〉〈" # punctuation + _BASE_VOCABS["currency"] ) # Multi-lingual VOCABS["multilingual"] = "".join( dict.fromkeys( # latin_based VOCABS["english"] + VOCABS["albanian"] + VOCABS["afrikaans"] + VOCABS["azerbaijani"] + VOCABS["basque"] + VOCABS["bosnian"] + VOCABS["catalan"] + VOCABS["croatian"] + VOCABS["czech"] + VOCABS["danish"] + VOCABS["dutch"] + VOCABS["estonian"] + VOCABS["esperanto"] + VOCABS["french"] + VOCABS["finnish"] + VOCABS["frisian"] + VOCABS["galician"] + VOCABS["german"] + VOCABS["hausa"] + VOCABS["hungarian"] + VOCABS["icelandic"] + VOCABS["indonesian"] + VOCABS["irish"] + VOCABS["italian"] + VOCABS["latvian"] + VOCABS["lithuanian"] + VOCABS["luxembourgish"] + VOCABS["maori"] + VOCABS["malagasy"] + VOCABS["malay"] + VOCABS["maltese"] + VOCABS["montenegrin"] + VOCABS["norwegian"] + VOCABS["polish"] + VOCABS["portuguese"] + VOCABS["quechua"] + VOCABS["romanian"] + VOCABS["scottish_gaelic"] + VOCABS["serbian_latin"] + VOCABS["slovak"] + VOCABS["slovene"] + VOCABS["somali"] + VOCABS["spanish"] + VOCABS["swahili"] + VOCABS["swedish"] + VOCABS["tagalog"] + VOCABS["turkish"] + VOCABS["uzbek_latin"] + VOCABS["vietnamese"] + VOCABS["welsh"] + VOCABS["yoruba"] + VOCABS["zulu"] + "§" # paragraph sign # cyrillic_based + VOCABS["russian"] + VOCABS["belarusian"] + VOCABS["ukrainian"] + VOCABS["tatar"] + VOCABS["tajik"] + VOCABS["kazakh"] + VOCABS["kyrgyz"] + VOCABS["bulgarian"] + VOCABS["macedonian"] 
+ VOCABS["mongolian"] + VOCABS["yakut"] + VOCABS["serbian_cyrillic"] + VOCABS["uzbek_cyrillic"] # greek + VOCABS["greek"] # hebrew + VOCABS["hebrew"] ) ) ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" [project] name = "onnxtr" description = "Onnx Text Recognition (OnnxTR): docTR Onnx-Wrapper for high-performance OCR on documents." authors = [{name = "Felix Dittrich", email = "felixdittrich92@gmail.com"}] maintainers = [ {name = "Felix Dittrich"}, ] readme = "README.md" requires-python = ">=3.10.0,<4" license = {file = "LICENSE"} keywords=["OCR", "deep learning", "computer vision", "onnx", "text detection", "text recognition", "docTR", "document analysis", "document processing", "document AI"] classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Science/Research", "License :: OSI Approved :: Apache Software License", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering :: Artificial Intelligence", ] dynamic = ["version"] dependencies = [ # For proper typing, mypy needs numpy>=1.20.0 (cf. https://github.com/numpy/numpy/pull/16515) # Additional typing support is brought by numpy>=1.22.4, but core build sticks to >=1.16.0 "numpy>=1.16.0,<3.0.0", "scipy>=1.4.0,<2.0.0", "pypdfium2>=4.11.0,<6.0.0", "pyclipper>=1.2.0,<2.0.0", "rapidfuzz>=3.0.0,<4.0.0", "langdetect>=1.0.9,<2.0.0", "huggingface-hub>=0.23.0,<2.0.0", "Pillow>=9.2.0", "defusedxml>=0.7.0", "anyascii>=0.3.2", "tqdm>=4.30.0", ] [project.optional-dependencies] cpu = [ "onnxruntime>=1.18.0", "opencv-python>=4.5.0,<5.0.0", ] gpu = [ "onnxruntime-gpu>=1.18.0", "opencv-python>=4.5.0,<5.0.0", ] openvino = [ "onnxruntime-openvino>=1.18.0", "opencv-python>=4.5.0,<5.0.0", ] cpu-headless = [ "onnxruntime>=1.18.0", "opencv-python-headless>=4.5.0,<5.0.0", ] gpu-headless = [ "onnxruntime-gpu>=1.18.0", "opencv-python-headless>=4.5.0,<5.0.0", ] openvino-headless = [ "onnxruntime-openvino>=1.18.0", "opencv-python-headless>=4.5.0,<5.0.0", ] html = [ "weasyprint>=55.0", ] viz = [ "matplotlib>=3.1.0", "mplcursors>=0.3", ] testing = [ "pytest>=5.3.2", "coverage[toml]>=4.5.4", "requests>=2.20.0", "pytest-memray>=1.7.0", "psutil>=7.0.0", ] quality = [ "ruff>=0.1.5", "mypy>=0.812", "pre-commit>=2.17.0", ] dev = [ # Runtime "onnxruntime>=1.18.0", "opencv-python>=4.5.0,<5.0.0", # HTML "weasyprint>=55.0", # Visualization "matplotlib>=3.1.0", "mplcursors>=0.3", # Testing "pytest>=5.3.2", "coverage[toml]>=4.5.4", "requests>=2.20.0", "pytest-memray>=1.7.0", "psutil>=7.0.0", # Quality "ruff>=0.1.5", "mypy>=0.812", "pre-commit>=2.17.0", ] [project.urls] repository = "https://github.com/felixdittrich92/OnnxTR" tracker = "https://github.com/felixdittrich92/OnnxTR/issues" changelog = "https://github.com/felixdittrich92/OnnxTR/releases" [tool.setuptools] zip-safe = true [tool.setuptools.packages.find] exclude = ["docs*", "tests*", "scripts*", "demo*"] [tool.setuptools.package-data] onnxtr = ["py.typed"] [tool.mypy] files = "onnxtr/" show_error_codes = true pretty = true warn_unused_ignores = true warn_redundant_casts = true no_implicit_optional = true check_untyped_defs = true implicit_reexport = false 
[[tool.mypy.overrides]] module = [ "onnxruntime.*", "PIL.*", "scipy.*", "cv2.*", "matplotlib.*", "numpy.*", "pyclipper.*", "mplcursors.*", "defusedxml.*", "weasyprint.*", "pypdfium2.*", "langdetect.*", "huggingface_hub.*", "rapidfuzz.*", "anyascii.*", "tqdm.*", ] ignore_missing_imports = true [tool.ruff] exclude = [".git", "venv*", "build", "**/__init__.py"] line-length = 120 target-version = "py310" preview=true [tool.ruff.lint] select = [ # https://docs.astral.sh/ruff/rules/ "E", "W", "F", "I", "N", "Q", "C4", "T10", "LOG", "D101", "D103", "D201","D202","D207","D208","D214","D215","D300","D301","D417", "D419", "D207" # pydocstyle ] ignore = ["E402", "E203", "F403", "E731", "N812", "N817", "C408", "LOG015"] [tool.ruff.lint.isort] known-first-party = ["onnxtr", "utils"] known-third-party = ["onnxruntime", "cv2"] [tool.ruff.lint.per-file-ignores] "onnxtr/models/**.py" = ["N806", "F841"] "tests/**.py" = ["D"] "scripts/**.py" = ["D"] "demo/**.py" = ["D"] ".github/**.py" = ["D"] [tool.ruff.lint.flake8-quotes] docstring-quotes = "double" [tool.coverage.run] source = ["onnxtr"] ================================================ FILE: scripts/convert_to_float16.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. try: from onnxconverter_common import auto_convert_mixed_precision except ImportError: raise ImportError("Failed to import onnxconverter_common. Please install `pip install onnxconverter-common`.") # Check GPU availability import onnxruntime if onnxruntime.get_device() != "GPU": raise RuntimeError( "Please install OnnxTR with GPU support to run this script. " + "`pip install onnxtr[gpu]` or `pip install -e .[gpu]`" ) import argparse import time from tempfile import TemporaryDirectory from typing import Any import numpy as np import onnx from onnxtr.models import classification, detection, recognition from onnxtr.models.classification.zoo import ORIENTATION_ARCHS from onnxtr.models.detection.zoo import ARCHS as DETECTION_ARCHS from onnxtr.models.recognition.zoo import ARCHS as RECOGNITION_ARCHS def _load_model(arch: str, model_path: str | None = None) -> Any: if arch in DETECTION_ARCHS: model = detection.__dict__[arch]() if model_path is None else detection.__dict__[arch](model_path) elif args.arch in RECOGNITION_ARCHS: model = recognition.__dict__[arch]() if model_path is None else recognition.__dict__[arch](model_path) elif args.arch in ORIENTATION_ARCHS: model = classification.__dict__[arch]() if model_path is None else classification.__dict__[arch](model_path) else: raise ValueError(f"Unknown architecture {arch}") return model def _latency_check(args: Any, size: tuple[int], model: Any, img_tensor: np.ndarray) -> None: # Warmup for _ in range(10): _ = model(img_tensor) timings = [] # Evaluation runs for _ in range(args.it): start_ts = time.perf_counter() _ = model(img_tensor) timings.append(time.perf_counter() - start_ts) _timings = np.array(timings) print(f"{args.arch} ({args.it} runs on ({size}) inputs)") print(f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms") def _validate(fp32_in: list[np.ndarray], fp16_in: list[np.ndarray]) -> bool: assert fp32_in[0].shape == fp16_in[0].shape, "Input shapes are not the same" # print mean difference between fp32 and fp16 inputs if np.abs(fp32_in[0] - fp16_in[0]).mean() > 1e-3: print( f"Mean difference between fp32 and fp16 inputs: {np.abs(fp32_in[0] - fp16_in[0]).mean()} 
" + "-> YOU MAY EXPECT DIFFERING RESULTS" ) return True # NOTE: Only warning, not error def main(args): model_float32 = _load_model(args.arch, model_path=args.input_model if args.input_model else None) size = (1, *model_float32.cfg["input_shape"]) img_tensor = np.random.rand(*size).astype(np.float32) with TemporaryDirectory() as temp_dir: model_fp16_path = f"{temp_dir}/model_fp16.onnx" input_feed = {model_float32.runtime_inputs.name: img_tensor} model_float16 = auto_convert_mixed_precision( # NOTE: keep_io_types=True is required to keep the input/output type as float32 onnx.load(str(model_float32.model_path)), input_feed, validate_fn=_validate, keep_io_types=True, ) onnx.save(model_float16, model_fp16_path) model_fp16 = _load_model(args.arch, model_fp16_path) # Latency check _latency_check(args, size, model_float32, img_tensor) _latency_check(args, size, model_fp16, img_tensor) onnx.save(model_float16, args.arch + "_fp16.onnx") print(f"FP16 model saved at {args.arch}_fp16.onnx") print("Attention: FP16 converted models can only run on GPU devices.") if __name__ == "__main__": parser = argparse.ArgumentParser( description="OnnxTR FP32 to FP16 conversion", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( "arch", type=str, choices=DETECTION_ARCHS + RECOGNITION_ARCHS + ORIENTATION_ARCHS, help="Architecture to convert", ) parser.add_argument("--input_model", type=str, help="Path to the input model", required=False) parser.add_argument("--it", type=int, default=1000, help="Number of iterations to run") args = parser.parse_args() main(args) ================================================ FILE: scripts/evaluate.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. try: from doctr.version import __version__ print(f"DocTR version: {__version__}") except ImportError: raise ImportError("Failed to import `doctr`. 
Please install `pip install python-doctr[torch]`.") import os import time from typing import Any import numpy as np from doctr import datasets from doctr import transforms as T from doctr.utils.metrics import LocalizationConfusion, OCRMetric, TextMatch from tqdm import tqdm from onnxtr.models import EngineConfig, ocr_predictor from onnxtr.utils.geometry import extract_crops, extract_rcrops def _pct(val): return "N/A" if val is None else f"{val:.2%}" def main(args): if not args.rotation: args.eval_straight = True if args.profiling: os.environ["ONNXTR_MULTIPROCESSING_DISABLE"] = "TRUE" try: import memray import yappi except ImportError: raise ImportError("Please install yappi and memray to enable profiling - `pip install yappi memray`.") yappi.set_clock_type("cpu") # Drop memray profile and flamegraph if they already exist if os.path.exists("memray_profile.bin"): os.remove("memray_profile.bin") if os.path.exists("memray_flamegraph.html"): os.remove("memray_flamegraph.html") memray_tracker = memray.Tracker("memray_profile.bin") memray_tracker.__enter__() input_shape = (args.size, args.size) # We define a transformation function which does transform the annotation # to the required format for the Resize transformation def _transform(img, target): boxes = target["boxes"] transformed_img, transformed_boxes = T.Resize( input_shape, preserve_aspect_ratio=args.keep_ratio, symmetric_pad=args.symmetric_pad )(img, boxes) return transformed_img, {"boxes": transformed_boxes, "labels": target["labels"]} predictor = ocr_predictor( args.detection, args.recognition, reco_bs=args.batch_size, preserve_aspect_ratio=False, # we handle the transformation directly in the dataset so this is set to False symmetric_pad=False, # we handle the transformation directly in the dataset so this is set to False assume_straight_pages=not args.rotation, load_in_8_bit=args.load_8bit, det_engine_cfg=EngineConfig(providers=["CPUExecutionProvider"]) if args.force_cpu else None, reco_engine_cfg=EngineConfig(providers=["CPUExecutionProvider"]) if args.force_cpu else None, clf_engine_cfg=EngineConfig(providers=["CPUExecutionProvider"]) if args.force_cpu else None, ) # Load the dataset train_set = datasets.__dict__[args.dataset]( train=True, download=True, use_polygons=not args.eval_straight, sample_transforms=_transform, ) val_set = datasets.__dict__[args.dataset]( train=False, download=True, use_polygons=not args.eval_straight, sample_transforms=_transform, ) sets = [train_set, val_set] reco_metric = TextMatch() det_metric = LocalizationConfusion(iou_thresh=args.iou, use_polygons=not args.eval_straight) e2e_metric = OCRMetric(iou_thresh=args.iou, use_polygons=not args.eval_straight) sample_idx = 0 extraction_fn = extract_crops if args.eval_straight else extract_rcrops timings = [] # Warmup print("Warming up the model...") dummy_img = np.zeros((args.size, args.size, 3), dtype=np.uint8) for _ in range(5): _ = predictor([dummy_img]) print("Warmup done.\n") for dataset in sets: for page, target in tqdm(dataset): if hasattr(page, "numpy"): page = page.numpy() if page.ndim == 3 and page.shape[0] in [1, 3]: page = np.moveaxis(page, 0, -1) if page.dtype != np.uint8: page = (page * 255).astype(np.uint8) if np.max(page) <= 1 else page.astype(np.uint8) # GT gt_boxes = target["boxes"] gt_labels = target["labels"] # Forward if args.profiling: yappi.start() start_ts = time.perf_counter() out = predictor(page[None, ...]) timings.append(time.perf_counter() - start_ts) if args.profiling: yappi.stop() crops = extraction_fn(page, gt_boxes, 
channels_last=True) reco_out = predictor.reco_predictor(crops) reco_words: Any = [] if len(reco_out): reco_words, _ = zip(*reco_out) # Unpack preds pred_boxes: list[list[Any]] = [] pred_labels: list[str] = [] for page in out.pages: height, width = page.dimensions for block in page.blocks: for line in block.lines: for word in line.words: if not args.rotation: (a, b), (c, d) = word.geometry else: ( [x1, y1], [x2, y2], [x3, y3], [x4, y4], ) = word.geometry if np.issubdtype(gt_boxes.dtype, np.integer): if not args.rotation: pred_boxes.append([ int(a * width), int(b * height), int(c * width), int(d * height), ]) else: if args.eval_straight: pred_boxes.append([ int(width * min(x1, x2, x3, x4)), int(height * min(y1, y2, y3, y4)), int(width * max(x1, x2, x3, x4)), int(height * max(y1, y2, y3, y4)), ]) else: pred_boxes.append([ [int(x1 * width), int(y1 * height)], [int(x2 * width), int(y2 * height)], [int(x3 * width), int(y3 * height)], [int(x4 * width), int(y4 * height)], ]) else: if not args.rotation: pred_boxes.append([a, b, c, d]) else: if args.eval_straight: pred_boxes.append([ min(x1, x2, x3, x4), min(y1, y2, y3, y4), max(x1, x2, x3, x4), max(y1, y2, y3, y4), ]) else: pred_boxes.append([[x1, y1], [x2, y2], [x3, y3], [x4, y4]]) pred_labels.append(word.value) # Update the metric det_metric.update(gt_boxes, np.asarray(pred_boxes)) reco_metric.update(gt_labels, reco_words) e2e_metric.update(gt_boxes, np.asarray(pred_boxes), gt_labels, pred_labels) # Loop break sample_idx += 1 if isinstance(args.samples, int) and args.samples == sample_idx: break if isinstance(args.samples, int) and args.samples == sample_idx: break # Unpack aggregated metrics print(f"Model Evaluation (model= {args.detection} + {args.recognition}, dataset={args.dataset})") recall, precision, mean_iou = det_metric.summary() print(f"Text Detection - Recall: {_pct(recall)}, Precision: {_pct(precision)}, Mean IoU: {_pct(mean_iou)}") acc = reco_metric.summary() print(f"Text Recognition - Accuracy: {_pct(acc['raw'])} (unicase: {_pct(acc['unicase'])})") recall, precision, mean_iou = e2e_metric.summary() print( f"OCR - Recall: {_pct(recall['raw'])} (unicase: {_pct(recall['unicase'])}), " f"Precision: {_pct(precision['raw'])} (unicase: {_pct(precision['unicase'])}), Mean IoU: {_pct(mean_iou)}\n" ) print(f"Number of samples: {sample_idx}") print(f"Total inference time: {np.sum(timings):.2f} sec") print(f"Average inference time per sample: {np.mean(timings):.6f} sec") if args.profiling: import subprocess memray_tracker.__exit__(None, None, None) with open("yappi_profile.stats", "w") as f: yappi.get_func_stats().print_all(out=f) print("Profiling complete. 
Generating memray flamegraph and stats...") subprocess.run(["memray", "flamegraph", "memray_profile.bin", "-o", "memray_flamegraph.html"]) subprocess.run(["memray", "stats", "memray_profile.bin"]) def parse_args(): import argparse parser = argparse.ArgumentParser( description="OnnxTR end-to-end evaluation", formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument("detection", type=str, help="Text detection model to use for analysis") parser.add_argument("recognition", type=str, help="Text recognition model to use for analysis") parser.add_argument("--iou", type=float, default=0.5, help="IoU threshold to match a pair of boxes") parser.add_argument("--dataset", type=str, default="FUNSD", help="choose a dataset: FUNSD, CORD") parser.add_argument("--rotation", dest="rotation", action="store_true", help="run rotated OCR + postprocessing") parser.add_argument("-b", "--batch_size", type=int, default=32, help="batch size for recognition") parser.add_argument("--size", type=int, default=1024, help="model input size, H = W") parser.add_argument("--keep_ratio", action="store_true", help="keep the aspect ratio of the input image") parser.add_argument("--symmetric_pad", action="store_true", help="pad the image symmetrically") parser.add_argument("--samples", type=int, default=None, help="evaluate only on the N first samples") parser.add_argument( "--eval-straight", action="store_true", help="evaluate on straight pages with straight bbox (to use the quick and light metric)", ) parser.add_argument("--load_8bit", action="store_true", help="load model in 8bit mode") parser.add_argument("--force-cpu", action="store_true", help="force CPU execution") parser.add_argument("--profiling", action="store_true", help="enable profiling") args = parser.parse_args() return args if __name__ == "__main__": args = parse_args() main(args) ================================================ FILE: scripts/latency.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
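# Example invocation (illustrative; `arch` must be one of the DETECTION_ARCHS,
# RECOGNITION_ARCHS or ORIENTATION_ARCHS choices registered below):
#   python scripts/latency.py <arch> --it 100 --load8bit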
import argparse import time import numpy as np from onnxtr.models import classification, detection, recognition from onnxtr.models.classification.zoo import ORIENTATION_ARCHS from onnxtr.models.detection.zoo import ARCHS as DETECTION_ARCHS from onnxtr.models.recognition.zoo import ARCHS as RECOGNITION_ARCHS def main(args): if args.arch in DETECTION_ARCHS: model = detection.__dict__[args.arch](load_in_8_bit=args.load8bit) elif args.arch in RECOGNITION_ARCHS: model = recognition.__dict__[args.arch](load_in_8_bit=args.load8bit) elif args.arch in ORIENTATION_ARCHS: model = classification.__dict__[args.arch](load_in_8_bit=args.load8bit) else: raise ValueError(f"Unknown architecture {args.arch}") size = (1, *model.cfg["input_shape"]) img_tensor = np.random.rand(*size).astype(np.float32) # Warmup for _ in range(10): _ = model(img_tensor) timings = [] # Evaluation runs for _ in range(args.it): start_ts = time.perf_counter() _ = model(img_tensor) timings.append(time.perf_counter() - start_ts) _timings = np.array(timings) print(f"{args.arch} ({args.it} runs on ({size}) inputs)") print(f"mean {1000 * _timings.mean():.2f}ms, std {1000 * _timings.std():.2f}ms") if __name__ == "__main__": parser = argparse.ArgumentParser( description="OnnxTR latency benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument( "arch", type=str, choices=DETECTION_ARCHS + RECOGNITION_ARCHS + ORIENTATION_ARCHS, help="Architecture to benchmark", ) parser.add_argument("--load8bit", action="store_true", help="Load the 8-bit quantized model") parser.add_argument("--it", type=int, default=1000, help="Number of iterations to run") args = parser.parse_args() main(args) ================================================ FILE: scripts/quantize.py ================================================ import argparse import os import time from enum import Enum import numpy as np import onnxruntime from onnxruntime.quantization import CalibrationDataReader, QuantFormat, QuantType, quantize_dynamic, quantize_static from onnxtr.io.image import read_img_as_numpy from onnxtr.models.preprocessor import PreProcessor from onnxtr.utils.geometry import shape_translate class TaskShapes(Enum): """Enum class to define the shapes of the input tensors for different tasks""" crop_orientation = (256, 256) page_orientation = (512, 512) detection = (1024, 1024) recognition = (32, 128) class CalibrationDataLoader(CalibrationDataReader): def __init__(self, calibration_image_folder: str, model_path: str, task_shape: tuple[int]): self.enum_data = None self.preprocessor = PreProcessor(output_size=task_shape, batch_size=1) self.dataset = [ self.preprocessor( np.expand_dims(read_img_as_numpy(os.path.join(calibration_image_folder, img_file)), axis=0) ) for img_file in os.listdir(calibration_image_folder)[:500] # limit to 500 images ] session = onnxruntime.InferenceSession(model_path, None) self.input_name = session.get_inputs()[0].name self.datasize = len(self.dataset) def get_next(self): if self.enum_data is None: self.enum_data = iter([ {self.input_name: shape_translate(input_data[0], format="BCHW")} for input_data in self.dataset ]) return next(self.enum_data, None) def rewind(self): self.enum_data = None def benchmark(calibration_image_folder: str, model_path: str, task_shape: tuple[int]): session = onnxruntime.InferenceSession(model_path) input_name = session.get_inputs()[0].name output_name = [output.name for output in session.get_outputs()] dataset = CalibrationDataLoader(calibration_image_folder, model_path, task_shape) sample = 
shape_translate(dataset.dataset[0][0], format="BCHW") # take 1 sample for benchmarking total = 0.0 runs = 10 # Warming up _ = session.run(output_name, {input_name: sample}) for _ in range(runs): start = time.perf_counter() _ = session.run(output_name, {input_name: sample}) end = (time.perf_counter() - start) * 1000 total += end print(f"{end:.2f}ms") total /= runs print(f"Avg: {total:.2f}ms") def benchmark_mean_diff( calibration_image_folder: str, model_path: str, quantized_model_path: str, task_shape: tuple[int] ): """Check the mean difference between the original and quantized model""" session = onnxruntime.InferenceSession(model_path) quantized_session = onnxruntime.InferenceSession(quantized_model_path) input_name = session.get_inputs()[0].name output_name = [output.name for output in session.get_outputs()] quantized_output_name = [output.name for output in quantized_session.get_outputs()] dataset = CalibrationDataLoader(calibration_image_folder, model_path, task_shape) sample = shape_translate(dataset.dataset[0][0], format="BCHW") # take 1 sample for benchmarking output = session.run(output_name, {input_name: sample})[0] quantized_output = quantized_session.run(quantized_output_name, {input_name: sample})[0] mean_diff = np.mean(np.abs(output - quantized_output)) print(f"Mean difference between original and quantized model: {mean_diff:.2f}") def main(args): input_model_path = args.input_model calibration_dataset_path = args.calibrate_dataset if args.task == "crop_orientation": task_shape = TaskShapes.crop_orientation.value elif args.task == "page_orientation": task_shape = TaskShapes.page_orientation.value elif args.task == "detection": task_shape = TaskShapes.detection.value else: task_shape = TaskShapes.recognition.value print(f"Task: {args.task} | Task shape: {task_shape}") dr = CalibrationDataLoader(calibration_dataset_path, input_model_path, task_shape) base_model_name = input_model_path.split("/")[-1].split("-")[0] static_out_name = base_model_name + "_static_8_bit.onnx" dynamic_out_name = base_model_name + "_dynamic_8_bit.onnx" print("benchmarking fp32 model...") benchmark(calibration_dataset_path, input_model_path, task_shape) # Calibrate and quantize model # Turn off model optimization during quantization if "parseq" not in input_model_path: # Skip static quantization for Parseq print("Calibrating and quantizing model static...") try: quantize_static( input_model_path, static_out_name, dr, quant_format=args.quant_format, weight_type=QuantType.QInt8, activation_type=QuantType.QUInt8, reduce_range=True, ) except Exception: print("Error during static quantization --> Change weight_type also to QUInt8") quantize_static( input_model_path, static_out_name, dr, quant_format=args.quant_format, weight_type=QuantType.QUInt8, activation_type=QuantType.QUInt8, reduce_range=True, ) print("benchmarking static int8 model...") benchmark(calibration_dataset_path, static_out_name, task_shape) print("benchmarking mean difference between fp32 and static int8 model...") benchmark_mean_diff(calibration_dataset_path, input_model_path, static_out_name, task_shape) print("Calibrated and quantized static model saved.") if "sar" not in input_model_path: # Skip dynamic quantization for SAR_ResNet31 print("Dynamic int 8 quantization...") quantize_dynamic( input_model_path, dynamic_out_name, weight_type=QuantType.QUInt8, ) print("Dynamic model saved.") print("benchmarking dynamic int8 model...") benchmark(calibration_dataset_path, dynamic_out_name, task_shape) print("benchmarking mean difference between 
fp32 and dynamic int8 model...") benchmark_mean_diff(calibration_dataset_path, input_model_path, dynamic_out_name, task_shape) if __name__ == "__main__": parser = argparse.ArgumentParser( description="OnnxTR script to quantize models and benchmark the quantized models", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) parser.add_argument("--input_model", required=True, help="input model") parser.add_argument( "--task", required=True, type=str, choices=["crop_orientation", "page_orientation", "detection", "recognition"], help="task shape", ) parser.add_argument( "--calibrate_dataset", type=str, required=True, help="calibration data set (word crop images for recognition/crop_orientation, page images for detection/page_orientation)", # noqa ) parser.add_argument( "--quant_format", default=QuantFormat.QDQ, type=QuantFormat.from_string, choices=list(QuantFormat), ) args = parser.parse_args() main(args) ================================================ FILE: setup.py ================================================ # Copyright (C) 2021-2026, Mindee | Felix Dittrich. # This program is licensed under the Apache License 2.0. # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details. import os from pathlib import Path from setuptools import setup PKG_NAME = "onnxtr" VERSION = os.getenv("BUILD_VERSION", "0.8.2a0") if __name__ == "__main__": print(f"Building wheel {PKG_NAME}-{VERSION}") # Dynamically set the __version__ attribute cwd = Path(__file__).parent.absolute() with open(cwd.joinpath("onnxtr", "version.py"), "w", encoding="utf-8") as f: f.write(f"__version__ = '{VERSION}'\n") setup(name=PKG_NAME, version=VERSION) ================================================ FILE: tests/common/test_contrib.py ================================================ import numpy as np import pytest from onnxtr.contrib import artefacts from onnxtr.contrib.base import _BasePredictor from onnxtr.io import DocumentFile def test_base_predictor(): # check that we need to provide either a url or a model_path with pytest.raises(ValueError): _ = _BasePredictor(batch_size=2) predictor = _BasePredictor(batch_size=2, url=artefacts.default_cfgs["yolov8_artefact"]["url"]) # check that we need to implement preprocess and postprocess with pytest.raises(NotImplementedError): predictor.preprocess(np.zeros((10, 10, 3))) with pytest.raises(NotImplementedError): predictor.postprocess([np.zeros((10, 10, 3))], [[np.zeros((10, 10, 3))]]) def test_artefact_detector(mock_artefact_image_stream): doc = DocumentFile.from_images([mock_artefact_image_stream]) detector = artefacts.ArtefactDetector(batch_size=2, conf_threshold=0.5, iou_threshold=0.5) results = detector(doc) assert isinstance(results, list) and len(results) == 1 and isinstance(results[0], list) assert all(isinstance(artefact, dict) for artefact in results[0]) # check result keys assert all(key in results[0][0] for key in ["label", "confidence", "box"]) assert all(len(artefact["box"]) == 4 for artefact in results[0]) assert all(isinstance(coord, int) for box in results[0] for coord in box["box"]) assert all(isinstance(artefact["confidence"], float) for artefact in results[0]) assert all(isinstance(artefact["label"], str) for artefact in results[0]) # check results for the mock image are 9 artefacts assert len(results[0]) == 9 # test visualization non-blocking for tests detector.show(block=False) ================================================ FILE: tests/common/test_core.py ================================================ import pytest import onnxtr from onnxtr.file_utils import
requires_package def test_version(): assert len(onnxtr.__version__.split(".")) == 3 def test_requires_package(): requires_package("numpy") # available with pytest.raises(ImportError): # not available requires_package("non_existent_package") ================================================ FILE: tests/common/test_engine_cfg.py ================================================ import gc import numpy as np import psutil import pytest from onnxruntime import RunOptions, SessionOptions from onnxtr import models from onnxtr.io import Document from onnxtr.models import EngineConfig, detection, recognition from onnxtr.models.predictor import OCRPredictor def _get_rss_mb(): gc.collect() process = psutil.Process() return process.memory_info().rss / (1024 * 1024) def _test_predictor(predictor): # Output checks assert isinstance(predictor, OCRPredictor) doc = [np.zeros((1024, 1024, 3), dtype=np.uint8)] out = predictor(doc) # Document assert isinstance(out, Document) # The input doc has 1 page assert len(out.pages) == 1 # Dimension check with pytest.raises(ValueError): input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) _ = predictor([input_page]) @pytest.mark.parametrize( "det_arch, reco_arch", [[det_arch, reco_arch] for det_arch, reco_arch in zip(detection.zoo.ARCHS, recognition.zoo.ARCHS)], ) def test_engine_cfg(det_arch, reco_arch): session_options = SessionOptions() session_options.enable_cpu_mem_arena = False engine_cfg = EngineConfig( providers=["CPUExecutionProvider"], session_options=session_options, ) assert engine_cfg.__repr__() == "EngineConfig(providers=['CPUExecutionProvider'])" # Model predictor = models.ocr_predictor( det_arch, reco_arch, det_engine_cfg=engine_cfg, reco_engine_cfg=engine_cfg, clf_engine_cfg=engine_cfg ) assert predictor.det_predictor.model.providers == ["CPUExecutionProvider"] assert not predictor.det_predictor.model.session_options.enable_cpu_mem_arena assert predictor.reco_predictor.model.providers == ["CPUExecutionProvider"] assert not predictor.reco_predictor.model.session_options.enable_cpu_mem_arena _test_predictor(predictor) # passing model instance directly det_model = detection.__dict__[det_arch](engine_cfg=engine_cfg) assert det_model.providers == ["CPUExecutionProvider"] assert not det_model.session_options.enable_cpu_mem_arena reco_model = recognition.__dict__[reco_arch](engine_cfg=engine_cfg) assert reco_model.providers == ["CPUExecutionProvider"] assert not reco_model.session_options.enable_cpu_mem_arena predictor = models.ocr_predictor(det_model, reco_model) assert predictor.det_predictor.model.providers == ["CPUExecutionProvider"] assert not predictor.det_predictor.model.session_options.enable_cpu_mem_arena assert predictor.reco_predictor.model.providers == ["CPUExecutionProvider"] assert not predictor.reco_predictor.model.session_options.enable_cpu_mem_arena _test_predictor(predictor) det_predictor = models.detection_predictor(det_arch, engine_cfg=engine_cfg) assert det_predictor.model.providers == ["CPUExecutionProvider"] assert not det_predictor.model.session_options.enable_cpu_mem_arena reco_predictor = models.recognition_predictor(reco_arch, engine_cfg=engine_cfg) assert reco_predictor.model.providers == ["CPUExecutionProvider"] assert not reco_predictor.model.session_options.enable_cpu_mem_arena def test_cpu_memory_arena_shrinkage_enabled(): session_options = SessionOptions() session_options.enable_mem_pattern = False session_options.enable_cpu_mem_arena = True enable_shrinkage = False providers = [("CPUExecutionProvider",
{"arena_extend_strategy": "kSameAsRequested"})] def enable_arena_shrinkage(run_options: "RunOptions") -> "RunOptions": if enable_shrinkage: run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu:0") assert run_options.get_run_config_entry("memory.enable_memory_arena_shrinkage") == "cpu:0" return run_options engine_cfg = EngineConfig( providers=providers, session_options=session_options, run_options_provider=enable_arena_shrinkage, ) predictor = models.ocr_predictor( det_engine_cfg=engine_cfg, reco_engine_cfg=engine_cfg, clf_engine_cfg=engine_cfg, detect_orientation=True, ) assert predictor.det_predictor.model.providers == providers assert predictor.det_predictor.model.session_options.enable_cpu_mem_arena assert predictor.reco_predictor.model.providers == providers assert predictor.reco_predictor.model.session_options.enable_cpu_mem_arena rng = np.random.RandomState(seed=42) sample = rng.randint(0, 256, (1024, 1024, 3), dtype=np.uint8) start_rss = _get_rss_mb() predictor([sample]) increased_rss = _get_rss_mb() assert increased_rss > start_rss enable_shrinkage = True predictor([sample]) decreased_rss = _get_rss_mb() assert increased_rss > decreased_rss ================================================ FILE: tests/common/test_headers.py ================================================ """Test for python files copyright headers.""" from datetime import datetime from pathlib import Path def test_copyright_header(): copyright_header = "".join([ f"# Copyright (C) {2021}-{datetime.now().year}, Mindee | Felix Dittrich.\n\n", "# This program is licensed under the Apache License 2.0.\n", "# See LICENSE or go to for full license details.\n", ]) excluded_files = ["__init__.py", "version.py"] invalid_files = [] locations = [".github", "onnxtr"] for location in locations: for source_path in Path(__file__).parent.parent.parent.joinpath(location).rglob("*.py"): if source_path.name not in excluded_files: source_path_content = source_path.read_text() if copyright_header not in source_path_content: invalid_files.append(source_path) assert len(invalid_files) == 0, f"Invalid copyright header in the following files: {invalid_files}" ================================================ FILE: tests/common/test_io.py ================================================ from io import BytesIO from pathlib import Path import numpy as np import pytest import requests from onnxtr import io def _check_doc_content(doc_tensors, num_pages): # 1 doc of 8 pages assert len(doc_tensors) == num_pages assert all(isinstance(page, np.ndarray) for page in doc_tensors) assert all(page.dtype == np.uint8 for page in doc_tensors) def test_read_pdf(mock_pdf): doc = io.read_pdf(mock_pdf) _check_doc_content(doc, 2) # Test with Path doc = io.read_pdf(Path(mock_pdf)) _check_doc_content(doc, 2) with open(mock_pdf, "rb") as f: doc = io.read_pdf(f.read()) _check_doc_content(doc, 2) # Wrong input type with pytest.raises(TypeError): _ = io.read_pdf(123) # Wrong path with pytest.raises(FileNotFoundError): _ = io.read_pdf("my_imaginary_file.pdf") def test_read_img_as_numpy(tmpdir_factory, mock_pdf): # Wrong input type with pytest.raises(TypeError): _ = io.read_img_as_numpy(123) # Non-existing file with pytest.raises(FileNotFoundError): io.read_img_as_numpy("my_imaginary_file.jpg") # Invalid image with pytest.raises(ValueError): io.read_img_as_numpy(str(mock_pdf)) # From path url = "https://doctr-static.mindee.com/models?id=v0.2.1/Grace_Hopper.jpg&src=0" file = BytesIO(requests.get(url).content) tmp_path = 
str(tmpdir_factory.mktemp("data").join("mock_img_file.jpg")) with open(tmp_path, "wb") as f: f.write(file.getbuffer()) # Path & stream with open(tmp_path, "rb") as f: page_stream = io.read_img_as_numpy(f.read()) for page in (io.read_img_as_numpy(tmp_path), page_stream): # Data type assert isinstance(page, np.ndarray) assert page.dtype == np.uint8 # Shape assert page.shape == (606, 517, 3) # RGB bgr_page = io.read_img_as_numpy(tmp_path, rgb_output=False) assert np.all(page == bgr_page[..., ::-1]) # Resize target_size = (200, 150) resized_page = io.read_img_as_numpy(tmp_path, target_size) assert resized_page.shape[:2] == target_size def test_read_html(): url = "https://www.google.com" pdf_stream = io.read_html(url) assert isinstance(pdf_stream, bytes) def test_document_file(mock_pdf, mock_artefact_image_stream): pages = io.DocumentFile.from_images([mock_artefact_image_stream]) _check_doc_content(pages, 1) assert isinstance(io.DocumentFile.from_pdf(mock_pdf), list) assert isinstance(io.DocumentFile.from_url("https://www.google.com"), list) def test_pdf(mock_pdf): pages = io.DocumentFile.from_pdf(mock_pdf) # As images num_pages = 2 _check_doc_content(pages, num_pages) ================================================ FILE: tests/common/test_io_elements.py ================================================ from xml.etree.ElementTree import ElementTree import numpy as np import pytest from onnxtr.io import elements def _mock_words(size=(1.0, 1.0), offset=(0, 0), confidence=0.9, objectness_score=0.9, polygons=False): box_word_elements = [ elements.Word( "hello", confidence, ((offset[0], offset[1]), (size[0] / 2 + offset[0], size[1] / 2 + offset[1])), objectness_score, {"value": 0, "confidence": None}, ), elements.Word( "world", confidence, ((size[0] / 2 + offset[0], size[1] / 2 + offset[1]), (size[0] + offset[0], size[1] + offset[1])), objectness_score, {"value": 0, "confidence": None}, ), ] polygons_word_elements = [ elements.Word( "hello", confidence, # (x1, y1), (x2, y2), (x3, y3), (x4, y4) with shape (4, 2) np.array([ [offset[0], offset[1]], [size[0] / 2 + offset[0], offset[1]], [size[0] / 2 + offset[0], size[1] / 2 + offset[1]], [offset[0], size[1] / 2 + offset[1]], ]), objectness_score, {"value": 0, "confidence": None}, ), elements.Word( "world", confidence, # (x1, y1), (x2, y2), (x3, y3), (x4, y4) with shape (4, 2) np.array([ [size[0] / 2 + offset[0], size[1] / 2 + offset[1]], [size[0] + offset[0], size[1] / 2 + offset[1]], [size[0] + offset[0], size[1] + offset[1]], [size[0] / 2 + offset[0], size[1] + offset[1]], ]), objectness_score, {"value": 0, "confidence": None}, ), ] return polygons_word_elements if polygons else box_word_elements def _mock_artefacts(size=(1, 1), offset=(0, 0), confidence=0.8): sub_size = (size[0] / 2, size[1] / 2) return [ elements.Artefact( "qr_code", confidence, ((offset[0], offset[1]), (sub_size[0] + offset[0], sub_size[1] + offset[1])) ), elements.Artefact( "qr_code", confidence, ((sub_size[0] + offset[0], sub_size[1] + offset[1]), (size[0] + offset[0], size[1] + offset[1])), ), ] def _mock_lines(size=(1, 1), offset=(0, 0), polygons=False): sub_size = (size[0] / 2, size[1] / 2) return [ elements.Line(_mock_words(size=sub_size, offset=offset, polygons=polygons)), elements.Line( _mock_words(size=sub_size, offset=(offset[0] + sub_size[0], offset[1] + sub_size[1]), polygons=polygons) ), ] def _mock_blocks(size=(1, 1), offset=(0, 0), polygons=False): sub_size = (size[0] / 4, size[1] / 4) return [ elements.Block( _mock_lines(size=sub_size, offset=offset, 
polygons=polygons), _mock_artefacts(size=sub_size, offset=(offset[0] + sub_size[0], offset[1] + sub_size[1])), ), elements.Block( _mock_lines( size=sub_size, offset=(offset[0] + 2 * sub_size[0], offset[1] + 2 * sub_size[1]), polygons=polygons ), _mock_artefacts(size=sub_size, offset=(offset[0] + 3 * sub_size[0], offset[1] + 3 * sub_size[1])), ), ] def _mock_pages(block_size=(1, 1), block_offset=(0, 0), polygons=False): return [ elements.Page( np.random.randint(0, 255, (300, 200, 3), dtype=np.uint8), _mock_blocks(block_size, block_offset, polygons), 0, (300, 200), {"value": 0.0, "confidence": 1.0}, {"value": "EN", "confidence": 0.8}, ), elements.Page( np.random.randint(0, 255, (500, 1000, 3), dtype=np.uint8), _mock_blocks(block_size, block_offset), 1, (500, 1000), {"value": 0.15, "confidence": 0.8}, {"value": "FR", "confidence": 0.7}, ), ] def test_element(): with pytest.raises(KeyError): elements.Element(sub_elements=[1]) def test_word(): word_str = "hello" conf = 0.8 geom = ((0, 0), (1, 1)) objectness_score = 0.9 crop_orientation = {"value": 0, "confidence": None} word = elements.Word(word_str, conf, geom, objectness_score, crop_orientation) # Attribute checks assert word.value == word_str assert word.confidence == conf assert word.geometry == geom assert word.objectness_score == objectness_score assert word.crop_orientation == crop_orientation # Render assert word.render() == word_str # Export assert word.export() == { "value": word_str, "confidence": conf, "geometry": geom, "objectness_score": objectness_score, "crop_orientation": crop_orientation, } # Repr assert word.__repr__() == f"Word(value='hello', confidence={conf:.2})" # Class method state_dict = { "value": "there", "confidence": 0.1, "geometry": ((0, 0), (0.5, 0.5)), "objectness_score": objectness_score, "crop_orientation": crop_orientation, } word = elements.Word.from_dict(state_dict) assert word.export() == state_dict def test_line(): geom = ((0, 0), (0.5, 0.5)) objectness_score = 0.9 words = _mock_words(size=geom[1], offset=geom[0]) line = elements.Line(words) # Attribute checks assert len(line.words) == len(words) assert all(isinstance(w, elements.Word) for w in line.words) assert line.geometry == geom assert line.objectness_score == objectness_score # Render assert line.render() == "hello world" # Export assert line.export() == { "words": [w.export() for w in words], "geometry": geom, "objectness_score": objectness_score, } # Repr words_str = " " * 4 + ",\n ".join(repr(word) for word in words) + "," assert line.__repr__() == f"Line(\n (words): [\n{words_str}\n ]\n)" # Ensure that words repr doesn't span on several lines when there are none assert repr(elements.Line([], ((0, 0), (1, 1)))) == "Line(\n (words): []\n)" # from dict state_dict = { "words": [ { "value": "there", "confidence": 0.1, "geometry": ((0, 0), (1.0, 1.0)), "objectness_score": objectness_score, "crop_orientation": {"value": 0, "confidence": None}, } ], "geometry": ((0, 0), (1.0, 1.0)), "objectness_score": objectness_score, } line = elements.Line.from_dict(state_dict) assert line.export() == state_dict def test_artefact(): artefact_type = "qr_code" conf = 0.8 geom = ((0, 0), (1, 1)) artefact = elements.Artefact(artefact_type, conf, geom) # Attribute checks assert artefact.type == artefact_type assert artefact.confidence == conf assert artefact.geometry == geom # Render assert artefact.render() == "[QR_CODE]" # Export assert artefact.export() == {"type": artefact_type, "confidence": conf, "geometry": geom} # Repr assert artefact.__repr__() ==
f"Artefact(type='{artefact_type}', confidence={conf:.2})" def test_block(): geom = ((0, 0), (1, 1)) sub_size = (geom[1][0] / 2, geom[1][0] / 2) objectness_score = 0.9 lines = _mock_lines(size=sub_size, offset=geom[0]) artefacts = _mock_artefacts(size=sub_size, offset=sub_size) block = elements.Block(lines, artefacts) # Attribute checks assert len(block.lines) == len(lines) assert len(block.artefacts) == len(artefacts) assert all(isinstance(w, elements.Line) for w in block.lines) assert all(isinstance(a, elements.Artefact) for a in block.artefacts) assert block.geometry == geom # Render assert block.render() == "hello world\nhello world" # Export assert block.export() == { "lines": [line.export() for line in lines], "artefacts": [artefact.export() for artefact in artefacts], "geometry": geom, "objectness_score": objectness_score, } def test_page(): page = np.zeros((300, 200, 3), dtype=np.uint8) page_idx = 0 page_size = (300, 200) orientation = {"value": 0.0, "confidence": 0.0} language = {"value": "EN", "confidence": 0.8} blocks = _mock_blocks() page = elements.Page(page, blocks, page_idx, page_size, orientation, language) # Attribute checks assert len(page.blocks) == len(blocks) assert all(isinstance(b, elements.Block) for b in page.blocks) assert isinstance(page.page, np.ndarray) assert page.page_idx == page_idx assert page.dimensions == page_size assert page.orientation == orientation assert page.language == language # Render assert page.render() == "hello world\nhello world\n\nhello world\nhello world" # Export assert page.export() == { "blocks": [b.export() for b in blocks], "page_idx": page_idx, "dimensions": page_size, "orientation": orientation, "language": language, } # Export XML assert ( isinstance(page.export_as_xml(), tuple) and isinstance(page.export_as_xml()[0], (bytes, bytearray)) and isinstance(page.export_as_xml()[1], ElementTree) ) # Repr assert "\n".join(repr(page).split("\n")[:2]) == f"Page(\n dimensions={page_size!r}" # Show page.show(block=False) # Synthesize img = page.synthesize() assert isinstance(img, np.ndarray) assert img.shape == (*page_size, 3) def test_document(): pages = _mock_pages() doc = elements.Document(pages) # Attribute checks assert len(doc.pages) == len(pages) assert all(isinstance(p, elements.Page) for p in doc.pages) # Render page_export = "hello world\nhello world\n\nhello world\nhello world" assert doc.render() == f"{page_export}\n\n\n\n{page_export}" # Export assert doc.export() == {"pages": [p.export() for p in pages]} # Export XML xml_output = doc.export_as_xml() assert isinstance(xml_output, list) and len(xml_output) == len(pages) # Check that the XML is well-formed in hOCR format for xml_bytes, xml_tree in xml_output: assert isinstance(xml_bytes, bytes) assert isinstance(xml_tree, ElementTree) root = xml_tree.getroot() assert root.tag == "html" assert root[0].tag == "head" assert root[1].tag == "body" assert root[1][0].tag == "div" and root[1][0].attrib["class"] == "ocr_page" for block in root[1][0]: assert block.tag == "div" and block.attrib["class"] == "ocr_carea" assert block[0].tag == "p" and block[0].attrib["class"] == "ocr_par" for line in block[0]: assert line.tag == "span" and line.attrib["class"] == "ocr_line" for word in line: assert word.tag == "span" and word.attrib["class"] == "ocrx_word" # Show doc.show(block=False) # Synthesize img_list = doc.synthesize() assert isinstance(img_list, list) and len(img_list) == len(pages) ================================================ FILE: tests/common/test_models.py 
================================================ from io import BytesIO import cv2 import numpy as np import pytest import requests from onnxtr.io import reader from onnxtr.models._utils import estimate_orientation, get_language from onnxtr.utils import geometry @pytest.fixture(scope="function") def mock_image(tmpdir_factory): url = "https://doctr-static.mindee.com/models?id=v0.2.1/bitmap30.png&src=0" file = BytesIO(requests.get(url).content) tmp_path = str(tmpdir_factory.mktemp("data").join("mock_bitmap.jpg")) with open(tmp_path, "wb") as f: f.write(file.getbuffer()) image = reader.read_img_as_numpy(tmp_path) return image @pytest.fixture(scope="function") def mock_bitmap(mock_image): bitmap = np.squeeze(cv2.cvtColor(mock_image, cv2.COLOR_BGR2GRAY) / 255.0) bitmap = np.expand_dims(bitmap, axis=-1) return bitmap def test_estimate_orientation(mock_image, mock_bitmap, mock_tilted_payslip): assert estimate_orientation(mock_image * 0) == 0 # test binarized image (the estimated angle should be within 1 degree of 30) angle = estimate_orientation(mock_bitmap) assert abs(abs(angle) - 30) < 1.0 angle = estimate_orientation(mock_bitmap * 255) assert abs(abs(angle) - 30.0) < 1.0 angle = estimate_orientation(mock_image) assert abs(abs(angle) - 30.0) < 1.0 rotated = geometry.rotate_image(mock_image, angle) angle_rotated = estimate_orientation(rotated) assert abs(angle_rotated) == 0 mock_tilted_payslip = reader.read_img_as_numpy(mock_tilted_payslip) assert estimate_orientation(mock_tilted_payslip) == -30 rotated = geometry.rotate_image(mock_tilted_payslip, -30, expand=True) angle_rotated = estimate_orientation(rotated) assert abs(angle_rotated) < 1.0 with pytest.raises(AssertionError): estimate_orientation(np.ones((10, 10, 10))) # test with general_page_orientation assert estimate_orientation(mock_bitmap, (90, 0.9)) in range(140, 160) rotated = geometry.rotate_image(mock_tilted_payslip, -30) assert estimate_orientation(rotated, (0, 0.9)) in range(-10, 10) assert abs(estimate_orientation(mock_image, (0, 0.9)) - 30) < 1.0 # Aspect Ratio Independence (Portrait vs Landscape) # Pad the tilted image to be very tall (Portrait) portrait_img = cv2.copyMakeBorder(mock_tilted_payslip, 500, 500, 0, 0, cv2.BORDER_CONSTANT, value=[0, 0, 0]) # Pad the tilted image to be very wide (Landscape) landscape_img = cv2.copyMakeBorder(mock_tilted_payslip, 0, 0, 500, 500, cv2.BORDER_CONSTANT, value=[0, 0, 0]) assert abs(estimate_orientation(portrait_img) - (-30)) <= 1.0 assert abs(estimate_orientation(landscape_img) - (-30)) <= 1.0 # Perpendicular Noise Test vertical_noise = np.zeros((1000, 1000, 3), dtype=np.uint8) cv2.line(vertical_noise, (500, 100), (500, 900), (255, 255, 255), 10) assert estimate_orientation(vertical_noise) == 0 def test_get_lang(): sentence = "This is a test sentence."
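# Descriptive note (added for clarity): get_language is expected to return a
# (language_code, probability) tuple, falling back to ("unknown", 0.0) when the
# input is too short to classify, as the assertions below verify.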
expected_lang = "en" threshold_prob = 0.99 lang = get_language(sentence) assert lang[0] == expected_lang assert lang[1] > threshold_prob lang = get_language("a") assert lang[0] == "unknown" assert lang[1] == 0.0 ================================================ FILE: tests/common/test_models_builder.py ================================================ import numpy as np import pytest from onnxtr.io import Document from onnxtr.models import builder words_per_page = 10 def test_documentbuilder(): num_pages = 2 # Don't resolve lines doc_builder = builder.DocumentBuilder(resolve_lines=False, resolve_blocks=False) pages = [np.zeros((100, 200, 3))] * num_pages boxes = np.random.rand(words_per_page, 6) # array format boxes[:2] *= boxes[2:4] objectness_scores = np.array([0.9] * words_per_page) # Arg consistency check with pytest.raises(ValueError): doc_builder( pages, [boxes, boxes], [objectness_scores, objectness_scores], [("hello", 1.0)] * 3, [(100, 200), (100, 200)], [{"value": 0, "confidence": None}] * 3, ) out = doc_builder( pages, [boxes, boxes], [objectness_scores, objectness_scores], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)], [[{"value": 0, "confidence": None}] * words_per_page] * num_pages, ) assert isinstance(out, Document) assert len(out.pages) == num_pages assert all(isinstance(page.page, np.ndarray) for page in out.pages) and all( page.page.shape == (100, 200, 3) for page in out.pages ) # 1 Block & 1 line per page assert len(out.pages[0].blocks) == 1 and len(out.pages[0].blocks[0].lines) == 1 assert len(out.pages[0].blocks[0].lines[0].words) == words_per_page # Resolve lines doc_builder = builder.DocumentBuilder(resolve_lines=True, resolve_blocks=True) out = doc_builder( pages, [boxes, boxes], [objectness_scores, objectness_scores], [[("hello", 1.0)] * words_per_page] * num_pages, [(100, 200), (100, 200)], [[{"value": 0, "confidence": None}] * words_per_page] * num_pages, ) # No detection boxes = np.zeros((0, 4)) objectness_scores = np.zeros([0]) out = doc_builder( pages, [boxes, boxes], [objectness_scores, objectness_scores], [[], []], [(100, 200), (100, 200)], [[]] ) assert len(out.pages[0].blocks) == 0 # Rotated boxes to export as straight boxes boxes = np.array([ [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]], [[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]], ]) objectness_scores = np.array([0.99, 0.99]) doc_builder_2 = builder.DocumentBuilder(resolve_blocks=False, resolve_lines=False, export_as_straight_boxes=True) out = doc_builder_2( [np.zeros((100, 100, 3))], [boxes], [objectness_scores], [[("hello", 0.99), ("word", 0.99)]], [(100, 100)], [[{"value": 0, "confidence": None}] * 2], ) assert out.pages[0].blocks[0].lines[0].words[-1].geometry == ((0.45, 0.5), (0.6, 0.65)) assert out.pages[0].blocks[0].lines[0].words[-1].objectness_score == 0.99 # Repr assert ( repr(doc_builder) == "DocumentBuilder(resolve_lines=True, " "resolve_blocks=True, paragraph_break=0.035, export_as_straight_boxes=False)" ) @pytest.mark.parametrize( "input_boxes, sorted_idxs", [ [[[0, 0.5, 0.1, 0.6], [0, 0.3, 0.2, 0.4], [0, 0, 0.1, 0.1]], [2, 1, 0]], # vertical [[[0.7, 0.5, 0.85, 0.6], [0.2, 0.3, 0.4, 0.4], [0, 0, 0.1, 0.1]], [2, 1, 0]], # diagonal [[[0, 0.5, 0.1, 0.6], [0.15, 0.5, 0.25, 0.6], [0.5, 0.5, 0.6, 0.6]], [0, 1, 2]], # same line, 2p [[[0, 0.5, 0.1, 0.6], [0.2, 0.49, 0.35, 0.59], [0.8, 0.52, 0.9, 0.63]], [0, 1, 2]], # ~same line [[[0, 0.3, 0.4, 0.45], [0.5, 0.28, 0.75, 0.42], [0, 0.45, 0.1, 0.55]], [0, 1, 2]], # 2 lines [[[0, 0.3, 0.4, 0.35], [0.75, 0.28, 
0.95, 0.42], [0, 0.45, 0.1, 0.55]], [0, 1, 2]], # 2 lines [ [ [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]], [[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]], ], [0, 1], ], # rot ], ) def test_sort_boxes(input_boxes, sorted_idxs): doc_builder = builder.DocumentBuilder() assert doc_builder._sort_boxes(np.asarray(input_boxes))[0].tolist() == sorted_idxs @pytest.mark.parametrize( "input_boxes, lines", [ [[[0, 0.5, 0.1, 0.6], [0, 0.3, 0.2, 0.4], [0, 0, 0.1, 0.1]], [[2], [1], [0]]], # vertical [[[0.7, 0.5, 0.85, 0.6], [0.2, 0.3, 0.4, 0.4], [0, 0, 0.1, 0.1]], [[2], [1], [0]]], # diagonal [[[0, 0.5, 0.14, 0.6], [0.15, 0.5, 0.25, 0.6], [0.5, 0.5, 0.6, 0.6]], [[0, 1], [2]]], # same line, 2p [[[0, 0.5, 0.18, 0.6], [0.2, 0.48, 0.35, 0.58], [0.8, 0.52, 0.9, 0.63]], [[0, 1], [2]]], # ~same line [[[0, 0.3, 0.48, 0.45], [0.5, 0.28, 0.75, 0.42], [0, 0.45, 0.1, 0.55]], [[0, 1], [2]]], # 2 lines [[[0, 0.3, 0.4, 0.35], [0.75, 0.28, 0.95, 0.42], [0, 0.45, 0.1, 0.55]], [[0], [1], [2]]], # 2 lines [ [ [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]], [[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]], ], [[0], [1]], ], # rot ], ) def test_resolve_lines(input_boxes, lines): doc_builder = builder.DocumentBuilder() assert doc_builder._resolve_lines(np.asarray(input_boxes)) == lines ================================================ FILE: tests/common/test_models_classification.py ================================================ import cv2 import numpy as np import pytest from onnxtr.models import classification, detection from onnxtr.models.classification.predictor import OrientationPredictor from onnxtr.models.engine import Engine @pytest.mark.parametrize( "arch_name, input_shape", [ ["mobilenet_v3_small_crop_orientation", (256, 256, 3)], ["mobilenet_v3_small_page_orientation", (512, 512, 3)], ], ) def test_classification_models(arch_name, input_shape): batch_size = 8 model = classification.__dict__[arch_name]() assert isinstance(model, Engine) input_tensor = np.random.rand(batch_size, *input_shape).astype(np.float32) out = model(input_tensor) assert isinstance(out, np.ndarray) assert out.shape == (8, 4) @pytest.mark.parametrize( "arch_name", [ "mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation", ], ) def test_classification_zoo(arch_name): if "crop" in arch_name: batch_size = 16 input_array = np.random.rand(batch_size, 3, 256, 256).astype(np.float32) # Model predictor = classification.zoo.crop_orientation_predictor(arch_name) with pytest.raises(ValueError): predictor = classification.zoo.crop_orientation_predictor(arch="wrong_model") else: batch_size = 2 input_array = np.random.rand(batch_size, 3, 512, 512).astype(np.float32) # Model predictor = classification.zoo.page_orientation_predictor(arch_name) with pytest.raises(ValueError): predictor = classification.zoo.page_orientation_predictor(arch="wrong_model") # object check assert isinstance(predictor, OrientationPredictor) out = predictor(input_array) class_idxs, classes, confs = out[0], out[1], out[2] assert isinstance(class_idxs, list) and len(class_idxs) == batch_size assert isinstance(classes, list) and len(classes) == batch_size assert isinstance(confs, list) and len(confs) == batch_size assert all(isinstance(pred, int) for pred in class_idxs) assert all(isinstance(pred, int) for pred in classes) and all(pred in [0, 90, 180, -90] for pred in classes) assert all(isinstance(pred, float) for pred in confs) @pytest.mark.parametrize("quantized", [False, True]) def test_crop_orientation_model(mock_text_box, quantized): 
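# Descriptive note (added for clarity): this test builds the four cardinal
# rotations of the same text box; np.rot90 rotates counter-clockwise, so one
# step corresponds to the 270-degree (i.e. -90-degree) orientation checked below.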
text_box_0 = cv2.imread(mock_text_box) # rotates counter-clockwise text_box_270 = np.rot90(text_box_0, 1) text_box_180 = np.rot90(text_box_0, 2) text_box_90 = np.rot90(text_box_0, 3) classifier = classification.crop_orientation_predictor( "mobilenet_v3_small_crop_orientation", load_in_8_bit=quantized ) assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3] # 270 degrees is equivalent to -90 degrees assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90] assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2]) # Test custom model loading classifier = classification.crop_orientation_predictor( classification.mobilenet_v3_small_crop_orientation(load_in_8_bit=quantized) ) assert isinstance(classifier, OrientationPredictor) with pytest.raises(ValueError): _ = classification.crop_orientation_predictor(detection.db_resnet34()) # Test with disabled predictor classifier = classification.crop_orientation_predictor("mobilenet_v3_small_crop_orientation", disabled=True) assert classifier([text_box_0, text_box_270, text_box_180, text_box_90]) == [ [0, 0, 0, 0], [0, 0, 0, 0], [1.0, 1.0, 1.0, 1.0], ] @pytest.mark.parametrize("quantized", [False, True]) def test_page_orientation_model(mock_payslip, quantized): text_box_0 = cv2.imread(mock_payslip) # rotates counter-clockwise text_box_270 = np.rot90(text_box_0, 1) text_box_180 = np.rot90(text_box_0, 2) text_box_90 = np.rot90(text_box_0, 3) classifier = classification.crop_orientation_predictor( "mobilenet_v3_small_page_orientation", load_in_8_bit=quantized ) assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3] # 270 degrees is equivalent to -90 degrees assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90] assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2]) # Test custom model loading classifier = classification.page_orientation_predictor( classification.mobilenet_v3_small_page_orientation(load_in_8_bit=quantized) ) assert isinstance(classifier, OrientationPredictor) with pytest.raises(ValueError): _ = classification.page_orientation_predictor(detection.db_resnet34()) # Test with disabled predictor classifier = classification.crop_orientation_predictor("mobilenet_v3_small_page_orientation", disabled=True) assert classifier([text_box_0, text_box_270, text_box_180, text_box_90]) == [ [0, 0, 0, 0], [0, 0, 0, 0], [1.0, 1.0, 1.0, 1.0], ] ================================================ FILE: tests/common/test_models_detection.py ================================================ import numpy as np import pytest from onnxtr.models import detection from onnxtr.models.detection.postprocessor.base import GeneralDetectionPostProcessor from onnxtr.models.detection.predictor import DetectionPredictor from onnxtr.models.engine import Engine def test_postprocessor(): postprocessor = GeneralDetectionPostProcessor(assume_straight_pages=True) r_postprocessor = GeneralDetectionPostProcessor(assume_straight_pages=False) with pytest.raises(AssertionError): postprocessor(np.random.rand(2, 512, 512).astype(np.float32)) mock_batch = np.random.rand(2, 512, 512, 1).astype(np.float32) out = postprocessor(mock_batch) r_out = r_postprocessor(mock_batch) # Batch composition assert isinstance(out, list) assert len(out) == 2 assert all(isinstance(sample, list) and all(isinstance(v, np.ndarray) for v in sample) 
for sample in out) assert all(all(v.shape[1] == 5 for v in sample) for sample in out) assert all(all(v.shape[1] == 5 and v.shape[2] == 2 for v in sample) for sample in r_out) # Relative coords assert all(all(np.all(np.logical_and(v[:, :4] >= 0, v[:, :4] <= 1)) for v in sample) for sample in out) assert all(all(np.all(np.logical_and(v[:, :4] >= 0, v[:, :4] <= 1)) for v in sample) for sample in r_out) # Repr assert repr(postprocessor) == "GeneralDetectionPostProcessor(bin_thresh=0.1, box_thresh=0.1)" # Edge case when the expanded points of the polygon has two lists issue_points = np.array( [ [869, 561], [923, 581], [925, 595], [915, 583], [889, 583], [905, 593], [882, 601], [901, 595], [904, 604], [876, 608], [915, 614], [911, 605], [925, 601], [930, 616], [911, 617], [900, 636], [931, 637], [904, 649], [932, 649], [932, 628], [918, 627], [934, 624], [935, 573], [909, 569], [934, 562], ], dtype=np.int32, ) out = postprocessor.polygon_to_box(issue_points) r_out = r_postprocessor.polygon_to_box(issue_points) assert isinstance(out, tuple) and len(out) == 4 assert isinstance(r_out, np.ndarray) and r_out.shape == (4, 2) @pytest.mark.parametrize("quantized", [False, True]) @pytest.mark.parametrize( "arch_name, input_shape, output_size, out_prob", [ ["db_resnet34", (1024, 1024, 3), (1024, 1024, 1), True], ["db_resnet50", (1024, 1024, 3), (1024, 1024, 1), True], ["db_mobilenet_v3_large", (1024, 1024, 3), (1024, 1024, 1), True], ["linknet_resnet18", (1024, 1024, 3), (1024, 1024, 1), True], ["linknet_resnet34", (1024, 1024, 3), (1024, 1024, 1), True], ["linknet_resnet50", (1024, 1024, 3), (1024, 1024, 1), True], ["fast_tiny", (1024, 1024, 3), (1024, 1024, 1), True], ["fast_small", (1024, 1024, 3), (1024, 1024, 1), True], ["fast_base", (1024, 1024, 3), (1024, 1024, 1), True], ], ) def test_detection_models(arch_name, input_shape, output_size, out_prob, quantized): batch_size = 2 model = detection.__dict__[arch_name](load_in_8_bit=quantized) assert isinstance(model, Engine) input_array = np.random.rand(batch_size, *input_shape).astype(np.float32) out = model(input_array, return_model_output=True) assert isinstance(out, dict) assert len(out) == 2 # Check proba map assert out["out_map"].shape == (batch_size, *output_size) assert out["out_map"].dtype == np.float32 if out_prob: assert np.all(out["out_map"] >= 0) and np.all(out["out_map"] <= 1) # Check boxes for boxes_list in out["preds"]: for boxes in boxes_list: assert boxes.shape[1] == 5 assert np.all(boxes[:, :2] < boxes[:, 2:4]) assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1) @pytest.mark.parametrize("quantized", [False, True]) @pytest.mark.parametrize( "arch_name", [ "db_resnet34", "db_resnet50", "db_mobilenet_v3_large", "linknet_resnet18", "linknet_resnet34", "linknet_resnet50", "fast_tiny", "fast_small", "fast_base", ], ) def test_detection_zoo(arch_name, quantized): # Model predictor = detection.zoo.detection_predictor( arch_name, load_in_8_bit=quantized, preserve_aspect_ratio=False, symmetric_pad=False ) # object check assert isinstance(predictor, DetectionPredictor) input_array = np.random.rand(2, 3, 1024, 1024).astype(np.float32) out, seq_maps = predictor(input_array, return_maps=True) assert isinstance(out, list) for box in out: assert isinstance(box, np.ndarray) assert box.shape[1] == 5 assert np.all(box[:, :2] < box[:, 2:4]) assert np.all(box[:, :4] >= 0) and np.all(box[:, :4] <= 1) assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps) assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps) # check 
that all values in the seq_maps are between 0 and 1 assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps) ================================================ FILE: tests/common/test_models_detection_utils.py ================================================ import numpy as np import pytest from onnxtr.models.detection._utils import _remove_padding @pytest.mark.parametrize("pages", [[np.zeros((1000, 1000))], [np.zeros((1000, 2000))], [np.zeros((2000, 1000))]]) @pytest.mark.parametrize("preserve_aspect_ratio", [True, False]) @pytest.mark.parametrize("symmetric_pad", [True, False]) @pytest.mark.parametrize("assume_straight_pages", [True, False]) def test_remove_padding(pages, preserve_aspect_ratio, symmetric_pad, assume_straight_pages): h, w = pages[0].shape # straight pages test cases if assume_straight_pages: loc_preds = [np.array([[0.7, 0.1, 0.7, 0.2]])] if h == w or not preserve_aspect_ratio: expected = loc_preds else: if symmetric_pad: if h > w: expected = [np.array([[0.9, 0.1, 0.9, 0.2]])] else: expected = [np.array([[0.7, 0.0, 0.7, 0.0]])] else: if h > w: expected = [np.array([[1.0, 0.1, 1.0, 0.2]])] else: expected = [np.array([[0.7, 0.2, 0.7, 0.4]])] # non-straight pages test cases else: loc_preds = [np.array([[[0.9, 0.1], [0.9, 0.2], [0.8, 0.2], [0.8, 0.2]]])] if h == w or not preserve_aspect_ratio: expected = loc_preds else: if symmetric_pad: if h > w: expected = [np.array([[[1.0, 0.1], [1.0, 0.2], [1.0, 0.2], [1.0, 0.2]]])] else: expected = [np.array([[[0.9, 0.0], [0.9, 0.0], [0.8, 0.0], [0.8, 0.0]]])] else: if h > w: expected = [np.array([[[1.0, 0.1], [1.0, 0.2], [1.0, 0.2], [1.0, 0.2]]])] else: expected = [np.array([[[0.9, 0.2], [0.9, 0.4], [0.8, 0.4], [0.8, 0.4]]])] result = _remove_padding(pages, loc_preds, preserve_aspect_ratio, symmetric_pad, assume_straight_pages) for res, exp in zip(result, expected): assert np.allclose(res, exp) ================================================ FILE: tests/common/test_models_factory.py ================================================ import json import os import tempfile import pytest from onnxtr import models from onnxtr.models.factory import _save_model_and_config_for_hf_hub, from_hub, push_to_hf_hub AVAILABLE_ARCHS = { "classification": models.classification.zoo.ORIENTATION_ARCHS, "detection": models.detection.zoo.ARCHS, "recognition": models.recognition.zoo.ARCHS, } def test_push_to_hf_hub(): model = models.classification.mobilenet_v3_small_crop_orientation() with pytest.raises(ValueError): # run_config and/or arch must be specified push_to_hf_hub(model, model_name="test", task="classification") with pytest.raises(ValueError): # task must be one of classification, detection, recognition, obj_detection push_to_hf_hub(model, model_name="test", task="invalid_task", arch="mobilenet_v3_small") with pytest.raises(ValueError): # arch not in available architectures for task push_to_hf_hub(model, model_name="test", task="detection", arch="crnn_mobilenet_v3_large") def test_models_huggingface_hub(tmpdir): with tempfile.TemporaryDirectory() as tmp_dir: for task_name, archs in AVAILABLE_ARCHS.items(): for arch_name in archs: model = models.__dict__[task_name].__dict__[arch_name]() _save_model_and_config_for_hf_hub(model, arch=arch_name, task=task_name, save_dir=tmp_dir) assert hasattr(model, "cfg") assert len(os.listdir(tmp_dir)) == 2 assert os.path.exists(tmp_dir + "/model.onnx") assert os.path.exists(tmp_dir + "/config.json") tmp_config = json.load(open(tmp_dir + "/config.json")) assert arch_name == tmp_config["arch"] 
assert task_name == tmp_config["task"] assert all(key in model.cfg.keys() for key in tmp_config.keys()) # test from hub hub_model = from_hub(repo_id="Felix92/onnxtr-{}".format(arch_name).replace("_", "-")) assert isinstance(hub_model, type(model)) ================================================ FILE: tests/common/test_models_preprocessor.py ================================================ import numpy as np import pytest from onnxtr.models.preprocessor import PreProcessor @pytest.mark.parametrize( "batch_size, output_size, input_tensor, expected_batches, expected_value", [ [2, (128, 128), np.full((3, 256, 128, 3), 255, dtype=np.uint8), 1, 0.5], # numpy uint8 [2, (128, 128), np.ones((3, 256, 128, 3), dtype=np.float32), 1, 0.5], # numpy fp32 [2, (128, 128), [np.full((256, 128, 3), 255, dtype=np.uint8)] * 3, 2, 0.5], # list of numpy uint8 [2, (128, 128), [np.ones((256, 128, 3), dtype=np.float32)] * 3, 2, 0.5], # list of numpy fp32 list of tf fp32 ], ) def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, expected_value): processor = PreProcessor(output_size, batch_size) # Invalid input type with pytest.raises(TypeError): processor(42) # 4D check with pytest.raises(AssertionError): processor(np.full((256, 128, 3), 255, dtype=np.uint8)) with pytest.raises(TypeError): processor(np.full((1, 256, 128, 3), 255, dtype=np.int32)) # 3D check with pytest.raises(AssertionError): processor([np.full((3, 256, 128, 3), 255, dtype=np.uint8)]) with pytest.raises(TypeError): processor([np.full((256, 128, 3), 255, dtype=np.int32)]) out = processor(input_tensor) assert isinstance(out, list) and len(out) == expected_batches assert all(isinstance(b, np.ndarray) for b in out) assert all(b.dtype == np.float32 for b in out) assert all(b.shape[1:3] == output_size for b in out) assert all(np.all(b == expected_value) for b in out) assert len(repr(processor).split("\n")) == 4 ================================================ FILE: tests/common/test_models_recognition.py ================================================ import numpy as np import pytest from onnxtr.models import recognition from onnxtr.models.engine import Engine from onnxtr.models.recognition.core import RecognitionPostProcessor from onnxtr.models.recognition.predictor import RecognitionPredictor from onnxtr.models.recognition.predictor._utils import remap_preds, split_crops from onnxtr.utils.vocabs import VOCABS def test_recognition_postprocessor(): mock_vocab = VOCABS["french"] post_processor = RecognitionPostProcessor(mock_vocab) assert post_processor.extra_repr() == f"vocab_size={len(mock_vocab)}" assert post_processor.vocab == mock_vocab assert post_processor._embedding == list(mock_vocab) + [""] @pytest.mark.parametrize( "crops, max_ratio, target_ratio, target_overlap_ratio, channels_last, num_crops", [ # No split required [[np.zeros((32, 128, 3), dtype=np.uint8)], 8, 4, 0.5, True, 1], [[np.zeros((3, 32, 128), dtype=np.uint8)], 8, 4, 0.5, False, 1], # Split required [[np.zeros((32, 1024, 3), dtype=np.uint8)], 8, 6, 0.5, True, 10], [[np.zeros((3, 32, 1024), dtype=np.uint8)], 8, 6, 0.5, False, 10], ], ) def test_split_crops(crops, max_ratio, target_ratio, target_overlap_ratio, channels_last, num_crops): new_crops, crop_map, should_remap = split_crops(crops, max_ratio, target_ratio, target_overlap_ratio, channels_last) assert len(new_crops) == num_crops assert len(crop_map) == len(crops) assert should_remap == (len(crops) != len(new_crops)) @pytest.mark.parametrize( "preds, crop_map, split_overlap_ratio, pred", [ # Nothing to 
remap ([("hello", 0.5)], [0], 0.5, [("hello", 0.5)]), # Merge ([("hellowo", 0.5), ("loworld", 0.6)], [(0, 2, 0.5)], 0.5, [("helloworld", 0.55)]), ], ) def test_remap_preds(preds, crop_map, split_overlap_ratio, pred): preds = remap_preds(preds, crop_map, split_overlap_ratio) assert len(preds) == len(pred) assert preds == pred assert all(isinstance(pred, tuple) for pred in preds) assert all(isinstance(pred[0], str) and isinstance(pred[1], float) for pred in preds) @pytest.mark.parametrize( "inputs, max_ratio, target_ratio, target_overlap_ratio, expected_remap_required, expected_len, expected_shape, " "expected_crop_map, channels_last", [ # Don't split ([np.zeros((32, 32 * 4, 3))], 4, 4, 0.5, False, 1, (32, 128, 3), 0, True), # Split needed ([np.zeros((32, 32 * 4 + 1, 3))], 4, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.9921875), True), # Larger max ratio prevents split ([np.zeros((32, 32 * 8, 3))], 8, 4, 0.5, False, 1, (32, 256, 3), 0, True), # Half-overlap, two crops ([np.zeros((32, 128 + 64, 3))], 4, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.5), True), # Half-overlap, two crops, channels first ([np.zeros((3, 32, 128 + 64))], 4, 4, 0.5, True, 2, (3, 32, 128), (0, 2, 0.5), False), # Half-overlap with small max_ratio forces split ([np.zeros((32, 128 + 64, 3))], 2, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.5), True), # > half last overlap ratio ([np.zeros((32, 128 + 32, 3))], 4, 4, 0.5, True, 2, (32, 128, 3), (0, 2, 0.75), True), # 3 crops, half last overlap ([np.zeros((32, 128 + 128, 3))], 4, 4, 0.5, True, 3, (32, 128, 3), (0, 3, 0.5), True), # 3 crops, > half last overlap ([np.zeros((32, 128 + 64 + 32, 3))], 4, 4, 0.5, True, 3, (32, 128, 3), (0, 3, 0.75), True), # Split into larger crops ([np.zeros((32, 192 * 2, 3))], 4, 6, 0.5, True, 3, (32, 192, 3), (0, 3, 0.5), True), # Test fallback for empty splits ([np.empty((1, 0, 3))], -1, 4, 0.5, False, 1, (1, 0, 3), (0), True), ], ) def test_split_crops_cases( inputs, max_ratio, target_ratio, target_overlap_ratio, expected_remap_required, expected_len, expected_shape, expected_crop_map, channels_last, ): new_crops, crop_map, _remap_required = split_crops( inputs, max_ratio=max_ratio, target_ratio=target_ratio, split_overlap_ratio=target_overlap_ratio, channels_last=channels_last, ) assert _remap_required == expected_remap_required assert len(new_crops) == expected_len assert len(crop_map) == 1 if expected_remap_required: assert isinstance(crop_map[0], tuple) assert crop_map[0] == expected_crop_map for crop in new_crops: assert crop.shape == expected_shape @pytest.mark.parametrize( "split_overlap_ratio", [ # lower bound 0.0, # upper bound 1.0, ], ) def test_invalid_split_overlap_ratio(split_overlap_ratio): with pytest.raises(ValueError): split_crops( [np.zeros((32, 32 * 4, 3))], max_ratio=4, target_ratio=4, split_overlap_ratio=split_overlap_ratio, ) @pytest.mark.parametrize("quantized", [False, True]) @pytest.mark.parametrize( "arch_name, input_shape", [ ["crnn_vgg16_bn", (32, 128, 3)], ["crnn_mobilenet_v3_small", (32, 128, 3)], ["crnn_mobilenet_v3_large", (32, 128, 3)], ["sar_resnet31", (32, 128, 3)], ["master", (32, 128, 3)], ["vitstr_small", (32, 128, 3)], ["vitstr_base", (32, 128, 3)], ["parseq", (32, 128, 3)], ["viptr_tiny", (32, 128, 3)], ], ) def test_recognition_models(arch_name, input_shape, quantized): mock_vocab = VOCABS["french"] batch_size = 4 model = recognition.__dict__[arch_name](load_in_8_bit=quantized) assert isinstance(model, Engine) input_array = np.random.rand(batch_size, *input_shape).astype(np.float32) out = model(input_array, 
return_model_output=True) assert isinstance(out, dict) assert len(out) == 2 assert isinstance(out["preds"], list) assert len(out["preds"]) == batch_size assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"]) assert isinstance(out["out_map"], np.ndarray) assert out["out_map"].shape[0] == 4 # test model post processor post_processor = model.postprocessor decoded = post_processor(np.random.rand(2, len(mock_vocab), 30).astype(np.float32)) assert isinstance(decoded, list) assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in decoded) assert len(decoded) == 2 assert all(char in mock_vocab for word, _ in decoded for char in word) # Testing with a fixed batch size model = recognition.__dict__[arch_name]() model.fixed_batch_size = 1 assert isinstance(model, Engine) input_array = np.random.rand(batch_size, *input_shape).astype(np.float32) out = model(input_array, return_model_output=True) assert isinstance(out, dict) assert len(out) == 2 assert isinstance(out["preds"], list) assert len(out["preds"]) == batch_size assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"]) assert isinstance(out["out_map"], np.ndarray) assert out["out_map"].shape[0] == 4 @pytest.mark.parametrize("quantized", [False, True]) @pytest.mark.parametrize( "input_shape", [ (128, 128, 3), (32, 1024, 3), # test case split wide crops ], ) @pytest.mark.parametrize( "arch_name", [ "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large", "sar_resnet31", "master", "vitstr_small", "vitstr_base", "parseq", "viptr_tiny", ], ) def test_recognition_zoo(arch_name, input_shape, quantized): batch_size = 2 # Model predictor = recognition.zoo.recognition_predictor(arch_name, load_in_8_bit=quantized) # object check assert isinstance(predictor, RecognitionPredictor) input_array = np.random.rand(batch_size, *input_shape).astype(np.float32) out = predictor(input_array) assert isinstance(out, list) and len(out) == batch_size assert all(isinstance(word, str) and isinstance(conf, float) for word, conf in out) with pytest.raises(ValueError): _ = recognition.zoo.recognition_predictor(arch="wrong_model") ================================================ FILE: tests/common/test_models_recognition_utils.py ================================================ import pytest from onnxtr.models.recognition.utils import merge_multi_strings, merge_strings @pytest.mark.parametrize( "a, b, overlap_ratio, merged", [ # Last character of first string and first of last string will be cropped when merging - indicated by X ("abcX", "Xdef", 0.5, "abcdef"), ("abcdX", "Xdef", 0.75, "abcdef"), ("abcdeX", "Xdef", 0.9, "abcdef"), ("abcdefX", "Xdef", 0.9, "abcdef"), # Long repetition - four of seven characters in the second string are in the estimated overlap # X-chars will be cropped during merge, because they might be cut off during splitting of corresponding image ("abccccX", "Xcccccc", 4 / 7, "abcccccccc"), ("abc", "", 0.5, "abc"), ("", "abc", 0.5, "abc"), ("a", "b", 0.5, "ab"), # No overlap of input strings after crop ("abcdX", "Xefghi", 0.33, "abcdefghi"), # No overlap of input strings after crop with shorter inputs ("bcdX", "Xefgh", 0.4, "bcdefgh"), # No overlap of input strings after crop with even shorter inputs ("cdX", "Xefg", 0.5, "cdefg"), # Full overlap of input strings ("abcdX", "Xbcde", 1.0, "abcde"), # One repetition within inputs ("ababX", "Xabde", 0.8, "ababde"), # Multiple repetitions within 
inputs ("ababX", "Xabab", 0.8, "ababab"), # Multiple repetitions within inputs with shorter input strings ("abaX", "Xbab", 1.0, "abab"), # Longer multiple repetitions within inputs with half overlap ("cabababX", "Xabababc", 0.5, "cabababababc"), # Longer multiple repetitions within inputs with full overlap ("ababaX", "Xbabab", 1.0, "ababab"), # One different letter in overlap ("one_differon", "ferent_letter", 0.5, "one_differont_letter"), # First string empty after crop ("-", "test", 0.9, "-test"), # Second string empty after crop ("test", "-", 0.9, "test-"), ], ) def test_merge_strings(a, b, overlap_ratio, merged): assert merged == merge_strings(a, b, overlap_ratio) @pytest.mark.parametrize( "seq_list, overlap_ratio, last_overlap_ratio, merged", [ # One character at each conjunction point will be cropped when merging - indicated by X (["abcX", "Xdef"], 0.5, 0.5, "abcdef"), (["abcdX", "XdefX", "XefghX", "Xijk"], 0.5, 0.5, "abcdefghijk"), (["abcdX", "XdefX", "XefghiX", "Xaijk"], 0.5, 0.8, "abcdefghijk"), (["aaaa", "aaab", "aabc"], 0.8, 0.3, "aaaabc"), # Handle empty input ([], 0.5, 0.4, ""), ], ) def test_merge_multi_strings(seq_list, overlap_ratio, last_overlap_ratio, merged): assert merged == merge_multi_strings(seq_list, overlap_ratio, last_overlap_ratio) ================================================ FILE: tests/common/test_models_zoo.py ================================================ import numpy as np import pytest from onnxtr import models from onnxtr.io import Document, DocumentFile from onnxtr.models import detection, recognition from onnxtr.models.classification import mobilenet_v3_small_crop_orientation, mobilenet_v3_small_page_orientation from onnxtr.models.classification.zoo import crop_orientation_predictor, page_orientation_predictor from onnxtr.models.detection.predictor import DetectionPredictor from onnxtr.models.detection.zoo import ARCHS as DET_ARCHS from onnxtr.models.detection.zoo import detection_predictor from onnxtr.models.predictor import OCRPredictor from onnxtr.models.preprocessor import PreProcessor from onnxtr.models.recognition.predictor import RecognitionPredictor from onnxtr.models.recognition.zoo import ARCHS as RECO_ARCHS from onnxtr.models.recognition.zoo import recognition_predictor from onnxtr.models.zoo import ocr_predictor from onnxtr.utils.repr import NestedObject # Create a dummy callback class _DummyCallback: def __call__(self, loc_preds): return loc_preds @pytest.mark.parametrize( "assume_straight_pages, straighten_pages, disable_page_orientation, disable_crop_orientation", [ [True, False, False, False], [False, False, True, True], [True, True, False, False], [False, True, True, True], [True, False, True, False], ], ) def test_ocrpredictor( mock_pdf, assume_straight_pages, straighten_pages, disable_page_orientation, disable_crop_orientation ): det_bsize = 4 det_predictor = DetectionPredictor( PreProcessor(output_size=(1024, 1024), batch_size=det_bsize), detection.db_mobilenet_v3_large(assume_straight_pages=assume_straight_pages), ) reco_bsize = 16 reco_predictor = RecognitionPredictor( PreProcessor(output_size=(32, 128), batch_size=reco_bsize, preserve_aspect_ratio=True), recognition.crnn_vgg16_bn(), ) doc = DocumentFile.from_pdf(mock_pdf) predictor = OCRPredictor( det_predictor, reco_predictor, assume_straight_pages=assume_straight_pages, straighten_pages=straighten_pages, detect_orientation=True, detect_language=True, resolve_lines=True, resolve_blocks=True, disable_page_orientation=disable_page_orientation, 
disable_crop_orientation=disable_crop_orientation, ) assert ( predictor._page_orientation_disabled if disable_page_orientation else not predictor._page_orientation_disabled ) assert ( predictor._crop_orientation_disabled if disable_crop_orientation else not predictor._crop_orientation_disabled ) if assume_straight_pages: assert predictor.crop_orientation_predictor is None if predictor.detect_orientation or predictor.straighten_pages: assert isinstance(predictor.page_orientation_predictor, NestedObject) else: assert predictor.page_orientation_predictor is None else: assert isinstance(predictor.crop_orientation_predictor, NestedObject) assert isinstance(predictor.page_orientation_predictor, NestedObject) out = predictor(doc) assert isinstance(out, Document) assert len(out.pages) == 2 # Dimension check with pytest.raises(ValueError): input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) _ = predictor([input_page]) assert out.pages[0].orientation["value"] in range(-2, 3) assert isinstance(out.pages[0].language["value"], str) assert isinstance(out.render(), str) assert isinstance(out.pages[0].render(), str) assert isinstance(out.export(), dict) assert isinstance(out.pages[0].export(), dict) with pytest.raises(ValueError): _ = ocr_predictor("unknown_arch") # Test with custom orientation models custom_crop_orientation_model = mobilenet_v3_small_crop_orientation() custom_page_orientation_model = mobilenet_v3_small_page_orientation() if assume_straight_pages: if predictor.detect_orientation or predictor.straighten_pages: # Overwrite the default orientation models predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model) predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model) else: # Overwrite the default orientation models predictor.crop_orientation_predictor = crop_orientation_predictor(custom_crop_orientation_model) predictor.page_orientation_predictor = page_orientation_predictor(custom_page_orientation_model) out = predictor(doc) orientation = 0 assert out.pages[0].orientation["value"] == orientation def test_trained_ocr_predictor(mock_payslip): doc = DocumentFile.from_images(mock_payslip) det_predictor = detection_predictor( "db_resnet50", batch_size=2, assume_straight_pages=True, symmetric_pad=True, preserve_aspect_ratio=False, ) reco_predictor = recognition_predictor("crnn_vgg16_bn", batch_size=128) predictor = OCRPredictor( det_predictor, reco_predictor, assume_straight_pages=True, straighten_pages=True, preserve_aspect_ratio=False, resolve_lines=True, resolve_blocks=True, ) # test hooks predictor.add_hook(_DummyCallback()) out = predictor(doc) assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr." 
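# NOTE: word geometries are relative ((xmin, ymin), (xmax, ymax)) coordinates in [0, 1]; the reference boxes below were captured from a known-good run and are compared with rtol=0.05 (5% relative tolerance) to absorb minor numerical drift between runtimes.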
geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]]) assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05) assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised" geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]]) assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05) det_predictor = detection_predictor( "db_resnet50", batch_size=2, assume_straight_pages=True, preserve_aspect_ratio=True, symmetric_pad=True, ) predictor = OCRPredictor( det_predictor, reco_predictor, assume_straight_pages=True, straighten_pages=True, preserve_aspect_ratio=True, symmetric_pad=True, resolve_lines=True, resolve_blocks=True, ) out = predictor(doc) assert "Mr" in out.pages[0].blocks[0].lines[0].words[0].value # test list archs archs = predictor.list_archs() assert isinstance(archs, dict) assert archs["recognition_archs"] == RECO_ARCHS assert archs["detection_archs"] == DET_ARCHS def _test_predictor(predictor): # Output checks assert isinstance(predictor, OCRPredictor) doc = [np.zeros((1024, 1024, 3), dtype=np.uint8)] out = predictor(doc) # Document assert isinstance(out, Document) # The input doc has 1 page assert len(out.pages) == 1 # Dimension check with pytest.raises(ValueError): input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) _ = predictor([input_page]) @pytest.mark.parametrize("quantized", [False, True]) @pytest.mark.parametrize( "det_arch, reco_arch", [[det_arch, reco_arch] for det_arch, reco_arch in zip(detection.zoo.ARCHS, recognition.zoo.ARCHS)], ) def test_zoo_models(det_arch, reco_arch, quantized): # Model predictor = models.ocr_predictor(det_arch, reco_arch, load_in_8_bit=quantized) _test_predictor(predictor) # passing model instance directly det_model = detection.__dict__[det_arch]() reco_model = recognition.__dict__[reco_arch]() predictor = models.ocr_predictor(det_model, reco_model) _test_predictor(predictor) # passing recognition model as detection model with pytest.raises(ValueError): models.ocr_predictor(det_arch=reco_model) # passing detection model as recognition model with pytest.raises(ValueError): models.ocr_predictor(reco_arch=det_model) ================================================ FILE: tests/common/test_transforms.py ================================================ import numpy as np import pytest from onnxtr.transforms import Normalize, Resize def test_resize(): output_size = (32, 32) transfo = Resize(output_size) input_t = np.ones((64, 64, 3), dtype=np.float32) out = transfo(input_t) assert np.all(out == 1) assert out.shape[:2] == output_size assert repr(transfo) == f"Resize(output_size={output_size}, interpolation='2')" transfo = Resize(output_size, preserve_aspect_ratio=True) input_t = np.ones((32, 64, 3), dtype=np.float32) out = transfo(input_t) assert out.shape[:2] == output_size assert not np.all(out == 1) # Asymmetric padding assert np.all(out[-1] == 0) and np.all(out[0] == 1) # Symmetric padding transfo = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True) assert repr(transfo) == ( f"Resize(output_size={output_size}, interpolation='2', preserve_aspect_ratio=True, symmetric_pad=True)" ) out = transfo(input_t) assert out.shape[:2] == output_size # symmetric padding assert np.all(out[-1] == 0) and np.all(out[0] == 0) # Inverse aspect ratio input_t = np.ones((64, 32, 3), dtype=np.float32) out = transfo(input_t) assert not np.all(out == 1) assert out.shape[:2] ==
output_size # Same aspect ratio output_size = (32, 128) transfo = Resize(output_size, preserve_aspect_ratio=True) out = transfo(np.ones((16, 64, 3), dtype=np.float32)) assert out.shape[:2] == output_size @pytest.mark.parametrize( "input_shape", [ [8, 32, 32, 3], [32, 32, 3], [32, 3], ], ) def test_normalize(input_shape): mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5] transfo = Normalize(mean, std) input_t = np.ones(input_shape, dtype=np.float32) out = transfo(input_t) assert np.all(out == 1) assert repr(transfo) == f"Normalize(mean={mean}, std={std})" with pytest.raises(AssertionError): Normalize(mean="32") with pytest.raises(AssertionError): Normalize(std="32") ================================================ FILE: tests/common/test_utils_data.py ================================================ import os import tempfile from pathlib import PosixPath from unittest.mock import patch import pytest from onnxtr.utils.data import _urlretrieve, download_from_url def test__urlretrieve(): with tempfile.TemporaryDirectory() as temp_dir: file_path = os.path.join(temp_dir, "crnn_mobilenet_v3_small-bded4d49.onnx") _urlretrieve( "https://github.com/felixdittrich92/OnnxTR/releases/download/v0.0.1/crnn_mobilenet_v3_small-bded4d49.onnx", file_path, ) assert os.path.exists(file_path), f"File {file_path} does not exist." @patch("onnxtr.utils.data._urlretrieve") @patch("pathlib.Path.mkdir") @patch.dict(os.environ, {"HOME": "/"}, clear=True) def test_download_from_url(mkdir_mock, urlretrieve_mock): download_from_url("test_url") urlretrieve_mock.assert_called_with("test_url", PosixPath("/.cache/onnxtr/test_url")) @patch.dict(os.environ, {"ONNXTR_CACHE_DIR": "/test"}, clear=True) @patch("onnxtr.utils.data._urlretrieve") @patch("pathlib.Path.mkdir") def test_download_from_url_customizing_cache_dir(mkdir_mock, urlretrieve_mock): download_from_url("test_url") urlretrieve_mock.assert_called_with("test_url", PosixPath("/test/test_url")) @patch.dict(os.environ, {"HOME": "/"}, clear=True) @patch("pathlib.Path.mkdir", side_effect=OSError) @patch("logging.error") def test_download_from_url_error_creating_directory(logging_mock, mkdir_mock): with pytest.raises(OSError): download_from_url("test_url") logging_mock.assert_called_with( "Failed creating cache direcotry at /.cache/onnxtr." " You can change default cache directory using 'ONNXTR_CACHE_DIR' environment variable if needed." ) @patch.dict(os.environ, {"HOME": "/", "ONNXTR_CACHE_DIR": "/test"}, clear=True) @patch("pathlib.Path.mkdir", side_effect=OSError) @patch("logging.error") def test_download_from_url_error_creating_directory_with_env_var(logging_mock, mkdir_mock): with pytest.raises(OSError): download_from_url("test_url") logging_mock.assert_called_with( "Failed creating cache direcotry at /test using path from 'ONNXTR_CACHE_DIR' environment variable." 
) ================================================ FILE: tests/common/test_utils_fonts.py ================================================ from PIL.ImageFont import FreeTypeFont, ImageFont from onnxtr.utils.fonts import get_font def test_get_font(): # Attempts to load recommended OS font font = get_font() assert isinstance(font, (ImageFont, FreeTypeFont)) ================================================ FILE: tests/common/test_utils_geometry.py ================================================ from copy import deepcopy from math import hypot import numpy as np import pytest from onnxtr.io import DocumentFile from onnxtr.utils import geometry def test_bbox_to_polygon(): assert geometry.bbox_to_polygon(((0, 0), (1, 1))) == ((0, 0), (1, 0), (0, 1), (1, 1)) def test_polygon_to_bbox(): assert geometry.polygon_to_bbox(((0, 0), (1, 0), (0, 1), (1, 1))) == ((0, 0), (1, 1)) def test_order_points(): # bbox format (xmin, ymin, xmax, ymax) bbox = np.array([1, 2, 5, 6]) expected_bbox = np.array([ [1, 2], # top-left [5, 2], # top-right [5, 6], # bottom-right [1, 6], # bottom-left ]) out_bbox = geometry.order_points(bbox) assert np.all(out_bbox == expected_bbox) # quadrangle (unordered) quad = np.array([ [5, 6], # br [1, 2], # tl [1, 6], # bl [5, 2], # tr ]) expected_quad = expected_bbox out_quad = geometry.order_points(quad) assert np.all(out_quad == expected_quad) # already ordered quad ordered_quad = expected_bbox.copy() out_ordered = geometry.order_points(ordered_quad) assert np.all(out_ordered == expected_bbox) # float inputs quad_float = quad.astype(np.float32) out_float = geometry.order_points(quad_float) assert out_float.dtype == quad_float.dtype assert np.allclose(out_float, expected_quad) with pytest.raises(ValueError): geometry.order_points(np.array([1, 2, 3])) # wrong shape with pytest.raises(ValueError): geometry.order_points(np.zeros((5, 2))) # too many points def test_detach_scores(): # box test boxes = np.array([[0.1, 0.1, 0.2, 0.2, 0.9], [0.15, 0.15, 0.2, 0.2, 0.8]]) pred = geometry.detach_scores([boxes]) target1 = np.array([[0.1, 0.1, 0.2, 0.2], [0.15, 0.15, 0.2, 0.2]]) target2 = np.array([0.9, 0.8]) assert np.all(pred[0] == target1) and np.all(pred[1] == target2) # polygon test boxes = np.array([ [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15], [0.0, 0.9]], [[0.15, 0.15], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15], [0.0, 0.8]], ]) pred = geometry.detach_scores([boxes]) target1 = np.array([ [[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]], [[0.15, 0.15], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]], ]) target2 = np.array([0.9, 0.8]) assert np.all(pred[0] == target1) and np.all(pred[1] == target2) def test_resolve_enclosing_bbox(): assert geometry.resolve_enclosing_bbox([((0, 0.5), (1, 0)), ((0.5, 0), (1, 0.25))]) == ((0, 0), (1, 0.5)) pred = geometry.resolve_enclosing_bbox(np.array([[0.1, 0.1, 0.2, 0.2], [0.15, 0.15, 0.2, 0.2]])) assert pred.all() == np.array([0.1, 0.1, 0.2, 0.2]).all() def test_resolve_enclosing_rbbox(): box1 = np.asarray([[0.1, 0.1], [0.2, 0.2], [0.15, 0.25], [0.05, 0.15]]) box2 = np.asarray([[0.5, 0.5], [0.6, 0.6], [0.55, 0.65], [0.45, 0.55]]) pred = geometry.resolve_enclosing_rbbox([box1, box2]) expected_raw = np.asarray([[0.05, 0.15], [0.1, 0.1], [0.6, 0.6], [0.55, 0.65]]) target = geometry.order_points(expected_raw) assert np.allclose(pred, target, atol=1e-3) def test_remap_boxes(): pred = geometry.remap_boxes( np.asarray([[[0.25, 0.25], [0.25, 0.75], [0.75, 0.25], [0.75, 0.75]]]), (10, 10), (20, 20) ) target = np.asarray([[[0.375, 0.375], [0.375, 0.625], [0.625, 
0.375], [0.625, 0.625]]]) assert np.all(pred == target) pred = geometry.remap_boxes( np.asarray([[[0.25, 0.25], [0.25, 0.75], [0.75, 0.25], [0.75, 0.75]]]), (10, 10), (20, 10) ) target = np.asarray([[[0.25, 0.375], [0.25, 0.625], [0.75, 0.375], [0.75, 0.625]]]) assert np.all(pred == target) with pytest.raises(ValueError): geometry.remap_boxes( np.asarray([[[0.25, 0.25], [0.25, 0.75], [0.75, 0.25], [0.75, 0.75]]]), (80, 40, 150), (160, 40) ) with pytest.raises(ValueError): geometry.remap_boxes(np.asarray([[[0.25, 0.25], [0.25, 0.75], [0.75, 0.25], [0.75, 0.75]]]), (80, 40), (160,)) orig_dimension = (100, 100) dest_dimensions = (200, 100) # Unpack dimensions height_o, width_o = orig_dimension height_d, width_d = dest_dimensions orig_box = np.asarray([[[0.25, 0.25], [0.25, 0.25], [0.75, 0.75], [0.75, 0.75]]]) pred = geometry.remap_boxes(orig_box, orig_dimension, dest_dimensions) # Switch to absolute coords orig = np.stack((orig_box[:, :, 0] * width_o, orig_box[:, :, 1] * height_o), axis=2)[0] dest = np.stack((pred[:, :, 0] * width_d, pred[:, :, 1] * height_d), axis=2)[0] len_orig = hypot(orig[0][0] - orig[2][0], orig[0][1] - orig[2][1]) len_dest = hypot(dest[0][0] - dest[2][0], dest[0][1] - dest[2][1]) assert len_orig == len_dest alpha_orig = np.rad2deg(np.arctan((orig[0][1] - orig[2][1]) / (orig[0][0] - orig[2][0]))) alpha_dest = np.rad2deg(np.arctan((dest[0][1] - dest[2][1]) / (dest[0][0] - dest[2][0]))) assert alpha_orig == alpha_dest def test_rotate_boxes(): boxes = np.array([[0.1, 0.1, 0.8, 0.3, 0.5]]) rboxes = np.array([[0.1, 0.1], [0.8, 0.1], [0.8, 0.3], [0.1, 0.3]]) # Angle = 0 rotated = geometry.rotate_boxes(boxes, angle=0.0, orig_shape=(1, 1)) assert np.all(rotated == rboxes) # Angle < 1: rotated = geometry.rotate_boxes(boxes, angle=0.5, orig_shape=(1, 1)) assert np.all(rotated == rboxes) # Angle = 30 rotated = geometry.rotate_boxes(boxes, angle=30, orig_shape=(1, 1)) assert rotated.shape == (1, 4, 2) boxes = np.array([[0.0, 0.0, 0.6, 0.2, 0.5]]) # Angle = -90: rotated = geometry.rotate_boxes(boxes, angle=-90, orig_shape=(1, 1), min_angle=0) assert np.allclose(rotated, np.array([[[1, 0.0], [1, 0.6], [0.8, 0.6], [0.8, 0.0]]])) # Angle = 90 rotated = geometry.rotate_boxes(boxes, angle=+90, orig_shape=(1, 1), min_angle=0) assert np.allclose(rotated, np.array([[[0, 1.0], [0, 0.4], [0.2, 0.4], [0.2, 1.0]]])) @pytest.fixture def sample_geoms(): return np.array([ [[10, 10], [20, 10], [20, 20], [10, 20]], [ [ 30, 30, ], [40, 30], [40, 40], [30, 40], ], ]) def test_rotate_abs_geoms(sample_geoms): img_shape = (100, 100) angle = 45.0 expanded_polys = geometry.rotate_abs_geoms(sample_geoms, angle, img_shape) # Check if the output has the correct shape assert expanded_polys.shape == sample_geoms.shape def test_rotate_image(): img = np.ones((32, 64, 3), dtype=np.float32) rotated = geometry.rotate_image(img, 30.0) assert rotated.shape[:-1] == (32, 64) assert rotated[0, 0, 0] == 0 assert rotated[0, :, 0].sum() > 1 # Expand rotated = geometry.rotate_image(img, 30.0, expand=True) assert rotated.shape[:-1] == (60, 120) assert rotated[0, :, 0].sum() <= 1 # Expand rotated = geometry.rotate_image(img, 30.0, expand=True, preserve_origin_shape=True) assert rotated.shape[:-1] == (32, 64) assert rotated[0, :, 0].sum() <= 1 # Expand with 90° rotation rotated = geometry.rotate_image(img, 90.0, expand=True) assert rotated.shape[:-1] == (64, 128) assert rotated[0, :, 0].sum() <= 1 def test_remove_image_padding(): img = np.ones((32, 64, 3), dtype=np.float32) padded = np.pad(img, ((10, 10), (20, 20), (0, 0))) 
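# np.pad (default constant mode) frames the image with zeros: 10 rows top/bottom and 20 columns left/right; remove_image_padding is expected to strip this all-zero border and recover the original content exactly.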
cropped = geometry.remove_image_padding(padded) assert np.all(cropped == img) # No padding cropped = geometry.remove_image_padding(img) assert np.all(cropped == img) @pytest.mark.parametrize( "abs_geoms, img_size, rel_geoms", [ # Full image (boxes) [np.array([[0, 0, 32, 32]]), (32, 32), np.array([[0, 0, 1, 1]], dtype=np.float32)], # Full image (polygons) [ np.array([[[0, 0], [32, 0], [32, 32], [0, 32]]]), (32, 32), np.array([[[0, 0], [1, 0], [1, 1], [0, 1]]], dtype=np.float32), ], # Quarter image (boxes) [np.array([[0, 0, 16, 16]]), (32, 32), np.array([[0, 0, 0.5, 0.5]], dtype=np.float32)], # Quarter image (polygons) [ np.array([[[0, 0], [16, 0], [16, 16], [0, 16]]]), (32, 32), np.array([[[0, 0], [0.5, 0], [0.5, 0.5], [0, 0.5]]], dtype=np.float32), ], ], ) def test_convert_to_relative_coords(abs_geoms, img_size, rel_geoms): assert np.all(geometry.convert_to_relative_coords(abs_geoms, img_size) == rel_geoms) # Wrong format with pytest.raises(ValueError): geometry.convert_to_relative_coords(np.zeros((3, 5)), (32, 32)) def test_estimate_page_angle(): straight_polys = np.array([ [[0.3, 0.3], [0.4, 0.3], [0.4, 0.4], [0.3, 0.4]], [[0.4, 0.4], [0.5, 0.4], [0.5, 0.5], [0.4, 0.5]], [[0.5, 0.5], [0.6, 0.5], [0.6, 0.6], [0.5, 0.6]], ]) rotated_polys = geometry.rotate_boxes(straight_polys, angle=20, orig_shape=(512, 512)) angle = geometry.estimate_page_angle(rotated_polys) assert np.isclose(angle, 20) # Test divide by zero / NaN invalid_poly = np.array([[[0.5, 0.5], [0.5, 0.5], [0.5, 0.5], [0.5, 0.5]]]) angle = geometry.estimate_page_angle(invalid_poly) assert angle == 0.0 def test_extract_crops(mock_pdf): doc_img = DocumentFile.from_pdf(mock_pdf)[0] num_crops = 2 rel_boxes = np.array( [[idx / num_crops, idx / num_crops, (idx + 1) / num_crops, (idx + 1) / num_crops] for idx in range(num_crops)], dtype=np.float32, ) abs_boxes = np.array( [ [ int(idx * doc_img.shape[1] / num_crops), int(idx * doc_img.shape[0] / num_crops), int((idx + 1) * doc_img.shape[1] / num_crops), int((idx + 1) * doc_img.shape[0] / num_crops), ] for idx in range(num_crops) ], dtype=np.float32, ) with pytest.raises(AssertionError): geometry.extract_crops(doc_img, np.zeros((1, 5))) for boxes in (rel_boxes, abs_boxes): croped_imgs = geometry.extract_crops(doc_img, boxes) # Number of crops assert len(croped_imgs) == num_crops # Data type and shape assert all(isinstance(crop, np.ndarray) for crop in croped_imgs) assert all(crop.ndim == 3 for crop in croped_imgs) # Identity assert np.all( doc_img == geometry.extract_crops(doc_img, np.array([[0, 0, 1, 1]], dtype=np.float32), channels_last=True)[0] ) torch_img = np.transpose(doc_img, axes=(-1, 0, 1)) assert np.all( torch_img == np.transpose( geometry.extract_crops(doc_img, np.array([[0, 0, 1, 1]], dtype=np.float32), channels_last=False)[0], axes=(-1, 0, 1), ) ) # No box assert geometry.extract_crops(doc_img, np.zeros((0, 4))) == [] @pytest.mark.parametrize("assume_horizontal", [True, False]) def test_extract_rcrops(mock_pdf, assume_horizontal): doc_img = DocumentFile.from_pdf(mock_pdf)[0] num_crops = 2 rel_boxes = np.array( [ [ [idx / num_crops, idx / num_crops], [idx / num_crops + 0.1, idx / num_crops], [idx / num_crops + 0.1, idx / num_crops + 0.1], [idx / num_crops, idx / num_crops], ] for idx in range(num_crops) ], dtype=np.float32, ) abs_boxes = deepcopy(rel_boxes) abs_boxes[:, :, 0] *= doc_img.shape[1] abs_boxes[:, :, 1] *= doc_img.shape[0] abs_boxes = abs_boxes.astype(np.int64) with pytest.raises(AssertionError): geometry.extract_rcrops(doc_img, np.zeros((1, 8)),
assume_horizontal=assume_horizontal) for boxes in (rel_boxes, abs_boxes): croped_imgs = geometry.extract_rcrops(doc_img, boxes, assume_horizontal=assume_horizontal) # Number of crops assert len(croped_imgs) == num_crops # Data type and shape assert all(isinstance(crop, np.ndarray) for crop in croped_imgs) assert all(crop.ndim == 3 for crop in croped_imgs) # No box assert geometry.extract_rcrops(doc_img, np.zeros((0, 4, 2)), assume_horizontal=assume_horizontal) == [] @pytest.mark.parametrize( "format,input_shape,expected_shape", [ ("BCHW", (32, 3, 64, 64), (32, 3, 64, 64)), ("BCHW", (32, 64, 64, 3), (32, 3, 64, 64)), ("BHWC", (32, 64, 64, 3), (32, 64, 64, 3)), ("BHWC", (32, 3, 64, 64), (32, 64, 64, 3)), ("XYZ", (32, 3, 64, 64), (32, 3, 64, 64)), ("CHW", (3, 64, 64), (3, 64, 64)), ("CHW", (64, 64, 3), (3, 64, 64)), ("HWC", (64, 64, 3), (64, 64, 3)), ("HWC", (3, 64, 64), (64, 64, 3)), ], ) def test_shape_translate(format, input_shape, expected_shape): sample_data = np.random.rand(*input_shape).astype(np.float32) output_data = geometry.shape_translate(sample_data, format) # Assert that the output data has the expected shape assert output_data.shape == expected_shape ================================================ FILE: tests/common/test_utils_multithreading.py ================================================ import os from multiprocessing.pool import ThreadPool from unittest.mock import patch import pytest from onnxtr.utils.multithreading import multithread_exec @pytest.mark.parametrize( "input_seq, func, output_seq", [ [[1, 2, 3], lambda x: 2 * x, [2, 4, 6]], [[1, 2, 3], lambda x: x**2, [1, 4, 9]], [ ["this is", "show me", "I know"], lambda x: x + " the way", ["this is the way", "show me the way", "I know the way"], ], ], ) def test_multithread_exec(input_seq, func, output_seq): assert list(multithread_exec(func, input_seq)) == output_seq assert list(multithread_exec(func, input_seq, 0)) == output_seq @patch.dict(os.environ, {"ONNXTR_MULTIPROCESSING_DISABLE": "TRUE"}, clear=True) def test_multithread_exec_multiprocessing_disable(): with patch.object(ThreadPool, "map") as mock_tp_map: multithread_exec(lambda x: x, [1, 2]) assert not mock_tp_map.called ================================================ FILE: tests/common/test_utils_reconstitution.py ================================================ import numpy as np from test_io_elements import _mock_pages from onnxtr.utils import reconstitution def test_synthesize_page(): pages = _mock_pages() # Test without probability rendering render_no_proba = reconstitution.synthesize_page(pages[0].export(), draw_proba=False) assert isinstance(render_no_proba, np.ndarray) assert render_no_proba.shape == (*pages[0].dimensions, 3) # Test with probability rendering render_with_proba = reconstitution.synthesize_page(pages[0].export(), draw_proba=True) assert isinstance(render_with_proba, np.ndarray) assert render_with_proba.shape == (*pages[0].dimensions, 3) # Test with only one line pages_one_line = pages[0].export() pages_one_line["blocks"][0]["lines"] = [pages_one_line["blocks"][0]["lines"][0]] render_one_line = reconstitution.synthesize_page(pages_one_line, draw_proba=True) assert isinstance(render_one_line, np.ndarray) assert render_one_line.shape == (*pages[0].dimensions, 3) # Test with polygons pages_poly = pages[0].export() pages_poly["blocks"][0]["lines"][0]["geometry"] = [(0, 0), (0, 1), (1, 1), (1, 0)] render_poly = reconstitution.synthesize_page(pages_poly, draw_proba=True) assert isinstance(render_poly, np.ndarray) assert render_poly.shape == 
(*pages[0].dimensions, 3) ================================================ FILE: tests/common/test_utils_visualization.py ================================================ import numpy as np import pytest from test_io_elements import _mock_pages from onnxtr.utils import visualization def test_visualize_page(): pages = _mock_pages() image = np.ones((300, 200, 3)) visualization.visualize_page(pages[0].export(), image, words_only=False) visualization.visualize_page(pages[0].export(), image, words_only=True, interactive=False) visualization.visualize_page( pages[0].export(), image, words_only=True, interactive=False, preserve_aspect_ratio=True ) # geometry checks with pytest.raises(ValueError): visualization.create_obj_patch([1, 2], (100, 100)) with pytest.raises(ValueError): visualization.create_obj_patch((1, 2), (100, 100)) with pytest.raises(ValueError): visualization.create_obj_patch((1, 2, 3, 4, 5), (100, 100)) # polygon patch pages = _mock_pages(polygons=True) image = np.ones((300, 200, 3)) visualization.visualize_page(pages[0].export(), image, words_only=False) visualization.visualize_page(pages[0].export(), image, words_only=True, interactive=False) visualization.visualize_page( pages[0].export(), image, words_only=True, interactive=False, preserve_aspect_ratio=True ) def test_draw_boxes(): image = np.ones((256, 256, 3), dtype=np.float32) boxes = [ [0.1, 0.1, 0.2, 0.2], [0.15, 0.15, 0.19, 0.2], # to suppress [0.5, 0.5, 0.6, 0.55], [0.55, 0.5, 0.7, 0.55], # to suppress ] visualization.draw_boxes(boxes=np.array(boxes), image=image, block=False) ================================================ FILE: tests/common/test_utils_vocabs.py ================================================ from collections import Counter from onnxtr.utils import VOCABS def test_vocabs_duplicates(): for key, vocab in VOCABS.items(): assert isinstance(vocab, str) duplicates = [char for char, count in Counter(vocab).items() if count > 1] assert not duplicates, f"Duplicate characters in {key} vocab: {duplicates}" ================================================ FILE: tests/conftest.py ================================================ from io import BytesIO import cv2 import pytest import requests from PIL import Image, ImageDraw from onnxtr.io import reader from onnxtr.utils import geometry from onnxtr.utils.fonts import get_font def synthesize_text_img( text: str, font_size: int = 32, font_family=None, background_color=None, text_color=None, ) -> Image.Image: background_color = (0, 0, 0) if background_color is None else background_color text_color = (255, 255, 255) if text_color is None else text_color font = get_font(font_family, font_size) left, top, right, bottom = font.getbbox(text) text_w, text_h = right - left, bottom - top h, w = int(round(1.3 * text_h)), int(round(1.1 * text_w)) # If single letter, make the image square, otherwise expand to meet the text size img_size = (h, w) if len(text) > 1 else (max(h, w), max(h, w)) img = Image.new("RGB", img_size[::-1], color=background_color) d = ImageDraw.Draw(img) # Offset so that the text is centered text_pos = (int(round((img_size[1] - text_w) / 2)), int(round((img_size[0] - text_h) / 2))) # Draw the text d.text(text_pos, text, font=font, fill=text_color) return img @pytest.fixture(scope="session") def mock_vocab(): return "3K}7eé;5àÎYho]QwV6qU~W\"XnbBvcADfËmy.9ÔpÛ*{CôïE%M4#ÈR:g@T$x?0î£|za1ù8,OG€P-kçHëÀÂ2É/ûIJ'j(LNÙFut[)èZs+&°Sd=Ï!<â_Ç>rêi`l" # noqa @pytest.fixture(scope="session") def mock_pdf(tmpdir_factory): # Page 1 text_img = synthesize_text_img("I am a jedi!", 
background_color=(255, 255, 255), text_color=(0, 0, 0)) page = Image.new(text_img.mode, (1240, 1754), (255, 255, 255)) page.paste(text_img, (50, 100)) # Page 2 text_img = synthesize_text_img("No, I am your father.", background_color=(255, 255, 255), text_color=(0, 0, 0)) _page = Image.new(text_img.mode, (1240, 1754), (255, 255, 255)) _page.paste(text_img, (40, 300)) # Save the PDF fn = tmpdir_factory.mktemp("data").join("mock_pdf_file.pdf") page.save(str(fn), "PDF", save_all=True, append_images=[_page]) return str(fn) @pytest.fixture(scope="session") def mock_payslip(tmpdir_factory): url = "https://3.bp.blogspot.com/-Es0oHTCrVEk/UnYA-iW9rYI/AAAAAAAAAFI/hWExrXFbo9U/s1600/003.jpg" file = BytesIO(requests.get(url).content) folder = tmpdir_factory.mktemp("data") fn = str(folder.join("mock_payslip.jpeg")) with open(fn, "wb") as f: f.write(file.getbuffer()) return fn @pytest.fixture(scope="session") def mock_tilted_payslip(mock_payslip, tmpdir_factory): image = reader.read_img_as_numpy(mock_payslip) image = geometry.rotate_image(image, 30, expand=True) tmp_path = str(tmpdir_factory.mktemp("data").join("mock_tilted_payslip.jpg")) cv2.imwrite(tmp_path, image) return tmp_path @pytest.fixture(scope="session") def mock_text_box_stream(): url = "https://doctr-static.mindee.com/models?id=v0.5.1/word-crop.png&src=0" return requests.get(url).content @pytest.fixture(scope="session") def mock_text_box(mock_text_box_stream, tmpdir_factory): file = BytesIO(mock_text_box_stream) fn = tmpdir_factory.mktemp("data").join("mock_text_box_file.png") with open(fn, "wb") as f: f.write(file.getbuffer()) return str(fn) @pytest.fixture(scope="session") def mock_artefact_image_stream(): url = "https://github.com/mindee/doctr/releases/download/v0.8.1/artefact_dummy.jpg" return requests.get(url).content
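# Illustrative sketch only (not part of the test suite): pytest injects the session-scoped
# fixtures above into test functions by argument name. A hypothetical consumer of
# `mock_text_box` might look like this:
#
#   def test_text_box_is_rgb(mock_text_box):  # hypothetical test name
#       img = reader.read_img_as_numpy(mock_text_box)  # file path -> HxWx3 uint8 array
#       assert img.ndim == 3 and img.shape[-1] == 3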