Repository: py-pdf/pypdf
Branch: main
Commit: 04b0a38f56ad
Files: 207
Total size: 2.4 MB

Directory structure:
gitextract_mui37wu0/

├── .git-blame-ignore-revs
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.md
│   │   └── feature-request.md
│   ├── SECURITY.md
│   ├── dependabot.yaml
│   ├── scripts/
│   │   ├── check_gh_pages_updates.py
│   │   ├── check_pr_title.py
│   │   └── check_urls.py
│   └── workflows/
│       ├── benchmark.yaml
│       ├── create-github-release.yaml
│       ├── gh-pages-check.yaml
│       ├── github-ci.yaml
│       ├── publish-to-pypi.yaml
│       ├── release.yaml
│       ├── title-check.yaml
│       └── urls-check.yaml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── CONTRIBUTORS.md
├── LICENSE
├── Makefile
├── README.md
├── docs/
│   ├── Makefile
│   ├── _static/
│   │   └── releasing.drawio
│   ├── conf.py
│   ├── dev/
│   │   ├── cmaps.md
│   │   ├── deprecations.md
│   │   ├── documentation.md
│   │   ├── intro.md
│   │   ├── pdf-format.md
│   │   ├── pypdf-parsing.md
│   │   ├── pypdf-writing.md
│   │   ├── releasing.md
│   │   └── testing.md
│   ├── index.rst
│   ├── make.bat
│   ├── meta/
│   │   ├── changelog-v1.md
│   │   ├── comparisons.md
│   │   ├── faq.md
│   │   ├── history.md
│   │   ├── migration-1-to-2.md
│   │   ├── project-governance.md
│   │   ├── scope-of-pypdf.md
│   │   └── taking-ownership.md
│   ├── modules/
│   │   ├── Destination.rst
│   │   ├── DocumentInformation.rst
│   │   ├── Field.rst
│   │   ├── Fit.rst
│   │   ├── PageObject.rst
│   │   ├── PageRange.rst
│   │   ├── PaperSize.rst
│   │   ├── PdfDocCommon.rst
│   │   ├── PdfReader.rst
│   │   ├── PdfWriter.rst
│   │   ├── RectangleObject.rst
│   │   ├── Transformation.rst
│   │   ├── XmpInformation.rst
│   │   ├── annotations.rst
│   │   ├── constants.rst
│   │   ├── errors.rst
│   │   └── generic.rst
│   └── user/
│       ├── add-javascript.md
│       ├── add-watermark.md
│       ├── adding-pdf-annotations.md
│       ├── cropping-and-transforming.md
│       ├── encryption-decryption.md
│       ├── extract-images.md
│       ├── extract-text.md
│       ├── file-size.md
│       ├── forms.md
│       ├── handle-attachments.md
│       ├── handling-outlines.md
│       ├── installation.md
│       ├── merging-pdfs.md
│       ├── metadata.md
│       ├── pdf-version-support.md
│       ├── pdfa-compliance.md
│       ├── post-processing-in-text-extraction.md
│       ├── reading-pdf-annotations.md
│       ├── robustness.md
│       ├── security.md
│       ├── streaming-data.md
│       ├── suppress-warnings.md
│       └── viewer-preferences.md
├── make_release.py
├── pypdf/
│   ├── __init__.py
│   ├── _cmap.py
│   ├── _codecs/
│   │   ├── __init__.py
│   │   ├── _codecs.py
│   │   ├── adobe_glyphs.py
│   │   ├── core_font_metrics.py
│   │   ├── pdfdoc.py
│   │   ├── std.py
│   │   ├── symbol.py
│   │   └── zapfding.py
│   ├── _crypt_providers/
│   │   ├── __init__.py
│   │   ├── _base.py
│   │   ├── _cryptography.py
│   │   ├── _fallback.py
│   │   └── _pycryptodome.py
│   ├── _doc_common.py
│   ├── _encryption.py
│   ├── _font.py
│   ├── _page.py
│   ├── _page_labels.py
│   ├── _protocols.py
│   ├── _reader.py
│   ├── _text_extraction/
│   │   ├── __init__.py
│   │   ├── _layout_mode/
│   │   │   ├── __init__.py
│   │   │   ├── _fixed_width_page.py
│   │   │   ├── _text_state_manager.py
│   │   │   └── _text_state_params.py
│   │   └── _text_extractor.py
│   ├── _utils.py
│   ├── _version.py
│   ├── _writer.py
│   ├── annotations/
│   │   ├── __init__.py
│   │   ├── _base.py
│   │   ├── _markup_annotations.py
│   │   └── _non_markup_annotations.py
│   ├── constants.py
│   ├── errors.py
│   ├── filters.py
│   ├── generic/
│   │   ├── __init__.py
│   │   ├── _appearance_stream.py
│   │   ├── _base.py
│   │   ├── _data_structures.py
│   │   ├── _files.py
│   │   ├── _fit.py
│   │   ├── _image_inline.py
│   │   ├── _image_xobject.py
│   │   ├── _link.py
│   │   ├── _outline.py
│   │   ├── _rectangle.py
│   │   ├── _utils.py
│   │   └── _viewerpref.py
│   ├── pagerange.py
│   ├── papersizes.py
│   ├── py.typed
│   ├── types.py
│   └── xmp.py
├── pyproject.toml
├── requirements/
│   ├── ci-3.11.txt
│   ├── ci.in
│   ├── ci.txt
│   ├── dev.in
│   ├── dev.txt
│   ├── docs.in
│   └── docs.txt
├── resources/
│   ├── 010-pdflatex-forms.txt
│   ├── AEO.1172.layout.rot180.txt
│   ├── AEO.1172.layout.txt
│   ├── Claim Maker Alerts Guide_pg2.layout.txt
│   ├── Epic.Page.layout.txt
│   ├── afm_to_dataclass.py
│   ├── crazyones.txt
│   ├── crazyones_layout_vertical_space.txt
│   ├── crazyones_layout_vertical_space_font_height_weight.txt
│   ├── jpeg.txt
│   ├── multicolumn-lorem-ipsum.txt
│   └── toy.layout.txt
└── tests/
    ├── __init__.py
    ├── bench.py
    ├── conftest.py
    ├── example_files.yaml
    ├── generic/
    │   ├── __init__.py
    │   ├── test_base.py
    │   ├── test_data_structures.py
    │   ├── test_files.py
    │   ├── test_image_inline.py
    │   ├── test_image_xobject.py
    │   └── test_link.py
    ├── scripts/
    │   ├── __init__.py
    │   ├── data/
    │   │   └── commits__version_4_0_1.json
    │   ├── test_example_files.py
    │   └── test_make_release.py
    ├── test_annotations.py
    ├── test_appearance_stream.py
    ├── test_cmap.py
    ├── test_codecs.py
    ├── test_constants.py
    ├── test_doc_common.py
    ├── test_encryption.py
    ├── test_filters.py
    ├── test_font.py
    ├── test_forms.py
    ├── test_generic.py
    ├── test_images.py
    ├── test_javascript.py
    ├── test_merger.py
    ├── test_page.py
    ├── test_page_labels.py
    ├── test_pagerange.py
    ├── test_papersizes.py
    ├── test_pdfa.py
    ├── test_protocols.py
    ├── test_reader.py
    ├── test_text_extraction.py
    ├── test_utils.py
    ├── test_workflows.py
    ├── test_writer.py
    ├── test_xmp.py
    └── utils.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .git-blame-ignore-revs
================================================
# This file helps us to ignore style / formatting / doc changes
# in git blame. That is useful when we're trying to find the root cause of an
# error.

# Docstring formatting
a89ff74d8c0203278a039d9496a3d8df4d134f84

# STY: Apply pre-commit (black, isort) + use snake_case variables (#832)
eef03d935dfeacaa75848b39082cf94d833d3174

# STY: Apply black and isort
baeb7d23278de0f8d00ca9f2b656bf0674f08937

# STY: Documentation, Variable names (#839)
444fca22836df061d9d23e71ffb7d68edcdfa766


================================================
FILE: .github/ISSUE_TEMPLATE/bug-report.md
================================================
---
name: Report a bug
about: Something broke!
title: ''
labels: Bug
assignees: ''

---

Replace this: What happened? What were you trying to achieve?

## Environment

Which environment were you using when you encountered the problem?

```bash
$ python -m platform
# TODO: Your output goes here

$ python -c "import pypdf;print(pypdf._debug_versions)"
# TODO: Your output goes here
```

## Code + PDF

This is a minimal, complete example that shows the issue:

```python
# TODO: Your code goes here
```

Share here the PDF file(s) that cause the issue. The smaller they are, the
better. Let us know if we may add them to our tests!

## Traceback

This is the complete traceback I see:

```
# TODO: Your traceback goes here (if applicable)
```


================================================
FILE: .github/ISSUE_TEMPLATE/feature-request.md
================================================
---
name: Request a Feature
about: What do you think is missing in pypdf?
title: ''
labels: Feature Request
assignees: ''

---

## Explanation

Explain briefly what you want to achieve.

## Code Example

How would your feature be used? (Remove this if it is not applicable.)

```python
from pypdf import PdfReader, PdfWriter

...  # your new feature in action!
```


================================================
FILE: .github/SECURITY.md
================================================
# Security Policy

## Supported Versions

Security fixes are applied to the latest version.

## Reporting a Vulnerability

If you find a potential security issue, please report it using the
[private vulnerability reporting](https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing-information-about-vulnerabilities/privately-reporting-a-security-vulnerability) feature of GitHub to
automatically inform all relevant team members. Otherwise, please
get in touch with stefan6419846 through e-mail (current maintainer,
address in GitHub profile).

Please also have a look at our [corresponding user documentation](https://pypdf.readthedocs.io/en/stable/user/security.html),
which includes some information about possibly invalid reports.

We will try to find a fix in a timely manner and will then issue a security
advisory together with the update via GitHub, as well as requesting a CVE
([example](https://github.com/py-pdf/pypdf/security/advisories/GHSA-xcjx-m2pj-8g79)).

If you do not get a reaction within 30 days, please open a public issue on GitHub.


================================================
FILE: .github/dependabot.yaml
================================================
# Set update schedule for GitHub Actions

version: 2
updates:

  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "daily"
    commit-message:
      prefix: "DEV"


================================================
FILE: .github/scripts/check_gh_pages_updates.py
================================================
"""Check that all GitHub pages JavaScript dependencies are up-to-date."""  # noqa: INP001

import base64
import hashlib
import json
import re
import sys
import urllib.request
from pathlib import Path

JSDELIVR_RE = re.compile(
    r"(https://cdn\.jsdelivr\.net/npm/"
    r"(?P<name>[^@/]+)@(?P<version>[^/]+)"
    r"/(?P<path>[^\"']+))"
)


def fetch_json(url: str) -> dict:
    """Download the resource at ``url`` (15 second timeout) and decode its body as JSON."""
    with urllib.request.urlopen(url, timeout=15) as response:  # noqa: S310  # Controlled input.
        raw_body = response.read()
    return json.loads(raw_body)


def fetch_bytes(url: str) -> bytes:
    """Download the resource at ``url`` (30 second timeout) and return its raw body."""
    with urllib.request.urlopen(url, timeout=30) as response:  # noqa: S310  # Controlled input.
        body = response.read()
    return body


def get_latest_version(pkg: str) -> str:
    """Return the version which the npm registry tags as ``latest`` for ``pkg``."""
    registry_entry = fetch_json(f"https://registry.npmjs.org/{pkg}")
    dist_tags = registry_entry["dist-tags"]
    return dist_tags["latest"]


def sri_hash(content: bytes) -> str:
    """Build the ``sha384-<base64 digest>`` SRI value for the given content."""
    encoded_digest = base64.b64encode(hashlib.sha384(content).digest())
    return f"sha384-{encoded_digest.decode('ascii')}"


def scan_html(path: Path) -> list[re.Match[str]]:
    """Return all ``JSDELIVR_RE`` matches found in the file at ``path``."""
    # Undecodable bytes are dropped instead of aborting the scan.
    html = path.read_text(encoding="utf-8", errors="ignore")
    return [*JSDELIVR_RE.finditer(html)]


def main() -> None:
    """
    Report outdated jsDelivr includes in the HTML files below ``gh-pages``.

    Each include is compared against the latest version reported by npm. For
    outdated ones, the SRI hash of the latest file is printed as well. Failed
    lookups or downloads are reported as warnings and the include is skipped.
    The process exits with status 1 if at least one include is outdated.
    """
    emit = sys.stdout.write
    has_outdated = False

    for page in sorted(Path("gh-pages").rglob("*.html"), key=str):
        includes = scan_html(page)
        if not includes:
            continue

        emit(f"\n📄 {page} ...\n\n")

        for include in includes:
            url = include.group(1)
            package = include.group("name")
            pinned = include.group("version")

            try:
                newest = get_latest_version(package)
            except Exception as error:
                emit(f"  ⚠️  {package}: npm lookup failed ({error})\n")
                continue

            if pinned == newest:
                emit(f"  ✅ {package} {pinned}\n")
                continue

            # Flagged before downloading: a failed download below still counts as outdated.
            has_outdated = True
            newest_url = url.replace(f"@{pinned}/", f"@{newest}/")

            try:
                newest_sri = sri_hash(fetch_bytes(newest_url))
            except Exception as error:
                emit(f"  ⚠️  {package}: failed to fetch latest file ({error})\n")
                continue

            for line in (
                f"  ❌ {package}\n",
                f"     Current: {pinned}\n",
                f"     Latest:  {newest}\n",
                f"     Latest SRI: {newest_sri}\n",
                "\n",
            ):
                emit(line)

    if has_outdated:
        emit("\n❗ Outdated dependencies detected\n")
        sys.exit(1)

    emit("\n🎉 All CDN dependencies are up to date\n")


if __name__ == "__main__":
    main()


================================================
FILE: .github/scripts/check_pr_title.py
================================================
"""Check that all PR titles follow the desired scheme."""  # noqa: INP001

import os
import sys

# Accepted title prefixes; each one includes the trailing ": " separator.
KNOWN_PREFIXES = (
    "SEC: ",
    "BUG: ",
    "ENH: ",
    "DEP: ",
    "PI: ",
    "ROB: ",
    "DOC: ",
    "TST: ",
    "DEV: ",
    "STY: ",
    "MAINT: ",
    "REL: ",  # For internal use only.
)
# The title to check is handed over through the environment.
PR_TITLE = os.getenv("PR_TITLE", "")

# Valid titles start with a known prefix and have a non-empty remainder after it.
# The split is only evaluated once the prefix check passed, thus the index is safe.
if PR_TITLE.startswith(KNOWN_PREFIXES) and PR_TITLE.split(": ", maxsplit=1)[1]:
    sys.stdout.write(f"PR title '{PR_TITLE}' appears to be valid.\n")
else:
    sys.stderr.write(
        f"The PR title '{PR_TITLE}' does not follow the projects naming scheme: "
        "https://pypdf.readthedocs.io/en/latest/dev/intro.html#commit-messages\n",
    )
    sys.stderr.write(
        "If you do not know which one to choose or if multiple apply, make a best guess. "
        "Nobody will complain if it does not quite fit :-)\n",
    )
    sys.exit(1)


================================================
FILE: .github/scripts/check_urls.py
================================================
"""Check that all test data URLs are still accessible."""  # noqa: INP001
import ast
import sys
from collections.abc import Iterator
from operator import itemgetter
from pathlib import Path

from tests import _get_data_from_url, read_yaml_to_list_of_dicts

URL_PREFIXES_TO_IGNORE = (
    "http://ns.adobe.com/tiff/1.0/",
    "http://www.example.com",
    "https://example.com",
    "https://martin-thoma.com",
    "https://pypdf.readthedocs.io/",
    "https://www.example.com",
)

PDF_URLS_WHICH_DO_NOT_LOOK_LIKE_PDFS = {
    "https://github.com/user-attachments/files/18381726/tika-957721.pdf",
}


def get_urls_from_test_files() -> Iterator[str]:
    """Yield every HTTP(S) string constant found in the ``test_*.py`` files of the tests directory."""
    repository_root = Path(__file__).parent.parent.parent
    for path in sorted((repository_root / "tests").rglob("test_*.py")):
        module = ast.parse(source=path.read_text(encoding="utf-8"), filename=str(path))
        for node in ast.walk(module):
            if (
                    isinstance(node, ast.Constant) and
                    isinstance(node.value, str) and
                    node.value.startswith(("http://", "https://"))
            ):
                yield node.value


def get_urls_from_example_files() -> Iterator[str]:
    """Yield the ``url`` value of each entry listed in ``tests/example_files.yaml``."""
    yaml_path = Path(__file__).parent.parent.parent / "tests" / "example_files.yaml"
    for entry in read_yaml_to_list_of_dicts(yaml_path):
        yield entry["url"]


def check_url(url: str) -> bool:
    """
    Decide whether the given URL still delivers usable data.

    URLs starting with one of ``URL_PREFIXES_TO_IGNORE`` are accepted without
    being fetched. Every problem found is written to stderr.

    Returns:
        ``True`` if the URL is ignored or looks good, ``False`` otherwise.
    """
    if url.startswith(URL_PREFIXES_TO_IGNORE):
        return True

    try:
        payload = _get_data_from_url(url)
    except Exception as error:
        sys.stderr.write(f"Error getting data from {url}: {error}\n")
        return False

    if len(payload) < 75:
        sys.stderr.write(f"Not enough data from {url}: {payload}\n")
        return False

    # Only URLs ending in ".pdf" which are not explicitly exempted need the PDF header.
    expects_pdf = url.lower().endswith(".pdf") and url not in PDF_URLS_WHICH_DO_NOT_LOOK_LIKE_PDFS
    if expects_pdf and not payload.startswith(b"%PDF-"):
        sys.stderr.write(f"The file at {url} does not look like a PDF: {payload[:50]}\n")
        return False

    sys.stdout.write(f"URL {url} looks good.\n")
    return True


def main() -> bool:
    """
    Check all URLs from the test files and the example files.

    Returns:
        ``True`` if at least one URL is invalid, ``False`` if all of them look good.
    """
    urls: set[str] = {*get_urls_from_test_files(), *get_urls_from_example_files()}

    # Build the complete list first to make sure every URL gets checked and reported.
    outcomes = [check_url(url) for url in sorted(urls)]
    return not all(outcomes)


if __name__ == "__main__":
    sys.exit(main())


================================================
FILE: .github/workflows/benchmark.yaml
================================================
name: Benchmarking pypdf
on:
  push:
    branches:
      - main

permissions:
  contents: write
  deployments: write

jobs:
  benchmark:
    name: "Benchmark ${{ matrix.name }}"
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.x']
        include:
          - python-version: '3.x'
            name: 'CPython'
          - python-version: 'pypy3.11'
            name: 'PyPy 3.11'
    steps:
    - name: Checkout Code
      uses: actions/checkout@v6
      with:
        submodules: 'recursive'
    - name: Setup Python
      uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install requirements
      run: |
        pip install -r requirements/ci-3.11.txt
    - name: Install pypdf
      run: |
        pip install .
    - name: Run benchmark
      run: |
        pytest tests/bench.py --benchmark-json output.json
    - name: Store benchmark result
      uses: benchmark-action/github-action-benchmark@v1
      with:
        name: "${{ matrix.name }} Benchmark"
        tool: 'pytest'
        output-file-path: output.json
        # Use personal access token instead of GITHUB_TOKEN due to https://github.community/t/github-action-not-triggering-gh-pages-upon-push/16096
        github-token: ${{ secrets.GITHUB_TOKEN }}
        auto-push: true
        # Show alert with commit comment on detecting possible performance regression
        alert-threshold: '200%'
        comment-on-alert: true
        fail-on-alert: true


================================================
FILE: .github/workflows/create-github-release.yaml
================================================
name: Create a GitHub release page

on:
  push:
    tags:
      - '*.*.*'
  workflow_dispatch:

permissions:
  contents: write

jobs:
  build_and_publish:
    name: Create a GitHub release page
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v6
      - name: Prepare variables
        id: prepare_variables
        run: |
          git fetch --tags --force
          latest_tag=$(git describe --tags --abbrev=0)
          echo "latest_tag=${latest_tag}" >> "$GITHUB_ENV"
          echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
          EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
          echo "tag_body<<$EOF" >> "$GITHUB_ENV"
          git --no-pager tag -l "${latest_tag}" --format='%(contents:body)' >> "$GITHUB_ENV"
          echo "$EOF" >> "$GITHUB_ENV"
      - name: Create GitHub Release 🚀
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ env.latest_tag }}
          name: Version ${{ env.latest_tag }}, ${{ env.date }}
          draft: false
          prerelease: false
          body: ${{ env.tag_body }}


================================================
FILE: .github/workflows/gh-pages-check.yaml
================================================
name: 'GitHub Pages Check'
on:
  workflow_dispatch:
  schedule:
    - cron: 0 6 * * 1

jobs:
  url-check:
    name: GitHub Pages check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout GitHub Pages
        uses: actions/checkout@v6
        with:
          ref: 'gh-pages'
          path: 'gh-pages'
      - name: Checkout main (tools)
        uses: actions/checkout@v6
        with:
          ref: main
          path: main
      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.x'
      - name: Check GitHub Pages
        run: |
          export PYTHONPATH="$GITHUB_WORKSPACE"
          python main/.github/scripts/check_gh_pages_updates.py


================================================
FILE: .github/workflows/github-ci.yaml
================================================
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/tutorials/build-and-test-code/python

name: CI

on:
  push:
    branches:
      - main
    paths-ignore:
      - '**/*.md'
      - '**/*.rst'
  pull_request:
    branches:
      - main
    paths-ignore:
      - '**/*.md'
      - '**/*.rst'
  workflow_dispatch:

jobs:
  test_windows:
    name: pytest on windows
    runs-on: windows-latest
    steps:
    - name: Checkout Code
      uses: actions/checkout@v6
      with:
        submodules: 'recursive'
    - name: Cache Downloaded Files
      id: cache-downloaded-files-windows
      uses: actions/cache@v5
      if: github.ref == 'refs/heads/main'
      with:
        path: '**/tests/pdf_cache/*'
        key: cache-downloaded-files-main-${{ github.run_id }}
        restore-keys: |
          cache-downloaded-files-main-
          cache-downloaded-files
        enableCrossOsArchive: true
    - name: Restore Downloaded Files
      uses: actions/cache/restore@v5
      if: github.ref != 'refs/heads/main'
      with:
        path: '**/tests/pdf_cache/*'
        key: cache-downloaded-files-main-
        restore-keys: |
          cache-downloaded-files-main-
          cache-downloaded-files
        enableCrossOsArchive: true
    - name: Setup Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.x'
        allow-prereleases: true
    - name: Upgrade pip
      run: |
        python -m pip install --upgrade pip
    - name: Install requirements (Python 3.11+)
      run: |
        pip install -r requirements/ci-3.11.txt
    - name: Install cryptography
      run: |
        pip install cryptography
    - name: Install pypdf
      run: |
        pip install .
    - name: Prepare
      run: |
        python -c "from tests import download_test_pdfs; download_test_pdfs()"
    - name: Test with pytest
      run: |
        python -m pytest tests --cov=pypdf --cov-append -n auto -vv -p no:benchmark

  test_macos:
    name: pytest on macOS
    runs-on: macos-latest
    steps:
    - name: Checkout Code
      uses: actions/checkout@v6
      with:
        submodules: 'recursive'
    - name: Cache Downloaded Files
      id: cache-downloaded-files-mac
      uses: actions/cache@v5
      if: github.ref == 'refs/heads/main'
      with:
        path: '**/tests/pdf_cache/*'
        key: cache-downloaded-files-main-${{ github.run_id }}
        restore-keys: |
          cache-downloaded-files-main-
          cache-downloaded-files
    - name: Restore Downloaded Files
      uses: actions/cache/restore@v5
      if: github.ref != 'refs/heads/main'
      with:
        path: '**/tests/pdf_cache/*'
        key: cache-downloaded-files-main-
        restore-keys: |
          cache-downloaded-files-main-
          cache-downloaded-files
    - name: Setup Python (3.11+)
      uses: actions/setup-python@v6
      with:
        python-version: '3.x'
        allow-prereleases: true
    - name: Upgrade pip
      run: |
        python -m pip install --upgrade pip
    - name: Install requirements (Python 3.11+)
      run: |
        pip install -r requirements/ci-3.11.txt
    - name: Install cryptography
      run: |
        pip install cryptography
    - name: Install OS dependencies
      run:
        brew install ghostscript jbig2dec poppler
    - name: Install pypdf
      run: |
        pip install .
    - name: Prepare
      run: |
        python -c "from tests import download_test_pdfs; download_test_pdfs()"
    - name: Test with pytest
      run: |
        python -m pytest tests --cov=pypdf --cov-append -n auto -vv -p no:benchmark

  tests:
    name: "pytest on ${{ matrix.python-version }} (crypto-lib: ${{ matrix.use-crypto-lib }})"
    runs-on: ubuntu-24.04
    strategy:
      matrix:
        python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14', 'pypy3.11']
        use-crypto-lib: ['cryptography']
        include:
          - python-version: '3.9'
            use-crypto-lib: 'pycryptodome'
          - python-version: '3.9'
            use-crypto-lib: 'none'
    steps:
    - name: Update APT packages
      run:
        sudo apt-get update
    - name: Install APT dependencies
      run:
        sudo apt-get install ghostscript jbig2dec poppler-utils
    - name: Checkout Code
      uses: actions/checkout@v6
      with:
        submodules: 'recursive'
    - name: Cache Downloaded Files
      id: cache-downloaded-files
      uses: actions/cache@v5
      if: github.ref == 'refs/heads/main'
      with:
        path: '**/tests/pdf_cache/*'
        key: cache-downloaded-files-main-${{ github.run_id }}
        restore-keys: |
          cache-downloaded-files-main-
          cache-downloaded-files
    - name: Restore Downloaded Files
      uses: actions/cache/restore@v5
      if: github.ref != 'refs/heads/main'
      with:
        path: '**/tests/pdf_cache/*'
        key: cache-downloaded-files-main-
        restore-keys: |
          cache-downloaded-files-main-
          cache-downloaded-files
    - name: Setup Python
      uses: actions/setup-python@v6
      if: matrix.python-version == '3.9' || matrix.python-version == '3.10'
      with:
        python-version: ${{ matrix.python-version }}
        cache: 'pip'
        cache-dependency-path: '**/requirements/ci.txt'
    - name: Setup Python (3.11+)
      uses: actions/setup-python@v6
      if: matrix.python-version != '3.9' && matrix.python-version != '3.10'
      with:
        python-version: ${{ matrix.python-version }}
        allow-prereleases: true
        cache: 'pip'
        cache-dependency-path: '**/requirements/ci-3.11.txt'
    - name: Upgrade pip
      run: |
        python -m pip install --upgrade pip
    - name: Install requirements (Python 3)
      run: |
        pip install -r requirements/ci.txt
      if: matrix.python-version == '3.9' || matrix.python-version == '3.10'
    - name: Install requirements (Python 3.11+)
      run: |
        pip install -r requirements/ci-3.11.txt
      if: matrix.python-version != '3.9' && matrix.python-version != '3.10'
    - name: Remove pycryptodome and cryptography
      run: |
        pip uninstall pycryptodome cryptography -y
    - name: Install cryptography
      run: |
        pip install cryptography
      if: matrix.use-crypto-lib == 'cryptography'
    - name: Install pycryptodome
      run: |
        pip install pycryptodome
      if: matrix.use-crypto-lib == 'pycryptodome'
    - name: Install pypdf
      run: |
        pip install .
    - name: Download test files
      run: |
        python -c "from tests import download_test_pdfs; download_test_pdfs()"
    - name: Test with pytest
      run: |
        python -m pytest tests --cov=pypdf --cov-append -n auto -vv -p no:benchmark
      if: ${{ !startsWith(matrix.python-version, 'pypy') }}
    - name: Test with pytest (PyPy, no coverage)
      # Coverage on PyPy is skipped because running coverage with PyPy is slow and the CPython tests already provide
      # complete coverage data for the same code.
      run: |
        python -m pytest tests -n auto -vv -p no:benchmark -o faulthandler_timeout=60 --dist=loadfile
      if: ${{ startsWith(matrix.python-version, 'pypy') }}
    - name: Rename coverage data file
      run: mv .coverage ".coverage.$RANDOM"
      if: ${{ !startsWith(matrix.python-version, 'pypy') }}
    - name: Upload coverage data
      uses: actions/upload-artifact@v7
      if: ${{ !startsWith(matrix.python-version, 'pypy') }}
      with:
        name: coverage-data.${{ matrix.python-version }}-${{ matrix.use-crypto-lib }}
        path: .coverage.*
        if-no-files-found: ignore
        include-hidden-files: true

  codestyle:
    name: Check code style issues
    runs-on: ubuntu-24.04
    steps:
    - name: Checkout Code
      uses: actions/checkout@v6
      with:
        submodules: 'recursive'
    - name: Setup Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.x'
        cache: 'pip'
        cache-dependency-path: '**/requirements/ci-3.11.txt'
    - name: Upgrade pip
      run: |
        python -m pip install --upgrade pip
    - name: Install requirements
      run: |
        pip install -r requirements/ci-3.11.txt
    - name: Install pypdf
      run: |
        pip install .
    - name: Test with ruff
      run: |
        echo `ruff --version`
        ruff check .
    - name: Test with mypy
      run : |
        mypy pypdf
    - name: Install docs requirements
      run: |
        pip install -r requirements/docs.txt
    - name: Test docs build
      working-directory: ./docs
      run: |
        sphinx-build --nitpicky --fail-on-warning --keep-going --show-traceback -d _build/doctrees --builder html . _build/html
    - name: Test docs examples
      working-directory: ./docs
      run: |
        sphinx-build -d _build/doctrees --builder doctest . _build/doctest
    - name: Check with pre-commit
      run: |
        pip install -r requirements/dev.txt
        pre-commit run --all-files --show-diff-on-failure

  package:
    name: Build & verify package
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: '3.x'

      - run: python -m pip install flit check-wheel-contents
      - run: flit build
      - run: ls -l dist

      - name: Test CHANGELOG.md present in sdist
        run: tar -tzf dist/*.tar.gz | grep -q 'CHANGELOG.md'

      - name: Test of bdist
        run: check-wheel-contents dist/*.whl

      - name: Test installing package
        run: python -m pip install .

      - name: Test running installed package
        working-directory: /tmp
        run: python -c "import pypdf;print(pypdf.__version__)"

  coverage:
    name: Combine & check coverage.
    runs-on: ubuntu-latest
    needs: tests

    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v6
        with:
          python-version: '3.x'

      - run: python -m pip install --upgrade coverage[toml]

      - uses: actions/download-artifact@v8
        with:
          pattern: coverage-data*
          merge-multiple: true

      - name: Check Number of Downloaded Files
        run: |
          downloaded_files_count=$(find \.coverage* -type f | wc -l)
          if [ $downloaded_files_count -eq 8 ]; then
            echo "The expected number of files (8) were downloaded."
          else
            echo "ERROR: Expected 8 files, but found $downloaded_files_count files."
            exit 1
          fi

      - name: Combine coverage & create xml report
        run: |
          python -m coverage combine
          python -m coverage xml
      - name: Upload Coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage.xml


================================================
FILE: .github/workflows/publish-to-pypi.yaml
================================================
# Builds the source tarball and wheel and publishes them to PyPI.
name: Publish Python Package to PyPI

# Runs for every pushed tag of the form `<x>.<y>.<z>` and on manual dispatch.
on:
  push:
    tags:
      - '*.*.*'
  workflow_dispatch:

jobs:
  # Build once and hand the result over as an artifact, so that the publishing
  # job below only needs to download and upload the finished packages.
  build:
    name: Build distribution
    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v6
    - name: Set up Python
      uses: actions/setup-python@v6
      with:
        python-version: '3.x'
    - name: Install pypa/build
      run: >-
        python3 -m
        pip install
        build
        --user
    - name: Build a binary wheel and a source tarball
      run: python3 -m build
    - name: Store the distribution packages
      uses: actions/upload-artifact@v7
      with:
        name: python-package-distributions
        path: dist/

  # Uploads the artifact created by `build` to PyPI from within the `pypi`
  # environment. No API token is configured here; authentication relies on
  # the `id-token` permission (trusted publishing).
  publish-to-pypi:
    name: Publish Python distribution to PyPI
    needs:
    - build
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/pypdf
    permissions:
      id-token: write  # IMPORTANT: mandatory for trusted publishing

    steps:
    - name: Download all the dists
      uses: actions/download-artifact@v8
      with:
        name: python-package-distributions
        path: dist/
    - name: Publish distribution to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1


================================================
FILE: .github/workflows/release.yaml
================================================
# This action assumes that there is a REL-commit which already has a
# Markdown-formatted git tag. Hence, the CHANGELOG is already adjusted,
# and it's decided what should be in the release.
# This action only ensures the release is done with the proper contents
# and that it's announced with a GitHub release.
name: Create git tag
# Disabled for now: it only uses a dummy `workflow_dispatch` trigger, which we usually do not use anyway.
# To activate this again, we have to fix https://github.com/py-pdf/pypdf/issues/2753
on:
    workflow_dispatch:
#   push:
#     branches:
#       - main

permissions:
  contents: write

env:
  HEAD_COMMIT_MESSAGE: ${{ github.event.head_commit.message }}

jobs:
  # Creates and pushes an annotated git tag for a `REL: x.y.z` commit.
  #
  # The version is the `x.y.z` following `REL: ` in the head commit message;
  # the tag message is the commit message with that `REL: x.y.z` prefix removed.
  #
  # Security: values derived from the commit message are handed to the shell
  # through `env:` only. Interpolating `${{ ... }}` directly into a `run:`
  # script would let the commit message inject arbitrary shell code and would
  # also break on quotes, `$` or backticks inside the message.
  build_and_publish:
    name: Publish a new version
    runs-on: ubuntu-latest
    if: "${{ startsWith(github.event.head_commit.message, 'REL: ') }}"
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v6

      - name: Extract version from commit message
        id: extract_version
        run: |
          VERSION=$(echo "$HEAD_COMMIT_MESSAGE" | grep -oP '(?<=REL: )\d+\.\d+\.\d+')
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      # The message may span several lines, so it is written with the
      # `name<<delimiter` syntax; the random delimiter avoids clashes with
      # the message content.
      - name: Extract tag message from commit message
        id: extract_message
        env:
          VERSION: ${{ steps.extract_version.outputs.version }}
        run: |
          delimiter="$(openssl rand -hex 8)"
          MESSAGE=$(echo "$HEAD_COMMIT_MESSAGE" | sed "0,/REL: $VERSION/s///" )
          echo "message<<${delimiter}" >> "$GITHUB_OUTPUT"
          echo "$MESSAGE" >> "$GITHUB_OUTPUT"
          echo "${delimiter}" >> "$GITHUB_OUTPUT"

      - name: Create Git Tag
        env:
          VERSION: ${{ steps.extract_version.outputs.version }}
          MESSAGE: ${{ steps.extract_message.outputs.message }}
        run: |
          git config user.name github-actions
          git config user.email github-actions@github.com
          git tag "$VERSION" -m "$MESSAGE"
          git push origin "$VERSION"


================================================
FILE: .github/workflows/title-check.yaml
================================================
# Validates the title of a pull request with `.github/scripts/check_pr_title.py`.
name: 'PR Title Check'
on:
  pull_request:
    # check when PR
    # * is created,
    # * title is edited, and
    # * new commits are added (to ensure failing title blocks merging)
    types: [opened, reopened, edited, synchronize]

jobs:
  title-check:
    name: Title check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Code
        uses: actions/checkout@v6
      # The user-controlled title is exposed as the `PR_TITLE` environment
      # variable instead of being interpolated into the command line.
      # NOTE(review): presumably read by check_pr_title.py - confirm there.
      - name: Check PR title
        env:
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: python .github/scripts/check_pr_title.py


================================================
FILE: .github/workflows/urls-check.yaml
================================================
# Runs `.github/scripts/check_urls.py` on a schedule and on manual dispatch.
name: 'URL Check'
on:
  workflow_dispatch:
  schedule:
    # Every Monday at 06:00 (GitHub evaluates cron schedules in UTC).
    - cron: 0 6 * * 1

jobs:
  url-check:
    name: URL check
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Code
        uses: actions/checkout@v6
      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.x'
      - name: Install requirements
        run:
          pip install pyyaml Pillow
      # PYTHONPATH points at the checkout so that modules from the repository
      # root can be imported by the script without installing the package.
      - name: Check URLs
        run: |
          export PYTHONPATH="$GITHUB_WORKSPACE"
          python .github/scripts/check_urls.py


================================================
FILE: .gitignore
================================================
*.pyc
*.swp
.DS_Store
.tox
build
.idea/*
*.egg-info/
dist/*
__pycache__/

# in-project virtual environments
venv/
.venv/

# Code coverage artifacts
.coverage*
coverage.xml

# Editors / IDEs
.vscode/

# Docs
docs/_build/

.cspell/

# Files generated by some of the scripts
dont_commit_*.pdf
pypdf-output.pdf
annotated-pdf-link.pdf
Image9.png
pypdf_pdfLocation.txt

.python-version
tests/pdf_cache/
docs/meta/CHANGELOG.md
docs/meta/CONTRIBUTORS.md
extracted-images/

RELEASE_COMMIT_MSG.md
RELEASE_TAG_MSG.md
.envrc


================================================
FILE: .gitmodules
================================================
[submodule "sample-files"]
	path = sample-files
	url = https://github.com/py-pdf/sample-files


================================================
FILE: .pre-commit-config.yaml
================================================
# pre-commit run --all-files
repos:
-   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v6.0.0
    hooks:
    -   id: check-ast
    -   id: check-case-conflict
    -   id: check-docstring-first
    -   id: check-yaml
    -   id: debug-statements
    -   id: end-of-file-fixer
        exclude: "resources/.*|docs/make.bat"
    -   id: fix-byte-order-marker
    -   id: trailing-whitespace
    -   id: mixed-line-ending
        args: ['--fix=lf']
        exclude: "docs/make.bat"
    -   id: check-added-large-files
        args: ['--maxkb=1000']

-   repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.15.0
    hooks:
    -   id: ruff-check
        args: ['--fix']

-   repo: https://github.com/asottile/pyupgrade
    rev: v3.21.2
    hooks:
    -   id: pyupgrade
        args: [--py39-plus]

-   repo: https://github.com/pre-commit/mirrors-mypy
    rev: v1.17.1
    hooks:
      - id: mypy
        files: ^pypdf/.*


================================================
FILE: .readthedocs.yaml
================================================
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2


build:
  os: ubuntu-lts-latest
  tools:
    python: "latest"

# Build documentation in the "docs/" directory with Sphinx
sphinx:
   configuration: docs/conf.py

# If using Sphinx, optionally build your docs in additional formats such as PDF
formats: all

# Optionally declare the Python requirements required to build your docs
python:
  install:
    - requirements: requirements/docs.txt
    - method: pip
      path: .
      extra_requirements:
        - full


================================================
FILE: CHANGELOG.md
================================================
# CHANGELOG

## Version 6.9.1, 2026-03-17

### Security (SEC)
- Improve performance and limit length of array-based content streams (#3686)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.9.0...6.9.1)

## Version 6.9.0, 2026-03-15

### New Features (ENH)
- Expose /Perms verification result on Encryption object (#3672)

### Performance Improvements (PI)
- Fix O(n²) performance in NameObject read/write (#3679)
- Batch-parse all objects in ObjStm on first access (#3677)

### Bug Fixes (BUG)
- Avoid sharing array-based content streams between pages (#3681)
- Avoid accessing invalid page when inserting blank page under some conditions (#3529)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.8.0...6.9.0)

## Version 6.8.0, 2026-03-09

### Security (SEC)
- Limit allowed `/Length` value of stream (#3675)

### New Features (ENH)
- Add /IRT (in-reply-to) support for markup annotations (#3631)

### Documentation (DOC)
- Avoid using `PageObject.replace_contents` on PdfReader (#3669)
- Document how to disable jbig2dec calls

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.7.5...6.8.0)

## Version 6.7.5, 2026-03-02

### Security (SEC)
- Improve the performance of the ASCIIHexDecode filter (#3666)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.7.4...6.7.5)

## Version 6.7.4, 2026-02-27

### Security (SEC)
- Allow limiting output length for RunLengthDecode filter (#3664)

### Robustness (ROB)
- Deal with invalid annotations in extract_links (#3659)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.7.3...6.7.4)

## Version 6.7.3, 2026-02-24

### Security (SEC)
- Use zlib decompression limit when retrieving XFA data (#3658)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.7.2...6.7.3)

## Version 6.7.2, 2026-02-22

### Security (SEC)
- Prevent infinite loop from circular xref /Prev references (#3655)

### Bug Fixes (BUG)
- Fix wrong LUT size error (#3651)
- Fix handling of page boxes defined on `/Pages` (#3650)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.7.1...6.7.2)

## Version 6.7.1, 2026-02-17

### Security (SEC)
- Detect cyclic references when accessing TreeObject.children (#3645)
- Limit size of `/ToUnicode` entries (#3646)
- Limit FlateDecode recovery attempts (#3644)

### Bug Fixes (BUG)
- Avoid own object replacement logic in `PageObject.replace_contents` (#3638)
- Fix UnboundLocalError when update_page_form_field_values with /Sig (#3634)

### Robustness (ROB)
- Avoid division by zero when decoding FlateDecode PNG prediction (#3641)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.7.0...6.7.1)

## Version 6.7.0, 2026-02-08

### Deprecations (DEP)
- Deprecate support for abbreviations in decode_stream_data (#3617)

### New Features (ENH)
- Add ability to add font resources for 14 Adobe Core fonts in text widget annotations (#3624)

### Bug Fixes (BUG)
- Avoid invalid load for ICCBased FlateDecode images in mode 1 (#3619)

### Robustness (ROB)
- Fix AESV2 decryption when /Length missing in encrypt dict (#3629)
- Fix merging when annotations point to NullObject (#3613)
- Check for `self._info` being None in `compress_identical_objects` (#3612)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.6.2...6.7.0)

## Version 6.6.2, 2026-01-26

### Security (SEC)
- Detect cyclic references when retrieving outlines (#3610)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.6.1...6.6.2)

## Version 6.6.1, 2026-01-25

### Robustness (ROB)
- `/AcroForm` might be NullObject (#3601)
- Handle missing font bounding boxes gracefully (#3600)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.6.0...6.6.1)

## Version 6.6.0, 2026-01-09

### Security (SEC)
- Improve handling of partially broken PDF files (#3594)

### Deprecations (DEP)
- Block common page content modifications when assigned to reader (#3582)

### New Features (ENH)
- Embellishments to generated text appearance streams (#3571)

### Bug Fixes (BUG)
- Do not consider multi-byte BOM-like sequences as BOMs (#3589)

### Robustness (ROB)
- Avoid empty FlateDecode outputs without warning (#3579)

### Documentation (DOC)
- Add outlines documentation and link it in User Guide (#3511)

### Developer Experience (DEV)
- Add PyPy 3.11 to test matrix and benchmarks (#3574)

### Maintenance (MAINT)
- Fix compatibility with Pillow >= 12.1.0 (#3590)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.5.0...6.6.0)

## Version 6.5.0, 2025-12-21

### New Features (ENH)
- Limit jbig2dec memory usage (#3576)
- FontDescriptor: Initiate from embedded font resource (#3551)

### Robustness (ROB)
- Allow fallback to PBM files for jbig2dec without PNG support (#3567)
- Use warning instead of error for early EOD for RunLengthDecode (#3548)

### Developer Experience (DEV)
- Test with macOS as well (#3401)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.4.2...6.5.0)

## Version 6.4.2, 2025-12-14

### Bug Fixes (BUG)
- Fix KeyError when flattening form field without /Font in resources (#3554)

### Robustness (ROB)
- Allow deleting non-existent annotations (#3559)

### Documentation (DOC)
- Fix level of attachment heading (#3560)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.4.1...6.4.2)

## Version 6.4.1, 2025-12-07

### Performance Improvements (PI)
- Optimize loop for layout mode text extraction (#3543)

### Bug Fixes (BUG)
- Do not fail on choice field without /Opt key (#3540)

### Documentation (DOC)
- Document possible issues with merge_page and clipping (#3546)
- Add some notes about library security (#3545)

### Maintenance (MAINT)
- Use CORE_FONT_METRICS for widths where possible (#3526)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.4.0...6.4.1)

## Version 6.4.0, 2025-11-23

### Security (SEC)
- Reduce default limit for LZW decoding

### New Features (ENH)
- Parse and format comb fields in text widget annotations (#3519)

### Robustness (ROB)
- Silently ignore Adobe Ascii85 whitespace for suffix detection (#3528)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.3.0...6.4.0)

## Version 6.3.0, 2025-11-16

### New Features (ENH)
- Wrap and align text in flattened PDF forms (#3465)

### Bug Fixes (BUG)
- Fix missing "PreventGC" when cloning (#3520)
- Preserve JPEG image quality by default (#3516)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.2.0...6.3.0)

## Version 6.2.0, 2025-11-09

### New Features (ENH)
- Add 'strict' parameter to PdfWriter (#3503)

### Bug Fixes (BUG)
- PdfWriter.append fails when there are articles being None (#3509)

### Documentation (DOC)
- Execute docs examples in CI (#3507)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.1.3...6.2.0)

## Version 6.1.3, 2025-10-22

### Security (SEC)
- Allow limiting size of LZWDecode streams (#3502)
- Avoid infinite loop when reading broken DCT-based inline images (#3501)

### Bug Fixes (BUG)
- PageObject.scale() scales media box incorrectly (#3489)

### Robustness (ROB)
- Fail with explicit exception when image mode is an empty array (#3500)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.1.2...6.1.3)

## Version 6.1.2, 2025-10-19

### Bug Fixes (BUG)
- Fix handling of zero-length StreamObject (#3485)

### Robustness (ROB)
- Deal with wrong size for incremental PDF files (#3495)
- Improve handling for malformed cross-reference tables (#3483)

### Developer Experience (DEV)
- Use released Python 3.14
- Use Mapping instead of dict in type hint of update_page_form_field_values (#3490)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.1.1...6.1.2)

## Version 6.1.1, 2025-09-28

### Bug Fixes (BUG)
- Insert new embedded files in a sorted manner (#3477)
- Fix name tree handling for embedded files with Kids-based inputs (#3475)
- Make embedding files not break PDF/A-3 compliance (#3472)

### Documentation (DOC)
- Document AFRelationship handling for PDF/A and provide constants (#3478)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.1.0...6.1.1)

## Version 6.1.0, 2025-09-21

### New Features (ENH)
- Enhance XMP metadata handling with creation and setter methods (#3410)
- Add all font metrics for base 14 Type 1 PDF fonts (#3363)
- Allow deleting embedded files (#3461)
- Add support for Python in FIPS mode for document identifier (#3438)

### Bug Fixes (BUG)
- Fix handling of UTF-16 encoded destination titles (#3463)
- Guard empty input to prevent IndexError (#3448)

### Developer Experience (DEV)
- Fix type hint for XMP metadata setter to add bytes type (#3464)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/6.0.0...6.1.0)

## Version 6.0.0, 2025-08-11

### Security (SEC)
- Limit decompressed size for FlateDecode filter (#3430)

### Deprecations (DEP)
- Drop Python 3.8 support (#3412)

### New Features (ENH)
- Move BlackIs1 functionality to tiff_header (#3421)

### Robustness (ROB)
- Skip Go-To actions without a destination (#3420)

### Developer Experience (DEV)
- Update code style related libraries (#3414)
- Update mypy to 1.17.0 (#3413)
- Stop testing on Python 3.8 and start testing on Python 3.14 (#3411)

### Maintenance (MAINT)
- Cleanup deprecations (#3424)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.9.0...6.0.0)

## Version 5.9.0, 2025-07-27

### New Features (ENH)
- Automatically preserve links in added pages (#3298)
- Allow writing/updating all properties of an embedded file (#3374)

### Bug Fixes (BUG)
- Fix XMP handling dropping indirect references (#3392)

### Robustness (ROB)
- Deal with DecodeParms being empty list (#3388)

### Documentation (DOC)
- Document how to read and modify XMP metadata (#3383)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.8.0...5.9.0)

## Version 5.8.0, 2025-07-13

### New Features (ENH)
- Implement flattening for writer (#3312)

### Bug Fixes (BUG)
- Unterminated object when using PdfWriter with incremental=True (#3345)

### Robustness (ROB)
- Resolve some image extraction edge cases (#3371)
- Ignore faulty trailing newline during RLE decoding (#3355)
- Gracefully handle odd-length strings in parse_bfchar (#3348)

### Developer Experience (DEV)
- Modernize license specifiers (#3338)

### Maintenance (MAINT)
- Reduce max-complexity of tool.ruff.lint.mccabe (#3365)
- Refactor text extraction code

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.7.0...5.8.0)

## Version 5.7.0, 2025-06-29

### Performance Improvements (PI)
- Performance optimization for LZW decoding (#3329)

### Robustness (ROB)
- Flate decoding for streams with faulty tail bytes (#3332)
- dc_creator could be a Bag as well (#3333)
- Handle tree being NullObject when retrieving named destinations (#3331)

### Maintenance (MAINT)
- Move inline-image mappings to constants (#3328)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.6.1...5.7.0)

## Version 5.6.1, 2025-06-22

### New Features (ENH)
- Add PDF/A XMP metadata support (#3314)

### Robustness (ROB)
- Deal with annotations not being lists on merge (#3321)
- Handle NullObject for cmap encoding Differences entry (#3317)

### Developer Experience (DEV)
- Update ruff to 0.12.0 (#3316)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.6.0...5.6.1)

## Version 5.6.0, 2025-06-01

### New Features (ENH)
- Add basic support for JBIG2 by using jbig2dec (#3163)

### Bug Fixes (BUG)
- Fix crashes by removing unnecessary line (#3293)
- Add delimiters to NameObject.renumber_table (#3286)

### Robustness (ROB)
- Handle DecodeParms being a NullObject (#3285)

### Code Style (STY)
- Update to mypy 1.16.0 (#3300)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.5.0...5.6.0)

## Version 5.5.0, 2025-05-11

### New Features (ENH)
- Add support for IndirectObject.__iter__ (#3228)
- Allow filtering by font when removing text (#3216)

### Bug Fixes (BUG)
- Add missing named destinations being ByteStringObjects (#3282)
- Get font information more reliably when removing text (#3252)
- T* 2D Translation consistent with PDF 1.7 Spec (#3250)
- Add font stack to q/Q operations in layout mode (#3225)
- Avoid completely hiding image loading issues like exceeding image size limits (#3221)
- Using compress_identical_objects on transformed content duplicates differing content (#3197)
- Consider BlackIs1 parameter for CCITTFaxDecode filter (#3196)

### Robustness (ROB)
- Deal with insufficient cm matrix during text extraction (#3283)
- Allow merging when annotations miss D entry (#3281)
- Fix merging documents if there are no Dests (#3280)
- Fix crash on malformed action in outline (#3278)
- Fix compression issues for removed images which might be None (#3246)
- Attempt to deal with non-rectangular FlateDecode streams (#3245)
- Handle some None values for broken PDF files (#3230)

### Developer Experience (DEV)
- Multiple style improvements
- Update ruff to 0.11.0

### Maintenance (MAINT)
- Conform ASCIIHexDecode implementation to specification (#3274)
- Modify comments of filters that do not use decode_parms (#3260)

### Code Style (STY)
- Simplify warnings & debugging in layout mode text extraction (#3271)
- Standardize mypy assert statements (#3276)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.4.0...5.5.0)

## Version 5.4.0, 2025-03-16

### New Features (ENH)
- Add support for `IndirectObject.__contains__` (#3155)

### Bug Fixes (BUG)
- Fix detection of inline images followed by names or numbers (#3173)

### Robustness (ROB)
- Consider root objects without catalog type as fallback (#3175)
- Raise proper error on infinite loop when reading objects (#3169)

### Documentation (DOC)
- Mention memory consumption of text extraction (#3168)

### Developer Experience (DEV)
- Upgrade to ruff 0.10.0 (#3191)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.3.1...5.4.0)

## Version 5.3.1, 2025-03-02

### Bug Fixes (BUG)
- Use the correct name StandardEncoding for the predefined cmap (#3156)
- Handle inline images containing `EI ` sequences (#3152)
- Fix check box value which should be name object (#3124)
- Fix stream position on inline image fallback extraction (#3120)
- Fix object count for incremental writer (#3117)

### Robustness (ROB)
- Avoid index errors on empty lines in xref table (#3162)
- Improve handling of LZW decoder table overflow (#3159)
- Ignore non-numbers for width when building font width map (#3158)
- Avoid negative seek values when reading partially broken files (#3157)

### Documentation (DOC)
- Fixed PageObject.images example usage for replacing image (#3149)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.3.0...5.3.1)

## Version 5.3.0, 2025-02-09

### New Features (ENH)
- Handle attachments in /Kids and provide object-oriented API (#3108)

### Bug Fixes (BUG)
- Handle annotations being None on merging (#3111)

### Robustness (ROB)
- Prevent excessive layout mode text output from Type3 fonts (#3082)

### Documentation (DOC)
- stefan6419846 becomes BDFL of pypdf (#3078)
- Tidy the visitor function description (#3086)

### Developer Experience (DEV)
- Remove ignoring multiple Ruff rules
- Remove unused mutmut configuration (#3092)

### Testing (TST)
- Fix warning assertions to use `pytest.warns()` (#3083)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.2.0...5.3.0)

## Version 5.2.0, 2025-01-26

### Deprecations (DEP)
- Deprecate with replacement CCITParameters (#3019)
- Correct deprecation of interiour_color (#2947)

### New Features (ENH)
- Support alternative (U)F names for embedded file retrieval (#3072)
- Adding support for reading .metadata.keywords (#2939)

### Bug Fixes (BUG)
- Handle further Tf operators in text extraction layout mode (#3073)
- Ensure `add_metadata` can deal with `_info = None` (#3040)
- Handle IndirectObject in CCITTFaxDecode filter (#2965)
- Handle chained colorspace for inline images when no filter is set (#3008)
- Avoid extracting inline images twice and dropping other operators (#3002)
- Fixed reference of value with `str.__new__` in TextStringObject (#2952)
- Handle indirect objects in font width calculations (#2967)
- Title sometimes is bytes and not str (#2930)
- Fix undefined variable for text extraction (regression) (#2934)
- Don't close stream passed to PdfWriter.write() (#2909)

### Robustness (ROB)
- Handle zero height fonts when extracting text (#3075)
- Deal with content streams not containing streams (#3005)
- Gracefully handle some text operators when the operands are missing (#3006)
- Fall back to non-Adobe Ascii85 format for missing end markers (#3007)
- Ignore odd-length strings when processing cmap lines (#3009)
- Skip annotation destination being NullObject in PdfWriter (#2964)
- Skip destination page being None in PdfWriter (#2963)
- Fix infinite loop case when reading null objects within an Array
- Fixing infinite loop in ArrayObject read_from_stream (#2928)

### Documentation (DOC)
- Add note about default line colors (#3014)

### Developer Experience (DEV)
- Remove ignoring Ruff rule PGH004 (#3071)
- Tidy ignore array in tool.ruff.lint (#3069)
- Move Windows CI to Python 3.13 (#3003)
- Move to Ubuntu 22.04 (#3004)

### Maintenance (MAINT)
- Fix formatting of warning message and include exception message (#3076)
- Narrow return type for `ContentStream.operations` (#2941)

### Testing (TST)
- Fix image similarity for upcoming Ubuntu 24.04 (#3039)
- Replace broken Apache Tika Corpora urls (#3041)

### Code Style (STY)
- Add form feed to WHITESPACES (#3054)
- Lots of small internal changes

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.1.0...5.2.0)

## Version 5.1.0, 2024-10-27

### New Features (ENH)
- Add `layout_mode_font_height_weight` argument to `PageObject.extract_text()` (#2920)

### Bug Fixes (BUG)
- Fix font specifier for FreeText annotation (#2893)
- Line breaks are not generated due to incorrect calculation of text leading (#2890)
- Improve handling of spaces in text extraction (#2882)

### Robustness (ROB)
- Soft failure for flate encode image mode 1 with wrong LUT size (#2900)

### Documentation (DOC)
- Use latest package versions (#2907)
- Correct example of reading FileAttachment annotation (#2906)

### Developer Experience (DEV)
- Update pinned requirements (#2918)
- Make make_release.py compatible with Windows environment (#2894)

### Maintenance (MAINT)
- Remove references to outdated Python versions (#2919)
- Generalize the method of obtaining space_code (#2891)
- Unnecessary character mapping process (#2888)
- New LZW decoding implementation (#2887)

### Testing (TST)
- Add LzwCodec for encoding (#2883)

### Code Style (STY)
- Capitalize error messages (#2903)
- Modify error messages in PdfWriter (#2902)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.0.1...5.1.0)

## Version 5.0.1, 2024-09-29

### New Features (ENH)
- Add `full` parameter to PdfWriter constructor (#2865)

### Bug Fixes (BUG)
- Update pyproject.toml with minimum Python version of 3.8 (#2859)
- Cope with unbalanced delimiters in dictionary object (#2878)
- Cope with encoding with too many differences (#2873)
- Missing spaces in extract_text() method (#1328) (#2868)
- Tolerate truncated files and no warning when jumping startxref (#2855)

### Robustness (ROB)
- Repair PDF with invalid Root object (#2880)
- Continue parsing dictionary object when error is detected (#2872)
- Merge documents with invalid pages in named destinations (#2857)
- Tolerate comments in arrays (#2856)

### Developer Experience (DEV)
- Use latest Python version for benchmarking (#2879)

### Maintenance (MAINT)
- Add tests to source distributions (#2874)
- Refactor _update_field_annotation (#2862)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/5.0.0...5.0.1)

## Version 5.0.0, 2024-09-15

This version drops support for Python 3.7 (not maintained since July 2023), PdfMerger (use PdfWriter instead) and AnnotationBuilder (use annotations instead).

### Deprecations (DEP)
- Remove the deprecated PdfMerger and AnnotationBuilder classes and other deprecations cleanup (#2813)
- Drop Python 3.7 support (#2793)

### New Features (ENH)
- Add capability to remove /Info from PDF (#2820)
- Add incremental capability to PdfWriter (#2811)
- Add UniGB-UTF16 encodings (#2819)
- Accept utf strings for metadata (#2802)
- Report PdfReadError instead of RecursionError (#2800)
- Compress PDF files merging identical objects (#2795)

### Bug Fixes (BUG)
- Fix sheared image (#2801)

### Robustness (ROB)
- Robustify .set_data() (#2821)
- Raise PdfReadError when missing /Root in trailer (#2808)
- Fix extract_text() issues on damaged PDFs (#2760)
- Handle images with empty data when processing an image from bytes (#2786)

### Developer Experience (DEV)
- Fix coverage uploads (#2832)
- Test against Python 3.13 (#2776)


[Full Changelog](https://github.com/py-pdf/pypdf/compare/4.3.1...5.0.0)

## Version 4.3.1, 2024-07-21

### Bug Fixes (BUG)
- Cope with Matrix entry in field annotations (#2736)

### Robustness (ROB)
- Cope with fields with upside down box/rectangle (#2729)

### Maintenance (MAINT)
- Add deprecate_with_replacement to StreamObject.initializeFromDictionary (#2728)
- Deal with cryptography>=43 moving ARC4 (#2765)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/4.3.0...4.3.1)

## Version 4.3.0, 2024-06-23

### New Features (ENH)
- Accept ETen-B5 and UniCNS-UTF16 encodings (#2721)
- Add decode_as_image() to ContentStreams (#2615)
- Add context manager for PdfReader (#2666)
- Add capability to set font and size in fields (#2636)
- Allow to pass input file without named argument (#2576)

### Bug Fixes (BUG)
- Fix deprecation for Ressources when using old constants (#2705)
- Fix images issue 4 bits encoding and LUT starting with UTF16_BOM (#2675)
- Reading large compressed images takes huge time to process (#2644)
- Highlighted Text Cannot Be Printed (#2604)
- Fix UnboundLocalError on malformed pdf (#2619)

### Robustness (ROB)
- Cope with missing Standard 14 fonts in fields (#2677)
- Improve inline image extraction (#2622)
- Cope with loops in Fields tree (#2656)
- Discard /I in choice fields for compatibility with Acrobat (#2614)
- Cope with some issues in pillow (#2595)
- Cope with some image extraction issues (#2591)

### Documentation (DOC)
- Various improvements on docstrings and examples

### Maintenance (MAINT)
- Deprecate interiour_color with replacement interior_color (#2706)
- Add deprecate_with_replacement to PdfWriter.find_bookmark (#2674)

### Code Style (STY)
- Change Link to be a non-markup annotation (#2714)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/4.2.0...4.3.0)

## Version 4.2.0, 2024-04-07

### New Features (ENH)
- Allow multiple charsets for NameObject.read_from_stream (#2585)
- Add support for /Kids in page labels (#2562)
- Allow to update fields on many pages (#2571)
- Tolerate PDF with invalid xref pointed objects (#2335)
- Add Enforce from PDF2.0 in viewer_preferences (#2511)
- Add += and -= operators to ArrayObject (#2510)

### Bug Fixes (BUG)
- Fix merge_page sometimes generating unknown operator 'QQ' (#2588)
- Fix fields update where annotations are kids of field (#2570)
- Process CMYK images without a filter correctly (#2557)
- Extract text in layout mode without finding resources (#2555)
- Prevent recursive loop in some PDF files (#2505)

### Robustness (ROB)
- Tolerate "truncated" xref (#2580)
- Replace error by warning for EOD in RunLengthDecode/ASCIIHexDecode (#2334)
- Rebuild xref table if one entry is invalid (#2528)
- Robustify stream extraction (#2526)

### Documentation (DOC)
- Update release process for latest changes (#2564)
- Encryption/decryption: Clone document instead of copying all pages (#2546)
- Minor improvements (#2542)
- Update annotation list (#2534)
- Update references and formatting (#2529)
- Correct threads reference, plus minor changes (#2521)
- Minor readability increases (#2515)
- Simplify PaperSize examples (#2504)
- Minor improvements (#2501)

### Developer Experience (DEV)
- Remove unused dependencies (#2572)
- Remove page labels PR link from message (#2561)
- Fix changelog generator regarding whitespace and handling of "Other" group (#2492)
- Add REL to known PR prefixes (#2554)
- Release using the REL commit instead of git tag (#2500)
- Unify code between PdfReader and PdfWriter (#2497)
- Bump softprops/action-gh-release from 1 to 2 (#2514)

### Maintenance (MAINT)
- Ressources → Resources (and internal name childs) (#2550)
- Fix typos found by codespell (#2549)
- Update Read the Docs configuration (#2538)
- Add root_object, _info and _ID to PdfReader (#2495)

### Testing (TST)
- Allow loading truncated images if required (#2586)
- Fix download issues from #2562 (#2578)
- Improve test_get_contents_from_nullobject to show real use-case (#2524)
- Add missing test annotations (#2507)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/4.1.0...4.2.0)

## Version 4.1.0, 2024-03-03

Generating name objects (`NameObject`) without a leading slash
is considered deprecated now. Previously, just a plain warning
would be logged, leading to possibly invalid PDF files. According
to our deprecation policy, this will log a *DeprecationWarning*
for now.

### New Features (ENH)
- Add get_pages_from_field  (#2494)
- Add reattach_fields function (#2480)
- Automatic access to pointed object for IndirectObject (#2464)

### Bug Fixes (BUG)
- Missing error on name without leading / (#2387)
- encode_pdfdocencoding() always returns bytes (#2440)
- BI in text content identified as image tag (#2459)

### Robustness (ROB)
- Missing basefont entry in type 3 font (#2469)

### Documentation (DOC)
- Improve lossless compression example (#2488)
- Amend robustness documentation (#2479)

### Developer Experience (DEV)
- Fix changelog for UTF-8 characters (#2462)

### Maintenance (MAINT)
- Add _get_page_number_from_indirect in writer (#2493)
- Remove user assignment for feature requests (#2483)
- Remove reference to old 2.0.0 branch (#2482)

### Testing (TST)
- Fix benchmark failures (#2481)
- Broken test due to expired test file URL (#2468)
- Resolve file naming conflict in test_iss1767 (#2445)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/4.0.2...4.1.0)

## Version 4.0.2, 2024-02-18

### Bug Fixes (BUG)
-  Use NumberObject for /Border elements of annotations (#2451)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/4.0.1...4.0.2)

## Version 4.0.1, 2024-01-28

### Bug Fixes (BUG)
-  layout mode text extraction ZeroDivisionError (#2417)

### Testing (TST)
-  Skip tests using fpdf2 if it's not installed (#2419)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/4.0.0...4.0.1)

## Version 4.0.0, 2024-01-19

### Deprecations (DEP)
-  Drop Python 3.6 support (#2369)
-  Remove deprecated code (#2367)
-  Remove deprecated XMP properties (#2386)

### New Features (ENH)
-  Add "layout" mode for text extraction (#2388)
-  Add Jupyter Notebook integration for PdfReader (#2375)
-  Improve/rewrite PDF permission retrieval (#2400)

### Bug Fixes (BUG)
-  PdfWriter.add_uri was setting the wrong type (#2406)
-  Add support for GBK2K cmaps (#2385)

### Maintenance (MAINT)
-  Return None instead of -1 when page is not attached (#2376)
-  Complete FileSpecificationDictionaryEntries constants (#2416)
-  Replace warning with logging.error (#2377)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.17.4...4.0.0)

## Version 3.17.4, 2023-12-24

### Bug Fixes (BUG)
-  Handle IndirectObject as image filter (#2355)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.17.3...3.17.4)

## Version 3.17.3, 2023-12-17

### Robustness (ROB)
-  Out-of-bounds issue in handle_tj (text extraction) (#2342)

### Developer Experience (DEV)
-  Make make_release.py easier to configure (#2348)

### Maintenance (MAINT)
-  Bump actions/download-artifact from 3 to 4 (#2344)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.17.2...3.17.3)

## Version 3.17.2, 2023-12-10

### Bug Fixes (BUG)
-  Cope with deflated images with CMYK Black Only (#2322)
-  Handle indirect objects as parameters for CCITTFaxDecode (#2307)
-  check words length in _cmap type1_alternative function (#2310)

### Robustness (ROB)
-  Relax flate decoding for too many lookup values (#2331)
-  Let _build_destination skip in case of missing /D key (#2018)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.17.1...3.17.2)

## Version 3.17.1, 2023-11-14

### Bug Fixes (BUG)
-  Mediabox expansion size when applying non-right angle rotation (#2282)

### Robustness (ROB)
-  MissingWidth is IndirectObject (#2288)
-  Initialize states array with an empty value (#2280)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.17.0...3.17.1)

## Version 3.17.0, 2023-10-29

### Security (SEC)
-  Infinite recursion when using PdfWriter(clone_from=reader) (#2264)

### New Features (ENH)
-  Add parameter to select images to be removed (#2214)

### Bug Fixes (BUG)
-  Correctly handle image mode 1 with FlateDecode (#2249)
-  Error when filling a value with parentheses #2268 (#2269)
-  Handle empty root outline (#2239)


[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.16.4...3.17.0)

## Version 3.16.4, 2023-10-10

### Bug Fixes (BUG)
-  Avoid exceeding recursion depth when retrieving image mode (#2251)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.16.3...3.16.4)

## Version 3.16.3, 2023-10-08

### Bug Fixes (BUG)
-  Invalid cm/tm in visitor functions (#2206)
-  Encrypt / decrypt Stream object dictionaries (#2228)
-  Support nested color spaces for the /DeviceN color space (#2241)
-  Images property fails if NullObject in list (#2215)

### Developer Experience (DEV)
-  Unify mypy options and warn redundant workarounds (#2223)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.16.2...3.16.3)

## Version 3.16.2, 2023-09-24

### Bug Fixes (BUG)
-  PDF size increases because of too high float writing precision (#2213)
-  Fix test_watermarking_reportlab_rendering() (#2203)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.16.1...3.16.2)

## Version 3.16.1, 2023-09-17

⚠️ The 'rename PdfWriter.create_viewer_preference to
PdfWriter.create_viewer_preferences (#2190)' could be a breaking change for you,
if you use it. As it was only introduced last week I'm confident enough that
nobody will be affected though. Hence only the patch update.

### Bug Fixes (BUG)
-  Missing new line in extract_text with cm operations (#2142)
-  _get_fonts not processing properly CIDFonts and annotations (#2194)

### Maintenance (MAINT)
-  Rename PdfWriter.create_viewer_preference to PdfWriter.create_viewer_preferences (#2190)


[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.16.0...3.16.1)

## Version 3.16.0, 2023-09-10

### Security (SEC)
-  Infinite recursion caused by IndirectObject clone (#2156)

### New Features (ENH)
-  Ease access to ViewerPreferences (#2144)

### Bug Fixes (BUG)
-  Catch the case where w[0] is an IndirectObject instead of an int (#2154)
-  Cope with indirect objects in filters and remove deprecated code (#2177)
-  Accept tabs in cmaps (#2174) / cope with extra space (#2151)
-  Merge pages without resources (#2150)
-  getcontents() shall return None if contents is NullObject (#2161)
-  Fix conversion from 1 to LA (#2175)

### Robustness (ROB)
-  Accept XYZ with no arguments (#2178)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.15.5...3.16.0)

## Version 3.15.5, 2023-09-03

### Bug Fixes (BUG)
-  Cope with missing /I in articles (#2134)
-  Fix image look-up table in EncodedStreamObject (#2128)
-  remove_images not operating in sub level forms (#2133)

### Robustness (ROB)
-  Cope with damaged PDF (#2129)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.15.4...3.15.5)

## Version 3.15.4, 2023-08-27

### Performance Improvements (PI)
-  Making pypdf as fast as pdfrw (#2086)

### Maintenance (MAINT)
-  Relax typing_extensions version (#2104)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.15.3...3.15.4)

## Version 3.15.3, 2023-08-26

### Bug Fixes (BUG)
-  Check version of crypt provider (#2115)
-  TypeError: can't concat str to bytes (#2114)
-  Require flit_core >= 3.9 (#2091)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.15.2...3.15.3)

## Version 3.15.2, 2023-08-20

### Security (SEC)
-  Avoid endless recursion of reading damaged PDF file (#2093)

### Performance Improvements (PI)
-  Reuse content stream (#2101)

### Maintenance (MAINT)
-  Make ParseError inherit from PyPdfError (#2097)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.15.1...3.15.2)

## Version 3.15.1, 2023-08-13

### Performance Improvements (PI)
-  optimize _decode_png_prediction (#2068)

### Bug Fixes (BUG)
-  Fix incorrect tm_matrix in call to visitor_text (#2060)
-  Writing German characters into form fields (#2047)
-  Prevent stall when accessing image in corrupted pdf (#2081)
-  append() fails when articles do not have /T (#2080)

### Robustness (ROB)
-  Cope with xref not followed by separator (#2083)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.15.0...3.15.1)

## Version 3.15.0, 2023-08-06

### New Features (ENH)
-  Add `level` parameter to compress_content_streams (#2044)
-  Process /uniHHHH for text_extract (#2043)

### Bug Fixes (BUG)
-  Fix AnnotationBuilder.link (#2066)
-  JPX image without ColorSpace  (#2062)
-  Added check for field /Info when cloning reader document (#2055)
-  Fix indexed/CMYK images (#2039)

### Maintenance (MAINT)
-  Cryptography as primary dependency (#2053)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.14.0...3.15.0)

## Version 3.14.0, 2023-07-29

### New Features (ENH)
-  Accelerate image list keys generation (#2014)
-  Use `cryptography` for encryption/decryption as a fallback for PyCryptodome (#2000)
-  Extract LaTeX characters (#2016)
-  ASCIIHexDecode.decode now returns bytes instead of str (#1994)

### Bug Fixes (BUG)
-  Add RunLengthDecode filter (#2012)
-  Process /Separation ColorSpace (#2007)
-  Handle single element ColorSpace list (#2026)
-  Process lookup decoded as TextStringObjects (#2008)

### Robustness (ROB)
-  Cope with garbage collector during cloning (#1841)

### Maintenance (MAINT)
-  Cleanup of annotations (#1745)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.13.0...3.14.0)

## Version 3.13.0, 2023-07-23

### New Features (ENH)
-  Add is_open in outlines in PdfReader and PdfWriter (#1960)

### Bug Fixes (BUG)
-  Search /DA in hierarchy fields (#2002)
-  Cope with different ISO date length (#1999)
-  Decode Black only/CMYK deviceN images (#1984)
-  Process CMYK in deflate images (#1977)

### Developer Experience (DEV)
-  Add mypy to pre-commit (#2001)
-  Release automation (#1991, #1985)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.12.2...3.13.0)

## Version 3.12.2, 2023-07-16

### Bug Fixes (BUG)
-  Accept calRGB and calGray color_spaces (#1968)
-  Process 2bits and 4bits images (#1967)
-  Check for AcroForm and ensure it is not None (#1965)

### Developer Experience (DEV)
-  Automate the release process (#1970)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.12.1...3.12.2)

## Version 3.12.1, 2023-07-09

### Bug Fixes (BUG)
-  Prevent updating page contents after merging page (stamping/watermarking) (#1952)
-  % to be hex encoded in names (#1958)
-  Inverse color in CMYK images (#1947)
-  Dates conversion not working with Z00\'00\' (#1946)
-  Support UTF-16-LE Strings (#1884)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.12.0...3.12.1)

## Version 3.12.0, 2023-07-02

### New Features (ENH)
-  Add AES support for encrypting PDF files (#1918, #1935, #1936, #1938)
-  Add page deletion feature to PdfWriter (#1843)

### Bug Fixes (BUG)
-  PdfReader.get_fields() attempts to delete non-existing index "/Off" (#1933)
-  Remove unused objects when cloning_from (#1926)
-  Add the TK.SIZE into the trailer (#1911)
-  add_named_destination() maintains named destination list sort order (#1930)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.11.1...3.12.0)

## Version 3.11.1, 2023-06-25

### Bug Fixes (BUG)
- Cascaded filters in image objects (#1913)
- Append pdf with named destination using numbers for pages (#1858)
- Ignore "/B" fields only on pages in PdfWriter.append() (#1875)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.11.0...3.11.1)

## Version 3.11.0, 2023-06-23

### New Features (ENH)
-  Add page_number property (#1856)

### Bug Fixes (BUG)
- File expansion when updating with Page Contents (#1906)
- Missing Alternate in indexed/ICCbased colorspaces (#1896)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.10.0...3.11.0)

## Version 3.10.0, 2023-06-18

### New Features (ENH)
-  Extraction of inline images (#1850)
-  Add capability to replace image (#1849)
-  Extend images interface by returning an ImageFile(File) class (#1848)
-  Add set_data to EncodedStreamObject (#1854)

### Bug Fixes (BUG)
-  Fix RGB FlateEncode Images(PNG) and transparency (#1834)
-  Generate static appearance for fields (#1864)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.9.1...3.10.0)

## Version 3.9.1, 2023-06-04

### Deprecations (DEP)
-  Deprecate PdfMerger (#1866)

### Bug Fixes (BUG)
-  Ignore UTF-8 decode errors (#1865)

### Robustness (ROB)
-  Handle missing /Type entry in Page tree (#1859)


[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.9.0...3.9.1)

## Version 3.9.0, 2023-05-21

### New Features (ENH)
-  Simplify metadata input (Document Information Dictionary) (#1851)
-  Extend cmap compatibility to GBK_EUC_H/V (#1812)

### Bug Fixes (BUG)
-  Prevent infinite loop when no character follows after a comment (#1828)
-  get_contents does not return ContentStream (#1847)
-  Accept XYZ destination with zoom missing (default to zoom=0.0) (#1844)
-  Cope with 1 Bit images (#1815)

### Robustness (ROB)
-  Handle missing /Type entry in Page tree (#1845)

### Documentation (DOC)
-  Expand file size explanations (#1835)
-  Add comparison with pdfplumber (#1837)
-  Clarify that PyPDF2 is dead (#1827)
-  Add Hunter King as Contributor for #1806

### Maintenance (MAINT)
-  Refactor internal Encryption class (#1821)
-  Add R parameter to generate_values (#1820)
-  Make encryption_key parameter of write_to_stream optional (#1819)
-  Prepare for adding AES encryption support (#1818)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.8.1...3.9.0)


## Version 3.8.1, 2023-04-23

### Bug Fixes (BUG)
-  Convert color space before saving (#1802)

### Documentation (DOC)
-  PDF/A (#1807)
-  Use append instead of add_page
-  Document core mechanics of pypdf (#1783)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.8.0...3.8.1)

## Version 3.8.0, 2023-04-16

### New Features (ENH)
-  Add transform method to Transformation class (#1765)
-  Cope with UC2 fonts in text_extraction (#1785)

### Robustness (ROB)
-  Invalid startxref pointing 1 char before (#1784)

### Maintenance (MAINT)
-  Mark code handling old parameters as deprecated (#1798)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.7.1...3.8.0)


## Version 3.7.1, 2023-04-09

### Security (SEC)
-  Warn about PDF encryption security (#1755)

### Robustness (ROB)
-  Prevent loop in Cloning (#1770)
-  Capture UnicodeDecodeError at PdfReader.pdf_header (#1768)

### Documentation (DOC)
-  Add .readthedocs.yaml and bump docs dependencies using `tox -e deps` (#1750, #1752)

### Developer Experience (DEV)
-  Make make_changelog.py idempotent

### Maintenance (MAINT)
-  Move generation of file identifiers to a method (#1760)

### Testing (TST)
-  Add xmp test (#1775)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.7.0...3.7.1)

## Version 3.7.0, 2023-03-26

### Security (SEC)
-  Use Python's secrets module instead of random module (#1748)

### New Features (ENH)
-  Add AnnotationBuilder.highlight text markup annotation (#1740)
-  Add AnnotationBuilder.popup (#1665)
-  Add AnnotationBuilder.polyline annotation support (#1726)
-  Add clone_from parameter in PdfWriter constructor (#1703)

### Bug Fixes (BUG)
-  'DictionaryObject' object has no attribute 'indirect_reference' (#1729)

### Robustness (ROB)
-  Handle params NullObject in decode_stream_data (#1738)

### Documentation (DOC)
-  Project scope (#1743)

### Maintenance (MAINT)
-  Add AnnotationFlag (#1746)
-  Add LazyDict.__str__ (#1727)


[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.6.0...3.7.0)


## Version 3.6.0, 2023-03-18

### New Features (ENH)
-  Extend PdfWriter.append() to PageObjects (#1704)
-  Support qualified names in update_page_form_field_values (#1695)

### Robustness (ROB)
-  Tolerate streams without length field (#1717)
-  Accept DictionaryObject in /D of NamedDestination (#1720)
-  Widths def in cmap calls IndirectObject (#1719)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.5.2...3.6.0)


## Version 3.5.2, 2023-03-12

⚠️ We discovered that compress_content_stream has to be applied to a page of
  the PdfWriter. It may not be applied to a page of the PdfReader!

### Bug Fixes (BUG)
-  compress_content_stream not readable in Adobe Acrobat (#1698)
-  Pass logging parameters correctly in set_need_appearances_writer (#1697)
-  Write /Root/AcroForm in set_need_appearances_writer (#1639)

### Robustness (ROB)
-  Allow more whitespaces within linearized file (#1701)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.5.1...3.5.2)


## Version 3.5.1, 2023-03-05

### Robustness (ROB)
-  Some attributes not copied in DictionaryObject._clone (#1635)
-  Allow merging multiple time pages with annots (#1624)

### Testing (TST)
-  Replace pytest.mark.external by enable_socket (#1657)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.5.0...3.5.1)

## Version 3.5.0, 2023-02-26

### New Features (ENH)
-  Add reader.attachments public interface (#1611, #1661)
-  Add PdfWriter.remove_objects_from_page(page: PageObject, to_delete: ObjectDeletionFlag) (#1648)
-  Allow free-text annotation to have transparent border/background (#1664)

### Bug Fixes (BUG)
-  Allow decryption with empty password for AlgV5 (#1663)
-  Let PdfWriter.pages return PageObject after calling `clone_document_from_reader()` (#1613)
-  Invalid font pointed during merge_resources (#1641)

### Robustness (ROB)
-  Cope with invalid objects in IndirectObject.clone (#1637)
-  Improve tolerance to invalid Names/Dests (#1658)
-  Decode encoded values in get_fields (#1636)
-  Let PdfWriter.merge cope with missing "/Fields" (#1628)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.4.1...3.5.0)


## Version 3.4.1, 2023-02-12

### Bug Fixes (BUG)
-  Switch from trimbox to cropbox when merging pages (#1622)
-  Text extraction not working with one glyph to char sequence (#1620)

### Robustness (ROB)
-  Fix 2 cases of "object has no attribute \'indirect_reference\'" (#1616)

### Testing (TST)
-  Add multiple retry on get_url for external PDF downloads (#1626)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.4.0...3.4.1)

## Version 3.4.0, 2023-02-05

NOTICE: pypdf changed the way it represents numbers parsed from PDF files.
  pypdf<3.4.0 represented numbers as Decimal, pypdf>=3.4.0 represents them as
  floats. Several other PDF libraries do this, as well as many PDF viewers.
  We hope to fix issues with too high precision like this and get a speed boost.
  In case your PDF documents rely on more than 18 decimals of precision you
  should check if it still works as expected.
  To clarify: This does not affect the text shown in PDF documents. It affects
  numbers, e.g. when graphics are drawn on the PDF or very exact positions are
  used. Typically, 5 decimals should be enough.

### New Features (ENH)
-  Enable merging forms with overlapping names (#1553)
-  Add 'over' parameter to merge_transformed_page & co (#1567)

### Bug Fixes (BUG)
-  Fix getter of the PageObject.rotation property with an indirect object (#1602)
-  Restore merge_transformed_page & co (#1567)
-  Replace decimal by float (#1563)

### Robustness (ROB)
-  PdfWriter.remove_images: /Contents might not be in page_ref (#1598)

### Developer Experience (DEV)
-  Introduce ruff (#1586, #1609)

### Maintenance (MAINT)
-  Remove decimal (#1608)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.3.0...3.4.0)

## Version 3.3.0, 2023-01-22

### New Features (ENH)
-  Add page label support to PdfWriter (#1558)
-  Accept inline images with space before EI (#1552)
-  Add circle annotation support (#1556)
-  Add polygon annotation support (#1557)
-  Make merging pages produce a deterministic PDF (#1542, #1543)

### Bug Fixes (BUG)
-  Fix error in cmap extraction (#1544)
-  Remove erroneous assertion check (#1564)
-  Fix dictionary access of optional page label keys (#1562)

### Robustness (ROB)
-  Set ignore_eof=True for read_until_regex (#1521)

### Documentation (DOC)
-  Paper size (#1550)

### Developer Experience (DEV)
-  Fix broken combination of dependencies of docs.txt
-  Annotate tests appropriately (#1551)

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.2.1...3.3.0)


## Version 3.2.1, 2023-01-08

### Bug Fixes (BUG)
-  Accept hierarchical fields (#1529)

### Documentation (DOC)
-  Use google style docstrings (#1534)
-  Fix linked markdown documents (#1537)

### Developer Experience (DEV)
-  Update docs config (#1535)

## Version 3.2.0, 2022-12-31

### Performance Improvements (PI)
-  Help the specializing adaptive interpreter (#1522)

### New Features (ENH)
-  Add support for page labels (#1519)

### Bug Fixes (BUG)
-  upgrade clone_document_root (#1520)


[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.1.0...3.2.0)

## Version 3.1.0, 2022-12-23

Move PyPDF2 to pypdf (#1513). The name is now all lowercase with no number,
both for installation and for import. PyPDF2 will no longer receive updates.
The community should move back to its roots.

If you were still using pyPdf or PyPDF2 < 2.0.0, I recommend reading the
migration guide: https://pypdf.readthedocs.io/en/latest/user/migration-1-to-2.html

pypdf==3.1.0 is only different from PyPDF2==3.0.0 in the package name.
Replacing "PyPDF2" by "pypdf" should be enough if you migrate from
`PyPDF2==3.0.0` to `pypdf==3.1.0`.

[Full Changelog](https://github.com/py-pdf/pypdf/compare/3.0.0...3.1.0)

## Version 3.0.0, 2022-12-22

### BREAKING CHANGES ⚠️
-  Deprecate features with PyPDF2==3.0.0 (#1489)
-  Refactor Fit / Zoom parameters (#1437)

### New Features (ENH)
-  Add Cloning  (#1371)
-  Allow int for indirect_reference in PdfWriter.get_object (#1490)

### Documentation (DOC)
-  How to read PDFs from S3 (#1509)
-  Make MyST parse all links as simple hyperlinks (#1506)
-  Changed 'latest' for 'stable' generated docs (#1495)
-  Adjust deprecation procedure (#1487)

### Maintenance (MAINT)
-  Use typing.IO for file streams (#1498)


[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.12.1...3.0.0)

## Version 2.12.1, 2022-12-10

### Documentation (DOC)
-  Deduplicate extract_text docstring (#1485)
-  How to cite PyPDF2 (#1476)

### Maintenance (MAINT)
Consistency changes:
  -  indirect_ref/ido ➔ indirect_reference, dest➔ page_destination (#1467)
  -  owner_pwd/user_pwd ➔ owner_password/user_password (#1483)
  -  position ➜ page_number in Merger.merge (#1482)
  -  indirect_ref ➜ indirect_reference (#1484)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.12.0...2.12.1)


## Version 2.12.0, 2022-12-10

### New Features (ENH)
-  Add support to extract gray scale images (#1460)
-  Add 'threads' property to PdfWriter (#1458)
-  Add 'open_destination' property to PdfWriter (#1431)
-  Make PdfReader.get_object accept integer arguments (#1459)

### Bug Fixes (BUG)
-  Scale PDF annotations (#1479)

### Robustness (ROB)
-  Padding issue with AES encryption (#1469)
-  Accept empty object as null objects (#1477)

### Documentation (DOC)
-  Add module documentation for the PaperSize class (#1447)

### Maintenance (MAINT)
-  Use 'page_number' instead of 'pagenum' (#1365)
-  Add List of pages to PageRangeSpec (#1456)

### Testing (TST)
-  Cleanup temporary files (#1454)
-  Mark test_tounicode_is_identity as external (#1449)
-  Use Ubuntu 20.04 for running CI test suite (#1452)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.11.2...2.12.0)


## Version 2.11.2, 2022-11-20

### New Features (ENH)
-  Add remove_from_tree (#1432)
-  Add AnnotationBuilder.rectangle (#1388)

### Bug Fixes (BUG)
-  JavaScript executed twice (#1439)
-  ToUnicode stores /Identity-H instead of stream (#1433)
-  Declare Pillow as optional dependency (#1392)

### Developer Experience (DEV)
-  Link 'Full Changelog' automatically
-  Modify read_string_from_stream to a benchmark (#1415)
-  Improve error reporting of read_object (#1412)
-  Test Python 3.11 (#1404)
-  Extend Flake8 ignore list (#1410)
-  Use correct pytest markers (#1407)
-  Move project configuration to pyproject.toml (#1382)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.11.1...2.11.2)

## Version 2.11.1, 2022-10-09

### Bug Fixes (BUG)
- td matrix (#1373)
- Cope with cmap from #1322 (#1372)

### Robustness (ROB)
-  Cope with str returned from get_data in cmap (#1380)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.11.0...2.11.1)

## Version 2.11.0, 2022-09-25

### New Features (ENH)
-  Addition of optional visitor-functions in extract_text() (#1252)
-  Add metadata.creation_date and modification_date (#1364)
-  Add PageObject.images attribute (#1330)

### Bug Fixes (BUG)
-  Lookup index in _xobj_to_image can be ByteStringObject (#1366)
-  'IndexError: index out of range' when using extract_text (#1361)
-  Errors in transfer_rotation_to_content() (#1356)

### Robustness (ROB)
-  Ensure update_page_form_field_values does not fail if no fields (#1346)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.10.9...2.11.0)

## Version 2.10.9, 2022-09-18

### New Features (ENH)
-  Add rotation property and transfer_rotation_to_content (#1348)

### Performance Improvements (PI)
-  Avoid string concatenation with large embedded base64-encoded images (#1350)

### Bug Fixes (BUG)
-  Format floats using their intrinsic decimal precision (#1267)

### Robustness (ROB)
-  Fix merge_page for pages without resources (#1349)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.10.8...2.10.9)

## Version 2.10.8, 2022-09-14

### New Features (ENH)
-  Add PageObject.user_unit property (#1336)

### Robustness (ROB)
-  Improve NameObject reading/writing (#1345)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.10.7...2.10.8)

## Version 2.10.7, 2022-09-11

### Bug Fixes (BUG)
-  Fix Error in transformations (#1341)
-  Decode #23 in NameObject (#1342)

### Testing (TST)
-  Use pytest.warns() for warnings, and .raises() for exceptions (#1325)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.10.6...2.10.7)


## Version 2.10.6, 2022-09-09

### Robustness (ROB)
-  Fix infinite loop due to Invalid object (#1331)
-  Fix image extraction issue with superfluous whitespaces (#1327)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.10.5...2.10.6)

## Version 2.10.5, 2022-09-04

### New Features (ENH)
-  Process XRefStm (#1297)
-  Auto-detect RTL for text extraction (#1309)

### Bug Fixes (BUG)
-  Avoid scaling cropbox twice (#1314)

### Robustness (ROB)
-  Fix offset correction in revised PDF (#1318)
-  Crop data of /U and /O in encryption dictionary to 48 bytes (#1317)
-  MultiLine bfrange in cmap (#1299)
-  Cope with 2 digit codes in bfchar (#1310)
-  Accept '/annn' charset as ASCII code (#1316)
-  Log errors during Float / NumberObject initialization (#1315)
-  Cope with corrupted entries in xref table (#1300)

### Documentation (DOC)
-  Migration guide (PyPDF2 1.x ➔ 2.x) (#1324)
-  Creating a coverage report (#1319)
-  Fix AnnotationBuilder.free_text example (#1311)
-  Fix usage of page.scale by replacing it with page.scale_by (#1313)

### Maintenance (MAINT)
-  PdfReaderProtocol (#1303)
-  Throw PdfReadError if Trailer can't be read (#1298)
-  Remove catching OverflowException (#1302)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.10.4...2.10.5)


## Version 2.10.4, 2022-08-28

### Robustness (ROB)
-  Fix errors/warnings on no /Resources within extract_text (#1276)
-  Add required line separators in ContentStream ArrayObjects (#1281)

### Maintenance (MAINT)
-  Use NameObject idempotency (#1290)

### Testing (TST)
-  Rectangle deletion (#1289)
-  Add workflow tests (#1287)
-  Remove files after tests ran (#1286)

### Packaging (PKG)
-  Add minimum version for typing_extensions requirement (#1277)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.10.3...2.10.4)

## Version 2.10.3, 2022-08-21

### Robustness (ROB)
-  Decrypt returns empty bytestring (#1258)

### Developer Experience (DEV)
-  Modify CI to better verify built package contents (#1244)

### Maintenance (MAINT)
-  Remove 'mine' as PdfMerger always creates the stream (#1261)
-  Let PdfMerger._create_stream raise NotImplemented (#1251)
-  password param of _security._alg32(...) is only a string, not bytes (#1259)
-  Remove unreachable code in read_block_backwards (#1250)
   and sign function in _extract_text (#1262)

### Testing (TST)
-  Delete annotations (#1263)
-  Close PdfMerger in tests (#1260)
-  PdfReader.xmp_metadata workflow (#1257)
-  Various PdfWriter (Layout, Bookmark deprecation) (#1249)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.10.2...2.10.3)

## Version 2.10.2, 2022-08-15

BUG: Add PyPDF2.generic to PyPI distribution

## Version 2.10.1, 2022-08-15

### Bug Fixes (BUG)
-  TreeObject.remove_child had a non-PdfObject assignment for Count (#1233, #1234)
-  Fix stream truncated prematurely (#1223)

### Documentation (DOC)
-  Fix docstring formatting (#1228)

### Maintenance (MAINT)
-  Split generic.py (#1229)

### Testing (TST)
-  Decrypt AlgV4 with owner password (#1239)
-  AlgV5.generate_values (#1238)
-  TreeObject.remove_child / empty_tree (#1235, #1236)
-  create_string_object (#1232)
-  Free-Text annotations (#1231)
-  generic._base (#1230)
-  Strict get fonts (#1226)
-  Increase PdfReader coverage (#1219, #1225)
-  Increase PdfWriter coverage (#1237)
-  100% coverage for utils.py (#1217)
-  PdfWriter exception non-binary stream (#1218)
-  Don't check coverage for deprecated code (#1216)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.10.0...2.10.1)


## Version 2.10.0, 2022-08-07

### New Features (ENH)
-  "with" support for PdfMerger and PdfWriter (#1193)
-  Add AnnotationBuilder.text(...) to build text annotations (#1202)

### Bug Fixes (BUG)
-  Allow IndirectObjects as stream filters (#1211)

### Documentation (DOC)
-  Font scrambling
-  Page vs Content scaling (#1208)
-  Example for orientation parameter of extract_text (#1206)
-  Fix AnnotationBuilder parameter formatting (#1204)

### Developer Experience (DEV)
-  Add flake8-print (#1203)

### Maintenance (MAINT)
-  Introduce WrongPasswordError / FileNotDecryptedError / EmptyFileError  (#1201)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.9.0...2.10.0)

## Version 2.9.0, 2022-07-31

### New Features (ENH)
-  Add ability to add hex encoded colors to outline items (#1186)
-  Add support for pathlib.Path in PdfMerger.merge (#1190)
-  Add link annotation (#1189)
-  Add capability to filter text extraction by orientation (#1175)

### Bug Fixes (BUG)
-  Named Dest in PDF1.1 (#1174)
-  Incomplete Graphic State save/restore (#1172)

### Documentation (DOC)
-  Update changelog url in package metadata (#1180)
-  Mention camelot for table extraction (#1179)
-  Mention pyHanko for signing PDF documents (#1178)
-  We have had CMAP support for a while (#1177)

### Maintenance (MAINT)
-  Consistent usage of warnings / log messages (#1164)
-  Consistent terminology for outline items (#1156)


[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.8.1...2.9.0)

## Version 2.8.1, 2022-07-25

### Bug Fixes (BUG)
-  u_hash in AlgV4.compute_key (#1170)

### Robustness (ROB)
-  Fix loading of file from #134 (#1167)
-  Cope with empty DecodeParams (#1165)

### Documentation (DOC)
-  Typo in merger deprecation warning message (#1166)

### Maintenance (MAINT)
-  Package updates; solve mypy strict remarks (#1163)

### Testing (TST)
-  Add test from #325 (#1169)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.8.0...2.8.1)


## Version 2.8.0, 2022-07-24

### New Features (ENH)
-  Add writer.add_annotation, page.annotations, and generic.AnnotationBuilder (#1120)

### Bug Fixes (BUG)
-  Set /AS for /Btn form fields in writer (#1161)
-  Ignore if /Perms verify failed (#1157)

### Robustness (ROB)
-  Cope with utf16 character for space calculation (#1155)
-  Cope with null params for FitH / FitV destination (#1152)
-  Handle outlines without valid destination (#1076)

### Developer Experience (DEV)
-  Introduce _utils.logger_warning (#1148)

### Maintenance (MAINT)
-  Break up parse_to_unicode (#1162)
-  Add diagnostic output to exception in read_from_stream (#1159)
-  Reduce PdfReader.read complexity (#1151)

### Testing (TST)
-  Add workflow tests found by arc testing (#1154)
-  Decrypt file which is not encrypted (#1149)
-  Test CryptRC4 encryption class; test image extraction filters (#1147)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.7.0...2.8.0)

## Version 2.7.0, 2022-07-21

### New Features (ENH)
-  Add `outline_count` property (#1129)

### Bug Fixes (BUG)
-  Make reader.get_fields also return dropdowns with options (#1114)
-  Add deprecated EncodedStreamObject functions back until PyPDF2==3.0.0 (#1139)

### Robustness (ROB)
-  Cope with missing /W entry (#1136)
-  Cope with invalid parent xref (#1133)

### Documentation (DOC)
-  Contributors file (#1132)
-  Fix type in signature of PdfWriter.add_uri (#1131)

### Developer Experience (DEV)
-  Add .git-blame-ignore-revs (#1141)

### Code Style (STY)
-  Fixing typos (#1137)
-  Reuse code via get_outlines_property in tests (#1130)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.6.0...2.7.0)

## Version 2.6.0, 2022-07-17

### New Features (ENH)
-  Add color and font_format to PdfReader.outlines[i] (#1104)
-  Extract Text Enhancement (whitespaces) (#1084)

### Bug Fixes (BUG)
-  Use `build_destination` for named destination outlines (#1128)
-  Avoid a crash when a ToUnicode CMap has an empty dstString in beginbfchar (#1118)
-  Prevent deduplication of PageObject (#1105)
-  None-check in DictionaryObject.read_from_stream (#1113)
-  Avoid IndexError in _cmap.parse_to_unicode (#1110)

### Documentation (DOC)
-  Explanation for git submodule
-  Watermark and stamp (#1095)

### Maintenance (MAINT)
-  Text extraction improvements (#1126)
-  Destination.color returns ArrayObject instead of tuple as fallback (#1119)
-  Use add_bookmark_destination in add_bookmark (#1100)
-  Use add_bookmark_destination in add_bookmark_dict (#1099)

### Testing (TST)
-  Add test for arab text (#1127)
-  Add xfail for decryption fail (#1125)
-  Add xfail test for IndexError when extracting text (#1124)
-  Add MCVE showing outline title issue (#1123)

### Code Style (STY)
-  Use IntFlag for permissions_flag / update_page_form_field_values (#1094)
-  Simplify code (#1101)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.5.0...2.6.0)

## Version 2.5.0, 2022-07-10

### New Features (ENH)
-  Add support for indexed color spaces / BitsPerComponent for decoding PNGs (#1067)
-  Add PageObject._get_fonts (#1083)

### Performance Improvements (PI)
-  Use iterative DFS in PdfWriter._sweep_indirect_references (#1072)

### Bug Fixes (BUG)
-  Let Page.scale also scale the crop-/trim-/bleed-/artbox (#1066)
-  Column default for CCITTFaxDecode (#1079)

### Robustness (ROB)
-  Guard against None-value in _get_outlines (#1060)

### Documentation (DOC)
-  Stamps and watermarks (#1082)
-  OCR vs PDF text extraction (#1081)
-  Python Version support
-  Formatting of CHANGELOG

### Developer Experience (DEV)
-  Cache downloaded files (#1070)
-  Speed-up for CI (#1069)

### Maintenance (MAINT)
-  Set page.rotate(angle: int) (#1092)
-  Issue #416 was fixed by #1015 (#1078)

### Testing (TST)
-  Image extraction (#1080)
-  Image extraction (#1077)

### Code Style (STY)
-  Apply black
-  Typo in Changelog

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.4.2...2.5.0)

## Version 2.4.2, 2022-07-05

### New Features (ENH)
-  Add PdfReader.xfa attribute (#1026)

### Bug Fixes (BUG)
-  Wrong page inserted when PdfMerger.merge is done (#1063)
-  Resolve IndirectObject when it refers to a free entry (#1054)

### Developer Experience (DEV)
-  Added {posargs} to tox.ini (#1055)

### Maintenance (MAINT)
-  Remove PyPDF2._utils.bytes_type (#1053)

### Testing (TST)
-  Scale page (indirect rect object) (#1057)
-  Simplify pathlib PdfReader test (#1056)
-  IndexError of VirtualList (#1052)
-  Invalid XML in xmp information (#1051)
-  No pycryptodome (#1050)
-  Increase test coverage (#1045)

### Code Style (STY)
-  DOC of compress_content_streams (#1061)
-  Minimize diff for #879 (#1049)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.4.1...2.4.2)

## Version 2.4.1, 2022-06-30

### New Features (ENH)
-  Add writer.pdf_header property (getter and setter) (#1038)

### Performance Improvements (PI)
-  Remove b_ call in FloatObject.write_to_stream (#1044)
-  Check duplicate objects in writer._sweep_indirect_references (#207)

### Documentation (DOC)
-  How to suppress exceptions/warnings/log messages (#1037)
-  Remove hyphen from lossless (#1041)
-  Compression of content streams (#1040)
-  Fix inconsistent variable names in add-watermark.md (#1039)
-  File size reduction
-  Add CHANGELOG to the rendered docs (#1023)

### Maintenance (MAINT)
-  Handle XML error when reading XmpInformation (#1030)
-  Deduplicate Code / add mutmut config (#1022)

### Code Style (STY)
-  Use unnecessary one-line function / class attribute (#1043)
-  Docstring formatting (#1033)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.4.0...2.4.1)

## Version 2.4.0, 2022-06-26

### New Features (ENH):
-  Support R6 decrypting (#1015)
-  Add PdfReader.pdf_header (#1013)

### Performance Improvements (PI):
-  Remove ord_ calls (#1014)

### Bug Fixes (BUG):
-  Fix missing page for bookmark (#1016)

### Robustness (ROB):
-  Deal with invalid Destinations (#1028)

### Documentation (DOC):
-  get_form_text_fields does not extract dropdown data (#1029)
-  Adjust PdfWriter.add_uri docstring
-  Mention crypto extra_requires for installation (#1017)

### Developer Experience (DEV):
-  Use `\n` line endings everywhere (#1027)
-  Adjust string formatting to be able to use mutmut (#1020)
-  Update Bug report template

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.3.1...2.4.0)

## Version 2.3.1, 2022-06-19

BUG: Forgot to add the internal `_codecs` subpackage.

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.3.0...2.3.1)

## Version 2.3.0, 2022-06-19

The highlight of this release is improved support for file encryption
(AES-128 and AES-256, R5 only). See #749 for the amazing work of
@exiledkingcc 🎊 Thank you 🤗

### Deprecations (DEP)
-  Rename names to be PEP8-compliant (#967)
  - `PdfWriter.get_page`: the pageNumber parameter is renamed to page_number
  - `PyPDF2.filters`:
    * For all classes, a parameter rename: decodeParms ➔ decode_parms
    * decodeStreamData ➔ decode_stream_data
  - `PyPDF2.xmp`:
    * XmpInformation.rdfRoot ➔ XmpInformation.rdf_root
    * XmpInformation.xmp_createDate ➔ XmpInformation.xmp_create_date
    * XmpInformation.xmp_creatorTool ➔ XmpInformation.xmp_creator_tool
    * XmpInformation.xmp_metadataDate ➔ XmpInformation.xmp_metadata_date
    * XmpInformation.xmp_modifyDate ➔ XmpInformation.xmp_modify_date
    * XmpInformation.xmpMetadata ➔ XmpInformation.xmp_metadata
    * XmpInformation.xmpmm_documentId ➔ XmpInformation.xmpmm_document_id
    * XmpInformation.xmpmm_instanceId ➔ XmpInformation.xmpmm_instance_id
  - `PyPDF2.generic`:
    * readHexStringFromStream ➔ read_hex_string_from_stream
    * initializeFromDictionary ➔ initialize_from_dictionary
    * createStringObject ➔ create_string_object
    * TreeObject.hasChildren ➔ TreeObject.has_children
    * TreeObject.emptyTree ➔ TreeObject.empty_tree

### New Features (ENH)
-  Add decrypt support for V5 and AES-128, AES-256 (R5 only) (#749)

### Robustness (ROB)
-  Fix corrupted (wrongly) linear PDF (#1008)

### Maintenance (MAINT)
-  Move PDF_Samples folder into resources
-  Fix typos (#1007)

### Testing (TST)
-  Improve encryption/decryption test (#1009)
-  Add merger test cases with real PDFs (#1006)
-  Add mutmut config

### Code Style (STY)
-  Put pure data mappings in separate files (#1005)
-  Make encryption module private, apply pre-commit (#1010)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.2.1...2.3.0)

## Version 2.2.1, 2022-06-17

### Performance Improvements (PI)
-  Remove b_ calls (#992, #986)
-  Apply improvements to _utils suggested by perflint (#993)

### Robustness (ROB)
-  utf-16-be codec can't decode (...) (#995)

### Documentation (DOC)
-  Remove reference to Scripts (#987)

### Developer Experience (DEV)
-  Fix type annotations for add_bookmarks (#1000)

### Testing (TST)
-  Add test for PdfMerger (#1001)
-  Add tests for XMP information (#996)
-  reader.get_fields / zlib issue / LZW decode issue (#1004)
-  reader.get_fields with report generation (#1002)
-  Improve test coverage by extracting texts (#998)

### Code Style (STY)
-  Apply fixes suggested by pylint (#999)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.2.0...2.2.1)

## Version 2.2.0, 2022-06-13

The 2.2.0 release improves text extraction again via (#969):

* Improvements around /Encoding / /ToUnicode
* Extraction of CMaps improved
* Fallback for font def missing
* Support for /Identity-H and /Identity-V: utf-16-be
* Support for /GB-EUC-H / /GB-EUC-V / GBp/c-EUC-H / /GBpc-EUC-V (beta release for evaluation)
* Arabic (for evaluation)
* Whitespace extraction improvements

Those changes should mainly improve the text extraction for non-ASCII alphabets,
e.g. Russian / Chinese / Japanese / Korean / Arabic.

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.1.1...2.2.0)

## Version 2.1.1, 2022-06-12

### New Features (ENH)
-  Add support for pathlib as input for PdfReader (#979)

### Performance Improvements (PI)
-  Optimize read_next_end_line (#646)

### Bug Fixes (BUG)
-  Adobe Acrobat 'Would you like to save this file?' (#970)

### Documentation (DOC)
-  Notes on annotations (#982)
-  Who uses PyPDF2
-  intendet ➔ in robustness page (#958)

### Maintenance (MAINT)
-  pre-commit / requirements.txt updates (#977)
-  Mark read_next_end_line as deprecated (#965)
-  Export `PageObject` in PyPDF2 root (#960)

### Testing (TST)
-  Add MCVE of issue #416 (#980)
-  FlateDecode.decode decodeParms (#964)
-  Xmp module (#962)
-  utils.paeth_predictor (#959)

### Code Style (STY)
-  Use more tuples and list/dict comprehensions (#976)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.1.0...2.1.1)


## Version 2.1.0, 2022-06-06

The highlight of the 2.1.0 release is the most massive improvement to the
text extraction capabilities of PyPDF2 since 2016 🥳🎊 A very big thank you goes
to [pubpub-zz](https://github.com/pubpub-zz) who took a lot of time and
knowledge about the PDF format to finally get those improvements into PyPDF2.
Thank you 🤗💚

In case the new function causes any issues, you can use `_extract_text_old`
for the old functionality. Please also open a bug ticket in that case.

There were several people who have attempted to bring similar improvements to
PyPDF2. All of those were valuable. The main reason why they didn't get merged
is the big amount of open PRs / issues. pubpub-zz was the most comprehensive
PR which also incorporated the latest changes of PyPDF2 2.0.0.

Thank you to [VictorCarlquist](https://github.com/VictorCarlquist) for #858 and
[asabramo](https://github.com/asabramo) for #464 🤗

### New Features (ENH)
-  Massive text extraction improvement (#924). Closed many open issues:
    - Exceptions / missing spaces in extract_text() method (#17) 🕺
      - Whitespace issues in extract_text() (#42) 💃
      - pypdf2 reads the hifenated words in a new line (#246)
    - PyPDF2 failing to read unicode character (#37)
      - Unable to read bullets (#230)
    - ExtractText yields nothing for apparently good PDF (#168) 🎉
    - Encoding issue in extract_text() (#235)
    - extractText() doesn't work on Chinese PDF (#252)
    - encoding error (#260)
    - Trouble with apostophes in names in text "O'Doul" (#384)
    - extract_text works for some PDF files, but not the others (#437)
    - Euro sign not being recognized by extractText (#443)
    - Failed extracting text from French texts (#524)
    - extract_text doesn't extract ligatures correctly (#598)
    - reading spanish text - mark convert issue (#635)
    - Read PDF changed from text to random symbols (#654)
    - .extractText() reads / as 1. (#789)
-  Update glyphlist (#947) - inspired by #464
-  Allow adding PageRange objects (#948)

### Bug Fixes (BUG)
-  Delete .python-version file (#944)
-  Compare StreamObject.decoded_self with None (#931)

### Robustness (ROB)
-  Fix some conversion errors on non conform PDF (#932)

### Documentation (DOC)
-  Elaborate on PDF text extraction difficulties (#939)
-  Add logo (#942)
-  rotate vs Transformation().rotate (#937)
-  Example how to use PyPDF2 with AWS S3 (#938)
-  How to deprecate (#930)
-  Fix typos on robustness page (#935)
-  Remove scripts (pdfcat) from docs (#934)

### Developer Experience (DEV)
-  Ignore .python-version file
-  Mark deprecated code with no-cover (#943)
-  Automatically create Github releases from tags (#870)

### Testing (TST)
-  Text extraction for non-latin alphabets (#954)
-  Ignore PdfReadWarning in benchmark (#949)
-  writer.remove_text (#946)
-  Add test for Tree and _security (#945)

### Code Style (STY)
-  black, isort, Flake8, splitting buildCharMap (#950)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/2.0.0...2.1.0)

## Version 2.0.0, 2022-06-01

The 2.0.0 release of PyPDF2 includes three core changes:

1. Dropping support for Python 3.5 and older.
2. Introducing type annotations.
3. Interface changes, mostly to have PEP8-compliant names

We introduced a [deprecation process](https://github.com/py-pdf/PyPDF2/pull/930)
that hopefully helps users to avoid unexpected breaking changes.

### Breaking Changes (DEP)
- PyPDF2 2.0 requires Python 3.6+. Python 2.7 and 3.5 support were dropped.
- PdfFileReader: The "warndest" parameter was removed
- PdfFileReader and PdfFileMerger no longer have the `overwriteWarnings`
  parameter. The new behavior is `overwriteWarnings=False`.
- merger: OutlinesObject was removed without replacement.
- merger.py ➔ _merger.py: You must import PdfFileMerger from PyPDF2 directly.
- utils:
  * `ConvertFunctionsToVirtualList` was removed
  * `formatWarning` was removed
  * `isInt(obj)`: Use `isinstance(obj, int)` instead
  * `u_(s)`: Use `s` directly
  * `chr_(c)`: Use `chr(c)` instead
  * `barray(b)`: Use `bytearray(b)` instead
  * `isBytes(b)`: Use `isinstance(b, type(bytes()))` instead
  * `xrange_fn`: Use `range` instead
  * `string_type`: Use `str` instead
  * `isString(s)`: Use `isinstance(s, str)` instead
  * `_basestring`: Use `str` instead
  * All Exceptions are now in `PyPDF2.errors`:
    - PageSizeNotDefinedError
    - PdfReadError
    - PdfReadWarning
    - PyPdfError
- `PyPDF2.pdf` (the `pdf` module) no longer exists. The contents were moved within
  the library. You should most likely import directly from `PyPDF2` instead.
  The `RectangleObject` is in `PyPDF2.generic`.
- The `Resources`, `Scripts`, and `Tests` will no longer be part of the distribution
  files on PyPI. This should have little to no impact on most people. The
  `Tests` are renamed to `tests`, the `Resources` are renamed to `resources`.
  Both are still in the git repository. The `Scripts` are now in
  [cpdf](https://github.com/py-pdf/cpdf). `Sample_Code` was moved to the `docs`.

For a full list of deprecated functions, please see the changelog of version
1.28.0.

### New Features (ENH)
-  Improve space setting for text extraction (#922)
-  Allow setting the decryption password in `PdfReader.__init__` (#920)
-  Add Page.add_transformation (#883)

### Bug Fixes (BUG)
-  Fix error adding transformation to page without /Contents (#908)

### Robustness (ROB)
-  Cope with invalid length in streams (#861)

### Documentation (DOC)
-  Fix style of 1.25 and 1.27 patch notes (#927)
-  Transformation (#907)

### Developer Experience (DEV)
-  Create flake8 config file (#916)
-  Use relative imports (#875)

### Maintenance (MAINT)
-  Use Python 3.6 language features (#849)
-  Add wrapper function for PendingDeprecationWarnings (#928)
-  Use new PEP8 compliant names (#884)
-  Explicitly represent transformation matrix (#878)
-  Inline PAGE_RANGE_HELP string (#874)
-  Remove unnecessary generics imports (#873)
-  Remove star imports (#865)
-  merger.py ➔ _merger.py (#864)
-  Type annotations for all functions/methods (#854)
-  Add initial type support with mypy (#853)

### Testing (TST)
-  Regression test for xmp_metadata converter (#923)
-  Checkout submodule sample-files for benchmark
-  Add text extracting performance benchmark
-  Use new PyPDF2 API in benchmark (#902)
-  Make test suite fail for uncaught warnings (#892)
-  Remove -OO testrun from CI (#901)
-  Improve tests for convert_to_int (#899)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.28.4...2.0.0)

## PyPDF2 1.X

See [CHANGELOG PyPDF2 1.X](changelog-v1.md)


================================================
FILE: CONTRIBUTING.md
================================================
Please check the [documentation page dedicated to development](https://pypdf.readthedocs.io/en/stable/dev/intro.html).

## Creating issues / tickets

Please go here: https://github.com/py-pdf/pypdf/issues

Typically you should not send e-mails. E-mails might only reach one person and
they could go into spam or that person might be busy. Please create issues on
GitHub instead.

Please use the templates provided.

Keep in mind that although PDF has an official specification, there are tons of
variations which might require special handling. Thus, please always provide a
reproducing example file for us to work with. Otherwise, we have to guess possible
issues, leading to unnecessary overhead - especially since most of the contributions
happen during our free time.

If you already know a fix, consider opening a pull request after reporting the issue
to make life easier for everyone.

## Creating Pull Requests

We appreciate it when people make PRs, but please be aware that pypdf is used by many
people. That means:

* We rarely make breaking changes and have a [deprecation process](https://pypdf.readthedocs.io/en/latest/dev/deprecations.html).
* New features, especially adding to the public interface, typically need to be
  discussed first.

Before you make bigger changes, open an issue to make the suggestion.
Note which interface changes you want to make.


================================================
FILE: CONTRIBUTORS.md
================================================
# Contributors

pypdf has had a lot of contributors since it started as pyPdf in 2005. We are
a free software project without any company affiliation. We cannot pay
contributors, but we do value their contributions. A lot of time, effort, and
expertise went into this project. With this list, we recognize these awesome
people 🤗

The list is definitely not complete. You can find more contributors via the git
history and [GitHub's 'Contributors' feature](https://github.com/py-pdf/pypdf/graphs/contributors).

## Contributors to the pypdf (formerly pyPdf / PyPDF2) project

* [abyesilyurt](https://github.com/abyesilyurt)
* [ArkieCoder](https://github.com/ArkieCoder)
* [Beers, PJ](https://github.com/PJBrs)
* [Clauss, Christian](https://github.com/cclauss)
* [DL6ER](https://github.com/DL6ER)
* [Duy, Phan Thanh](https://github.com/zuypt)
* [ediamondscience](https://github.com/ediamondscience)
* [Ermeson, Felipe](https://github.com/FelipeErmeson)
* [Freitag, François](https://github.com/francoisfreitag)
* [Gagnon, William G.](https://github.com/williamgagnon)
* [Gillard, James](https://github.com/jgillard)
* [Górny, Michał](https://github.com/mgorny)
* [Grillo, Miguel](https://github.com/Ineffable22)
* [Gutteridge, David H.](https://github.com/dhgutteridge)
* [Hale, Joseph](https://github.com/thehale)
* [harshhes](https://github.com/harshhes)
* [Jackowitz, Noah](https://github.com/hackowitz-af) | [LinkedIn](https://www.linkedin.com/in/noah-jackowitz/)
* [JianzhengLuo](https://github.com/JianzhengLuo)
* [Karvonen, Harry](https://github.com/Hatell/)
* [King, Hunter](https://github.com/neversphere)
* [Kotler, Mitchell](https://github.com/mitchelljkotler)
* [KourFrost](https://github.com/KourFrost)
* [Lightup1](https://github.com/Lightup1)
* [Majumder, Jonah](https://github.com/jonahmajumder)
* [Manini, Lorenzo](https://github.com/lorenzomanini)
* [maxbeer99](https://github.com/maxbeer99)
* [McNeil, Karen](https://github.com/karenlmcneil): Arabic Language Support
* [Mérino, Antoine](https://github.com/Merinorus)
* [Murphy, Kevin](https://github.com/kmurphy4)
* [nalin-udhaar](https://github.com/nalin-udhaar)
* [Noah-Houghton](https://github.com/Noah-Houghton) | [LinkedIn](https://www.linkedin.com/in/noah-h-554992a0/)
* [Paramonov, Alexey](https://github.com/alexey-v-paramonov)
* [Paternault, Louis](https://framagit.org/spalax)
* [Perrensen, Olsen](https://github.com/olsonperrensen)
* [pilotandy](https://github.com/pilotandy)
* [Pinheiro, Arthur](https://github.com/xilopaint)
* [pmiller66](https://github.com/pmiller66)
* [Poddar, Arka](https://github.com/postmeback)
* [programmarchy](https://github.com/programmarchy)
* [pubpub-zz](https://github.com/pubpub-zz): involved in community development
* [Ramos, Leodanis Pozo](https://github.com/lpozo)
* [RitchieP](https://github.com/RitchieP) | [LinkedIn](https://www.linkedin.com/in/ritchie-p-892b31115/) | [StackOverflow](https://stackoverflow.com/users/13328625/casual-r?tab=profile)
* [robbiebusinessacc](https://github.com/robbiebusinessacc)
* [Roder, Thomas](https://github.com/MrTomRod)
* [Rogmann, Sascha](https://github.com/srogmann)
* [Röthenbacher, Thomas](https://github.com/troethe)
* [shartzog](https://github.com/shartzog)
* [stefan6419846](https://github.com/stefan6419846): Maintainer of pypdf since January 2025
* [sietzeberends](https://github.com/sietzeberends)
* [Stober, Marc](https://github.com/marcstober)
* [Stüber, Timo](https://github.com/omit66)
* [Thoma, Martin](https://github.com/MartinThoma): Maintainer of pypdf from April 2022 to January 2025. I hope to build a great community with many awesome contributors. [LinkedIn](https://www.linkedin.com/in/martin-thoma/) | [StackOverflow](https://stackoverflow.com/users/562769/martin-thoma) | [Blog](https://martin-thoma.com/)
* [Thomas, Reuben](https://github.com/rrthomas)
* [Tobeabellwether](https://github.com/Tobeabellwether)
* [van Alst, Ludo](https://github.com/LudovA)
* [WevertonGomes](https://github.com/WevertonGomesCosta)
* [Wilson, Huon](https://github.com/huonw)
* ztravis

## Adding a new contributor

Contributors are:

* Anybody who has a commit in `main` - no matter how small or how many. Also if it's via *co-authored-by*.
* People who opened helpful issues:

  1. Bugs: with complete MCVE
  2. Well-described feature requests
  3. Potentially some more.

  The maintainers of pypdf have the last call on that one.
* Community work: This is exceptional. If the maintainers of pypdf see people
  being super helpful in answering issues / discussions or being very active on
  Stackoverflow, we also consider them being contributors to pypdf.

Contributors can add themselves or ask via a GitHub issue to be added.

Please use the following format:

```
* Last name, First name: 140-characters of text; links to LinkedIn / GitHub / other profiles and personal pages are ok
```

OR

```
* GitHub Username: 140-characters of text; links to LinkedIn / GitHub / other profiles and personal pages are ok
```

and add the entry in alphabetical order. The 140 characters are everything visible after the `Name:`.

Please don't use images.


================================================
FILE: LICENSE
================================================
Copyright (c) 2006-2008, Mathieu Fenniak
Some contributions copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
Some contributions copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: Makefile
================================================
# Maintenance: update the pre-commit hook versions and re-compile the pinned
# requirement files (ci, dev, docs) with upgraded dependencies.
maint:
	pre-commit autoupdate
	pip-compile -U requirements/ci.in
	pip-compile -U requirements/dev.in
	pip-compile -U requirements/docs.in

# Release: run the release script, then create the release commit using
# RELEASE_COMMIT_MSG.md as the message (-F) while still opening the editor (-e).
release:
	python make_release.py
	git commit -eF RELEASE_COMMIT_MSG.md

# Clean: install pyclean, run it on the repository, and remove caches, coverage
# output, built docs, and packaging artifacts.
clean:
	python -m pip install pyclean
	pyclean .
	rm -rf tests/__pycache__ pypdf/__pycache__ htmlcov docs/_build dist pypdf.egg-info .pytest_cache .mypy_cache .benchmarks

# Test: run the test suite verbosely with coverage (terminal + HTML reports),
# show the 3 slowest durations, and apply a 60 second per-test timeout.
test:
	pytest tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=60 pypdf

# Testtype: same as `test`, but with a 30 second timeout and the
# --typeguard-packages option set to pypdf.
# NOTE(review): presumably this enables typeguard's runtime type checking for pypdf - confirm.
testtype:
	pytest tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=30 --typeguard-packages=pypdf

# Benchmark: run only the benchmark module.
benchmark:
	pytest tests/bench.py

# Mypy: type-check the pypdf package in strict mode.
mypy:
	mypy pypdf --ignore-missing-imports --check-untyped --strict

# Ruff: lint the package, the tests, and the release script.
ruff:
	ruff check pypdf tests make_release.py


================================================
FILE: README.md
================================================
[![PyPI version](https://badge.fury.io/py/pypdf.svg)](https://badge.fury.io/py/pypdf)
[![Python Support](https://img.shields.io/pypi/pyversions/pypdf.svg)](https://pypi.org/project/pypdf/)
[![](https://img.shields.io/badge/-documentation-green)](https://pypdf.readthedocs.io/en/stable/)
[![GitHub last commit](https://img.shields.io/github/last-commit/py-pdf/pypdf)](https://github.com/py-pdf/pypdf)
[![codecov](https://codecov.io/gh/py-pdf/pypdf/branch/main/graph/badge.svg?token=id42cGNZ5Z)](https://codecov.io/gh/py-pdf/pypdf)

# pypdf

pypdf is a free and open-source pure-python PDF library capable of splitting,
[merging](https://pypdf.readthedocs.io/en/stable/user/merging-pdfs.html),
[cropping, and transforming](https://pypdf.readthedocs.io/en/stable/user/cropping-and-transforming.html)
the pages of PDF files. It can also add
custom data, viewing options, and
[passwords](https://pypdf.readthedocs.io/en/stable/user/encryption-decryption.html)
to PDF files. pypdf can
[retrieve text](https://pypdf.readthedocs.io/en/stable/user/extract-text.html)
and
[metadata](https://pypdf.readthedocs.io/en/stable/user/metadata.html)
from PDFs as well.

See [pdfly](https://github.com/py-pdf/pdfly) for a CLI application that uses pypdf to interact with PDFs.

## Installation

Install pypdf using pip:

```
pip install pypdf
```

For using pypdf with AES encryption or decryption, install extra dependencies:

```
pip install pypdf[crypto]
```

> **NOTE**: `pypdf` 3.1.0 and above include significant improvements compared to
> previous versions. Please refer to [the migration
> guide](https://pypdf.readthedocs.io/en/latest/user/migration-1-to-2.html) for
> more information.

## Usage

```python
from pypdf import PdfReader

reader = PdfReader("example.pdf")
number_of_pages = len(reader.pages)
page = reader.pages[0]
text = page.extract_text()
```

pypdf can do a lot more, e.g. splitting, merging, reading and creating annotations, decrypting and encrypting. Check out the
[documentation](https://pypdf.readthedocs.io/en/stable/) for additional usage
examples!

For questions and answers, visit
[StackOverflow](https://stackoverflow.com/questions/tagged/pypdf)
(tagged with [pypdf](https://stackoverflow.com/questions/tagged/pypdf)).

## Contributions

Maintaining pypdf is a collaborative effort. You can support the project by
writing documentation, helping to narrow down issues, and submitting code.
See the [CONTRIBUTING.md](https://github.com/py-pdf/pypdf/blob/main/CONTRIBUTING.md) file for more information.

### Q&A

The experience pypdf users have covers the whole range from beginner to expert. You can contribute to the pypdf community by answering questions
on [StackOverflow](https://stackoverflow.com/questions/tagged/pypdf),
helping in [discussions](https://github.com/py-pdf/pypdf/discussions),
and asking users who report issues for [MCVEs](https://stackoverflow.com/help/minimal-reproducible-example) (Code + example PDF!).


### Issues

A good bug ticket includes an MCVE - a minimal complete verifiable example.
For pypdf, this means that you must upload a PDF that causes the bug to occur
as well as the code you're executing with all of the output. Use
`print(pypdf.__version__)` to tell us which version you're using.

### Code

All code contributions are welcome, but smaller ones have a better chance to
get included in a timely manner. Adding unit tests for new features or test
cases for bugs you've fixed helps us to ensure that the Pull Request (PR) is fine.

pypdf includes a test suite which can be executed with `pytest`:

```bash
$ pytest
===================== test session starts =====================
platform linux -- Python 3.6.15, pytest-7.0.1, pluggy-1.0.0
rootdir: /home/moose/GitHub/Martin/pypdf
plugins: cov-3.0.0
collected 233 items

tests/test_basic_features.py ..                         [  0%]
tests/test_constants.py .                               [  1%]
tests/test_filters.py .................x.....           [ 11%]
tests/test_generic.py ................................. [ 25%]
.............                                           [ 30%]
tests/test_javascript.py ..                             [ 31%]
tests/test_merger.py .                                  [ 32%]
tests/test_page.py .........................            [ 42%]
tests/test_pagerange.py ................                [ 49%]
tests/test_papersizes.py ..................             [ 57%]
tests/test_reader.py .................................. [ 72%]
...............                                         [ 78%]
tests/test_utils.py ....................                [ 87%]
tests/test_workflows.py ..........                      [ 91%]
tests/test_writer.py .................                  [ 98%]
tests/test_xmp.py ...                                   [100%]

========== 232 passed, 1 xfailed, 1 warning in 4.52s ==========
```


================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# "Makefile" is declared phony alongside "help".
# NOTE(review): presumably this keeps the catch-all "%" rule below from being
# applied to the Makefile itself - confirm against the GNU make manual.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/_static/releasing.drawio
================================================
<mxfile host="Electron" type="device">
  <diagram name="Seite-1" id="xmn08oupI2gSAHxAwkuE">
    <mxGraphModel dx="394" dy="220" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
      <root>
        <mxCell id="0" />
        <mxCell id="1" parent="0" />
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-33" value="" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;arcSize=21;" parent="1" vertex="1">
          <mxGeometry x="130" y="790" width="280" height="290" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-21" value="" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#f5f5f5;fontColor=#333333;strokeColor=#666666;" parent="1" vertex="1">
          <mxGeometry x="60" y="330" width="480" height="250" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-4" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-1" target="Sy3GnD-ZVnJThFurnhwo-3" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-1" value="python make_release.py" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="180" y="80" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-6" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-3" target="Sy3GnD-ZVnJThFurnhwo-5" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-3" value="Manually adjust CHANGELOG.md changes" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="180" y="170" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-5" target="Sy3GnD-ZVnJThFurnhwo-8" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-10" value="Yes" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="Sy3GnD-ZVnJThFurnhwo-9" vertex="1" connectable="0">
          <mxGeometry x="0.1768" y="-2" relative="1" as="geometry">
            <mxPoint as="offset" />
          </mxGeometry>
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-12" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-5" target="Sy3GnD-ZVnJThFurnhwo-11" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-13" value="No" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="Sy3GnD-ZVnJThFurnhwo-12" vertex="1" connectable="0">
          <mxGeometry x="0.3105" y="2" relative="1" as="geometry">
            <mxPoint as="offset" />
          </mxGeometry>
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-5" value="Is there a breaking change" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="180" y="260" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-24" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-7" target="Sy3GnD-ZVnJThFurnhwo-23" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-7" value="Adjust the CHANGELOG.md" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="170" y="600" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-8" target="Sy3GnD-ZVnJThFurnhwo-7" edge="1">
          <mxGeometry relative="1" as="geometry">
            <Array as="points">
              <mxPoint x="150" y="460" />
              <mxPoint x="230" y="460" />
            </Array>
          </mxGeometry>
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-8" value="Major version bump in _version.py" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="90" y="370" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-15" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-11" target="Sy3GnD-ZVnJThFurnhwo-14" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-16" value="Yes" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="Sy3GnD-ZVnJThFurnhwo-15" vertex="1" connectable="0">
          <mxGeometry x="-0.2562" y="3" relative="1" as="geometry">
            <mxPoint as="offset" />
          </mxGeometry>
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-20" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-11" target="Sy3GnD-ZVnJThFurnhwo-19" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-11" value="Is there a new feature?" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="250" y="370" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-18" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-14" target="Sy3GnD-ZVnJThFurnhwo-7" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-14" value="Minor version bump" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="250" y="490" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-35" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-19" target="Sy3GnD-ZVnJThFurnhwo-23" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-19" value="Patch version bump" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="400" y="490" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-22" value="Semantic Versioning" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontStyle=1;fontSize=18;fontColor=#6E6E6E;" parent="1" vertex="1">
          <mxGeometry x="450" y="350" width="60" height="30" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-27" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-23" edge="1">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="230" y="810" as="targetPoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-23" value="git commit -eF RELEASE_COMMIT_MSG.md" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="75" y="700" width="310" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-30" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" target="Sy3GnD-ZVnJThFurnhwo-28" edge="1">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="230" y="870" as="sourcePoint" />
          </mxGeometry>
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-31" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="Sy3GnD-ZVnJThFurnhwo-28" target="Sy3GnD-ZVnJThFurnhwo-29" edge="1">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-28" value="Build and push packages to PyPI" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="170" y="910" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-29" value="Create release on GitHub" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1">
          <mxGeometry x="170" y="1010" width="120" height="60" as="geometry" />
        </mxCell>
        <mxCell id="Sy3GnD-ZVnJThFurnhwo-36" value="GitHub Action" style="text;html=1;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontStyle=1;fontSize=18;fontColor=#6F9958;" parent="1" vertex="1">
          <mxGeometry x="325" y="813" width="60" height="30" as="geometry" />
        </mxCell>
        <mxCell id="srRZveQdFgRCeiaoivwE-1" value="Create tag on&lt;div&gt;GitHub&lt;/div&gt;" style="rounded=1;whiteSpace=wrap;html=1;" vertex="1" parent="1">
          <mxGeometry x="170" y="810" width="120" height="60" as="geometry" />
        </mxCell>
      </root>
    </mxGraphModel>
  </diagram>
</mxfile>


================================================
FILE: docs/conf.py
================================================
"""
Configuration file for the Sphinx documentation builder.

This file only contains a selection of the most common options.
For a full list see the documentation:
https://www.sphinx-doc.org/en/master/usage/configuration.html
"""
# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import datetime
import os
import shutil
import sys
from pathlib import Path

# Put the current directory and its parent at the front of the import path.
# NOTE(review): presumably the parent is the repository root, so that the
# in-tree "pypdf" package is the one imported below - confirm.
sys.path.insert(0, os.path.abspath("."))
sys.path.insert(0, os.path.abspath("../"))

# Imported after the sys.path changes above; the package supplies the project
# name and version used further down.
import pypdf as py_pkg

# Copy the changelog and contributors list from the parent directory into
# "meta/".
# NOTE(review): the relative paths assume the current working directory is the
# docs directory while this file is evaluated - confirm.
shutil.copyfile("../CHANGELOG.md", "meta/CHANGELOG.md")
shutil.copyfile("../CONTRIBUTORS.md", "meta/CONTRIBUTORS.md")

# -- Project information -----------------------------------------------------

project = py_pkg.__name__
copyright = f"2006 - {datetime.datetime.now(tz=datetime.timezone.utc).year}, Mathieu Fenniak and pypdf contributors"
author = "Mathieu Fenniak"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = py_pkg.__version__
# The full version, including alpha/beta/rc tags.
release = py_pkg.__version__

# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
needs_sphinx = "4.0.0"

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.intersphinx",
    "sphinx.ext.autosummary",
    "sphinx.ext.coverage",
    "sphinx.ext.mathjax",
    "sphinx.ext.viewcode",
    "sphinx.ext.napoleon",
    "sphinx.ext.doctest",
    # External
    "myst_parser",
]

# "major.minor" of the interpreter running the build; used to select the
# matching version of the Python documentation for cross-references.
python_version = ".".join(map(str, sys.version_info[:2]))
# External documentation sets that cross-references may resolve against.
intersphinx_mapping = {
    "python": (f"https://docs.python.org/{python_version}", None),
    "Pillow": ("https://pillow.readthedocs.io/en/latest/", None),
}

# (role, target) regex pairs for which unresolved-reference warnings are
# suppressed in nitpicky mode.
nitpick_ignore_regex = [
    # For reasons unclear at this stage, the io module prefixes everything with _io
    # and this confuses sphinx
    (
        r"py:class",
        r"(_io.(FileIO|BytesIO|Buffered(Reader|Writer))|pypdf.*PdfDocCommon)",
    ),
]

# Defaults applied to every autodoc directive.
autodoc_default_options = {
    "member-order": "bysource",  # keep members in source order
    "members": True,
    "show-inheritance": True,
    "undoc-members": True,  # also list members that have no docstring
}
# Do not copy docstrings from parent classes onto undocumented overrides.
autodoc_inherit_docstrings = False
autodoc_typehints_format = "short"
python_use_unqualified_type_names = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# Configure MyST extension.
myst_all_links_external = False
myst_heading_anchors = 3


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
html_theme_options = {
    "canonical_url": "",
    "analytics_id": "",
    "logo_only": True,
    "prev_next_buttons_location": "bottom",
    "style_external_links": False,
    # Toc options
    "collapse_navigation": True,
    "sticky_navigation": True,
    "navigation_depth": 4,
    "includehidden": True,
    "titles_only": False,
}
html_logo = "_static/logo.png"


# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]

# -- Options for Napoleon  -----------------------------------------------------

napoleon_google_docstring = True
napoleon_numpy_docstring = False  # Explicitly prefer Google style docstring
napoleon_use_param = True  # for type hint support
napoleon_use_rtype = False  # False, so the return type is inline with the description.

# -- Options for Doctest  ------------------------------------------------------

# Most of doc examples use hardcoded input and output file names.
# To execute these examples real files need to be read and written.
#
# By default, documentation examples run with the working directory set to where
# "sphinx-build" command was invoked. To avoid relative paths in docs and to
# allow to run "sphinx-build" command from any directory, we modify the current
# working directory in each tested file. Tests are executed against our
# temporary directory where we have copied all necessary resources.
#
# Each doc page that requires file operations must use "testsetup" directive
# to call "pypdf_test_setup" function to prepare the test environment for that
# page.
#
# def pypdf_test_setup(group: str, resources: dict[str, str] = {}) -> None
#
# Args:
#   group: A unique name for group of tests. Typically we group tests by doc page.
#       For each doc page we create a test folder under
#       "_build/doctest/pypdf_test/<group>". This allows to avoid file name conflicts
#       between different doc pages.
#   resources: A dictionary of source files to copy into the test folder.
#       Key is the destination file name (relative to the test folder).
#       Value is the source file path (relative to the root folder).
#
# Examples:
#   ```{testsetup}
#   pypdf_test_setup("user/add-javascript", {
#       "example.pdf": "../resources/example.pdf",
#   })
#   ```

# Absolute paths are computed once, while this configuration file is evaluated,
# and are embedded as literals into the generated setup/cleanup code below.
pypdf_test_src_root_dir = os.path.abspath(".")
pypdf_test_dst_root_dir = os.path.abspath("_build/doctest/pypdf_test")
# Always start from an empty destination root so that files left over from a
# previous run cannot influence this one.
if Path(pypdf_test_dst_root_dir).exists():
    shutil.rmtree(pypdf_test_dst_root_dir)
Path(pypdf_test_dst_root_dir).mkdir(parents=True)

# Source code handed to sphinx.ext.doctest as global setup. It remembers the
# original working directory, changes into the destination root, and publishes
# "pypdf_test_setup" as a global so that "testsetup" directives can call it.
# Literal braces inside the f-string are doubled; the two root directories are
# inserted with "!r" so they appear as valid Python string literals.
doctest_global_setup = f"""
def pypdf_test_global_setup():
    import os
    import shutil
    from pathlib import Path

    src_root_dir = {pypdf_test_src_root_dir!r}
    dst_root_dir = {pypdf_test_dst_root_dir!r}

    global pypdf_test_orig_dir
    pypdf_test_orig_dir = os.getcwd()
    os.chdir(dst_root_dir)

    global pypdf_test_setup
    def pypdf_test_setup(group: str, resources: dict[str, str] = {{}}) -> None:
        dst_dir = os.path.join(dst_root_dir, group)
        Path(dst_dir).mkdir(parents=True)
        os.chdir(dst_dir)

        for (dst_path, src_path) in resources.items():
            src = os.path.normpath(os.path.join(src_root_dir, src_path))
            dst = os.path.join(dst_dir, dst_path)

            shutil.copyfile(src, dst)

pypdf_test_global_setup()
"""

# Source code handed to sphinx.ext.doctest as global cleanup. It restores the
# original working directory and removes any regular file found directly in the
# destination root: such a file means a page wrote output without calling
# "pypdf_test_setup" first, so a hint is printed before the files are deleted.
doctest_global_cleanup = f"""
def pypdf_test_global_cleanup():
    import os

    dst_root_dir = {pypdf_test_dst_root_dir!r}

    os.chdir(pypdf_test_orig_dir)

    has_files = False
    for name in os.listdir(dst_root_dir):
        file_name = os.path.join(dst_root_dir, name)
        if os.path.isfile(file_name):
            if not has_files:
                print("Docs page was not configured properly for running code examples")
                print("Please use 'pypdf_test_setup' function in 'testsetup' directive")
                print("Deleting unexpected file(s) in " + dst_root_dir)
                has_files = True
            print(f"- {{name}}")
            os.remove(file_name)  # Avoid side effects on other tests

pypdf_test_global_cleanup()
"""


================================================
FILE: docs/dev/cmaps.md
================================================
# CMaps

Looking at the cmap of "crazyones":

```bash
pdftk crazyones.pdf output crazyones-uncomp.pdf uncompress
```

You can see this:

```text
begincmap
/CMapName /T1Encoding-UTF16 def
/CMapType 2 def
/CIDSystemInfo <<
  /Registry (Adobe)
  /Ordering (UCS)
  /Supplement 0
>> def
1 begincodespacerange
<00> <FF>
endcodespacerange
1 beginbfchar
<1B> <FB00>
endbfchar
endcmap
CMapName currentdict /CMap defineresource pop
```

## codespacerange

A codespacerange maps a complete sequence of bytes to a range of Unicode glyphs.
It defines a starting point:

```text
1 beginbfchar
<1B> <FB00>
```

That means that `1B` (Hex for 27) maps to the Unicode character [`FB00`](https://unicode-table.com/en/FB00/) - the ligature ﬀ (two lowercase f's).

The two numbers in `begincodespacerange` mean that it starts with an offset of
0 (hence from `1B ➜ FB00`) up to an offset of FF (dec: 255), hence 1B+FF = 282
➜ [FBFF](https://www.compart.com/de/unicode/U+FBFF).

Within the text stream, there is

```text
(The)-342(mis\034ts.)
```

`\034` is octal for the decimal value 28.


================================================
FILE: docs/dev/deprecations.md
================================================
# The Deprecation Process

pypdf strives to be an excellent library for its current users and for new
ones. We are careful with introducing potentially breaking changes, but we
will do them if they provide value for the community in the long run.

We hope and think that deprecations will not happen frequently. If they do,
users can rely on the following procedure.

## Semantic Versioning

pypdf uses [semantic versioning](https://semver.org/). If you want to avoid
breaking changes, please use dependency pinning (also known as version pinning).
In Python, this is done by specifying the exact version you want to use in a
`requirements.txt` file. A tool that can support you is `pip-compile` from
[`pip-tools`](https://pypi.org/project/pip-tools/).

If you are using [Poetry](https://pypi.org/project/poetry/) it is done with the
`poetry.lock` file.

## How pypdf deprecates features

Assume the current version of pypdf is `x.y.z`. After a discussion (e.g., via
GitHub issues), we decide to remove a class / function / method. This is how
we do it:

1. `x.y.(z+1)`: Add a DeprecationWarning. If there is a replacement,
   the replacement is also introduced and the warning informs about the change
   and when it will happen.
   The docs let users know about the deprecation and when it will happen and the new function.
   The CHANGELOG informs about it.
2. `(x+1).0.0`: Remove / change the code in the breaking way by replacing
   DeprecationWarnings by DeprecationErrors.
   We do this to help people who didn't look at the warnings before.
   The CHANGELOG informs about it.
3. `(x+2).0.0`: The DeprecationErrors are removed.

This means the users have three warnings in the CHANGELOG, a DeprecationWarning
until the next major release and a DeprecationError until the major release
after that.

Please note that adding warnings can be a breaking change for some users; most
likely just in the CI.
This means it needs to be properly documented.


================================================
FILE: docs/dev/documentation.md
================================================
# Documentation

This documentation is built with [Sphinx](https://www.sphinx-doc.org/) and
hosted by [Read the Docs](https://about.readthedocs.com/).

## Testing code snippets

Almost all Python code snippets in the documentation are tested using Sphinx's extension
[sphinx.ext.doctest](https://www.sphinx-doc.org/en/master/usage/extensions/doctest.html).
This allows us to make sure that we have no typos, missed imports and other problems in:
- code snippets marked with `testcode` directive in `*.md` files
- code snippets from python's docstrings imported via `autoclass` directive in `*.rst` files

The CI pipeline is configured to run Sphinx's `doctest` build automatically for each PR.
It is also possible to run it locally:

1. First you need to install docs requirements

   ```bash
   pip install -r requirements/docs.txt
   ```

2. Change current directory

   ```bash
   cd docs
   ```

3. Run the `doctest` build. It indirectly uses the `sphinx-build` command line tool
   installed with the docs requirements. See
   [Sphinx's docs](https://www.sphinx-doc.org/en/master/usage/quickstart.html#running-the-build)
   for details.

   ```bash
   make doctest
   ```

4. If everything is okay, the output should show a `Doctest summary` without failures

## API Reference

### Method / Function Docstrings

We use Google-Style Docstrings:

```
def example(param1: int, param2: str) -> bool:
    """
    Example function with PEP 484 type annotations.

    Args:
      param1: The first parameter.
      param2: The second parameter.

    Returns:
      The return value. True for success, False otherwise.

    Raises:
      AttributeError: The ``Raises`` section is a list of all exceptions
        that are relevant to the interface.
      ValueError: If `param2` is equal to `param1`.

    Examples:
        Examples should be written in doctest format, and should illustrate how
        to use the function.

        >>> print([i for i in example_generator(4)])
        [0, 1, 2, 3]
    """
```

* The order of sections is (1) Args (2) Returns (3) Raises (4) Examples
* If there is no return value, remove the 'Returns' block
* Properties should not have any sections


## Issues and PRs

An issue can be used to discuss what we want to achieve.

A PR can be used to discuss how we achieve it.

## Commit Messages

We want to have descriptive commits in the `main` branch. For this reason, every
pull request (PR) is squashed. That means no matter how many commits a PR has,
in the end only one combined commit will be in `main`.

The title of the PR will be used as the first line of that combined commit message.

The first comment within the commit will be used as the message body.

See [developer intro](intro.md#commit-messages) for more details.


================================================
FILE: docs/dev/intro.md
================================================
# Developer Intro

pypdf is a library and hence its users are developers. This document is not for
the users, but for people who want to work on pypdf itself.

```{note}
Our CI (continuous integration) validates that relevant standards are met with your contribution.
Especially for regular contributors or larger changes, it is highly recommended that you set up your own development environment
to already cover the most important aspects locally. This greatly helps us to reduce the noise compared to when you open an untested
PR early and use our CI to do your debugging and improvements from there. The maintainers usually receive a notification on every push
to a branch where a corresponding PR is open, possibly hiding important notifications.
```

## Installing Requirements

```
pip install -r requirements/dev.txt
```

## Running Tests

See [testing pypdf with pytest](testing.md).

## The sample-files git submodule
The reason for having the submodule `sample-files` is that we want to keep
the size of the pypdf repository small while we also want to have an extensive
test suite. Those two goals contradict each other.

The `resources` folder should contain a select set of core examples that cover
most cases we typically want to test for. The `sample-files` might cover a lot
more edge cases, the behavior we get when file sizes get bigger, different
PDF producers.

To get the sample-files folder, you need to execute:

```
git submodule update --init
```

## Tools: git and pre-commit

Git is a command line application for version control. If you don't know it,
you can [play ohmygit](https://ohmygit.org/) to learn it.

GitHub is the service where the pypdf project is hosted. While git is free and
open source, GitHub is a paid service by Microsoft, but free in a lot of
cases.

[pre-commit](https://pypi.org/project/pre-commit/) is a command line application
that uses git hooks to automatically execute code. This allows you to avoid
style issues and other code quality issues. After you entered `pre-commit install`
once in your local copy of pypdf, it will automatically be executed when
you `git commit`.

## Commit Messages

Having a clean commit message helps people to quickly understand what the commit
is about, without actually looking at the changes. The first line of the
commit message is used to [auto-generate the CHANGELOG](https://github.com/py-pdf/pypdf/blob/main/make_release.py).
For this reason, the format should be:

```
PREFIX: DESCRIPTION

BODY
```

The `PREFIX` can be:

* `SEC`: Security improvements. Typically, an infinite loop that was possible.
* `BUG`: A bug was fixed. Likely there are one or multiple issues. Then write in
   the `BODY`: `Closes #123` where 123 is the issue number on GitHub.
   It would be absolutely amazing if you could write a regression test in those
   cases. That is a test that would fail without the fix.
   A bug is always an issue for pypdf users - test code or CI that was fixed is
   not considered a bug here.
* `ENH`: A new feature! Describe in the body what it can be used for.
* `DEP`: Deprecation. Either marking something as "this is going to be removed"
   or actually removing it.
* `PI`: A performance improvement. This could also be a reduction in the
        file size of PDF files generated by pypdf.
* `ROB`: A robustness change. Dealing better with broken PDF files.
* `DOC`: A documentation change.
* `TST`: Adding or adjusting tests.
* `DEV`: Developer experience improvements, e.g., pre-commit or setting up CI.
* `MAINT`: Quite a lot of different stuff. Performance improvements are, for sure,
           the most interesting changes in here. Refactorings as well.
* `STY`: A style change. Something that makes pypdf code more consistent.
         Typically, a small change. It could also be better error messages for
         end users.

The prefix is used to generate the CHANGELOG. Every PR must have exactly one -
if you feel like several match, take the top one from this list that matches for
your PR.

## Pull Request Size

Smaller Pull Requests (PRs) are preferred as it's typically easier to merge
them. For example, if you have some typos, a few code-style changes, a new
feature, and a bug-fix, that could be three or four PRs.

A PR must be complete. That means if you introduce a new feature, it must be
finished within the PR and have a test for that feature.

## Benchmarks

We need to keep an eye on performance, and thus we have a few benchmarks.

See [py-pdf.github.io/pypdf/dev/bench](https://py-pdf.github.io/pypdf/dev/bench/)


================================================
FILE: docs/dev/pdf-format.md
================================================
# The PDF Format

It is recommended to look in the PDF specification for details and clarifications.

* [PDF Specification Archive](https://pdfa.org/resource/pdf-specification-archive/)
* [Portable Document Format Reference Manual, 1993. ISBN 0-201-62628-4](https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/pdfreference1.0.pdf)
* [ISO 32000-1:2008 (PDF 1.7)](https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf)
* ISO 32000-2:2020 (PDF 2.0)

```{note}
We currently generate files with a header for PDF 1.3 by default. At the same time, we strive
to support the PDF 1.7 specification.

Features specific to PDF 2.0 might be available, but we always ensure that older versions do
not break due to the rather limited general PDF 2.0 support in the wild and to not break for
old PDF files. For this reason, some historical aspects (like insecure encryption algorithms)
are required to be supported, although PDF 2.0 deprecates most of them and allows more secure
variants.
```

The following is only intended to give a very rough overview of the format.

## Overall Structure

A PDF consists of:

1. Header: Contains the version of the PDF, e.g. `%PDF-1.7`
2. Body: Contains a sequence of indirect objects
3. Cross-reference table (xref): Contains a list of the indirect objects in the body
4. Trailer

## The xref table

A cross-reference table (xref) is a table of the indirect objects in the body.
It allows quick access to those objects by pointing to their location in the file.

It looks like this:

```text
xref 42 5
0000001000 65535 f
0000001234 00000 n
0000001987 00000 n
0000011987 00000 n
0000031987 00000 n
```

Let's go through it step-by-step:

* `xref` is just a keyword that specifies the start of the xref table.
* `42` is the numerical ID of the first object in this xref section; `5` is the number of entries in the xref table.
* Now every object has 3 entries `nnnnnnnnnn ggggg n`: a 10-digit byte offset,
  a 5-digit generation number, and a literal keyword which is either `n` or `f`.
    * `nnnnnnnnnn` is the byte offset of the object. It tells the reader where
      the object is in the file.
    * `ggggg` is the generation number. It tells the reader how old the object is.
    * `n` means that the object is a normal in-use object, `f` means that the object
      is a free object.
        * The first free object always has a generation number of 65535. It forms
          the head of a linked-list of all free objects.
        * The generation number of a normal object is always 0. The generation
          number allows the PDF format to contain multiple versions of the same
          object. This is a version history mechanism.

## The body

The body is a sequence of indirect objects:

`counter generation_number obj << the_object >> endobj`

* `counter` (integer) is a unique identifier for the object.
* `generation_number` (integer) is the generation number of the object.
* `obj` is the keyword that marks the start of the object.
* `the_object` is the object itself. It can be empty. Starts with `/Keyword` to
  specify which kind of object it is.
* `endobj` marks the end of the object.

A concrete example can be found in `test_reader.py::test_get_images_raw`:

```text
1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj
2 0 obj << >> endobj
3 0 obj << >> endobj
4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]
 /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R
 /Resources << /Font << >> >>
 /Rotate 0 /Type /Page >> endobj
5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj
```

## The trailer

The trailer looks like this:

```text
trailer << /Root 5 0 R
           /Size 6
        >>
startxref 1234
%%EOF
```

Let's go through it:

* `trailer <<` indicates that the *trailer dictionary* starts. It ends with `>>`.
* `startxref` is a keyword followed by the byte-location of the `xref` keyword.
  As the trailer is always at the bottom of the file, this allows readers to
  quickly find the xref table.
* `%%EOF` is the end-of-file marker.

The trailer dictionary is a key-value list. The keys are specified in
Table 15 of the PDF Reference 1.7, e.g. `/Root` and `/Size` (both are required).

* `/Root` (dictionary) contains the document catalog.
    * The `5` is the object number of the catalog dictionary.
    * `0` is the generation number of the catalog dictionary.
    * `R` is the keyword that indicates that the object is a reference to the
      catalog dictionary.
* `/Size` (integer) contains the total number of entries in the file's xref table.


## Reading PDF files

Most PDF files are compressed. If you want to read them, first uncompress them:

```bash
pdftk crazyones.pdf output crazyones-uncomp.pdf uncompress
```

Then rename `crazyones-uncomp.pdf` to `crazyones-uncomp.txt` and open it in
your favorite IDE / text editor.


================================================
FILE: docs/dev/pypdf-parsing.md
================================================
# How pypdf parses PDF files

pypdf uses {class}`~pypdf.PdfReader` to parse PDF files.
The method {py:meth}`PdfReader.read <pypdf.PdfReader.read>` shows the basic
structure of parsing:

1. **Finding and reading the cross-reference tables / trailer**: The
   cross-reference table (xref table) is a table of byte offsets that indicate
   the locations of objects within the file. The trailer provides additional
   information such as the root object (Catalog) and the Info object containing
   metadata.
2. **Parsing the objects**: After locating the xref table and the trailer, pypdf
   proceeds to parse the objects in the PDF. Objects in a PDF can be of various
   types such as dictionaries, arrays, streams, and simple data types (e.g.,
   integers, strings). pypdf parses these objects and stores them in
   {py:meth}`PdfReader.resolved_objects <pypdf.PdfReader.resolved_objects>`,
   populated by {py:meth}`cache_indirect_object <pypdf.PdfReader.cache_indirect_object>`.
3. **Decoding content streams**: The content of a PDF is typically stored in
   content streams, which are sequences of PDF operators and operands. pypdf
   decodes these content streams by applying filters (e.g., `FlateDecode`,
   `LZWDecode`) specified in the stream's dictionary. This is only done when the
   object is requested by {py:meth}`PdfReader.get_object <pypdf.PdfReader.get_object>`
   which uses the `PdfReader._get_object_from_stream` method.

## References

[PDF 1.7 specification](https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf):
* 7.5 File Structure
* 7.5.4 Cross-Reference Table
* 7.8 Content Streams and Resources


================================================
FILE: docs/dev/pypdf-writing.md
================================================
# How pypdf writes PDF files

pypdf uses {py:class}`PdfWriter <pypdf.PdfWriter>` to write PDF files. pypdf has
{py:class}`PdfObject <pypdf.generic.PdfObject>` and several subclasses with the
{py:meth}`write_to_stream <pypdf.generic.PdfObject.write_to_stream>` method.
The {py:meth}`PdfWriter.write <pypdf.PdfWriter.write>` method uses the
`write_to_stream` methods of the referenced objects.

The {py:meth}`PdfWriter.write_stream <pypdf.PdfWriter.write_stream>` method
has the following core steps:

1. `_sweep_indirect_references`: This step ensures that any circular references
   to objects are correctly handled. It adds the object reference numbers of any
   circularly referenced objects to an external reference map, so that
   self-page-referencing trees can reference the correct new object location,
   rather than copying in a new copy of the page object.
2. **Write the File Header and Body** with `_write_pdf_structure`: In this step,
   the PDF header and objects are written to the output stream. This includes
   the PDF version (e.g., %PDF-1.7) and the objects that make up the content of
   the PDF, such as pages, annotations, and form fields. The locations (byte
   offsets) of these objects are stored for later use in generating the xref
   table.
3. **Write the Cross-Reference Table** with `_write_xref_table`: Using the stored
   object locations, this step generates and writes the cross-reference table
   (xref table) to the output stream. The cross-reference table contains the
   byte offsets for each object in the PDF file, allowing for quick random
   access to objects when reading the PDF.
4. **Write the File Trailer** with `_write_trailer`: The trailer is written to
   the output stream in this step. The trailer contains essential information,
   such as the number of objects in the PDF, the location of the root object
   (Catalog), and the Info object containing metadata. The trailer also
   specifies the location of the xref table.


## How others do it

Looking at alternative software designs and implementations can help to improve
our choices.

### fpdf2

[fpdf2](https://pypi.org/project/fpdf2/) has a [`PDFObject` class](https://github.com/PyFPDF/fpdf2/blob/master/fpdf/syntax.py)
with a serialize method which roughly maps to `pypdf.PdfObject.write_to_stream`.
Some other similarities include:

* [fpdf.output.OutputProducer.buffersize](https://github.com/PyFPDF/fpdf2/blob/master/fpdf/output.py#L370-L485) vs. {py:meth}`pypdf.PdfWriter.write_stream <pypdf.PdfWriter.write_stream>`
* [fpdf.syntax.Name](https://github.com/PyFPDF/fpdf2/blob/master/fpdf/syntax.py#L124) vs. {py:class}`pypdf.generic.NameObject <pypdf.generic.NameObject>`
* [fpdf.syntax.build_obj_dict](https://github.com/PyFPDF/fpdf2/blob/master/fpdf/syntax.py#L222) vs. {py:class}`pypdf.generic.DictionaryObject <pypdf.generic.DictionaryObject>`
* [fpdf.structure_tree.NumberTree](https://github.com/PyFPDF/fpdf2/blob/master/fpdf/structure_tree.py#L17) vs. {py:class}`pypdf.generic.TreeObject <pypdf.generic.TreeObject>`


### pdfrw

[pdfrw](https://pypi.org/project/pdfrw/), in contrast, seems to work more with
the standard Python objects (bool, float, string) and not wrap them in custom
objects, if possible. It still has:

* [PdfArray](https://github.com/pmaupin/pdfrw/blob/master/pdfrw/objects/pdfarray.py#L13)
* [PdfDict](https://github.com/pmaupin/pdfrw/blob/master/pdfrw/objects/pdfdict.py#L49)
* [PdfName](https://github.com/pmaupin/pdfrw/blob/master/pdfrw/objects/pdfname.py#L65)
* [PdfString](https://github.com/pmaupin/pdfrw/blob/master/pdfrw/objects/pdfstring.py#L322)
* [PdfIndirect](https://github.com/pmaupin/pdfrw/blob/master/pdfrw/objects/pdfindirect.py#L10)

The core classes of pdfrw are
[PdfReader](https://github.com/pmaupin/pdfrw/blob/master/pdfrw/pdfreader.py#L26)
and
[PdfWriter](https://github.com/pmaupin/pdfrw/blob/master/pdfrw/pdfwriter.py#L224)


================================================
FILE: docs/dev/releasing.md
================================================
# Releasing

A `pypdf` release contains the following artifacts:

* A new [release on PyPI](https://pypi.org/project/pypdf/)
* A [release commit](https://github.com/py-pdf/pypdf/commit/91391b18bb8ec9e6e561e2795d988e8634a01a50)
    * Containing a changelog update
    * A new [git tag](https://github.com/py-pdf/pypdf/tags)
        * A [GitHub release](https://github.com/py-pdf/pypdf/releases/tag/3.15.0)

## Who does it?

`pypdf` should typically only be released by one of the core maintainers / the
core maintainer. At the moment, this usually is stefan6419846.

Any owner of the py-pdf organization also has the technical permissions to
release.

## How is it done?

### With direct push permissions

This is the typical way for the core maintainer/benevolent dictator.

The release contains the following steps:

1. Update the CHANGELOG.md and the _version.py via `python make_release.py`.
   This also prepares the release commit message.
2. Create a release commit: `git commit -eF RELEASE_COMMIT_MSG.md`.
3. Push commit: `git push`.
4. Create the tag: `git tag -s 6.7.1 -eF RELEASE_COMMIT_MSG.md`.
5. Push the tag: `git push origin 6.7.1`.
6. CI now builds a source and a wheels package which it pushes to PyPI. It also
   creates the corresponding GitHub release.

![](../_static/releasing.drawio.png)

### Using a Pull Request

This is the typical way for collaborators who do not have direct push permissions for
the `main` branch.

The release contains the following steps:

1. Update the CHANGELOG.md and the _version.py via `python make_release.py`.
   This also prepares the release commit message.
2. Push the changes to a dedicated branch.
3. Open a pull request starting with `REL: `, followed by the new version number.
4. Wait for the approval of another eligible maintainer.
5. Merge the pull request with the name being the PR title and the body being
   the content of `RELEASE_COMMIT_MSG.md`.
6. Create the tag: `git tag -s 6.7.1 -eF RELEASE_COMMIT_MSG.md`.
7. Push the tag: `git push origin 6.7.1`.
8. CI now builds a source and a wheels package which it pushes to PyPI. It also
   creates the corresponding GitHub release.

### The Release Tag

* Use the release version as the tag name. No need for a leading "v".
* Use the changelog entry as the body.


## When are releases done?

There is no need to wait for anything. If the CI is green (all tests succeeded),
we can release.

At the moment, there is no fixed release cycle - except that we usually release
on Sunday.


================================================
FILE: docs/dev/testing.md
================================================
# Testing

pypdf uses [`pytest`](https://docs.pytest.org/en/7.1.x/) for testing.

To run the tests, you need to install the CI (Continuous Integration) requirements by running `pip install -r requirements/ci.txt` or
`pip install -r requirements/ci-3.11.txt` if running Python ≥ 3.11.

## Deselecting groups of tests

pypdf makes use of the following pytest markers:

* `slow`: Tests that require more than 5 seconds.
* `samples`: Tests that require [the `sample-files` git submodule](https://github.com/py-pdf/sample-files) to be initialized. As of October 2022, this is about 25 MB.
* `enable_socket`: Tests that download PDF documents. They are stored locally and thus only need to be downloaded once. As of October 2022, this is about 200 MB.
  * To successfully run the tests, please download most of the documents beforehand: `python -c "from tests import download_test_pdfs; download_test_pdfs()"`

You can disable them by `pytest -m "not enable_socket"` or `pytest -m "not samples"`.
You can even disable all of them: `pytest -m "not enable_socket" -m "not samples" -m "not slow"`.

Please note that this reduces test coverage. The CI will always test all files.

## Docstrings in Unit tests

The first line of a docstring in a unit test should be written in a way that
you could prefix it with "This test ensures that ...", e.g.

* Invalid XML in xmp_metadata is gracefully handled.
* The identity is returning its input.
* xmp_modify_date is extracted correctly.

This way, plugins like [`pytest-testdox`](https://pypi.org/project/pytest-testdox/)
can generate really nice output when the tests are running. This looks similar
to the output of [mocha.js](https://mochajs.org/).

If the test is a regression test, write

> This test is a regression test for issue #1234

If the regression test is just one parameter of other tests, then add it as
a comment for that parameter.

## Evaluate a PR in-progress version

You may want to test a version from a PR which has not been released yet.
The easiest way is to use pip and install a version from git:

a) Go to the PR and identify the repository and branch.

Example from below: repository: __pubpub-zz__ / branch: __iss2200__:
![PR Header example](PR_Header_example.png)

b) You can then install the version using pip from git:

Example:
```
pip install git+https://github.com/pubpub-zz/pypdf.git@iss2200
```


================================================
FILE: docs/index.rst
================================================
.. pypdf documentation main file, created by
   sphinx-quickstart on Thu Apr  7 20:13:19 2022.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Welcome to pypdf
=================

pypdf is a `free <https://en.wikipedia.org/wiki/Free_software>`_ and open
source pure-python PDF library capable of splitting,
merging, cropping, and transforming the pages of PDF files. It can also add
custom data, viewing options, and passwords to PDF files.
pypdf can retrieve text and metadata from PDFs as well.

See `pdfly <https://github.com/py-pdf/pdfly>`_ for a CLI application that uses pypdf to interact with PDFs.

You can contribute to `pypdf on GitHub <https://github.com/py-pdf/pypdf>`_.

.. toctree::
   :caption: User Guide
   :maxdepth: 1

   user/installation
   user/robustness
   user/security
   user/suppress-warnings
   user/metadata
   user/extract-text
   user/post-processing-in-text-extraction
   user/extract-images
   user/handle-attachments
   user/encryption-decryption
   user/merging-pdfs
   user/cropping-and-transforming
   user/reading-pdf-annotations
   user/adding-pdf-annotations
   user/add-watermark
   user/add-javascript
   user/viewer-preferences
   user/forms
   user/handling-outlines
   user/streaming-data
   user/file-size
   user/pdf-version-support
   user/pdfa-compliance


.. toctree::
   :caption: API Reference
   :maxdepth: 1

   modules/PdfReader
   modules/PdfWriter
   modules/Destination
   modules/DocumentInformation
   modules/Field
   modules/Fit
   modules/PageObject
   modules/PageRange
   modules/PaperSize
   modules/RectangleObject
   modules/Transformation
   modules/XmpInformation
   modules/annotations
   modules/constants
   modules/errors
   modules/generic
   modules/PdfDocCommon

.. toctree::
   :caption: Developer Guide
   :maxdepth: 1

   dev/intro
   dev/pdf-format
   dev/pypdf-parsing
   dev/pypdf-writing
   dev/cmaps
   dev/deprecations
   dev/documentation
   dev/testing
   dev/releasing

.. toctree::
   :caption: About pypdf
   :maxdepth: 1

   meta/CHANGELOG
   meta/changelog-v1
   meta/migration-1-to-2
   meta/project-governance
   meta/taking-ownership
   meta/history
   meta/CONTRIBUTORS
   meta/scope-of-pypdf
   meta/comparisons
   meta/faq

Indices and tables
==================

* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`


================================================
FILE: docs/make.bat
================================================
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd


================================================
FILE: docs/meta/changelog-v1.md
================================================
# Changelog of PyPDF2 1.X

## Version 1.28.4, 2022-05-29

Bug Fixes (BUG):
-  XmpInformation._converter_date was unusable (#921)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.28.3...1.28.4)

## Version 1.28.3, 2022-05-28

### Deprecations (DEP)
-  PEP8 renaming (#905)

### Bug Fixes (BUG)
-  XmpInformation missing method _getText (#917)
-  Fix PendingDeprecationWarning on _merge_page (#904)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.28.2...1.28.3)

## Version 1.28.2, 2022-05-23

### Bug Fixes (BUG)
-  PendingDeprecationWarning for getContents (#893)
-  PendingDeprecationWarning on using PdfMerger (#891)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.28.1...1.28.2)

## Version 1.28.1, 2022-05-22

### Bug Fixes (BUG)
-  Incorrectly show deprecation warnings on internal usage (#887)

### Maintenance (MAINT)
-  Add stacklevel=2 to deprecation warnings (#889)
-  Remove duplicate warnings imports (#888)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.28.0...1.28.1)

## Version 1.28.0, 2022-05-22

This release adds a lot of deprecation warnings in preparation of the
PyPDF2 2.0.0 release. The changes are mostly using snake_case function-, method-,
and variable-names as well as using properties instead of getter-methods.

Maintenance (MAINT):
-  Remove IronPython Fallback for zlib (#868)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.12...1.28.0)

### Deprecations (DEP)

* Make the `PyPDF2.utils` module private
* Rename of core classes:
  * PdfFileReader ➔ PdfReader
  * PdfFileWriter ➔ PdfWriter
  * PdfFileMerger ➔ PdfMerger
* Use PEP8 conventions for function names and parameters
* If a property and a getter-method are both present, use the property

#### Details

In many places:
  - getObject ➔ get_object
  - writeToStream ➔ write_to_stream
  - readFromStream ➔ read_from_stream

PyPDF2.generic
  - readObject ➔ read_object
  - convertToInt ➔ convert_to_int
  - DocumentInformation.getText ➔ DocumentInformation._get_text :
    This method should typically not be used; please let me know if you need it.

PdfReader class:
  - `reader.getPage(pageNumber)` ➔ `reader.pages[page_number]`
  - `reader.getNumPages()` / `reader.numPages` ➔ `len(reader.pages)`
  - getDocumentInfo ➔ metadata
  - flattenedPages attribute ➔ flattened_pages
  - resolvedObjects attribute ➔ resolved_objects
  - xrefIndex attribute ➔ xref_index
  - getNamedDestinations / namedDestinations attribute ➔ named_destinations
  - getPageLayout / pageLayout ➔ page_layout attribute
  - getPageMode / pageMode ➔ page_mode attribute
  - getIsEncrypted / isEncrypted ➔ is_encrypted attribute
  - getOutlines ➔ get_outlines
  - readObjectHeader ➔ read_object_header
  - cacheGetIndirectObject ➔ cache_get_indirect_object
  - cacheIndirectObject ➔ cache_indirect_object
  - getDestinationPageNumber ➔ get_destination_page_number
  - readNextEndLine ➔ read_next_end_line
  - _zeroXref ➔ _zero_xref
  - _authenticateUserPassword ➔ _authenticate_user_password
  - _pageId2Num attribute ➔ _page_id2num
  - _buildDestination ➔ _build_destination
  - _buildOutline ➔ _build_outline
  - _getPageNumberByIndirect(indirectRef) ➔ _get_page_number_by_indirect(indirect_ref)
  - _getObjectFromStream ➔ _get_object_from_stream
  - _decryptObject ➔ _decrypt_object
  - _flatten(..., indirectRef) ➔ _flatten(..., indirect_ref)
  - _buildField ➔ _build_field
  - _checkKids ➔ _check_kids
  - _writeField ➔ _write_field
  - _write_field(..., fieldAttributes) ➔ _write_field(..., field_attributes)
  - _read_xref_subsections(..., getEntry, ...) ➔ _read_xref_subsections(..., get_entry, ...)

PdfWriter class:
  - `writer.getPage(pageNumber)` ➔ `writer.pages[page_number]`
  - `writer.getNumPages()` ➔ `len(writer.pages)`
  - addMetadata ➔ add_metadata
  - addPage ➔ add_page
  - addBlankPage ➔ add_blank_page
  - addAttachment(fname, fdata) ➔ add_attachment(filename, data)
  - insertPage ➔ insert_page
  - insertBlankPage ➔ insert_blank_page
  - appendPagesFromReader ➔ append_pages_from_reader
  - updatePageFormFieldValues ➔ update_page_form_field_values
  - cloneReaderDocumentRoot ➔ clone_reader_document_root
  - cloneDocumentFromReader ➔ clone_document_from_reader
  - getReference ➔ get_reference
  - getOutlineRoot ➔ get_outline_root
  - getNamedDestRoot ➔ get_named_dest_root
  - addBookmarkDestination ➔ add_bookmark_destination
  - addBookmarkDict ➔ add_bookmark_dict
  - addBookmark ➔ add_bookmark
  - addNamedDestinationObject ➔ add_named_destination_object
  - addNamedDestination ➔ add_named_destination
  - removeLinks ➔ remove_links
  - removeImages(ignoreByteStringObject) ➔ remove_images(ignore_byte_string_object)
  - removeText(ignoreByteStringObject) ➔ remove_text(ignore_byte_string_object)
  - addURI ➔ add_uri
  - addLink ➔ add_link
  - getPage(pageNumber) ➔ get_page(page_number)
  - getPageLayout / setPageLayout / pageLayout ➔ page_layout attribute
  - getPageMode / setPageMode / pageMode ➔ page_mode attribute
  - _addObject ➔ _add_object
  - _addPage ➔ _add_page
  - _sweepIndirectReferences ➔ _sweep_indirect_references

PdfMerger class
  - `__init__` parameter: strict=True ➔ strict=False (the PdfFileMerger still has the old default)
  - addMetadata ➔ add_metadata
  - addNamedDestination ➔ add_named_destination
  - setPageLayout ➔ set_page_layout
  - setPageMode ➔ set_page_mode

Page class:
  - artBox / bleedBox/ cropBox/ mediaBox / trimBox ➔ artbox / bleedbox/ cropbox/ mediabox / trimbox
    - getWidth, getHeight  ➔ width / height
    - getLowerLeft_x / getUpperLeft_x ➔ left
    - getUpperRight_x / getLowerRight_x ➔ right
    - getLowerLeft_y / getLowerRight_y ➔ bottom
    - getUpperRight_y / getUpperLeft_y ➔ top
    - getLowerLeft / setLowerLeft ➔ lower_left property
    - upperRight ➔ upper_right
  - mergePage ➔ merge_page
  - rotateClockwise / rotateCounterClockwise ➔ rotate_clockwise
  - _mergeResources ➔ _merge_resources
  - _contentStreamRename ➔ _content_stream_rename
  - _pushPopGS ➔ _push_pop_gs
  - _addTransformationMatrix ➔ _add_transformation_matrix
  - _mergePage ➔ _merge_page

XmpInformation class:
  - getElement(..., aboutUri, ...) ➔ get_element(..., about_uri, ...)
  - getNodesInNamespace(..., aboutUri, ...) ➔ get_nodes_in_namespace(..., about_uri, ...)
  - _getText ➔ _get_text

utils.py:
  - matrixMultiply ➔ matrix_multiply
  - RC4_encrypt is moved to the security module

## Version 1.27.12, 2022-05-02

### Bug Fixes (BUG)
-  _rebuild_xref_table expects trailer to be a dict (#857)

### Documentation (DOC)
-  Security Policy

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.11...1.27.12)

## Version 1.27.11, 2022-05-02

### Bug Fixes (BUG)
-  Incorrectly issued xref warning/exception (#855)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.10...1.27.11)

## Version 1.27.10, 2022-05-01

### Robustness (ROB)
-  Handle missing destinations in reader (#840)
-  warn-only in readStringFromStream (#837)
-  Fix corruption in startxref or xref table (#788 and #830)

### Documentation (DOC)
-  Project Governance (#799)
-  History of PyPDF2
-  PDF feature/version support (#816)
-  More details on text parsing issues (#815)

### Developer Experience (DEV)
-  Add benchmark command to Makefile
-  Ignore IronPython parts for code coverage (#826)

### Maintenance (MAINT)
-  Split pdf module (#836)
-  Separated CCITTFax param parsing/decoding (#841)
-  Update requirements files

### Testing (TST)
-  Use external repository for larger/more PDFs for testing (#820)
-  Swap incorrect test names (#838)
-  Add test for PdfFileReader and page properties (#835)
-  Add tests for PyPDF2.generic (#831)
-  Add tests for utils, form fields, PageRange (#827)
-  Add test for ASCII85Decode (#825)
-  Add test for FlateDecode (#823)
-  Add test for filters.ASCIIHexDecode (#822)

### Code Style (STY)
-  Apply pre-commit (black, isort) + use snake_case variables (#832)
-  Remove debug code (#828)
-  Documentation, Variable names (#839)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.9...1.27.10)

## Version 1.27.9, 2022-04-24

A change I would like to highlight is the performance improvement for
large PDF files (#808) 🎉

### New Features (ENH)
-  Add papersizes (#800)
-  Allow setting permission flags when encrypting (#803)
-  Allow setting form field flags (#802)

### Bug Fixes (BUG)
-  TypeError in xmp._converter_date (#813)
-  Improve spacing for text extraction (#806)
-  Fix PDFDocEncoding Character Set (#809)

### Robustness (ROB)
-  Use null ID when encrypted but no ID given (#812)
-  Handle recursion error (#804)

### Documentation (DOC)
-  CMaps (#811)
-  The PDF Format + commit prefixes (#810)
-  Add compression example (#792)

### Developer Experience (DEV)
-  Add Benchmark for Performance Testing (#781)

### Maintenance (MAINT)
-  Validate PDF magic byte in strict mode (#814)
-  Make PdfFileMerger.addBookmark() behave like PdfFileWriter's (#339)
-  Quadratic runtime while parsing reduced to linear (#808)

### Testing (TST)
-  Newlines in text extraction (#807)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.8...1.27.9)

## Version 1.27.8, 2022-04-21

### Bug Fixes (BUG)
-  Use 1MB as offset for readNextEndLine (#321)
-  'PdfFileWriter' object has no attribute 'stream' (#787)

### Robustness (ROB)
-  Invalid float object; use 0 as fallback (#782)

### Documentation (DOC)
-  Robustness (#785)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.7...1.27.8)

## Version 1.27.7, 2022-04-19

### Bug Fixes (BUG)
- Import exceptions from PyPDF2.errors in PyPDF2.utils (#780)

### Code Style (STY)
-  Naming in 'make_changelog.py'

## Version 1.27.6, 2022-04-18

### Deprecations (DEP)
-  Remove support for Python 2.6 and older (#776)

### New Features (ENH)
-  Extract document permissions (#320)

### Bug Fixes (BUG)
-  Clip by trimBox when merging pages, which would otherwise be ignored (#240)
-  Add overwriteWarnings parameter PdfFileMerger (#243)
-  IndexError for getPage() of decrypted file (#359)
-  Handle cases where decodeParms is an ArrayObject (#405)
-  Updated PDF fields don't show up when page is written (#412)
-  Set Linked Form Value (#414)
-  Fix zlib -5 error for corrupt files (#603)
-  Fix reading more than last1K for EOF (#642)
-  Accidental import

### Robustness (ROB)
-  Allow extra whitespace before "obj" in readObjectHeader (#567)

### Documentation (DOC)
-  Link to pdftoc in Sample_Code (#628)
-  Working with annotations (#764)
-  Structure history

### Developer Experience (DEV)
-  Add issue templates (#765)
-  Add tool to generate changelog

### Maintenance (MAINT)
-  Use grouped constants instead of string literals (#745)
-  Add error module (#768)
-  Use decorators for @staticmethod (#775)
-  Split long functions (#777)

### Testing (TST)
-  Run tests in CI once with -OO Flags (#770)
-  Filling out forms (#771)
-  Add tests for Writer (#772)
-  Error cases (#773)
-  Check Error messages (#769)
-  Regression test for issue #88
-  Regression test for issue #327

### Code Style (STY)
-  Make variable naming more consistent in tests


[Full changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.5...1.27.6)

## Version 1.27.5, 2022-04-15

### Security (SEC)

- ContentStream_readInlineImage had potential infinite loop (#740)

### Bug fixes (BUG)

- Fix merging encrypted files (#757)
- CCITTFaxDecode decodeParms can be an ArrayObject (#756)

### Robustness improvements (ROBUST)

- title sometimes None (#744)

### Documentation (DOC)

- Adjust short description of the package

### Tests and Test setup (TST)

- Rewrite JS tests from unittest to pytest (#746)
- Increase Test coverage, mainly with filters (#756)
- Add test for inline images (#758)

### Developer Experience Improvements (DEV)

- Remove unused Travis-CI configuration (#747)
- Show code coverage (#754, #755)
- Add mutmut (#760)

### Miscellaneous

- STY: Closing file handles, explicit exports, ... (#743)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.4...1.27.5)


## Version 1.27.4, 2022-04-12

### Bug fixes (BUG)

- Guard formatting of `__init__.__doc__` string (#738)

### Packaging (PKG)

- Add more precise license field to setup (#733)

### Testing (TST)

- Add test for issue #297

### Miscellaneous

- DOC: Miscallenious ➔ Miscellaneous (Typo)
- TST: Fix CI triggering (master ➔ main) (#739)
- STY: Fix various style issues (#742)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.3...1.27.4)

## Version 1.27.3, 2022-04-10

- PKG: Make Tests not a subpackage (#728)
- BUG: Fix ASCII85Decode.decode assertion (#729)
- BUG: Error in Chinese character encoding (#463)
- BUG: Code duplication in Scripts/2-up.py
- ROBUST: Guard 'obj.writeToStream' with 'if obj is not None'
- ROBUST: Ignore a /Prev entry with value 0 in the trailer
- MAINT: Remove Sample_Code (#726)
- TST: Close file handle in test_writer (#722)
- TST: Fix test_get_images (#730)
- DEV: Make tox use pytest and add more Python versions (#721)
- DOC: Many (#720, #723-725, #469)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.2...1.27.3)

## Version 1.27.2, 2022-04-09

- Add Scripts (including `pdfcat`), Resources, Tests, and Sample_Code back to
  PyPDF2. They were removed by accident in 1.27.0, but might get removed with 2.0.0.
  See [discussions/718](https://github.com/py-pdf/PyPDF2/discussions/718).

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.1...1.27.2)

## Version 1.27.1, 2022-04-08

- Fixed project links on PyPI page after migration from mstamy2
  to MartinThoma to the py-pdf organization on GitHub
- Documentation is now at [pypdf2.readthedocs.io](https://pypdf2.readthedocs.io/en/latest/)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.27.0...1.27.1)

## Version 1.27.0, 2022-04-07

Features:

 - Add alpha channel support for png files in Script (#614)

### Bug fixes (BUG)

 - Fix formatWarning for filename without slash (#612)
 - Add whitespace between words for extractText() (#569, #334)
 - "invalid escape sequence" SyntaxError (#522)
 - Avoid error when printing warning in pythonw (#486)
 - Stream operations can be List or Dict (#665)

### Documentation (DOC)

 - Added Scripts/pdf-image-extractor.py
 - Documentation improvements (#550, #538, #324, #426, #394)

### Tests and Test setup (TST)

 - Add GitHub Action which automatically runs unit tests via pytest and
   static code analysis with Flake8 (#660)
 - Add several unit tests (#661, #663)
 - Add .coveragerc to create coverage reports

### Developer Experience Improvements (DEV)

 - Pre commit: Developers can now `pre-commit install` to avoid tiny issues like trailing whitespaces

### Miscellaneous

 - Add the LICENSE file to the distributed packages (#288)
 - Use setuptools instead of distutils (#599)
 - Improvements for the PyPI page (#644)
 - Python 3 changes (#504, #366)

[Full Changelog](https://github.com/py-pdf/PyPDF2/compare/1.26.0...1.27.0)

## Version 1.26.0, 2016-05-18

 - NOTE: Active maintenance on PyPDF2 is resuming after a hiatus

 - Fixed a bug where image resources were incorrectly
   overwritten when merging pages

 - Added dictionary for JavaScript actions to the root (louib)

 - Added unit tests for the JS functionality (louib)

 - Add more Python 3 compatibility when reading inline images (im2703
   and VyacheslavHashov)

 - Return NullObject instead of raising error when failing to resolve
   object (ctate)

 - Don't output warning for non-zeroed xref table when strict=False
   (BenRussert)

 - Remove extraneous zeroes from output formatting (speedplane)

 - Fix bug where reading an inline image would cut off prematurely
   in certain cases (speedplane)

## Version 1.25.1, 2015-07-20

 - Fix bug when parsing inline images. Occurred when merging
   certain pages with inline images

 - Fixed type error when creating outlines by utilizing the
   isString() test

## Version 1.25, 2015-07-07

BUGFIXES:

 - Added Python 3 algorithm for ASCII85Decode. Fixes issue when
   reading reportlab-generated files with Py 3 (jerickbixly)

 - Recognize more escape sequences which would otherwise throw an
   exception (manuelzs, robertsoakes)

 - Fixed overflow error in generic.py. Occurred
   when reading a too-large int in Python 2 (by Raja Jamwal)

 - Allow access to files which were encrypted with an empty
   password. Previously threw a "File has not been decrypted"
   exception (Elena Williams)

 - Do not attempt to decode an empty data stream. Previously
   would cause an error in decode algorithms (vladir)

 - Fixed some type issues specific to Py 2 or Py 3

 - Fix issue when stream data begins with whitespace (soloma83)

 - Recognize abbreviated filter names (AlmightyOatmeal and
   Matthew Weiss)

 - Copy decryption key from PdfFileReader to PdfFileMerger.
   Allows usage of PdfFileMerger with encrypted files (twolfson)

 - Fixed bug which occurred when a NameObject is present at end
   of a file stream. Threw a "Stream has ended unexpectedly"
   exception (speedplane)

FEATURES:

 - Initial work on a test suite; to be expanded in future.
   Tests and Resources directory added, README updated (robertsoakes)

 - Added document cloning methods to PdfFileWriter:
   appendPagesFromReader, cloneReaderDocumentRoot, and
   cloneDocumentFromReader. See official documentation (robertsoakes)

 - Added method for writing to form fields: updatePageFormFieldValues.
   This will be enhanced in the future. See official documentation
   (robertsoakes)

 - New addAttachment method. See documentation. Support for adding
   and extracting embedded files to be enhanced in the future
   (moshekaplan)

 - Added methods to get page number of given PageObject or
   Destination: getPageNumber and getDestinationPageNumber.
   See documentation (mozbugbox)

OTHER ENHANCEMENTS:

 - Enhanced type handling (Brent Amrhein)

 - Enhanced exception handling in NameObject (sbywater)

 - Enhanced extractText method output (peircej)

 - Better exception handling

 - Enhanced regex usage in NameObject class (speedplane)


## Version 1.24, 2014-12-31

 - Bugfixes for reading files in Python 3 (by Anthony Tuininga and
   pqqp)

 - Appropriate errors are now raised instead of infinite loops (by
   naure and Cyrus Vafadari)

 - Bugfix for parsing number tokens with leading spaces (by Maxim
   Kamenkov)

 - Don't crash on bad /Outlines reference (by eshellman)

 - Conform tabs/spaces and blank lines to PEP 8 standards

 - Utilize the readUntilRegex method when reading Number Objects
   (by Brendan Jurd)

 - More bugfixes for Python 3 and clearer exception handling

 - Fixed encoding issue in merger (with eshellman)

 - Created separate folder for scripts


## Version 1.23, 2014-08-11

 - Documentation now available at pythonhosted.org

 - Bugfix in pagerange.py for when `__init__.__doc__` has no value (by
   Vladir Cruz)

 - Fix typos in OutlinesObject().add() (by shilluc)

 - Re-added a missing return statement in a utils.py method

 - Corrected viewing mode names (by Jason Scheirer)

 - New PdfFileWriter method: addJS() (by vfigueiro)

 - New bookmark features: color, boldness, italics, and page fit
   (by Joshua Arnott)

 - New PdfFileReader method: getFields(). Used to extract field
   information from PDFs with interactive forms. See documentation
   for details

 - Converted README file to markdown format (by Stephen Bussard)

 - Several improvements to overall performance and efficiency
   (by mozbugbox)

 - Fixed a bug where geospatial information was not scaling along with
   its page

 - Fixed a type issue and a Python 3 issue in the decryption algorithms
   (with Francisco Vieira and koba-ninkigumi)

 - Fixed a bug causing an infinite loop in the ASCII 85 decoding
   algorithm (by madmaardigan)

 - Annotations (links, comment windows, etc.) are now preserved when
   pages are merged together

 - Used the Destination class in addLink() and addBookmark() so that
   the page fit option could be properly customized


## Version 1.22, 2014-05-29

 - Added .DS_Store to .gitignore (for Mac users) (by Steve Witham)

 - Removed `__init__()` implementation in NameObject (by Steve Witham)

 - Fixed bug (inf. loop) when merging pages in Python 3 (by commx)

 - Corrected error when calculating height in scaleTo()

 - Removed unnecessary code from DictionaryObject (by Georges Dubus)

 - Fixed bug where an exception was thrown upon reading a NULL string
   (by speedplane)

 - Allow string literals (non-unicode strings in Python 2) to be passed
   to PdfFileReader

 - Allow ConvertFunctionsToVirtualList to be indexed with slices and
   longs (in Python 2) (by Matt Gilson)

 - Major improvements and bugfixes to addLink() method (see documentation
   in source code) (by Henry Keiter)

 - General code clean-up and improvements (with Steve Witham and Henry Keiter)

 - Fixed bug that caused crash when comments are present at end of
   dictionary


## Version 1.21, 2014-04-21

 - Fix for when /Type isn't present in the Pages dictionary (by Rob1080)

 - More tolerance for extra whitespace in Indirect Objects

 - Improved Exception handling

 - Fixed error in getHeight() method (by Simon Kaempflein)

 - implement use of utils.string_type to resolve Py2-3 compatibility issues

 - Prevent exception for multiple definitions in a dictionary (with carlosfunk)
   (only when strict = False)

 - Fixed errors when parsing a slice using pdfcat on command line (by
   Steve Witham)

 - Tolerance for EOF markers within 1024 bytes of the actual end of the
   file (with David Wolever)

 - Added overwriteWarnings parameter to PdfFileReader constructor, if False
   PyPDF2 will NOT overwrite methods from Python's warnings.py module with
   a custom implementation.

 - Fix NumberObject and NameObject constructors for compatibility with PyPy
   (Rüdiger Jungbeck, Xavier Dupré, shezadkhan137, Steven Witham)

 - Utilize  utils.Str in pdf.py and pagerange.py to resolve type issues (by
   egbutter)

 - Improvements in implementing StringIO for Python 2 and BytesIO for
   Python 3 (by Xavier Dupré)

 - Added /x00 to Whitespaces, defined utils.WHITESPACES to clarify code (by
   Maxim Kamenkov)

 - Bugfix for merging 3 or more resources with the same name (by lucky-user)

 - Improvements to Xref parsing algorithm (by speedplane)


## Version 1.20, 2014-01-27

 - Official Python 3+ support (with contributions from TWAC and cgammans)
   Support for Python versions 2.6 and 2.7 will be maintained

 - Command line concatenation (see pdfcat in sample code) (by Steve Witham)

 - New FAQ; link included in README

 - Allow more (although unnecessary) escape sequences

 - Prevent exception when reading a null object in decoding parameters

 - Corrected error in reading destination types (added a slash since they
   are name objects)

 - Corrected TypeError in scaleTo() method

 - addBookmark() method in PdfFileMerger now returns bookmark (so nested
   bookmarks can be created)

 - Additions to Sample Code and Sample PDFs

 - changes to allow 2up script to work (see sample code) (by Dylan McNamee)

 - changes to metadata encoding (by Chris Hiestand)

 - New methods for links: addLink() (by Enrico Lambertini) and removeLinks()

 - Bugfix to handle nested bookmarks correctly (by Jamie Lentin)

 - New methods removeImages() and removeText() available for PdfFileWriter
   (by Tien Haï)

 - Exception handling for illegal characters in Name Objects


## Version 1.19, 2013-10-08

BUGFIXES:
 - Removed pop in sweepIndirectReferences to prevent infinite loop
   (provided by ian-su-sirca)

 - Fixed bug caused by whitespace when parsing PDFs generated by AutoCad

 - Fixed a bug caused by reading a 'null' ASCII value in a dictionary
   object (primarily in PDFs generated by AutoCad).

FEATURES:
 - Added new folders for PyPDF2 sample code and example PDFs; see README
   for each folder

 - Added a method for debugging purposes to show current location while
   parsing

 - Ability to create custom metadata (by jamma313)

 - Ability to access and customize document layout and view mode
   (by Joshua Arnott)

OTHER:
 - Added and corrected some documentation

 - Added some more warnings and exception messages

 - Removed old test/debugging code

UPCOMING:
 - More bugfixes (We have received many problematic PDFs via email, we
   will work with them)

 - Documentation - It's time for PyPDF2 to get its own documentation
   since it has grown much since the original pyPdf

 - A FAQ to answer common questions


## Version 1.18, 2013-08-19

 - Fixed a bug where older versions of objects were incorrectly added to the
   cache, resulting in outdated or missing pages, images, and other objects
   (from speedplane)

 - Fixed a bug in parsing the xref table where new xref values were
   overwritten; also cleaned up code (from speedplane)

 - New method mergeRotatedAroundPointPage which merges a page while rotating
   it around a point (from speedplane)

 - Updated Destination syntax to respect PDF 1.6 specifications (from
   jamma313)

 - Prevented infinite loop when a PdfFileReader object was instantiated
   with an empty file (from Jerome Nexedi)

Other Changes:

 - Downloads now available via PyPI
 - Installation through pip library is fixed


## Version 1.17, 2013-07-25

 - Removed one (from pdf.py) of the two Destination classes. Both
   classes had the same name, but were slightly different in content,
   causing some errors. (from Janne Vanhala)

 - Corrected and Expanded README file to demonstrate PdfFileMerger

 - Added filter for LZW encoded streams (from Michal Horejsek)

 - PyPDF2 issue tracker enabled on Github to allow community
   discussion and collaboration


## Versions -1.16, -2013-06-30

 - Note: This ChangeLog has not been kept up-to-date for a while.
   Hopefully we can keep better track of it from now on. Some of the
   changes listed here come from previous versions 1.14 and 1.15; they
   were only vaguely defined. With the new _version.py file we should
   have more structured and better documented versioning from now on.

 - Defined `PyPDF2.__version__`

 - Fixed encrypt() method (from Martijn The)

 - Improved error handling on PDFs with truncated streams (from cecilkorik)

 - Python 3 support (from kushal-kumaran)

 - Fixed example code in README (from Jeremy Bethmont)

 - Fixed a bug caused by DecimalError Exception (from Adam Morris)

 - Many other bug fixes and features by:

	jeansch
	Anton Vlasenko
	Joseph Walton
	Jan Oliver Oelerich
	Fabian Henze
	And any others I missed.
	Thanks for contributing!


## Version 1.13, 2010-12-04

 - Fixed a typo in code for reading a "\b" escape character in strings.

 - Improved `__repr__` in FloatObject.

 - Fixed a bug in reading octal escape sequences in strings.

 - Added getWidth and getHeight methods to the RectangleObject class.

 - Fixed compatibility warnings with Python 2.4 and 2.5.

 - Added addBlankPage and insertBlankPage methods on PdfFileWriter class.

 - Fixed a bug with circular references in page's object trees (typically
   annotations) that prevented correctly writing out a copy of those pages.

 - New merge page functions allow application of a transformation matrix.

 - To all patch contributors: I did a poor job of keeping this ChangeLog
   up-to-date for this release, so I am missing attributions here for any
   changes you submitted.  Sorry!  I'll do better in the future.


## Version 1.12, 2008-09-02

 - Added support for XMP metadata.

 - Fix reading files with xref streams with multiple /Index values.

 - Fix extracting content streams that use graphics operators longer than 2
   characters.  Affects merging PDF files.


## Version 1.11, 2008-05-09

 - Patch from Hartmut Goebel to permit RectangleObjects to accept NumberObject
   or FloatObject values.

 - PDF compatibility fixes.

 - Fix to read object xref stream in correct order.

 - Fix for comments inside content streams.


## Version 1.10, 2007-10-04

 - Text strings from PDF files are returned as Unicode string objects when
 pyPdf determines that they can be decoded (as UTF-16 strings, or as
 PDFDocEncoding strings).  Unicode objects are also written out when
 necessary.  This means that string objects in pyPdf can be either
 generic.ByteStringObject instances, or generic.TextStringObject instances.

 - The extractText method now returns a unicode string object.

 - All document information properties now return unicode string objects.  In
 the event that a document provides docinfo properties that are not decoded by
 pyPdf, the raw byte strings can be accessed with an "_raw" property (ie.
 title_raw rather than title)

 - generic.DictionaryObject instances have been enhanced to be easier to use.
 Values coming out of dictionary objects will automatically be de-referenced
 (.getObject will be called on them), unless accessed by the new "raw_get"
 method.  DictionaryObjects can now only contain PdfObject instances (as keys
 and values), making it easier to debug where non-PdfObject values (which
 cannot be written out) are entering dictionaries.

 - Support for reading named destinations and outlines in PDF files.  Original
 patch by Ashish Kulkarni.

 - Stream compatibility reading enhancements for malformed PDF files.

 - Cross reference table reading enhancements for malformed PDF files.

 - Encryption documentation.

 - Replace some "assert" statements with error raising.

 - Minor optimizations to FlateDecode algorithm increase speed when using PNG
 predictors.

## Version 1.9, 2006-12-15

 - Fix several serious bugs introduced in version 1.8, caused by a failure to
   run through our PDF test suite before releasing that version.

 - Fix bug in NullObject reading and writing.

## Version 1.8, 2006-12-14

 - Add support for decryption with the standard PDF security handler.  This
   allows for decrypting PDF files given the proper user or owner password.

 - Add support for encryption with the standard PDF security handler.

 - Add new pythondoc documentation.

 - Fix bug in ASCII85 decode that occurs when whitespace exists inside the
   two terminating characters of the stream.

## Version 1.7, 2006-12-10

 - Fix a bug when using a single page object in two PdfFileWriter objects.

 - Adjust PyPDF to be tolerant of whitespace characters that don't belong
   during a stream object.

 - Add documentInfo property to PdfFileReader.

 - Add numPages property to PdfFileReader.

 - Add pages property to PdfFileReader.

 - Add extractText function to PdfFileReader.


## Version 1.6, 2006-06-06

 - Add basic support for comments in PDF files.  This allows us to read some
   ReportLab PDFs that could not be read before.

 - Add "auto-repair" for finding xref table at slightly bad locations.

 - New StreamObject backend, cleaner and more powerful.  Allows the use of
   stream filters more easily, including compressed streams.

 - Add a graphics state push/pop around page merges.  Improves quality of
   page merges when one page's content stream leaves the graphics
   in an abnormal state.

 - Add PageObject.compressContentStreams function, which filters all content
   streams and compresses them.  This will reduce the size of PDF pages,
   especially after they could have been decompressed in a mergePage
   operation.

 - Support inline images in PDF content streams.

 - Add support for using .NET framework compression when zlib is not
   available.  This does not make pyPdf compatible with IronPython, but it
   is a first step.

 - Add support for reading the document information dictionary, and extracting
   title, author, subject, producer and creator tags.

 - Add patch to support NullObject and multiple xref streams, from Bradley
   Lawrence.


## Version 1.5, 2006-01-28

- Fix a bug where merging pages did not work in "no-rename" cases when the
  second page has an array of content streams.

- Remove some debugging output that should not have been present.


## Version 1.4, 2006-01-27

- Add capability to merge pages from multiple PDF files into a single page
  using the PageObject.mergePage function.  See example code (README or web
  site) for more information.

- Add ability to modify a page's MediaBox, CropBox, BleedBox, TrimBox, and
  ArtBox properties through PageObject.  See example code (README or web site)
  for more information.

- Refactor pdf.py into multiple files: generic.py (contains objects like
  NameObject, DictionaryObject), filters.py (contains filter code),
  utils.py (various).  This does not affect importing PdfFileReader
  or PdfFileWriter.

- Add new decoding functions for standard PDF filters ASCIIHexDecode and
  ASCII85Decode.

- Change url and download_url to refer to new pybrary.net web site.


## Version 1.3, 2006-01-23

- Fix new bug introduced in 1.2 where PDF files with \r line endings did not
  work properly anymore.  A new test suite developed with various PDF files
  should prevent regression bugs from now on.

- Fix a bug where inheriting attributes from page nodes did not work.


## Version 1.2, 2006-01-23

- Improved support for files with CRLF-based line endings, fixing a common
  reported problem stating "assertion error: assert line == "%%EOF"".

- Software author/maintainer is now officially a proud married person, which
  is sure to result in better software... somehow.


## Version 1.1, 2006-01-18

- Add capability to rotate pages.

- Improved PDF reading support to properly manage inherited attributes from
  /Type=/Pages nodes.  This means that page groups that are rotated or have
  different media boxes or whatever will now work properly.

- Added PDF 1.5 support.  Namely cross-reference streams and object streams.
  This release can mangle Adobe's PDFReference16.pdf successfully.


## Version 1.0, 2006-01-17

- First distutils-capable true public release.  Supports a wide variety of PDF
  files that I found sitting around on my system.

- Does not support some PDF 1.5 features, such as object streams,
  cross-reference streams.


================================================
FILE: docs/meta/comparisons.md
================================================
# pypdf vs X

pypdf is a [free] and open source pure-python PDF library capable of
splitting, merging, cropping, and transforming the pages of PDF files.
It can also add custom data, viewing options, and passwords to PDF
files. pypdf can retrieve text and metadata from PDFs as well.

## PyMuPDF and PikePDF

[PyMuPDF] is a Python binding to [MuPDF] and [PikePDF] is the Python
binding to [QPDF].

While both are excellent libraries for various use-cases, using them is
not always possible even when they support the use-case. Both of them
are powered by C libraries which make installation harder and might
cause security concerns. For MuPDF, you might also need to buy a
commercial license.

A core feature of pypdf is that it's pure Python. That means there is
no C dependency. It has been used for over 10 years and, for this reason,
has a lot of support via StackOverflow and examples on the internet.

## PyPDF2

PyPDF2 was merged back into `pypdf`. The development continues at `pypdf`.

## PyPDF3 and PyPDF4

Developing and maintaining open source software is extremely
time-intensive and in the case of pypdf not paid at all. Having
continuous support is hard.

PyPDF2 was initially released in 2012 on PyPI and received releases
until 2016. From 2016 to 2022, there was no update - but people were
still using it.

As PyPDF2 is free software, there were attempts to fork it and continue
the development. PyPDF3 was first released in 2018 and still receives
updates. PyPDF4 has only one release from 2018.

Martin Thoma has worked on bringing the community back to one path of
development. He deprecated PyPDF2 in favor of pypdf already, and pypdf has
more features and a cleaner interface than PyPDF2 now. See [history of
pypdf](history.md).

  [free]: https://en.wikipedia.org/wiki/Free_software
  [PyMuPDF]: https://pypi.org/project/PyMuPDF/
  [MuPDF]: https://mupdf.com/
  [PikePDF]: https://pypi.org/project/pikepdf/
  [QPDF]: https://github.com/qpdf/qpdf


## pdfminer.six and pdfplumber

[`pdfminer.six`](https://pypi.org/project/pdfminer.six/) is capable of
extracting the [font size](https://stackoverflow.com/a/69962459/562769)
/ font weight (bold-ness). It has no capabilities for writing PDF files.

[`pdfplumber`](https://pypi.org/project/pdfplumber/) is a library focused on extracting data from PDF documents. Since `pdfplumber` is built on top of `pdfminer.six`, there are **no capabilities of exporting or modifying a PDF file** (see [#440 (discussions)](https://github.com/jsvine/pdfplumber/discussions/440#discussioncomment-803880)). However, `pdfplumber` is capable of converting a PDF file into an image, [drawing lines and rectangles on the image](https://github.com/jsvine/pdfplumber#drawing-methods), and saving it as an image file. Please note that the image conversion is done via ImageMagick (see [`pdfplumber`'s documentation](https://github.com/jsvine/pdfplumber#visual-debugging)).

The `pdfplumber` community is active in answering questions and the library is maintained as of May 2023.

## pdfrw / pdfrw2

I don't have experience with any of those libraries. Please add a
comparison if you know pypdf and [`pdfrw`](https://pypi.org/project/pdfrw/)!

Please be aware that there is also
[`pdfminer`](https://pypi.org/project/pdfminer/) which is not maintained.
Then there is [`pdfrw2`](https://pypi.org/project/pdfrw2/) which doesn't have
a large community behind it.

## Document Generation

There are (Python) [tools to generate PDF documents](https://github.com/py-pdf/awesome-pdf#generators).
pypdf is not one of them.


## CLI applications

pypdf is a pure Python PDF library. If you're looking for an application which
you can use from the terminal, give [`pdfly`](https://pdfly.readthedocs.io/en/latest/)
a shot.


================================================
FILE: docs/meta/faq.md
================================================
# Frequently Asked Questions

## How is pypdf related to PyPDF2?

PyPDF2 was a fork from the original pyPdf. After several years, the fork was
merged back into `pypdf` (now all lowercase).

## Which Python versions are supported?

pypdf 3.0+ supports Python 3.6 and later.
PyPDF2 2.0+ supports Python 3.6 and later.
PyPDF2 1.27.10 supported Python 2.7 to 3.10.

  [Matthew]: https://github.com/mstamy2
  [source]: https://github.com/py-pdf/PyPDF2/commit/24b270d876518d15773224b5d0d6c2206db29f64#commitcomment-5038317
  [this sort of thing]: https://github.com/py-pdf/PyPDF2/issues/24
  [GitHub issue]: https://github.com/py-pdf/PyPDF2/issues

## Who uses pypdf?

pyPdf is vendored [into](https://github.com/Buyanbat/XacCRM/tree/ee78e8df967182f661b6494a86444501e7d89c8f/report/pyPdf) [several](https://github.com/MyBook/calibre/tree/ca1efe3c21f6553e096dab745b3cdeb36244a5a9/src/pyPdf) [projects](https://github.com/Giacomo-De-Florio-Dev/Make_Your_PDF_Safe/tree/ec439f92243d12d54ae024668792470c6b40ee96/MakeYourPDFsafe_V1.3/PyPDF2). That
means the code of pyPdf was copied into those projects.

Projects that depend on pypdf:

* [Camelot](https://github.com/camelot-dev/camelot): A Python library to extract tabular data from PDFs
* [edi](https://github.com/OCA/edi): Electronic Data Interchange modules
* [amazon-textract-textractor](https://github.com/aws-samples/amazon-textract-textractor/blob/42444b08c672607eadbdcd64f3c5adb2d85383de/helper/setup.py): Analyze documents with Amazon Textract and generate output in multiple formats.
* [maigret](https://github.com/soxoj/maigret): Collect a dossier on a person by username from thousands of sites
* [deda](https://github.com/dfd-tud/deda): tracking Dots Extraction, Decoding and Anonymisation toolkit
* [opencanary](https://github.com/thinkst/opencanary)
* Document Conversions
  * [rst2pdf](https://github.com/rst2pdf/rst2pdf)
  * [xhtml2pdf](https://github.com/xhtml2pdf/xhtml2pdf)
  * [doc2text](https://github.com/jlsutherland/doc2text)
* [pdfalyzer](https://pypi.org/project/pdfalyzer/): A PDF analysis tool for visualizing the inner tree-like data structure of a PDF in spectacularly large and colorful diagrams as well as scanning the binary streams embedded in the PDF for hidden potentially malicious content.

## How do I cite pypdf?

In BibTeX format:

```
@misc{pypdf,
 title         = {The {pypdf} library},
 author        = {Mathieu Fenniak and
                  Matthew Stamy and
                  pubpub-zz and
                  Martin Thoma and
                  Matthew Peveler and
                  exiledkingcc and {pypdf Contributors}},
 year          = {2024},
 url           = {https://pypi.org/project/pypdf/},
 note          = {See https://pypdf.readthedocs.io/en/latest/meta/CONTRIBUTORS.html for all contributors}
}
```

## Which License does pypdf use?

`pypdf` uses the [BSD-3-Clause license](https://en.wikipedia.org/wiki/BSD_licenses#3-clause), see the LICENSE file.


================================================
FILE: docs/meta/history.md
================================================
# History of pypdf

## The Origins: pyPdf (2005-2010)

In 2005, [Mathieu Fenniak] launched pyPdf "as a PDF toolkit..."
focused on

-   document manipulation: by-page splitting, concatenation, and
    merging;
-   document introspection;
-   page cropping; and
-   document encryption and decryption.

The last release on PyPI was [pyPdf 1.13](https://pypi.org/project/pyPdf/#history)
in 2010.

## PyPDF2 is born (2011-2016)

At the end of 2011, after consultation with Mathieu and others, Phaseit
sponsored PyPDF2 as a fork of pyPdf on GitHub. The initial impetus was
to handle a wider range of input PDF instances; Phaseit's commercial
work often encounters PDF instances "in the wild" that it needs to
manage (mostly concatenate and paginate), but that deviate so much from
PDF standards that pyPdf can't read them. PyPDF2 reads a considerably
wider range of real-world PDF instances.

Neither pyPdf nor PyPDF2 aims to be universal, that is, to provide all
possible PDF-related functionality. Note that the similar-appearing
[pyfpdf] of Mariano Reingart is most comparable to [ReportLab], in that
both ReportLab and pyfpdf emphasize document generation. Interestingly
enough, pyfpdf builds in a basic HTML→PDF converter while PyPDF2 has no
knowledge of HTML.

So what is PyPDF2 truly about? Think about popular [pdftk] for a moment.
PyPDF2 does what pdftk does, and it does so within your current Python
process, and it handles a wider range of variant PDF formats
\[explain\]. PyPDF2 has its own FAQ to answer other questions that have
arisen.

The Reddit [/r/python crowd chatted] obliquely and briefly about PyPDF2
in March 2012.

The core developer / maintainer was Matthew Stamy.

## PyPDF3 and PyPDF4 (2018-2022)

Two approaches were made to get PyPDF2 active again: PyPDF3 and PyPDF4.

PyPDF3 had its first release in 2018 and its last one in February 2022.
It never got the user base from PyPDF2.

PyPDF4 only had one release in 2018.

## PyPDF2: Reborn (2022)

Martin Thoma took over maintenance of PyPDF2 in April 2022. It had over 100
open PRs and 321 open issues.

[pubpub-zz](https://github.com/pubpub-zz) was extremely active, especially
for text extraction.

[Matthew Peveler](https://github.com/MasterOdin) helped a lot with reviews
and general project decisions.

[exiledkingcc](https://github.com/exiledkingcc) added support for modern
encryption schemes.


## pypdf: Back to the Roots (2023-2024)

In order to simplify things for beginners, PyPDF2 was merged back into
pypdf. Now all lowercase, without a number. We hope that the folks who
develop PyPDF3 and PyPDF4 also join us.

Compared to `PyPDF2 >= 3.0.0`, `pypdf >= 3.1.0` now offers:

* AES reading and writing support. Not only with PyCryptoDome, but also with cryptography.
* Text extraction improvements, e.g., for math content. [pypdf is now comparable with Tika, pypdfium2, and PyMuPDF](https://github.com/py-pdf/benchmarks)
* Annotation support
* Performance Improvements and Bugfixes
* Page Label support

stefan6419846 made his [first PR for pypdf](https://github.com/py-pdf/pypdf/pull/2022)
in July 2023 and joined the project.


  [Mathieu Fenniak]: https://mathieu.fenniak.net/
  [pyfpdf]: https://github.com/reingart/pyfpdf
  [ReportLab]: https://www.reportlab.com/software/opensource/rl-toolkit/
  [pdftk]: https://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/
  [/r/python crowd chatted]: https://www.reddit.com/r/Python/comments/qsvfm/pypdf2_updates_pypdf_pypdf2_is_an_opensource/


================================================
FILE: docs/meta/migration-1-to-2.md
================================================
# Migration Guide: 1.x to 2.x

`PyPDF2<2.0.0` ([docs](https://pypdf2.readthedocs.io/en/1.27.12/meta/history.html))
is very different from `PyPDF2>=2.0.0` ([docs](../meta/history.md)).

Luckily, most changes are simple naming adjustments. This guide helps you to
make the step from `PyPDF2 1.x` (or even the original PyPdf) to `PyPDF2>=2.0.0`.

You can execute your code with the updated version and show deprecation warnings
by running `python -W all your_code.py`.

## Imports and Modules

* `PyPDF2.utils` no longer exists
* `PyPDF2.pdf` no longer exists. You can import from `PyPDF2` directly or from
  `PyPDF2.generic`

## Naming Adjustments

### Classes

The base classes were renamed as they also allow operating with BytesIO streams
instead of files. Also, the `strict` parameter changed the default value from
`strict=True` to `strict=False`.

* `PdfFileReader` ➔ `PdfReader`
* `PdfFileWriter` ➔ `PdfWriter`
* `PdfFileMerger` ➔ `PdfMerger`

PdfFileReader and PdfFileMerger no longer have the `overwriteWarnings`
parameter. The new behavior is `overwriteWarnings=False`.

### Function, Method, and Property Names

In `PyPDF2.xmp.XmpInformation`:

* `rdfRoot` ➔ `rdf_root`
* `xmp_createDate` ➔ `xmp_create_date`
* `xmp_creatorTool` ➔ `xmp_creator_tool`
* `xmp_metadataDate` ➔ `xmp_metadata_date`
* `xmp_modifyDate` ➔ `xmp_modify_date`
* `xmpMetadata` ➔ `xmp_metadata`
* `xmpmm_documentId` ➔ `xmpmm_document_id`
* `xmpmm_instanceId` ➔ `xmpmm_instance_id`

In `PyPDF2.generic`:

* `readObject` ➔ `read_object`
* `convertToInt` ➔ `convert_to_int`
* `DocumentInformation.getText` ➔ `DocumentInformation._get_text` : This method should typically not be used; please let me know if you need it.
* `readHexStringFromStream` ➔ `read_hex_string_from_stream`
* `initializeFromDictionary` ➔ `initialize_from_dictionary`
* `createStringObject` ➔ `create_string_object`
* `TreeObject.hasChildren` ➔ `TreeObject.has_children`
* `TreeObject.emptyTree` ➔ `TreeObject.empty_tree`

In many places:
  - `getObject` ➔ `get_object`
  - `writeToStream` ➔ `write_to_stream`
  - `readFromStream` ➔ `read_from_stream`


PdfReader class:
  - `reader.getPage(pageNumber)` ➔ `reader.pages[page_number]`
  - `reader.getNumPages()` / `reader.numPages` ➔ `len(reader.pages)`
  - `getDocumentInfo` ➔ `metadata`
  - `flattenedPages` attribute ➔ `flattened_pages`
  - `resolvedObjects` attribute ➔ `resolved_objects`
  - `xrefIndex` attribute ➔ `xref_index`
  - `getNamedDestinations` / `namedDestinations` attribute ➔ `named_destinations`
  - `getPageLayout` / `pageLayout` ➔ `page_layout` attribute
  - `getPageMode` / `pageMode` ➔ `page_mode` attribute
  - `getIsEncrypted` / `isEncrypted` ➔ `is_encrypted` attribute
  - `getOutlines` ➔ `get_outlines`
  - `readObjectHeader` ➔ `read_object_header`
  - `cacheGetIndirectObject` ➔ `cache_get_indirect_object`
  - `cacheIndirectObject` ➔ `cache_indirect_object`
  - `getDestinationPageNumber` ➔ `get_destination_page_number`
  - `readNextEndLine` ➔ `read_next_end_line`
  - `_zeroXref` ➔ `_zero_xref`
  - `_authenticateUserPassword` ➔ `_authenticate_user_password`
  - `_pageId2Num` attribute ➔ `_page_id2num`
  - `_buildDestination` ➔ `_build_destination`
  - `_buildOutline` ➔ `_build_outline`
  - `_getPageNumberByIndirect(indirectRef)` ➔ `_get_page_number_by_indirect(indirect_ref)`
  - `_getObjectFromStream` ➔ `_get_object_from_stream`
  - `_decryptObject` ➔ `_decrypt_object`
  - `_flatten(..., indirectRef)` ➔ `_flatten(..., indirect_ref)`
  - `_buildField` ➔ `_build_field`
  - `_checkKids` ➔ `_check_kids`
  - `_writeField` ➔ `_write_field`
  - `_write_field(..., fieldAttributes)` ➔ `_write_field(..., field_attributes)`
  - `_read_xref_subsections(..., getEntry, ...)` ➔ `_read_xref_subsections(..., get_entry, ...)`

PdfWriter class:
  - `writer.getPage(pageNumber)` ➔ `writer.pages[page_number]`
  - `writer.getNumPages()` ➔ `len(writer.pages)`
  - `addMetadata` ➔ `add_metadata`
  - `addPage` ➔ `add_page`
  - `addBlankPage` ➔ `add_blank_page`
  - `addAttachment(fname, fdata)` ➔ `add_attachment(filename, data)`
  - `insertPage` ➔ `insert_page`
  - `insertBlankPage` ➔ `insert_blank_page`
  - `appendPagesFromReader` ➔ `append_pages_from_reader`
  - `updatePageFormFieldValues` ➔ `update_page_form_field_values`
  - `cloneReaderDocumentRoot` ➔ `clone_reader_document_root`
  - `cloneDocumentFromReader` ➔ `clone_document_from_reader`
  - `getReference` ➔ `get_reference`
  - `getOutlineRoot` ➔ `get_outline_root`
  - `getNamedDestRoot` ➔ `get_named_dest_root`
  - `addBookmarkDestination` ➔ `add_bookmark_destination`
  - `addBookmarkDict` ➔ `add_bookmark_dict`
  - `addBookmark` ➔ `add_bookmark`
  - `addNamedDestinationObject` ➔ `add_named_destination_object`
  - `addNamedDestination` ➔ `add_named_destination`
  - `removeLinks` ➔ `remove_links`
  - `removeImages(ignoreByteStringObject)` ➔ `remove_images(ignore_byte_string_object)`
  - `removeText(ignoreByteStringObject)` ➔ `remove_text(ignore_byte_string_object)`
  - `addURI` ➔ `add_uri`
  - `addLink` ➔ `add_link`
  - `getPage(pageNumber)` ➔ `get_page(page_number)`
  - `getPageLayout` / `setPageLayout` / `pageLayout` ➔ `page_layout` attribute
  - `getPageMode` / `setPageMode` / `pageMode` ➔ `page_mode` attribute
  - `_addObject` ➔ `_add_object`
  - `_addPage` ➔ `_add_page`
  - `_sweepIndirectReferences` ➔ `_sweep_indirect_references`

PdfMerger class:
  - `__init__` parameter: `strict=True` ➔ `strict=False` (the `PdfFileMerger` still has the old default)
  - `addMetadata` ➔ `add_metadata`
  - `addNamedDestination` ➔ `add_named_destination`
  - `setPageLayout` ➔ `set_page_layout`
  - `setPageMode` ➔ `set_page_mode`

Page class:
  - `artBox` / `bleedBox` / `cropBox` / `mediaBox` / `trimBox` ➔ `artbox` / `bleedbox` / `cropbox` / `mediabox` / `trimbox`
    - `getWidth` / `getHeight` ➔ `width` / `height`
    - `getLowerLeft_x` / `getUpperLeft_x` ➔ `left`
    - `getUpperRight_x` / `getLowerRight_x` ➔ `right`
    - `getLowerLeft_y` / `getLowerRight_y` ➔ `bottom`
    - `getUpperRight_y` / `getUpperLeft_y` ➔ `top`
    - `getLowerLeft` / `setLowerLeft` ➔ `lower_left` property
    - `upperRight` ➔ `upper_right`
  - `mergePage` ➔ `merge_page`
  - `rotateClockwise` / `rotateCounterClockwise` ➔ `rotate_clockwise`
  - `_mergeResources` ➔ `_merge_resources`
  - `_contentStreamRename` ➔ `_content_stream_rename`
  - `_pushPopGS` ➔ `_push_pop_gs`
  - `_addTransformationMatrix` ➔ `_add_transformation_matrix`
  - `_mergePage` ➔ `_merge_page`

XmpInformation class:
  - `getElement(..., aboutUri, ...)` ➔ `get_element(..., about_uri, ...)`
  - `getNodesInNamespace(..., aboutUri, ...)` ➔ `get_nodes_in_namespace(..., about_uri, ...)`
  - `_getText` ➔ `_get_text`

utils.py:
  - `matrixMultiply` ➔ `matrix_multiply`
  - `RC4_encrypt` is moved to the security module

### Parameter Names

* `PdfWriter.get_page`: `pageNumber` ➔ `page_number`
* `PyPDF2.filters` (all classes): `decodeParms` ➔ `decode_parms`
* `PyPDF2.filters` (all classes): `decodeStreamData` ➔ `decode_stream_data`
* `pagenum` ➔ `page_number`
* `PdfMerger.merge`: `position` ➔ `page_number`
* `PdfWriter.add_outline_item_destination`: `dest` ➔ `page_destination`
* `PdfWriter.add_named_destination_object`: `dest` ➔ `page_destination`
* `PdfWriter.encrypt`: `user_pwd` ➔ `user_password`
* `PdfWriter.encrypt`: `owner_pwd` ➔ `owner_password`

### Deprecations

A few classes / functions were deprecated without replacement:

* `PyPDF2.utils.ConvertFunctionsToVirtualList`
* `PyPDF2.utils.formatWarning`
* `PyPDF2.isInt(obj)`: Use `isinstance(obj, int)` instead
* `PyPDF2.u_(s)`: Use `s` directly
* `PyPDF2.chr_(c)`: Use `chr(c)` instead
* `PyPDF2.barray(b)`: Use `bytearray(b)` instead
* `PyPDF2.isBytes(b)`: Use `isinstance(b, type(bytes()))` instead
* `PyPDF2.xrange_fn`: Use `range` instead
* `PyPDF2.string_type`: Use `str` instead
* `PyPDF2.isString(s)`: Use `isinstance(s, str)` instead
* `PyPDF2._basestring`: Use `str` instead
* `b_(...)` was removed. You should typically be able to use the bytes object directly, otherwise you can [copy this](https://github.com/py-pdf/PyPDF2/pull/986#issuecomment-1230698069)


================================================
FILE: docs/meta/project-governance.md
================================================
# Project Governance

This document describes how the pypdf project is managed. It describes the
different actors, their roles, and the responsibilities they have.

## Terminology

* The **project** is pypdf - a free and open-source pure-Python PDF library
  capable of splitting, merging, cropping, and transforming the pages of PDF files.
  It includes the [code, issues, and discussions on GitHub](https://github.com/py-pdf/pypdf),
  and [the documentation on ReadTheDocs](https://pypdf.readthedocs.io/en/latest/),
  [the package on PyPI](https://pypi.org/project/pypdf/), and
  [the website on GitHub](https://py-pdf.github.io/pypdf/dev/bench/).
* A **maintainer** is a person who has technical permissions to change one or
  more parts of the project. It is a person driven to keep the project running
  and improving.
* A **contributor** is a person who contributes to the project. That could be
  through writing code - in the best case through forking and creating a pull
  request, but that is up to the maintainer. Other contributors describe issues,
  help to ask questions on existing issues to make them easier to answer,
  participate in discussions, and help to improve the documentation. Contributors
  are similar to maintainers, but without technical permissions.
* A **user** is a person who imports pypdf into their code. All pypdf users
  are developers, but not developers who know the internals of pypdf. They only
  use the public interface of pypdf. They will likely have less knowledge about
  PDF than contributors.
* The **community** is all of that - the users, the contributors, and the maintainers.


## Governance, Leadership, and Steering pypdf forward

pypdf is a free and open source project with over 100 contributors and likely
(way) more than 1000 users.

As pypdf does not have any formal relationship with any company and no funding,
all the work done by the community consists of voluntary contributions. People don't
get paid, but choose to spend their free time to create software of which
many more are profiting. This has to be honored and respected.

Despite such a big community, the project was dormant from 2016 to 2022.
There were still questions asked, issues reported, and pull requests created.
But the maintainer didn't have the time to move pypdf forward. During that
time, nobody else stepped up to become the new maintainer.

For this reason, pypdf has the **Benevolent Dictator**
governance model. The benevolent dictator is a maintainer with all technical permissions -
most importantly the permission to push new pypdf versions on PyPI.

Being benevolent, the benevolent dictator listens to the community when making decisions and tries
their best to make decisions from which the overall community profits - the
current one and the potential future one. Being a dictator, the benevolent dictator always has
the power and the right to make decisions on their own - also against some
members of the community.

As pypdf is free software, parts of the community can split off (fork the code)
and create a new community. This should limit the harm a bad benevolent dictator can do.


## Project Language

The project language is (American) English. All documentation and issues must
be written in English to ensure that the community can understand it.

We appreciate the fact that large parts of the community don't have English
as their native language. We try our best to understand others -
[automatic translators](https://translate.google.com/) might help.


## Expectations

The community can expect the following:

* The **benevolent dictator** tries their best to make decisions from which the overall
  community profits. The benevolent dictator is aware that his/her decisions can shape the
  overall community. Once the benevolent dictator notices that she/he doesn't have the time
  to advance pypdf, he/she looks for a new benevolent dictator. As it is expected
  that the benevolent dictator will step down at some point of their choice
  (hopefully before their death), it is NOT a benevolent dictator for life
  (BDFL).
* Every **maintainer** (including the benevolent dictator) is aware of their permissions and
  the harm they could do. They value security and ensure that the project is
  not harmed. They give their technical permissions back if they don't need them
  any longer. Any long-time contributor can become a maintainer. Maintainers
  can - and should! - step down from their role when they realize that they
  can no longer commit that time. Their contribution will be honored in the
  {doc}`history`.
* Every **contributor** is aware that the time of maintainers and the benevolent dictator is
  limited. Short pull requests that briefly describe the solved issue and have
  a unit test have a higher chance to get merged soon - simply because it's
  easier for maintainers to see that the contribution will not harm the overall
  project. Their contributions are documented in the git history and in the
  public issues. [Let us know](https://github.com/py-pdf/pypdf/discussions/798)
  if you would appreciate something else!
* Every **community member** uses respectful language. We are all human, we
  get upset about things we care about, and things other than what's visible on
  the internet go on in our lives. pypdf does not pay its contributors - keep all
  of that in mind when you interact with others. We are here because we want to
  help others.


### Issues and Discussions

An issue is any technical description that aims at bringing pypdf forward:

* Bug tickets: Something went wrong because pypdf developers made a mistake.
* Feature requests: pypdf does not support all features of the PDF specifications.
  There are certainly also convenience methods that would help users a lot.
* Robustness requests: There are many broken PDFs around. In some cases, we can
  deal with that. It's kind of a mixture between a bug ticket and a feature
  request.
* Performance tickets: pypdf could be faster - let us know about your specific
  scenario.

Any comment that is in those technical descriptions which is not helping the
discussion can be deleted. This is especially true for "me too" comments on bugs
or "bump" comments for desired features. People can express this with 👍 / 👎
reactions.

[Discussions](https://github.com/py-pdf/pypdf/discussions) are open. No comments
will be deleted there - except if they are unrelated spam or only
try to insult people (luckily, the community was very respectful so far 🤞)


### Releases

The maintainers follow [semantic versioning](https://semver.org/). Most
importantly, that means that breaking changes will have a major version bump.

Be aware that unintentional breaking changes might still happen. The pypdf
maintainers do their best to fix that in a timely manner - please
[report such issues](https://github.com/py-pdf/pypdf/issues)!


## People

* [stefan6419846](https://github.com/stefan6419846) has been the benevolent dictator since January 2025.
* [Martin Thoma](https://github.com/MartinThoma) was the benevolent dictator from April 2022 to January 2025.
  He still has most of the permissions as a fallback.
* Maintainers:
    * Matthew Stamy (mstamy2) was the benevolent dictator for a long time.
      He still is around on GitHub once in a while and has permissions on PyPI and GitHub.
    * Matthew Peveler (MasterOdin) is a maintainer on GitHub.


================================================
FILE: docs/meta/scope-of-pypdf.md
================================================
# Scope of pypdf

What features should pypdf have and which features will it never have?

pypdf aims at simplifying interactions with PDF documents. Core tasks that
pypdf can perform are:

* Document manipulation: Splitting, merging, cropping, and transforming the pages of PDF files
* Data Extraction: Extract text and metadata from PDF documents
* Security: Decrypt / encrypt PDF documents

Typical indicators that pypdf should do something:

* The task needs in-depth knowledge of the PDF format
* It currently requires a lot of code or even is impossible to do with pypdf
* It's neither mentioned in "belongs in user code" nor in "out of scope"
* It already is in the issue list with the [is-feature tag](https://github.com/py-pdf/pypdf/labels/is-feature).

The [moonshot extensions](https://github.com/py-pdf/pypdf/discussions/1181) are
features we would like to have, but are currently not able to add (PRs are
welcome 😉)

## Belongs in user code

Here are a few indicators that a feature belongs in user code (and not in pypdf):

1. The use-case is very specific. Most people will not encounter the same need.
2. It can be done without knowledge of the PDF specification
3. It cannot be done without (non-pdf) domain knowledge. Anything that is
   specific to your industry.

## Out of scope

While this list is infinitely long, there are a few topics that are asked
multiple times.

Those topics are out of scope for pypdf. They will never be part of pypdf:

1. **Optical Character Recognition (OCR)**: OCR is about extracting text from
   images. That is very different from the kind of text extraction pypdf is
   doing. Please note that images can be within PDF documents. In the case of
   scanned documents, the whole page is an image. Some scanners automatically
   execute OCR and add a text-layer behind the scanned page. That is something
   pypdf can use if it's present. As a rule-of-thumb: If you cannot mark/copy
   the text, it's likely an image. A noteworthy open source OCR project is
   [tesseract](https://github.com/tesseract-ocr/tesseract).
2. **Format Conversion**: Converting docx / HTML to PDF or PDF to those formats.
   You might want to have a look at [`pdfkit`](https://pypi.org/project/pdfkit/)
   and similar projects.

Out of scope for the moment, but might be added if there are enough contributors:

* **Digital Signature Support** ([reference ticket](https://github.com/py-pdf/pypdf/issues/302)): Cryptography is
  complicated. It's important to get it right. pypdf currently doesn't have
  enough active contributors to properly add digital signature support. For the
  moment, [pyhanko](https://pypi.org/project/pyHanko/) seems to be the best choice.
* **PDF Generation from Scratch**: pypdf can manipulate existing PDF documents,
  add annotations, combine / split / crop / transform. It can add blank pages.
  But if you want to generate invoices, you might want to have a look at
  [`reportlab`](https://pypi.org/project/reportlab/) /
  [`fpdf2`](https://pypi.org/project/fpdf2/) or document conversion tools like
  [`pdfkit`](https://pypi.org/project/pdfkit/).
* **Replacing words within a PDF**: [Extracting text from PDF is hard](../user/extract-text.md#why-text-extraction-is-hard).
   Replacing text in a reliable way is even harder. For example, one word might
   be split into multiple tokens. Hence, it's not a simple "search and replace"
   in some cases.
* **(Not) Extracting headers/footers/page numbers**: While you can apply
  heuristics, there is no way to always make it work. PDF documents simply
  don't contain the information what a header/footer/page number is.


### Library vs. Application

It's also worth pointing out that `pypdf` is designed to be a library. It is not
an application. That has several implications:

* Execution: pypdf cannot be executed directly, but only be called from within
  a program written by a pypdf user. In contrast, an application is executed
  on its own.
* Dependencies: pypdf should have a minimal set of dependencies and only
  restrict them where it is strictly necessary. In contrast, applications should
  be installed in environments which are isolated from other applications. They
  can pin their dependencies.

If you're looking for a way to interact with PDF files via Shell, you should
either write a script using pypdf or use [`pdfly`](https://pypi.org/project/pdfly/).


================================================
FILE: docs/meta/taking-ownership.md
================================================
# Taking Ownership of pypdf

pypdf is currently maintained by stefan6419846. We want to avoid that
pypdf ever goes unmaintained again. This document serves as a guide to avoid
that if I become unavailable, e.g., due to severe health issues.

This currently is just an abstract scenario. I'm fine, and I will likely do this
for several more years, but I have seen how projects stand still for many years
because of the maintainer becoming inactive.

## What belongs to pypdf?

The resources needed for maintaining pypdf are:

* PyPI: [pypdf](https://pypi.org/project/pypdf/) and [PyPDF2](https://pypi.org/project/PyPDF2/)
* GitHub: [pypdf](https://github.com/py-pdf/pypdf) (the repository, not the organization)
* ReadTheDocs: [pypdf](https://readthedocs.org/projects/pypdf/) and [PyPDF2](https://readthedocs.org/projects/pypdf2/)

## When may somebody take ownership?

**No activity in 180 days**: If I don't answer e-mails (see my GitHub profile)
and don't make any commits / merges for half a year, you can consider pypdf "not
maintained."

## Who may take ownership?

Preferably, one of the owners of the GitHub `py-pdf` organization takes care of
that.

As of 27th of August 2023, the following people might be candidates:

* [Lucas-C](https://github.com/Lucas-C): He maintains fpdf2 and is a py-pdf owner
* [pubpub-zz](https://github.com/pubpub-zz): He is one of the most active contributors
  to pypdf
* [Matthew Peveler](https://github.com/MasterOdin): Less active, but he is cautious
  about breaking changes and an experienced software developer.
* [exiledkingcc](https://github.com/exiledkingcc): He has contributed the core
  changes related to encryption.

## How to take ownership?

* PyPI: Follow [PEP 541 – Package Index Name Retention](https://peps.python.org/pep-0541/)
* GitHub: Talk with one of the other py-pdf organization owners
* ReadTheDocs: Follow the [Abandoned projects policy](https://docs.readthedocs.io/en/latest/abandoned-projects.html)


================================================
FILE: docs/modules/Destination.rst
================================================
The Destination Class
---------------------

.. autoclass:: pypdf.generic.Destination
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/DocumentInformation.rst
================================================
The DocumentInformation Class
-----------------------------

.. autoclass:: pypdf.DocumentInformation
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/Field.rst
================================================
The Field Class
---------------

.. autoclass:: pypdf.generic.Field
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/Fit.rst
================================================
The Fit Class
-------------

.. autoclass:: pypdf.generic.Fit
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/PageObject.rst
================================================
The PageObject Class
--------------------

.. autoclass:: pypdf._page.PageObject
    :members:
    :undoc-members:
    :show-inheritance:

.. autoclass:: pypdf._page.VirtualListImages
    :members:
    :undoc-members:
    :show-inheritance:

.. autoclass:: pypdf._page.ImageFile
    :members:
    :inherited-members: File
    :undoc-members:

.. autofunction:: pypdf.mult


================================================
FILE: docs/modules/PageRange.rst
================================================
The PageRange Class
-------------------

.. autoclass:: pypdf.PageRange
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/PaperSize.rst
================================================
The PaperSize Class
-------------------

.. autoclass:: pypdf.PaperSize
    :members:
    :undoc-members:
    :show-inheritance:

Add blank page with PaperSize
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. testsetup ::

    pypdf_test_setup("modules/PaperSize", {
        "example.pdf": "../resources/example.pdf",
    })

.. testcode ::

    from pypdf import PaperSize, PdfWriter

    writer = PdfWriter(clone_from="example.pdf")
    writer.add_blank_page(PaperSize.A8.width, PaperSize.A8.height)
    writer.write("out-add-page.pdf")

Insert blank page with PaperSize
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. testcode ::

    from pypdf import PaperSize, PdfWriter

    writer = PdfWriter(clone_from="example.pdf")
    writer.insert_blank_page(PaperSize.A8.width, PaperSize.A8.height, 1)
    writer.write("out-insert-page.pdf")


================================================
FILE: docs/modules/PdfDocCommon.rst
================================================
The PdfDocCommon Class
----------------------

**PdfDocCommon** is an abstract class which is inherited by :class:`~pypdf.PdfReader` and :class:`~pypdf.PdfWriter`.

Where identified in the API, you can use any of the derived classes.

.. autoclass:: pypdf._doc_common.PdfDocCommon
    :members:
    :inherited-members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/PdfReader.rst
================================================
The PdfReader Class
-------------------

.. autoclass:: pypdf.PdfReader
    :members:
    :inherited-members:
    :undoc-members:
    :show-inheritance:

.. autoclass:: pypdf.PasswordType
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/PdfWriter.rst
================================================
The PdfWriter Class
-------------------

.. autoclass:: pypdf.PdfWriter
    :members:
    :inherited-members:
    :undoc-members:
    :show-inheritance:

.. autoclass:: pypdf.ObjectDeletionFlag
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/RectangleObject.rst
================================================
The RectangleObject Class
-------------------------

.. autoclass:: pypdf.generic.RectangleObject
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/Transformation.rst
================================================
The Transformation Class
------------------------

.. autoclass:: pypdf.Transformation
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/XmpInformation.rst
================================================
The XmpInformation Class
-------------------------

.. autoclass:: pypdf.xmp.XmpInformation
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/annotations.rst
================================================
The annotations module
----------------------

.. automodule:: pypdf.annotations
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/constants.rst
================================================
Constants
---------

.. autoclass:: pypdf.constants.AnnotationFlag
    :members:
    :undoc-members:
    :show-inheritance:

.. autoclass:: pypdf.constants.ImageType
    :members:
    :undoc-members:
    :show-inheritance:

.. autoclass:: pypdf.constants.PageLabelStyle
    :members:
    :undoc-members:
    :show-inheritance:

.. autoclass:: pypdf.constants.UserAccessPermissions
    :members:
    :undoc-members:
    :show-inheritance:

.. autoclass:: pypdf.constants.FieldDictionaryAttributes
       :members:
       :undoc-members:
       :exclude-members: FT, Parent, Kids, T, TU, TM, V, DV, AA, Opt, attributes, attributes_dict
       :show-inheritance:


================================================
FILE: docs/modules/errors.rst
================================================
Errors
------

.. automodule:: pypdf.errors
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/modules/generic.rst
================================================
Generic PDF objects
-------------------

.. automodule:: pypdf.generic
    :members:
    :undoc-members:
    :show-inheritance:
    :exclude-members: Destination, Field, Fit, RectangleObject


.. autoclass:: pypdf._protocols.PdfObjectProtocol
    :members:
    :undoc-members:
    :show-inheritance:


.. autoclass:: pypdf._protocols.XmpInformationProtocol
    :members:
    :undoc-members:
    :show-inheritance:


.. autoclass:: pypdf._protocols.PdfCommonDocProtocol
       :members:
       :undoc-members:
       :show-inheritance:


.. autoclass:: pypdf._protocols.PdfReaderProtocol
    :members:
    :undoc-members:
    :show-inheritance:


.. autoclass:: pypdf._protocols.PdfWriterProtocol
    :members:
    :undoc-members:
    :show-inheritance:


================================================
FILE: docs/user/add-javascript.md
================================================
# Adding JavaScript to a PDF

PDF readers vary in the extent to which they support JavaScript, with some not supporting it at all.

Adobe has documentation on its support here:
[https://opensource.adobe.com/dc-acrobat-sdk-docs/library/jsapiref/index.html](https://opensource.adobe.com/dc-acrobat-sdk-docs/library/jsapiref/index.html)

## Launch print window on opening

```{testsetup}
pypdf_test_setup("user/add-javascript", {
    "example.pdf": "../resources/example.pdf",
})
```

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="example.pdf")

# Add JavaScript to launch the print window on opening this PDF.
writer.add_js("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

writer.write("out-print-window.pdf")
```


================================================
FILE: docs/user/add-watermark.md
================================================
# Adding a Stamp or Watermark to a PDF

Stamping and watermarking are two common ways to manipulate PDF files.
A stamp is placed on top of the document's content, while a watermark is placed in the
background of the document.

## Stamp (Overlay) / Watermark (Underlay)

The process of stamping and watermarking is the same; you just need to set the `over` parameter to `True` for stamping and `False` for watermarking.

You can use {func}`~pypdf._page.PageObject.merge_page` if you don't need to transform the stamp:

```{testsetup}
pypdf_test_setup("user/add-watermark", {
    "crazyones.pdf": "../resources/crazyones.pdf",
    "nup-source.png": "../docs/user/nup-source.png",
    "jpeg.pdf": "../resources/jpeg.pdf",
})
```

```{testcode}
from pypdf import PdfReader, PdfWriter

stamp = PdfReader("jpeg.pdf").pages[0]
writer = PdfWriter(clone_from="crazyones.pdf")
for page in writer.pages:
    page.merge_page(stamp, over=False)  # here set to False for watermarking

writer.write("out-watermark.pdf")
```

Otherwise use {func}`~pypdf._page.PageObject.merge_transformed_page` with {class}`~pypdf.Transformation` if you need to translate, rotate, scale, etc. the stamp before merging it to the content page.

```{testcode}
from pathlib import Path
from typing import List, Union

from pypdf import PdfReader, PdfWriter, Transformation


def stamp(
    content_pdf: Union[Path, str],
    stamp_pdf: Union[Path, str],
    pdf_result: Union[Path, str],
    page_indices: Union[None, List[int]] = None,
):
    stamp_page = PdfReader(stamp_pdf).pages[0]

    writer = PdfWriter()
    # page_indices can be a List(array) of page, tuples are for range definition
    reader = PdfReader(content_pdf)
    writer.append(reader, pages=page_indices)

    for content_page in writer.pages:
        content_page.merge_transformed_page(
            stamp_page,
            Transformation().scale(0.5),
        )

    writer.write(pdf_result)


stamp("crazyones.pdf", "jpeg.pdf", "out-scale.pdf")
```

If you are experiencing wrongly rotated watermarks/stamps, try to use
{func}`~pypdf._page.PageObject.transfer_rotation_to_content` on the corresponding pages beforehand
to fix the page boxes.

Example of stamp:
![stamp.png](stamp.png)

Example of watermark:
![watermark.png](watermark.png)


## Stamping images directly

The above code only works for stamps that are already in PDF format.
However, you can easily convert an image to a PDF file using
[Pillow](https://pypi.org/project/Pillow/).


```{testcode}
from io import BytesIO
from pathlib import Path
from typing import List, Union

from PIL import Image
from pypdf import PageRange, PdfReader, PdfWriter, Transformation


def image_to_pdf(stamp_img: Union[Path, str]) -> PdfReader:
    img = Image.open(stamp_img)
    img_as_pdf = BytesIO()
    img.save(img_as_pdf, "pdf")
    return PdfReader(img_as_pdf)


def stamp_img(
    content_pdf: Union[Path, str],
    stamp_img: Union[Path, str],
    pdf_result: Union[Path, str],
    page_indices: Union[PageRange, List[int], None] = None,
):
    # Convert the image to a PDF
    stamp_pdf = image_to_pdf(stamp_img)

    # Then use the same stamp code from above
    stamp_page = stamp_pdf.pages[0]

    writer = PdfWriter()

    reader = PdfReader(content_pdf)
    writer.append(reader, pages=page_indices)
    for content_page in writer.pages:
        content_page.merge_transformed_page(
            stamp_page,
            Transformation(),
        )

    writer.write(pdf_result)


stamp_img("crazyones.pdf", "nup-source.png", "out-image.pdf")
```


================================================
FILE: docs/user/adding-pdf-annotations.md
================================================
# Adding PDF Annotations

```{note}
By default, some annotations might be invisible, for example polylines, as the default color is "transparent".

To circumvent this, make sure to add the `/C` entry to the annotation, being an array and each array value being in the range 0.0 to 1.0:

  * With one element, a grayscale value.
  * With three elements, a RGB definition.
  * With four elements, a CMYK definition.
```

## Attachments

```{testsetup}
pypdf_test_setup("user/adding-pdf-annotations", {
    "crazyones.pdf": "../resources/crazyones.pdf",
})
```

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter()
writer.add_blank_page(width=200, height=200)

data = b"any bytes - typically read from a file"
writer.add_attachment("smile.png", data)

writer.write("out-attachment.pdf")
```


## Free Text

If you want to add text in a box like this

![](free-text-annotation.png)

you can use {class}`~pypdf.annotations.FreeText`:

```{testcode}
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import FreeText

# Fill the writer with the pages you want
reader = PdfReader("crazyones.pdf")
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

# Create the annotation and add it
annotation = FreeText(
    text="Hello World\nThis is the second line!",
    rect=(50, 550, 200, 650),
    font="Arial",
    bold=True,
    italic=True,
    font_size="20pt",
    font_color="00ff00",
    border_color="0000ff",
    background_color="cdcdcd",
)

# Set annotation flags to 4 for printable annotations.
# See "AnnotationFlag" for other options, e.g. hidden etc.
annotation.flags = 4

writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
writer.write("out-free-text.pdf")
```

## Text

A text annotation looks like this:

![](text-annotation.png)

## Line

If you want to add a line like this:

![](annotation-line.png)

you can use {class}`~pypdf.annotations.Line`:

```{testcode}
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Line

reader = PdfReader("crazyones.pdf")
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

# Add the line
annotation = Line(
    text="Hello World\nLine2",
    rect=(50, 550, 200, 650),
    p1=(50, 550),
    p2=(200, 650),
)
writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
writer.write("out-line.pdf")
```

## PolyLine

If you want to add a polyline like this:

![](annotation-polyline.png)

you can use {class}`~pypdf.annotations.PolyLine`:

```{testcode}
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import PolyLine
from pypdf.generic import ArrayObject, FloatObject, NameObject

reader = PdfReader("crazyones.pdf")
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

# Add the polyline
# By default, the line will be transparent. Set an explicit color.
annotation = PolyLine(
    vertices=[(50, 550), (200, 650), (70, 750), (50, 700)],
)
annotation[NameObject("/C")] = ArrayObject(
    [FloatObject(0.9), FloatObject(0.1), FloatObject(0)]
)
writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
writer.write("out-polyline.pdf")
```

## Rectangle

If you want to add a rectangle like this:

![](annotation-square.png)

you can use {class}`~pypdf.annotations.Rectangle`:

```{testcode}
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Rectangle

reader = PdfReader("crazyones.pdf")
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

# Add the rectangle
annotation = Rectangle(
    rect=(50, 550, 200, 650),
)
writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
writer.write("out-rectangle.pdf")
```

If you want the rectangle to be filled, use the `interior_color="ff0000"` parameter.

This method uses the "square" annotation type of the PDF format.


## Ellipse

If you want to add a circle like this:

![](annotation-circle.png)

you can use {class}`~pypdf.annotations.Ellipse`:

```{testcode}
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Ellipse

reader = PdfReader("crazyones.pdf")
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

# Add the ellipse
annotation = Ellipse(
    rect=(50, 550, 200, 650),
)
writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
writer.write("out-ellipse.pdf")
```

## Polygon

If you want to add a polygon like this:

![](annotation-polygon.png)

you can use {class}`~pypdf.annotations.Polygon`:

```{testcode}
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Polygon

reader = PdfReader("crazyones.pdf")
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

# Add the polygon
annotation = Polygon(
    vertices=[(50, 550), (200, 650), (70, 750), (50, 700)],
)
writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
writer.write("out-polygon.pdf")
```

## Popup

If you want to manage the popup window of a markup annotation, which looks like this:

![](annotation-popup.png)

you can use {py:class}`~pypdf.annotations.Popup`:

```{testcode}
from pypdf import PdfWriter
from pypdf.annotations import Popup, Text

# Arrange
writer = PdfWriter()
writer.append("crazyones.pdf", [0])

# Act
text_annotation = writer.add_annotation(
    0,
    Text(
        text="Hello World\nThis is the second line!",
        rect=(50, 550, 200, 650),
        open=True,
    ),
)

popup_annotation = Popup(
    rect=(50, 550, 200, 650),
    open=True,
    parent=text_annotation,  # use the output of add_annotation
)

writer.write("out-popup.pdf")
```

You have to use the result returned from `add_annotation()`, as it is
the parent annotation with which this popup annotation shall be associated.

## Link

If you want to add a link, you can use {class}`~pypdf.annotations.Link`:

```{testcode}
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Link

reader = PdfReader("crazyones.pdf")
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

# Add the link
annotation = Link(
    rect=(50, 550, 200, 650),
    url="https://martin-thoma.com/",
)
writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
writer.write("out-link.pdf")
```

You can also add internal links:

```{testcode}
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Link
from pypdf.generic import Fit

reader = PdfReader("crazyones.pdf")
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

# Add the link
annotation = Link(
    rect=(50, 550, 200, 650),
    target_page_index=3,
    fit=Fit(fit_type="/FitH", fit_args=(123,)),
)
writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
writer.write("out-internal-link.pdf")
```

## Text Markup Annotations

Text markup annotations refer to a specific piece of text within the document.

These are a bit more complicated as you need to know exactly where the text
is, the so-called "Quad points".

### Highlighting

If you want to highlight text like this:

![](annotation-highlight.png)

you can use {class}`~pypdf.annotations.Highlight`:

```{testcode}
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import Highlight
from pypdf.generic import ArrayObject, FloatObject

reader = PdfReader("crazyones.pdf")
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

rect = (50, 550, 200, 650)
quad_points = [rect[0], rect[1], rect[2], rect[1], rect[0], rect[3], rect[2], rect[3]]

# Add the highlight
annotation = Highlight(
    rect=rect,
    quad_points=ArrayObject([FloatObject(quad_point) for quad_point in quad_points]),
)
writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
writer.write("out-highlight.pdf")
```


================================================
FILE: docs/user/cropping-and-transforming.md
================================================
# Cropping and Transforming PDFs

```{note}
Just because content is no longer visible, it is not gone.
Cropping works by adjusting the viewbox. That means content that was cropped
away can still be restored.
```

```{testsetup}
pypdf_test_setup("user/cropping-and-transforming", {
    "example.pdf": "../resources/example.pdf",
    "Seige_of_Vicksburg_Sample_OCR.pdf": "../resources/Seige_of_Vicksburg_Sample_OCR.pdf",
    "labeled-edges-center-image.pdf": "../resources/labeled-edges-center-image.pdf",
    "side-by-side-subfig.pdf": "../resources/side-by-side-subfig.pdf",
    "nup-source.pdf": "../resources/box.pdf",
    "box.pdf": "../resources/box.pdf",
})
```

```{testcode}
from pypdf import PdfReader, PdfWriter

reader = PdfReader("Seige_of_Vicksburg_Sample_OCR.pdf")
writer = PdfWriter()

# Add page 1 from reader to output document, unchanged.
writer.add_page(reader.pages[0])

# Add page 2 from reader, but rotated clockwise 90 degrees.
writer.add_page(reader.pages[1].rotate(90))

# Add page 3 from reader, but crop it to half size.
page3 = writer.add_page(reader.pages[2])
page3.mediabox.upper_right = (
    page3.mediabox.right / 2,
    page3.mediabox.top / 2,
)

writer.write("out-all-in-one.pdf")
```

## Page rotation

The most typical rotation is a clockwise rotation of the page by multiples of
90 degrees. That is done when the orientation of the page is wrong. You can
do that with the {func}`~pypdf._page.PageObject.rotate` method:

```{testcode}
from pypdf import PdfReader, PdfWriter

reader = PdfReader("example.pdf")
writer = PdfWriter()

writer.add_page(reader.pages[0])
writer.pages[0].rotate(90)

writer.write("out-page-rotation.pdf")
```

The rotate method is typically preferred over the `page.add_transformation(Transformation().rotate())`
method, because `rotate` will ensure that the page is still in the mediabox/cropbox.
The transformation object operates on the coordinates of the page
contents and does not change the mediabox or cropbox.


## Plain Merge

![](plain-merge.png)

is the result of

```{testcode}
from pypdf import PdfReader, PdfWriter, Transformation

# Get the data
reader_base = PdfReader("labeled-edges-center-image.pdf")
page_base = reader_base.pages[0]

reader = PdfReader("box.pdf")
page_box = reader.pages[0]

# Write the result back
writer = PdfWriter()
page = writer.add_page(page_base)
page.merge_page(page_box)
writer.write("out-plain-merge.pdf")
```

## Merge with Rotation

![](merge-45-deg-rot.png)

```{testcode}
from pypdf import PdfReader, PdfWriter, Transformation

# Get the data
reader_base = PdfReader("labeled-edges-center-image.pdf")
page_base = reader_base.pages[0]

reader = PdfReader("box.pdf")
page_box = reader.pages[0]

# Prepare writer
writer = PdfWriter()

# Add base page.
writer_page = writer.add_page(page_base)

# Apply the transformation and merge the pages.
transformation = Transformation().rotate(45)
writer_page.merge_transformed_page(page_box, transformation)

# Write the result back
writer.write("out-merge-with-rotation.pdf")
```

If you add the `expand` parameter:

```{testcode}
transformation = Transformation().rotate(45)
writer_page.merge_transformed_page(page_box, transformation, expand=True)
```

you get:

![](merge-rotate-expand.png)

Alternatively, you can move the merged image a bit to the right by using

```{testcode}
op = Transformation().rotate(45).translate(tx=50)
```

![](merge-translated.png)


## Scaling

In pypdf, the content and the page can either be scaled together or separately.
Content scaling scales the contents on a page, and page scaling scales just the page size (the canvas).
Typically, you want to combine both.

![](scaling.png)

### Scaling both the Page and contents together

```{testcode}
from pypdf import PdfReader, PdfWriter

# Read the input
reader = PdfReader("side-by-side-subfig.pdf")
page = reader.pages[0]

# Add to the writer
writer = PdfWriter()
writer_page = writer.add_page(page)

# Scale
writer_page.scale_by(0.5)

# Write the result to a file
writer.write("out-scale-all.pdf")
```

### Scaling the content only

The content is scaled around the origin of the coordinate system.
Typically, that is the lower-left corner.

```{testcode}
from pypdf import PdfReader, PdfWriter, Transformation

# Read the input
reader = PdfReader("side-by-side-subfig.pdf")
page = reader.pages[0]

# Prepare the writer
writer = PdfWriter()
writer_page = writer.add_page(page)

# Scale
op = Transformation().scale(sx=0.7, sy=0.7)
writer_page.add_transformation(op)

# Write the result to a file
writer.write("out-scale-content.pdf")
```

### Scaling the page only

To scale the page by `sx` in the X direction and `sy` in the Y direction:

```{testcode}
page.mediabox = page.mediabox.scale(sx=0.7, sy=0.7)
```

If you wish to have more control, you can adjust the various page boxes directly:

```{testcode}
from pypdf.generic import RectangleObject

mb = page.mediabox

page.mediabox = RectangleObject((mb.left, mb.bottom, mb.right, mb.top))
page.cropbox = RectangleObject((mb.left, mb.bottom, mb.right, mb.top))
page.trimbox = RectangleObject((mb.left, mb.bottom, mb.right, mb.top))
page.bleedbox = RectangleObject((mb.left, mb.bottom, mb.right, mb.top))
page.artbox = RectangleObject((mb.left, mb.bottom, mb.right, mb.top))
```

### pypdf._page.MERGE_CROP_BOX

`pypdf<=3.4.0` used to merge the other page with `trimbox`.
`pypdf>3.4.0` changes this behavior to `cropbox`.

In case anybody has good reasons to use/expect `trimbox`, you can add the
following code to get the old behavior:

```{testcode}
import pypdf

pypdf._page.MERGE_CROP_BOX = "trimbox"
```

## Transforming several copies of the same page

We have designed the following business card (A8 format) to advertise our new startup.

![](nup-source.png)

We would like to copy this card sixteen times on an A4 page, to print it, cut it, and give it to all our friends. Having learned about the {func}`~pypdf._page.PageObject.merge_page` method and the {class}`~pypdf.Transformation` class, we run the following code. Notice that we had to tweak the media box of the source page to extend it, which is already a dirty hack (in this case).

```{testcode}
from pypdf import PaperSize, PdfReader, PdfWriter, Transformation

# Read source file
reader = PdfReader("nup-source.pdf")
sourcepage = reader.pages[0]

# Create a destination file, and add a blank page to it
writer = PdfWriter()
destpage = writer.add_blank_page(width=PaperSize.A4.height, height=PaperSize.A4.width)

# Copy source page to destination page, several times
for x in range(4):
    for y in range(4):
        # Translate page
        transformation = Transformation().translate(
            x * PaperSize.A8.height,
            y * PaperSize.A8.width,
        )
        # Merge translated page
        destpage.merge_transformed_page(sourcepage, transformation)

# Write file
writer.write("out-nup-dest1.pdf")
```

![](nup-dest2.png)

There is still some work to do, for instance, to insert margins between and around cards, but this is left as an exercise for the reader…

## Possible issues

Especially when combining {func}`~pypdf._page.PageObject.merge_page` with transformations, you might end up with a cropped PDF file.
In these cases, consider setting `expand=True` to re-calculate the corresponding media box.


================================================
FILE: docs/user/encryption-decryption.md
================================================
# Encryption and Decryption of PDFs

PDF encryption makes use of [`RC4`](https://en.wikipedia.org/wiki/RC4) and
[`AES`](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) algorithms
with different key lengths. `pypdf` supports all of them up to `PDF-2.0`, which
is the latest PDF standard.

`pypdf` uses an extra dependency to do encryption or decryption for `AES` algorithms.
We recommend [`pyca/cryptography`](https://cryptography.io/en/latest/). Alternatively,
you can use [`pycryptodome`](https://pypi.org/project/pycryptodome/).

```{note}
Please see the note in the [installation guide](installation.md)
for installing the extra dependencies if interacting with PDFs that use AES.
```

## Encrypt

You can encrypt a PDF by using a password:

```{testsetup}
pypdf_test_setup("user/encryption-decryption", {
    "example.pdf": "../resources/example.pdf",
    "encrypted-file.pdf": "../resources/encrypted-file.pdf",
})
```

```{testcode}
from pypdf import PdfReader, PdfWriter

reader = PdfReader("example.pdf")
writer = PdfWriter(clone_from=reader)

# Add a password to the new PDF
writer.encrypt("my-secret-password", algorithm="AES-256")

# Save the new PDF to a file
writer.write("out-encrypt.pdf")
```

The algorithm can be one of `RC4-40`, `RC4-128`, `AES-128`, `AES-256-R5`, `AES-256`.
We recommend using `AES-256-R5`.

```{warning}
pypdf uses `RC4` by default for compatibility if you omit the "algorithm" parameter.
Since `RC4` is insecure, you should use `AES` algorithms.
```

## Decrypt

You can decrypt a PDF using the appropriate password:

```{testcode}
from pypdf import PdfReader, PdfWriter

reader = PdfReader("encrypted-file.pdf")

if reader.is_encrypted:
    reader.decrypt("test")  # secret password

writer = PdfWriter(clone_from=reader)

# Save the new PDF to a file
writer.write("out-decrypt.pdf")
```


================================================
FILE: docs/user/extract-images.md
================================================
# Extract Images

```{note}
In order to use the following code you need to install optional
dependencies, see [installation guide](installation.md).
```

Every page of a PDF document can contain an arbitrary number of images.
The names of the files may not be unique.

```{testsetup}
pypdf_test_setup("user/extract-images", {
    "example.pdf": "../resources/example.pdf",
})
```

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

page = reader.pages[0]

for i, image_file_object in enumerate(page.images):
    file_name = "out-image-" + str(i) + "-" + image_file_object.name
    image_file_object.image.save(file_name)
```

## Other images

Some other objects can contain images, such as stamp annotations.

You can extract the image from the annotation with the following code:

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")
im = (
    reader.pages[0]["/Annots"][4]["/Parent"]
    .get_object()["/AP"]["/N"]["/Resources"]["/XObject"]["/Im4"]
    .decode_as_image()
)

im.save("out-annotation-image.png")
```

## Error handling

Iterating over `page.images` directly will raise an exception on the first issue.
If you expect some more or less broken PDF files, but still want to retrieve as many images as possible,
consider making this a multistep process:

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

for page in reader.pages:
    for name in page.images.keys():
        try:
            # Try to retrieve actual image.
            image = page.images[name]
        except Exception as exception:
            # Handle exceptions.
            pass
```


================================================
FILE: docs/user/extract-text.md
================================================
# Extract Text from a PDF

You can extract text from a PDF:

```{testsetup}
pypdf_test_setup("user/extract-text", {
    "test Orient.pdf": "../resources/test Orient.pdf",
    "GeoBase_NHNC1_Data_Model_UML_EN.pdf": "../resources/GeoBase_NHNC1_Data_Model_UML_EN.pdf",
})
```

```{testcode}
from pypdf import PdfReader

reader = PdfReader("test Orient.pdf")
page = reader.pages[0]
print(page.extract_text())

# extract only text oriented up
print(page.extract_text(0))

# extract text oriented up and turned left
print(page.extract_text((0, 90)))

# extract text in a fixed width format that closely adheres to the rendered
# layout in the source pdf
print(page.extract_text(extraction_mode="layout"))

# extract text preserving horizontal positioning without excess vertical
# whitespace (removes blank and "whitespace only" lines)
print(page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False))

# adjust horizontal spacing
print(page.extract_text(extraction_mode="layout", layout_mode_scale_weight=1.0))

# exclude (default) or include (as shown below) text rotated w.r.t. the page
print(page.extract_text(extraction_mode="layout", layout_mode_strip_rotated=False))
```

```{testoutput}
:options: +NORMALIZE_WHITESPACE
:hide:


(T) This is box text at top
written down from top
(B)  This is box text at bottom written up from bottom
(L) This is box text on left written vertically to starboard
(R) This is box text on righy written vertically to port


(T) This is box text at top
written down from top


(T) This is box text at top
written down from top
(L) This is box text on left written vertically to starboard

 (B)

This is box text at bottom
 from bottom upwritten


(T) This is box text at top
written down from top
 (B)
This is box text at bottom
 from bottom upwritten
(T) This is box text at top
written down from top
 (B)

This is box text at bottom
 from bottom upwritten


(T) This is box text at top
written down from top
 (B)

This is box text at bottom
 from bottom upwritten

(L) This is box textwritten vertically to starboard


 on righy


on left

 ) This is box text
written vertically to port (R


(T) This is box text at top
written down from top

```

Refer to {func}`~pypdf._page.PageObject.extract_text` for more details.

```{note}
Extracting the text of a page requires parsing its whole content stream. This can require quite a lot of memory -
we have seen 10 GB RAM being required for an uncompressed content stream of about 300 MB (which should not occur
very often).

To limit the size of the content streams to process (and avoid OOM errors in your application), consider
checking `len(page.get_contents().get_data())` beforehand.
```

```{note}
If a PDF page appears to contain only an image (e.g., a scanned document), the extracted text may be minimal or visually empty.
In such cases, consider using OCR software such as [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) to extract text from images.
```

## Using a visitor

You can use visitor functions to control which part of a page you want to process and extract. The visitor functions
you provide will get called for each operator or for each text fragment.

The function provided in the `visitor_text` argument of `extract_text` has five arguments:
* text: the current text (as long as possible, can be up to a full line)
* user_matrix: current matrix to move from user coordinate space (also known as CTM)
* tm_matrix: current matrix from text coordinate space
* font_dictionary: full font dictionary
* font_size: the size (in text coordinate space)

The matrix stores six parameters. The first four provide the rotation/scaling matrix, and the last two provide the translation (horizontal/vertical).
It is recommended to use the user_matrix as it takes into account all transformations.

Notes:

 - As indicated in §8.3.3 of the PDF 1.7 or PDF 2.0 specification, the user matrix applies to text space/image space/form space/pattern space.
 - If you want to get the full transformation from text to user space, you can use the {func}`~.pypdf.mult` function as follows:
`txt2user = mult(tm, cm)`.
The font size is the raw text size and is affected by the `user_matrix`.


The `font_dictionary` may be `None` in case of unknown fonts.
If not `None`, it could contain something like the key `"/BaseFont"` with the value `"/Arial,Bold"`.

**Caveat**: In complicated documents, the calculated positions may be difficult to determine (if you move from multiple forms to page user space, for example).

The function provided in the `visitor_operand_before` argument has four arguments:
operator, operand-arguments, current transformation matrix, and text matrix.

### Example 1: Ignore header and footer

The following example reads the text of page four of [this PDF document](https://github.com/py-pdf/pypdf/blob/main/resources/GeoBase_NHNC1_Data_Model_UML_EN.pdf), but ignores the header (y > 720) and footer (y < 50). In this file we also need to include new line characters (y == 0).

```{testcode}
from pypdf import PdfReader

reader = PdfReader("GeoBase_NHNC1_Data_Model_UML_EN.pdf")
page = reader.pages[3]

parts = []


def visitor_body(text, cm, tm, font_dict, font_size):
    y = tm[5]
    if 50 < y < 720 or y == 0:
        parts.append(text)


page.extract_text(visitor_text=visitor_body)
text_body = "".join(parts)

print(text_body)
```

```{testoutput}
:options: +NORMALIZE_WHITESPACE
:hide:

TABLE OF CONTENTS

1 OVERVIEW ............................................................................................................................................ 6
2 LRS ........................................................................................................................................................ 6
2.1 LRS MODEL ...................................................................................................................................... 7
3 MODEL .................................................................................................................................................. 8
3.1 LRS MODEL ...................................................................................................................................... 9
3.1.1 Logical view ............................................................................................................................... 9
3.1.2 Hydro network.......................................................................................................................... 10
3.1.3 Hydro events............................................................................................................................ 11
3.1.4 Hydrographic ........................................................................................................................... 14
3.1.5 Toponymy (external package) ................................................................................................. 18
3.1.6 Metadata .................................................................................................................................. 19
```

### Example 2: Extract rectangles and texts into an SVG file

The following example converts page three of [this PDF document](https://github.com/py-pdf/pypdf/blob/main/resources/GeoBase_NHNC1_Data_Model_UML_EN.pdf) into
an [SVG file](https://en.wikipedia.org/wiki/Scalable_Vector_Graphics).

Such an SVG export may help to understand what is going on in a page.

% We prefer not to execute doc examples for unmaintained third-party package "svgwrite"
```{testcode}
:skipif: True

from pypdf import PdfReader
import svgwrite

reader = PdfReader("GeoBase_NHNC1_Data_Model_UML_EN.pdf")
page = reader.pages[2]

dwg = svgwrite.Drawing("GeoBase_test.svg", profile="tiny")


def visitor_svg_rect(op, args, cm, tm):
    if op == b"re":
        (x, y, w, h) = (args[i].as_numeric() for i in range(4))
        dwg.add(dwg.rect((x, y), (w, h), stroke="red", fill_opacity=0.05))


def visitor_svg_text(text, cm, tm, font_dict, font_size):
    """Add the visited text fragment to the SVG drawing as blue text."""
    # Elements 4 and 5 of ``cm`` are used as the insertion point
    position = (cm[4], cm[5])
    label = dwg.text(text, insert=position, fill="blue")
    dwg.add(label)


page.extract_text(
    visitor_operand_before=visitor_svg_rect, visitor_text=visitor_svg_text
)
dwg.save()
```

The SVG generated here is bottom-up because the coordinate systems of PDF and SVG differ.

Unfortunately, in complicated PDF documents the coordinates given to the visitor functions may be wrong.

## Why Text Extraction is hard

### Unclear Objective

Extracting text from a PDF can be tricky. In several cases, there is no
clear answer to what the expected result should look like:

1. **Paragraphs**: Should the text of a paragraph have line breaks at the same places
   where the original PDF had them or should it rather be one block of text?
2. **Page numbers**: Should they be included in the extract?
3. **Headers and Footers**: Similar to page numbers - should they be extracted?
4. **Outlines**: Should outlines be extracted at all?
5. **Formatting**: If the text is **bold** or *italic*, should it be included in the
   output?
6. **Tables**: Should the text extraction skip tables? Should it extract just the
   text? Should the borders be shown in some Markdown-like way or should the
   structure be present e.g. as an HTML table? How would you deal with merged
   cells?
7. **Captions**: Should image and table captions be included?
8. **Ligatures**: The Unicode symbol [U+FB00](https://www.compart.com/de/unicode/U+FB00)
   is a single symbol ﬀ for two lowercase letters 'f'. Should that be parsed as
   the Unicode symbol 'ﬀ' or as two ASCII symbols 'ff'?
9. **SVG images**: Should the text parts be extracted?
10. **Mathematical Formulas**: Should they be extracted? Formulas have indices
    and nested fractions.
11. **Whitespace characters**: How many new lines should be extracted for 3 cm of
    vertical whitespace? How many spaces should be extracted if there is 3 cm of
    horizontal whitespace? When would you extract tabs and when spaces?
12. **Footnotes**: When the text of multiple pages is extracted, where should footnotes be shown?
13. **Hyperlinks and Metadata**: Should they be extracted at all? Where should
    they be placed and in which format?
14. **Linearization**: Assume you have a floating figure in between a paragraph.
    Do you first finish the paragraph, or do you put the figure text in between?

Then there are issues where most people would agree on the correct output, but
the way PDF stores information just makes it hard to achieve that:

1. **Tables**: Typically, tables are just absolutely positioned text. In the worst
   case, every single letter could be absolutely positioned. That makes it hard
   to tell where columns / rows are.
2. **Images**: Sometimes PDFs do not contain the text as it is displayed, but
    instead an image. You notice that when you cannot copy the text. Then there
    are PDF files that contain an image and a text layer in the background.
    That typically happens when a document was scanned. Although the scanning
    software (OCR) is pretty good today, it still fails once in a while. pypdf
    is no OCR software; it will not be able to detect those failures. pypdf
    will also never be able to extract text from images.

Finally, there are issues that pypdf will deal with. If you find such a
text extraction bug, please share the PDF with us so we can work on it!

### Missing Semantic Layer

The PDF file format is all about producing the desired visual result for
printing. It was not created for parsing the content. PDF files don't contain a
semantic layer.

Specifically, there is no information about what the header, footer, page numbers,
tables, and paragraphs are. The visual appearance is there, and people might
find heuristics to make educated guesses, but there is no way of being certain.

This is a shortcoming of the PDF file format, not of pypdf.

It is possible to apply machine learning on PDF documents to make good
heuristics, but that will not be part of pypdf. However, pypdf could be used to
feed such a machine learning system with the relevant information.

### Whitespaces

The PDF format is meant for printing. It is not designed to be read by machines.
The text within a PDF document is absolutely positioned, meaning that every single
character could be positioned on the page.

The text

> This is a test document by Ethan Nelson.

can be represented as

> [(This is a )9(te)-3(st)9( do)-4(cu)13(m)-4(en)12(t )-3(b)3(y)-3( )9(Et)-2(h)3(an)4( Nels)13(o)-5(n)3(.)] TJ

Where the numbers are adjustments of horizontal space. This representation used
within the PDF file makes it very hard to guarantee correct whitespaces.


More information:

* [issue #1507](https://github.com/py-pdf/pypdf/issues/1507)
* [Negative numbers in PDF content stream text object](https://stackoverflow.com/a/28203655/562769)
* Mark Stephens: [Understanding PDF text objects](https://blog.idrsolutions.com/understanding-pdf-text-objects/), 2010.

## OCR vs. Text Extraction

Optical Character Recognition (OCR) is the process of extracting text from
images. Software which does this is called *OCR software*. The
[tesseract OCR engine](https://github.com/tesseract-ocr/tesseract) is the
most commonly known open source OCR software.

pypdf is **not** OCR software.

### Digitally-born vs. Scanned PDF files

PDF documents can contain images and text. PDF files don't store text in a
semantically meaningful way, but in a way that makes it easy to show the
text on screen or print it. For this reason, text extraction from PDFs is hard.

If you scan a document, the resulting PDF typically shows the image of the scan.
Scanners then also run OCR software and put the recognized text in the background
of the image. pypdf can extract this result of the scanner's OCR software. However,
in such cases, it's recommended to directly use OCR software as
errors can accumulate: The OCR software is not perfect in recognizing the text.
Then it stores the text in a format that is not meant for text extraction and
pypdf might make mistakes parsing that.

Hence, I would distinguish three types of PDF documents:

* **Digitally born PDF files**: The file was created digitally on the computer.
  It can contain images, texts, links, outline items (a.k.a., bookmarks), JavaScript, ...
  If you zoom in a lot, the text still looks sharp.
* **Scanned PDF files**: Any number of pages was scanned. The images were then
  stored in a PDF file. Hence, the file is just a container for those images.
  You cannot copy the text, you don't have links, outline items, JavaScript.
* **OCRed PDF files**: The scanner ran OCR software and put the recognized text
  in the background of the image. Hence, you can copy the text, but it still looks
  like a scan. If you zoom in enough, you can recognize pixels.

### Can we just always use OCR?

You might now wonder if it makes sense to just always use OCR software. If the
PDF file is digitally-born, you can render it to an image.

I would recommend not to do that.

Text extraction software like pypdf can use more information from the
PDF than just the image. It can know about fonts, encodings, typical character
distances and similar topics.

That means pypdf has a clear advantage when it
comes to characters which are easy to confuse such as `oO0ö`.
**pypdf will never confuse characters**. It just reads what is in the file.

pypdf also has an edge when it comes to characters which are rare, e.g.
🤰. OCR software will not be able to recognize smileys correctly.

## Attempts to prevent text extraction

If people who share PDF documents want to prevent text extraction, they have
multiple ways to do so:

1. Store the contents of the PDF as an image
2. [Use a scrambled font](https://stackoverflow.com/a/43466923/562769)

However, text extraction cannot be completely prevented if people should still
be able to read the document. In the worst case, people can make a screenshot,
print it, scan it, and run OCR over it.


================================================
FILE: docs/user/file-size.md
================================================
# Reduce PDF File Size

There are multiple ways to reduce the size of a given PDF file. The easiest
one is to remove content (e.g., images) or pages.

## Removing duplication

Some PDF documents contain the same object multiple times. For example, if an
image appears three times in a PDF, it could be embedded three times. Or it can
be embedded once and referenced twice.

When adding data to a PdfWriter, the data is copied while respecting the original format.
For example, if two pages include the same image which is duplicated in the source document, the object will be duplicated in the PdfWriter object.

Additionally, when you delete objects in a document, pypdf cannot easily identify whether the objects are used elsewhere or not or if the user wants to keep them in. When writing the PDF file, these objects will be hidden within (part of the file, but not displayed).

To reduce the file size, use a compression call: `writer.compress_identical_objects(remove_identicals=True, remove_orphans=True)`

* `remove_identicals` enables/disables compression merging identical objects.
* `remove_orphans` enables/disables suppression of unused objects.

It is recommended to apply this process just before writing to the file/stream.

It depends on the PDF how well this works, but we have seen an 86% file
reduction (from 5.7 MB to 0.8 MB) within a real PDF.


## Removing Images

```{testsetup}
pypdf_test_setup("user/file-size", {
    "example.pdf": "../resources/example.pdf",
})
```

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="example.pdf")

writer.remove_images()

writer.write("out-no-images.pdf")
```

## Reducing Image Quality

If we reduce the quality of the images within the PDF, we can **sometimes**
reduce the file size of the PDF overall. That depends on how well the reduced
quality image can be compressed.

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="example.pdf")

for page in writer.pages:
    for img in page.images:
        img.replace(img.image, quality=80)

writer.write("out-low-quality.pdf")
```

## Lossless Compression

pypdf supports the FlateDecode filter which uses the zlib/deflate compression
method. It is a lossless compression, meaning the resulting PDF looks exactly
the same.

Deflate compression can be applied to a page via
{meth}`page.compress_content_streams <pypdf._page.PageObject.compress_content_streams>`:

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="example.pdf")

for page in writer.pages:
    page.compress_content_streams()  # This is CPU intensive!

writer.write("out-lossless.pdf")
```

`page.compress_content_streams` uses [`zlib.compress`](https://docs.python.org/3/library/zlib.html#zlib.compress)
and supports the `level` parameter: `level=0` means no compression,
`level=9` refers to the highest compression.

Using this method, we have seen a reduction by 70% (from 11.8 MB to 3.5 MB)
with a real PDF.

## Removing Sources

When a page is removed from the page list, its content will still be present in
the PDF file. This means that the data may still be used elsewhere.

Simply removing a page from the page list will reduce the page count but not the
file size. To exclude the content completely, the pages should not be
added to the PDF using the PdfWriter.append() function. Instead, only the
desired pages should be selected for inclusion
(note: [PR #1843](https://github.com/py-pdf/pypdf/pull/1843) will add a page
deletion feature).

There can be issues with poor PDF formatting, such as when all pages are linked
to the same resource. In such cases, dropping references to specific pages
becomes useless because there is only one source for all pages.

Cropping is an ineffective method for reducing the file size because it only
adjusts the viewboxes and not the external parts of the source image. Therefore,
the content that is no longer visible will still be present in the PDF.

## Going Further

The presentation [Putting a Squeeze on Your PDF](https://youtube.com/watch?v=tgOABUhVwFs) has other suggestions. One takeaway is that most of the significant size optimizations usually come from image and font modification. However, font optimization, such as replacing, merging, and subsetting, is not within the functionality of pypdf at the moment.


================================================
FILE: docs/user/forms.md
================================================
# Interactions with PDF Forms

## Reading form fields

```{testsetup}
pypdf_test_setup("user/forms", {
    "form.pdf": "../resources/form.pdf",
})
```

```{testcode}
from pypdf import PdfReader

reader = PdfReader("form.pdf")
fields = reader.get_form_text_fields()
fields == {"key": "value", "key2": "value2"}

# You can also get all fields:
fields = reader.get_fields()
```

## Filling out forms

```{testcode}
from pypdf import PdfReader, PdfWriter

reader = PdfReader("form.pdf")
writer = PdfWriter()

page = reader.pages[0]
fields = reader.get_fields()

writer.append(reader)

writer.update_page_form_field_values(
    writer.pages[0],
    {"fieldname": "some filled in text"},
    auto_regenerate=False,
)

writer.write("out-filled-form.pdf")
```

Generally speaking, you will always want to use `auto_regenerate=False`. The
parameter is `True` by default for legacy compatibility, but this flags the PDF
processor to recompute the field's rendering, and may trigger a "save changes"
dialog for users who open the generated PDF.

If you want to flatten your form, that is, keeping all form field contents while
removing the form fields themselves, you can set the `flatten` parameter in
{func}`~pypdf.PdfWriter.update_page_form_field_values` to `True`. This
will convert form field contents to regular PDF content. Afterwards, use
{func}`~pypdf.PdfWriter.remove_annotations` with `subtypes="/Widget"`
to remove all form fields to get an actual flattened PDF.

## Some notes about form fields and annotations

PDF forms have a dual-nature approach to the fields:

* Within the root object, an `/AcroForm` structure exists.
  Inside it, you could find (optional):

  - some global elements (Fonts, Resources,...)
  - some global flags (like `/NeedAppearances` (set/cleared with `auto_regenerate` parameter in `update_page_form_field_values()`) that indicates if the reading program should re-render the visual fields upon document launch)
  - `/XFA` that houses a form in XDP format (very specific XML that describes the form rendered by some viewers); the `/XFA` form overrides the page content
  - `/Fields` that houses an array of indirect references that reference the upper _Field_ Objects (roots)

* Within the page `/Annots`, you will spot `/Widget` annotations that define the visual rendering.

To flesh out this overview:

* The core-specific properties of a field are:
  - `/FT`: The field type (Button, Text, Choice, or Signature).
  - `/T`:  The partial field name.
  - `/V`:  The field’s value, whose format varies depending on the field type.
  - `/DV`: The default value to which the field reverts when a reset-form action is executed.
* To streamline readability, _Field_ Objects and _Widget_ Objects can be fused housing all properties.
* Fields can be organized hierarchically, id est one field can be placed under another. In such instances, the `/Parent` will have an IndirectObject providing Bottom-Up links and `/Kids` is an array carrying IndirectObjects for Top-Down navigation; _Widget_ Objects are still required for visual rendering. To call upon them, use the *fully qualified field name* (where all the individual names of the parent objects are separated by `.`)

  For instance, take two (visual) fields both called _city_, but attached below _sender_ and _receiver_; the corresponding full names will be _sender.city_ and _receiver.city_.
* When a field is repeated on multiple pages, the Field Object will have many _Widget_ Objects in  `/Kids`. These objects are pure _widgets_, containing no _field_ specific data.
* If a field stores only hidden values, no _Widgets_ are required.

In _pypdf_ fields are extracted from the `/Fields` array:

```{testcode}
from pypdf import PdfReader

reader = PdfReader("form.pdf")
fields = reader.get_fields()
```

```{testcode}
from pypdf import PdfReader
from pypdf.constants import AnnotationDictionaryAttributes

reader = PdfReader("form.pdf")
fields = []
for page in reader.pages:
    for annot in page.annotations:
        annot = annot.get_object()
        if annot[AnnotationDictionaryAttributes.Subtype] == "/Widget":
            fields.append(annot)
```

However, while similar, there are some relevant differences between the two above blocks of code. Most importantly, the first block will return a list of Field objects, whereas the second will return more generic dictionary-like objects. The object lists will *mostly* reference the same object in the underlying PDF, meaning you'll find that `obj_taken_from_first_list.indirect_reference == obj_taken_from_second_list.indirect_reference`. Field objects are generally more ergonomic, as the exposed data can be accessed via clearly named properties. However, the more generic dictionary-like objects will contain data that the Field object does not expose, such as the Rect (the widget's position on the page). Therefore, the correct approach depends on your use case.

However, it is also important to note that the two lists do not *always* refer to the same underlying PDF object. For example, if the form contains radio buttons, you will find that `reader.get_fields()` will get the parent object (the group of radio buttons) whereas `page.annotations` will return all the child objects (the individual radio buttons).

```{note}
Remember that fields are not stored in pages; if you use `add_page()` the field structure is not copied. It is recommended to use `.append()` with the proper parameters instead.
```

In case of missing _field_ objects in `/Fields`, `writer.reattach_fields()` will parse page(s) annotations and will reattach them. This fix cannot guess intermediate fields and will not report fields using the same _name_.

## Identify pages where fields are used

To ease locating page fields you can use `get_pages_showing_field` of PdfReader or PdfWriter. This method accepts a field object, a *PdfObject* that represents a field (as extracted from `_root_object["/AcroForm"]["/Fields"]`). The method returns a list of pages, because a field can have multiple widgets as mentioned previously (e.g., radio buttons or text displayed on multiple pages).

The page numbers can then be retrieved as usual by using `page.page_number`.


================================================
FILE: docs/user/handle-attachments.md
================================================
# Handle Attachments

PDF documents can contain attachments, sometimes also called embedded files.

## Retrieve Attachments

Attachments have a name, but it might not be unique. For this reason, the value of `reader.attachments["attachment_name"]`
is a list.

You can extract all attachments like this:

```{testsetup}
pypdf_test_setup("user/handle-attachments", {
    "example.pdf": "../resources/example.pdf",
})
```

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

for name, content_list in reader.attachments.items():
    for i, content in enumerate(content_list):
        with open(f"out-attachment-{i}-{name}", "wb") as fp:
            fp.write(content)
```

Alternatively, you can retrieve them in an object-oriented fashion if you need
further details for these files:

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

for attachment in reader.attachment_list:
    print(attachment.name, attachment.alternative_name, attachment.content)
```

## Add Attachments

To add a new attachment, use the following code:

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="example.pdf")
writer.add_attachment(filename="test.txt", data=b"Hello World!")
```

As you can see, the basic attachment properties are its name and content. If you
want to modify further properties of it, the returned object provides corresponding
setters:

```{testcode}
import datetime
import hashlib

from pypdf import PdfWriter
from pypdf.generic import create_string_object, ByteStringObject, NameObject, NumberObject


writer = PdfWriter(clone_from="example.pdf")
embedded_file = writer.add_attachment(filename="test.txt", data=b"Hello World!")

embedded_file.size = NumberObject(len(b"Hello World!"))
embedded_file.alternative_name = create_string_object("test1.txt")
embedded_file.description = create_string_object("My test file")
embedded_file.subtype = NameObject("/text/plain")
embedded_file.checksum = ByteStringObject(hashlib.md5(b"Hello World!").digest())
embedded_file.modification_date = datetime.datetime.now(tz=datetime.timezone.utc)
# embedded_file.content = "My new content."

writer.write("out-add-attachment.pdf")
```

The same functionality is available if you iterate over the attachments of a writer
using `writer.attachment_list`.

## Delete Attachments

To delete an existing attachment, use the following code:

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="example.pdf")
attachment = writer.add_attachment(filename="test.txt", data=b"Hello World!")
attachment.delete()
assert list(writer.attachment_list) == []
```

Please note that this will not delete the associated file relationship
if it exists. Deleting them as well would require us to know where this has
been defined, which requires more complexity. For now, please consider looking
for the corresponding definition yourself and delete it from the array.

## PDF/A compliance

The following example shows how to add an attachment to a PDF/A-3B compliant document
without breaking compliance:

```{testcode}
from pypdf import PdfWriter
from pypdf.constants import AFRelationship
from pypdf.generic import create_string_object, ArrayObject, NameObject

writer = PdfWriter(clone_from="example.pdf")
attachment = writer.add_attachment(filename="test.txt", data="Hello World!")
attachment.subtype = NameObject("/text/plain")
attachment.associated_file_relationship = NameObject(AFRelationship.SUPPLEMENT)
attachment.alternative_name = create_string_object(attachment.name)

if "/AF" in writer.root_object:
    af = writer.root_object["/AF"].get_object()
else:
    af = ArrayObject()
    writer.root_object[NameObject("/AF")] = af
af.append(attachment.pdf_object.indirect_reference)

writer.write("out-a3b.pdf")
```

This example marks a relationship of the attachment to the whole document.
Alternatively, it can be added to most of the other PDF objects as well.
For details, see the corresponding PDF specification, like section 14.13
of the PDF 2.0 specification.


================================================
FILE: docs/user/handling-outlines.md
================================================
# Handling Outlines

PDF outlines - also known as bookmarks - provide a structured navigation panel in PDF readers. `pypdf` allows you to read, create, and modify both simple and deeply nested outlines.

## Writing PDF Outlines

To add outlines, use the {meth}`~pypdf.PdfWriter.add_outline_item` method. This method returns a reference to the created outline, which you can use as a parent to create nested (hierarchical) bookmarks.

### Adding a Simple Outline

The following example shows how to add a single top-level bookmark. We add an outline item pointing to the first page (index `0`) and save the result.


```{testsetup}
pypdf_test_setup("user/handling-outlines", {
    "crazyones.pdf":"../resources/crazyones.pdf",
})
```

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="crazyones.pdf")

# Add a top-level bookmark
writer.add_outline_item(
    title="Introduction",
    page_number=0
)

writer.write("simple-example.pdf")
```


### Adding Nested Outlines

You can build hierarchies (like Chapter → Section) by passing the parent outline item to the `parent` parameter of a new item.

In the example below, we create a root item "Chapter 1" and nest two sections under it.

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="crazyones.pdf")

# Add parent (Chapter)
introduction = writer.add_outline_item(
    title="Chapter 1",
    page_number=0
)

# Add children (sections) nested under the introduction
writer.add_outline_item(
    title="Section 1.1",
    page_number=0,
    parent=introduction
)

writer.add_outline_item(
    title="Section 1.2",
    page_number=0,
    parent=introduction
)

writer.write("nested-example.pdf")
```


### Advanced Styling and View Modes (Fit Options)

You can customize the appearance and behavior of bookmarks using optional parameters, such as changing the text color or applying bold and italic styles.

For detailed information on all available parameters and their formats, please refer to the {meth}`~pypdf.PdfWriter.add_outline_item` API documentation.

The ``fit`` parameter determines how the page is displayed when the user clicks the bookmark. You can use the {class}`~pypdf.generic.Fit` helper to specify modes like {meth}`~pypdf.generic.Fit.fit`, {meth}`~pypdf.generic.Fit.fit_horizontally`, or {meth}`~pypdf.generic.Fit.xyz`.


```{testcode}
from pypdf import PdfWriter
from pypdf.generic import Fit

writer = PdfWriter(clone_from="crazyones.pdf")

# Top-level chapter (Points to Page 1, Index 0)
chapter2 = writer.add_outline_item(
    title="Chapter 2",
    page_number=0,
    color=(0, 0, 1),
    bold=True,
    italic=False,
    is_open=True,
    fit=Fit.fit()
)

# Section under Chapter 2 (Points to Page 1, Index 0)
section2_1 = writer.add_outline_item(
    title="Section 2.1",
    page_number=0,
    parent=chapter2,
    color=(0, 0.5, 0),
    bold=False,
    italic=True,
    is_open=False,
    fit=Fit.fit_horizontally(top=800)
)

# Section with custom zoom (Points to Page 1, Index 0)
section2_2 = writer.add_outline_item(
    title="Section 2.2",
    page_number=0,
    parent=chapter2,
    color=(1, 0, 0),
    bold=True,
    italic=True,
    is_open=True,
    fit=Fit.xyz(left=0, top=800, zoom=1.25)
)

writer.write("advanced-example.pdf")
```

```{figure} complete-outlines.png
:alt: An annotated screenshot illustrating simple, nested, and advanced PDF bookmarks.

An annotated screenshot illustrating simple, nested, and advanced PDF bookmarks in a Table of Contents.
```

## Reading PDF Outlines

`pypdf` represents outlines as a list of {class}`~pypdf.generic.Destination` objects. If an outline has children, they appear as a nested list directly following their parent.

To retrieve the page number a bookmark points to, use the {meth}`~pypdf.PdfReader.get_destination_page_number` method, which returns a zero-based page index.

### Reading Simple Outlines

To extract only the top-level bookmarks (ignoring nested sections), you can iterate over the {attr}`~pypdf.PdfReader.outline` property. Since nested children appear as lists within the outline structure, you must explicitly check for and skip them (`isinstance(outline, list)`) to avoid errors. The example below reads the file created in the previous section.

```{testcode}
from pypdf import PdfReader

reader = PdfReader("simple-example.pdf")

print("Simple Outline (Top-Level Only):")
print("-" * 32)

for outline in reader.outline:
    # Check if the item is a list (which represents nested children)
    if isinstance(outline, list):
        continue  # Skip the nested parts completely

    page_number = reader.get_destination_page_number(outline)

    if page_number is None:
        print(f"{outline.title} -> No page destination")
    else:
        print(f"{outline.title} -> page {page_number + 1}")
```

```{testoutput}
Simple Outline (Top-Level Only):
--------------------------------
Introduction -> page 1
```

### Reading Nested Outlines

When dealing with hierarchical bookmarks, the {attr}`~pypdf.PdfReader.outline` property may contain lists inside lists. You should use a recursive function to traverse the tree.

The following example defines a `print_outline` function that handles indentation and nested lists to display the structure of the document we created earlier.

```{testcode}
from typing import List, Union

from pypdf import PdfReader
from pypdf.generic import Destination


def print_outline(
    outlines: List[Union[Destination, List[Destination]]],
    reader: PdfReader,
    level: int = 0
) -> None:
    """Print each outline item, indenting by two spaces per nesting level.

    A nested list is printed one level deeper than the list containing it.
    """
    prefix = "  " * level

    for entry in outlines:
        if isinstance(entry, list):
            # A nested list holds child items: descend one level
            print_outline(entry, reader, level + 1)
            continue

        target = reader.get_destination_page_number(entry)

        if target is not None:
            # The page index is zero-based; display it one-based
            print(f"{prefix}- {entry.title} (Page {target + 1})")
        else:
            print(f"{prefix}- {entry.title} (No page destination)")


reader = PdfReader("nested-example.pdf")

print("Nested Outline Hierarchy:")
print("-" * 25)

print_outline(reader.outline, reader)
```

```{testoutput}
Nested Outline Hierarchy:
-------------------------
- Chapter 1 (Page 1)
  - Section 1.1 (Page 1)
  - Section 1.2 (Page 1)
```


================================================
FILE: docs/user/installation.md
================================================
# Installation

There are several ways to install pypdf. The most common option is to use pip.

## pip

pypdf requires Python 3.9+ to run.

Typically, Python comes with `pip`, a package installer. Using it, you can
install pypdf:

```bash
pip install pypdf
```

If you are not a superuser (a system administrator / root), you can also just
install pypdf for your current user:

```bash
pip install --user pypdf
```

### Optional dependencies

pypdf tries to be as self-contained as possible, but for some tasks, the amount
of work to properly maintain the code would be too high. This is especially the
case for cryptography and image formats.

If you simply want to install all optional dependencies, run:

```
pip install pypdf[full]
```

Alternatively, you can install just some:

If you plan to use pypdf for encrypting or decrypting PDFs that use AES, you
will need to install some extra dependencies. Encryption using RC4 is supported
using the regular installation.

```
pip install pypdf[crypto]
```

If you plan to use image extraction, you need Pillow:

```
pip install pypdf[image]
```

For JBIG2 support, you need to install a global OS-level package as well:
[`jbig2dec`](https://github.com/ArtifexSoftware/jbig2dec). The installation procedure
depends on your operating system. For Ubuntu, use the following, for example:

```
sudo apt-get install jbig2dec
```

## Python Version Support

Since pypdf 4.0, every release, including point releases, should work with all
supported versions of [Python](https://devguide.python.org/versions/). Thus,
every point release is designed to work with all existing Python versions,
excluding end-of-life versions.

Previous versions of pypdf support the following versions of Python:

| Python                 | 3.11 | 3.10 | 3.9 | 3.8 | 3.7 | 3.6 | 2.7 |
|------------------------|:----:|:----:|:---:|:---:|:---:|:---:|:---:|
| pypdf 3.x              |  ✅   |  ✅   |  ✅  |  ✅  |  ✅  |  ✅  |  ❌  |
| PyPDF2 >= 2.0          |  ✅   |  ✅   |  ✅  |  ✅  |  ✅  |  ✅  |  ❌  |
| PyPDF2 1.20.0 - 1.28.4 |  ❌   |  ✅   |  ✅  |  ✅  |  ✅  |  ✅  |  ✅  |
| PyPDF2 1.15.0 - 1.20.0 |  ❌   |  ❌   |  ❌  |  ❌  |  ❌  |  ❌  |  ✅  |


## Anaconda

Anaconda users can [install pypdf via conda-forge](https://anaconda.org/conda-forge/pypdf).


## Development Version

In case you want to use the current version under development:

```bash
pip install git+https://github.com/py-pdf/pypdf.git
```


================================================
FILE: docs/user/merging-pdfs.md
================================================
# Merging PDF files

## Basic Example

```{testsetup}
pypdf_test_setup("user/merging-pdfs", {
    "example.pdf": "../resources/example.pdf",
    "hello-world.pdf": "../resources/hello-world.pdf",
    "jpeg.pdf": "../resources/jpeg.pdf",
    "GeoBase_NHNC1_Data_Model_UML_EN.pdf": "../resources/GeoBase_NHNC1_Data_Model_UML_EN.pdf",
    "Seige_of_Vicksburg_Sample_OCR.pdf": "../resources/Seige_of_Vicksburg_Sample_OCR.pdf",
    "two-different-pages.pdf": "../resources/two-different-pages.pdf",
})
```

```{testcode}
from pypdf import PdfWriter

merger = PdfWriter()

for pdf in ["example.pdf", "hello-world.pdf", "jpeg.pdf"]:
    merger.append(pdf)

merger.write("out-basic.pdf")
```

For more details, see an excellent answer on
[StackOverflow](https://stackoverflow.com/questions/3444645/merge-pdf-files)
by Paul Rooney.

````{note}
Dealing with large PDF files might reach the recursion limit of the current
Python interpreter. In these cases, increasing the limit might help:

```{testcode}
import sys

# Example: Increase the current limit by factor 5.
sys.setrecursionlimit(sys.getrecursionlimit() * 5)
```
````

## Showing more merging options

```{testcode}
from pypdf import PdfWriter

merger = PdfWriter()

with (
    open("Seige_of_Vicksburg_Sample_OCR.pdf", "rb") as input1,
    open("two-different-pages.pdf", "rb") as input2,
    open("example.pdf", "rb") as input3
):
    # Add the first 3 pages of input1 document to output
    merger.append(fileobj=input1, pages=(0, 3))

    # Insert the first page of input2 into the output beginning after the second page
    merger.merge(position=2, fileobj=input2, pages=(0, 1))

    # Append entire input3 document to the end of the output document
    merger.append(input3)

    # Write to an output PDF document
    merger.write("out-advanced.pdf")
```

## append

`append` has been slightly extended in `PdfWriter`. See {func}`~pypdf.PdfWriter.append` for more details.

### Examples

```{testcode}
from pypdf import PdfWriter, PdfReader

writer = PdfWriter()

source_file_name = "GeoBase_NHNC1_Data_Model_UML_EN.pdf"

# Append the first 10 pages from pdf file
writer.append(source_file_name, (0, 10))

reader = PdfReader(source_file_name)

# Append the first and 10th page from reader and create an outline
writer.append(reader, "page 1 and 10", [0, 9])
```

During merging, the relevant named destination will also be imported.

If you want to insert pages in the middle of the destination, use `merge` (which provides an insertion position).
You can insert the same page multiple times, if necessary, even using a list-based syntax:

```{testcode}
# Insert pages 2 and 3, with page 1 before, between, and after
writer.append(reader, [0, 1, 0, 2, 0])
```

## add_page / insert_page

It is recommended to use `append` or `merge` instead.

## Merging forms

When merging forms, some form fields may have the same names, preventing access to some data.

A grouping field should be added before adding the source PDF to prevent that.
The original fields will be identified by adding the group name.

For example, after calling `reader.add_form_topname("form1")`, the field
previously named `field1` is now identified as `form1.field1` when calling
`reader.get_form_text_fields(True)` or `reader.get_fields()`.

After that, you can append the input PDF completely or partially using
`writer.append` or `writer.merge`. If you insert a set of pages, only those
fields will be listed.

## reset_translation

During cloning, if an object has been already cloned, it will not be cloned again, and a pointer
to this previously cloned object is returned instead. Because of that, if you add/merge a page that has
already been added, the same object will be added the second time. If you modify any of these two pages later,
both pages can be modified independently.

To reset, call  `writer.reset_translation(reader)`.

## Advanced cloning

To prevent side effects between pages/objects and all linked objects, cloning is done during the merge.

This process will be automatically applied if you use `PdfWriter.append/merge/add_page/insert_page`.
If you want to clone an object before attaching it "manually", use the `clone` method of any *PdfObject*:

```{testcode}
from pypdf.generic import NameObject, NumberObject, StreamObject

stream_object = StreamObject()

cloned_object = stream_object.clone(writer)
```

If you try to clone an object already belonging to the writer, it will return the same object:

```{testcode}
assert cloned_object == stream_object.clone(writer)
```

The same holds true if you try to clone an object twice. It will return the previously cloned object:

```{testcode}
assert stream_object.clone(writer) == stream_object.clone(writer)
```

Please note that if you clone an object, you will clone all the objects below as well,
including the objects pointed by *IndirectObject*. Due to this, if you clone a page that
includes some articles (`"/B"`), not only the first article, but also all the chained articles
and the pages where those articles can be read will be copied.
This means that you may copy lots of objects which will be saved in the output PDF as well.

To prevent this, you can provide the list of fields in the dictionaries to be ignored:

```{testcode}
new_page = writer.add_page(reader.pages[0], excluded_keys=["/B"])
```

### Merging rotated pages

If you are working with rotated pages, you might want to call {func}`~pypdf._page.PageObject.transfer_rotation_to_content` on the page
before merging to avoid wrongly rotated results:

```{testcode}
background = PdfReader("jpeg.pdf").pages[0]

for page in writer.pages:
    if page.rotation != 0:
        page.transfer_rotation_to_content()
    page.merge_page(background, over=False)
```


================================================
FILE: docs/user/metadata.md
================================================
# Metadata

PDF files can have two types of metadata: "Regular" and XMP ones. They can both exist at the same time.

## Reading metadata

```{testsetup}
pypdf_test_setup("user/metadata", {
    "example.pdf": "../resources/example.pdf",
    "commented-xmp.pdf": "../resources/commented-xmp.pdf",
})
```

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

meta = reader.metadata

# All the following could be None!
print(meta.title)
print(meta.author)
print(meta.subject)
print(meta.creator)
print(meta.producer)
print(meta.creation_date)
print(meta.modification_date)
```

% Two last rows masked to allow to change example.pdf
```{testoutput}
:hide:

PDF Example Document
None
None
None
Skia/PDF m103 Google Docs Renderer
...
...
```

## Writing metadata

```{testcode}
from datetime import datetime
from pypdf import PdfReader, PdfWriter

reader = PdfReader("example.pdf")
writer = PdfWriter()

# Add all pages to the writer
for page in reader.pages:
    writer.add_page(page)

# If you want to add the old metadata, include these two lines
if reader.metadata is not None:
    writer.add_metadata(reader.metadata)

# Format the current date and time for the metadata
utc_time = "-05'00'"  # UTC time optional
time = datetime.now().strftime(f"D\072%Y%m%d%H%M%S{utc_time}")

# Add the new metadata
writer.add_metadata(
    {
        "/Author": "Martin",
        "/Producer": "Libre Writer",
        "/Title": "Title",
        "/Subject": "Subject",
        "/Keywords": "Keywords",
        "/CreationDate": time,
        "/ModDate": time,
        "/Creator": "Creator",
        "/CustomField": "CustomField",
    }
)

# Save the new PDF to a file
writer.write("out-meta-create.pdf")
```

## Updating metadata

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="example.pdf")

# Change some values
writer.add_metadata(
    {
        "/Author": "Martin",
        "/Producer": "Libre Writer",
        "/Title": "Title",
    }
)

# Clear all data but keep the entry in PDF
writer.metadata = {}

# Replace all entries with new set of entries
writer.metadata = {
    "/Author": "Martin",
    "/Producer": "Libre Writer",
}

# Save the new PDF to a file
writer.write("out-meta-update.pdf")
```

## Removing metadata entry

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter("example.pdf")

# Remove Metadata (/Info entry)
writer.metadata = None

# Save the new PDF to a file
writer.write("out-meta-remove.pdf")
```

## Reading XMP metadata

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

meta = reader.xmp_metadata
if meta:
    print(meta.dc_title)
    print(meta.dc_description)
    print(meta.xmp_create_date)
```

```{testoutput}
:hide:

{'x-default': 'PDF Example Document'}
{}
2025-10-30 09:29:55
```

## Creating XMP metadata

You can create XMP metadata easily using the `XmpInformation.create()` method:

```{testcode}
from pypdf import PdfWriter
from pypdf.xmp import XmpInformation

# Create a new XMP metadata object
xmp = XmpInformation.create()

# Set metadata fields
xmp.dc_title = {"x-default": "My Document Title"}
xmp.dc_creator = ["Author One", "Author Two"]
xmp.dc_description = {"x-default": "Document description"}
xmp.dc_subject = ["keyword1", "keyword2", "keyword3"]
xmp.pdf_producer = "pypdf"

# Create a writer and add the metadata
writer = PdfWriter()
writer.add_blank_page(612, 792)  # Add a page
writer.xmp_metadata = xmp
writer.write("out-xmp-create.pdf")
```

## Setting XMP metadata fields

The `XmpInformation` class provides property-based access for all supported metadata fields:

### Dublin Core fields

```{testcode}
from datetime import datetime
from pypdf.xmp import XmpInformation

xmp = XmpInformation.create()

# Single value fields
xmp.dc_coverage = "Global coverage"
xmp.dc_format = "application/pdf"
xmp.dc_identifier = "unique-id-123"
xmp.dc_source = "Original Source"

# Array fields (bags - unordered)
xmp.dc_contributor = ["Contributor One", "Contributor Two"]
xmp.dc_language = ["en", "fr", "de"]
xmp.dc_publisher = ["Publisher One"]
xmp.dc_relation = ["Related Doc 1", "Related Doc 2"]
xmp.dc_subject = ["keyword1", "keyword2"]
xmp.dc_type = ["Document", "Text"]

# Sequence fields (ordered arrays)
xmp.dc_creator = ["Primary Author", "Secondary Author"]
xmp.dc_date = [datetime.now()]

# Language alternative fields
xmp.dc_title = {"x-default": "Title", "en": "English Title", "fr": "Titre français"}
xmp.dc_description = {"x-default": "Description", "en": "English Description"}
xmp.dc_rights = {"x-default": "All rights reserved"}
```

### XMP fields

```{testcode}
from datetime import datetime

# Date fields accept both datetime objects and strings
xmp.xmp_create_date = datetime.now()
xmp.xmp_modify_date = datetime.fromisoformat("2023-12-25T10:30:45Z")
xmp.xmp_metadata_date = datetime.now()

# Text field
xmp.xmp_creator_tool = "pypdf"
```

### PDF fields

```{testcode}
xmp.pdf_keywords = "keyword1, keyword2, keyword3"
xmp.pdf_pdfversion = "1.4"
xmp.pdf_producer = "pypdf"
```

### XMP Media Management fields

```{testcode}
xmp.xmpmm_document_id = "uuid:12345678-1234-1234-1234-123456789abc"
xmp.xmpmm_instance_id = "uuid:87654321-4321-4321-4321-cba987654321"
```

### PDF/A fields

```{testcode}
xmp.pdfaid_part = "1"
xmp.pdfaid_conformance = "B"
```

### Clearing metadata fields

You can clear any field by assigning `None`:

```{testcode}
xmp.dc_title = None
xmp.dc_creator = None
xmp.pdf_producer = None
```

### Incrementally updating XMP metadata fields

When modifying existing XMP metadata, it is often necessary to add or update individual entries while preserving existing values. The XMP properties return standard Python data structures that can be manipulated directly:

```{testcode}
from pypdf.xmp import XmpInformation

xmp = XmpInformation.create()

# Language alternative fields return dictionaries
title = xmp.dc_title or {}
title["en"] = "English Title"
title["fr"] = "Titre français"
xmp.dc_title = title

# Bag fields (unordered collections) return lists
subjects = xmp.dc_subject or []
subjects.append("new_keyword")
xmp.dc_subject = subjects

# Sequence fields (ordered collections) return lists
creators = xmp.dc_creator or []
creators.append("New Author")
xmp.dc_creator = creators
```

This approach provides direct control over the data structures while maintaining the property-based interface.

## Modifying XMP metadata

Modifying XMP metadata is a bit more complicated.

As an example, we want to add the following PDF/UA identifier section to the XMP metadata:

```xml
<rdf:Description rdf:about="" xmlns:pdfuaid="http://www.aiim.org/pdfua/ns/id/">
    <pdfuaid:part>1</pdfuaid:part>
</rdf:Description>
```

This could be written like this:

```{testcode}
from pypdf import PdfWriter

writer = PdfWriter(clone_from="commented-xmp.pdf")

metadata = writer.xmp_metadata
assert metadata  # Ensure that it is not `None`.
rdf_root = metadata.rdf_root
xmp_meta = rdf_root.parentNode
xmp_document = xmp_meta.parentNode

# Please note that without a text node, the corresponding elements might
# be omitted completely.
pdfuaid_description = xmp_document.createElement("rdf:Description")
pdfuaid_description.setAttribute("rdf:about", "")
pdfuaid_description.setAttribute("xmlns:pdfuaid", "http://www.aiim.org/pdfua/ns/id/")
pdfuaid_part = xmp_document.createElement("pdfuaid:part")
pdfuaid_part_text = xmp_document.createTextNode("1")
pdfuaid_part.appendChild(pdfuaid_part_text)
pdfuaid_description.appendChild(pdfuaid_part)
rdf_root.appendChild(pdfuaid_description)

metadata.stream.set_data(xmp_document.toxml().encode("utf-8"))

writer.write("out-xmp-update.pdf")
```

For further details on modifying the structure, please refer to {py:mod}`xml.dom.minidom`.


================================================
FILE: docs/user/pdf-version-support.md
================================================
# PDF Version Support

PDF comes in the following versions:

* 1993: 1.0
* 1994: 1.1
* 1996: 1.2
* 1999: 1.3
* 2001: 1.4
* 2003: 1.5
* 2004: 1.6
* 2008: 1.7, ISO 32000-1:2008
* 2017: 2.0, ISO 32000-2:2017

The general format didn't change, but new features got added. It can be that
pypdf can do the operations you want on PDF 2.0 files without fully supporting
all features of PDF 2.0.

## PDF Feature Support by pypdf

| Feature                        | PDF Version | pypdf Support |
|--------------------------------|:-----------:|:-------------:|
| CMaps                          |     1.4     |       ✅       |
| Transparent Graphics           |     1.4     |       ✅       |
| Content Stream Compression     |     1.5     |       ✅       |
| Cross-reference Streams        |     1.5     |       ✅       |
| Object Streams                 |     1.5     |       ✅       |
| Optional Content Groups (OCGs) |     1.5     |       ❓       |
| AES Encryption                 |     1.6     |       ✅       |

This table is not complete - if in doubt, consider having a look at the API documentation or
inside the issues or try with a corresponding PDF file. In general, we are open to
add support for missing features. Please open a new issue if it does not exist yet, and
keep in mind that we rely on external contributors to support us with the implementation.

One commonly requested feature is proper support for reading/handling incremental PDF files, see
[issue #3304](https://github.com/py-pdf/pypdf/issues/3304).

See [History of PDF](https://en.wikipedia.org/wiki/History_of_PDF) for more
features.

Some PDF features are not supported by pypdf, but other libraries can be used
for them:

* [pyHanko](https://pyhanko.readthedocs.io/en/latest/index.html): Cryptographically sign a PDF ([#302](https://github.com/py-pdf/pypdf/issues/302))
* [camelot-py](https://pypi.org/project/camelot-py/): Table Extraction ([#231](https://github.com/py-pdf/pypdf/issues/231))


================================================
FILE: docs/user/pdfa-compliance.md
================================================
# PDF/A Compliance

PDF/A is a specialized, ISO-standardized version of the Portable Document Format
(PDF) specifically designed for the long-term preservation and archiving of
electronic documents. It ensures that files remain accessible, readable, and
true to their original appearance by embedding all necessary fonts, images, and
metadata within the document itself. By adhering to strict guidelines and
minimizing dependencies on external resources or proprietary software, PDF/A
ensures the consistent and reliable reproduction of content, safeguarding it
against future technological changes and obsolescence.

## PDF/A Versions

* **PDF/A-1**: Based on PDF 1.4, PDF/A-1 is the first version of the standard
  and is divided into two levels: PDF/A-1a (Level A, ensuring accessibility) and
  PDF/A-1b (Level B, ensuring visual preservation).
    * **Level B** (Basic): Ensures visual preservation and basic requirements for archiving.
    * **Level A** (Accessible): Everything from level B, but includes additional
      requirements for accessibility, such as tagging, Unicode character
      mapping, and logical structure.
* **PDF/A-2**: Based on PDF 1.7 (ISO 32000-1), PDF/A-2 adds features and
  improvements over PDF/A-1, while maintaining compatibility with PDF/A-1b
  (Level B) documents.
    * **Level B** (Basic): Like PDF/A-1b, but with support for PDF 1.7 features such
      as transparency layers.
    * **Level U** (Unicode): Ensures Unicode mapping without the full
      accessibility requirements of PDF/A-1a (Level A).
    * **Level A** (Accessible): Similar to PDF/A-1a
* **PDF/A-3**: Based on PDF 1.7 (ISO 32000-1), PDF/A-3 is similar to PDF/A-2 but
  allows the embedding of non-PDF/A files as attachments, enabling the archiving
  of source or supplementary data alongside the PDF/A document. This is
  interesting for invoices which can add XML files.
* **PDF/A-4**: Based on PDF 2.0 (ISO 32000-2), PDF/A-4 introduces new features
  and improvements for better archiving and accessibility. The previous levels
  are replaced by PDF/A-4f (ensuring visual preservation and allowing attachments)
  and PDF/A-4e (Engineering, allows 3D content).

## PDF/A-1b

In contrast to other PDF documents, PDF/A-1b documents must fulfill the
following requirements:

* **MarkInfo Object**: The MarkInfo object is a dictionary object within a PDF/A
  file that provides information about the logical structure and tagging of the
  document. The MarkInfo object indicates whether the document is tagged,
  contains optional content, or has a structure tree that describes the logical
  arrangement of content such as headings, paragraphs, lists, and tables. By
  including the MarkInfo object, PDF/A ensures that electronic documents are
  accessible to users with disabilities, such as those using screen readers or
  other assistive technologies.
* **Embedded fonts**: All fonts used in the document must be embedded to ensure
  consistent text rendering across different devices and systems.
* **Color Spaces**: DeviceRGB is a device-dependent color space that relies on
  the specific characteristics of the output device, which can lead to
  inconsistent color rendering across various devices. To achieve accurate and
  consistent color representation, PDF/A requires the use of device-independent
  color spaces, such as ICC-based color profiles.
* **XMP (Extensible Metadata Platform) metadata**: XMP metadata provides a
  standardized and extensible way to store essential information about a
  document and its properties. XMP metadata is an XML-based format embedded
  directly within a PDF/A file. It contains various types of information, such
  as document title, author, creation and modification dates, keywords, and
  copyright information, as well as PDF/A-specific details like conformance
  level and OutputIntent.

## Validation

[VeraPDF](https://docs.verapdf.org/install/) is the go-to PDF/A validator.

There are several online validators that allow you to simply upload the document:

* [pdfen.com](https://www.pdfen.com/pdf-a-validator)
* [avepdf.com](https://avepdf.com/pdfa-validation) : Gives an error report
* [pdfa.org](https://pdfa.org/pdfa-online-verification-service/)
* [visual-paradigm.com](https://online.visual-paradigm.com/de/online-pdf-editor/pdfa-validator/) - can convert the PDF to a PDF/A
* [pdf2go.com](https://www.pdf2go.com/validate-pdfa)
* [slub-dresden.de](https://www.slub-dresden.de/veroeffentlichen/dissertationen-habilitationen/elektronische-veroeffentlichung/slub-pdfa-validator) links to relevant parts in the specification.

## pypdf and PDF/A

At the moment, pypdf does not make any guarantees regarding PDF/A.
[Support is very welcome](https://github.com/py-pdf/pypdf/labels/is-pdf%2Fa-compliance).


================================================
FILE: docs/user/post-processing-in-text-extraction.md
================================================
# Post-Processing of Text Extraction

Post-processing can recognizably improve the results of text extraction. It is,
however, outside the scope of pypdf itself. Hence, the library will not give
any direct support for it. It is a natural language processing (NLP) task.

This page lists a few examples of what can be done as well as a community recipe
that can be used as a general purpose post-processing step. If you know more
about the specific domain of your documents, e.g., the language, it is likely
that you can find custom solutions that work better in your context.

## Ligature Replacement

```{testcode}
def replace_ligatures(text: str) -> str:
    """
    Return ``text`` with typographic ligatures spelled out as plain letters.

    Every ligature listed below is a single character which gets replaced
    by its multi-letter equivalent. All other characters are kept unchanged.
    """
    translation_table = str.maketrans(
        {
            "ﬀ": "ff",
            "ﬁ": "fi",
            "ﬂ": "fl",
            "ﬃ": "ffi",
            "ﬄ": "ffl",
            "ﬅ": "ft",
            "ﬆ": "st",
            # "Ꜳ": "AA",
            # "Æ": "AE",
            "ꜳ": "aa",
        }
    )
    # One pass over the text handles all ligatures at once.
    return text.translate(translation_table)
```

## Dehyphenation

Hyphens are used to break words up so that the appearance of the page is nicer.

```{testcode}
from typing import List


def remove_hyphens(text: str) -> str:
    """
    Join words which have been split by a hyphen at the end of a line.

    Trailing whitespace is stripped from every line. For each line except
    the last one which ends with "-", the hyphen is removed and the first
    word of the following line is pulled up by ``dehyphenate``.

    This fails for:
    * Natural dashes: well-known, self-replication, use-cases, non-semantic,
                      Post-processing, Window-wise, viewpoint-dependent
    * Trailing math operators: 2 - 4
    * Names: Lopez-Ferreras, VGG-19, CIFAR-100
    """
    lines = [raw_line.rstrip() for raw_line in text.split("\n")]

    # Collect the candidates up front. The last line is skipped as there is
    # no following line to take the rest of the word from.
    hyphenated = [
        index for index, line in enumerate(lines[:-1]) if line.endswith("-")
    ]

    for index in hyphenated:
        lines = dehyphenate(lines, index)

    return "\n".join(lines)


def dehyphenate(lines: List[str], line_no: int) -> List[str]:
    """
    Merge the hyphenated word at the end of a line with its continuation.

    The last character of ``lines[line_no]`` (expected to be the hyphen) is
    dropped and the first space-delimited word of the following line is
    appended instead. The moved word and the space separating it from the
    rest are removed from the following line, so that the continuation line
    does not start with a stray space.

    Args:
        lines: The lines of the text. The list is modified in place.
        line_no: Index of the line ending with a hyphen. There has to be a
            following line, otherwise an ``IndexError`` is raised.

    Returns:
        The same list object that was passed in.
    """
    next_line = lines[line_no + 1]
    word_suffix, _, remainder = next_line.partition(" ")

    lines[line_no] = lines[line_no][:-1] + word_suffix
    # If the next line starts with a space, there is no word to move and
    # the line is kept untouched.
    if word_suffix:
        lines[line_no + 1] = remainder
    return lines
```

## Header/Footer Removal

The following header/footer removal has several drawbacks:

* False-positives, e.g., for the first page when there is a date like 2024.
* False-negatives in many cases:
    * Dynamic part, e.g., page label is in the header.
    * Even/odd pages have different headers.
    * Some pages, e.g., the first one or chapter pages, do not have a header.

```{testcode}
def remove_footer(extracted_texts: list[str], page_labels: list[str]) -> list[str]:
    """
    Remove the page label from the start and the end of each page text.

    Texts and labels are paired by position. Leading whitespace is ignored
    when looking for the label at the start, trailing whitespace when
    looking for it at the end. A text is only modified if its label is found.

    Args:
        extracted_texts: The extracted text of each page.
        page_labels: The label of each page, in the same order as the texts.

    Returns:
        The processed texts. As the inputs are combined with ``zip``, the
        result has the length of the shorter of the two lists.
    """
    processed = []
    for text, label in zip(extracted_texts, page_labels):
        # An empty label "matches" every text and the slice `[: -0]` would
        # wipe the whole page, thus only non-empty labels are handled.
        if label:
            text_left = text.lstrip()
            if text_left.startswith(label):
                text = text_left[len(label) :]

            text_right = text.rstrip()
            if text_right.endswith(label):
                text = text_right[: -len(label)]

        processed.append(text)
    return processed
```

## Other ideas

* Whitespaces in units: There should be a space between a number and its unit
  ([source](https://tex.stackexchange.com/questions/20962/should-i-put-a-space-between-a-number-and-its-unit)).
  That means: 42 ms, 42 GHz, 42 GB.
* Percent: English style guides prescribe writing the percent sign following the number without any space between (e.g., 50%).
* Whitespaces before dots: Should typically be removed.
* Whitespaces after dots: Should typically be added.


================================================
FILE: docs/user/reading-pdf-annotations.md
================================================
# Reading PDF Annotations

PDF 2.0 defines the following annotation types:

* Text
* Link
* FreeText
* Line
* Square
* Circle
* Polygon
* PolyLine
* Highlight
* Underline
* Squiggly
* StrikeOut
* Caret
* Stamp
* Ink
* Popup
* FileAttachment
* Sound
* Movie
* Screen
* Widget
* PrinterMark
* TrapNet
* Watermark
* 3D
* Redact
* Projection
* RichMedia

In general, annotations can be read like this:

```{testsetup}
pypdf_test_setup("user/reading-pdf-annotations", {
    "example.pdf": "../resources/example.pdf",
})
```

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

for page in reader.pages:
    if "/Annots" in page:
        for annotation in page["/Annots"]:
            obj = annotation.get_object()
            print({"subtype": obj["/Subtype"], "location": obj["/Rect"]})
```

```{testoutput}
:hide:

{'subtype': '/Highlight', 'location': [376.771, 406.213, 413.78, 422.506]}
{'subtype': '/Popup', 'location': [531.053, 327.965, 715.198, 422.219]}
{'subtype': '/FileAttachment', 'location': [245.819, 223.288, 252.819, 240.288]}
{'subtype': '/Stamp', 'location': [68.7536, 187.259, 151.442, 254.124]}
{'subtype': '/Popup', 'location': [612, 631.925, 816, 745.925]}
{'subtype': '/Text', 'location': [176.9, 216.719, 200.9, 240.719]}
{'subtype': '/Popup', 'location': [596, 709.445, 780, 801.445]}
```

Examples of reading three of the most common annotations:

## Text

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

for page in reader.pages:
    if "/Annots" in page:
        for annotation in page["/Annots"]:
            subtype = annotation.get_object()["/Subtype"]
            if subtype == "/Text":
                print(annotation.get_object()["/Contents"])
```

```{testoutput}
:hide:

Text comment
```

## Highlights

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

for page in reader.pages:
    if "/Annots" in page:
        for annotation in page["/Annots"]:
            subtype = annotation.get_object()["/Subtype"]
            if subtype == "/Highlight":
                coords = annotation.get_object()["/QuadPoints"]
                x1, y1, x2, y2, x3, y3, x4, y4 = coords
```

## Attachments

```{testcode}
from pypdf import PdfReader

reader = PdfReader("example.pdf")

attachments = {}
for page in reader.pages:
    if "/Annots" in page:
        for annotation in page["/Annots"]:
            subtype = annotation.get_object()["/Subtype"]
            if subtype == "/FileAttachment":
                fileobj = annotation.get_object()["/FS"]
                attachments[fileobj["/F"]] = fileobj["/EF"]["/F"].get_data()
```


================================================
FILE: docs/user/robustness.md
================================================
# Robustness and strict=False

PDF is [specified in various versions](https://pdfa.org/resource/pdf-specification-archive/).
The specification of PDF 2.0 has 1003 pages. This length makes it hard to get
everything right. As a consequence, a lot of PDF files are not strictly following the
specification.

If a PDF file does not follow the specification, it is not always possible to
be certain what the intended effect would be. Think of the following broken
Python code as an example:

```{testcode}
# Broken
function (foo, bar):

# Potentially intended:
def function(foo, bar):
    ...

# Also possible:
function = (foo, bar)
```

```{testoutput}
:hide:

Traceback (most recent call last):
    ...
SyntaxError: invalid syntax
```

Writing a parser, you can go two paths: Either you try to be forgiving and try
to figure out what the user intended, or you are strict and just tell the user
that they should fix their stuff.

pypdf gives you the option to be strict or not.

pypdf has two core objects:

* {class}`~pypdf.PdfReader`
* {class}`~pypdf.PdfWriter`

PdfReader and PdfWriter both have a `strict` parameter.

Choosing `strict=True` means that pypdf will raise an exception if a PDF does
not follow the specification.

Choosing `strict=False` means that pypdf will try to be forgiving and do
something reasonable, but it will log a warning message. It is a best-effort
approach.


================================================
FILE: docs/user/security.md
================================================
# Security

We strive to provide a library with secure defaults.

## Configuration

### Filters

*pypdf* currently employs output size limits for some filters which are known to possibly have large compression ratios.

The usual limit is at 75 MB of uncompressed data during decompression. If this is too low for your use case, and you are
aware of the possible side effects, you can modify the following constants which define the desired maximal output size in bytes:

* `pypdf.filters.ZLIB_MAX_OUTPUT_LENGTH` for the *FlateDecode* filter (zlib compression)
* `pypdf.filters.LZW_MAX_OUTPUT_LENGTH` for the *LZWDecode* filter (LZW compression)
* `pypdf.filters.RUN_LENGTH_MAX_OUTPUT_LENGTH` for the *RunLengthDecode* filter (run-length compression)

For JBIG2 images, there is a similar parameter to limit the memory usage during decoding: `pypdf.filters.JBIG2_MAX_OUTPUT_LENGTH`.
It defaults to 75 MB as well.

For all streams, the maximum allowed value for the `/Length` field is limited to `pypdf.filters.MAX_DECLARED_STREAM_LENGTH`, which
defaults to 75 MB as well.

For all array-based streams, the maximum allowed output length is limited to `pypdf.filters.MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH`,
which defaults to 75 MB as well.

For the *FlateDecode* filter, the number of bytes to attempt recovery with can be set by `pypdf.filters.ZLIB_MAX_RECOVERY_INPUT_LENGTH`.
It defaults to 5 MB due to the much more complex recovery approach.

For the *JBIG2Decode* filter, calling the external *jbig2dec* tool can be disabled by setting `pypdf.filters.JBIG2DEC_BINARY = None`.

### Reading

*pypdf* currently employs the following reading limits on *PdfReader* instances:

* `root_object_recovery_limit` limits the number of objects to read before stopping with Root object recovery in
  non-strict mode. It defaults to 10 000. Setting it to `None` will fully disable this limit.

If you want to employ custom limits for the *PdfWriter* as well, the currently preferred way
is to initialize it from the reader, id est something like
`PdfWriter(clone_from=PdfReader("file.pdf", root_object_recovery_limit=42))`.

## Reporting possible vulnerabilities

Please refer to our [security policy](https://github.com/py-pdf/pypdf/security/policy).

## Invalid reports

### Exceptions

Most exceptions raised by our code are considered bugs or robustness issues and can be reported publicly.
We consider it the task of the library user to catch exceptions which could cause their service to crash, although we try to
only raise a known set of exception types.

### Cryptographic functions

We receive reports about possibly insecure cryptography from time to time. This includes the following aspects:

* Using the ARC4 cipher
* Using the AES cipher in ECB mode
* Using MD5 for hashing

These are requirements of the PDF standard, which we need to achieve the greatest compatibility with.
Although some of them might be deprecated in PDF 2.0, the PDF 2.0 adoption rate is very low and legacy documents need to be supported.

### XML parsing

We use `xml.dom.minidom` for parsing XMP information. Given recent Python versions built against recent Expat versions, the usual attacks
(exponential entity expansion and external entity expansion) should not be possible. We have corresponding tests in place to ensure
this for the platforms our tests run against.

For some details, see [the official documentation](https://docs.python.org/3/library/xml.html#xml-security) and the
[README for defusedxml](https://github.com/tiran/defusedxml/blob/main/README.md#python-xml-libraries).

Please note that automated scanners tend to still flag any direct imports of XML modules from the Python standard library as unsafe.
There have been discussions about this being outdated already, but they are still being flagged.


================================================
FILE: docs/user/streaming-data.md
================================================
# Streaming Data with pypdf

In some cases, you might want to avoid saving things explicitly as a file
to disk, e.g. when you want to store the PDF in a database or AWS S3.

pypdf supports streaming data to a file-like object:


```{testsetup}
pypdf_test_setup("user/streaming-data", {
    "example.pdf": "../resources/example.pdf",
})
```

```{testcode}
from io import BytesIO
from pypdf import PdfReader, PdfWriter

# Prepare example
with open("example.pdf", "rb") as fh:
    bytes_stream = BytesIO(fh.read())

# Read from bytes_stream
reader = PdfReader(bytes_stream)

# Write to bytes_stream
writer = PdfWriter()
with BytesIO() as bytes_stream:
    writer.write(bytes_stream)
```

## Writing a PDF directly to AWS S3

Suppose you want to manipulate a PDF and write it directly to AWS S3 without having
to write the document to a file first. We have the original PDF in `raw_bytes_data` as `bytes`
and want to protect it with the password `my-secret-password`:

% We prefer not to execute doc examples which require access to cloud providers
```{testcode}
:skipif: True

from io import BytesIO

import boto3
from pypdf import PdfReader, PdfWriter


reader = PdfReader(BytesIO(raw_bytes_data))
writer = PdfWriter()

# Add all pages to the writer
for page in reader.pages:
    writer.add_page(page)

# Add a password to the new PDF
writer.encrypt("my-secret-password")

# Write the new PDF to an in-memory stream and pass that stream on to S3
with BytesIO() as bytes_stream:
    writer.write(bytes_stream)
    bytes_stream.seek(0)
    s3 = boto3.client("s3")
    s3.write_get_object_response(
        Body=bytes_stream, RequestRoute=request_route, RequestToken=request_token
    )
```

## Reading PDFs directly from cloud services

One option is to first download the file and then pass the local file path to `PdfReader`.
Another option is to get a byte stream.

For AWS S3 it works like this:

% We prefer not to execute doc examples which require access to cloud providers
```{testcode}
:skipif: True

from io import BytesIO

import boto3
from pypdf import PdfReader


s3 = boto3.client("s3")
obj = s3.get_object(Bucket="my-bucket", Key="my/doc.pdf")
reader = PdfReader(BytesIO(obj["Body"].read()))
```

To use with Google Cloud storage:

% We prefer not to execute doc examples which require access to cloud providers
```{testcode}
:skipif: True

from io import BytesIO

from google.cloud import storage
from pypdf import PdfReader

# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] must be set
storage_client = storage.Client()
blob = storage_client.bucket("my-bucket").blob("mydoc.pdf")
file_stream = BytesIO()
blob.download_to_file(file_stream)
reader = PdfReader(file_stream)
```


================================================
FILE: docs/user/suppress-warnings.md
================================================
# Exceptions, Warnings, and Log messages

pypdf makes use of three mechanisms to show if something went wrong:

* **Exceptions** are error cases that pypdf users should explicitly handle.
  In the `strict=True` mode, most log messages with the warning level will
  become exceptions. This can be useful in applications where you can require
  a user to fix the broken PDF.
* **Warnings** are avoidable issues, such as using deprecated classes /
  functions / parameters. Another example is missing capabilities of pypdf.
  In those cases, pypdf users should adjust their code. Warnings
  are issued by the `warnings` module - those are different from the log-level
  "warning."
* **Log messages** are informative messages that can be used for post-mortem
  analysis. Most of the time, users can ignore them. They come in different
  *levels*, such as info / warning / error indicating the severity.
  Examples are non-standard compliant PDF files which pypdf can deal with or
  a missing implementation that leads to a part of the text not being extracted.


## Exceptions

Exceptions need to be caught if you want to handle them. For example, you could
want to read the text from a PDF as a part of a search function.

Most PDF files do not follow the specification. In this case, pypdf needs to
guess which kinds of mistakes were potentially made when the PDF file was created.
See [the robustness page](robustness.md) for the related issues.

As a user, you likely do not care about it. If it is readable in any way, you
want the text. You might use pdfminer.six as a fallback and do this:

% We prefer not to execute doc examples for third-party package "pdfminer.six" used in one code snippet only
```{testcode}
:skipif: True

from pypdf import PdfReader
from pdfminer.high_level import extract_text as fallback_text_extraction

text = ""
try:
    reader = PdfReader("example.pdf")
    for page in reader.pages:
        text += page.extract_text()
except Exception as exc:
    text = fallback_text_extraction("example.pdf")
```

You could also capture [`pypdf.errors.PyPdfError`](https://github.com/py-pdf/pypdf/blob/main/pypdf/errors.py)
if you prefer something more specific.

## Warnings

The [`warnings` module](https://docs.python.org/3/library/warnings.html) allows
you to ignore warnings:

```{testcode}
import warnings

warnings.filterwarnings("ignore")
```

In many cases, you actually want to start Python with the `-W` flag so that you
see all warnings. This is especially true for Continuous Integration (CI).

## Log messages

Log messages can be noisy in some cases. pypdf hopefully has a reasonable
level of log messages, but you can reduce which types of messages you want to
see:

```{testcode}
import logging

logger = logging.getLogger("pypdf")
logger.setLevel(logging.ERROR)
```

The [`logging` module](https://docs.python.org/3/library/logging.html#logging-levels)
defines six log levels:

* CRITICAL
* ERROR
* WARNING
* INFO
* DEBUG
* NOTSET


================================================
FILE: docs/user/viewer-preferences.md
================================================
# Adding Viewer Preferences

It is possible to set the viewer preferences of a PDF file.
They are described in §12.2 of the [PDF 1.7 specification](https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf).

Note that the `/ViewerPreferences` dictionary does not exist by default.
If it is not already present, it must be created by calling the
{func}`~pypdf.PdfWriter.create_viewer_preferences` method.

If viewer preferences exist in a PDF file being read with {class}`~pypdf.PdfReader`,
you can access them as properties of {attr}`~pypdf.PdfReader.viewer_preferences`.
Otherwise, the {attr}`~pypdf.PdfReader.viewer_preferences` property will be set to `None`.

## Example

```{testsetup}
pypdf_test_setup("user/viewer-preferences")
```

```{testcode}
from pypdf import PdfWriter
from pypdf.generic import ArrayObject, NumberObject

writer = PdfWriter()

writer.create_viewer_preferences()

# /HideToolbar
writer.viewer_preferences.hide_toolbar = True
# /HideMenubar
writer.viewer_preferences.hide_menubar = True
# /HideWindowUI
writer.viewer_preferences.hide_windowui = True
# /FitWindow
writer.viewer_preferences.fit_window = True
# /CenterWindow
writer.viewer_preferences.center_window = True
# /DisplayDocTitle
writer.viewer_preferences.display_doctitle = True

# /NonFullScreenPageMode
writer.viewer_preferences.non_fullscreen_pagemode = "/UseNone"  # default
writer.viewer_preferences.non_fullscreen_pagemode = "/UseOutlines"
writer.viewer_preferences.non_fullscreen_pagemode = "/UseThumbs"
writer.viewer_preferences.non_fullscreen_pagemode = "/UseOC"

# /Direction
writer.viewer_preferences.direction = "/L2R"  # default
writer.viewer_preferences.direction = "/R2L"

# /ViewArea
writer.viewer_preferences.view_area = "/CropBox"
# /ViewClip
writer.viewer_preferences.view_clip = "/CropBox"
# /PrintArea
writer.viewer_preferences.print_area = "/CropBox"
# /PrintClip
writer.viewer_preferences.print_clip = "/CropBox"

# /PrintScaling
writer.viewer_preferences.print_scaling = "/None"
writer.viewer_preferences.print_scaling = "/AppDefault"  # default according to PDF spec

# /Duplex
writer.viewer_preferences.duplex = "/Simplex"
writer.viewer_preferences.duplex = "/DuplexFlipShortEdge"
writer.viewer_preferences.duplex = "/DuplexFlipLongEdge"

# /PickTrayByPDFSize
writer.viewer_preferences.pick_tray_by_pdfsize = True
# /PrintPageRange
writer.viewer_preferences.print_pagerange = ArrayObject(
    [NumberObject("1"), NumberObject("10"), NumberObject("20"), NumberObject("30")]
)
# /NumCopies
writer.viewer_preferences.num_copies = 2

for i in range(40):
    writer.add_blank_page(10, 10)

writer.write("out.pdf")
```

The names beginning with a slash character are part of the PDF file format. They are
included here to ease searching the pypdf documentation
for these names from the PDF specification.


================================================
FILE: make_release.py
================================================
"""Internal tool to update the CHANGELOG."""

import json
import subprocess
import urllib.request
from dataclasses import dataclass
from datetime import datetime, timezone

GH_ORG = "py-pdf"
GH_PROJECT = "pypdf"
VERSION_FILE_PATH = "pypdf/_version.py"
CHANGELOG_FILE_PATH = "CHANGELOG.md"


@dataclass(frozen=True)
class Change:
    """
    Capture the data of a git commit.

    Instances are created by ``parse_commit_line`` from one line of the
    ``git log`` output and are immutable (``frozen=True``).
    """

    # Commit hash as printed by the ``%H`` placeholder of ``git log``.
    commit_hash: str
    # Part of the commit subject in front of the first ": ", e.g. "BUG";
    # empty when the subject has no such prefix.
    prefix: str
    # Remainder of the commit subject, without the prefix.
    message: str
    # Author name as printed by the ``%aN`` placeholder of ``git log``.
    author: str
    # Login handle looked up for this commit hash in the author mapping.
    author_login: str


def main(changelog_path: str) -> None:
    """
    Prepare a release: bump the version and prepend a new CHANGELOG entry.

    The changes since the most recent git tag are collected, the user is
    asked for the new version, the version file and the commit message file
    are written, and finally the CHANGELOG is updated unless it already
    contains the generated entry.

    Args:
        changelog_path: The location of the CHANGELOG file

    """
    old_changelog = get_changelog(changelog_path)
    last_tag = get_most_recent_git_tag()
    changes, changes_with_author = get_formatted_changes(last_tag)
    if not changes:
        print("No changes")
        return

    suggested_version = version_bump(last_tag)
    new_version = get_version_interactive(suggested_version, changes)
    adjust_version_py(new_version)

    today = datetime.now(tz=timezone.utc)
    compare_url = (
        f"https://github.com/{GH_ORG}/{GH_PROJECT}/compare/{last_tag}...{new_version}"
    )
    trailer = f"\n[Full Changelog]({compare_url})\n\n"
    header = f"## Version {new_version}, {today:%Y-%m-%d}\n"
    new_entry = f"{header}{changes}{trailer}"
    print(new_entry)
    write_commit_msg_file(new_version, changes_with_author + trailer)
    # Currently disabled:
    # write_release_msg_file(new_version, changes_with_author + trailer, today)

    # Keep the script idempotent: never add the same entry twice.
    if new_entry in old_changelog:
        print("Changelog is already up-to-date!")
        return

    write_changelog(
        "# CHANGELOG\n\n" + new_entry + strip_header(old_changelog),
        changelog_path,
    )
    print_instructions(new_version)


def print_instructions(new_version: str) -> None:
    """
    Show the manual steps that remain after the files were updated.

    Args:
        new_version: The version that was written to the version file.

    """
    lines = (
        "=" * 80,
        f"☑  {VERSION_FILE_PATH} was adjusted to '{new_version}'",
        f"☑  {CHANGELOG_FILE_PATH} was adjusted",
        "",
        "Now run:",
        "  git commit -eF RELEASE_COMMIT_MSG.md",
        "  git push",
    )
    print("\n".join(lines))


def adjust_version_py(version: str) -> None:
    """
    Overwrite the version file with the given ``__version__`` string.

    Args:
        version: The new version, e.g. "1.2.3".

    """
    # Use an explicit encoding like the CHANGELOG helpers do, so the result
    # does not depend on the locale of the machine running the release.
    with open(VERSION_FILE_PATH, "w", encoding="utf-8") as fp:
        fp.write(f'__version__ = "{version}"\n')


def get_version_interactive(new_version: str, changes: str) -> str:
    """
    Ask the user for the new __version__ until a semantic version is entered.

    Args:
        new_version: Suggested version, offered as the default answer.
        changes: The formatted changes, printed to help the user decide.

    Returns:
        The version entered (or confirmed) by the user.

    """
    from rich.prompt import Prompt  # noqa: PLC0415

    print("The changes are:")
    print(changes)
    suggestion = new_version
    question = "New semantic version"
    while True:
        answer = Prompt.ask(question, default=suggestion)
        if is_semantic_version(answer):
            return answer
        # Every retry uses the longer explanation, but keeps the same default.
        question = "That was not a semantic version. Please enter a semantic version"


def is_semantic_version(version: str) -> bool:
    """
    Check if the given version is a semantic version.

    Only the plain ``MAJOR.MINOR.PATCH`` form is accepted; edge-cases like
    pre-releases are not covered.

    Args:
        version: The version string to check.

    Returns:
        True if the version consists of exactly three dot-separated parts
        which are all made of ASCII digits.

    """
    parts = version.split(".")
    if len(parts) != 3:
        return False
    # int() would also accept signs, surrounding whitespace and underscores
    # ("-2", " 2", "1_0"), none of which belong into a version number.
    return all(part.isascii() and part.isdigit() for part in parts)


def write_commit_msg_file(new_version: str, commit_changes: str) -> None:
    """
    Write a file that can be used as a commit message.

    Like this:

        git commit -eF RELEASE_COMMIT_MSG.md && git push

    Args:
        new_version: The version being released.
        commit_changes: The formatted changes to put below the heading.

    """
    # Commit messages and author handles may contain non-ASCII characters,
    # so do not rely on the locale's default encoding.
    with open("RELEASE_COMMIT_MSG.md", "w", encoding="utf-8") as fp:
        fp.write(f"REL: {new_version}\n\n")
        fp.write("## What's new\n")
        fp.write(commit_changes)


def write_release_msg_file(
    new_version: str, commit_changes: str, today: datetime
) -> None:
    """
    Write a file that can be used as a git tag message.

    Like this:

        git tag -eF RELEASE_TAG_MSG.md && git push

    Args:
        new_version: The version being released.
        commit_changes: The formatted changes to put below the heading.
        today: The release date; only year, month and day are written.

    """
    # Commit messages and author handles may contain non-ASCII characters,
    # so do not rely on the locale's default encoding.
    with open("RELEASE_TAG_MSG.md", "w", encoding="utf-8") as fp:
        fp.write(f"Version {new_version}, {today:%Y-%m-%d}\n\n")
        fp.write("## What's new\n")
        fp.write(commit_changes)


def strip_header(md: str) -> str:
    """
    Drop a leading '# CHANGELOG' header and any whitespace that follows it.

    Args:
        md: The Markdown text of the changelog.

    Returns:
        The text without the header and without leading whitespace.

    """
    header = "# CHANGELOG"
    body = md[len(header):] if md.startswith(header) else md
    return body.lstrip()


def version_bump(git_tag: str) -> str:
    """
    Suggest the next version by incrementing the patch number of a tag.

    Args:
        git_tag: Old version tag of the form "MAJOR.MINOR.PATCH"

    Returns:
        The new version where the patch version is bumped.

    """
    # A patch release is simply assumed; the caller may still override it.
    components = git_tag.split(".")
    major, minor, patch = components
    next_patch = int(patch) + 1
    return ".".join((major, minor, str(next_patch)))


def get_changelog(changelog_path: str) -> str:
    """
    Load the complete CHANGELOG as text.

    Args:
        changelog_path: Path to the CHANGELOG file

    Returns:
        The UTF-8 decoded content of the CHANGELOG

    """
    with open(changelog_path, mode="r", encoding="utf-8") as changelog_file:
        content = changelog_file.read()
    return content


def write_changelog(new_changelog: str, changelog_path: str) -> None:
    """
    Replace the CHANGELOG file with the given text.

    Args:
        new_changelog: Contents of the new CHANGELOG
        changelog_path: Path where the CHANGELOG file is

    """
    with open(changelog_path, mode="w", encoding="utf-8") as changelog_file:
        changelog_file.write(new_changelog)


def get_formatted_changes(git_tag: str) -> tuple[str, str]:
    """
    Render the commits since the last tag as Markdown sections.

    Commits are grouped by their prefix. Known prefixes get their own
    section in a fixed order; all remaining prefixes are listed in a final
    "Other" section.

    Args:
        git_tag: the reference tag

    Returns:
        Two Markdown strings with the changes done since git_tag: the first
        one without author information, the second one with an
        "by @<login>" suffix on every entry.

    """
    # Section order and the long titles of the known prefixes.
    section_order = (
        "SEC",
        "DEP",
        "ENH",
        "PI",
        "BUG",
        "ROB",
        "DOC",
        "DEV",
        "CI",
        "MAINT",
        "TST",
        "STY",
    )
    titles = {
        "SEC": "Security",
        "DEP": "Deprecations",
        "ENH": "New Features",
        "BUG": "Bug Fixes",
        "ROB": "Robustness",
        "DOC": "Documentation",
        "DEV": "Developer Experience",
        "CI": "Continuous Integration",
        "MAINT": "Maintenance",
        "TST": "Testing",
        "STY": "Code Style",
        "PI": "Performance Improvements",
    }

    # Group by prefix, keeping the order in which the commits were returned.
    by_prefix: dict[str, list[dict[str, str]]] = {}
    for change in get_git_commits_since_tag(git_tag):
        by_prefix.setdefault(change.prefix, []).append(
            {"msg": change.message, "author": change.author_login}
        )

    plain: list[str] = []
    credited: list[str] = []
    for prefix in section_order:
        entries = by_prefix.pop(prefix, None)
        if entries is None:
            continue
        heading = f"\n### {titles[prefix]} ({prefix})\n"
        plain.append(heading)
        credited.append(heading)
        for entry in entries:
            plain.append(f"- {entry['msg']}\n")
            credited.append(f"- {entry['msg']} by @{entry['author']}\n")

    # Whatever is left uses a prefix without a dedicated section.
    if by_prefix:
        plain.append("\n### Other\n")
        credited.append("\n### Other\n")
        for prefix, entries in by_prefix.items():
            for entry in entries:
                plain.append(f"- {prefix}: {entry['msg']}\n")
                credited.append(
                    f"- {prefix}: {entry['msg']} by @{entry['author']}\n"
                )

    return "".join(plain), "".join(credited)


def get_most_recent_git_tag() -> str:
    """
    Ask git for the closest tag reachable from the current commit.

    Returns:
        The output of ``git describe --tag --abbrev=0`` without surrounding
        whitespace.

    """
    command = ["git", "describe", "--tag", "--abbrev=0"]
    described = subprocess.check_output(
        command, stderr=subprocess.STDOUT, text=True
    )
    return described.strip()


def get_author_mapping(line_count: int) -> dict[str, str]:
    """
    Get the authors for each commit.

    The most recent commits are fetched from the GitHub API in pages of at
    most 100 entries until ``line_count`` commits are covered.

    Args:
        line_count: Number of lines from Git log output. Used for determining how
            many commits to fetch.

    Returns:
        A mapping of long commit hashes to author login handles. For commits
        which GitHub cannot associate with an account, the author name
        recorded in the commit is used instead. The mapping is empty if
        ``line_count`` is not positive.

    """
    mapping: dict[str, str] = {}
    if line_count <= 0:
        # Nothing to look up; a page size of zero would also be an invalid
        # step for range().
        return mapping

    per_page = min(line_count, 100)
    page_count = -(-line_count // per_page)  # ceiling division
    for page in range(1, page_count + 1):
        url = (
            f"https://api.github.com/repos/{GH_ORG}/{GH_PROJECT}/commits"
            f"?per_page={per_page}&page={page}"
        )
        with urllib.request.urlopen(url) as response:
            commits = json.loads(response.read())
        for commit in commits:
            # "author" is null when the commit e-mail address does not
            # belong to a GitHub account.
            github_author = commit.get("author")
            if github_author is not None:
                login = github_author["login"]
            else:
                login = commit["commit"]["author"]["name"]
            mapping[commit["sha"]] = login
    return mapping


def get_git_commits_since_tag(git_tag: str) -> list[Change]:
    """
    Collect the commits between the given tag and HEAD.

    Args:
        git_tag: Reference tag from which the changes to the current commit are
            fetched.

    Returns:
        List of all changes since git_tag.

    """
    log_command = [
        "git",
        "--no-pager",
        "log",
        f"{git_tag}..HEAD",
        # One line per commit: hash, subject and author name, wrapped in
        # literal double quotes which parse_commit_line() removes again.
        '--pretty=format:"%H:::%s:::%aN"',
    ]
    raw_log = subprocess.check_output(log_command, stderr=subprocess.STDOUT)
    lines = raw_log.decode("UTF-8").strip().splitlines()
    authors = get_author_mapping(len(lines))
    changes: list[Change] = []
    for line in lines:
        if line:
            changes.append(parse_commit_line(line, authors))
    return changes


def parse_commit_line(line: str, authors: dict[str, str]) -> Change:
    """
    Parse one "<hash>:::<subject>:::<author name>" line of the git log output.

    Args:
        line: The line to parse. Surrounding whitespace, double quotes and
            backslashes are ignored.
        authors: Mapping of commit hashes to author login handles.

    Returns:
        The parsed Change object

    Raises:
        ValueError: The commit line is not well-structured
        KeyError: The commit hash is missing in ``authors``

    """
    parts = line.strip().strip('"\\').split(":::")
    if len(parts) != 3:
        raise ValueError(f"Invalid commit line: '{line}'")
    commit_hash, rest, author = parts

    # Only "PREFIX: message" counts as prefixed. A colon without a following
    # space (e.g. in a URL) must not be treated as a prefix separator.
    prefix, separator, message = rest.partition(": ")
    if not separator:
        prefix = ""
        message = rest

    # Standardize
    message = message.strip()
    commit_hash = commit_hash.strip()

    author_login = authors[commit_hash]

    prefix = prefix.strip()
    if prefix == "DOCS":
        prefix = "DOC"

    return Change(
        commit_hash=commit_hash,
        prefix=prefix,
        message=message,
        author=author,
        author_login=author_login,
    )


# Only run the release workflow when executed as a script, not on import.
if __name__ == "__main__":
    main(CHANGELOG_FILE_PATH)


================================================
FILE: pypdf/__init__.py
================================================
"""
pypdf is a free and open-source pure-python PDF library capable of splitting,
merging, cropping, and transforming the pages of PDF files. It can also add
custom data, viewing options, and passwords to PDF files. pypdf can retrieve
text and metadata from PDFs as well.

You can read the full docs at https://pypdf.readthedocs.io/.
"""

from ._crypt_providers import crypt_provider
from ._doc_common import DocumentInformation
from ._encryption import PasswordType
from ._page import PageObject, Transformation
from ._reader import PdfReader
from ._text_extraction import mult
from ._version import __version__
from ._writer import ObjectDeletionFlag, PdfWriter
from .constants import ImageType
from .pagerange import PageRange, parse_filename_page_ranges
from .papersizes import PaperSize

# PIL may not be installed: record its version if it can be imported and
# fall back to the string "none" otherwise.
try:
    import PIL

    pil_version = PIL.__version__
except ImportError:
    pil_version = "none"

# One-line summary of the pypdf version, the crypt provider and the PIL
# version.
_debug_versions = (
    f"pypdf=={__version__}, {crypt_provider=}, PIL={pil_version}"
)

# Names exported by the package.
__all__ = [
    "DocumentInformation",
    "ImageType",
    "ObjectDeletionFlag",
    "PageObject",
    "PageRange",
    "PaperSize",
    "PasswordType",
    "PdfReader",
    "PdfWriter",
    "Transformation",
    "__version__",
    "_debug_versions",
    "mult",
    "parse_filename_page_ranges",
]


================================================
FILE: pypdf/_cmap.py
================================================
import binascii
from binascii import Error as BinasciiError
from binascii import unhexlify
from math import ceil
from typing import Any, Union, cast

from ._codecs import adobe_glyphs, charset_encoding
from ._utils import logger_error, logger_warning
from .errors import LimitReachedError
from .generic import (
    DecodedStreamObject,
    DictionaryObject,
    NullObject,
    StreamObject,
    is_null_or_none,
)

# Names of predefined CMaps (as found in a font's /Encoding entry) mapped to
# the name of the Python codec that _parse_encoding() returns for them.
_predefined_cmap: dict[str, str] = {
    "/Identity-H": "utf-16-be",
    "/Identity-V": "utf-16-be",
    "/GB-EUC-H": "gbk",
    "/GB-EUC-V": "gbk",
    "/GBpc-EUC-H": "gb2312",
    "/GBpc-EUC-V": "gb2312",
    "/GBK-EUC-H": "gbk",
    "/GBK-EUC-V": "gbk",
    "/GBK2K-H": "gb18030",
    "/GBK2K-V": "gb18030",
    "/ETen-B5-H": "cp950",
    "/ETen-B5-V": "cp950",
    "/ETenms-B5-H": "cp950",
    "/ETenms-B5-V": "cp950",
    "/UniCNS-UTF16-H": "utf-16-be",
    "/UniCNS-UTF16-V": "utf-16-be",
    "/UniGB-UTF16-H": "gb18030",
    "/UniGB-UTF16-V": "gb18030",
    # UCS2 in code: names containing "-UCS2-" are handled in _parse_encoding()
}


def get_encoding(
    ft: DictionaryObject
) -> tuple[Union[str, dict[int, str]], dict[Any, Any]]:
    """
    Determine the encoding and the /ToUnicode mapping of a font.

    Args:
        ft: The font dictionary.

    Returns:
        A tuple of the encoding (either a codec name or a mapping from
        character codes to strings) and the character map built from
        /ToUnicode.

    """
    encoding = _parse_encoding(ft)
    map_dict, int_entry = _parse_to_unicode(ft)

    # If encoding is a string, it is expected to be an identity translation.
    if not isinstance(encoding, dict):
        return encoding, map_dict

    # Apply rule from PDF ref 1.7 §5.9.1, 1st bullet:
    #   if cmap not empty encoding should be discarded
    #   (here transformed into identity for those characters)
    for code in int_entry:
        if code <= 255:
            encoding[code] = chr(code)
    return encoding, map_dict


def _parse_encoding(
    ft: DictionaryObject
) -> Union[str, dict[int, str]]:
    """
    Derive the character encoding of a font from its /Encoding entry.

    Args:
        ft: The font dictionary.

    Returns:
        Either the name of an encoding as a string (a Python codec name for
        the known cases, or the unsupported /Encoding name itself) or a
        mapping from character codes to the corresponding strings.

    """
    encoding: Union[str, list[str], dict[int, str]] = []
    if "/Encoding" not in ft:
        # No explicit encoding: use the table of the base font if there is
        # one, otherwise fall back to the "charmap" codec.
        if "/BaseFont" in ft and cast(str, ft["/BaseFont"]) in charset_encoding:
            encoding = dict(
                zip(range(256), charset_encoding[cast(str, ft["/BaseFont"])])
            )
        else:
            encoding = "charmap"
        return encoding
    enc: Union[str, DictionaryObject, NullObject] = cast(
        Union[str, DictionaryObject, NullObject], ft["/Encoding"].get_object()
    )
    if isinstance(enc, str):
        # already done : enc = NameObject.unnumber(enc.encode()).decode()
        # for #xx decoding
        if enc in charset_encoding:
            encoding = charset_encoding[enc].copy()
        elif enc in _predefined_cmap:
            encoding = _predefined_cmap[enc]
        elif "-UCS2-" in enc:
            encoding = "utf-16-be"
        else:
            logger_error("Advanced encoding %(encoding)s not implemented yet", source=__name__, encoding=enc)
            encoding = enc
    elif isinstance(enc, DictionaryObject) and "/BaseEncoding" in enc:
        base_encoding: Any = None
        try:
            base_encoding = enc["/BaseEncoding"]
            encoding = charset_encoding[cast(str, base_encoding)].copy()
        except Exception:
            # Report the unsupported base encoding itself rather than the
            # still empty ``encoding`` placeholder.
            logger_error(
                "Advanced encoding %(encoding)s not implemented yet",
                source=__name__, encoding=base_encoding
            )
            encoding = charset_encoding["/StandardEncoding"].copy()
    else:
        encoding = charset_encoding["/StandardEncoding"].copy()
    if isinstance(enc, DictionaryObject) and "/Differences" in enc:
        # /Differences alternates between a character code (int) and the
        # glyph names that replace the entries starting at this code.
        x: int = 0
        o: Union[int, str]
        for o in cast(DictionaryObject, enc["/Differences"]):
            if isinstance(o, int):
                x = o
            else:  # isinstance(o, str):
                try:
                    if x < len(encoding):
                        encoding[x] = adobe_glyphs[o]  # type: ignore
                except Exception:
                    # unknown glyph name: keep the name itself
                    encoding[x] = o  # type: ignore
                x += 1
    if isinstance(encoding, list):
        encoding = dict(zip(range(256), encoding))
    return encoding


def _parse_to_unicode(
    ft: DictionaryObject
) -> tuple[dict[Any, Any], list[int]]:
    """
    Build the character map of a font from its /ToUnicode entry.

    Without a /ToUnicode entry, Type1 fonts are handled by
    ``_type1_alternative``; all other fonts yield an empty result.

    Args:
        ft: The font dictionary.

    Returns:
        A tuple of the translation dictionary (its key ``-1`` holds the
        number of bytes to convert) and the list of the cmap keys as
        integers, which is used to correct the encoding.

    """
    map_dict: dict[Any, Any] = {}
    int_entry: list[int] = []

    if "/ToUnicode" not in ft:
        if ft.get("/Subtype", "") == "/Type1":
            return _type1_alternative(ft, map_dict, int_entry)
        return {}, []

    # Parser state which is carried from one line to the next.
    in_bfrange: bool = False
    in_bfchar: bool = False
    # tuple = (current_char, remaining size) ; cf #1285 for example of file
    pending_range: Union[None, tuple[int, int]] = None
    for raw_line in prepare_cm(ft).split(b"\n"):
        in_bfrange, in_bfchar, pending_range = process_cm_line(
            raw_line.strip(b" \t"),
            in_bfrange,
            in_bfchar,
            pending_range,
            map_dict,
            int_entry,
        )

    return map_dict, int_entry


def prepare_cm(ft: DictionaryObject) -> bytes:
    """
    Normalize the /ToUnicode CMap of a font for line-based parsing.

    The section keywords are put on lines of their own, the content of every
    ``<...>`` hex string is stripped of spaces and followed by a single
    space, and brackets are surrounded by whitespace.

    Args:
        ft: The font dictionary; it must contain a /ToUnicode entry.

    Returns:
        The prepared CMap data.

    """
    tu = ft["/ToUnicode"]
    cm: bytes
    if isinstance(tu, StreamObject):
        cm = cast(DecodedStreamObject, tu).get_data()
    else:  # if (tu is None) or cast(str, tu).startswith("/Identity"):
        # NOTE(review): the previous comment said that the full range
        # 0000-FFFF will be processed, while this literal spans <0000> to
        # <0001> only - confirm the intent.
        cm = b"beginbfrange\n<0000> <0001> <0000>\nendbfrange"
    if isinstance(cm, str):
        cm = cm.encode()

    # we need to prepare cm before due to missing return line in pdf printed
    # to pdf from word
    cm = cm.strip()
    for keyword in (b"beginbfchar", b"endbfchar", b"beginbfrange", b"endbfrange"):
        cm = cm.replace(keyword, b"\n" + keyword + b"\n")
    # text between << and >> not used but some solution to find it back
    cm = cm.replace(b"<<", b"\n{\n").replace(b">>", b"\n}\n")

    segments = cm.split(b"<")
    for index, segment in enumerate(segments):
        closing = segment.find(b">")
        if closing < 0:
            continue
        if closing == 0:
            # string is empty: stash a placeholder here which is recognized
            # again when the bfchar entries are parsed
            # see https://github.com/py-pdf/pypdf/issues/1111
            hex_content = b"."
        else:
            hex_content = segment[:closing].replace(b" ", b"")
        segments[index] = hex_content + b" " + segment[closing + 1 :]

    joined = b" ".join(segments)
    return (
        joined.replace(b"[", b" [ ")
        .replace(b"]", b" ]\n ")
        .replace(b"\r", b"\n")
    )


def process_cm_line(
    line: bytes,
    process_rg: bool,
    process_char: bool,
    multiline_rg: Union[None, tuple[int, int]],
    map_dict: dict[Any, Any],
    int_entry: list[int],
) -> tuple[bool, bool, Union[None, tuple[int, int]]]:
    """
    Process a single line of a prepared CMap.

    Section keywords switch the parser state; every other line is handed to
    the parser of the section which is currently active, if any.

    Args:
        line: The line to process.
        process_rg: Whether a bfrange section is currently being parsed.
        process_char: Whether a bfchar section is currently being parsed.
        multiline_rg: State of a bfrange entry which spans several lines.
        map_dict: The translation dictionary; updated in place.
        int_entry: The cmap keys as integers; updated in place.

    Returns:
        The updated values of process_rg, process_char and multiline_rg.

    """
    # Skip empty lines and comments.
    if line == b"" or line[0] == 37:  # 37 = %
        return process_rg, process_char, multiline_rg

    line = line.replace(b"\t", b" ")
    if b"beginbfrange" in line:
        return True, process_char, multiline_rg
    if b"endbfrange" in line:
        return False, process_char, multiline_rg
    if b"beginbfchar" in line:
        return process_rg, True, multiline_rg
    if b"endbfchar" in line:
        return process_rg, False, multiline_rg

    if process_rg:
        try:
            multiline_rg = parse_bfrange(line, map_dict, int_entry, multiline_rg)
        except binascii.Error as error:
            logger_warning(f"Skipping broken line {line!r}: {error}", __name__)
    elif process_char:
        parse_bfchar(line, map_dict, int_entry)
    return process_rg, process_char, multiline_rg


# Usual values should be up to 65_536.
MAPPING_DICTIONARY_SIZE_LIMIT = 100_000


def _check_mapping_size(size: int) -> None:
    """
    Ensure that a /ToUnicode mapping does not grow beyond the allowed size.

    Args:
        size: The (expected) number of entries of the mapping.

    Raises:
        LimitReachedError: The size exceeds MAPPING_DICTIONARY_SIZE_LIMIT.

    """
    if size <= MAPPING_DICTIONARY_SIZE_LIMIT:
        return
    raise LimitReachedError(f"Maximum /ToUnicode size limit reached: {size} > {MAPPING_DICTIONARY_SIZE_LIMIT}.")


def parse_bfrange(
    line: bytes,
    map_dict: dict[Any, Any],
    int_entry: list[int],
    multiline_rg: Union[None, tuple[int, int]],
) -> Union[None, tuple[int, int]]:
    """
    Parse one line of a bfrange section.

    A line either starts a new range ("start end target" or
    "start end [ target ... ]") or, if ``multiline_rg`` is given, continues
    the target list of a range started on a previous line.

    Args:
        line: The line to parse.
        map_dict: The translation dictionary; updated in place. Its key
            ``-1`` holds the number of bytes of a source code.
        int_entry: The cmap keys as integers; updated in place.
        multiline_rg: ``(next source code, last source code)`` of a target
            list which has not been closed yet, or None.

    Returns:
        None if the range is complete, otherwise the state required to
        continue the target list with the next line.

    """
    tokens = [token for token in line.split(b" ") if token]
    entry_count = len(int_entry)
    _check_mapping_size(entry_count)

    def source_key(code: int) -> str:
        # Source codes are decoded according to their width in bytes.
        return unhexlify(fmt % code).decode(
            "charmap" if map_dict[-1] == 1 else "utf-16-be",
            "surrogatepass",
        )

    if multiline_rg is not None:
        fmt = b"%%0%dX" % (map_dict[-1] * 2)
        a, b = multiline_rg  # a, b not in the current line
        has_target_list = True
        targets = tokens
    else:
        a = int(tokens[0], 16)
        b = int(tokens[1], 16)
        nbi = max(len(tokens[0]), len(tokens[1]))
        map_dict[-1] = ceil(nbi / 2)
        fmt = b"%%0%dX" % (map_dict[-1] * 2)
        has_target_list = tokens[2] == b"["
        targets = tokens[3:]

    if has_target_list:
        # One explicit target per source code, terminated by "]".
        closure_found = False
        for target in targets:
            if target == b"]":
                closure_found = True
                break
            entry_count += 1
            _check_mapping_size(entry_count)
            mapped = unhexlify(target).decode("utf-16-be", "surrogatepass")
            map_dict[source_key(a)] = mapped
            int_entry.append(a)
            a += 1
        return None if closure_found else (a, b)

    # case without list: consecutive targets starting at the given value
    c = int(tokens[2], 16)
    fmt2 = b"%%0%dX" % max(4, len(tokens[2]))
    range_size = max(0, b - a + 1)
    _check_mapping_size(entry_count + range_size)  # This can be checked beforehand.
    for offset in range(range_size):
        mapped = unhexlify(fmt2 % (c + offset)).decode("utf-16-be", "surrogatepass")
        map_dict[source_key(a + offset)] = mapped
        int_entry.append(a + offset)
    return None


def parse_bfchar(line: bytes, map_dict: dict[Any, Any], int_entry: list[int]) -> None:
    """
    Parse one line of a bfchar section.

    The line consists of pairs of a source code and its target, both given
    as hex strings. A target of ``.`` stands for an empty string.

    Args:
        line: The line to parse.
        map_dict: The translation dictionary; updated in place. Its key
            ``-1`` is set to the number of bytes of the first source code.
        int_entry: The cmap keys as integers; updated in place.

    """
    tokens = [token for token in line.split(b" ") if token]
    pair_count = len(tokens) // 2
    _check_mapping_size(len(int_entry) + pair_count)  # This can be checked beforehand.
    map_dict[-1] = len(tokens[0]) // 2
    for source, target in zip(tokens[0::2], tokens[1::2]):
        mapped = ""
        # the placeholder "." means empty string
        if target != b".":
            try:
                mapped = unhexlify(target).decode(
                    "charmap" if len(target) < 4 else "utf-16-be", "surrogatepass"
                )
            except BinasciiError as exception:
                logger_warning(f"Got invalid hex string: {exception!s} ({target!r})", __name__)
        key = unhexlify(source).decode(
            "charmap" if map_dict[-1] == 1 else "utf-16-be", "surrogatepass"
        )
        map_dict[key] = mapped
        int_entry.append(int(source, 16))


def _type1_alternative(
    ft: DictionaryObject,
    map_dict: dict[Any, Any],
    int_entry: list[int],
) -> tuple[dict[Any, Any], list[int]]:
    """
    Build the character map of a Type1 font from its embedded font file.

    The clear-text part of /FontFile is searched for lines of the form
    ``dup <code> <glyph name> put`` which follow the ``/Encoding`` keyword.

    Args:
        ft: The font dictionary.
        map_dict: The translation dictionary; updated in place.
        int_entry: The character codes which got an entry; updated in place.

    Returns:
        The (possibly unchanged) ``map_dict`` and ``int_entry``.

    """
    if "/FontDescriptor" not in ft:
        return map_dict, int_entry
    ft_desc = cast(DictionaryObject, ft["/FontDescriptor"]).get("/FontFile")
    if is_null_or_none(ft_desc):
        return map_dict, int_entry
    assert ft_desc is not None, "mypy"
    txt = ft_desc.get_object().get_data()
    txt = txt.split(b"eexec\n")[0]  # only clear part
    sections = txt.split(b"/Encoding")
    if len(sections) < 2:
        # no encoding part in the font file: nothing to extract
        return map_dict, int_entry
    txt = sections[1]  # to get the encoding part
    lines = txt.replace(b"\r", b"\n").split(b"\n")
    for li in lines:
        if not li.startswith(b"dup"):
            continue
        words = [_w for _w in li.split(b" ") if _w != b""]
        if len(words) < 3:
            # code or glyph name missing
            continue
        if len(words) > 3 and words[3] != b"put":
            continue
        try:
            i = int(words[1])
        except ValueError:  # pragma: no cover
            continue
        try:
            v = adobe_glyphs[words[2].decode()]
        except (KeyError, UnicodeDecodeError):
            # not a known glyph name: accept "/uniXXXX" names only
            if words[2].startswith(b"/uni"):
                try:
                    v = chr(int(words[2][4:], 16))
                except ValueError:  # pragma: no cover
                    continue
            else:
                continue
        map_dict[chr(i)] = v
        int_entry.append(i)
    return map_dict, int_entry


================================================
FILE: pypdf/_codecs/__init__.py
================================================
from .adobe_glyphs import adobe_glyphs
from .pdfdoc import _pdfdoc_encoding
from .std import _std_encoding
from .symbol import _symbol_encoding
from .zapfding import _zapfding_encoding


def fill_from_encoding(enc: str) -> list[str]:
    """
    Build a 256-entry decoding table for a codec.

    Args:
        enc: Name of the codec used to decode each single byte.

    Returns:
        A list whose item ``n`` is the result of decoding the single byte
        ``n`` with ``enc``. Whenever decoding fails for any reason, the
        character with code point ``n`` is used instead.

    """

    def decode_single(value: int) -> str:
        try:
            return bytes([value]).decode(enc)
        except Exception:
            # Deliberately broad: any failure falls back to the identity mapping.
            return chr(value)

    return [decode_single(value) for value in range(256)]


def rev_encoding(enc: list[str]) -> dict[str, int]:
    """
    Invert a byte-indexed encoding table.

    Args:
        enc: Table with (at least) 256 entries, indexed by byte value.

    Returns:
        Mapping from each character to the byte value it was found at.
        Entries equal to ``"\\u0000"`` are left out.

    Raises:
        AssertionError: A character occurs at more than one position.

    """
    reverse: dict[str, int] = {}
    for code in range(256):
        symbol = enc[code]
        if symbol != "\u0000":
            assert symbol not in reverse, f"{symbol} at {code} already at {reverse[symbol]}"
            reverse[symbol] = code
    return reverse


# Byte-indexed (0-255) decoding tables derived from Python's own codecs.
# Bytes the codec cannot decode fall back to the character with the same
# code point (see fill_from_encoding).
_win_encoding = fill_from_encoding("cp1252")
_mac_encoding = fill_from_encoding("mac_roman")


# Reverse lookup for the PDFDocEncoding table: character -> byte value.
# Built at import time; rev_encoding asserts that no character is duplicated.
_pdfdoc_encoding_rev: dict[str, int] = rev_encoding(_pdfdoc_encoding)


# Decoding tables keyed by slash-prefixed name.
# NOTE(review): "/Symbol" and "/ZapfDingbats" look like font names rather than
# encoding names - presumably used for those fonts' built-in encodings; confirm
# against the callers.
charset_encoding: dict[str, list[str]] = {
    "/StandardEncoding": _std_encoding,
    "/WinAnsiEncoding": _win_encoding,
    "/MacRomanEncoding": _mac_encoding,
    "/PDFDocEncoding": _pdfdoc_encoding,
    "/Symbol": _symbol_encoding,
    "/ZapfDingbats": _zapfding_encoding,
}

# Names exported by this package. The helper functions fill_from_encoding and
# rev_encoding are not part of this list.
__all__ = [
    "_mac_encoding",
    "_pdfdoc_encoding",
    "_pdfdoc_encoding_rev",
    "_std_encoding",
    "_symbol_encoding",
    "_win_encoding",
    "_zapfding_encoding",
    "adobe_glyphs",
    "charset_encoding",
]


================================================
FILE: pypdf/_codecs/_codecs.py
================================================
"""
This module is for codecs only.

While the codec implementation can contain details of the PDF specification,
the module should not do any PDF parsing.
"""

import io
from abc import ABC, abstractmethod

from pypdf._utils import logger_warning
from pypdf.errors import LimitReachedError


class Codec(ABC):
    """
    Common interface of all codecs in this module.

    A concrete codec has to provide both directions, :meth:`encode` and
    :meth:`decode`; the class cannot be instantiated otherwise.
    """

    @abstractmethod
    def encode(self, data: bytes) -> bytes:
        """
        Transform raw data into its encoded representation.

        Args:
            data: The raw bytes which should be encoded.

        Returns:
            The encoded bytes.

        """

    @abstractmethod
    def decode(self, data: bytes) -> bytes:
        """
        Transform encoded data back into the raw representation.

        Args:
            data: The encoded bytes which should be decoded.

        Returns:
            The decoded bytes.

        """


class LzwCodec(Codec):
    """
    Lempel-Ziv-Welch (LZW) adaptive compression codec.

    Codes are packed most significant bit first, start at
    ``INITIAL_BITS_PER_CODE`` bits and grow up to ``MAX_BITS_PER_CODE`` bits.
    Both directions keep their working state on the instance, so one instance
    must not run several encode/decode operations at the same time.
    """

    CLEAR_TABLE_MARKER = 256  # Special code to indicate table reset
    EOD_MARKER = 257  # End-of-data marker
    INITIAL_BITS_PER_CODE = 9  # Initial code bit width
    MAX_BITS_PER_CODE = 12  # Maximum code bit width

    def __init__(self, max_output_length: int = 75_000_000) -> None:
        """
        Create the codec.

        Args:
            max_output_length: Upper bound for the number of bytes
                :meth:`decode` may produce before it raises
                ``LimitReachedError``.

        """
        self.max_output_length = max_output_length

    def _initialize_encoding_table(self) -> None:
        """Initialize the encoding table and state to initial conditions."""
        self.encoding_table: dict[bytes, int] = {bytes([i]): i for i in range(256)}
        self.next_code = self.EOD_MARKER + 1
        self.bits_per_code = self.INITIAL_BITS_PER_CODE
        self.max_code_value = (1 << self.bits_per_code) - 1

    def _increase_next_code(self) -> None:
        """Advance next_code; widen bits_per_code and max_code_value if necessary."""
        self.next_code += 1
        if (
            self.next_code > self.max_code_value
            and self.bits_per_code < self.MAX_BITS_PER_CODE
        ):
            self.bits_per_code += 1
            self.max_code_value = (1 << self.bits_per_code) - 1

    def encode(self, data: bytes) -> bytes:
        """
        Encode data using the LZW compression algorithm.

        Taken from PDF 1.7 specs, "7.4.4.2 Details of LZW Encoding".

        Args:
            data: Data to encode.

        Returns:
            The packed code stream. It always starts with a clear-table code
            and ends with an end-of-data code.

        """
        result_codes: list[int] = []

        # The encoder shall begin by issuing a clear-table code
        result_codes.append(self.CLEAR_TABLE_MARKER)
        self._initialize_encoding_table()

        current_sequence = b""
        for byte in data:
            next_sequence = current_sequence + bytes([byte])

            if next_sequence in self.encoding_table:
                # Extend current sequence if already in the table
                current_sequence = next_sequence
            else:
                # Output code for the current sequence
                result_codes.append(self.encoding_table[current_sequence])

                # Add the new sequence to the table if there's room
                if self.next_code <= (1 << self.MAX_BITS_PER_CODE) - 1:
                    self.encoding_table[next_sequence] = self.next_code
                    self._increase_next_code()
                else:
                    # If the table is full, emit a clear-table command
                    result_codes.append(self.CLEAR_TABLE_MARKER)
                    self._initialize_encoding_table()

                # Start new sequence
                current_sequence = bytes([byte])

        # Ensure everything actually is encoded
        if current_sequence:
            result_codes.append(self.encoding_table[current_sequence])
        result_codes.append(self.EOD_MARKER)

        return self._pack_codes_into_bytes(result_codes)

    def _pack_codes_into_bytes(self, codes: list[int]) -> bytes:
        """
        Convert the list of result codes into a continuous byte stream, with codes packed as per the code bit-width.
        The bit-width starts at 9 bits and expands as needed.

        The width bookkeeping of the encoder is replayed here: every regular
        code advances the code counter, a clear-table code resets it.
        """
        self._initialize_encoding_table()
        buffer = 0
        bits_in_buffer = 0
        output = bytearray()

        for code in codes:
            buffer = (buffer << self.bits_per_code) | code
            bits_in_buffer += self.bits_per_code

            # Codes shall be packed into a continuous bit stream, high-order bit
            # first. This stream shall then be divided into bytes, high-order bit
            # first.
            while bits_in_buffer >= 8:
                bits_in_buffer -= 8
                output.append((buffer >> bits_in_buffer) & 0xFF)

            # Drop the bits which have already been written. Without this the
            # integer keeps growing with the stream and every shift gets slower,
            # making the packing quadratic in the number of codes.
            buffer &= (1 << bits_in_buffer) - 1

            if code == self.CLEAR_TABLE_MARKER:
                self._initialize_encoding_table()
            elif code != self.EOD_MARKER:
                self._increase_next_code()

        # Flush any remaining bits in the buffer
        if bits_in_buffer > 0:
            output.append((buffer << (8 - bits_in_buffer)) & 0xFF)

        return bytes(output)

    def _initialize_decoding_table(self) -> None:
        """Reset the decoding table, the next free table index and the code width."""
        self.max_code_value = (1 << self.MAX_BITS_PER_CODE) - 1
        # Single-byte entries first; every other slot (including the two
        # special codes) starts out as an empty placeholder.
        self.decoding_table = [bytes([i]) for i in range(self.CLEAR_TABLE_MARKER)] + [
            b""
        ] * (self.max_code_value - self.CLEAR_TABLE_MARKER + 1)
        self._table_index = self.EOD_MARKER + 1
        self._bits_to_get = self.INITIAL_BITS_PER_CODE

    def _next_code_decode(self, data: bytes) -> int:
        """
        Read the next code of the current bit width from the input.

        Args:
            data: The complete encoded input.

        Returns:
            The next code, or ``EOD_MARKER`` if the input is exhausted before
            a complete code could be read.

        """
        self._next_data: int
        try:
            while self._next_bits < self._bits_to_get:
                self._next_data = (self._next_data << 8) | (
                    data[self._byte_pointer]
                )
                self._byte_pointer += 1
                self._next_bits += 8
        except IndexError:
            # Ran out of input: treat a truncated stream like end-of-data.
            return self.EOD_MARKER

        code = (
            self._next_data >> (self._next_bits - self._bits_to_get)
        ) & self._and_table[self._bits_to_get - 9]
        self._next_bits -= self._bits_to_get

        # Reduce data to get rid of the overhead,
        # which increases performance on large streams significantly.
        self._next_data = self._next_data & 0xFFFFF

        return code

    # The following method has been converted to Python from PDFsharp:
    # https://github.com/empira/PDFsharp/blob/5fbf6ed14740bc4e16786816882d32e43af3ff5d/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs
    #
    # Original license:
    #
    # -------------------------------------------------------------------------
    # Copyright (c) 2001-2024 empira Software GmbH, Troisdorf (Cologne Area),
    # Germany
    #
    # http://docs.pdfsharp.net
    #
    # MIT License
    #
    # Permission is hereby granted, free of charge, to any person obtaining a
    # copy of this software and associated documentation files (the "Software"),
    # to deal in the Software without restriction, including without limitation
    # the rights to use, copy, modify, merge, publish, distribute, sublicense,
    # and/or sell copies of the Software, and to permit persons to whom the
    # Software is furnished to do so, subject to the following conditions:
    #
    # The above copyright notice and this permission notice shall be included
    # in all copies or substantial portions of the Software.
    #
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    # DEALINGS IN THE SOFTWARE.
    # --------------------------------------------------------------------------
    def decode(self, data: bytes) -> bytes:
        """
        The following code was converted to Python from the following code:
        https://github.com/empira/PDFsharp/blob/master/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Filters/LzwDecode.cs

        Args:
            data: LZW encoded data.

        Returns:
            The decoded data. Decoding stops at the end-of-data code or when
            the input is exhausted, whichever comes first.

        Raises:
            LimitReachedError: The decoded output exceeds ``max_output_length``.

        """
        # Bit masks for code widths of 9, 10, 11 and 12 bits.
        self._and_table = [511, 1023, 2047, 4095]
        self._byte_pointer = 0
        self._next_data = 0
        self._next_bits = 0
        # Also sets the next free table index and the initial code width.
        self._initialize_decoding_table()

        output_stream = io.BytesIO()
        output_length = 0
        old_code = self.CLEAR_TABLE_MARKER

        while True:
            code = self._next_code_decode(data)
            if code == self.EOD_MARKER:
                break

            if code == self.CLEAR_TABLE_MARKER:
                self._initialize_decoding_table()
                code = self._next_code_decode(data)
                if code == self.EOD_MARKER:
                    break
                # The first code after a reset is emitted as-is; there is no
                # previous sequence to build a new table entry from.
                decoded = self.decoding_table[code]
                output_stream.write(decoded)
                old_code = code
            elif code < self._table_index:
                decoded = self.decoding_table[code]
                output_stream.write(decoded)
                if old_code != self.CLEAR_TABLE_MARKER:
                    self._add_entry_decode(self.decoding_table[old_code], decoded[0])
                old_code = code
            else:
                # The code is not in the table and not one of the special codes
                # NOTE(review): if old_code refers to a slot which has never
                # been filled (corrupt stream), `decoded` is empty and
                # `decoded[0]` raises IndexError.
                decoded = (
                    self.decoding_table[old_code] + self.decoding_table[old_code][:1]
                )
                output_stream.write(decoded)
                self._add_entry_decode(self.decoding_table[old_code], decoded[0])
                old_code = code

            output_length += len(decoded)
            if output_length > self.max_output_length:
                raise LimitReachedError(
                    f"Limit reached while decompressing: {output_length} > {self.max_output_length}"
                )

        return output_stream.getvalue()

    def _add_entry_decode(self, old_string: bytes, new_char: int) -> None:
        """
        Append ``old_string + new_char`` to the decoding table.

        A full table is left unchanged and a warning is logged instead.

        Args:
            old_string: Sequence of the previously decoded code.
            new_char: Byte value to append to ``old_string``.

        """
        new_string = old_string + bytes([new_char])
        if self._table_index > self.max_code_value:
            logger_warning("Ignoring too large LZW table index.", __name__)
            return
        self.decoding_table[self._table_index] = new_string
        self._table_index += 1

        # Update the number of bits to get based on the table index.
        # The width grows one entry before the current width is exhausted.
        if self._table_index == 511:
            self._bits_to_get = 10
        elif self._table_index == 1023:
            self._bits_to_get = 11
        elif self._table_index == 2047:
            self._bits_to_get = 12


================================================
FILE: pypdf/_codecs/adobe_glyphs.py
================================================
# https://raw.githubusercontent.com/adobe-type-tools/agl-aglfn/master/glyphlist.txt

# converted manually to python
# Extended with data from GlyphNameFormatter:
#    https://github.com/LettError/glyphNameFormatter

# -----------------------------------------------------------
# Copyright 2002-2019 Adobe (http://www.adobe.com/).
#
# Redistribution and use in source and binary forms, with or
# without modification, are permitted provided that the
# following conditions are met:
#
# Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer.
#
# Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# Neither the name of Adobe nor the names of its contributors
# may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------
# Name:          Adobe Glyph List
# Table version: 2.0
# Date:          September 20, 2002
# URL:           https://github.com/adobe-type-tools/agl-aglfn
#
# Format: two semicolon-delimited fields:
#   (1) glyph name--upper/lowercase letters and digits
#   (2) Unicode scalar value--four uppercase hexadecimal digits
#
adobe_glyphs = {
    "/A": "\u0041",
    "/AA": "\uA732",
    "/AE": "\u00C6",
    "/AEacute": "\u01FC",
    "/AEmacron": "\u01E2",
    "/AEsmall": "\uF7E6",
    "/AO": "\uA734",
    "/AU": "\uA736",
    "/AV": "\uA738",
    "/AVhorizontalbar": "\uA73A",
    "/AY": "\uA73C",
    "/Aacute": "\u00C1",
    "/Aacutesmall": "\uF7E1",
    "/Abreve": "\u0102",
    "/Abreveacute": "\u1EAE",
    "/Abrevecyr": "\u04D0",
    "/Abrevecyrillic": "\u04D0",
    "/Abrevedotbelow": "\u1EB6",
    "/Abrevegrave": "\u1EB0",
    "/Abrevehoi": "\u1EB2",
    "/Abrevehookabove": "\u1EB2",
    "/Abrevetilde": "\u1EB4",
    "/Acaron": "\u01CD",
    "/Acircle": "\u24B6",
    "/Acircleblack": "\u1F150",
    "/Acircumflex": "\u00C2",
    "/Acircumflexacute": "\u1EA4",
    "/Acircumflexdotbelow": "\u1EAC",
    "/Acircumflexgrave": "\u1EA6",
    "/Acircumflexhoi": "\u1EA8",
    "/Acircumflexhookabove": "\u1EA8",
    "/Acircumflexsmall": "\uF7E2",
    "/Acircumflextilde": "\u1EAA",
    "/Acute": "\uF6C9",
    "/Acutesmall": "\uF7B4",
    "/Acyr": "\u0410",
    "/Acyrillic": "\u0410",
    "/Adblgrave": "\u0200",
    "/Adieresis": "\u00C4",
    "/Adieresiscyr": "\u04D2",
    "/Adieresiscyrillic": "\u04D2",
    "/Adieresismacron": "\u01DE",
    "/Adieresissmall": "\uF7E4",
    "/Adot": "\u0226",
    "/Adotbelow": "\u1EA0",
    "/Adotmacron": "\u01E0",
    "/Agrave": "\u00C0",
    "/Agravedbl": "\u0200",
    "/Agravesmall": "\uF7E0",
    "/Ahoi": "\u1EA2",
    "/Ahookabove": "\u1EA2",
    "/Aiecyr": "\u04D4",
    "/Aiecyrillic": "\u04D4",
    "/Ainvertedbreve": "\u0202",
    "/Akbar": "\uFDF3",
    "/Alayhe": "\uFDF7",
    "/Allah": "\uFDF2",
    "/Alpha": "\u0391",
    "/Alphaacute": "\u1FBB",
    "/Alphaasper": "\u1F09",
    "/Alphaasperacute": "\u1F0D",
    "/Alphaasperacuteiotasub": "\u1F8D",
    "/Alphaaspergrave": "\u1F0B",
    "/Alphaaspergraveiotasub": "\u1F8B",
    "/Alphaasperiotasub": "\u1F89",
    "/Alphaaspertilde": "\u1F0F",
    "/Alphaaspertildeiotasub": "\u1F8F",
    "/Alphabreve": "\u1FB8",
    "/Alphagrave": "\u1FBA",
    "/Alphaiotasub": "\u1FBC",
    "/Alphalenis": "\u1F08",
    "/Alphalenisacute": "\u1F0C",
    "/Alphalenisacuteiotasub": "\u1F8C",
    "/Alphalenisgrave": "\u1F0A",
    "/Alphalenisgraveiotasub": "\u1F8A",
    "/Alphalenisiotasub": "\u1F88",
    "/Alphalenistilde": "\u1F0E",
    "/Alphalenistildeiotasub": "\u1F8E",
    "/Alphatonos": "\u0386",
    "/Alphawithmacron": "\u1FB9",
    "/Amacron": "\u0100",
    "/Amonospace": "\uFF21",
    "/Aogonek": "\u0104",
    "/Aparens": "\u1F110",
    "/Aring": "\u00C5",
    "/Aringacute": "\u01FA",
    "/Aringbelow": "\u1E00",
    "/Aringsmall": "\uF7E5",
    "/Asmall": "\uF761",
    "/Asquare": "\u1F130",
    "/Asquareblack": "\u1F170",
    "/Astroke": "\u023A",
    "/Atilde": "\u00C3",
    "/Atildesmall": "\uF7E3",
    "/Aturned": "\u2C6F",
    "/Ayahend": "\u06DD",
    "/Aybarmenian": "\u0531",
    "/B": "\u0042",
    "/Bcircle": "\u24B7",
    "/Bcircleblack": "\u1F151",
    "/Bdot": "\u1E02",
    "/Bdotaccent": "\u1E02",
    "/Bdotbelow": "\u1E04",
    "/Becyr": "\u0411",
    "/Becyrillic": "\u0411",
    "/Benarmenian": "\u0532",
    "/Beta": "\u0392",
    "/Bflourish": "\uA796",
    "/Bhook": "\u0181",
    "/BismillahArRahmanArRaheem": "\uFDFD",
    "/Blinebelow": "\u1E06",
    "/Bmonospace": "\uFF22",
    "/Bparens": "\u1F111",
    "/Brevesmall": "\uF6F4",
    "/Bscript": "\u212C",
    "/Bsmall": "\uF762",
    "/Bsquare": "\u1F131",
    "/Bsquareblack": "\u1F171",
    "/Bstroke": "\u0243",
    "/Btopbar": "\u0182",
    "/C": "\u0043",
    "/CDcircle": "\u1F12D",
    "/Caarmenian": "\u053E",
    "/Cacute": "\u0106",
    "/Caron": "\uF6CA",
    "/Caronsmall": "\uF6F5",
    "/Cbar": "\uA792",
    "/Ccaron": "\u010C",
    "/Ccedilla": "\u00C7",
    "/Ccedillaacute": "\u1E08",
    "/Ccedillasmall": "\uF7E7",
    "/Ccircle": "\u24B8",
    "/Ccircleblack": "\u1F152",
    "/Ccircumflex": "\u0108",
    "/Cdblstruck": "\u2102",
    "/Cdot": "\u010A",
    "/Cdotaccent": "\u010A",
    "/Cdotreversed": "\uA73E",
    "/Cedillasmall": "\uF7B8",
    "/Cfraktur": "\u212D",
    "/Chaarmenian": "\u0549",
    "/Cheabkhasiancyrillic": "\u04BC",
    "/Cheabkhcyr": "\u04BC",
    "/Cheabkhtailcyr": "\u04BE",
    "/Checyr": "\u0427",
    "/Checyrillic": "\u0427",
    "/Chedescenderabkhasiancyrillic": "\u04BE",
    "/Chedescendercyrillic": "\u04B6",
    "/Chedieresiscyr": "\u04F4",
    "/Chedieresiscyrillic": "\u04F4",
    "/Cheharmenian": "\u0543",
    "/Chekhakascyr": "\u04CB",
    "/Chekhakassiancyrillic": "\u04CB",
    "/Chetailcyr": "\u04B6",
    "/Chevertcyr": "\u04B8",
    "/Cheverticalstrokecyrillic": "\u04B8",
    "/Chi": "\u03A7",
    "/Chook": "\u0187",
    "/Circumflexsmall": "\uF6F6",
    "/Citaliccircle": "\u1F12B",
    "/Cmonospace": "\uFF23",
    "/Coarmenian": "\u0551",
    "/Con": "\uA76E",
    "/Cparens": "\u1F112",
    "/Csmall": "\uF763",
    "/Csquare": "\u1F132",
    "/Csquareblack": "\u1F172",
    "/Cstretched": "\u0297",
    "/Cstroke": "\u023B",
    "/Cuatrillo": "\uA72C",
    "/Cuatrillocomma": "\uA72E",
    "/D": "\u0044",
    "/DZ": "\u01F1",
    "/DZcaron": "\u01C4",
    "/Daarmenian": "\u0534",
    "/Dafrican": "\u0189",
    "/Dcaron": "\u010E",
    "/Dcedilla": "\u1E10",
    "/Dchecyr": "\u052C",
    "/Dcircle": "\u24B9",
    "/Dcircleblack": "\u1F153",
    "/Dcircumflexbelow": "\u1E12",
    "/Dcroat": "\u0110",
    "/Ddblstruckitalic": "\u2145",
    "/Ddot": "\u1E0A",
    "/Ddotaccent": "\u1E0A",
    "/Ddotbelow": "\u1E0C",
    "/Decyr": "\u0414",
    "/Decyrillic": "\u0414",
    "/Deicoptic": "\u03EE",
    "/Dekomicyr": "\u0500",
    "/Delta": "\u2206",
    "/Deltagreek": "\u0394",
    "/Dhook": "\u018A",
    "/Dieresis": "\uF6CB",
    "/DieresisAcute": "\uF6CC",
    "/DieresisGrave": "\uF6CD",
    "/Dieresissmall": "\uF7A8",
    "/Digamma": "\u03DC",
    "/Digammagreek": "\u03DC",
    "/Digammapamphylian": "\u0376",
    "/Dinsular": "\uA779",
    "/Djecyr": "\u0402",
    "/Djecyrillic": "\u0402",
    "/Djekomicyr": "\u0502",
    "/Dlinebelow": "\u1E0E",
    "/Dmonospace": "\uFF24",
    "/Dotaccentsmall": "\uF6F7",
    "/Dparens": "\u1F113",
    "/Dslash": "\u0110",
    "/Dsmall": "\uF764",
    "/Dsquare": "\u1F133",
    "/Dsquareblack": "\u1F173",
    "/Dtopbar": "\u018B",
    "/Dz": "\u01F2",
    "/Dzcaron": "\u01C5",
    "/Dzeabkhasiancyrillic": "\u04E0",
    "/Dzeabkhcyr": "\u04E0",
    "/Dzecyr": "\u0405",
    "/Dzecyrillic": "\u0405",
    "/Dzhecyr": "\u040F",
    "/Dzhecyrillic": "\u040F",
    "/Dzjekomicyr": "\u0506",
    "/Dzzhecyr": "\u052A",
    "/E": "\u0045",
    "/Eacute": "\u00C9",
    "/Eacutesmall": "\uF7E9",
    "/Ebreve": "\u0114",
    "/Ecaron": "\u011A",
    "/Ecedilla": "\u0228",
    "/Ecedillabreve": "\u1E1C",
    "/Echarmenian": "\u0535",
    "/Ecircle": "\u24BA",
    "/Ecircleblack": "\u1F154",
    "/Ecircumflex": "\u00CA",
    "/Ecircumflexacute": "\u1EBE",
    "/Ecircumflexbelow": "\u1E18",
    "/Ecircumflexdotbelow": "\u1EC6",
    "/Ecircumflexgrave": "\u1EC0",
    "/Ecircumflexhoi": "\u1EC2",
    "/Ecircumflexhookabove": "\u1EC2",
    "/Ecircumflexsmall": "\uF7EA",
    "/Ecircumflextilde": "\u1EC4",
    "/Ecyrillic": "\u0404",
    "/Edblgrave": "\u0204",
    "/Edieresis": "\u00CB",
    "/Edieresissmall": "\uF7EB",
    "/Edot": "\u0116",
    "/Edotaccent": "\u0116",
    "/Edotbelow": "\u1EB8",
    "/Efcyr": "\u0424",
    "/Efcyrillic": "\u0424",
    "/Egrave": "\u00C8",
    "/Egravedbl": "\u0204",
    "/Egravesmall": "\uF7E8",
    "/Egyptain": "\uA724",
    "/Egyptalef": "\uA722",
    "/Eharmenian": "\u0537",
    "/Ehoi": "\u1EBA",
    "/Ehookabove": "\u1EBA",
    "/Eightroman": "\u2167",
    "/Einvertedbreve": "\u0206",
    "/Eiotifiedcyr": "\u0464",
    "/Eiotifiedcyrillic": "\u0464",
    "/Elcyr": "\u041B",
    "/Elcyrillic": "\u041B",
    "/Elevenroman": "\u216A",
    "/Elhookcyr": "\u0512",
    "/Elmiddlehookcyr": "\u0520",
    "/Elsharptailcyr": "\u04C5",
    "/Eltailcyr": "\u052E",
    "/Emacron": "\u0112",
    "/Emacronacute": "\u1E16",
    "/Emacrongrave": "\u1E14",
    "/Emcyr": "\u041C",
    "/Emcyrillic": "\u041C",
    "/Emonospace": "\uFF25",
    "/Emsharptailcyr": "\u04CD",
    "/Encyr": "\u041D",
    "/Encyrillic": "\u041D",
    "/Endescendercyrillic": "\u04A2",
    "/Eng": "\u014A",
    "/Engecyr": "\u04A4",
    "/Enghecyrillic": "\u04A4",
    "/Enhookcyr": "\u04C7",
    "/Enhookcyrillic": "\u04C7",
    "/Enhookleftcyr": "\u0528",
    "/Enmiddlehookcyr": "\u0522",
    "/Ensharptailcyr": "\u04C9",
    "/Entailcyr": "\u04A2",
    "/Eogonek": "\u0118",
    "/Eopen": "\u0190",
    "/Eparens": "\u1F114",
    "/Epsilon": "\u0395",
    "/Epsilonacute": "\u1FC9",
    "/Epsilonasper": "\u1F19",
    "/Epsilonasperacute": "\u1F1D",
    "/Epsilonaspergrave": "\u1F1B",
    "/Epsilongrave": "\u1FC8",
    "/Epsilonlenis": "\u1F18",
    "/Epsilonlenisacute": "\u1F1C",
    "/Epsilonlenisgrave": "\u1F1A",
    "/Epsilontonos": "\u0388",
    "/Ercyr": "\u0420",
    "/Ercyrillic": "\u0420",
    "/Ereversed": "\u018E",
    "/Ereversedcyr": "\u042D",
    "/Ereversedcyrillic": "\u042D",
    "/Ereverseddieresiscyr": "\u04EC",
    "/Ereversedopen": "\uA7AB",
    "/Ertickcyr": "\u048E",
    "/Escript": "\u2130",
    "/Escyr": "\u0421",
    "/Escyrillic": "\u0421",
    "/Esdescendercyrillic": "\u04AA",
    "/Esh": "\u01A9",
    "/Esmall": "\uF765",
    "/Esmallturned": "\u2C7B",
    "/Esquare": "\u1F134",
    "/Esquareblack": "\u1F174",
    "/Estailcyr": "\u04AA",
    "/Estroke": "\u0246",
    "/Et": "\uA76A",
    "/Eta": "\u0397",
    "/Etaacute": "\u1FCB",
    "/Etaasper": "\u1F29",
    "/Etaasperacute": "\u1F2D",
    "/Etaasperacuteiotasub": "\u1F9D",
    "/Etaaspergrave": "\u1F2B",
    "/Etaaspergraveiotasub": "\u1F9B",
    "/Etaasperiotasub": "\u1F99",
    "/Etaaspertilde": "\u1F2F",
    "/Etaaspertildeiotasub": "\u1F9F",
    "/Etagrave": "\u1FCA",
    "/Etaiotasub": "\u1FCC",
    "/Etalenis": "\u1F28",
    "/Etalenisacute": "\u1F2C",
    "/Etalenisacuteiotasub": "\u1F9C",
    "/Etalenisgrave": "\u1F2A",
    "/Etalenisgraveiotasub": "\u1F9A",
    "/Etalenisiotasub": "\u1F98",
    "/Etalenistilde": "\u1F2E",
    "/Etalenistildeiotasub": "\u1F9E",
    "/Etarmenian": "\u0538",
    "/Etatonos": "\u0389",
    "/Eth": "\u00D0",
    "/Ethsmall": "\uF7F0",
    "/Etilde": "\u1EBC",
    "/Etildebelow": "\u1E1A",
    "/Eukrcyr": "\u0404",
    "/Euro": "\u20AC",
    "/Ezh": "\u01B7",
    "/Ezhcaron": "\u01EE",
    "/Ezhreversed": "\u01B8",
    "/F": "\u0046",
    "/Fcircle": "\u24BB",
    "/Fcircleblack": "\u1F155",
    "/Fdot": "\u1E1E",
    "/Fdotaccent": "\u1E1E",
    "/Feharmenian": "\u0556",
    "/Feicoptic": "\u03E4",
    "/Fhook": "\u0191",
    "/Finsular": "\uA77B",
    "/Fitacyr": "\u0472",
    "/Fitacyrillic": "\u0472",
    "/Fiveroman": "\u2164",
    "/Fmonospace": "\uFF26",
    "/Fourroman": "\u2163",
    "/Fparens": "\u1F115",
    "/Fscript": "\u2131",
    "/Fsmall": "\uF766",
    "/Fsquare": "\u1F135",
    "/Fsquareblack": "\u1F175",
    "/Fstroke": "\uA798",
    "/Fturned": "\u2132",
    "/G": "\u0047",
    "/GBsquare": "\u3387",
    "/Gacute": "\u01F4",
    "/Gamma": "\u0393",
    "/Gammaafrican": "\u0194",
    "/Gammadblstruck": "\u213E",
    "/Gangiacoptic": "\u03EA",
    "/Gbreve": "\u011E",
    "/Gcaron": "\u01E6",
    "/Gcedilla": "\u0122",
    "/Gcircle": "\u24BC",
    "/Gcircleblack": "\u1F156",
    "/Gcircumflex": "\u011C",
    "/Gcommaaccent": "\u0122",
    "/Gdot": "\u0120",
    "/Gdotaccent": "\u0120",
    "/Gecyr": "\u0413",
    "/Gecyrillic": "\u0413",
    "/Gehookcyr": "\u0494",
    "/Gehookstrokecyr": "\u04FA",
    "/Germandbls": "\u1E9E",
    "/Gestrokecyr": "\u0492",
    "/Getailcyr": "\u04F6",
    "/Geupcyr": "\u0490",
    "/Ghadarmenian": "\u0542",
    "/Ghemiddlehookcyrillic": "\u0494",
    "/Ghestrokecyrillic": "\u0492",
    "/Gheupturncyrillic": "\u0490",
    "/Ghook": "\u0193",
    "/Ghooksmall": "\u029B",
    "/Gimarmenian": "\u0533",
    "/Ginsular": "\uA77D",
    "/Ginsularturned": "\uA77E",
    "/Gjecyr": "\u0403",
    "/Gjecyrillic": "\u0403",
    "/Glottalstop": "\u0241",
    "/Gmacron": "\u1E20",
    "/Gmonospace": "\uFF27",
    "/Gobliquestroke": "\uA7A0",
    "/Gparens": "\u1F116",
    "/Grave": "\uF6CE",
    "/Gravesmall": "\uF760",
    "/Gsmall": "\uF767",
    "/Gsmallhook": "\u029B",
    "/Gsquare": "\u1F136",
    "/Gsquareblack": "\u1F176",
    "/Gstroke": "\u01E4",
    "/Gturnedsans": "\u2141",
    "/H": "\u0048",
    "/H18533": "\u25CF",
    "/H18543": "\u25AA",
    "/H18551": "\u25AB",
    "/H22073": "\u25A1",
    "/HPsquare": "\u33CB",
    "/HVsquare": "\u1F14A",
    "/Haabkhasiancyrillic": "\u04A8",
    "/Haabkhcyr": "\u04A8",
    "/Hacyr": "\u0425",
    "/Hadescendercyrillic": "\u04B2",
    "/Hahookcyr": "\u04FC",
    "/Hardcyr": "\u042A",
    "/Hardsigncyrillic": "\u042A",
    "/Hastrokecyr": "\u04FE",
    "/Hbar": "\u0126",
    "/Hbrevebelow": "\u1E2A",
    "/Hcaron": "\u021E",
    "/Hcedilla": "\u1E28",
    "/Hcircle": "\u24BD",
    "/Hcircleblack": "\u1F157",
    "/Hcircumflex": "\u0124",
    "/Hdblstruck": "\u210D",
    "/Hdescender": "\u2C67",
    "/Hdieresis": "\u1E26",
    "/Hdot": "\u1E22",
    "/Hdotaccent": "\u1E22",
    "/Hdotbelow": "\u1E24",
    "/Heng": "\uA726",
    "/Heta": "\u0370",
    "/Hfraktur": "\u210C",
    "/Hgfullwidth": "\u32CC",
    "/Hhalf": "\u2C75",
    "/Hhook": "\uA7AA",
    "/Hmonospace": "\uFF28",
    "/Hoarmenian": "\u0540",
    "/HonAA": "\u0611",
    "/HonRA": "\u0612",
    "/HonSAW": "\u0610",
    "/Horicoptic": "\u03E8",
    "/Hparens": "\u1F117",
    "/Hscript": "\u210B",
    "/Hsmall": "\uF768",
    "/Hsquare": "\u1F137",
    "/Hsquareblack": "\u1F177",
    "/Hstrokemod": "\uA7F8",
    "/Hturned": "\uA78D",
    "/Hungarumlaut": "\uF6CF",
    "/Hungarumlautsmall": "\uF6F8",
    "/Hwair": "\u01F6",
    "/Hzsquare": "\u3390",
    "/I": "\u0049",
    "/IAcyrillic": "\u042F",
    "/ICsquareblack": "\u1F18B",
    "/IJ": "\u0132",
    "/IUcyrillic": "\u042E",
    "/Iacute": "\u00CD",
    "/Iacutesmall": "\uF7ED",
    "/Ibreve": "\u012C",
    "/Icaron": "\u01CF",
    "/Icircle": "\u24BE",
    "/Icircleblack": "\u1F158",
    "/Icircumflex": "\u00CE",
    "/Icircumflexsmall": "\uF7EE",
    "/Icyr": "\u0418",
    "/Icyrillic": "\u0406",
    "/Idblgrave": "\u0208",
    "/Idieresis": "\u00CF",
    "/Idieresisacute": "\u1E2E",
    "/Idieresiscyr": "\u04E4",
    "/Idieresiscyrillic": "\u04E4",
    "/Idieresissmall": "\uF7EF",
    "/Idot": "\u0130",
    "/Idotaccent": "\u0130",
    "/Idotbelow": "\u1ECA",
    "/Iebrevecyr": "\u04D6",
    "/Iebrevecyrillic": "\u04D6",
    "/Iecyr": "\u0415",
    "/Iecyrillic": "\u0415",
    "/Iegravecyr": "\u0400",
    "/Ifraktur": "\u2111",
    "/Igrave": "\u00CC",
    "/Igravecyr": "\u040D",
    "/Igravedbl": "\u0208",
    "/Igravesmall": "\uF7EC",
    "/Ihoi": "\u1EC8",
    "/Ihookabove": "\u1EC8",
    "/Iicyrillic": "\u0418",
    "/Iinvertedbreve": "\u020A",
    "/Iishortcyrillic": "\u0419",
    "/Imacron": "\u012A",
    "/Imacroncyr": "\u04E2",
    "/Imacroncyrillic": "\u04E2",
    "/Imonospace": "\uFF29",
    "/Iniarmenian": "\u053B",
    "/Iocyr": "\u0401",
    "/Iocyrillic": "\u0401",
    "/Iogonek": "\u012E",
    "/Iota": "\u0399",
    "/Iotaacute": "\u1FDB",
    "/Iotaafrican": "\u0196",
    "/Iotaasper": "\u1F39",
    "/Iotaasperacute": "\u1F3D",
    "/Iotaaspergrave": "\u1F3B",
    "/Iotaaspertilde": "\u1F3F",
    "/Iotabreve": "\u1FD8",
    "/Iotadieresis": "\u03AA",
    "/Iotagrave": "\u1FDA",
    "/Iotalenis": "\u1F38",
    "/Iotalenisacute": "\u1F3C",
    "/Iotalenisgrave": "\u1F3A",
    "/Iotalenistilde": "\u1F3E",
    "/Iotatonos": "\u038A",
    "/Iotawithmacron": "\u1FD9",
    "/Iparens": "\u1F118",
    "/Is": "\uA76C",
    "/Iscript": "\u2110",
    "/Ishortcyr": "\u0419",
    "/Ishortsharptailcyr": "\u048A",
    "/Ismall": "\uF769",
    "/Isquare": "\u1F138",
    "/Isquareblack": "\u1F178",
    "/Istroke": "\u0197",
    "/Itilde": "\u0128",
    "/Itildebelow": "\u1E2C",
    "/Iukrcyr": "\u0406",
    "/Izhitsacyr": "\u0474",
    "/Izhitsacyrillic": "\u0474",
    "/Izhitsadblgravecyrillic": "\u0476",
    "/Izhitsagravedblcyr": "\u0476",
    "/J": "\u004A",
    "/Jaarmenian": "\u0541",
    "/Jallajalalouhou": "\uFDFB",
    "/Jcircle": "\u24BF",
    "/Jcircleblack": "\u1F159",
    "/Jcircumflex": "\u0134",
    "/Jcrossed-tail": "\uA7B2",
    "/Jecyr": "\u0408",
    "/Jecyrillic": "\u0408",
    "/Jheharmenian": "\u054B",
    "/Jmonospace": "\uFF2A",
    "/Jparens": "\u1F119",
    "/Jsmall": "\uF76A",
    "/Jsquare": "\u1F139",
    "/Jsquareblack": "\u1F179",
    "/Jstroke": "\u0248",
    "/K": "\u004B",
    "/KBsquare": "\u3385",
    "/KKsquare": "\u33CD",
    "/KORONIS": "\u1FBD",
    "/Kaaleutcyr": "\u051E",
    "/Kabashkcyr": "\u04A0",
    "/Kabashkircyrillic": "\u04A0",
    "/Kacute": "\u1E30",
    "/Kacyr": "\u041A",
    "/Kacyrillic": "\u041A",
    "/Kadescendercyrillic": "\u049A",
    "/Kahookcyr": "\u04C3",
    "/Kahookcyrillic": "\u04C3",
    "/Kaisymbol": "\u03CF",
    "/Kappa": "\u039A",
    "/Kastrokecyr": "\u049E",
    "/Kastrokecyrillic": "\u049E",
    "/Katailcyr": "\u049A",
    "/Kaverticalstrokecyr": "\u049C",
    "/Kaverticalstrokecyrillic": "\u049C",
    "/Kcaron": "\u01E8",
    "/Kcedilla": "\u0136",
    "/Kcircle": "\u24C0",
    "/Kcircleblack": "\u1F15A",
    "/Kcommaaccent": "\u0136",
    "/Kdescender": "\u2C69",
    "/Kdiagonalstroke": "\uA742",
    "/Kdotbelow": "\u1E32",
    "/Keharmenian": "\u0554",
    "/Kenarmenian": "\u053F",
    "/Khacyrillic": "\u0425",
    "/Kheicoptic": "\u03E6",
    "/Khook": "\u0198",
    "/Kjecyr": "\u040C",
    "/Kjecyrillic": "\u040C",
    "/Klinebelow": "\u1E34",
    "/Kmonospace": "\uFF2B",
    "/Kobliquestroke": "\uA7A2",
    "/Koppa": "\u03DE",
    "/Koppaarchaic": "\u03D8",
    "/Koppacyr": "\u0480",
    "/Koppacyrillic": "\u0480",
    "/Koppagreek": "\u03DE",
    "/Kparens": "\u1F11A",
    "/Ksicyr": "\u046E",
    "/Ksicyrillic": "\u046E",
    "/Ksmall": "\uF76B",
    "/Ksquare": "\u1F13A",
    "/Ksquareblack": "\u1F17A",
    "/Kstroke": "\uA740",
    "/Kstrokediagonalstroke": "\uA744",
    "/Kturned": "\uA7B0",
    "/L": "\u004C",
    "/LJ": "\u01C7",
    "/LL": "\uF6BF",
    "/LLwelsh": "\u1EFA",
    "/LTDfullwidth": "\u32CF",
    "/Lacute": "\u0139",
    "/Lambda": "\u039B",
    "/Lbar": "\u023D",
    "/Lbelt": "\uA7AD",
    "/Lbroken": "\uA746",
    "/Lcaron": "\u013D",
    "/Lcedilla": "\u013B",
    "/Lcircle": "\u24C1",
    "/Lcircleblack": "\u1F15B",
    "/Lcircumflexbelow": "\u1E3C",
    "/Lcommaaccent": "\u013B",
    "/Ldblbar": "\u2C60",
    "/Ldot": "\u013F",
    "/Ldotaccent": "\u013F",
    "/Ldotbelow": "\u1E36",
    "/Ldotbelowmacron": "\u1E38",
    "/Lhacyr": "\u0514",
    "/Liwnarmenian": "\u053C",
    "/Lj": "\u01C8",
    "/Ljecyr": "\u0409",
    "/Ljecyrillic": "\u0409",
    "/Ljekomicyr": "\u0508",
    "/Llinebelow": "\u1E3A",
    "/Lmacrondot": "\u1E38",
    "/Lmiddletilde": "\u2C62",
    "/Lmonospace": "\uFF2C",
    "/Lparens": "\u1F11B",
    "/Lreversedsans": "\u2143",
    "/Lscript": "\u2112",
    "/Lslash": "\u0141",
    "/Lslashsmall": "\uF6F9",
    "/Lsmall": "\uF76C",
    "/Lsquare": "\u1F13B",
    "/Lsquareblack": "\u1F17B",
    "/Lstroke": "\uA748",
    "/Lturned": "\uA780",
    "/Lturnedsans": "\u2142",
    "/M": "\u004D",
    "/MBsquare": "\u3386",
    "/MVsquare": "\u1F14B",
    "/Macron": "\uF6D0",
    "/Macronsmall": "\uF7AF",
    "/Macute": "\u1E3E",
    "/Mcircle": "\u24C2",
    "/Mcircleblack": "\u1F15C",
    "/Mdot": "\u1E40",
    "/Mdotaccent": "\u1E40",
    "/Mdotbelow": "\u1E42",
    "/Menarmenian": "\u0544",
    "/Mhook": "\u2C6E",
    "/Mmonospace": "\uFF2D",
    "/Mohammad": "\uFDF4",
    "/Mparens": "\u1F11C",
    "/Mscript": "\u2133",
    "/Msmall": "\uF76D",
    "/Msquare": "\u1F13C",
    "/Msquareblack": "\u1F17C",
    "/Mturned": "\u019C",
    "/Mturnedsmall": "\uA7FA",
    "/Mu": "\u039C",
    "/N": "\u004E",
    "/NJ": "\u01CA",
    "/Nacute": "\u0143",
    "/Ncaron": "\u0147",
    "/Ncedilla": "\u0145",
    "/Ncircle": "\u24C3",
    "/Ncircleblack": "\u1F15D",
    "/Ncircumflexbelow": "\u1E4A",
    "/Ncommaaccent": "\u0145",
    "/Ndblstruck": "\u2115",
    "/Ndescender": "\uA790",
    "/Ndot": "\u1E44",
    "/Ndotaccent": "\u1E44",
    "/Ndotbelow": "\u1E46",
    "/Ngrave": "\u01F8",
    "/Nhookleft": "\u019D",
    "/Nineroman": "\u2168",
    "/Nj": "\u01CB",
    "/Njecyr": "\u040A",
    "/Njecyrillic": "\u040A",
    "/Njekomicyr": "\u050A",
    "/Nlinebelow": "\u1E48",
    "/Nlongrightleg": "\u0220",
    "/Nmonospace": "\uFF2E",
    "/Nobliquestroke": "\uA7A4",
    "/Nowarmenian": "\u0546",
    "/Nparens": "\u1F11D",
    "/Nsmall": "\uF76E",
    "/Nsquare": "\u1F13D",
    "/Nsquareblack": "\u1F17D",
    "/Ntilde": "\u00D1",
    "/Ntildesmall": "\uF7F1",
    "/Nu": "\u039D",
    "/O": "\u004F",
    "/OE": "\u0152",
    "/OEsmall": "\uF6FA",
    "/OO": "\uA74E",
    "/Oacute": "\u00D3",
    "/Oacutesmall": "\uF7F3",
    "/Obar": "\u019F",
    "/Obarcyr": "\u04E8",
    "/Obardieresiscyr": "\u04EA",
    "/Obarredcyrillic": "\u04E8",
    "/Obarreddieresiscyrillic": "\u04EA",
    "/Obreve": "\u014E",
    "/Ocaron": "\u01D1",
    "/Ocenteredtilde": "\u019F",
    "/Ocircle": "\u24C4",
    "/Ocircleblack": "\u1F15E",
    "/Ocircumflex": "\u00D4",
    "/Ocircumflexacute": "\u1ED0",
    "/Ocircumflexdotbelow": "\u1ED8",
    "/Ocircumflexgrave": "\u1ED2",
    "/Ocircumflexhoi": "\u1ED4",
    "/Ocircumflexhookabove": "\u1ED4",
    "/Ocircumflexsmall": "\uF7F4",
    "/Ocircumflextilde": "\u1ED6",
    "/Ocyr": "\u041E",
    "/Ocyrillic": "\u041E",
    "/Odblacute": "\u0150",
    "/Odblgrave": "\u020C",
    "/Odieresis": "\u00D6",
    "/Odieresiscyr": "\u04E6",
    "/Odieresiscyrillic": "\u04E6",
    "/Odieresismacron": "\u022A",
    "/Odieresissmall": "\uF7F6",
    "/Odot": "\u022E",
    "/Odotbelow": "\u1ECC",
    "/Odotmacron": "\u0230",
    "/Ogoneksmall": "\uF6FB",
    "/Ograve": "\u00D2",
    "/Ogravedbl": "\u020C",
    "/Ogravesmall": "\uF7F2",
    "/Oharmenian": "\u0555",
    "/Ohm": "\u2126",
    "/Ohoi": "\u1ECE",
    "/Ohookabove": "\u1ECE",
    "/Ohorn": "\u01A0",
    "/Ohornacute": "\u1EDA",
    "/Ohorndotbelow": "\u1EE2",
    "/Ohorngrave": "\u1EDC",
    "/Ohornhoi": "\u1EDE",
    "/Ohornhookabove": "\u1EDE",
    "/Ohorntilde": "\u1EE0",
    "/Ohungarumlaut": "\u0150",
    "/Oi": "\u01A2",
    "/Oinvertedbreve": "\u020E",
    "/Oloop": "\uA74C",
    "/Omacron": "\u014C",
    "/Omacronacute": "\u1E52",
    "/Omacrongrave": "\u1E50",
    "/Omega": "\u2126",
    "/Omegaacute": "\u1FFB",
    "/Omegaasper": "\u1F69",
    "/Omegaasperacute": "\u1F6D",
    "/Omegaasperacuteiotasub": "\u1FAD",
    "/Omegaaspergrave": "\u1F6B",
    "/Omegaaspergraveiotasub": "\u1FAB",
    "/Omegaasperiotasub": "\u1FA9",
    "/Omegaaspertilde": "\u1F6F",
    "/Omegaaspertildeiotasub": "\u1FAF",
    "/Omegacyr": "\u0460",
    "/Omegacyrillic": "\u0460",
    "/Omegagrave": "\u1FFA",
    "/Omegagreek": "\u03A9",
    "/Omegaiotasub": "\u1FFC",
    "/Omegalenis": "\u1F68",
    "/Omegalenisacute": "\u1F6C",
    "/Omegalenisacuteiotasub": "\u1FAC",
    "/Omegalenisgrave": "\u1F6A",
    "/Omegalenisgraveiotasub": "\u1FAA",
    "/Omegalenisiotasub": "\u1FA8",
    "/Omegalenistilde": "\u1F6E",
    "/Omegalenistildeiotasub": "\u1FAE",
    "/Omegaroundcyr": "\u047A",
    "/Omegaroundcyrillic": "\u047A",
    "/Omegatitlocyr": "\u047C",
    "/Omegatitlocyrillic": "\u047C",
    "/Omegatonos": "\u038F",
    "/Omicron": "\u039F",
    "/Omicronacute": "\u1FF9",
    "/Omicronasper": "\u1F49",
    "/Omicronasperacute": "\u1F4D",
    "/Omicronaspergrave": "\u1F4B",
    "/Omicrongrave": "\u1FF8",
    "/Omicronlenis": "\u1F48",
    "/Omicronlenisacute": "\u1F4C",
    "/Omicronlenisgrave": "\u1F4A",
    "/Omicrontonos": "\u038C",
    "/Omonospace": "\uFF2F",
    "/Oneroman": "\u2160",
    "/Oogonek": "\u01EA",
    "/Oogonekmacron": "\u01EC",
    "/Oopen": "\u0186",
    "/Oparens": "\u1F11E",
    "/Oslash": "\u00D8",
    "/Oslashacute": "\u01FE",
    "/Oslashsmall": "\uF7F8",
    "/Osmall": "\uF76F",
    "/Osquare": "\u1F13E",
    "/Osquareblack": "\u1F17E",
    "/Ostroke": "\uA74A",
    "/Ostrokeacute": "\u01FE",
    "/Otcyr": "\u047E",
    "/Otcyrillic": "\u047E",
    "/Otilde": "\u00D5",
    "/Otildeacute": "\u1E4C",
    "/Otildedieresis": "\u1E4E",
    "/Otildemacron": "\u022C",
    "/Otildesmall": "\uF7F5",
    "/Ou": "\u0222",
    "/P": "\u0050",
    "/PAsquareblack": "\u1F18C",
    "/PPVsquare": "\u1F14E",
    "/Pacute": "\u1E54",
    "/Palochkacyr": "\u04C0",
    "/Pcircle": "\u24C5",
    "/Pcircleblack": "\u1F15F",
    "/Pcrosssquareblack": "\u1F18A",
    "/Pdblstruck": "\u2119",
    "/Pdot": "\u1E56",
    "/Pdotaccent": "\u1E56",
    "/Pecyr": "\u041F",
    "/Pecyrillic": "\u041F",
    "/Peharmenian": "\u054A",
    "/Pehookcyr": "\u04A6",
    "/Pemiddlehookcyrillic": "\u04A6",
    "/Petailcyr": "\u0524",
    "/Pflourish": "\uA752",
    "/Phi": "\u03A6",
    "/Phook": "\u01A4",
    "/Pi": "\u03A0",
    "/Pidblstruck": "\u213F",
    "/Piwrarmenian": "\u0553",
    "/Pmonospace": "\uFF30",
    "/Pparens": "\u1F11F",
    "/Psi": "\u03A8",
    "/Psicyr": "\u0470",
    "/Psicyrillic": "\u0470",
    "/Psmall": "\uF770",
    "/Psquare": "\u1F13F",
    "/Psquareblack": "\u1F17F",
    "/Pstroke": "\u2C63",
    "/Pstrokedescender": "\uA750",
    "/Ptail": "\uA754",
    "/Q": "\u0051",
    "/Qacyr": "\u051A",
    "/QalaUsedAsKoranicStopSign": "\uFDF1",
    "/Qcircle": "\u24C6",
    "/Qcircleblack": "\u1F160",
    "/Qdblstruck": "\u211A",
    "/Qdiagonalstroke": "\uA758",
    "/Qmonospace": "\uFF31",
    "/Qparens": "\u1F120",
    "/Qrotated": "\u213A",
    "/Qsmall": "\uF771",
    "/Qsmallhooktail": "\u024A",
    "/Qsquare": "\u1F140",
    "/Qsquareblack": "\u1F180",
    "/Qstrokedescender": "\uA756",
    "/R": "\u0052",
    "/Raarmenian": "\u054C",
    "/Racute": "\u0154",
    "/Rasoul": "\uFDF6",
    "/Rcaron": "\u0158",
    "/Rcedilla": "\u0156",
    "/Rcircle": "\u24C7",
    "/Rcircleblack": "\u1F161",
    "/Rcommaaccent": "\u0156",
    "/Rdblgrave": "\u0210",
    "/Rdblstruck": "\u211D",
    "/Rdot": "\u1E58",
    "/Rdotaccent": "\u1E58",
    "/Rdotbelow": "\u1E5A",
    "/Rdotbelowmacron": "\u1E5C",
    "/Reharmenian": "\u0550",
    "/Reverseddottedsigmalunatesymbol": "\u03FF",
    "/Reversedzecyr": "\u0510",
    "/Rfraktur": "\u211C",
    "/Rgravedbl": "\u0210",
    "/Rhacyr": "\u0516",
    "/Rho": "\u03A1",
    "/Rhoasper": "\u1FEC",
    "/Ringsmall": "\uF6FC",
    "/Rinsular": "\uA782",
    "/Rinvertedbreve": "\u0212",
    "/Rinvertedsmall": "\u0281",
    "/Ritaliccircle": "\u1F12C",
    "/Rlinebelow": "\u1E5E",
    "/Rmacrondot": "\u1E5C",
    "/Rmonospace": "\uFF32",
    "/Robliquestroke": "\uA7A6",
    "/Rparens": "\u1F121",
    "/Rrotunda": "\uA75A",
    "/Rscript": "\u211B",
    "/Rsmall": "\uF772",
    "/Rsmallinverted": "\u0281",
    "/Rsmallinvertedsuperior": "\u02B6",
    "/Rsquare": "\u1F141",
    "/Rsquareblack": "\u1F181",
    "/Rstroke": "\u024C",
    "/Rsupinvertedmod": "\u02B6",
    "/Rtail": "\u2C64",
    "/RubElHizbstart": "\u06DE",
    "/Rumrotunda": "\uA75C",
    "/Rumsmall": "\uA776",
    "/S": "\u0053",
    "/SAsquareblack": "\u1F18D",
    "/SDsquare": "\u1F14C",
    "/SF010000": "\u250C",
    "/SF020000": "\u2514",
    "/SF030000": "\u2510",
    "/SF040000": "\u2518",
    "/SF050000": "\u253C",
    "/SF060000": "\u252C",
    "/SF070000": "\u2534",
    "/SF080000": "\u251C",
    "/SF090000": "\u2524",
    "/SF100000": "\u2500",
    "/SF110000": "\u2502",
    "/SF190000": "\u2561",
    "/SF200000": "\u2562",
    "/SF210000": "\u2556",
    "/SF220000": "\u2555",
    "/SF230000": "\u2563",
    "/SF240000": "\u2551",
    "/SF250000": "\u2557",
    "/SF260000": "\u255D",
    "/SF270000": "\u255C",
    "/SF280000": "\u255B",
    "/SF360000": "\u255E",
    "/SF370000": "\u255F",
    "/SF380000": "\u255A",
    "/SF390000": "\u2554",
    "/SF400000": "\u2569",
    "/SF410000": "\u2566",
    "/SF420000": "\u2560",
    "/SF430000": "\u2550",
    "/SF440000": "\u256C",
    "/SF450000": "\u2567",
    "/SF460000": "\u2568",
    "/SF470000": "\u2564",
    "/SF480000": "\u2565",
    "/SF490000": "\u2559",
    "/SF500000": "\u2558",
    "/SF510000": "\u2552",
    "/SF520000": "\u2553",
    "/SF530000": "\u256B",
    "/SF540000": "\u256A",
    "/SSsquare": "\u1F14D",
    "/Sacute": "\u015A",
    "/Sacutedotaccent": "\u1E64",
    "/Safha": "\u0603",
    "/Sajdah": "\u06E9",
    "/Salam": "\uFDF5",
    "/Salla": "\uFDF9",
    "/SallaUsedAsKoranicStopSign": "\uFDF0",
    "/SallallahouAlayheWasallam": "\uFDFA",
    "/Saltillo": "\uA78B",
    "/Sampi": "\u03E0",
    "/Sampiarchaic": "\u0372",
    "/Sampigreek": "\u03E0",
    "/San": "\u03FA",
    "/Sanah": "\u0601",
    "/Scaron": "\u0160",
    "/Scarondot": "\u1E66",
    "/Scarondotaccent": "\u1E66",
    "/Scaronsmall": "\uF6FD",
    "/Scedilla": "\u015E",
    "/Schwa": "\u018F",
    "/Schwacyr": "\u04D8",
    "/Schwacyrillic": "\u04D8",
    "/Schwadieresiscyr": "\u04DA",
    "/Schwadieresiscyrillic": "\u04DA",
    "/Scircle": "\u24C8",
    "/Scircleblack": "\u1F162",
    "/Scircumflex": "\u015C",
    "/Scommaaccent": "\u0218",
    "/Scriptg": "\uA7AC",
    "/Sdot": "\u1E60",
    "/Sdotaccent": "\u1E60",
    "/Sdotbelow": "\u1E62",
    "/Sdotbelowdotabove": "\u1E68",
    "/Sdotbelowdotaccent": "\u1E68",
    "/Seharmenian": "\u054D",
    "/Semisoftcyr": "\u048C",
    "/Sevenroman": "\u2166",
    "/Shaarmenian": "\u0547",
    "/Shacyr": "\u0428",
    "/Shacyrillic": "\u0428",
    "/Shchacyr": "\u0429",
    "/Shchacyrillic": "\u0429",
    "/Sheicoptic": "\u03E2",
    "/SheneGerishin:hb": "\u059E",
    "/Shhacyr": "\u04BA",
    "/Shhacyrillic": "\u04BA",
    "/Shhatailcyr": "\u0526",
    "/Shimacoptic": "\u03EC",
    "/Sho": "\u03F7",
    "/Sigma": "\u03A3",
    "/Sigmalunatesymbol": "\u03F9",
    "/Sigmalunatesymboldotted": "\u03FE",
    "/Sigmareversedlunatesymbol": "\u03FD",
    "/Sinsular": "\uA784",
    "/Sixroman": "\u2165",
    "/Sjekomicyr": "\u050C",
    "/Smonospace": "\uFF33",
    "/Sobliquestroke": "\uA7A8",
    "/Softcyr": "\u042C",
    "/Softsigncyrillic": "\u042C",
    "/Sparens": "\u1F122",
    "/Sshell": "\u1F12A",
    "/Ssmall": "\uF773",
    "/Ssquare": "\u1F142",
    "/Ssquareblack": "\u1F182",
    "/Sswashtail": "\u2C7E",
    "/Stigma": "\u03DA",
    "/Stigmagreek": "\u03DA",
    "/T": "\u0054",
    "/Tau": "\u03A4",
    "/Tbar": "\u0166",
    "/Tcaron": "\u0164",
    "/Tcedilla": "\u0162",
    "/Tcircle": "\u24C9",
    "/Tcircleblack": "\u1F163",
    "/Tcircumflexbelow": "\u1E70",
    "/Tcommaaccent": "\u0162",
    "/Tdot": "\u1E6A",
    "/Tdotaccent": "\u1E6A",
    "/Tdotbelow": "\u1E6C",
    "/Tecyr": "\u0422",
    "/Tecyrillic": "\u0422",
    "/Tedescendercyrillic": "\u04AC",
    "/Tenroman": "\u2169",
    "/Tetailcyr": "\u04AC",
    "/Tetsecyr": "\u04B4",
    "/Tetsecyrillic": "\u04B4",
    "/Theta": "\u0398",
    "/Thetasymbol": "\u03F4",
    "/Thook": "\u01AC",
    "/Thorn": "\u00DE",
    "/Thornsmall": "\uF7FE",
    "/Thornstroke": "\uA764",
    "/Thornstrokedescender": "\uA766",
    "/Threeroman": "\u2162",
    "/Tildesmall": "\uF6FE",
    "/Tinsular": "\uA786",
    "/Tiwnarmenian": "\u054F",
    "/Tjekomicyr": "\u050E",
    "/Tlinebelow": "\u1E6E",
    "/Tmonospace": "\uFF34",
    "/Toarmenian": "\u0539",
    "/Tonefive": "\u01BC",
    "/Tonesix": "\u0184",
    "/Tonetwo": "\u01A7",
    "/Tparens": "\u1F123",
    "/Tresillo": "\uA72A",
    "/Tretroflexhook": "\u01AE",
    "/Tsecyr": "\u0426",
    "/Tsecyrillic": "\u0426",
    "/Tshecyr": "\u040B",
    "/Tshecyrillic": "\u040B",
    "/Tsmall": "\uF774",
    "/Tsquare": "\u1F143",
    "/Tsquareblack": "\u1F183",
    "/Tturned": "\uA7B1",
    "/Twelveroman": "\u216B",
    "/Twithdiagonalstroke": "\u023E",
    "/Tworoman": "\u2161",
    "/Tz": "\uA728",
    "/U": "\u0055",
    "/Uacute": "\u00DA",
    "/Uacutedblcyr": "\u04F2",
    "/Uacutesmall": "\uF7FA",
    "/Ubar": "\u0244",
    "/Ubreve": "\u016C",
    "/Ucaron": "\u01D3",
    "/Ucircle": "\u24CA",
    "/Ucircleblack": "\u1F164",
    "/Ucircumflex": "\u00DB",
    "/Ucircumflexbelow": "\u1E76",
    "/Ucircumflexsmall": "\uF7FB",
    "/Ucyr": "\u0423",
    "/Ucyrillic": "\u0423",
    "/Udblacute": "\u0170",
    "/Udblgrave": "\u0214",
    "/Udieresis": "\u00DC",
    "/Udieresisacute": "\u01D7",
    "/Udieresisbelow": "\u1E72",
    "/Udieresiscaron": "\u01D9",
    "/Udieresiscyr": "\u04F0",
    "/Udieresiscyrillic": "\u04F0",
    "/Udieresisgrave": "\u01DB",
    "/Udieresismacron": "\u01D5",
    "/Udieresissmall": "\uF7FC",
    "/Udotbelow": "\u1EE4",
    "/Ugrave": "\u00D9",
    "/Ugravedbl": "\u0214",
    "/Ugravesmall": "\uF7F9",
    "/Uhoi": "\u1EE6",
    "/Uhookabove": "\u1EE6",
    "/Uhorn": "\u01AF",
    "/Uhornacute": "\u1EE8",
    "/Uhorndotbelow": "\u1EF0",
    "/Uhorngrave": "\u1EEA",
    "/Uhornhoi": "\u1EEC",
    "/Uhornhookabove": "\u1EEC",
    "/Uhorntilde": "\u1EEE",
    "/Uhungarumlaut": "\u0170",
    "/Uhungarumlautcyrillic": "\u04F2",
    "/Uinvertedbreve": "\u0216",
    "/Ukcyr": "\u0478",
    "/Ukcyrillic": "\u0478",
    "/Umacron": "\u016A",
    "/Umacroncyr": "\u04EE",
    "/Umacroncyrillic": "\u04EE",
    "/Umacrondieresis": "\u1E7A",
    "/Umonospace": "\uFF35",
    "/Uogonek": "\u0172",
    "/Uparens": "\u1F124",
    "/Upsilon": "\u03A5",
    "/Upsilon1": "\u03D2",
    "/Upsilonacute": "\u1FEB",
    "/Upsilonacutehooksymbol": "\u03D3",
    "/Upsilonacutehooksymbolgreek": "\u03D3",
    "/Upsilonadieresishooksymbol": "\u03D4",
    "/Upsilonafrican": "\u01B1",
    "/Upsilonasper": "\u1F59",
    "/Upsilonasperacute": "\u1F5D",
    "/Upsilonaspergrave": "\u1F5B",
    "/Upsilonaspertilde": "\u1F5F",
    "/Upsilonbreve": "\u1FE8",
    "/Upsilondieresis": "\u03AB",
    "/Upsilondieresishooksymbolgreek": "\u03D4",
    "/Upsilongrave": "\u1FEA",
    "/Upsilonhooksymbol": "\u03D2",
    "/Upsilontonos": "\u038E",
    "/Upsilonwithmacron": "\u1FE9",
    "/Uring": "\u016E",
    "/Ushortcyr": "\u040E",
    "/Ushortcyrillic": "\u040E",
    "/Usmall": "\uF775",
    "/Usquare": "\u1F144",
    "/Usquareblack": "\u1F184",
    "/Ustraightcyr": "\u04AE",
    "/Ustraightcyrillic": "\u04AE",
    "/Ustraightstrokecyr": "\u04B0",
    "/Ustraightstrokecyrillic": "\u04B0",
    "/Utilde": "\u0168",
    "/Utildeacute": "\u1E78",
    "/Utildebelow": "\u1E74",
    "/V": "\u0056",
    "/Vcircle": "\u24CB",
    "/Vcircleblack": "\u1F165",
    "/Vdiagonalstroke": "\uA75E",
    "/Vdotbelow": "\u1E7E",
    "/Vecyr": "\u0412",
    "/Vecyrillic": "\u0412",
    "/Vend": "\uA768",
    "/Vewarmenian": "\u054E",
    "/Vhook": "\u01B2",
    "/Visigothicz": "\uA762",
    "/Vmod": "\u2C7D",
    "/Vmonospace": "\uFF36",
    "/Voarmenian": "\u0548",
    "/Volapukae": "\uA79A",
    "/Volapukoe": "\uA79C",
    "/Volapukue": "\uA79E",
    "/Vparens": "\u1F125",
    "/Vsmall": "\uF776",
    "/Vsquare": "\u1F145",
    "/Vsquareblack": "\u1F185",
    "/Vtilde": "\u1E7C",
    "/Vturned": "\u0245",
    "/Vwelsh": "\u1EFC",
    "/Vy": "\uA760",
    "/W": "\u0057",
    "/WZcircle": "\u1F12E",
    "/Wacute": "\u1E82",
    "/Wasallam": "\uFDF8",
    "/Wcircle": "\u24CC",
    "/Wcircleblack": "\u1F166",
    "/Wcircumflex": "\u0174",
    "/Wdieresis": "\u1E84",
    "/Wdot": "\u1E86",
    "/Wdotaccent": "\u1E86",
    "/Wdotbelow": "\u1E88",
    "/Wecyr": "\u051C",
    "/Wgrave": "\u1E80",
    "/Whook": "\u2C72",
    "/Wmonospace": "\uFF37",
    "/Wparens": "\u1F126",
    "/Wsmall": "\uF777",
    "/Wsquare": "\u1F146",
    "/Wsquareblack": "\u1F186",
    "/Wynn": "\u01F7",
    "/X": "\u0058",
    "/Xatailcyr": "\u04B2",
    "/Xcircle": "\u24CD",
    "/Xcircleblack": "\u1F167",
    "/Xdieresis": "\u1E8C",
    "/Xdot": "\u1E8A",
    "/Xdotaccent": "\u1E8A",
    "/Xeharmenian": "\u053D",
    "/Xi": "\u039E",
    "/Xmonospace": "\uFF38",
    "/Xparens": "\u1F127",
    "/Xsmall": "\uF778",
    "/Xsquare": "\u1F147",
    "/Xsquareblack": "\u1F187",
    "/Y": "\u0059",
    "/Yacute": "\u00DD",
    "/Yacutesmall": "\uF7FD",
    "/Yacyr": "\u042F",
    "/Yaecyr": "\u0518",
    "/Yatcyr": "\u0462",
    "/Yatcyrillic": "\u0462",
    "/Ycircle": "\u24CE",
    "/Ycircleblack": "\u1F168",
    "/Ycircumflex": "\u0176",
    "/Ydieresis": "\u0178",
    "/Ydieresissmall": "\uF7FF",
    "/Ydot": "\u1E8E",
    "/Ydotaccent": "\u1E8E",
    "/Ydotbelow": "\u1EF4",
    "/Yericyrillic": "\u042B",
    "/Yerudieresiscyrillic": "\u04F8",
    "/Ygrave": "\u1EF2",
    "/Yhoi": "\u1EF6",
    "/Yhook": "\u01B3",
    "/Yhookabove": "\u1EF6",
    "/Yiarmenian": "\u0545",
    "/Yicyrillic": "\u0407",
    "/Yiwnarmenian": "\u0552",
    "/Ylongcyr": "\u042B",
    "/Ylongdieresiscyr": "\u04F8",
    "/Yloop": "\u1EFE",
    "/Ymacron": "\u0232",
    "/Ymonospace": "\uFF39",
    "/Yogh": "\u021C",
    "/Yot": "\u037F",
    "/Yparens": "\u1F128",
    "/Ysmall": "\uF779",
    "/Ysquare": "\u1F148",
    "/Ysquareblack": "\u1F188",
    "/Ystroke": "\u024E",
    "/Ytilde": "\u1EF8",
    "/Yturnedsans": "\u2144",
    "/Yucyr": "\u042E",
    "/Yukrcyr": "\u0407",
    "/Yusbigcyr": "\u046A",
    "/Yusbigcyrillic": "\u046A",
    "/Yusbigiotifiedcyr": "\u046C",
    "/Yusbigiotifiedcyrillic": "\u046C",
    "/Yuslittlecyr": "\u0466",
    "/Yuslittlecyrillic": "\u0466",
    "/Yuslittleiotifiedcyr": "\u0468",
    "/Yuslittleiotifiedcyrillic": "\u0468",
    "/Z": "\u005A",
    "/Zaarmenian": "\u0536",
    "/Zacute": "\u0179",
    "/Zcaron": "\u017D",
    "/Zcaronsmall": "\uF6FF",
    "/Zcircle": "\u24CF",
    "/Zcircleblack": "\u1F169",
    "/Zcircumflex": "\u1E90",
    "/Zdblstruck": "\u2124",
    "/Zdescender": "\u2C6B",
    "/Zdot": "\u017B",
    "/Zdotaccent": "\u017B",
    "/Zdotbelow": "\u1E92",
    "/Zecyr": "\u0417",
    "/Zecyrillic": "\u0417",
    "/Zedescendercyrillic": "\u0498",
    "/Zedieresiscyr": "\u04DE",
    "/Zedieresiscyrillic": "\u04DE",
    "/Zeta": "\u0396",
    "/Zetailcyr": "\u0498",
    "/Zfraktur": "\u2128",
    "/Zhearmenian": "\u053A",
    "/Zhebrevecyr": "\u04C1",
    "/Zhebrevecyrillic": "\u04C1",
    "/Zhecyr": "\u0416",
    "/Zhecyrillic": "\u0416",
    "/Zhedescendercyrillic": "\u0496",
    "/Zhedieresiscyr": "\u04DC",
    "/Zhedieresiscyrillic": "\u04DC",
    "/Zhetailcyr": "\u0496",
    "/Zhook": "\u0224",
    "/Zjekomicyr": "\u0504",
    "/Zlinebelow": "\u1E94",
    "/Zmonospace": "\uFF3A",
    "/Zparens": "\u1F129",
    "/Zsmall": "\uF77A",
    "/Zsquare": "\u1F149",
    "/Zsquareblack": "\u1F189",
    "/Zstroke": "\u01B5",
    "/Zswashtail": "\u2C7F",
    "/a": "\u0061",
    "/a.inferior": "\u2090",
    "/aHonRAA": "\u0613",
    "/aa": "\uA733",
    "/aabengali": "\u0986",
    "/aacute": "\u00E1",
    "/aadeva": "\u0906",
    "/aagujarati": "\u0A86",
    "/aagurmukhi": "\u0A06",
    "/aamatragurmukhi": "\u0A3E",
    "/aarusquare": "\u3303",
    "/aavowelsignbengali": "\u09BE",
    "/aavowelsigndeva": "\u093E",
    "/aavowelsigngujarati": "\u0ABE",
    "/abbreviationmarkarmenian": "\u055F",
    "/abbreviationsigndeva": "\u0970",
    "/abengali": "\u0985",
    "/abopomofo": "\u311A",
    "/abreve": "\u0103",
    "/abreveacute": "\u1EAF",
    "/abrevecyr": "\u04D1",
    "/abrevecyrillic": "\u04D1",
    "/abrevedotbelow": "\u1EB7",
    "/abrevegrave": "\u1EB1",
    "/abrevehoi": "\u1EB3",
    "/abrevehookabove": "\u1EB3",
    "/abrevetilde": "\u1EB5",
    "/absquareblack": "\u1F18E",
    "/acaron": "\u01CE",
    "/accountof": "\u2100",
    "/accurrent": "\u23E6",
    "/acircle": "\u24D0",
    "/acirclekatakana": "\u32D0",
    "/acircumflex": "\u00E2",
    "/acircumflexacute": "\u1EA5",
    "/acircumflexdotbelow": "\u1EAD",
    "/acircumflexgrave": "\u1EA7",
    "/acircumflexhoi": "\u1EA9",
    "/acircumflexhookabove": "\u1EA9",
    "/acircumflextilde": "\u1EAB",
    "/activatearabicformshaping": "\u206D",
    "/activatesymmetricswapping": "\u206B",
    "/acute": "\u00B4",
    "/acutebelowcmb": "\u0317",
    "/acutecmb": "\u0301",
    "/acutecomb": "\u0301",
    "/acutedblmiddlemod": "\u02F6",
    "/acutedeva": "\u0954",
    "/acutelowmod": "\u02CF",
    "/acutemod": "\u02CA",
    "/acutetonecmb": "\u0341",
    "/acyr": "\u0430",
    "/acyrillic": "\u0430",
    "/adblgrave": "\u0201",
    "/addakgurmukhi": "\u0A71",
    "/addressedsubject": "\u2101",
    "/adegadegpada": "\uA9CB",
    "/adegpada": "\uA9CA",
    "/adeva": "\u0905",
    "/adieresis": "\u00E4",
    "/adieresiscyr": "\u04D3",
    "/adieresiscyrillic": "\u04D3",
    "/adieresismacron": "\u01DF",
    "/adishakti": "\u262C",
    "/admissionTickets": "\u1F39F",
    "/adot": "\u0227",
    "/adotbelow": "\u1EA1",
    "/adotmacron": "\u01E1",
    "/ae": "\u00E6",
    "/aeacute": "\u01FD",
    "/aekorean": "\u3150",
    "/aemacron": "\u01E3",
    "/aerialTramway": "\u1F6A1",
    "/afghani": "\u060B",
    "/afii00208": "\u2015",
    "/afii08941": "\u20A4",
    "/afii10017": "\u0410",
    "/afii10018": "\u0411",
    "/afii10019": "\u0412",
    "/afii10020": "\u0413",
    "/afii10021": "\u0414",
    "/afii10022": "\u0415",
    "/afii10023": "\u0401",
    "/afii10024": "\u0416",
    "/afii10025": "\u0417",
    "/afii10026": "\u0418",
    "/afii10027": "\u0419",
    "/afii10028": "\u041A",
    "/afii10029": "\u041B",
    "/afii10030": "\u041C",
    "/afii10031": "\u041D",
    "/afii10032": "\u041E",
    "/afii10033": "\u041F",
    "/afii10034": "\u0420",
    "/afii10035": "\u0421",
    "/afii10036": "\u0422",
    "/afii10037": "\u0423",
    "/afii10038": "\u0424",
    "/afii10039": "\u0425",
    "/afii10040": "\u0426",
    "/afii10041": "\u0427",
    "/afii10042": "\u0428",
    "/afii10043": "\u0429",
    "/afii10044": "\u042A",
    "/afii10045": "\u042B",
    "/afii10046": "\u042C",
    "/afii10047": "\u042D",
    "/afii10048": "\u042E",
    "/afii10049": "\u042F",
    "/afii10050": "\u0490",
    "/afii10051": "\u0402",
    "/afii10052": "\u0403",
    "/afii10053": "\u0404",
    "/afii10054": "\u0405",
    "/afii10055": "\u0406",
    "/afii10056": "\u0407",
    "/afii10057": "\u0408",
    "/afii10058": "\u0409",
    "/afii10059": "\u040A",
    "/afii10060": "\u040B",
    "/afii10061": "\u040C",
    "/afii10062": "\u040E",
    "/afii10063": "\uF6C4",
    "/afii10064": "\uF6C5",
    "/afii10065": "\u0430",
    "/afii10066": "\u0431",
    "/afii10067": "\u0432",
    "/afii10068": "\u0433",
    "/afii10069": "\u0434",
    "/afii10070": "\u0435",
    "/afii10071": "\u0451",
    "/afii10072": "\u0436",
    "/afii10073": "\u0437",
    "/afii10074": "\u0438",
    "/afii10075": "\u0439",
    "/afii10076": "\u043A",
    "/afii10077": "\u043B",
    "/afii10078": "\u043C",
    "/afii10079": "\u043D",
    "/afii10080": "\u043E",
    "/afii10081": "\u043F",
    "/afii10082": "\u0440",
    "/afii10083": "\u0441",
    "/afii10084": "\u0442",
    "/afii10085": "\u0443",
    "/afii10086": "\u0444",
    "/afii10087": "\u0445",
    "/afii10088": "\u0446",
    "/afii10089": "\u0447",
    "/afii10090": "\u0448",
    "/afii10091": "\u0449",
    "/afii10092": "\u044A",
    "/afii10093": "\u044B",
    "/afii10094": "\u044C",
    "/afii10095": "\u044D",
    "/afii10096": "\u044E",
    "/afii10097": "\u044F",
    "/afii10098": "\u0491",
    "/afii10099": "\u0452",
    "/afii10100": "\u0453",
    "/afii10101": "\u0454",
    "/afii10102": "\u0455",
    "/afii10103": "\u0456",
    "/afii10104": "\u0457",
    "/afii10105": "\u0458",
    "/afii10106": "\u0459",
    "/afii10107": "\u045A",
    "/afii10108": "\u045B",
    "/afii10109": "\u045C",
    "/afii10110": "\u045E",
    "/afii10145": "\u040F",
    "/afii10146": "\u0462",
    "/afii10147": "\u0472",
    "/afii10148": "\u0474",
    "/afii10192": "\uF6C6",
    "/afii10193": "\u045F",
    "/afii10194": "\u0463",
    "/afii10195": "\u0473",
    "/afii10196": "\u0475",
    "/afii10831": "\uF6C7",
    "/afii10832": "\uF6C8",
    "/afii10846": "\u04D9",
    "/afii299": "\u200E",
    "/afii300": "\u200F",
    "/afii301": "\u200D",
    "/afii57381": "\u066A",
    "/afii57388": "\u060C",
    "/afii57392": "\u0660",
    "/afii57393": "\u0661",
    "/afii57394": "\u0662",
    "/afii57395": "\u0663",
    "/afii57396": "\u0664",
    "/afii57397": "\u0665",
    "/afii57398": "\u0666",
    "/afii57399": "\u0667",
    "/afii57400": "\u0668",
    "/afii57401": "\u0669",
    "/afii57403": "\u061B",
    "/afii57407": "\u061F",
    "/afii57409": "\u0621",
    "/afii57410": "\u0622",
    "/afii57411": "\u0623",
    "/afii57412": "\u0624",
    "/afii57413": "\u0625",
    "/afii57414": "\u0626",
    "/afii57415": "\u0627",
    "/afii57416": "\u0628",
    "/afii57417": "\u0629",
    "/afii57418": "\u062A",
    "/afii57419": "\u062B",
    "/afii57420": "\u062C",
    "/afii57421": "\u062D",
    "/afii57422": "\u062E",
    "/afii57423": "\u062F",
    "/afii57424": "\u0630",
    "/afii57425": "\u0631",
    "/afii57426": "\u0632",
    "/afii57427": "\u0633",
    "/afii57428": "\u0634",
    "/afii57429": "\u0635",
    "/afii57430": "\u0636",
    "/afii57431": "\u0637",
    "/afii57432": "\u0638",
    "/afii57433": "\u0639",
    "/afii57434": "\u063A",
    "/afii57440": "\u0640",
    "/afii57441": "\u0641",
    "/afii57442": "\u0642",
    "/afii57443": "\u0643",
    "/afii57444": "\u0644",
    "/afii57445": "\u0645",
    "/afii57446": "\u0646",
    "/afii57448": "\u0648",
    "/afii57449": "\u0649",
    "/afii57450": "\u064A",
    "/afii57451": "\u064B",
    "/afii57452": "\u064C",
    "/afii57453": "\u064D",
    "/afii57454": "\u064E",
    "/afii57455": "\u064F",
    "/afii57456": "\u0650",
    "/afii57457": "\u0651",
    "/afii57458": "\u0652",
    "/afii57470": "\u0647",
    "/afii57505": "\u06A4",
    "/afii57506": "\u067E",
    "/afii57507": "\u0686",
    "/afii57508": "\u0698",
    "/afii57509": "\u06AF",
    "/afii57511": "\u0679",
    "/afii57512": "\u0688",
    "/afii57513": "\u0691",
    "/afii57514": "\u06BA",
    "/afii57519": "\u06D2",
    "/afii57534": "\u06D5",
    "/afii57636": "\u20AA",
    "/afii57645": "\u05BE",
    "/afii57658": "\u05C3",
    "/afii57664": "\u05D0",
    "/afii57665": "\u05D1",
    "/afii57666": "\u05D2",
    "/afii57667": "\u05D3",
    "/afii57668": "\u05D4",
    "/afii57669": "\u05D5",
    "/afii57670": "\u05D6",
    "/afii57671": "\u05D7",
    "/afii57672": "\u05D8",
    "/afii57673": "\u05D9",
    "/afii57674": "\u05DA",
    "/afii57675": "\u05DB",
    "/afii57676": "\u05DC",
    "/afii57677": "\u05DD",
    "/afii57678": "\u05DE",
    "/afii57679": "\u05DF",
    "/afii57680": "\u05E0",
    "/afii57681": "\u05E1",
    "/afii57682": "\u05E2",
    "/afii57683": "\u05E3",
    "/afii57684": "\u05E4",
    "/afii57685": "\u05E5",
    "/afii57686": "\u05E6",
    "/afii57687": "\u05E7",
    "/afii57688": "\u05E8",
    "/afii57689": "\u05E9",
    "/afii57690": "\u05EA",
    "/afii57694": "\uFB2A",
    "/afii57695": "\uFB2B",
    "/afii57700": "\uFB4B",
    "/afii57705": "\uFB1F",
    "/afii57716": "\u05F0",
    "/afii57717": "\u05F1",
    "/afii57718": "\u05F2",
    "/afii57723": "\uFB35",
    "/afii57793": "\u05B4",
    "/afii57794": "\u05B5",
    "/afii57795": "\u05B6",
    "/afii57796": "\u05BB",
    "/afii57797": "\u05B8",
    "/afii57798": "\u05B7",
    "/afii57799": "\u05B0",
    "/afii57800": "\u05B2",
    "/afii57801": "\u05B1",
    "/afii57802": "\u05B3",
    "/afii57803": "\u05C2",
    "/afii57804": "\u05C1",
    "/afii57806": "\u05B9",
    "/afii57807": "\u05BC",
    "/afii57839": "\u05BD",
    "/afii57841": "\u05BF",
    "/afii57842": "\u05C0",
    "/afii57929": "\u02BC",
    "/afii61248": "\u2105",
    "/afii61289": "\u2113",
    "/afii61352": "\u2116",
    "/afii61573": "\u202C",
    "/afii61574": "\u202D",
    "/afii61575": "\u202E",
    "/afii61664": "\u200C",
    "/afii63167": "\u066D",
    "/afii64937": "\u02BD",
    "/agrave": "\u00E0",
    "/agravedbl": "\u0201",
    "/agujarati": "\u0A85",
    "/agurmukhi": "\u0A05",
    "/ahiragana": "\u3042",
    "/ahoi": "\u1EA3",
    "/ahookabove": "\u1EA3",
    "/aibengali": "\u0990",
    "/aibopomofo": "\u311E",
    "/aideva": "\u0910",
    "/aiecyr": "\u04D5",
    "/aiecyrillic": "\u04D5",
    "/aigujarati": "\u0A90",
    "/aigurmukhi": "\u0A10",
    "/aimatragurmukhi": "\u0A48",
    "/ain.fina": "\uFECA",
    "/ain.init": "\uFECB",
    "/ain.init_alefmaksura.fina": "\uFCF7",
    "/ain.init_jeem.fina": "\uFC29",
    "/ain.init_jeem.medi": "\uFCBA",
    "/ain.init_jeem.medi_meem.medi": "\uFDC4",
    "/ain.init_meem.fina": "\uFC2A",
    "/ain.init_meem.medi": "\uFCBB",
    "/ain.init_meem.medi_meem.medi": "\uFD77",
    "/ain.init_yeh.fina": "\uFCF8",
    "/ain.isol": "\uFEC9",
    "/ain.medi": "\uFECC",
    "/ain.medi_alefmaksura.fina": "\uFD13",
    "/ain.medi_jeem.medi_meem.fina": "\uFD75",
    "/ain.medi_meem.medi_alefmaksura.fina": "\uFD78",
    "/ain.medi_meem.medi_meem.fina": "\uFD76",
    "/ain.medi_meem.medi_yeh.fina": "\uFDB6",
    "/ain.medi_yeh.fina": "\uFD14",
    "/ainThreeDotsDownAbove": "\u075E",
    "/ainTwoDotsAbove": "\u075D",
    "/ainTwoDotsVerticallyAbove": "\u075F",
    "/ainarabic": "\u0639",
    "/ainfinalarabic": "\uFECA",
    "/aininitialarabic": "\uFECB",
    "/ainmedialarabic": "\uFECC",
    "/ainthreedotsabove": "\u06A0",
    "/ainvertedbreve": "\u0203",
    "/airplaneArriving": "\u1F6EC",
    "/airplaneDeparture": "\u1F6EB",
    "/aivowelsignbengali": "\u09C8",
    "/aivowelsigndeva": "\u0948",
    "/aivowelsigngujarati": "\u0AC8",
    "/akatakana": "\u30A2",
    "/akatakanahalfwidth": "\uFF71",
    "/akorean": "\u314F",
    "/aktieselskab": "\u214D",
    "/alarmclock": "\u23F0",
    "/alef": "\u05D0",
    "/alef.fina": "\uFE8E",
    "/alef.init_fathatan.fina": "\uFD3D",
    "/alef.isol": "\uFE8D",
    "/alef.medi_fathatan.fina": "\uFD3C",
    "/alef:hb": "\u05D0",
    "/alefDigitThreeAbove": "\u0774",
    "/alefDigitTwoAbove": "\u0773",
    "/alefLamYehabove": "\u0616",
    "/alefabove": "\u0670",
    "/alefarabic": "\u0627",
    "/alefdageshhebrew": "\uFB30",
    "/aleffinalarabic": "\uFE8E",
    "/alefhamza": "\u0623",
    "/alefhamza.fina": "\uFE84",
    "/alefhamza.isol": "\uFE83",
    "/alefhamzaabovearabic": "\u0623",
    "/alefhamzaabovefinalarabic": "\uFE84",
    "/alefhamzabelow": "\u0625",
    "/alefhamzabelow.fina": "\uFE88",
    "/alefhamzabelow.isol": "\uFE87",
    "/alefhamzabelowarabic": "\u0625",
    "/alefhamzabelowfinalarabic": "\uFE88",
    "/alefhebrew": "\u05D0",
    "/alefhighhamza": "\u0675",
    "/aleflamedhebrew": "\uFB4F",
    "/alefmadda": "\u0622",
    "/alefmadda.fina": "\uFE82",
    "/alefmadda.isol": "\uFE81",
    "/alefmaddaabovearabic": "\u0622",
    "/alefmaddaabovefinalarabic": "\uFE82",
    "/alefmaksura": "\u0649",
    "/alefmaksura.fina": "\uFEF0",
    "/alefmaksura.init_superscriptalef.fina": "\uFC5D",
    "/alefmaksura.isol": "\uFEEF",
    "/alefmaksura.medi_superscriptalef.fina": "\uFC90",
    "/alefmaksuraarabic": "\u0649",
    "/alefmaksurafinalarabic": "\uFEF0",
    "/alefmaksurainitialarabic": "\uFEF3",
    "/alefmaksuramedialarabic": "\uFEF4",
    "/alefpatahhebrew": "\uFB2E",
    "/alefqamatshebrew": "\uFB2F",
    "/alefwasla": "\u0671",
    "/alefwasla.fina": "\uFB51",
    "/alefwasla.isol": "\uFB50",
    "/alefwavyhamza": "\u0672",
    "/alefwavyhamzabelow": "\u0673",
    "/alefwide:hb": "\uFB21",
    "/alefwithmapiq:hb": "\uFB30",
    "/alefwithpatah:hb": "\uFB2E",
    "/alefwithqamats:hb": "\uFB2F",
    "/alembic": "\u2697",
    "/aleph": "\u2135",
    "/alienMonster": "\u1F47E",
    "/allaroundprofile": "\u232E",
    "/allequal": "\u224C",
    "/allianceideographiccircled": "\u32AF",
    "/allianceideographicparen": "\u323F",
    "/almostequalorequal": "\u224A",
    "/alpha": "\u03B1",
    "/alphaacute": "\u1F71",
    "/alphaacuteiotasub": "\u1FB4",
    "/alphaasper": "\u1F01",
    "/alphaasperacute": "\u1F05",
    "/alphaasperacuteiotasub": "\u1F85",
    "/alphaaspergrave": "\u1F03",
    "/alphaaspergraveiotasub": "\u1F83",
    "/alphaasperiotasub": "\u1F81",
    "/alphaaspertilde": "\u1F07",
    "/alphaaspertildeiotasub": "\u1F87",
    "/alphabreve": "\u1FB0",
    "/alphafunc": "\u237A",
    "/alphagrave": "\u1F70",
    "/alphagraveiotasub": "\u1FB2",
    "/alphaiotasub": "\u1FB3",
    "/alphalenis": "\u1F00",
    "/alphalenisacute": "\u1F04",
    "/alphalenisacuteiotasub": "\u1F84",
    "/alphalenisgrave": "\u1F02",
    "/alphalenisgraveiotasub": "\u1F82",
    "/alphalenisiotasub": "\u1F80",
    "/alphalenistilde": "\u1F06",
    "/alphalenistildeiotasub": "\u1F86",
    "/alphatilde": "\u1FB6",
    "/alphatildeiotasub": "\u1FB7",
    "/alphatonos": "\u03AC",
    "/alphaturned": "\u0252",
    "/alphaunderlinefunc": "\u2376",
    "/alphawithmacron": "\u1FB1",
    "/alternateonewayleftwaytraffic": "\u26D5",
    "/alternative": "\u2387",
    "/amacron": "\u0101",
    "/ambulance": "\u1F691",
    "/americanFootball": "\u1F3C8",
    "/amfullwidth": "\u33C2",
    "/amonospace": "\uFF41",
    "/amountofcheck": "\u2447",
    "/ampersand": "\u0026",
    "/ampersandSindhi": "\u06FD",
    "/ampersandmonospace": "\uFF06",
    "/ampersandsmall": "\uF726",
    "/ampersandturned": "\u214B",
    "/amphora": "\u1F3FA",
    "/amsquare": "\u33C2",
    "/anbopomofo": "\u3122",
    "/anchor": "\u2693",
    "/ancoradown": "\u2E14",
    "/ancoraup": "\u2E15",
    "/andappada": "\uA9C3",
    "/angbopomofo": "\u3124",
    "/anger": "\u1F4A2",
    "/angkhankhuthai": "\u0E5A",
    "/angle": "\u2220",
    "/anglearcright": "\u22BE",
    "/anglebracketleft": "\u3008",
    "/anglebracketleftvertical": "\uFE3F",
    "/anglebracketright": "\u3009",
    "/anglebracketrightvertical": "\uFE40",
    "/angledottedright": "\u2E16",
    "/angleleft": "\u2329",
    "/anglemarkerdottedsubstitutionright": "\u2E01",
    "/anglemarkersubstitutionright": "\u2E00",
    "/angleright": "\u232A",
    "/anglezigzagarrowdownright": "\u237C",
    "/angryFace": "\u1F620",
    "/angstrom": "\u212B",
    "/anguishedFace": "\u1F627",
    "/ankh": "\u2625",
    "/anoteleia": "\u0387",
    "/anpeasquare": "\u3302",
    "/ant": "\u1F41C",
    "/antennaBars": "\u1F4F6",
    "/anticlockwiseDownwardsAndUpwardsOpenCircleArrows": "\u1F504",
    "/anudattadeva": "\u0952",
    "/anusvarabengali": "\u0982",
    "/anusvaradeva": "\u0902",
    "/anusvaragujarati": "\u0A82",
    "/ao": "\uA735",
    "/aogonek": "\u0105",
    "/aovermfullwidth": "\u33DF",
    "/apaatosquare": "\u3300",
    "/aparen": "\u249C",
    "/aparenthesized": "\u249C",
    "/apostrophearmenian": "\u055A",
    "/apostrophedblmod": "\u02EE",
    "/apostrophemod": "\u02BC",
    "/apple": "\uF8FF",
    "/approaches": "\u2250",
    "/approacheslimit": "\u2250",
    "/approxequal": "\u2248",
    "/approxequalorimage": "\u2252",
    "/approximatelybutnotactuallyequal": "\u2246",
    "/approximatelyequal": "\u2245",
    "/approximatelyequalorimage": "\u2252",
    "/apriltelegraph": "\u32C3",
    "/aquarius": "\u2652",
    "/ar:ae": "\u06D5",
    "/ar:ain": "\u0639",
    "/ar:alef": "\u0627",
    "/ar:comma": "\u060C",
    "/ar:cuberoot": "\u0606",
    "/ar:decimalseparator": "\u066B",
    "/ar:e": "\u06D0",
    "/ar:eight": "\u0668",
    "/ar:feh": "\u0641",
    "/ar:five": "\u0665",
    "/ar:four": "\u0664",
    "/ar:fourthroot": "\u0607",
    "/ar:kaf": "\u0643",
    "/ar:ng": "\u06AD",
    "/ar:nine": "\u0669",
    "/ar:numbersign": "\u0600",
    "/ar:oe": "\u06C6",
    "/ar:one": "\u0661",
    "/ar:peh": "\u067E",
    "/ar:percent": "\u066A",
    "/ar:perthousand": "\u060A",
    "/ar:question": "\u061F",
    "/ar:reh": "\u0631",
    "/ar:semicolon": "\u061B",
    "/ar:seven": "\u0667",
    "/ar:shadda": "\u0651",
    "/ar:six": "\u0666",
    "/ar:sukun": "\u0652",
    "/ar:three": "\u0663",
    "/ar:two": "\u0662",
    "/ar:u": "\u06C7",
    "/ar:ve": "\u06CB",
    "/ar:yu": "\u06C8",
    "/ar:zero": "\u0660",
    "/araeaekorean": "\u318E",
    "/araeakorean": "\u318D",
    "/arc": "\u2312",
    "/archaicmepigraphic": "\uA7FF",
    "/aries": "\u2648",
    "/arighthalfring": "\u1E9A",
    "/aring": "\u00E5",
    "/aringacute": "\u01FB",
    "/aringbelow": "\u1E01",
    "/armn:Ayb": "\u0531",
    "/armn:Ben": "\u0532",
    "/armn:Ca": "\u053E",
    "/armn:Cha": "\u0549",
    "/armn:Cheh": "\u0543",
    "/armn:Co": "\u0551",
    "/armn:DRAMSIGN": "\u058F",
    "/armn:Da": "\u0534",
    "/armn:Ech": "\u0535",
    "/armn:Eh": "\u0537",
    "/armn:Et": "\u0538",
    "/armn:Feh": "\u0556",
    "/armn:Ghad": "\u0542",
    "/armn:Gim": "\u0533",
    "/armn:Ho": "\u0540",
    "/armn:Ini": "\u053B",
    "/armn:Ja": "\u0541",
    "/armn:Jheh": "\u054B",
    "/armn:Keh": "\u0554",
    "/armn:Ken": "\u053F",
    "/armn:Liwn": "\u053C",
    "/armn:Men": "\u0544",
    "/armn:Now": "\u0546",
    "/armn:Oh": "\u0555",
    "/armn:Peh": "\u054A",
    "/armn:Piwr": "\u0553",
    "/armn:Ra": "\u054C",
    "/armn:Reh": "\u0550",
    "/armn:Seh": "\u054D",
    "/armn:Sha": "\u0547",
    "/armn:Tiwn": "\u054F",
    "/armn:To": "\u0539",
    "/armn:Vew": "\u054E",
    "/armn:Vo": "\u0548",
    "/armn:Xeh": "\u053D",
    "/armn:Yi": "\u0545",
    "/armn:Yiwn": "\u0552",
    "/armn:Za": "\u0536",
    "/armn:Zhe": "\u053A",
    "/armn:abbreviationmark": "\u055F",
    "/armn:apostrophe": "\u055A",
    "/armn:ayb": "\u0561",
    "/armn:ben": "\u0562",
    "/armn:ca": "\u056E",
    "/armn:cha": "\u0579",
    "/armn:cheh": "\u0573",
    "/armn:co": "\u0581",
    "/armn:comma": "\u055D",
    "/armn:da": "\u0564",
    "/armn:ech": "\u0565",
    "/armn:ech_yiwn": "\u0587",
    "/armn:eh": "\u0567",
    "/armn:emphasismark": "\u055B",
    "/armn:et": "\u0568",
    "/armn:exclam": "\u055C",
    "/armn:feh": "\u0586",
    "/armn:ghad": "\u0572",
    "/armn:gim": "\u0563",
    "/armn:ho": "\u0570",
    "/armn:hyphen": "\u058A",
    "/armn:ini": "\u056B",
    "/armn:ja": "\u0571",
    "/armn:jheh": "\u057B",
    "/armn:keh": "\u0584",
    "/armn:ken": "\u056F",
    "/armn:leftfacingeternitysign": "\u058E",
    "/armn:liwn": "\u056C",
    "/armn:men": "\u0574",
    "/armn:men_ech": "\uFB14",
    "/armn:men_ini": "\uFB15",
    "/armn:men_now": "\uFB13",
    "/armn:men_xeh": "\uFB17",
    "/armn:now": "\u0576",
    "/armn:oh": "\u0585",
    "/armn:peh": "\u057A",
    "/armn:period": "\u0589",
    "/armn:piwr": "\u0583",
    "/armn:question": "\u055E",
    "/armn:ra": "\u057C",
    "/armn:reh": "\u0580",
    "/armn:rightfacingeternitysign": "\u058D",
    "/armn:ringhalfleft": "\u0559",
    "/armn:seh": "\u057D",
    "/armn:sha": "\u0577",
    "/armn:tiwn": "\u057F",
    "/armn:to": "\u0569",
    "/armn:vew": "\u057E",
    "/armn:vew_now": "\uFB16",
    "/armn:vo": "\u0578",
    "/armn:xeh": "\u056D",
    "/armn:yi": "\u0575",
    "/armn:yiwn": "\u0582",
    "/armn:za": "\u0566",
    "/armn:zhe": "\u056A",
    "/arrowNE": "\u2197",
    "/arrowNW": "\u2196",
    "/arrowSE": "\u2198",
    "/arrowSW": "\u2199",
    "/arrowanticlockwiseopencircle": "\u21BA",
    "/arrowanticlockwisesemicircle": "\u21B6",
    "/arrowboth": "\u2194",
    "/arrowclockwiseopencircle": "\u21BB",
    "/arrowclockwisesemicircle": "\u21B7",
    "/arrowdashdown": "\u21E3",
    "/arrowdashleft": "\u21E0",
    "/arrowdashright": "\u21E2",
    "/arrowdashup": "\u21E1",
    "/arrowdblboth": "\u21D4",
    "/arrowdbldown": "\u21D3",
    "/arrowdblleft": "\u21D0",
    "/arrowdblright": "\u21D2",
    "/arrowdblup": "\u21D1",
    "/arrowdown": "\u2193",
    "/arrowdowndashed": "\u21E3",
    "/arrowdownfrombar": "\u21A7",
    "/arrowdownleft": "\u2199",
    "/arrowdownright": "\u2198",
    "/arrowdowntwoheaded": "\u21A1",
    "/arrowdownwhite": "\u21E9",
    "/arrowdownzigzag": "\u21AF",
    "/arrowheaddown": "\u2304",
    "/arrowheaddownlowmod": "\u02EF",
    "/arrowheaddownmod": "\u02C5",
    "/arrowheadleftlowmod": "\u02F1",
    "/arrowheadleftmod": "\u02C2",
    "/arrowheadrightlowmod": "\u02F2",
    "/arrowheadrightmod": "\u02C3",
    "/arrowheadtwobarsuphorizontal": "\u2324",
    "/arrowheadup": "\u2303",
    "/arrowheaduplowmod": "\u02F0",
    "/arrowheadupmod": "\u02C4",
    "/arrowhorizex": "\uF8E7",
    "/arrowleft": "\u2190",
    "/arrowleftdashed": "\u21E0",
    "/arrowleftdbl": "\u21D0",
    "/arrowleftdblstroke": "\u21CD",
    "/arrowleftdowncorner": "\u21B5",
    "/arrowleftdowntip": "\u21B2",
    "/arrowleftfrombar": "\u21A4",
    "/arrowlefthook": "\u21A9",
    "/arrowleftloop": "\u21AB",
    "/arrowleftlowmod": "\u02FF",
    "/arrowleftoverright": "\u21C6",
    "/arrowleftoverrighttobar": "\u21B9",
    "/arrowleftright": "\u2194",
    "/arrowleftrightstroke": "\u21AE",
    "/arrowleftrightwave": "\u21AD",
    "/arrowleftsquiggle": "\u21DC",
    "/arrowleftstroke": "\u219A",
    "/arrowlefttail": "\u21A2",
    "/arrowlefttobar": "\u21E4",
    "/arrowlefttwoheaded": "\u219E",
    "/arrowleftuptip": "\u21B0",
    "/arrowleftwave": "\u219C",
    "/arrowleftwhite": "\u21E6",
    "/arrowlongNWtobar": "\u21B8",
    "/arrowright": "\u2192",
    "/arrowrightdashed": "\u21E2",
    "/arrowrightdblstroke": "\u21CF",
    "/arrowrightdowncorner": "\u21B4",
    "/arrowrightdowntip": "\u21B3",
    "/arrowrightfrombar": "\u21A6",
    "/arrowrightheavy": "\u279E",
    "/arrowrighthook": "\u21AA",
    "/arrowrightloop": "\u21AC",
    "/arrowrightoverleft": "\u21C4",
    "/arrowrightsmallcircle": "\u21F4",
    "/arrowrightsquiggle": "\u21DD",
    "/arrowrightstroke": "\u219B",
    "/arrowrighttail": "\u21A3",
    "/arrowrighttobar": "\u21E5",
    "/arrowrighttwoheaded": "\u21A0",
    "/arrowrightwave": "\u219D",
    "/arrowrightwhite": "\u21E8",
    "/arrowspaireddown": "\u21CA",
    "/arrowspairedleft": "\u21C7",
    "/arrowspairedright": "\u21C9",
    "/arrowspairedup": "\u21C8",
    "/arrowtableft": "\u21E4",
    "/arrowtabright": "\u21E5",
    "/arrowup": "\u2191",
    "/arrowupdashed": "\u21E1",
    "/arrowupdn": "\u2195",
    "/arrowupdnbse": "\u21A8",
    "/arrowupdown": "\u2195",
    "/arrowupdownbase": "\u21A8",
    "/arrowupdownwithbase": "\u21A8",
    "/arrowupfrombar": "\u21A5",
    "/arrowupleft": "\u2196",
    "/arrowupleftofdown": "\u21C5",
    "/arrowupright": "\u2197",
    "/arrowuprighttip": "\u21B1",
    "/arrowuptwoheaded": "\u219F",
    "/arrowupwhite": "\u21E7",
    "/arrowvertex": "\uF8E6",
    "/articulatedLorry": "\u1F69B",
    "/artistPalette": "\u1F3A8",
    "/aruhuasquare": "\u3301",
    "/asciicircum": "\u005E",
    "/asciicircummonospace": "\uFF3E",
    "/asciitilde": "\u007E",
    "/asciitildemonospace": "\uFF5E",
    "/ascript": "\u0251",
    "/ascriptturned": "\u0252",
    "/asmallhiragana": "\u3041",
    "/asmallkatakana": "\u30A1",
    "/asmallkatakanahalfwidth": "\uFF67",
    "/asper": "\u1FFE",
    "/asperacute": "\u1FDE",
    "/aspergrave": "\u1FDD",
    "/aspertilde": "\u1FDF",
    "/assertion": "\u22A6",
    "/asterisk": "\u002A",
    "/asteriskaltonearabic": "\u066D",
    "/asteriskarabic": "\u066D",
    "/asteriskmath": "\u2217",
    "/asteriskmonospace": "\uFF0A",
    "/asterisksmall": "\uFE61",
    "/asterism": "\u2042",
    "/astonishedFace": "\u1F632",
    "/astroke": "\u2C65",
    "/astronomicaluranus": "\u26E2",
    "/asuperior": "\uF6E9",
    "/asympticallyequal": "\u2243",
    "/asymptoticallyequal": "\u2243",
    "/at": "\u0040",
    "/athleticShoe": "\u1F45F",
    "/atilde": "\u00E3",
    "/atmonospace": "\uFF20",
    "/atnachHafukh:hb": "\u05A2",
    "/atom": "\u269B",
    "/atsmall": "\uFE6B",
    "/attentionideographiccircled": "\u329F",
    "/aturned": "\u0250",
    "/au": "\uA737",
    "/aubengali": "\u0994",
    "/aubergine": "\u1F346",
    "/aubopomofo": "\u3120",
    "/audeva": "\u0914",
    "/aufullwidth": "\u3373",
    "/augujarati": "\u0A94",
    "/augurmukhi": "\u0A14",
    "/augusttelegraph": "\u32C7",
    "/aulengthmarkbengali": "\u09D7",
    "/aumatragurmukhi": "\u0A4C",
    "/austral": "\u20B3",
    "/automatedTellerMachine": "\u1F3E7",
    "/automobile": "\u1F697",
    "/auvowelsignbengali": "\u09CC",
    "/auvowelsigndeva": "\u094C",
    "/auvowelsigngujarati": "\u0ACC",
    "/av": "\uA739",
    "/avagrahadeva": "\u093D",
    "/avhorizontalbar": "\uA73B",
    "/ay": "\uA73D",
    "/aybarmenian": "\u0561",
    "/ayin": "\u05E2",
    "/ayin:hb": "\u05E2",
    "/ayinalt:hb": "\uFB20",
    "/ayinaltonehebrew": "\uFB20",
    "/ayinhebrew": "\u05E2",
    "/azla:hb": "\u059C",
    "/b": "\u0062",
    "/baarerusquare": "\u332D",
    "/babengali": "\u09AC",
    "/babyAngel": "\u1F47C",
    "/babyBottle": "\u1F37C",
    "/babyChick": "\u1F424",
    "/backLeftwardsArrowAbove": "\u1F519",
    "/backOfEnvelope": "\u1F582",
    "/backslash": "\u005C",
    "/backslashbarfunc": "\u2340",
    "/backslashdbl": "\u244A",
    "/backslashmonospace": "\uFF3C",
    "/bactrianCamel": "\u1F42B",
    "/badeva": "\u092C",
    "/badmintonRacquetAndShuttlecock": "\u1F3F8",
    "/bagdelimitersshapeleft": "\u27C5",
    "/bagdelimitersshaperight": "\u27C6",
    "/baggageClaim": "\u1F6C4",
    "/bagujarati": "\u0AAC",
    "/bagurmukhi": "\u0A2C",
    "/bahiragana": "\u3070",
    "/bahtthai": "\u0E3F",
    "/bakatakana": "\u30D0",
    "/balloon": "\u1F388",
    "/ballotBoldScriptX": "\u1F5F6",
    "/ballotBoxBallot": "\u1F5F3",
    "/ballotBoxBoldCheck": "\u1F5F9",
    "/ballotBoxBoldScriptX": "\u1F5F7",
    "/ballotBoxScriptX": "\u1F5F5",
    "/ballotScriptX": "\u1F5F4",
    "/bamurda": "\uA9A8",
    "/banana": "\u1F34C",
    "/bank": "\u1F3E6",
    "/banknoteDollarSign": "\u1F4B5",
    "/banknoteEuroSign": "\u1F4B6",
    "/banknotePoundSign": "\u1F4B7",
    "/banknoteYenSign": "\u1F4B4",
    "/bar": "\u007C",
    "/barChart": "\u1F4CA",
    "/barberPole": "\u1F488",
    "/barfullwidth": "\u3374",
    "/barmonospace": "\uFF5C",
    "/barquillverticalleft": "\u2E20",
    "/barquillverticalright": "\u2E21",
    "/baseball": "\u26BE",
    "/basketballAndHoop": "\u1F3C0",
    "/bath": "\u1F6C0",
    "/bathtub": "\u1F6C1",
    "/battery": "\u1F50B",
    "/bbopomofo": "\u3105",
    "/bcircle": "\u24D1",
    "/bdot": "\u1E03",
    "/bdotaccent": "\u1E03",
    "/bdotbelow": "\u1E05",
    "/beachUmbrella": "\u1F3D6",
    "/beamedAscendingMusicalNotes": "\u1F39C",
    "/beamedDescendingMusicalNotes": "\u1F39D",
    "/beamedeighthnotes": "\u266B",
    "/beamedsixteenthnotes": "\u266C",
    "/beamfunc": "\u2336",
    "/bearFace": "\u1F43B",
    "/beatingHeart": "\u1F493",
    "/because": "\u2235",
    "/becyr": "\u0431",
    "/becyrillic": "\u0431",
    "/bed": "\u1F6CF",
    "/beeh": "\u067B",
    "/beeh.fina": "\uFB53",
    "/beeh.init": "\uFB54",
    "/beeh.isol": "\uFB52",
    "/beeh.medi": "\uFB55",
    "/beerMug": "\u1F37A",
    "/beetasquare": "\u333C",
    "/beh": "\u0628",
    "/beh.fina": "\uFE90",
    "/beh.init": "\uFE91",
    "/beh.init_alefmaksura.fina": "\uFC09",
    "/beh.init_hah.fina": "\uFC06",
    "/beh.init_hah.medi": "\uFC9D",
    "/beh.init_heh.medi": "\uFCA0",
    "/beh.init_jeem.fina": "\uFC05",
    "/beh.init_jeem.medi": "\uFC9C",
    "/beh.init_khah.fina": "\uFC07",
    "/beh.init_khah.medi": "\uFC9E",
    "/beh.init_meem.fina": "\uFC08",
    "/beh.init_meem.medi": "\uFC9F",
    "/beh.init_yeh.fina": "\uFC0A",
    "/beh.isol": "\uFE8F",
    "/beh.medi": "\uFE92",
    "/beh.medi_alefmaksura.fina": "\uFC6E",
    "/beh.medi_hah.medi_yeh.fina": "\uFDC2",
    "/beh.medi_heh.medi": "\uFCE2",
    "/beh.medi_khah.medi_yeh.fina": "\uFD9E",
    "/beh.medi_meem.fina": "\uFC6C",
    "/beh.medi_meem.medi": "\uFCE1",
    "/beh.medi_noon.fina": "\uFC6D",
    "/beh.medi_reh.fina": "\uFC6A",
    "/beh.medi_yeh.fina": "\uFC6F",
    "/beh.medi_zain.fina": "\uFC6B",
    "/behDotBelowThreeDotsAbove": "\u0751",
    "/behInvertedSmallVBelow": "\u0755",
    "/behSmallV": "\u0756",
    "/behThreeDotsHorizontallyBelow": "\u0750",
    "/behThreeDotsUpBelow": "\u0752",
    "/behThreeDotsUpBelowTwoDotsAbove": "\u0753",
    "/behTwoDotsBelowDotAbove": "\u0754",
    "/beharabic": "\u0628",
    "/beheh": "\u0680",
    "/beheh.fina": "\uFB5B",
    "/beheh.init": "\uFB5C",
    "/beheh.isol": "\uFB5A",
    "/beheh.medi": "\uFB5D",
    "/behfinalarabic": "\uFE90",
    "/behinitialarabic": "\uFE91",
    "/behiragana": "\u3079",
    "/behmedialarabic": "\uFE92",
    "/behmeeminitialarabic": "\uFC9F",
    "/behmeemisolatedarabic": "\uFC08",
    "/behnoonfinalarabic": "\uFC6D",
    "/bekatakana": "\u30D9",
    "/bellCancellationStroke": "\u1F515",
    "/bellhopBell": "\u1F6CE",
    "/beltbuckle": "\u2444",
    "/benarmenian": "\u0562",
    "/beng:a": "\u0985",
    "/beng:aa": "\u0986",
    "/beng:aasign": "\u09BE",
    "/beng:abbreviationsign": "\u09FD",
    "/beng:ai": "\u0990",
    "/beng:aisign": "\u09C8",
    "/beng:anji": "\u0980",
    "/beng:anusvara": "\u0982",
    "/beng:au": "\u0994",
    "/beng:aulengthmark": "\u09D7",
    "/beng:ausign": "\u09CC",
    "/beng:avagraha": "\u09BD",
    "/beng:ba": "\u09AC",
    "/beng:bha": "\u09AD",
    "/beng:ca": "\u099A",
    "/beng:candrabindu": "\u0981",
    "/beng:cha": "\u099B",
    "/beng:currencyoneless": "\u09F8",
    "/beng:da": "\u09A6",
    "/beng:dda": "\u09A1",
    "/beng:ddha": "\u09A2",
    "/beng:dha": "\u09A7",
    "/beng:e": "\u098F",
    "/beng:eight": "\u09EE",
    "/beng:esign": "\u09C7",
    "/beng:five": "\u09EB",
    "/beng:four": "\u09EA",
    "/beng:fourcurrencynumerator": "\u09F7",
    "/beng:ga": "\u0997",
    "/beng:gandamark": "\u09FB",
    "/beng:gha": "\u0998",
    "/beng:ha": "\u09B9",
    "/beng:i": "\u0987",
    "/beng:ii": "\u0988",
    "/beng:iisign": "\u09C0",
    "/beng:isign": "\u09BF",
    "/beng:isshar": "\u09FA",
    "/beng:ja": "\u099C",
    "/beng:jha": "\u099D",
    "/beng:ka": "\u0995",
    "/beng:kha": "\u0996",
    "/beng:khandata": "\u09CE",
    "/beng:la": "\u09B2",
    "/beng:llvocal": "\u09E1",
    "/beng:llvocalsign": "\u09E3",
    "/beng:lvocal": "\u098C",
    "/beng:lvocalsign": "\u09E2",
    "/beng:ma": "\u09AE",
    "/beng:na": "\u09A8",
    "/beng:nga": "\u0999",
    "/beng:nine": "\u09EF",
    "/beng:nna": "\u09A3",
    "/beng:nukta": "\u09BC",
    "/beng:nya": "\u099E",
    "/beng:o": "\u0993",
    "/beng:one": "\u09E7",
    "/beng:onecurrencynumerator": "\u09F4",
    "/beng:osign": "\u09CB",
    "/beng:pa": "\u09AA",
    "/beng:pha": "\u09AB",
    "/beng:ra": "\u09B0",
    "/beng:ralowdiagonal": "\u09F1",
    "/beng:ramiddiagonal": "\u09F0",
    "/beng:rha": "\u09DD",
    "/beng:rra": "\u09DC",
    "/beng:rrvocal": "\u09E0",
    "/beng:rrvocalsign": "\u09C4",
    "/beng:rupee": "\u09F3",
    "/beng:rupeemark": "\u09F2",
    "/beng:rvocal": "\u098B",
    "/beng:rvocalsign": "\u09C3",
    "/beng:sa": "\u09B8",
    "/beng:seven": "\u09ED",
    "/beng:sha": "\u09B6",
    "/beng:six": "\u09EC",
    "/beng:sixteencurrencydenominator": "\u09F9",
    "/beng:ssa": "\u09B7",
    "/beng:ta": "\u09A4",
    "/beng:tha": "\u09A5",
    "/beng:three": "\u09E9",
    "/beng:threecurrencynumerator": "\u09F6",
    "/beng:tta": "\u099F",
    "/beng:ttha": "\u09A0",
    "/beng:two": "\u09E8",
    "/beng:twocurrencynumerator": "\u09F5",
    "/beng:u": "\u0989",
    "/beng:usign": "\u09C1",
    "/beng:uu": "\u098A",
    "/beng:uusign": "\u09C2",
    "/beng:vedicanusvara": "\u09FC",
    "/beng:virama": "\u09CD",
    "/beng:visarga": "\u0983",
    "/beng:ya": "\u09AF",
    "/beng:yya": "\u09DF",
    "/beng:zero": "\u09E6",
    "/bentoBox": "\u1F371",
    "/benzenering": "\u232C",
    "/benzeneringcircle": "\u23E3",
    "/bet": "\u05D1",
    "/bet:hb": "\u05D1",
    "/beta": "\u03B2",
    "/betasymbol": "\u03D0",
    "/betasymbolgreek": "\u03D0",
    "/betdagesh": "\uFB31",
    "/betdageshhebrew": "\uFB31",
    "/bethebrew": "\u05D1",
    "/betrafehebrew": "\uFB4C",
    "/between": "\u226C",
    "/betwithdagesh:hb": "\uFB31",
    "/betwithrafe:hb": "\uFB4C",
    "/bflourish": "\uA797",
    "/bhabengali": "\u09AD",
    "/bhadeva": "\u092D",
    "/bhagujarati": "\u0AAD",
    "/bhagurmukhi": "\u0A2D",
    "/bhook": "\u0253",
    "/bicycle": "\u1F6B2",
    "/bicyclist": "\u1F6B4",
    "/bihiragana": "\u3073",
    "/bikatakana": "\u30D3",
    "/bikini": "\u1F459",
    "/bilabialclick": "\u0298",
    "/billiards": "\u1F3B1",
    "/bindigurmukhi": "\u0A02",
    "/biohazard": "\u2623",
    "/bird": "\u1F426",
    "/birthdayCake": "\u1F382",
    "/birusquare": "\u3331",
    "/bishopblack": "\u265D",
    "/bishopwhite": "\u2657",
    "/bitcoin": "\u20BF",
    "/blackDownPointingBackhandIndex": "\u1F5A3",
    "/blackDroplet": "\u1F322",
    "/blackFolder": "\u1F5BF",
    "/blackHardShellFloppyDisk": "\u1F5AA",
    "/blackHeart": "\u1F5A4",
    "/blackLeftPointingBackhandIndex": "\u1F59C",
    "/blackPennant": "\u1F3F2",
    "/blackPushpin": "\u1F588",
    "/blackRightPointingBackhandIndex": "\u1F59D",
    "/blackRosette": "\u1F3F6",
    "/blackSkullAndCrossbones": "\u1F571",
    "/blackSquareButton": "\u1F532",
    "/blackTouchtoneTelephone": "\u1F57F",
    "/blackUpPointingBackhandIndex": "\u1F5A2",
    "/blackcircle": "\u25CF",
    "/blackcircleforrecord": "\u23FA",
    "/blackdiamond": "\u25C6",
    "/blackdownpointingtriangle": "\u25BC",
    "/blackforstopsquare": "\u23F9",
    "/blackleftpointingpointer": "\u25C4",
    "/blackleftpointingtriangle": "\u25C0",
    "/blacklenticularbracketleft": "\u3010",
    "/blacklenticularbracketleftvertical": "\uFE3B",
    "/blacklenticularbracketright": "\u3011",
    "/blacklenticularbracketrightvertical": "\uFE3C",
    "/blacklowerlefttriangle": "\u25E3",
    "/blacklowerrighttriangle": "\u25E2",
    "/blackmediumpointingtriangledown": "\u23F7",
    "/blackmediumpointingtriangleleft": "\u23F4",
    "/blackmediumpointingtriangleright": "\u23F5",
    "/blackmediumpointingtriangleup": "\u23F6",
    "/blackpointingdoubletrianglebarverticalleft": "\u23EE",
    "/blackpointingdoubletrianglebarverticalright": "\u23ED",
    "/blackpointingdoubletriangledown": "\u23EC",
    "/blackpointingdoubletriangleleft": "\u23EA",
    "/blackpointingdoubletriangleright": "\u23E9",
    "/blackpointingdoubletriangleup": "\u23EB",
    "/blackpointingtriangledoublebarverticalright": "\u23EF",
    "/blackrectangle": "\u25AC",
    "/blackrightpointingpointer": "\u25BA",
    "/blackrightpointingtriangle": "\u25B6",
    "/blacksmallsquare": "\u25AA",
    "/blacksmilingface": "\u263B",
    "/blacksquare": "\u25A0",
    "/blackstar": "\u2605",
    "/blackupperlefttriangle": "\u25E4",
    "/blackupperrighttriangle": "\u25E5",
    "/blackuppointingsmalltriangle": "\u25B4",
    "/blackuppointingtriangle": "\u25B2",
    "/blackwardsbulletleft": "\u204C",
    "/blackwardsbulletright": "\u204D",
    "/blank": "\u2423",
    "/blinebelow": "\u1E07",
    "/block": "\u2588",
    "/blossom": "\u1F33C",
    "/blowfish": "\u1F421",
    "/blueBook": "\u1F4D8",
    "/blueHeart": "\u1F499",
    "/bmonospace": "\uFF42",
    "/boar": "\u1F417",
    "/board": "\u2328",
    "/bobaimaithai": "\u0E1A",
    "/bohiragana": "\u307C",
    "/bokatakana": "\u30DC",
    "/bomb": "\u1F4A3",
    "/book": "\u1F56E",
    "/bookmark": "\u1F516",
    "/bookmarkTabs": "\u1F4D1",
    "/books": "\u1F4DA",
    "/bopo:a": "\u311A",
    "/bopo:ai": "\u311E",
    "/bopo:an": "\u3122",
    "/bopo:ang": "\u3124",
    "/bopo:au": "\u3120",
    "/bopo:b": "\u3105",
    "/bopo:c": "\u3118",
    "/bopo:ch": "\u3114",
    "/bopo:d": "\u3109",
    "/bopo:e": "\u311C",
    "/bopo:eh": "\u311D",
    "/bopo:ei": "\u311F",
    "/bopo:en": "\u3123",
    "/bopo:eng": "\u3125",
    "/bopo:er": "\u3126",
    "/bopo:f": "\u3108",
    "/bopo:g": "\u310D",
    "/bopo:gn": "\u312C",
    "/bopo:h": "\u310F",
    "/bopo:i": "\u3127",
    "/bopo:ih": "\u312D",
    "/bopo:iu": "\u3129",
    "/bopo:j": "\u3110",
    "/bopo:k": "\u310E",
    "/bopo:l": "\u310C",
    "/bopo:m": "\u3107",
    "/bopo:n": "\u310B",
    "/bopo:ng": "\u312B",
    "/bopo:o": "\u311B",
    "/bopo:ou": "\u3121",
    "/bopo:owithdotabove": "\u312E",
    "/bopo:p": "\u3106",
    "/bopo:q": "\u3111",
    "/bopo:r": "\u3116",
    "/bopo:s": "\u3119",
    "/bopo:sh": "\u3115",
    "/bopo:t": "\u310A",
    "/bopo:u": "\u3128",
    "/bopo:v": "\u312A",
    "/bopo:x": "\u3112",
    "/bopo:z": "\u3117",
    "/bopo:zh": "\u3113",
    "/borutosquare": "\u333E",
    "/bottlePoppingCork": "\u1F37E",
    "/bouquet": "\u1F490",
    "/bouquetOfFlowers": "\u1F395",
    "/bowAndArrow": "\u1F3F9",
    "/bowlOfHygieia": "\u1F54F",
    "/bowling": "\u1F3B3",
    "/boxlineverticalleft": "\u23B8",
    "/boxlineverticalright": "\u23B9",
    "/boy": "\u1F466",
    "/boys": "\u1F6C9",
    "/bparen": "\u249D",
    "/bparenthesized": "\u249D",
    "/bqfullwidth": "\u33C3",
    "/bqsquare": "\u33C3",
    "/braceex": "\uF8F4",
    "/braceleft": "\u007B",
    "/braceleftbt": "\uF8F3",
    "/braceleftmid": "\uF8F2",
    "/braceleftmonospace": "\uFF5B",
    "/braceleftsmall": "\uFE5B",
    "/bracelefttp": "\uF8F1",
    "/braceleftvertical": "\uFE37",
    "/braceright": "\u007D",
    "/bracerightbt": "\uF8FE",
    "/bracerightmid": "\uF8FD",
    "/bracerightmonospace": "\uFF5D",
    "/bracerightsmall": "\uFE5C",
    "/bracerighttp": "\uF8FC",
    "/bracerightvertical": "\uFE38",
    "/bracketangledblleft": "\u27EA",
    "/bracketangledblright": "\u27EB",
    "/bracketangleleft": "\u27E8",
    "/bracketangleright": "\u27E9",
    "/bracketbottomcurly": "\u23DF",
    "/bracketbottomsquare": "\u23B5",
    "/bracketcornerupleftsquare": "\u23A1",
    "/bracketcorneruprightsquare": "\u23A4",
    "/bracketdottedsubstitutionleft": "\u2E04",
    "/bracketdottedsubstitutionright": "\u2E05",
    "/bracketextensioncurly": "\u23AA",
    "/bracketextensionleftsquare": "\u23A2",
    "/bracketextensionrightsquare": "\u23A5",
    "/brackethalfbottomleft": "\u2E24",
    "/brackethalfbottomright": "\u2E25",
    "/brackethalftopleft": "\u2E22",
    "/brackethalftopright": "\u2E23",
    "/brackethookupleftcurly": "\u23A7",
    "/brackethookuprightcurly": "\u23AB",
    "/bracketleft": "\u005B",
    "/bracketleftbt": "\uF8F0",
    "/bracketleftex": "\uF8EF",
    "/bracketleftmonospace": "\uFF3B",
    "/bracketleftsquarequill": "\u2045",
    "/bracketlefttp": "\uF8EE",
    "/bracketlowercornerleftsquare": "\u23A3",
    "/bracketlowercornerrightsquare": "\u23A6",
    "/bracketlowerhookleftcurly": "\u23A9",
    "/bracketlowerhookrightcurly": "\u23AD",
    "/bracketmiddlepieceleftcurly": "\u23A8",
    "/bracketmiddlepiecerightcurly": "\u23AC",
    "/bracketoverbrackettopbottomsquare": "\u23B6",
    "/bracketparaphraselowleft": "\u2E1C",
    "/bracketparaphraselowright": "\u2E1D",
    "/bracketraisedleft": "\u2E0C",
    "/bracketraisedright": "\u2E0D",
    "/bracketright": "\u005D",
    "/bracketrightbt": "\uF8FB",
    "/bracketrightex": "\uF8FA",
    "/bracketrightmonospace": "\uFF3D",
    "/bracketrightsquarequill": "\u2046",
    "/bracketrighttp": "\uF8F9",
    "/bracketsectionupleftlowerrightcurly": "\u23B0",
    "/bracketsectionuprightlowerleftcurly": "\u23B1",
    "/bracketshellbottom": "\u23E1",
    "/bracketshelltop": "\u23E0",
    "/bracketshellwhiteleft": "\u27EC",
    "/bracketshellwhiteright": "\u27ED",
    "/bracketsubstitutionleft": "\u2E02",
    "/bracketsubstitutionright": "\u2E03",
    "/brackettopcurly": "\u23DE",
    "/brackettopsquare": "\u23B4",
    "/brackettranspositionleft": "\u2E09",
    "/brackettranspositionright": "\u2E0A",
    "/bracketwhitesquareleft": "\u27E6",
    "/bracketwhitesquareright": "\u27E7",
    "/branchbankidentification": "\u2446",
    "/bread": "\u1F35E",
    "/breve": "\u02D8",
    "/brevebelowcmb": "\u032E",
    "/brevecmb": "\u0306",
    "/breveinvertedbelowcmb": "\u032F",
    "/breveinvertedcmb": "\u0311",
    "/breveinverteddoublecmb": "\u0361",
    "/brevemetrical": "\u23D1",
    "/brideVeil": "\u1F470",
    "/bridgeAtNight": "\u1F309",
    "/bridgebelowcmb": "\u032A",
    "/bridgeinvertedbelowcmb": "\u033A",
    "/briefcase": "\u1F4BC",
    "/brll:blank": "\u2800",
    "/brokenHeart": "\u1F494",
    "/brokenbar": "\u00A6",
    "/brokencirclenorthwestarrow": "\u238B",
    "/bstroke": "\u0180",
    "/bsuperior": "\uF6EA",
    "/btopbar": "\u0183",
    "/bug": "\u1F41B",
    "/buhiragana": "\u3076",
    "/buildingConstruction": "\u1F3D7",
    "/bukatakana": "\u30D6",
    "/bullet": "\u2022",
    "/bulletinverse": "\u25D8",
    "/bulletoperator": "\u2219",
    "/bullhorn": "\u1F56B",
    "/bullhornSoundWaves": "\u1F56C",
    "/bullseye": "\u25CE",
    "/burrito": "\u1F32F",
    "/bus": "\u1F68C",
    "/busStop": "\u1F68F",
    "/bussyerusquare": "\u3334",
    "/bustInSilhouette": "\u1F464",
    "/bustsInSilhouette": "\u1F465",
    "/c": "\u0063",
    "/caarmenian": "\u056E",
    "/cabengali": "\u099A",
    "/cactus": "\u1F335",
    "/cacute": "\u0107",
    "/cadauna": "\u2106",
    "/cadeva": "\u091A",
    "/caduceus": "\u2624",
    "/cagujarati": "\u0A9A",
    "/cagurmukhi": "\u0A1A",
    "/cakraconsonant": "\uA9BF",
    "/calendar": "\u1F4C5",
    "/calfullwidth": "\u3388",
    "/callideographicparen": "\u323A",
    "/calsquare": "\u3388",
    "/camera": "\u1F4F7",
    "/cameraFlash": "\u1F4F8",
    "/camping": "\u1F3D5",
    "/camurda": "\uA996",
    "/cancellationX": "\u1F5D9",
    "/cancer": "\u264B",
    "/candle": "\u1F56F",
    "/candrabindubengali": "\u0981",
    "/candrabinducmb": "\u0310",
    "/candrabindudeva": "\u0901",
    "/candrabindugujarati": "\u0A81",
    "/candy": "\u1F36C",
    "/canoe": "\u1F6F6",
    "/capitulum": "\u2E3F",
    "/capricorn": "\u2651",
    "/capslock": "\u21EA",
    "/cardFileBox": "\u1F5C3",
    "/cardIndex": "\u1F4C7",
    "/cardIndexDividers": "\u1F5C2",
    "/careof": "\u2105",
    "/caret": "\u2038",
    "/caretinsertionpoint": "\u2041",
    "/carettildedownfunc": "\u2371",
    "/carettildeupfunc": "\u2372",
    "/caron": "\u02C7",
    "/caronbelowcmb": "\u032C",
    "/caroncmb": "\u030C",
    "/carouselHorse": "\u1F3A0",
    "/carpStreamer": "\u1F38F",
    "/carriagereturn": "\u21B5",
    "/carsliding": "\u26D0",
    "/castle": "\u26EB",
    "/cat": "\u1F408",
    "/catFace": "\u1F431",
    "/catFaceWithTearsOfJoy": "\u1F639",
    "/catFaceWithWrySmile": "\u1F63C",
    "/caution": "\u2621",
    "/cbar": "\uA793",
    "/cbopomofo": "\u3118",
    "/ccaron": "\u010D",
    "/ccedilla": "\u00E7",
    "/ccedillaacute": "\u1E09",
    "/ccfullwidth": "\u33C4",
    "/ccircle": "\u24D2",
    "/ccircumflex": "\u0109",
    "/ccurl": "\u0255",
    "/cdfullwidth": "\u33C5",
    "/cdot": "\u010B",
    "/cdotaccent": "\u010B",
    "/cdotreversed": "\uA73F",
    "/cdsquare": "\u33C5",
    "/cecak": "\uA981",
    "/cecaktelu": "\uA9B3",
    "/cedi": "\u20B5",
    "/cedilla": "\u00B8",
    "/cedillacmb": "\u0327",
    "/ceilingleft": "\u2308",
    "/ceilingright": "\u2309",
    "/celticCross": "\u1F548",
    "/cent": "\u00A2",
    "/centigrade": "\u2103",
    "/centinferior": "\uF6DF",
    "/centmonospace": "\uFFE0",
    "/centoldstyle": "\uF7A2",
    "/centreddotwhitediamond": "\u27D0",
    "/centreideographiccircled": "\u32A5",
    "/centreline": "\u2104",
    "/centrelineverticalsquarewhite": "\u2385",
    "/centsuperior": "\uF6E0",
    "/ceres": "\u26B3",
    "/chaarmenian": "\u0579",
    "/chabengali": "\u099B",
    "/chadeva": "\u091B",
    "/chagujarati": "\u0A9B",
    "/chagurmukhi": "\u0A1B",
    "/chains": "\u26D3",
    "/chair": "\u2441",
    "/chamkocircle": "\u327C",
    "/charactertie": "\u2040",
    "/chartDownwardsTrend": "\u1F4C9",
    "/chartUpwardsTrend": "\u1F4C8",
    "/chartUpwardsTrendAndYenSign": "\u1F4B9",
    "/chbopomofo": "\u3114",
    "/cheabkhasiancyrillic": "\u04BD",
    "/cheabkhcyr": "\u04BD",
    "/cheabkhtailcyr": "\u04BF",
    "/checkbox": "\u2610",
    "/checkboxchecked": "\u2611",
    "/checkboxx": "\u2612",
    "/checkmark": "\u2713",
    "/checyr": "\u0447",
    "/checyrillic": "\u0447",
    "/chedescenderabkhasiancyrillic": "\u04BF",
    "/chedescendercyrillic": "\u04B7",
    "/chedieresiscyr": "\u04F5",
    "/chedieresiscyrillic": "\u04F5",
    "/cheeringMegaphone": "\u1F4E3",
    "/cheharmenian": "\u0573",
    "/chekhakascyr": "\u04CC",
    "/chekhakassiancyrillic": "\u04CC",
    "/chequeredFlag": "\u1F3C1",
    "/cherries": "\u1F352",
    "/cherryBlossom": "\u1F338",
    "/chestnut": "\u1F330",
    "/chetailcyr": "\u04B7",
    "/chevertcyr": "\u04B9",
    "/cheverticalstrokecyrillic": "\u04B9",
    "/chi": "\u03C7",
    "/chicken": "\u1F414",
    "/chieuchacirclekorean": "\u3277",
    "/chieuchaparenkorean": "\u3217",
    "/chieuchcirclekorean": "\u3269",
    "/chieuchkorean": "\u314A",
    "/chieuchparenkorean": "\u3209",
    "/childrenCrossing": "\u1F6B8",
    "/chipmunk": "\u1F43F",
    "/chirho": "\u2627",
    "/chiron": "\u26B7",
    "/chochangthai": "\u0E0A",
    "/chochanthai": "\u0E08",
    "/chochingthai": "\u0E09",
    "/chochoethai": "\u0E0C",
    "/chocolateBar": "\u1F36B",
    "/chook": "\u0188",
    "/christmasTree": "\u1F384",
    "/church": "\u26EA",
    "/cieucacirclekorean": "\u3276",
    "/cieucaparenkorean": "\u3216",
    "/cieuccirclekorean": "\u3268",
    "/cieuckorean": "\u3148",
    "/cieucparenkorean": "\u3208",
    "/cieucuparenkorean": "\u321C",
    "/cinema": "\u1F3A6",
    "/circle": "\u25CB",
    "/circleallbutupperquadrantleftblack": "\u25D5",
    "/circlebackslashfunc": "\u2349",
    "/circleblack": "\u25CF",
    "/circledCrossPommee": "\u1F540",
    "/circledInformationSource": "\u1F6C8",
    "/circledasteriskoperator": "\u229B",
    "/circledbarnotchhorizontal": "\u2389",
    "/circledcrossinglanes": "\u26D2",
    "/circleddash": "\u229D",
    "/circleddivisionslash": "\u2298",
    "/circleddotoperator": "\u2299",
    "/circledequals": "\u229C",
    "/circlediaeresisfunc": "\u2365",
    "/circledminus": "\u2296",
    "/circledot": "\u2299",
    "/circledotrightwhite": "\u2686",
    "/circledotted": "\u25CC",
    "/circledringoperator": "\u229A",
    "/circledtriangledown": "\u238A",
    "/circlehalfleftblack": "\u25D0",
    "/circlehalfrightblack": "\u25D1",
    "/circleinversewhite": "\u25D9",
    "/circlejotfunc": "\u233E",
    "/circlelowerhalfblack": "\u25D2",
    "/circlelowerquadrantleftwhite": "\u25F5",
    "/circlelowerquadrantrightwhite": "\u25F6",
    "/circlemultiply": "\u2297",
    "/circleot": "\u2299",
    "/circleplus": "\u2295",
    "/circlepostalmark": "\u3036",
    "/circlestarfunc": "\u235F",
    "/circlestilefunc": "\u233D",
    "/circlestroketwodotsaboveheavy": "\u26E3",
    "/circletwodotsblackwhite": "\u2689",
    "/circletwodotswhite": "\u2687",
    "/circleunderlinefunc": "\u235C",
    "/circleupperhalfblack": "\u25D3",
    "/circleupperquadrantleftwhite": "\u25F4",
    "/circleupperquadrantrightblack": "\u25D4",
    "/circleupperquadrantrightwhite": "\u25F7",
    "/circleverticalfill": "\u25CD",
    "/circlewhite": "\u25CB",
    "/circlewhitedotrightblack": "\u2688",
    "/circlewithlefthalfblack": "\u25D0",
    "/circlewithrighthalfblack": "\u25D1",
    "/circumflex": "\u02C6",
    "/circumflexbelowcmb": "\u032D",
    "/circumflexcmb": "\u0302",
    "/circumflexlow": "\uA788",
    "/circusTent": "\u1F3AA",
    "/cityscape": "\u1F3D9",
    "/cityscapeAtDusk": "\u1F306",
    "/cjk:ideographiccomma": "\u3001",
    "/cjk:tortoiseshellbracketleft": "\u3014",
    "/cjk:tortoiseshellbracketright": "\u3015",
    "/clamshellMobilePhone": "\u1F581",
    "/clapperBoard": "\u1F3AC",
    "/clappingHandsSign": "\u1F44F",
    "/classicalBuilding": "\u1F3DB",
    "/clear": "\u2327",
    "/clearscreen": "\u239A",
    "/clickalveolar": "\u01C2",
    "/clickbilabial": "\u0298",
    "/clickdental": "\u01C0",
    "/clicklateral": "\u01C1",
    "/clickretroflex": "\u01C3",
    "/clinkingBeerMugs": "\u1F37B",
    "/clipboard": "\u1F4CB",
    "/clockFaceEight-thirty": "\u1F563",
    "/clockFaceEightOclock": "\u1F557",
    "/clockFaceEleven-thirty": "\u1F566",
    "/clockFaceElevenOclock": "\u1F55A",
    "/clockFaceFive-thirty": "\u1F560",
    "/clockFaceFiveOclock": "\u1F554",
    "/clockFaceFour-thirty": "\u1F55F",
    "/clockFaceFourOclock": "\u1F553",
    "/clockFaceNine-thirty": "\u1F564",
    "/clockFaceNineOclock": "\u1F558",
    "/clockFaceOne-thirty": "\u1F55C",
    "/clockFaceOneOclock": "\u1F550",
    "/clockFaceSeven-thirty": "\u1F562",
    "/clockFaceSevenOclock": "\u1F556",
    "/clockFaceSix-thirty": "\u1F561",
    "/clockFaceSixOclock": "\u1F555",
    "/clockFaceTen-thirty": "\u1F565",
    "/clockFaceTenOclock": "\u1F559",
    "/clockFaceThree-thirty": "\u1F55E",
    "/clockFaceThreeOclock": "\u1F552",
    "/clockFaceTwelve-thirty": "\u1F567",
    "/clockFaceTwelveOclock": "\u1F55B",
    "/clockFaceTwo-thirty": "\u1F55D",
    "/clockFaceTwoOclock": "\u1F551",
    "/clockwiseDownwardsAndUpwardsOpenCircleArrows": "\u1F503",
    "/clockwiseRightAndLeftSemicircleArrows": "\u1F5D8",
    "/clockwiseRightwardsAndLeftwardsOpenCircleArrows": "\u1F501",
    "/clockwiseRightwardsAndLeftwardsOpenCircleArrowsCircledOneOverlay": "\u1F502",
    "/closedBook": "\u1F4D5",
    "/closedLockKey": "\u1F510",
    "/closedMailboxLoweredFlag": "\u1F4EA",
    "/closedMailboxRaisedFlag": "\u1F4EB",
    "/closedUmbrella": "\u1F302",
    "/closedentryleft": "\u26DC",
    "/closeup": "\u2050",
    "/cloud": "\u2601",
    "/cloudLightning": "\u1F329",
    "/cloudRain": "\u1F327",
    "/cloudSnow": "\u1F328",
    "/cloudTornado": "\u1F32A",
    "/clsquare": "\u1F191",
    "/club": "\u2663",
    "/clubblack": "\u2663",
    "/clubsuitblack": "\u2663",
    "/clubsuitwhite": "\u2667",
    "/clubwhite": "\u2667",
    "/cm2fullwidth": "\u33A0",
    "/cm3fullwidth": "\u33A4",
    "/cmb:a": "\u0363",
    "/cmb:aaboveflat": "\u1DD3",
    "/cmb:aboveogonek": "\u1DCE",
    "/cmb:acute": "\u0301",
    "/cmb:acutebelow": "\u0317",
    "/cmb:acutegraveacute": "\u1DC9",
    "/cmb:acutemacron": "\u1DC7",
    "/cmb:acutetone": "\u0341",
    "/cmb:adieresis": "\u1DF2",
    "/cmb:ae": "\u1DD4",
    "/cmb:almostequalabove": "\u034C",
    "/cmb:almostequaltobelow": "\u1DFD",
    "/cmb:alpha": "\u1DE7",
    "/cmb:ao": "\u1DD5",
    "/cmb:arrowheadleftbelow": "\u0354",
    "/cmb:arrowheadrightabove": "\u0350",
    "/cmb:arrowheadrightarrowheadupbelow": "\u0356",
    "/cmb:arrowheadrightbelow": "\u0355",
    "/cmb:arrowleftrightbelow": "\u034D",
    "/cmb:arrowrightdoublebelow": "\u0362",
    "/cmb:arrowupbelow": "\u034E",
    "/cmb:asteriskbelow": "\u0359",
    "/cmb:av": "\u1DD6",
    "/cmb:b": "\u1DE8",
    "/cmb:belowbreve": "\u032E",
    "/cmb:beta": "\u1DE9",
    "/cmb:breve": "\u0306",
    "/cmb:brevemacron": "\u1DCB",
    "/cmb:bridgeabove": "\u0346",
    "/cmb:bridgebelow": "\u032A",
    "/cmb:c": "\u0368",
    "/cmb:candrabindu": "\u0310",
    "/cmb:caron": "\u030C",
    "/cmb:caronbelow": "\u032C",
    "/cmb:ccedilla": "\u1DD7",
    "/cmb:cedilla": "\u0327",
    "/cmb:circumflex": "\u0302",
    "/cmb:circumflexbelow": "\u032D",
    "/cmb:commaaccentbelow": "\u0326",
    "/cmb:commaturnedabove": "\u0312",
    "/cmb:d": "\u0369",
    "/cmb:dblarchinvertedbelow": "\u032B",
    "/cmb:dbloverline": "\u033F",
    "/cmb:dblverticallineabove": "\u030E",
    "/cmb:dblverticallinebelow": "\u0348",
    "/cmb:deletionmark": "\u1DFB",
    "/cmb:dialytikatonos": "\u0344",
    "/cmb:dieresis": "\u0308",
    "/cmb:dieresisbelow": "\u0324",
    "/cmb:dotaboveleft": "\u1DF8",
    "/cmb:dotaccent": "\u0307",
    "/cmb:dotbelowcomb": "\u0323",
    "/cmb:dotrightabove": "\u0358",
    "/cmb:dottedacute": "\u1DC1",
    "/cmb:dottedgrave": "\u1DC0",
    "/cmb:doubleabovecircumflex": "\u1DCD",
    "/cmb:doublebelowbreve": "\u035C",
    "/cmb:doublebreve": "\u035D",
    "/cmb:doubleinvertedbelowbreve": "\u1DFC",
    "/cmb:doubleringbelow": "\u035A",
    "/cmb:downtackbelow": "\u031E",
    "/cmb:e": "\u0364",
    "/cmb:equalbelow": "\u0347",
    "/cmb:esh": "\u1DEF",
    "/cmb:eth": "\u1DD9",
    "/cmb:f": "\u1DEB",
    "/cmb:fermata": "\u0352",
    "/cmb:g": "\u1DDA",
    "/cmb:graphemejoiner": "\u034F",
    "/cmb:grave": "\u0300",
    "/cmb:graveacutegrave": "\u1DC8",
    "/cmb:gravebelow": "\u0316",
    "/cmb:gravedouble": "\u030F",
    "/cmb:gravemacron": "\u1DC5",
    "/cmb:gravetone": "\u0340",
    "/cmb:gsmall": "\u1DDB",
    "/cmb:h": "\u036A",
    "/cmb:halfleftringabove": "\u0351",
    "/cmb:halfleftringbelow": "\u031C",
    "/cmb:halfrightringabove": "\u0357",
    "/cmb:halfrightringbelow": "\u0339",
    "/cmb:homotheticabove": "\u034B",
    "/cmb:hookabove": "\u0309",
    "/cmb:horn": "\u031B",
    "/cmb:hungarumlaut": "\u030B",
    "/cmb:i": "\u0365",
    "/cmb:insulard": "\u1DD8",
    "/cmb:invertedbelowbreve": "\u032F",
    "/cmb:invertedbreve": "\u0311",
    "/cmb:invertedbridgebelow": "\u033A",
    "/cmb:inverteddoublebreve": "\u0361",
    "/cmb:iotasub": "\u0345",
    "/cmb:isbelow": "\u1DD0",
    "/cmb:k": "\u1DDC",
    "/cmb:kavykaaboveleft": "\u1DF7",
    "/cmb:kavykaaboveright": "\u1DF6",
    "/cmb:koronis": "\u0343",
    "/cmb:l": "\u1DDD",
    "/cmb:leftangleabove": "\u031A",
    "/cmb:leftanglebelow": "\u0349",
    "/cmb:leftarrowheadabove": "\u1DFE",
    "/cmb:lefttackbelow": "\u0318",
    "/cmb:lineverticalabove": "\u030D",
    "/cmb:lineverticalbelow": "\u0329",
    "/cmb:longs": "\u1DE5",
    "/cmb:lowline": "\u0332",
    "/cmb:lowlinedouble": "\u0333",
    "/cmb:lsmall": "\u1DDE",
    "/cmb:lwithdoublemiddletilde": "\u1DEC",
    "/cmb:m": "\u036B",
    "/cmb:macron": "\u0304",
    "/cmb:macronacute": "\u1DC4",
    "/cmb:macronbelow": "\u0331",
    "/cmb:macronbreve": "\u1DCC",
    "/cmb:macrondouble": "\u035E",
    "/cmb:macrondoublebelow": "\u035F",
    "/cmb:macrongrave": "\u1DC6",
    "/cmb:minusbelow": "\u0320",
    "/cmb:msmall": "\u1DDF",
    "/cmb:n": "\u1DE0",
    "/cmb:nottildeabove": "\u034A",
    "/cmb:nsmall": "\u1DE1",
    "/cmb:o": "\u0366",
    "/cmb:odieresis": "\u1DF3",
    "/cmb:ogonek": "\u0328",
    "/cmb:overlaystrokelong": "\u0336",
    "/cmb:overlaystrokeshort": "\u0335",
    "/cmb:overline": "\u0305",
    "/cmb:owithlightcentralizationstroke": "\u1DED",
    "/cmb:p": "\u1DEE",
    "/cmb:palatalizedhookbelow": "\u0321",
    "/cmb:perispomeni": "\u0342",
    "/cmb:plusbelow": "\u031F",
    "/cmb:r": "\u036C",
    "/cmb:rbelow": "\u1DCA",
    "/cmb:retroflexhookbelow": "\u0322",
    "/cmb:reversedcommaabove": "\u0314",
    "/cmb:rightarrowheadanddownarrowheadbelow": "\u1DFF",
    "/cmb:righttackbelow": "\u0319",
    "/cmb:ringabove": "\u030A",
    "/cmb:ringbelow": "\u0325",
    "/cmb:rrotunda": "\u1DE3",
    "/cmb:rsmall": "\u1DE2",
    "/cmb:s": "\u1DE4",
    "/cmb:schwa": "\u1DEA",
    "/cmb:seagullbelow": "\u033C",
    "/cmb:snakebelow": "\u1DC2",
    "/cmb:soliduslongoverlay": "\u0338",
    "/cmb:solidusshortoverlay": "\u0337",
    "/cmb:squarebelow": "\u033B",
    "/cmb:suspensionmark": "\u1DC3",
    "/cmb:t": "\u036D",
    "/cmb:tilde": "\u0303",
    "/cmb:tildebelow": "\u0330",
    "/cmb:tildedouble": "\u0360",
    "/cmb:tildeoverlay": "\u0334",
    "/cmb:tildevertical": "\u033E",
    "/cmb:turnedabove": "\u0313",
    "/cmb:turnedcommaabove": "\u0315",
    "/cmb:u": "\u0367",
    "/cmb:udieresis": "\u1DF4",
    "/cmb:uptackabove": "\u1DF5",
    "/cmb:uptackbelow": "\u031D",
    "/cmb:urabove": "\u1DD1",
    "/cmb:usabove": "\u1DD2",
    "/cmb:uwithlightcentralizationstroke": "\u1DF0",
    "/cmb:v": "\u036E",
    "/cmb:w": "\u1DF1",
    "/cmb:wideinvertedbridgebelow": "\u1DF9",
    "/cmb:x": "\u036F",
    "/cmb:xabove": "\u033D",
    "/cmb:xbelow": "\u0353",
    "/cmb:z": "\u1DE6",
    "/cmb:zigzagabove": "\u035B",
    "/cmb:zigzagbelow": "\u1DCF",
    "/cmcubedsquare": "\u33A4",
    "/cmfullwidth": "\u339D",
    "/cmonospace": "\uFF43",
    "/cmsquaredsquare": "\u33A0",
    "/cntr:acknowledge": "\u2406",
    "/cntr:backspace": "\u2408",
    "/cntr:bell": "\u2407",
    "/cntr:blank": "\u2422",
    "/cntr:cancel": "\u2418",
    "/cntr:carriagereturn": "\u240D",
    "/cntr:datalinkescape": "\u2410",
    "/cntr:delete": "\u2421",
    "/cntr:deleteformtwo": "\u2425",
    "/cntr:devicecontrolfour": "\u2414",
    "/cntr:devicecontrolone": "\u2411",
    "/cntr:devicecontrolthree": "\u2413",
    "/cntr:devicecontroltwo": "\u2412",
    "/cntr:endofmedium": "\u2419",
    "/cntr:endoftext": "\u2403",
    "/cntr:endoftransmission": "\u2404",
    "/cntr:endoftransmissionblock": "\u2417",
    "/cntr:enquiry": "\u2405",
    "/cntr:escape": "\u241B",
    "/cntr:fileseparator": "\u241C",
    "/cntr:formfeed": "\u240C",
    "/cntr:groupseparator": "\u241D",
    "/cntr:horizontaltab": "\u2409",
    "/cntr:linefeed": "\u240A",
    "/cntr:negativeacknowledge": "\u2415",
    "/cntr:newline": "\u2424",
    "/cntr:null": "\u2400",
    "/cntr:openbox": "\u2423",
    "/cntr:recordseparator": "\u241E",
    "/cntr:shiftin": "\u240F",
    "/cntr:shiftout": "\u240E",
    "/cntr:space": "\u2420",
    "/cntr:startofheading": "\u2401",
    "/cntr:startoftext": "\u2402",
    "/cntr:substitute": "\u241A",
    "/cntr:substituteformtwo": "\u2426",
    "/cntr:synchronousidle": "\u2416",
    "/cntr:unitseparator": "\u241F",
    "/cntr:verticaltab": "\u240B",
    "/coarmenian": "\u0581",
    "/cocktailGlass": "\u1F378",
    "/coffin": "\u26B0",
    "/cofullwidth": "\u33C7",
    "/collision": "\u1F4A5",
    "/colon": "\u003A",
    "/colonequals": "\u2254",
    "/colonmod": "\uA789",
    "/colonmonetary": "\u20A1",
    "/colonmonospace": "\uFF1A",
    "/colonraisedmod": "\u02F8",
    "/colonsign": "\u20A1",
    "/colonsmall": "\uFE55",
    "/colontriangularhalfmod": "\u02D1",
    "/colontriangularmod": "\u02D0",
    "/comet": "\u2604",
    "/comma": "\u002C",
    "/commaabovecmb": "\u0313",
    "/commaaboverightcmb": "\u0315",
    "/commaaccent": "\uF6C3",
    "/commaarabic": "\u060C",
    "/commaarmenian": "\u055D",
    "/commabarfunc": "\u236A",
    "/commainferior": "\uF6E1",
    "/commamonospace": "\uFF0C",
    "/commaraised": "\u2E34",
    "/commareversed": "\u2E41",
    "/commareversedabovecmb": "\u0314",
    "/commareversedmod": "\u02BD",
    "/commasmall": "\uFE50",
    "/commasuperior": "\uF6E2",
    "/commaturnedabovecmb": "\u0312",
    "/commaturnedmod": "\u02BB",
    "/commercialat": "\uFE6B",
    "/commercialminussign": "\u2052",
    "/compass": "\u263C",
    "/complement": "\u2201",
    "/composition": "\u2384",
    "/compression": "\u1F5DC",
    "/con": "\uA76F",
    "/confettiBall": "\u1F38A",
    "/confoundedFace": "\u1F616",
    "/confusedFace": "\u1F615",
    "/congratulationideographiccircled": "\u3297",
    "/congratulationideographicparen": "\u3237",
    "/congruent": "\u2245",
    "/conicaltaper": "\u2332",
    "/conjunction": "\u260C",
    "/consquareupblack": "\u26FE",
    "/constructionSign": "\u1F6A7",
    "/constructionWorker": "\u1F477",
    "/containsasmembersmall": "\u220D",
    "/containsasnormalsubgroorequalup": "\u22B5",
    "/containsasnormalsubgroup": "\u22B3",
    "/containslonghorizontalstroke": "\u22FA",
    "/containsoverbar": "\u22FD",
    "/containsoverbarsmall": "\u22FE",
    "/containssmallverticalbarhorizontalstroke": "\u22FC",
    "/containsverticalbarhorizontalstroke": "\u22FB",
    "/continuousunderline": "\u2381",
    "/contourintegral": "\u222E",
    "/control": "\u2303",
    "/controlACK": "\u0006",
    "/controlBEL": "\u0007",
    "/controlBS": "\u0008",
    "/controlCAN": "\u0018",
    "/controlCR": "\u000D",
    "/controlDC1": "\u0011",
    "/controlDC2": "\u0012",
    "/controlDC3": "\u0013",
    "/controlDC4": "\u0014",
    "/controlDEL": "\u007F",
    "/controlDLE": "\u0010",
    "/controlEM": "\u0019",
    "/controlENQ": "\u0005",
    "/controlEOT": "\u0004",
    "/controlESC": "\u001B",
    "/controlETB": "\u0017",
    "/controlETX": "\u0003",
    "/controlFF": "\u000C",
    "/controlFS": "\u001C",
    "/controlGS": "\u001D",
    "/controlHT": "\u0009",
    "/controlKnobs": "\u1F39B",
    "/controlLF": "\u000A",
    "/controlNAK": "\u0015",
    "/controlRS": "\u001E",
    "/controlSI": "\u000F",
    "/controlSO": "\u000E",
    "/controlSOT": "\u0002",
    "/controlSTX": "\u0001",
    "/controlSUB": "\u001A",
    "/controlSYN": "\u0016",
    "/controlUS": "\u001F",
    "/controlVT": "\u000B",
    "/convavediamondwhite": "\u27E1",
    "/convenienceStore": "\u1F3EA",
    "/cookedRice": "\u1F35A",
    "/cookie": "\u1F36A",
    "/cooking": "\u1F373",
    "/coolsquare": "\u1F192",
    "/coproductarray": "\u2210",
    "/copyideographiccircled": "\u32A2",
    "/copyright": "\u00A9",
    "/copyrightsans": "\uF8E9",
    "/copyrightserif": "\uF6D9",
    "/cornerbottomleft": "\u231E",
    "/cornerbottomright": "\u231F",
    "/cornerbracketleft": "\u300C",
    "/cornerbracketlefthalfwidth": "\uFF62",
    "/cornerbracketleftvertical": "\uFE41",
    "/cornerbracketright": "\u300D",
    "/cornerbracketrighthalfwidth": "\uFF63",
    "/cornerbracketrightvertical": "\uFE42",
    "/cornerdotupleft": "\u27D4",
    "/cornertopleft": "\u231C",
    "/cornertopright": "\u231D",
    "/coroniseditorial": "\u2E0E",
    "/corporationsquare": "\u337F",
    "/correctideographiccircled": "\u32A3",
    "/corresponds": "\u2258",
    "/cosquare": "\u33C7",
    "/couchAndLamp": "\u1F6CB",
    "/counterbore": "\u2334",
    "/countersink": "\u2335",
    "/coupleHeart": "\u1F491",
    "/coverkgfullwidth": "\u33C6",
    "/coverkgsquare": "\u33C6",
    "/cow": "\u1F404",
    "/cowFace": "\u1F42E",
    "/cpalatalhook": "\uA794",
    "/cparen": "\u249E",
    "/cparenthesized": "\u249E",
    "/creditCard": "\u1F4B3",
    "/crescentMoon": "\u1F319",
    "/creversed": "\u2184",
    "/cricketBatAndBall": "\u1F3CF",
    "/crocodile": "\u1F40A",
    "/cropbottomleft": "\u230D",
    "/cropbottomright": "\u230C",
    "/croptopleft": "\u230F",
    "/croptopright": "\u230E",
    "/crossPommee": "\u1F542",
    "/crossPommeeHalf-circleBelow": "\u1F541",
    "/crossedFlags": "\u1F38C",
    "/crossedswords": "\u2694",
    "/crossinglanes": "\u26CC",
    "/crossmod": "\u02DF",
    "/crossofjerusalem": "\u2629",
    "/crossoflorraine": "\u2628",
    "/crossonshieldblack": "\u26E8",
    "/crown": "\u1F451",
    "/crrn:rupee": "\u20A8",
    "/cruzeiro": "\u20A2",
    "/cryingCatFace": "\u1F63F",
    "/cryingFace": "\u1F622",
    "/crystalBall": "\u1F52E",
    "/cstretched": "\u0297",
    "/cstroke": "\u023C",
    "/cuatrillo": "\uA72D",
    "/cuatrillocomma": "\uA72F",
    "/curlyand": "\u22CF",
    "/curlylogicaland": "\u22CF",
    "/curlylogicalor": "\u22CE",
    "/curlyor": "\u22CE",
    "/currency": "\u00A4",
    "/currencyExchange": "\u1F4B1",
    "/curryAndRice": "\u1F35B",
    "/custard": "\u1F36E",
    "/customeraccountnumber": "\u2449",
    "/customs": "\u1F6C3",
    "/cyclone": "\u1F300",
    "/cylindricity": "\u232D",
    "/cyrBreve": "\uF6D1",
    "/cyrFlex": "\uF6D2",
    "/cyrbreve": "\uF6D4",
    "/cyrflex": "\uF6D5",
    "/d": "\u0064",
    "/daarmenian": "\u0564",
    "/daasusquare": "\u3324",
    "/dabengali": "\u09A6",
    "/dad": "\u0636",
    "/dad.fina": "\uFEBE",
    "/dad.init": "\uFEBF",
    "/dad.init_alefmaksura.fina": "\uFD07",
    "/dad.init_hah.fina": "\uFC23",
    "/dad.init_hah.medi": "\uFCB5",
    "/dad.init_jeem.fina": "\uFC22",
    "/dad.init_jeem.medi": "\uFCB4",
    "/dad.init_khah.fina": "\uFC24",
    "/dad.init_khah.medi": "\uFCB6",
    "/dad.init_khah.medi_meem.medi": "\uFD70",
    "/dad.init_meem.fina": "\uFC25",
    "/dad.init_meem.medi": "\uFCB7",
    "/dad.init_reh.fina": "\uFD10",
    "/dad.init_yeh.fina": "\uFD08",
    "/dad.isol": "\uFEBD",
    "/dad.medi": "\uFEC0",
    "/dad.medi_alefmaksura.fina": "\uFD23",
    "/dad.medi_hah.medi_alefmaksura.fina": "\uFD6E",
    "/dad.medi_hah.medi_yeh.fina": "\uFDAB",
    "/dad.medi_khah.medi_meem.fina": "\uFD6F",
    "/dad.medi_reh.fina": "\uFD2C",
    "/dad.medi_yeh.fina": "\uFD24",
    "/dadarabic": "\u0636",
    "/daddotbelow": "\u06FB",
    "/dadeva": "\u0926",
    "/dadfinalarabic": "\uFEBE",
    "/dadinitialarabic": "\uFEBF",
    "/dadmedialarabic": "\uFEC0",
    "/dafullwidth": "\u3372",
    "/dagesh": "\u05BC",
    "/dagesh:hb": "\u05BC",
    "/dageshhebrew": "\u05BC",
    "/dagger": "\u2020",
    "/daggerKnife": "\u1F5E1",
    "/daggerdbl": "\u2021",
    "/daggerwithguardleft": "\u2E36",
    "/daggerwithguardright": "\u2E37",
    "/dagujarati": "\u0AA6",
    "/dagurmukhi": "\u0A26",
    "/dahal": "\u068C",
    "/dahal.fina": "\uFB85",
    "/dahal.isol": "\uFB84",
    "/dahiragana": "\u3060",
    "/dakatakana": "\u30C0",
    "/dal": "\u062F",
    "/dal.fina": "\uFEAA",
    "/dal.isol": "\uFEA9",
    "/dalInvertedSmallVBelow": "\u075A",
    "/dalTwoDotsVerticallyBelowSmallTah": "\u0759",
    "/dalarabic": "\u062F",
    "/daldotbelow": "\u068A",
    "/daldotbelowtahsmall": "\u068B",
    "/daldownthreedotsabove": "\u068F",
    "/dalet": "\u05D3",
    "/dalet:hb": "\u05D3",
    "/daletdagesh": "\uFB33",
    "/daletdageshhebrew": "\uFB33",
    "/dalethatafpatah": "\u05D3",
    "/dalethatafpatahhebrew": "\u05D3",
    "/dalethatafsegol": "\u05D3",
    "/dalethatafsegolhebrew": "\u05D3",
    "/dalethebrew": "\u05D3",
    "/dalethiriq": "\u05D3",
    "/dalethiriqhebrew": "\u05D3",
    "/daletholam": "\u05D3",
    "/daletholamhebrew": "\u05D3",
    "/daletpatah": "\u05D3",
    "/daletpatahhebrew": "\u05D3",
    "/daletqamats": "\u05D3",
    "/daletqamatshebrew": "\u05D3",
    "/daletqubuts": "\u05D3",
    "/daletqubutshebrew": "\u05D3",
    "/daletsegol": "\u05D3",
    "/daletsegolhebrew": "\u05D3",
    "/daletsheva": "\u05D3",
    "/daletshevahebrew": "\u05D3",
    "/dalettsere": "\u05D3",
    "/dalettserehebrew": "\u05D3",
    "/daletwide:hb": "\uFB22",
    "/daletwithdagesh:hb": "\uFB33",
    "/dalfinalarabic": "\uFEAA",
    "/dalfourdotsabove": "\u0690",
    "/dalinvertedV": "\u06EE",
    "/dalring": "\u0689",
    "/damahaprana": "\uA9A3",
    "/damma": "\u064F",
    "/dammaIsol": "\uFE78",
    "/dammaMedi": "\uFE79",
    "/dammaarabic": "\u064F",
    "/dammalowarabic": "\u064F",
    "/dammareversed": "\u065D",
    "/dammasmall": "\u0619",
    "/dammatan": "\u064C",
    "/dammatanIsol": "\uFE72",
    "/dammatanaltonearabic": "\u064C",
    "/dammatanarabic": "\u064C",
    "/dancer": "\u1F483",
    "/danda": "\u0964",
    "/dango": "\u1F361",
    "/darga:hb": "\u05A7",
    "/dargahebrew": "\u05A7",
    "/dargalefthebrew": "\u05A7",
    "/darkShade": "\u2593",
    "/darkSunglasses": "\u1F576",
    "/dashwithupturnleft": "\u2E43",
    "/dasiacmbcyr": "\u0485",
    "/dasiapneumatacyrilliccmb": "\u0485",
    "/dateseparator": "\u060D",
    "/dayeighteentelegraph": "\u33F1",
    "/dayeighttelegraph": "\u33E7",
    "/dayeleventelegraph": "\u33EA",
    "/dayfifteentelegraph": "\u33EE",
    "/dayfivetelegraph": "\u33E4",
    "/dayfourteentelegraph": "\u33ED",
    "/dayfourtelegraph": "\u33E3",
    "/daynineteentelegraph": "\u33F2",
    "/dayninetelegraph": "\u33E8",
    "/dayonetelegraph": "\u33E0",
    "/dayseventeentelegraph": "\u33F0",
    "/dayseventelegraph": "\u33E6",
    "/daysixteentelegraph": "\u33EF",
    "/daysixtelegraph": "\u33E5",
    "/daytentelegraph": "\u33E9",
    "/daythirteentelegraph": "\u33EC",
    "/daythirtyonetelegraph": "\u33FE",
    "/daythirtytelegraph": "\u33FD",
    "/daythreetelegraph": "\u33E2",
    "/daytwelvetelegraph": "\u33EB",
    "/daytwentyeighttelegraph": "\u33FB",
    "/daytwentyfivetelegraph": "\u33F8",
    "/daytwentyfourtelegraph": "\u33F7",
    "/daytwentyninetelegraph": "\u33FC",
    "/daytwentyonetelegraph": "\u33F4",
    "/daytwentyseventelegraph": "\u33FA",
    "/daytwentysixtelegraph": "\u33F9",
    "/daytwentytelegraph": "\u33F3",
    "/daytwentythreetelegraph": "\u33F6",
    "/daytwentytwotelegraph": "\u33F5",
    "/daytwotelegraph": "\u33E1",
    "/dbdigraph": "\u0238",
    "/dbfullwidth": "\u33C8",
    "/dblGrave": "\uF6D3",
    "/dblanglebracketleft": "\u300A",
    "/dblanglebracketleftvertical": "\uFE3D",
    "/dblanglebracketright": "\u300B",
    "/dblanglebracketrightvertical": "\uFE3E",
    "/dblarchinvertedbelowcmb": "\u032B",
    "/dblarrowNE": "\u21D7",
    "/dblarrowNW": "\u21D6",
    "/dblarrowSE": "\u21D8",
    "/dblarrowSW": "\u21D9",
    "/dblarrowdown": "\u21D3",
    "/dblarrowleft": "\u21D4",
    "/dblarrowleftright": "\u21D4",
    "/dblarrowleftrightstroke": "\u21CE",
    "/dblarrowleftstroke": "\u21CD",
    "/dblarrowright": "\u21D2",
    "/dblarrowrightstroke": "\u21CF",
    "/dblarrowup": "\u21D1",
    "/dblarrowupdown": "\u21D5",
    "/dbldanda": "\u0965",
    "/dbldnhorz": "\u2566",
    "/dbldnleft": "\u2557",
    "/dbldnright": "\u2554",
    "/dblgrave": "\uF6D6",
    "/dblgravecmb": "\u030F",
    "/dblhorz": "\u2550",
    "/dblintegral": "\u222C",
    "/dbllowline": "\u2017",
    "/dbllowlinecmb": "\u0333",
    "/dbloverlinecmb": "\u033F",
    "/dblprimemod": "\u02BA",
    "/dblstrokearrowdown": "\u21DF",
    "/dblstrokearrowup": "\u21DE",
    "/dbluphorz": "\u2569",
    "/dblupleft": "\u255D",
    "/dblupright": "\u255A",
    "/dblvert": "\u2551",
    "/dblverthorz": "\u256C",
    "/dblverticalbar": "\u2016",
    "/dblverticallineabovecmb": "\u030E",
    "/dblvertleft": "\u2563",
    "/dblvertright": "\u2560",
    "/dbopomofo": "\u3109",
    "/dbsquare": "\u33C8",
    "/dcaron": "\u010F",
    "/dcedilla": "\u1E11",
    "/dchecyr": "\u052D",
    "/dcircle": "\u24D3",
    "/dcircumflexbelow": "\u1E13",
    "/dcroat": "\u0111",
    "/dcurl": "\u0221",
    "/ddabengali": "\u09A1",
    "/ddadeva": "\u0921",
    "/ddagujarati": "\u0AA1",
    "/ddagurmukhi": "\u0A21",
    "/ddahal": "\u068D",
    "/ddahal.fina": "\uFB83",
    "/ddahal.isol": "\uFB82",
    "/ddal": "\u0688",
    "/ddal.fina": "\uFB89",
    "/ddal.isol": "\uFB88",
    "/ddalarabic": "\u0688",
    "/ddalfinalarabic": "\uFB89",
    "/ddamahaprana": "\uA99E",
    "/ddblstruckitalic": "\u2146",
    "/dddhadeva": "\u095C",
    "/ddhabengali": "\u09A2",
    "/ddhadeva": "\u0922",
    "/ddhagujarati": "\u0AA2",
    "/ddhagurmukhi": "\u0A22",
    "/ddot": "\u1E0B",
    "/ddotaccent": "\u1E0B",
    "/ddotbelow": "\u1E0D",
    "/decembertelegraph": "\u32CB",
    "/deciduousTree": "\u1F333",
    "/decimalexponent": "\u23E8",
    "/decimalseparatorarabic": "\u066B",
    "/decimalseparatorpersian": "\u066B",
    "/decreaseFontSize": "\u1F5DB",
    "/decyr": "\u0434",
    "/decyrillic": "\u0434",
    "/degree": "\u00B0",
    "/degreecelsius": "\u2103",
    "/degreefahrenheit": "\u2109",
    "/dehi:hb": "\u05AD",
    "/dehihebrew": "\u05AD",
    "/dehiragana": "\u3067",
    "/deicoptic": "\u03EF",
    "/dekatakana": "\u30C7",
    "/dekomicyr": "\u0501",
    "/deldiaeresisfunc": "\u2362",
    "/deleteleft": "\u232B",
    "/deleteright": "\u2326",
    "/deliveryTruck": "\u1F69A",
    "/delstilefunc": "\u2352",
    "/delta": "\u03B4",
    "/deltaequal": "\u225C",
    "/deltastilefunc": "\u234B",
    "/deltaturned": "\u018D",
    "/deltaunderlinefunc": "\u2359",
    "/deltildefunc": "\u236B",
    "/denominatorminusonenumeratorbengali": "\u09F8",
    "/dentistrybottomverticalleft": "\u23CC",
    "/dentistrybottomverticalright": "\u23BF",
    "/dentistrycircledownhorizontal": "\u23C1",
    "/dentistrycircleuphorizontal": "\u23C2",
    "/dentistrycirclevertical": "\u23C0",
    "/dentistrydownhorizontal": "\u23C9",
    "/dentistrytopverticalleft": "\u23CB",
    "/dentistrytopverticalright": "\u23BE",
    "/dentistrytriangledownhorizontal": "\u23C4",
    "/dentistrytriangleuphorizontal": "\u23C5",
    "/dentistrytrianglevertical": "\u23C3",
    "/dentistryuphorizontal": "\u23CA",
    "/dentistrywavedownhorizontal": "\u23C7",
    "/dentistrywaveuphorizontal": "\u23C8",
    "/dentistrywavevertical": "\u23C6",
    "/departmentStore": "\u1F3EC",
    "/derelictHouseBuilding": "\u1F3DA",
    "/desert": "\u1F3DC",
    "/desertIsland": "\u1F3DD",
    "/desisquare": "\u3325",
    "/desktopComputer": "\u1F5A5",
    "/desktopWindow": "\u1F5D4",
    "/deva:a": "\u0905",
    "/deva:aa": "\u0906",
    "/deva:aasign": "\u093E",
    "/deva:abbreviation": "\u0970",
    "/deva:acandra": "\u0972",
    "/deva:acute": "\u0954",
    "/deva:ai": "\u0910",
    "/deva:aisign": "\u0948",
    "/deva:anudatta": "\u0952",
    "/deva:anusvara": "\u0902",
    "/deva:ashort": "\u0904",
    "/deva:au": "\u0914",
    "/deva:ausign": "\u094C",
    "/deva:avagraha": "\u093D",
    "/deva:aw": "\u0975",
    "/deva:awsign": "\u094F",
    "/deva:ba": "\u092C",
    "/deva:bba": "\u097F",
    "/deva:bha": "\u092D",
    "/deva:ca": "\u091A",
    "/deva:candrabindu": "\u0901",
    "/deva:candrabinduinverted": "\u0900",
    "/deva:cha": "\u091B",
    "/deva:da": "\u0926",
    "/deva:danda": "\u0964",
    "/deva:dbldanda": "\u0965",
    "/deva:dda": "\u0921",
    "/deva:ddda": "\u097E",
    "/deva:dddha": "\u095C",
    "/deva:ddha": "\u0922",
    "/deva:dha": "\u0927",
    "/deva:dothigh": "\u0971",
    "/deva:e": "\u090F",
    "/deva:ecandra": "\u090D",
    "/deva:eight": "\u096E",
    "/deva:eshort": "\u090E",
    "/deva:esign": "\u0947",
    "/deva:esigncandra": "\u0945",
    "/deva:esignprishthamatra": "\u094E",
    "/deva:esignshort": "\u0946",
    "/deva:fa": "\u095E",
    "/deva:five": "\u096B",
    "/deva:four": "\u096A",
    "/deva:ga": "\u0917",
    "/deva:gga": "\u097B",
    "/deva:gha": "\u0918",
    "/deva:ghha": "\u095A",
    "/deva:glottalstop": "\u097D",
    "/deva:grave": "\u0953",
    "/deva:ha": "\u0939",
    "/deva:i": "\u0907",
    "/deva:ii": "\u0908",
    "/deva:iisign": "\u0940",
    "/deva:isign": "\u093F",
    "/deva:ja": "\u091C",
    "/deva:jha": "\u091D",
    "/deva:jja": "\u097C",
    "/deva:ka": "\u0915",
    "/deva:kha": "\u0916",
    "/deva:khha": "\u0959",
    "/deva:la": "\u0932",
    "/deva:lla": "\u0933",
    "/deva:llla": "\u0934",
    "/deva:llvocal": "\u0961",
    "/deva:llvocalsign": "\u0963",
    "/deva:lvocal": "\u090C",
    "/deva:lvocalsign": "\u0962",
    "/deva:ma": "\u092E",
    "/deva:marwaridda": "\u0978",
    "/deva:na": "\u0928",
    "/deva:nga": "\u0919",
    "/deva:nine": "\u096F",
    "/deva:nna": "\u0923",
    "/deva:nnna": "\u0929",
    "/deva:nukta": "\u093C",
    "/deva:nya": "\u091E",
    "/deva:o": "\u0913",
    "/deva:ocandra": "\u0911",
    "/deva:oe": "\u0973",
    "/deva:oesign": "\u093A",
    "/deva:om": "\u0950",
    "/deva:one": "\u0967",
    "/deva:ooe": "\u0974",
    "/deva:ooesign": "\u093B",
    "/deva:oshort": "\u0912",
    "/deva:osign": "\u094B",
    "/deva:osigncandra": "\u0949",
    "/deva:osignshort": "\u094A",
    "/deva:pa": "\u092A",
    "/deva:pha": "\u092B",
    "/deva:qa": "\u0958",
    "/deva:ra": "\u0930",
    "/deva:rha": "\u095D",
    "/deva:rra": "\u0931",
    "/deva:rrvocal": "\u0960",
    "/deva:rrvocalsign": "\u0944",
    "/deva:rvocal": "\u090B",
    "/deva:rvocalsign": "\u0943",
    "/deva:sa": "\u0938",
    "/deva:seven": "\u096D",
    "/deva:sha": "\u0936",
    "/deva:signelongcandra": "\u0955",
    "/deva:six": "\u096C",
    "/deva:ssa": "\u0937",
    "/deva:ta": "\u0924",
    "/deva:tha": "\u0925",
    "/deva:three": "\u0969",
    "/deva:tta": "\u091F",
    "/deva:ttha": "\u0920",
    "/deva:two": "\u0968",
    "/deva:u": "\u0909",
    "/deva:udatta": "\u0951",
    "/deva:ue": "\u0976",
    "/deva:uesign": "\u0956",
    "/deva:usign": "\u0941",
    "/deva:uu": "\u090A",
    "/deva:uue": "\u0977",
    "/deva:uuesign": "\u0957",
    "/deva:uusign": "\u0942",
    "/deva:va": "\u0935",
    "/deva:virama": "\u094D",
    "/deva:visarga": "\u0903",
    "/deva:ya": "\u092F",
    "/deva:yaheavy": "\u097A",
    "/deva:yya": "\u095F",
    "/deva:za": "\u095B",
    "/deva:zero": "\u0966",
    "/deva:zha": "\u0979",
    "/dezh": "\u02A4",
    "/dfemaledbl": "\u26A2",
    "/dhabengali": "\u09A7",
    "/dhadeva": "\u0927",
    "/dhagujarati": "\u0AA7",
    "/dhagurmukhi": "\u0A27",
    "/dhook": "\u0257",
    "/diaeresisgreaterfunc": "\u2369",
    "/dialytikatonos": "\u0385",
    "/dialytikatonoscmb": "\u0344",
    "/diametersign": "\u2300",
    "/diamond": "\u2666",
    "/diamondShapeADotInside": "\u1F4A0",
    "/diamondinsquarewhite": "\u26CB",
    "/diamondoperator": "\u22C4",
    "/diamondsuitwhite": "\u2662",
    "/diamondunderlinefunc": "\u235A",
    "/diamondwhitewithdiamondsmallblack": "\u25C8",
    "/diefive": "\u2684",
    "/diefour": "\u2683",
    "/dieone": "\u2680",
    "/dieresis": "\u00A8",
    "/dieresisacute": "\uF6D7",
    "/dieresisbelowcmb": "\u0324",
    "/dieresiscmb": "\u0308",
    "/dieresisgrave": "\uF6D8",
    "/dieresistilde": "\u1FC1",
    "/dieresistonos": "\u0385",
    "/dieselLocomotive": "\u1F6F2",
    "/diesix": "\u2685",
    "/diethree": "\u2682",
    "/dietwo": "\u2681",
    "/differencebetween": "\u224F",
    "/digamma": "\u03DD",
    "/digammapamphylian": "\u0377",
    "/digramgreateryang": "\u268C",
    "/digramgreateryin": "\u268F",
    "/digramlesseryang": "\u268E",
    "/digramlesseryin": "\u268D",
    "/dihiragana": "\u3062",
    "/dikatakana": "\u30C2",
    "/dimensionorigin": "\u2331",
    "/dingbatSAns-serifzerocircle": "\u1F10B",
    "/dingbatSAns-serifzerocircleblack": "\u1F10C",
    "/dinsular": "\uA77A",
    "/directHit": "\u1F3AF",
    "/directcurrentformtwo": "\u2393",
    "/dirgamurevowel": "\uA9BB",
    "/disabledcar": "\u26CD",
    "/disappointedButRelievedFace": "\u1F625",
    "/disappointedFace": "\u1F61E",
    "/discontinuousunderline": "\u2382",
    "/dittomark": "\u3003",
    "/divide": "\u00F7",
    "/divides": "\u2223",
    "/divisionslash": "\u2215",
    "/divisiontimes": "\u22C7",
    "/divorce": "\u26AE",
    "/dizzy": "\u1F4AB",
    "/dizzyFace": "\u1F635",
    "/djecyr": "\u0452",
    "/djecyrillic": "\u0452",
    "/djekomicyr": "\u0503",
    "/dkshade": "\u2593",
    "/dlfullwidth": "\u3397",
    "/dlinebelow": "\u1E0F",
    "/dlogicalorsquare": "\u27CF",
    "/dlogicalsquare": "\u27CE",
    "/dlsquare": "\u3397",
    "/dm2fullwidth": "\u3378",
    "/dm3fullwidth": "\u3379",
    "/dmacron": "\u0111",
    "/dmaledbl": "\u26A3",
    "/dmfullwidth": "\u3377",
    "/dmonospace": "\uFF44",
    "/dnblock": "\u2584",
    "/dndblhorzsng": "\u2565",
    "/dndblleftsng": "\u2556",
    "/dndblrightsng": "\u2553",
    "/dngb:airplane": "\u2708",
    "/dngb:arrowfeatheredblackNE": "\u27B6",
    "/dngb:arrowfeatheredblackSE": "\u27B4",
    "/dngb:arrowfeatheredblackheavyNE": "\u27B9",
    "/dngb:arrowfeatheredblackheavySE": "\u27B7",
    "/dngb:arrowheadrightblack": "\u27A4",
    "/dngb:arrowheadrightthreeDbottomlight": "\u27A3",
    "/dngb:arrowheadrightthreeDtoplight": "\u27A2",
    "/dngb:arrowheavyNE": "\u279A",
    "/dngb:arrowheavySE": "\u2798",
    "/dngb:arrowrightbacktiltedshadowedwhite": "\u27AB",
    "/dngb:arrowrightblack": "\u27A1",
    "/dngb:arrowrightcircledwhiteheavy": "\u27B2",
    "/dngb:arrowrightcurvedownblackheavy": "\u27A5",
    "/dngb:arrowrightcurveupblackheavy": "\u27A6",
    "/dngb:arrowrightfeatheredblack": "\u27B5",
    "/dngb:arrowrightfeatheredblackheavy": "\u27B8",
    "/dngb:arrowrightfeatheredwhite": "\u27B3",
    "/dngb:arrowrightfronttiltedshadowedwhite": "\u27AC",
    "/dngb:arrowrightheavy": "\u2799",
    "/dngb:arrowrightleftshadedwhite": "\u27AA",
    "/dngb:arrowrightoutlinedopen": "\u27BE",
    "/dngb:arrowrightpointed": "\u279B",
    "/dngb:arrowrightpointedblackheavy": "\u27A8",
    "/dngb:arrowrightrightshadedwhite": "\u27A9",
    "/dngb:arrowrightroundheavy": "\u279C",
    "/dngb:arrowrightsquatblack": "\u27A7",
    "/dngb:arrowrighttriangle": "\u279D",
    "/dngb:arrowrighttriangledashed": "\u279F",
    "/dngb:arrowrighttriangledashedheavy": "\u27A0",
    "/dngb:arrowrighttriangleheavy": "\u279E",
    "/dngb:arrowrightwedge": "\u27BC",
    "/dngb:arrowrightwedgeheavy": "\u27BD",
    "/dngb:arrowrightwideheavy": "\u2794",
    "/dngb:arrowshadowrightlowerwhiteheavy": "\u27AD",
    "/dngb:arrowshadowrightnotchedlowerwhite": "\u27AF",
    "/dngb:arrowshadowrightnotchedupperwhite": "\u27B1",
    "/dngb:arrowshadowrightupperwhiteheavy": "\u27AE",
    "/dngb:arrowteardropright": "\u27BA",
    "/dngb:arrowteardroprightheavy": "\u27BB",
    "/dngb:asteriskballoon": "\u2749",
    "/dngb:asteriskballoonfour": "\u2723",
    "/dngb:asteriskballoonheavyfour": "\u2724",
    "/dngb:asteriskcentreopen": "\u2732",
    "/dngb:asteriskclubfour": "\u2725",
    "/dngb:asteriskheavy": "\u2731",
    "/dngb:asteriskpointedsixteen": "\u273A",
    "/dngb:asteriskteardrop": "\u273B",
    "/dngb:asteriskteardropcentreopen": "\u273C",
    "/dngb:asteriskteardropfour": "\u2722",
    "/dngb:asteriskteardropheavy": "\u273D",
    "/dngb:asteriskteardroppinwheelheavy": "\u2743",
    "/dngb:asteriskteardroppropellereight": "\u274A",
    "/dngb:asteriskteardroppropellerheavyeight": "\u274B",
    "/dngb:ballotx": "\u2717",
    "/dngb:ballotxheavy": "\u2718",
    "/dngb:bracketleftpointedangleheavyornament": "\u2770",
    "/dngb:bracketleftpointedanglemediumornament": "\u276C",
    "/dngb:bracketrightpointedangleheavyornament": "\u2771",
    "/dngb:bracketrightpointedanglemediumornament": "\u276D",
    "/dngb:bracketshellleftlightornament": "\u2772",
    "/dngb:bracketshellrightlightornament": "\u2773",
    "/dngb:check": "\u2713",
    "/dngb:checkheavy": "\u2714",
    "/dngb:checkwhiteheavy": "\u2705",
    "/dngb:chevronsnowflakeheavy": "\u2746",
    "/dngb:circleshadowedwhite": "\u274D",
    "/dngb:commaheavydoubleornament": "\u275E",
    "/dngb:commaheavydoubleturnedornament": "\u275D",
    "/dngb:commaheavyornament": "\u275C",
    "/dngb:commaheavyturnedornament": "\u275B",
    "/dngb:compasstarpointedblackeight": "\u2737",
    "/dngb:compasstarpointedblackheavyeight": "\u2738",
    "/dngb:cross": "\u274C",
    "/dngb:crosscentreopen": "\u271B",
    "/dngb:crosscentreopenheavy": "\u271C",
    "/dngb:curlybracketleftmediumornament": "\u2774",
    "/dngb:curlybracketrightmediumornament": "\u2775",
    "/dngb:curlyloop": "\u27B0",
    "/dngb:curlyloopdouble": "\u27BF",
    "/dngb:curvedstemparagraphsignornament": "\u2761",
    "/dngb:diamondminusxblackwhite": "\u2756",
    "/dngb:divisionsignheavy": "\u2797",
    "/dngb:eightnegativecircled": "\u277D",
    "/dngb:eightsanscircled": "\u2787",
    "/dngb:eightsansnegativecircled": "\u2791",
    "/dngb:envelope": "\u2709",
    "/dngb:exclamationheavy": "\u2757",
    "/dngb:exclamationheavyornament": "\u2762",
    "/dngb:exclamationwhiteornament": "\u2755",
    "/dngb:fivenegativecircled": "\u277A",
    "/dngb:fivesanscircled": "\u2784",
    "/dngb:fivesansnegativecircled": "\u278E",
    "/dngb:floralheart": "\u2766",
    "/dngb:floralheartbulletrotated": "\u2767",
    "/dngb:floretteblack": "\u273F",
    "/dngb:floretteoutlinedpetalledblackeight": "\u2741",
    "/dngb:florettepetalledblackwhitesix": "\u273E",
    "/dngb:florettewhite": "\u2740",
    "/dngb:fournegativecircled": "\u2779",
    "/dngb:foursanscircled": "\u2783",
    "/dngb:foursansnegativecircled": "\u278D",
    "/dngb:greekcrossheavy": "\u271A",
    "/dngb:greekcrossoutlined": "\u2719",
    "/dngb:heartblackheavy": "\u2764",
    "/dngb:heartbulletrotatedblackheavy": "\u2765",
    "/dngb:heartexclamationheavyornament": "\u2763",
    "/dngb:hvictory": "\u270C",
    "/dngb:hwriting": "\u270D",
    "/dngb:latincross": "\u271D",
    "/dngb:latincrossoutlined": "\u271F",
    "/dngb:latincrossshadowedwhite": "\u271E",
    "/dngb:lowcommaheavydoubleornament": "\u2760",
    "/dngb:lowcommaheavyornament": "\u275F",
    "/dngb:maltesecross": "\u2720",
    "/dngb:minussignheavy": "\u2796",
    "/dngb:multiplicationx": "\u2715",
    "/dngb:multiplicationxheavy": "\u2716",
    "/dngb:nibblack": "\u2712",
    "/dngb:nibwhite": "\u2711",
    "/dngb:ninenegativecircled": "\u277E",
    "/dngb:ninesanscircled": "\u2788",
    "/dngb:ninesansnegativecircled": "\u2792",
    "/dngb:onenegativecircled": "\u2776",
    "/dngb:onesanscircled": "\u2780",
    "/dngb:onesansnegativecircled": "\u278A",
    "/dngb:parenthesisleftflattenedmediumornament": "\u276A",
    "/dngb:parenthesisleftmediumornament": "\u2768",
    "/dngb:parenthesisrightflattenedmediumornament": "\u276B",
    "/dngb:parenthesisrightmediumornament": "\u2769",
    "/dngb:pencil": "\u270F",
    "/dngb:pencillowerright": "\u270E",
    "/dngb:pencilupperright": "\u2710",
    "/dngb:plussignheavy": "\u2795",
    "/dngb:questionblackornament": "\u2753",
    "/dngb:questionwhiteornament": "\u2754",
    "/dngb:quotationleftpointedangleheavyornament": "\u276E",
    "/dngb:quotationrightpointedangleheavyornament": "\u276F",
    "/dngb:raisedfist": "\u270A",
    "/dngb:raisedh": "\u270B",
    "/dngb:safetyscissorsblack": "\u2700",
    "/dngb:scissorsblack": "\u2702",
    "/dngb:scissorslowerblade": "\u2703",
    "/dngb:scissorsupperblade": "\u2701",
    "/dngb:scissorswhite": "\u2704",
    "/dngb:sevennegativecircled": "\u277C",
    "/dngb:sevensanscircled": "\u2786",
    "/dngb:sevensansnegativecircled": "\u2790",
    "/dngb:sixnegativecircled": "\u277B",
    "/dngb:sixsanscircled": "\u2785",
    "/dngb:sixsansnegativecircled": "\u278F",
    "/dngb:snowflake": "\u2744",
    "/dngb:snowflaketight": "\u2745",
    "/dngb:sparkle": "\u2747",
    "/dngb:sparkleheavy": "\u2748",
    "/dngb:sparkles": "\u2728",
    "/dngb:spokedasteriskeight": "\u2733",
    "/dngb:squaredcrossnegative": "\u274E",
    "/dngb:squarelowerrightshadowedwhite": "\u2751",
    "/dngb:squareshadowlowerrightwhite": "\u274F",
    "/dngb:squareshadowupperrightwhite": "\u2750",
    "/dngb:squareupperrightshadowedwhite": "\u2752",
    "/dngb:starcentreblackwhite": "\u272C",
    "/dngb:starcentreopenblack": "\u272B",
    "/dngb:starcentreopenpointedcircledeight": "\u2742",
    "/dngb:starcircledwhite": "\u272A",
    "/dngb:starofdavid": "\u2721",
    "/dngb:staroutlinedblack": "\u272D",
    "/dngb:staroutlinedblackheavy": "\u272E",
    "/dngb:staroutlinedstresswhite": "\u2729",
    "/dngb:starpinwheel": "\u272F",
    "/dngb:starpointedblackeight": "\u2734",
    "/dngb:starpointedblackfour": "\u2726",
    "/dngb:starpointedblacksix": "\u2736",
    "/dngb:starpointedblacktwelve": "\u2739",
    "/dngb:starpointedpinwheeleight": "\u2735",
    "/dngb:starpointedwhitefour": "\u2727",
    "/dngb:starshadowedwhite": "\u2730",
    "/dngb:tapedrive": "\u2707",
    "/dngb:telephonelocationsign": "\u2706",
    "/dngb:tennegativecircled": "\u277F",
    "/dngb:tensanscircled": "\u2789",
    "/dngb:tensansnegativecircled": "\u2793",
    "/dngb:threenegativecircled": "\u2778",
    "/dngb:threesanscircled": "\u2782",
    "/dngb:threesansnegativecircled": "\u278C",
    "/dngb:twonegativecircled": "\u2777",
    "/dngb:twosanscircled": "\u2781",
    "/dngb:twosansnegativecircled": "\u278B",
    "/dngb:verticalbarheavy": "\u275A",
    "/dngb:verticalbarlight": "\u2758",
    "/dngb:verticalbarmedium": "\u2759",
    "/dnheavyhorzlight": "\u2530",
    "/dnheavyleftlight": "\u2512",
    "/dnheavyleftuplight": "\u2527",
    "/dnheavyrightlight": "\u250E",
    "/dnheavyrightuplight": "\u251F",
    "/dnheavyuphorzlight": "\u2541",
    "/dnlighthorzheavy": "\u252F",
    "/dnlightleftheavy": "\u2511",
    "/dnlightleftupheavy": "\u2529",
    "/dnlightrightheavy": "\u250D",
    "/dnlightrightupheavy": "\u2521",
    "/dnlightuphorzheavy": "\u2547",
    "/dnsnghorzdbl": "\u2564",
    "/dnsngleftdbl": "\u2555",
    "/dnsngrightdbl": "\u2552",
    "/doNotLitter": "\u1F6AF",
    "/dochadathai": "\u0E0E",
    "/document": "\u1F5CE",
    "/documentPicture": "\u1F5BB",
    "/documentText": "\u1F5B9",
    "/documentTextAndPicture": "\u1F5BA",
    "/dodekthai": "\u0E14",
    "/doesnotcontainasnormalsubgroorequalup": "\u22ED",
    "/doesnotcontainasnormalsubgroup": "\u22EB",
    "/doesnotdivide": "\u2224",
    "/doesnotforce": "\u22AE",
    "/doesnotprecede": "\u2280",
    "/doesnotprecedeorequal": "\u22E0",
    "/doesnotprove": "\u22AC",
    "/doesnotsucceed": "\u2281",
    "/doesnotsucceedorequal": "\u22E1",
    "/dog": "\u1F415",
    "/dogFace": "\u1F436",
    "/dohiragana": "\u3069",
    "/dokatakana": "\u30C9",
    "/dollar": "\u0024",
    "/dollarinferior": "\uF6E3",
    "/dollarmonospace": "\uFF04",
    "/dollaroldstyle": "\uF724",
    "/dollarsmall": "\uFE69",
    "/dollarsuperior": "\uF6E4",
    "/dolphin": "\u1F42C",
    "/dominohorizontal_00_00": "\u1F031",
    "/dominohorizontal_00_01": "\u1F032",
    "/dominohorizontal_00_02": "\u1F033",
    "/dominohorizontal_00_03": "\u1F034",
    "/dominohorizontal_00_04": "\u1F035",
    "/dominohorizontal_00_05": "\u1F036",
    "/dominohorizontal_00_06": "\u1F037",
    "/dominohorizontal_01_00": "\u1F038",
    "/dominohorizontal_01_01": "\u1F039",
    "/dominohorizontal_01_02": "\u1F03A",
    "/dominohorizontal_01_03": "\u1F03B",
    "/dominohorizontal_01_04": "\u1F03C",
    "/dominohorizontal_01_05": "\u1F03D",
    "/dominohorizontal_01_06": "\u1F03E",
    "/dominohorizontal_02_00": "\u1F03F",
    "/dominohorizontal_02_01": "\u1F040",
    "/dominohorizontal_02_02": "\u1F041",
    "/dominohorizontal_02_03": "\u1F042",
    "/dominohorizontal_02_04": "\u1F043",
    "/dominohorizontal_02_05": "\u1F044",
    "/dominohorizontal_02_06": "\u1F045",
    "/dominohorizontal_03_00": "\u1F046",
    "/dominohorizontal_03_01": "\u1F047",
    "/dominohorizontal_03_02": "\u1F048",
    "/dominohorizontal_03_03": "\u1F049",
    "/dominohorizontal_03_04": "\u1F04A",
    "/dominohorizontal_03_05": "\u1F04B",
    "/dominohorizontal_03_06": "\u1F04C",
    "/dominohorizontal_04_00": "\u1F04D",
    "/dominohorizontal_04_01": "\u1F04E",
    "/dominohorizontal_04_02": "\u1F04F",
    "/dominohorizontal_04_03": "\u1F050",
    "/dominohorizontal_04_04": "\u1F051",
    "/dominohorizontal_04_05": "\u1F052",
    "/dominohorizontal_04_06": "\u1F053",
    "/dominohorizontal_05_00": "\u1F054",
    "/dominohorizontal_05_01": "\u1F055",
    "/dominohorizontal_05_02": "\u1F056",
    "/dominohorizontal_05_03": "\u1F057",
    "/dominohorizontal_05_04": "\u1F058",
    "/dominohorizontal_05_05": "\u1F059",
    "/dominohorizontal_05_06": "\u1F05A",
    "/dominohorizontal_06_00": "\u1F05B",
    "/dominohorizontal_06_01": "\u1F05C",
    "/dominohorizontal_06_02": "\u1F05D",
    "/dominohorizontal_06_03": "\u1F05E",
    "/dominohorizontal_06_04": "\u1F05F",
    "/dominohorizontal_06_05": "\u1F060",
    "/dominohorizontal_06_06": "\u1F061",
    "/dominohorizontalback": "\u1F030",
    "/dominovertical_00_00": "\u1F063",
    "/dominovertical_00_01": "\u1F064",
    "/dominovertical_00_02": "\u1F065",
    "/dominovertical_00_03": "\u1F066",
    "/dominovertical_00_04": "\u1F067",
    "/dominovertical_00_05": "\u1F068",
    "/dominovertical_00_06": "\u1F069",
    "/dominovertical_01_00": "\u1F06A",
    "/dominovertical_01_01": "\u1F06B",
    "/dominovertical_01_02": "\u1F06C",
    "/dominovertical_01_03": "\u1F06D",
    "/dominovertical_01_04": "\u1F06E",
    "/dominovertical_01_05": "\u1F06F",
    "/dominovertical_01_06": "\u1F070",
    "/dominovertical_02_00": "\u1F071",
    "/dominovertical_02_01": "\u1F072",
    "/dominovertical_02_02": "\u1F073",
    "/dominovertical_02_03": "\u1F074",
    "/dominovertical_02_04": "\u1F075",
    "/dominovertical_02_05": "\u1F076",
    "/dominovertical_02_06": "\u1F077",
    "/dominovertical_03_00": "\u1F078",
    "/dominovertical_03_01": "\u1F079",
    "/dominovertical_03_02": "\u1F07A",
    "/dominovertical_03_03": "\u1F07B",
    "/dominovertical_03_04": "\u1F07C",
    "/dominovertical_03_05": "\u1F07D",
    "/dominovertical_03_06": "\u1F07E",
    "/dominovertical_04_00": "\u1F07F",
    "/dominovertical_04_01": "\u1F080",
    "/dominovertical_04_02": "\u1F081",
    "/dominovertical_04_03": "\u1F082",
    "/dominovertical_04_04": "\u1F083",
    "/dominovertical_04_05": "\u1F084",
    "/dominovertical_04_06": "\u1F085",
    "/dominovertical_05_00": "\u1F086",
    "/dominovertical_05_01": "\u1F087",
    "/dominovertical_05_02": "\u1F088",
    "/dominovertical_05_03": "\u1F089",
    "/dominovertical_05_04": "\u1F08A",
    "/dominovertical_05_05": "\u1F08B",
    "/dominovertical_05_06": "\u1F08C",
    "/dominovertical_06_00": "\u1F08D",
    "/dominovertical_06_01": "\u1F08E",
    "/dominovertical_06_02": "\u1F08F",
    "/dominovertical_06_03": "\u1F090",
    "/dominovertical_06_04": "\u1F091",
    "/dominovertical_06_05": "\u1F092",
    "/dominovertical_06_06": "\u1F093",
    "/dominoverticalback": "\u1F062",
    "/dong": "\u20AB",
    "/door": "\u1F6AA",
    "/dorusquare": "\u3326",
    "/dot": "\u27D1",
    "/dotaccent": "\u02D9",
    "/dotaccentcmb": "\u0307",
    "/dotbelowcmb": "\u0323",
    "/dotbelowcomb": "\u0323",
    "/dotkatakana": "\u30FB",
    "/dotlessbeh": "\u066E",
    "/dotlessfeh": "\u06A1",
    "/dotlessi": "\u0131",
    "/dotlessj": "\uF6BE",
    "/dotlessjstroke": "\u025F",
    "/dotlessjstrokehook": "\u0284",
    "/dotlesskhahabove": "\u06E1",
    "/dotlessqaf": "\u066F",
    "/dotlower:hb": "\u05C5",
    "/dotmath": "\u22C5",
    "/dotminus": "\u2238",
    "/dotplus": "\u2214",
    "/dotraised": "\u2E33",
    "/dots1": "\u2801",
    "/dots12": "\u2803",
    "/dots123": "\u2807",
    "/dots1234": "\u280F",
    "/dots12345": "\u281F",
    "/dots123456": "\u283F",
    "/dots1234567": "\u287F",
    "/dots12345678": "\u28FF",
    "/dots1234568": "\u28BF",
    "/dots123457": "\u285F",
    "/dots1234578": "\u28DF",
    "/dots123458": "\u289F",
    "/dots12346": "\u282F",
    "/dots123467": "\u286F",
    "/dots1234678": "\u28EF",
    "/dots123468": "\u28AF",
    "/dots12347": "\u284F",
    "/dots123478": "\u28CF",
    "/dots12348": "\u288F",
    "/dots1235": "\u2817",
    "/dots12356": "\u2837",
    "/dots123567": "\u2877",
    "/dots1235678": "\u28F7",
    "/dots123568": "\u28B7",
    "/dots12357": "\u2857",
    "/dots123578": "\u28D7",
    "/dots12358": "\u2897",
    "/dots1236": "\u2827",
    "/dots12367": "\u2867",
    "/dots123678": "\u28E7",
    "/dots12368": "\u28A7",
    "/dots1237": "\u2847",
    "/dots12378": "\u28C7",
    "/dots1238": "\u2887",
    "/dots124": "\u280B",
    "/dots1245": "\u281B",
    "/dots12456": "\u283B",
    "/dots124567": "\u287B",
    "/dots1245678": "\u28FB",
    "/dots124568": "\u28BB",
    "/dots12457": "\u285B",
    "/dots124578": "\u28DB",
    "/dots12458": "\u289B",
    "/dots1246": "\u282B",
    "/dots12467": "\u286B",
    "/dots124678": "\u28EB",
    "/dots12468": "\u28AB",
    "/dots1247": "\u284B",
    "/dots12478": "\u28CB",
    "/dots1248": "\u288B",
    "/dots125": "\u2813",
    "/dots1256": "\u2833",
    "/dots12567": "\u2873",
    "/dots125678": "\u28F3",
    "/dots12568": "\u28B3",
    "/dots1257": "\u2853",
    "/dots12578": "\u28D3",
    "/dots1258": "\u2893",
    "/dots126": "\u2823",
    "/dots1267": "\u2863",
    "/dots12678": "\u28E3",
    "/dots1268": "\u28A3",
    "/dots127": "\u2843",
    "/dots1278": "\u28C3",
    "/dots128": "\u2883",
    "/dots13": "\u2805",
    "/dots134": "\u280D",
    "/dots1345": "\u281D",
    "/dots13456": "\u283D",
    "/dots134567": "\u287D",
    "/dots1345678": "\u28FD",
    "/dots134568": "\u28BD",
    "/dots13457": "\u285D",
    "/dots134578": "\u28DD",
    "/dots13458": "\u289D",
    "/dots1346": "\u282D",
    "/dots13467": "\u286D",
    "/dots134678": "\u28ED",
    "/dots13468": "\u28AD",
    "/dots1347": "\u284D",
    "/dots13478": "\u28CD",
    "/dots1348": "\u288D",
    "/dots135": "\u2815",
    "/dots1356": "\u2835",
    "/dots13567": "\u2875",
    "/dots135678": "\u28F5",
    "/dots13568": "\u28B5",
    "/dots1357": "\u2855",
    "/dots13578": "\u28D5",
    "/dots1358": "\u2895",
    "/dots136": "\u2825",
    "/dots1367": "\u2865",
    "/dots13678": "\u28E5",
    "/dots1368": "\u28A5",
    "/dots137": "\u2845",
    "/dots1378": "\u28C5",
    "/dots138": "\u2885",
    "/dots14": "\u2809",
    "/dots145": "\u2819",
    "/dots1456": "\u2839",
    "/dots14567": "\u2879",
    "/dots145678": "\u28F9",
    "/dots14568": "\u28B9",
    "/dots1457": "\u2859",
    "/dots14578": "\u28D9",
    "/dots1458": "\u2899",
    "/dots146": "\u2829",
    "/dots1467": "\u2869",
    "/dots14678": "\u28E9",
    "/dots1468": "\u28A9",
    "/dots147": "\u2849",
    "/dots1478": "\u28C9",
    "/dots148": "\u2889",
    "/dots15": "\u2811",
    "/dots156": "\u2831",
    "/dots1567": "\u2871",
    "/dots15678": "\u28F1",
    "/dots1568": "\u28B1",
    "/dots157": "\u2851",
    "/dots1578": "\u28D1",
    "/dots158": "\u2891",
    "/dots16": "\u2821",
    "/dots167": "\u2861",
    "/dots1678": "\u28E1",
    "/dots168": "\u28A1",
    "/dots17": "\u2841",
    "/dots178": "\u28C1",
    "/dots18": "\u2881",
    "/dots2": "\u2802",
    "/dots23": "\u2806",
    "/dots234": "\u280E",
    "/dots2345": "\u281E",
    "/dots23456": "\u283E",
    "/dots234567": "\u287E",
    "/dots2345678": "\u28FE",
    "/dots234568": "\u28BE",
    "/dots23457": "\u285E",
    "/dots234578": "\u28DE",
    "/dots23458": "\u289E",
    "/dots2346": "\u282E",
    "/dots23467": "\u286E",
    "/dots234678": "\u28EE",
    "/dots23468": "\u28AE",
    "/dots2347": "\u284E",
    "/dots23478": "\u28CE",
    "/dots2348": "\u288E",
    "/dots235": "\u2816",
    "/dots2356": "\u2836",
    "/dots23567": "\u2876",
    "/dots235678": "\u28F6",
    "/dots23568": "\u28B6",
    "/dots2357": "\u2856",
    "/dots23578": "\u28D6",
    "/dots2358": "\u2896",
    "/dots236": "\u2826",
    "/dots2367": "\u2866",
    "/dots23678": "\u28E6",
    "/dots2368": "\u28A6",
    "/dots237": "\u2846",
    "/dots2378": "\u28C6",
    "/dots238": "\u2886",
    "/dots24": "\u280A",
    "/dots245": "\u281A",
    "/dots2456": "\u283A",
    "/dots24567": "\u287A",
    "/dots245678": "\u28FA",
    "/dots24568": "\u28BA",
    "/dots2457": "\u285A",
    "/dots24578": "\u28DA",
    "/dots2458": "\u289A",
    "/dots246": "\u282A",
    "/dots2467": "\u286A",
    "/dots24678": "\u28EA",
    "/dots2468": "\u28AA",
    "/dots247": "\u284A",
    "/dots2478": "\u28CA",
    "/dots248": "\u288A",
    "/dots25": "\u2812",
    "/dots256": "\u2832",
    "/dots2567": "\u2872",
    "/dots25678": "\u28F2",
    "/dots2568": "\u28B2",
    "/dots257": "\u2852",
    "/dots2578": "\u28D2",
    "/dots258": "\u2892",
    "/dots26": "\u2822",
    "/dots267": "\u2862",
    "/dots2678": "\u28E2",
    "/dots268": "\u28A2",
    "/dots27": "\u2842",
    "/dots278": "\u28C2",
    "/dots28": "\u2882",
    "/dots3": "\u2804",
    "/dots34": "\u280C",
    "/dots345": "\u281C",
    "/dots3456": "\u283C",
    "/dots34567": "\u287C",
    "/dots345678": "\u28FC",
    "/dots34568": "\u28BC",
    "/dots3457": "\u285C",
    "/dots34578": "\u28DC",
    "/dots3458": "\u289C",
    "/dots346": "\u282C",
    "/dots3467": "\u286C",
    "/dots34678": "\u28EC",
    "/dots3468": "\u28AC",
    "/dots347": "\u284C",
    "/dots3478": "\u28CC",
    "/dots348": "\u288C",
    "/dots35": "\u2814",
    "/dots356": "\u2834",
    "/dots3567": "\u2874",
    "/dots35678": "\u28F4",
    "/dots3568": "\u28B4",
    "/dots357": "\u2854",
    "/dots3578": "\u28D4",
    "/dots358": "\u2894",
    "/dots36": "\u2824",
    "/dots367": "\u2864",
    "/dots3678": "\u28E4",
    "/dots368": "\u28A4",
    "/dots37": "\u2844",
    "/dots378": "\u28C4",
    "/dots38": "\u2884",
    "/dots4": "\u2808",
    "/dots45": "\u2818",
    "/dots456": "\u2838",
    "/dots4567": "\u2878",
    "/dots45678": "\u28F8",
    "/dots4568": "\u28B8",
    "/dots457": "\u2858",
    "/dots4578": "\u28D8",
    "/dots458": "\u2898",
    "/dots46": "\u2828",
    "/dots467": "\u2868",
    "/dots4678": "\u28E8",
    "/dots468": "\u28A8",
    "/dots47": "\u2848",
    "/dots478": "\u28C8",
    "/dots48": "\u2888",
    "/dots5": "\u2810",
    "/dots56": "\u2830",
    "/dots567": "\u2870",
    "/dots5678": "\u28F0",
    "/dots568": "\u28B0",
    "/dots57": "\u2850",
    "/dots578": "\u28D0",
    "/dots58": "\u2890",
    "/dots6": "\u2820",
    "/dots67": "\u2860",
    "/dots678": "\u28E0",
    "/dots68": "\u28A0",
    "/dots7": "\u2840",
    "/dots78": "\u28C0",
    "/dots8": "\u2880",
    "/dotsquarefour": "\u2E2C",
    "/dottedcircle": "\u25CC",
    "/dottedcross": "\u205C",
    "/dotupper:hb": "\u05C4",
    "/doublebarvertical": "\u23F8",
    "/doubleyodpatah": "\uFB1F",
    "/doubleyodpatahhebrew": "\uFB1F",
    "/doughnut": "\u1F369",
    "/doveOfPeace": "\u1F54A",
    "/downtackbelowcmb": "\u031E",
    "/downtackmod": "\u02D5",
    "/downwarrowleftofuparrow": "\u21F5",
    "/dparen": "\u249F",
    "/dparenthesized": "\u249F",
    "/drachma": "\u20AF",
    "/dragon": "\u1F409",
    "/dragonFace": "\u1F432",
    "/draughtskingblack": "\u26C3",
    "/draughtskingwhite": "\u26C1",
    "/draughtsmanblack": "\u26C2",
    "/draughtsmanwhite": "\u26C0",
    "/dress": "\u1F457",
    "/driveslow": "\u26DA",
    "/dromedaryCamel": "\u1F42A",
    "/droplet": "\u1F4A7",
    "/dsquare": "\u1F1A5",
    "/dsuperior": "\uF6EB",
    "/dtail": "\u0256",
    "/dtopbar": "\u018C",
    "/duhiragana": "\u3065",
    "/dukatakana": "\u30C5",
    "/dul": "\u068E",
    "/dul.fina": "\uFB87",
    "/dul.isol": "\uFB86",
    "/dum": "\uA771",
    "/dvd": "\u1F4C0",
    "/dyeh": "\u0684",
    "/dyeh.fina": "\uFB73",
    "/dyeh.init": "\uFB74",
    "/dyeh.isol": "\uFB72",
    "/dyeh.medi": "\uFB75",
    "/dz": "\u01F3",
    "/dzaltone": "\u02A3",
    "/dzcaron": "\u01C6",
    "/dzcurl": "\u02A5",
    "/dzeabkhasiancyrillic": "\u04E1",
    "/dzeabkhcyr": "\u04E1",
    "/dzecyr": "\u0455",
    "/dzecyrillic": "\u0455",
    "/dzed": "\u02A3",
    "/dzedcurl": "\u02A5",
    "/dzhecyr": "\u045F",
    "/dzhecyrillic": "\u045F",
    "/dzjekomicyr": "\u0507",
    "/dzzhecyr": "\u052B",
    "/e": "\u0065",
    "/e-mail": "\u1F4E7",
    "/e.fina": "\uFBE5",
    "/e.inferior": "\u2091",
    "/e.init": "\uFBE6",
    "/e.isol": "\uFBE4",
    "/e.medi": "\uFBE7",
    "/eVfullwidth": "\u32CE",
    "/eacute": "\u00E9",
    "/earOfMaize": "\u1F33D",
    "/earOfRice": "\u1F33E",
    "/earth": "\u2641",
    "/earthGlobeAmericas": "\u1F30E",
    "/earthGlobeAsiaAustralia": "\u1F30F",
    "/earthGlobeEuropeAfrica": "\u1F30D",
    "/earthground": "\u23DA",
    "/earthideographiccircled": "\u328F",
    "/earthideographicparen": "\u322F",
    "/eastsyriaccross": "\u2671",
    "/ebengali": "\u098F",
    "/ebopomofo": "\u311C",
    "/ebreve": "\u0115",
    "/ecandradeva": "\u090D",
    "/ecandragujarati": "\u0A8D",
    "/ecandravowelsigndeva": "\u0945",
    "/ecandravowelsigngujarati": "\u0AC5",
    "/ecaron": "\u011B",
    "/ecedilla": "\u0229",
    "/ecedillabreve": "\u1E1D",
    "/echarmenian": "\u0565",
    "/echyiwnarmenian": "\u0587",
    "/ecircle": "\u24D4",
    "/ecirclekatakana": "\u32D3",
    "/ecircumflex": "\u00EA",
    "/ecircumflexacute": "\u1EBF",
    "/ecircumflexbelow": "\u1E19",
    "/ecircumflexdotbelow": "\u1EC7",
    "/ecircumflexgrave": "\u1EC1",
    "/ecircumflexhoi": "\u1EC3",
    "/ecircumflexhookabove": "\u1EC3",
    "/ecircumflextilde": "\u1EC5",
    "/ecyrillic": "\u0454",
    "/edblgrave": "\u0205",
    "/edblstruckitalic": "\u2147",
    "/edeva": "\u090F",
    "/edieresis": "\u00EB",
    "/edot": "\u0117",
    "/edotaccent": "\u0117",
    "/edotbelow": "\u1EB9",
    "/eegurmukhi": "\u0A0F",
    "/eekaasquare": "\u3308",
    "/eematragurmukhi": "\u0A47",
    "/efcyr": "\u0444",
    "/efcyrillic": "\u0444",
    "/egrave": "\u00E8",
    "/egravedbl": "\u0205",
    "/egujarati": "\u0A8F",
    "/egyptain": "\uA725",
    "/egyptalef": "\uA723",
    "/eharmenian": "\u0567",
    "/ehbopomofo": "\u311D",
    "/ehiragana": "\u3048",
    "/ehoi": "\u1EBB",
    "/ehookabove": "\u1EBB",
    "/eibopomofo": "\u311F",
    "/eight": "\u0038",
    "/eight.inferior": "\u2088",
    "/eight.roman": "\u2167",
    "/eight.romansmall": "\u2177",
    "/eight.superior": "\u2078",
    "/eightarabic": "\u0668",
    "/eightbengali": "\u09EE",
    "/eightcircle": "\u2467",
    "/eightcircledbl": "\u24FC",
    "/eightcircleinversesansserif": "\u2791",
    "/eightcomma": "\u1F109",
    "/eightdeva": "\u096E",
    "/eighteencircle": "\u2471",
    "/eighteencircleblack": "\u24F2",
    "/eighteenparen": "\u2485",
    "/eighteenparenthesized": "\u2485",
    "/eighteenperiod": "\u2499",
    "/eightfar": "\u06F8",
    "/eightgujarati": "\u0AEE",
    "/eightgurmukhi": "\u0A6E",
    "/eighthackarabic": "\u0668",
    "/eighthangzhou": "\u3028",
    "/eighthnote": "\u266A",
    "/eighthnotebeamed": "\u266B",
    "/eightideographiccircled": "\u3287",
    "/eightideographicparen": "\u3227",
    "/eightinferior": "\u2088",
    "/eightksquare": "\u1F19F",
    "/eightmonospace": "\uFF18",
    "/eightoldstyle": "\uF738",
    "/eightparen": "\u247B",
    "/eightparenthesized": "\u247B",
    "/eightperiod": "\u248F",
    "/eightpersian": "\u06F8",
    "/eightroman": "\u2177",
    "/eightsuperior": "\u2078",
    "/eightthai": "\u0E58",
    "/eightycirclesquare": "\u324F",
    "/einvertedbreve": "\u0207",
    "/eiotifiedcyr": "\u0465",
    "/eiotifiedcyrillic": "\u0465",
    "/eject": "\u23CF",
    "/ekatakana": "\u30A8",
    "/ekatakanahalfwidth": "\uFF74",
    "/ekonkargurmukhi": "\u0A74",
    "/ekorean": "\u3154",
    "/elcyr": "\u043B",
    "/elcyrillic": "\u043B",
    "/electricLightBulb": "\u1F4A1",
    "/electricPlug": "\u1F50C",
    "/electricTorch": "\u1F526",
    "/electricalintersection": "\u23E7",
    "/electricarrow": "\u2301",
    "/element": "\u2208",
    "/elementdotabove": "\u22F5",
    "/elementlonghorizontalstroke": "\u22F2",
    "/elementopeningup": "\u27D2",
    "/elementoverbar": "\u22F6",
    "/elementoverbarsmall": "\u22F7",
    "/elementsmall": "\u220A",
    "/elementsmallverticalbarhorizontalstroke": "\u22F4",
    "/elementtwoshorizontalstroke": "\u22F9",
    "/elementunderbar": "\u22F8",
    "/elementverticalbarhorizontalstroke": "\u22F3",
    "/elephant": "\u1F418",
    "/eleven.roman": "\u216A",
    "/eleven.romansmall": "\u217A",
    "/elevencircle": "\u246A",
    "/elevencircleblack": "\u24EB",
    "/elevenparen": "\u247E",
    "/elevenparenthesized": "\u247E",
    "/elevenperiod": "\u2492",
    "/elevenroman": "\u217A",
    "/elhookcyr": "\u0513",
    "/ellipsis": "\u2026",
    "/ellipsisdiagonaldownright": "\u22F1",
    "/ellipsisdiagonalupright": "\u22F0",
    "/ellipsismidhorizontal": "\u22EF",
    "/ellipsisvertical": "\u22EE",
    "/elmiddlehookcyr": "\u0521",
    "/elsharptailcyr": "\u04C6",
    "/eltailcyr": "\u052F",
    "/emacron": "\u0113",
    "/emacronacute": "\u1E17",
    "/emacrongrave": "\u1E15",
    "/emcyr": "\u043C",
    "/emcyrillic": "\u043C",
    "/emdash": "\u2014",
    "/emdashdbl": "\u2E3A",
    "/emdashtpl": "\u2E3B",
    "/emdashvertical": "\uFE31",
    "/emojiModifierFitzpatrickType-1-2": "\u1F3FB",
    "/emojiModifierFitzpatrickType-3": "\u1F3FC",
    "/emojiModifierFitzpatrickType-4": "\u1F3FD",
    "/emojiModifierFitzpatrickType-5": "\u1F3FE",
    "/emojiModifierFitzpatrickType-6": "\u1F3FF",
    "/emonospace": "\uFF45",
    "/emphasis": "\u2383",
    "/emphasismarkarmenian": "\u055B",
    "/emptyDocument": "\u1F5CB",
    "/emptyNote": "\u1F5C5",
    "/emptyNotePad": "\u1F5C7",
    "/emptyNotePage": "\u1F5C6",
    "/emptyPage": "\u1F5CC",
    "/emptyPages": "\u1F5CD",
    "/emptyset": "\u2205",
    "/emquad": "\u2001",
    "/emsharptailcyr": "\u04CE",
    "/emspace": "\u2003",
    "/enbopomofo": "\u3123",
    "/encyr": "\u043D",
    "/encyrillic": "\u043D",
    "/endLeftwardsArrowAbove": "\u1F51A",
    "/endash": "\u2013",
    "/endashvertical": "\uFE32",
    "/endescendercyrillic": "\u04A3",
    "/endpro": "\u220E",
    "/eng": "\u014B",
    "/engbopomofo": "\u3125",
    "/engecyr": "\u04A5",
    "/enghecyrillic": "\u04A5",
    "/enhookcyr": "\u04C8",
    "/enhookcyrillic": "\u04C8",
    "/enhookleftcyr": "\u0529",
    "/enmiddlehookcyr": "\u0523",
    "/enotch": "\u2C78",
    "/enquad": "\u2000",
    "/ensharptailcyr": "\u04CA",
    "/enspace": "\u2002",
    "/entailcyr": "\u04A3",
    "/enter": "\u2386",
    "/enterpriseideographiccircled": "\u32AD",
    "/enterpriseideographicparen": "\u323D",
    "/envelopeDownwardsArrowAbove": "\u1F4E9",
    "/envelopeLightning": "\u1F584",
    "/eogonek": "\u0119",
    "/eokorean": "\u3153",
    "/eopen": "\u025B",
    "/eopenclosed": "\u029A",
    "/eopenreversed": "\u025C",
    "/eopenreversedclosed": "\u025E",
    "/eopenreversedhook": "\u025D",
    "/eparen": "\u24A0",
    "/eparenthesized": "\u24A0",
    "/epsilon": "\u03B5",
    "/epsilonacute": "\u1F73",
    "/epsilonasper": "\u1F11",
    "/epsilonasperacute": "\u1F15",
    "/epsilonaspergrave": "\u1F13",
    "/epsilongrave": "\u1F72",
    "/epsilonlenis": "\u1F10",
    "/epsilonlenisacute": "\u1F14",
    "/epsilonlenisgrave": "\u1F12",
    "/epsilonlunatesymbol": "\u03F5",
    "/epsilonreversedlunatesymbol": "\u03F6",
    "/epsilontonos": "\u03AD",
    "/epsilonunderlinefunc": "\u2377",
    "/equal": "\u003D",
    "/equal.inferior": "\u208C",
    "/equal.superior": "\u207C",
    "/equalandparallel": "\u22D5",
    "/equalbydefinition": "\u225D",
    "/equalmonospace": "\uFF1D",
    "/equalorgreater": "\u22DD",
    "/equalorless": "\u22DC",
    "/equalorprecedes": "\u22DE",
    "/equalorsucceeds": "\u22DF",
    "/equalscolon": "\u2255",
    "/equalsmall": "\uFE66",
    "/equalsuperior": "\u207C",
    "/equiangular": "\u225A",
    "/equivalence": "\u2261",
    "/equivalent": "\u224D",
    "/eranameheiseisquare": "\u337B",
    "/eranamemeizisquare": "\u337E",
    "/eranamesyouwasquare": "\u337C",
    "/eranametaisyousquare": "\u337D",
    "/eraseleft": "\u232B",
    "/eraseright": "\u2326",
    "/erbopomofo": "\u3126",
    "/ercyr": "\u0440",
    "/ercyrillic": "\u0440",
    "/ereversed": "\u0258",
    "/ereversedcyr": "\u044D",
    "/ereversedcyrillic": "\u044D",
    "/ereverseddieresiscyr": "\u04ED",
    "/ergfullwidth": "\u32CD",
    "/ertickcyr": "\u048F",
    "/escript": "\u212F",
    "/escyr": "\u0441",
    "/escyrillic": "\u0441",
    "/esdescendercyrillic": "\u04AB",
    "/esh": "\u0283",
    "/eshcurl": "\u0286",
    "/eshortdeva": "\u090E",
    "/eshortvowelsigndeva": "\u0946",
    "/eshreversedloop": "\u01AA",
    "/eshsquatreversed": "\u0285",
    "/esmallhiragana": "\u3047",
    "/esmallkatakana": "\u30A7",
    "/esmallkatakanahalfwidth": "\uFF6A",
    "/estailcyr": "\u04AB",
    "/estimated": "\u212E",
    "/estimates": "\u2259",
    "/estroke": "\u0247",
    "/esukuudosquare": "\u3307",
    "/esuperior": "\uF6EC",
    "/et": "\uA76B",
    "/eta": "\u03B7",
    "/etaacute": "\u1F75",
    "/etaacuteiotasub": "\u1FC4",
    "/etaasper": "\u1F21",
    "/etaasperacute": "\u1F25",
    "/etaasperacuteiotasub": "\u1F95",
    "/etaaspergrave": "\u1F23",
    "/etaaspergraveiotasub": "\u1F93",
    "/etaasperiotasub": "\u1F91",
    "/etaaspertilde": "\u1F27",
    "/etaaspertildeiotasub": "\u1F97",
    "/etagrave": "\u1F74",
    "/etagraveiotasub": "\u1FC2",
    "/etaiotasub": "\u1FC3",
    "/etalenis": "\u1F20",
    "/etalenisacute": "\u1F24",
    "/etalenisacuteiotasub": "\u1F94",
    "/etalenisgrave": "\u1F22",
    "/etalenisgraveiotasub": "\u1F92",
    "/etalenisiotasub": "\u1F90",
    "/etalenistilde": "\u1F26",
    "/etalenistildeiotasub": "\u1F96",
    "/etarmenian": "\u0568",
    "/etatilde": "\u1FC6",
    "/etatildeiotasub": "\u1FC7",
    "/etatonos": "\u03AE",
    "/eth": "\u00F0",
    "/ethi:aaglottal": "\u12A3",
    "/ethi:aglottal": "\u12A0",
    "/ethi:ba": "\u1260",
    "/ethi:baa": "\u1263",
    "/ethi:be": "\u1265",
    "/ethi:bee": "\u1264",
    "/ethi:bi": "\u1262",
    "/ethi:bo": "\u1266",
    "/ethi:bu": "\u1261",
    "/ethi:bwa": "\u1267",
    "/ethi:ca": "\u1278",
    "/ethi:caa": "\u127B",
    "/ethi:ce": "\u127D",
    "/ethi:cee": "\u127C",
    "/ethi:cha": "\u1328",
    "/ethi:chaa": "\u132B",
    "/ethi:che": "\u132D",
    "/ethi:chee": "\u132C",
    "/ethi:chi": "\u132A",
    "/ethi:cho": "\u132E",
    "/ethi:chu": "\u1329",
    "/ethi:chwa": "\u132F",
    "/ethi:ci": "\u127A",
    "/ethi:co": "\u127E",
    "/ethi:colon": "\u1365",
    "/ethi:comma": "\u1363",
    "/ethi:cu": "\u1279",
    "/ethi:cwa": "\u127F",
    "/ethi:da": "\u12F0",
    "/ethi:daa": "\u12F3",
    "/ethi:dda": "\u12F8",
    "/ethi:ddaa": "\u12FB",
    "/ethi:dde": "\u12FD",
    "/ethi:ddee": "\u12FC",
    "/ethi:ddi": "\u12FA",
    "/ethi:ddo": "\u12FE",
    "/ethi:ddu": "\u12F9",
    "/ethi:ddwa": "\u12FF",
    "/ethi:de": "\u12F5",
    "/ethi:dee": "\u12F4",
    "/ethi:di": "\u12F2",
    "/ethi:do": "\u12F6",
    "/ethi:du": "\u12F1",
    "/ethi:dwa": "\u12F7",
    "/ethi:eeglottal": "\u12A4",
    "/ethi:eglottal": "\u12A5",
    "/ethi:eight": "\u1370",
    "/ethi:eighty": "\u1379",
    "/ethi:fa": "\u1348",
    "/ethi:faa": "\u134B",
    "/ethi:fe": "\u134D",
    "/ethi:fee": "\u134C",
    "/ethi:fi": "\u134A",
    "/ethi:fifty": "\u1376",
    "/ethi:five": "\u136D",
    "/ethi:fo": "\u134E",
    "/ethi:forty": "\u1375",
    "/ethi:four": "\u136C",
    "/ethi:fu": "\u1349",
    "/ethi:fullstop": "\u1362",
    "/ethi:fwa": "\u134F",
    "/ethi:fya": "\u135A",
    "/ethi:ga": "\u1308",
    "/ethi:gaa": "\u130B",
    "/ethi:ge": "\u130D",
    "/ethi:gee": "\u130C",
    "/ethi:geminationandvowellengthmarkcmb": "\u135D",
    "/ethi:geminationmarkcmb": "\u135F",
    "/ethi:gga": "\u1318",
    "/ethi:ggaa": "\u131B",
    "/ethi:gge": "\u131D",
    "/ethi:ggee": "\u131C",
    "/ethi:ggi": "\u131A",
    "/ethi:ggo": "\u131E",
    "/ethi:ggu": "\u1319",
    "/ethi:ggwaa": "\u131F",
    "/ethi:gi": "\u130A",
    "/ethi:go": "\u130E",
    "/ethi:goa": "\u130F",
    "/ethi:gu": "\u1309",
    "/ethi:gwa": "\u1310",
    "/ethi:gwaa": "\u1313",
    "/ethi:gwe": "\u1315",
    "/ethi:gwee": "\u1314",
    "/ethi:gwi": "\u1312",
    "/ethi:ha": "\u1200",
    "/ethi:haa": "\u1203",
    "/ethi:he": "\u1205",
    "/ethi:hee": "\u1204",
    "/ethi:hha": "\u1210",
    "/ethi:hhaa": "\u1213",
    "/ethi:hhe": "\u1215",
    "/ethi:hhee": "\u1214",
    "/ethi:hhi": "\u1212",
    "/ethi:hho": "\u1216",
    "/ethi:hhu": "\u1211",
    "/ethi:hhwa": "\u1217",
    "/ethi:hi": "\u1202",
    "/ethi:ho": "\u1206",
    "/ethi:hoa": "\u1207",
    "/ethi:hu": "\u1201",
    "/ethi:hundred": "\u137B",
    "/ethi:iglottal": "\u12A2",
    "/ethi:ja": "\u1300",
    "/ethi:jaa": "\u1303",
    "/ethi:je": "\u1305",
    "/ethi:jee": "\u1304",
    "/ethi:ji": "\u1302",
    "/ethi:jo": "\u1306",
    "/ethi:ju": "\u1301",
    "/ethi:jwa": "\u1307",
    "/ethi:ka": "\u12A8",
    "/ethi:kaa": "\u12AB",
    "/ethi:ke": "\u12AD",
    "/ethi:kee": "\u12AC",
    "/ethi:ki": "\u12AA",
    "/ethi:ko": "\u12AE",
    "/ethi:koa": "\u12AF",
    "/ethi:ku": "\u12A9",
    "/ethi:kwa": "\u12B0",
    "/ethi:kwaa": "\u12B3",
    "/ethi:kwe": "\u12B5",
    "/ethi:kwee": "\u12B4",
    "/ethi:kwi": "\u12B2",
    "/ethi:kxa": "\u12B8",
    "/ethi:kxaa": "\u12BB",
    "/ethi:kxe": "\u12BD",
    "/ethi:kxee": "\u12BC",
    "/ethi:kxi": "\u12BA",
    "/ethi:kxo": "\u12BE",
    "/ethi:kxu": "\u12B9",
    "/ethi:kxwa": "\u12C0",
    "/ethi:kxwaa": "\u12C3",
    "/ethi:kxwe": "\u12C5",
    "/ethi:kxwee": "\u12C4",
    "/ethi:kxwi": "\u12C2",
    "/ethi:la": "\u1208",
    "/ethi:laa": "\u120B",
    "/ethi:le": "\u120D",
    "/ethi:lee": "\u120C",
    "/ethi:li": "\u120A",
    "/ethi:lo": "\u120E",
    "/ethi:lu": "\u1209",
    "/ethi:lwa": "\u120F",
    "/ethi:ma": "\u1218",
    "/ethi:maa": "\u121B",
    "/ethi:me": "\u121D",
    "/ethi:mee": "\u121C",
    "/ethi:mi": "\u121A",
    "/ethi:mo": "\u121E",
    "/ethi:mu": "\u1219",
    "/ethi:mwa": "\u121F",
    "/ethi:mya": "\u1359",
    "/ethi:na": "\u1290",
    "/ethi:naa": "\u1293",
    "/ethi:ne": "\u1295",
    "/ethi:nee": "\u1294",
    "/ethi:ni": "\u1292",
    "/ethi:nine": "\u1371",
    "/ethi:ninety": "\u137A",
    "/ethi:no": "\u1296",
    "/ethi:nu": "\u1291",
    "/ethi:nwa": "\u1297",
    "/ethi:nya": "\u1298",
    "/ethi:nyaa": "\u129B",
    "/ethi:nye": "\u129D",
    "/ethi:nyee": "\u129C",
    "/ethi:nyi": "\u129A",
    "/ethi:nyo": "\u129E",
    "/ethi:nyu": "\u1299",
    "/ethi:nywa": "\u129F",
    "/ethi:oglottal": "\u12A6",
    "/ethi:one": "\u1369",
    "/ethi:pa": "\u1350",
    "/ethi:paa": "\u1353",
    "/ethi:paragraphseparator": "\u1368",
    "/ethi:pe": "\u1355",
    "/ethi:pee": "\u1354",
    "/ethi:pha": "\u1330",
    "/ethi:phaa": "\u1333",
    "/ethi:pharyngeala": "\u12D0",
    "/ethi:pharyngealaa": "\u12D3",
    "/ethi:pharyngeale": "\u12D5",
    "/ethi:pharyngealee": "\u12D4",
    "/ethi:pharyngeali": "\u12D2",
    "/ethi:pharyngealo": "\u12D6",
    "/ethi:pharyngealu": "\u12D1",
    "/ethi:phe": "\u1335",
    "/ethi:phee": "\u1334",
    "/ethi:phi": "\u1332",
    "/ethi:pho": "\u1336",
    "/ethi:phu": "\u1331",
    "/ethi:phwa": "\u1337",
    "/ethi:pi": "\u1352",
    "/ethi:po": "\u1356",
    "/ethi:prefacecolon": "\u1366",
    "/ethi:pu": "\u1351",
    "/ethi:pwa": "\u1357",
    "/ethi:qa": "\u1240",
    "/ethi:qaa": "\u1243",
    "/ethi:qe": "\u1245",
    "/ethi:qee": "\u1244",
    "/ethi:qha": "\u1250",
    "/ethi:qhaa": "\u1253",
    "/ethi:qhe": "\u1255",
    "/ethi:qhee": "\u1254",
    "/ethi:qhi": "\u1252",
    "/ethi:qho": "\u1256",
    "/ethi:qhu": "\u1251",
    "/ethi:qhwa": "\u1258",
    "/ethi:qhwaa": "\u125B",
    "/ethi:qhwe": "\u125D",
    "/ethi:qhwee": "\u125C",
    "/ethi:qhwi": "\u125A",
    "/ethi:qi": "\u1242",
    "/ethi:qo": "\u1246",
    "/ethi:qoa": "\u1247",
    "/ethi:qu": "\u1241",
    "/ethi:questionmark": "\u1367",
    "/ethi:qwa": "\u1248",
    "/ethi:qwaa": "\u124B",
    "/ethi:qwe": "\u124D",
    "/ethi:qwee": "\u124C",
    "/ethi:qwi": "\u124A",
    "/ethi:ra": "\u1228",
    "/ethi:raa": "\u122B",
    "/ethi:re": "\u122D",
    "/ethi:ree": "\u122C",
    "/ethi:ri": "\u122A",
    "/ethi:ro": "\u122E",
    "/ethi:ru": "\u1229",
    "/ethi:rwa": "\u122F",
    "/ethi:rya": "\u1358",
    "/ethi:sa": "\u1230",
    "/ethi:saa": "\u1233",
    "/ethi:se": "\u1235",
    "/ethi:sectionmark": "\u1360",
    "/ethi:see": "\u1234",
    "/ethi:semicolon": "\u1364",
    "/ethi:seven": "\u136F",
    "/ethi:seventy": "\u1378",
    "/ethi:sha": "\u1238",
    "/ethi:shaa": "\u123B",
    "/ethi:she": "\u123D",
    "/ethi:shee": "\u123C",
    "/ethi:shi": "\u123A",
    "/ethi:sho": "\u123E",
    "/ethi:shu": "\u1239",
    "/ethi:shwa": "\u123F",
    "/ethi:si": "\u1232",
    "/ethi:six": "\u136E",
    "/ethi:sixty": "\u1377",
    "/ethi:so": "\u1236",
    "/ethi:su": "\u1231",
    "/ethi:swa": "\u1237",
    "/ethi:sza": "\u1220",
    "/ethi:szaa": "\u1223",
    "/ethi:sze": "\u1225",
    "/ethi:szee": "\u1224",
    "/ethi:szi": "\u1222",
    "/ethi:szo": "\u1226",
    "/ethi:szu": "\u1221",
    "/ethi:szwa": "\u1227",
    "/ethi:ta": "\u1270",
    "/ethi:taa": "\u1273",
    "/ethi:te": "\u1275",
    "/ethi:tee": "\u1274",
    "/ethi:ten": "\u1372",
    "/ethi:tenthousand": "\u137C",
    "/ethi:tha": "\u1320",
    "/ethi:thaa": "\u1323",
    "/ethi:the": "\u1325",
    "/ethi:thee": "\u1324",
    "/ethi:thi": "\u1322",
    "/ethi:thirty": "\u1374",
    "/ethi:tho": "\u1326",
    "/ethi:three": "\u136B",
    "/ethi:thu": "\u1321",
    "/ethi:thwa": "\u1327",
    "/ethi:ti": "\u1272",
    "/ethi:to": "\u1276",
    "/ethi:tsa": "\u1338",
    "/ethi:tsaa": "\u133B",
    "/ethi:tse": "\u133D",
    "/ethi:tsee": "\u133C",
    "/ethi:tsi": "\u133A",
    "/ethi:tso": "\u133E",
    "/ethi:tsu": "\u1339",
    "/ethi:tswa": "\u133F",
    "/ethi:tu": "\u1271",
    "/ethi:twa": "\u1277",
    "/ethi:twenty": "\u1373",
    "/ethi:two": "\u136A",
    "/ethi:tza": "\u1340",
    "/ethi:tzaa": "\u1343",
    "/ethi:tze": "\u1345",
    "/ethi:tzee": "\u1344",
    "/ethi:tzi": "\u1342",
    "/ethi:tzo": "\u1346",
    "/ethi:tzoa": "\u1347",
    "/ethi:tzu": "\u1341",
    "/ethi:uglottal": "\u12A1",
    "/ethi:va": "\u1268",
    "/ethi:vaa": "\u126B",
    "/ethi:ve": "\u126D",
    "/ethi:vee": "\u126C",
    "/ethi:vi": "\u126A",
    "/ethi:vo": "\u126E",
    "/ethi:vowellengthmarkcmb": "\u135E",
    "/ethi:vu": "\u1269",
    "/ethi:vwa": "\u126F",
    "/ethi:wa": "\u12C8",
    "/ethi:waa": "\u12CB",
    "/ethi:waglottal": "\u12A7",
    "/ethi:we": "\u12CD",
    "/ethi:wee": "\u12CC",
    "/ethi:wi": "\u12CA",
    "/ethi:wo": "\u12CE",
    "/ethi:woa": "\u12CF",
    "/ethi:wordspace": "\u1361",
    "/ethi:wu": "\u12C9",
    "/ethi:xa": "\u1280",
    "/ethi:xaa": "\u1283",
    "/ethi:xe": "\u1285",
    "/ethi:xee": "\u1284",
    "/ethi:xi": "\u1282",
    "/ethi:xo": "\u1286",
    "/ethi:xoa": "\u1287",
    "/ethi:xu": "\u1281",
    "/ethi:xwa": "\u1288",
    "/ethi:xwaa": "\u128B",
    "/ethi:xwe": "\u128D",
    "/ethi:xwee": "\u128C",
    "/ethi:xwi": "\u128A",
    "/ethi:ya": "\u12E8",
    "/ethi:yaa": "\u12EB",
    "/ethi:ye": "\u12ED",
    "/ethi:yee": "\u12EC",
    "/ethi:yi": "\u12EA",
    "/ethi:yo": "\u12EE",
    "/ethi:yoa": "\u12EF",
    "/ethi:yu": "\u12E9",
    "/ethi:za": "\u12D8",
    "/ethi:zaa": "\u12DB",
    "/ethi:ze": "\u12DD",
    "/ethi:zee": "\u12DC",
    "/ethi:zha": "\u12E0",
    "/ethi:zhaa": "\u12E3",
    "/ethi:zhe": "\u12E5",
    "/ethi:zhee": "\u12E4",
    "/ethi:zhi": "\u12E2",
    "/ethi:zho": "\u12E6",
    "/ethi:zhu": "\u12E1",
    "/ethi:zhwa": "\u12E7",
    "/ethi:zi": "\u12DA",
    "/ethi:zo": "\u12DE",
    "/ethi:zu": "\u12D9",
    "/ethi:zwa": "\u12DF",
    "/etilde": "\u1EBD",
    "/etildebelow": "\u1E1B",
    "/etnahta:hb": "\u0591",
    "/etnahtafoukhhebrew": "\u0591",
    "/etnahtafoukhlefthebrew": "\u0591",
    "/etnahtahebrew": "\u0591",
    "/etnahtalefthebrew": "\u0591",
    "/eturned": "\u01DD",
    "/eukorean": "\u3161",
    "/eukrcyr": "\u0454",
    "/euler": "\u2107",
    "/euro": "\u20AC",
    "/euroarchaic": "\u20A0",
    "/europeanCastle": "\u1F3F0",
    "/europeanPostOffice": "\u1F3E4",
    "/evergreenTree": "\u1F332",
    "/evowelsignbengali": "\u09C7",
    "/evowelsigndeva": "\u0947",
    "/evowelsigngujarati": "\u0AC7",
    "/excellentideographiccircled": "\u329D",
    "/excess": "\u2239",
    "/exclam": "\u0021",
    "/exclamarmenian": "\u055C",
    "/exclamationquestion": "\u2049",
    "/exclamdbl": "\u203C",
    "/exclamdown": "\u00A1",
    "/exclamdownsmall": "\uF7A1",
    "/exclammonospace": "\uFF01",
    "/exclamsmall": "\uF721",
    "/existential": "\u2203",
    "/expressionlessFace": "\u1F611",
    "/extraterrestrialAlien": "\u1F47D",
    "/eye": "\u1F441",
    "/eyeglasses": "\u1F453",
    "/eyes": "\u1F440",
    "/ezh": "\u0292",
    "/ezhcaron": "\u01EF",
    "/ezhcurl": "\u0293",
    "/ezhreversed": "\u01B9",
    "/ezhtail": "\u01BA",
    "/f": "\u0066",
    "/f_f": "\uFB00",
    "/f_f_i": "\uFB03",
    "/f_f_l": "\uFB04",
    "/faceMassage": "\u1F486",
    "/faceSavouringDeliciousFood": "\u1F60B",
    "/faceScreamingInFear": "\u1F631",
    "/faceThrowingAKiss": "\u1F618",
    "/faceWithColdSweat": "\u1F613",
    "/faceWithLookOfTriumph": "\u1F624",
    "/faceWithMedicalMask": "\u1F637",
    "/faceWithNoGoodGesture": "\u1F645",
    "/faceWithOkGesture": "\u1F646",
    "/faceWithOpenMouth": "\u1F62E",
    "/faceWithOpenMouthAndColdSweat": "\u1F630",
    "/faceWithRollingEyes": "\u1F644",
    "/faceWithStuckOutTongue": "\u1F61B",
    "/faceWithStuckOutTongueAndTightlyClosedEyes": "\u1F61D",
    "/faceWithStuckOutTongueAndWinkingEye": "\u1F61C",
    "/faceWithTearsOfJoy": "\u1F602",
    "/faceWithoutMouth": "\u1F636",
    "/facsimile": "\u213B",
    "/factory": "\u1F3ED",
    "/fadeva": "\u095E",
    "/fagurmukhi": "\u0A5E",
    "/fahrenheit": "\u2109",
    "/fallenLeaf": "\u1F342",
    "/fallingdiagonal": "\u27CD",
    "/fallingdiagonalincircleinsquareblackwhite": "\u26DE",
    "/family": "\u1F46A",
    "/farsi": "\u262B",
    "/farsiYehDigitFourBelow": "\u0777",
    "/farsiYehDigitThreeAbove": "\u0776",
    "/farsiYehDigitTwoAbove": "\u0775",
    "/fatha": "\u064E",
    "/fathaIsol": "\uFE76",
    "/fathaMedi": "\uFE77",
    "/fathaarabic": "\u064E",
    "/fathalowarabic": "\u064E",
    "/fathasmall": "\u0618",
    "/fathatan": "\u064B",
    "/fathatanIsol": "\uFE70",
    "/fathatanarabic": "\u064B",
    "/fathatwodotsdots": "\u065E",
    "/fatherChristmas": "\u1F385",
    "/faxIcon": "\u1F5B7",
    "/faxMachine": "\u1F4E0",
    "/fbopomofo": "\u3108",
    "/fcircle": "\u24D5",
    "/fdot": "\u1E1F",
    "/fdotaccent": "\u1E1F",
    "/fearfulFace": "\u1F628",
    "/februarytelegraph": "\u32C1",
    "/feh.fina": "\uFED2",
    "/feh.init": "\uFED3",
    "/feh.init_alefmaksura.fina": "\uFC31",
    "/feh.init_hah.fina": "\uFC2E",
    "/feh.init_hah.medi": "\uFCBF",
    "/feh.init_jeem.fina": "\uFC2D",
    "/feh.init_jeem.medi": "\uFCBE",
    "/feh.init_khah.fina": "\uFC2F",
    "/feh.init_khah.medi": "\uFCC0",
    "/feh.init_khah.medi_meem.medi": "\uFD7D",
    "/feh.init_meem.fina": "\uFC30",
    "/feh.init_meem.medi": "\uFCC1",
    "/feh.init_yeh.fina": "\uFC32",
    "/feh.isol": "\uFED1",
    "/feh.medi": "\uFED4",
    "/feh.medi_alefmaksura.fina": "\uFC7C",
    "/feh.medi_khah.medi_meem.fina": "\uFD7C",
    "/feh.medi_meem.medi_yeh.fina": "\uFDC1",
    "/feh.medi_yeh.fina": "\uFC7D",
    "/fehThreeDotsUpBelow": "\u0761",
    "/fehTwoDotsBelow": "\u0760",
    "/feharabic": "\u0641",
    "/feharmenian": "\u0586",
    "/fehdotbelow": "\u06A3",
    "/fehdotbelowright": "\u06A2",
    "/fehfinalarabic": "\uFED2",
    "/fehinitialarabic": "\uFED3",
    "/fehmedialarabic": "\uFED4",
    "/fehthreedotsbelow": "\u06A5",
    "/feicoptic": "\u03E5",
    "/female": "\u2640",
    "/femaleideographiccircled": "\u329B",
    "/feng": "\u02A9",
    "/ferrisWheel": "\u1F3A1",
    "/ferry": "\u26F4",
    "/festivalideographicparen": "\u3240",
    "/ff": "\uFB00",
    "/ffi": "\uFB03",
    "/ffl": "\uFB04",
    "/fhook": "\u0192",
    "/fi": "\uFB01",  # ligature "fi"
    "/fieldHockeyStickAndBall": "\u1F3D1",
    "/fifteencircle": "\u246E",
    "/fifteencircleblack": "\u24EF",
    "/fifteenparen": "\u2482",
    "/fifteenparenthesized": "\u2482",
    "/fifteenperiod": "\u2496",
    "/fifty.roman": "\u216C",
    "/fifty.romansmall": "\u217C",
    "/fiftycircle": "\u32BF",
    "/fiftycirclesquare": "\u324C",
    "/fiftyearlyform.roman": "\u2186",
    "/fiftythousand.roman": "\u2187",
    "/figuredash": "\u2012",
    "/figurespace": "\u2007",
    "/fileCabinet": "\u1F5C4",
    "/fileFolder": "\u1F4C1",
    "/filledbox": "\u25A0",
    "/filledrect": "\u25AC",
    "/filledstopabove": "\u06EC",
    "/filmFrames": "\u1F39E",
    "/filmProjector": "\u1F4FD",
    "/finalkaf": "\u05DA",
    "/finalkaf:hb": "\u05DA",
    "/finalkafdagesh": "\uFB3A",
    "/finalkafdageshhebrew": "\uFB3A",
    "/finalkafhebrew": "\u05DA",
    "/finalkafqamats": "\u05DA",
    "/finalkafqamatshebrew": "\u05DA",
    "/finalkafsheva": "\u05DA",
    "/finalkafshevahebrew": "\u05DA",
    "/finalkafwithdagesh:hb": "\uFB3A",
    "/finalmem": "\u05DD",
    "/finalmem:hb": "\u05DD",
    "/finalmemhebrew": "\u05DD",
    "/finalmemwide:hb": "\uFB26",
    "/finalnun": "\u05DF",
    "/finalnun:hb": "\u05DF",
    "/finalnunhebrew": "\u05DF",
    "/finalpe": "\u05E3",
    "/finalpe:hb": "\u05E3",
    "/finalpehebrew": "\u05E3",
    "/finalpewithdagesh:hb": "\uFB43",
    "/finalsigma": "\u03C2",
    "/finaltsadi": "\u05E5",
    "/finaltsadi:hb": "\u05E5",
    "/finaltsadihebrew": "\u05E5",
    "/financialideographiccircled": "\u3296",
    "/financialideographicparen": "\u3236",
    "/finsular": "\uA77C",
    "/fire": "\u1F525",
    "/fireEngine": "\u1F692",
    "/fireideographiccircled": "\u328B",
    "/fireideographicparen": "\u322B",
    "/fireworkSparkler": "\u1F387",
    "/fireworks": "\u1F386",
    "/firstQuarterMoon": "\u1F313",
    "/firstQuarterMoonFace": "\u1F31B",
    "/firstquartermoon": "\u263D",
    "/firststrongisolate": "\u2068",
    "/firsttonechinese": "\u02C9",
    "/fish": "\u1F41F",
    "/fishCakeSwirlDesign": "\u1F365",
    "/fisheye": "\u25C9",
    "/fishingPoleAndFish": "\u1F3A3",
    "/fistedHandSign": "\u1F44A",
    "/fitacyr": "\u0473",
    "/fitacyrillic": "\u0473",
    "/five": "\u0035",
    "/five.inferior": "\u2085",
    "/five.roman": "\u2164",
    "/five.romansmall": "\u2174",
    "/five.superior": "\u2075",
    "/fivearabic": "\u0665",
    "/fivebengali": "\u09EB",
    "/fivecircle": "\u2464",
    "/fivecircledbl": "\u24F9",
    "/fivecircleinversesansserif": "\u278E",
    "/fivecomma": "\u1F106",
    "/fivedeva": "\u096B",
    "/fivedot": "\u2E2D",
    "/fivedotpunctuation": "\u2059",
    "/fiveeighths": "\u215D",
    "/fivefar": "\u06F5",
    "/fivegujarati": "\u0AEB",
    "/fivegurmukhi": "\u0A6B",
    "/fivehackarabic": "\u0665",
    "/fivehangzhou": "\u3025",
    "/fivehundred.roman": "\u216E",
    "/fivehundred.romansmall": "\u217E",
    "/fiveideographiccircled": "\u3284",
    "/fiveideographicparen": "\u3224",
    "/fiveinferior": "\u2085",
    "/fivemonospace": "\uFF15",
    "/fiveoldstyle": "\uF735",
    "/fiveparen": "\u2478",
    "/fiveparenthesized": "\u2478",
    "/fiveperiod": "\u248C",
    "/fivepersian": "\u06F5",
    "/fivepointedstar": "\u066D",
    "/fivepointonesquare": "\u1F1A0",
    "/fiveroman": "\u2174",
    "/fivesixths": "\u215A",
    "/fivesuperior": "\u2075",
    "/fivethai": "\u0E55",
    "/fivethousand.roman": "\u2181",
    "/fl": "\uFB02",
    "/flagblack": "\u2691",
    "/flaghorizontalmiddlestripeblackwhite": "\u26FF",
    "/flaginhole": "\u26F3",
    "/flagwhite": "\u2690",
    "/flatness": "\u23E5",
    "/fleurdelis": "\u269C",
    "/flexedBiceps": "\u1F4AA",
    "/floorleft": "\u230A",
    "/floorright": "\u230B",
    "/floppyDisk": "\u1F4BE",
    "/floralheartbulletreversedrotated": "\u2619",
    "/florin": "\u0192",
    "/flower": "\u2698",
    "/flowerPlayingCards": "\u1F3B4",
    "/flowerpunctuationmark": "\u2055",
    "/flushedFace": "\u1F633",
    "/flyingEnvelope": "\u1F585",
    "/flyingSaucer": "\u1F6F8",
    "/fmfullwidth": "\u3399",
    "/fmonospace": "\uFF46",
    "/fmsquare": "\u3399",
    "/fofanthai": "\u0E1F",
    "/fofathai": "\u0E1D",
    "/fog": "\u1F32B",
    "/foggy": "\u1F301",
    "/folder": "\u1F5C0",
    "/fongmanthai": "\u0E4F",
    "/footnote": "\u0602",
    "/footprints": "\u1F463",
    "/footsquare": "\u23CD",
    "/forall": "\u2200",
    "/forces": "\u22A9",
    "/fork": "\u2442",
    "/forkKnife": "\u1F374",
    "/forkKnifePlate": "\u1F37D",
    "/forsamaritan": "\u214F",
    "/fortycircle": "\u32B5",
    "/fortycirclesquare": "\u324B",
    "/fortyeightcircle": "\u32BD",
    "/fortyfivecircle": "\u32BA",
    "/fortyfourcircle": "\u32B9",
    "/fortyninecircle": "\u32BE",
    "/fortyonecircle": "\u32B6",
    "/fortysevencircle": "\u32BC",
    "/fortysixcircle": "\u32BB",
    "/fortythreecircle": "\u32B8",
    "/fortytwocircle": "\u32B7",
    "/fountain": "\u26F2",
    "/four": "\u0034",
    "/four.inferior": "\u2084",
    "/four.roman": "\u2163",
    "/four.romansmall": "\u2173",
    "/four.superior": "\u2074",
    "/fourLeafClover": "\u1F340",
    "/fourarabic": "\u0664",
    "/fourbengali": "\u09EA",
    "/fourcircle": "\u2463",
    "/fourcircledbl": "\u24F8",
    "/fourcircleinversesansserif": "\u278D",
    "/fourcomma": "\u1F105",
    "/fourdeva": "\u096A",
    "/fourdotmark": "\u205B",
    "/fourdotpunctuation": "\u2058",
    "/fourfar": "\u06F4",
    "/fourfifths": "\u2158",
    "/fourgujarati": "\u0AEA",
    "/fourgurmukhi": "\u0A6A",
    "/fourhackarabic": "\u0664",
    "/fourhangzhou": "\u3024",
    "/fourideographiccircled": "\u3283",
    "/fourideographicparen": "\u3223",
    "/fourinferior": "\u2084",
    "/fourksquare": "\u1F19E",
    "/fourmonospace": "\uFF14",
    "/fournumeratorbengali": "\u09F7",
    "/fouroldstyle": "\uF734",
    "/fourparen": "\u2477",
    "/fourparenthesized": "\u2477",
    "/fourperemspace": "\u2005",
    "/fourperiod": "\u248B",
    "/fourpersian": "\u06F4",
    "/fourroman": "\u2173",
    "/foursuperior": "\u2074",
    "/fourteencircle": "\u246D",
    "/fourteencircleblack": "\u24EE",
    "/fourteenparen": "\u2481",
    "/fourteenparenthesized": "\u2481",
    "/fourteenperiod": "\u2495",
    "/fourthai": "\u0E54",
    "/fourthtonechinese": "\u02CB",
    "/fparen": "\u24A1",
    "/fparenthesized": "\u24A1",
    "/fraction": "\u2044",
    "/frameAnX": "\u1F5BE",
    "/framePicture": "\u1F5BC",
    "/frameTiles": "\u1F5BD",
    "/franc": "\u20A3",
    "/freesquare": "\u1F193",
    "/frenchFries": "\u1F35F",
    "/freversedepigraphic": "\uA7FB",
    "/friedShrimp": "\u1F364",
    "/frogFace": "\u1F438",
    "/front-facingBabyChick": "\u1F425",
    "/frown": "\u2322",
    "/frowningFaceWithOpenMouth": "\u1F626",
    "/frowningfacewhite": "\u2639",
    "/fstroke": "\uA799",
    "/fturned": "\u214E",
    "/fuelpump": "\u26FD",
    "/fullBlock": "\u2588",
    "/fullMoon": "\u1F315",
    "/fullMoonFace": "\u1F31D",
    "/functionapplication": "\u2061",
    "/funeralurn": "\u26B1",
    "/fuse": "\u23DB",
    "/fwd:A": "\uFF21",
    "/fwd:B": "\uFF22",
    "/fwd:C": "\uFF23",
    "/fwd:D": "\uFF24",
    "/fwd:E": "\uFF25",
    "/fwd:F": "\uFF26",
    "/fwd:G": "\uFF27",
    "/fwd:H": "\uFF28",
    "/fwd:I": "\uFF29",
    "/fwd:J": "\uFF2A",
    "/fwd:K": "\uFF2B",
    "/fwd:L": "\uFF2C",
    "/fwd:M": "\uFF2D",
    "/fwd:N": "\uFF2E",
    "/fwd:O": "\uFF2F",
    "/fwd:P": "\uFF30",
    "/fwd:Q": "\uFF31",
    "/fwd:R": "\uFF32",
    "/fwd:S": "\uFF33",
    "/fwd:T": "\uFF34",
    "/fwd:U": "\uFF35",
    "/fwd:V": "\uFF36",
    "/fwd:W": "\uFF37",
    "/fwd:X": "\uFF38",
    "/fwd:Y": "\uFF39",
    "/fwd:Z": "\uFF3A",
    "/fwd:a": "\uFF41",
    "/fwd:ampersand": "\uFF06",
    "/fwd:asciicircum": "\uFF3E",
    "/fwd:asciitilde": "\uFF5E",
    "/fwd:asterisk": "\uFF0A",
    "/fwd:at": "\uFF20",
    "/fwd:b": "\uFF42",
    "/fwd:backslash": "\uFF3C",
    "/fwd:bar": "\uFF5C",
    "/fwd:braceleft": "\uFF5B",
    "/fwd:braceright": "\uFF5D",
    "/fwd:bracketleft": "\uFF3B",
    "/fwd:bracketright": "\uFF3D",
    "/fwd:brokenbar": "\uFFE4",
    "/fwd:c": "\uFF43",
    "/fwd:centsign": "\uFFE0",
    "/fwd:colon": "\uFF1A",
    "/fwd:comma": "\uFF0C",
    "/fwd:d": "\uFF44",
    "/fwd:dollar": "\uFF04",
    "/fwd:e": "\uFF45",
    "/fwd:eight": "\uFF18",
    "/fwd:equal": "\uFF1D",
    "/fwd:exclam": "\uFF01",
    "/fwd:f": "\uFF46",
    "/fwd:five": "\uFF15",
    "/fwd:four": "\uFF14",
    "/fwd:g": "\uFF47",
    "/fwd:grave": "\uFF40",
    "/fwd:greater": "\uFF1E",
    "/fwd:h": "\uFF48",
    "/fwd:hyphen": "\uFF0D",
    "/fwd:i": "\uFF49",
    "/fwd:j": "\uFF4A",
    "/fwd:k": "\uFF4B",
    "/fwd:l": "\uFF4C",
    "/fwd:leftwhiteparenthesis": "\uFF5F",
    "/fwd:less": "\uFF1C",
    "/fwd:m": "\uFF4D",
    "/fwd:macron": "\uFFE3",
    "/fwd:n": "\uFF4E",
    "/fwd:nine": "\uFF19",
    "/fwd:notsign": "\uFFE2",
    "/fwd:numbersign": "\uFF03",
    "/fwd:o": "\uFF4F",
    "/fwd:one": "\uFF11",
    "/fwd:p": "\uFF50",
    "/fwd:parenthesisleft": "\uFF08",
    "/fwd:parenthesisright": "\uFF09",
    "/fwd:percent": "\uFF05",
    "/fwd:period": "\uFF0E",
    "/fwd:plus": "\uFF0B",
    "/fwd:poundsign": "\uFFE1",
    "/fwd:q": "\uFF51",
    "/fwd:question": "\uFF1F",
    "/fwd:quotedbl": "\uFF02",
    "/fwd:quotesingle": "\uFF07",
    "/fwd:r": "\uFF52",
    "/fwd:rightwhiteparenthesis": "\uFF60",
    "/fwd:s": "\uFF53",
    "/fwd:semicolon": "\uFF1B",
    "/fwd:seven": "\uFF17",
    "/fwd:six": "\uFF16",
    "/fwd:slash": "\uFF0F",
    "/fwd:t": "\uFF54",
    "/fwd:three": "\uFF13",
    "/fwd:two": "\uFF12",
    "/fwd:u": "\uFF55",
    "/fwd:underscore": "\uFF3F",
    "/fwd:v": "\uFF56",
    "/fwd:w": "\uFF57",
    "/fwd:wonsign": "\uFFE6",
    "/fwd:x": "\uFF58",
    "/fwd:y": "\uFF59",
    "/fwd:yensign": "\uFFE5",
    "/fwd:z": "\uFF5A",
    "/fwd:zero": "\uFF10",
    "/g": "\u0067",
    "/gabengali": "\u0997",
    "/gacute": "\u01F5",
    "/gadeva": "\u0917",
    "/gaf": "\u06AF",
    "/gaf.fina": "\uFB93",
    "/gaf.init": "\uFB94",
    "/gaf.isol": "\uFB92",
    "/gaf.medi": "\uFB95",
    "/gafarabic": "\u06AF",
    "/gaffinalarabic": "\uFB93",
    "/gafinitialarabic": "\uFB94",
    "/gafmedialarabic": "\uFB95",
    "/gafring": "\u06B0",
    "/gafthreedotsabove": "\u06B4",
    "/gaftwodotsbelow": "\u06B2",
    "/gagujarati": "\u0A97",
    "/gagurmukhi": "\u0A17",
    "/gahiragana": "\u304C",
    "/gakatakana": "\u30AC",
    "/galsquare": "\u33FF",
    "/gameDie": "\u1F3B2",
    "/gamma": "\u03B3",
    "/gammadblstruck": "\u213D",
    "/gammalatinsmall": "\u0263",
    "/gammasuperior": "\u02E0",
    "/gammasupmod": "\u02E0",
    "/gamurda": "\uA993",
    "/gangiacoptic": "\u03EB",
    "/ganmasquare": "\u330F",
    "/garonsquare": "\u330E",
    "/gbfullwidth": "\u3387",
    "/gbopomofo": "\u310D",
    "/gbreve": "\u011F",
    "/gcaron": "\u01E7",
    "/gcedilla": "\u0123",
    "/gcircle": "\u24D6",
    "/gcircumflex": "\u011D",
    "/gcommaaccent": "\u0123",
    "/gdot": "\u0121",
    "/gdotaccent": "\u0121",
    "/gear": "\u2699",
    "/gearhles": "\u26EE",
    "/gearouthub": "\u26ED",
    "/gecyr": "\u0433",
    "/gecyrillic": "\u0433",
    "/gehiragana": "\u3052",
    "/gehookcyr": "\u0495",
    "/gehookstrokecyr": "\u04FB",
    "/gekatakana": "\u30B2",
    "/gemStone": "\u1F48E",
    "/gemini": "\u264A",
    "/geometricallyequal": "\u2251",
    "/geometricallyequivalent": "\u224E",
    "/geometricproportion": "\u223A",
    "/geresh:hb": "\u05F3",
    "/gereshMuqdam:hb": "\u059D",
    "/gereshaccenthebrew": "\u059C",
    "/gereshhebrew": "\u05F3",
    "/gereshmuqdamhebrew": "\u059D",
    "/germandbls": "\u00DF",
    "/germanpenny": "\u20B0",
    "/gershayim:hb": "\u05F4",
    "/gershayimaccenthebrew": "\u059E",
    "/gershayimhebrew": "\u05F4",
    "/gestrokecyr": "\u0493",
    "/getailcyr": "\u04F7",
    "/getamark": "\u3013",
    "/geupcyr": "\u0491",
    "/ghabengali": "\u0998",
    "/ghadarmenian": "\u0572",
    "/ghadeva": "\u0918",
    "/ghagujarati": "\u0A98",
    "/ghagurmukhi": "\u0A18",
    "/ghain": "\u063A",
    "/ghain.fina": "\uFECE",
    "/ghain.init": "\uFECF",
    "/ghain.init_alefmaksura.fina": "\uFCF9",
    "/ghain.init_jeem.fina": "\uFC2B",
    "/ghain.init_jeem.medi": "\uFCBC",
    "/ghain.init_meem.fina": "\uFC2C",
    "/ghain.init_meem.medi": "\uFCBD",
    "/ghain.init_yeh.fina": "\uFCFA",
    "/ghain.isol": "\uFECD",
    "/ghain.medi": "\uFED0",
    "/ghain.medi_alefmaksura.fina": "\uFD15",
    "/ghain.medi_meem.medi_alefmaksura.fina": "\uFD7B",
    "/ghain.medi_meem.medi_meem.fina": "\uFD79",
    "/ghain.medi_meem.medi_yeh.fina": "\uFD7A",
    "/ghain.medi_yeh.fina": "\uFD16",
    "/ghainarabic": "\u063A",
    "/ghaindotbelow": "\u06FC",
    "/ghainfinalarabic": "\uFECE",
    "/ghaininitialarabic": "\uFECF",
    "/ghainmedialarabic": "\uFED0",
    "/ghemiddlehookcyrillic": "\u0495",
    "/ghestrokecyrillic": "\u0493",
    "/gheupturncyrillic": "\u0491",
    "/ghhadeva": "\u095A",
    "/ghhagurmukhi": "\u0A5A",
    "/ghook": "\u0260",
    "/ghost": "\u1F47B",
    "/ghzfullwidth": "\u3393",
    "/ghzsquare": "\u3393",
    "/gigasquare": "\u3310",
    "/gihiragana": "\u304E",
    "/gikatakana": "\u30AE",
    "/gimarmenian": "\u0563",
    "/gimel": "\u05D2",
    "/gimel:hb": "\u05D2",
    "/gimeldagesh": "\uFB32",
    "/gimeldageshhebrew": "\uFB32",
    "/gimelhebrew": "\u05D2",
    "/gimelwithdagesh:hb": "\uFB32",
    "/giniisquare": "\u3311",
    "/ginsularturned": "\uA77F",
    "/girl": "\u1F467",
    "/girls": "\u1F6CA",
    "/girudaasquare": "\u3313",
    "/gjecyr": "\u0453",
    "/gjecyrillic": "\u0453",
    "/globeMeridians": "\u1F310",
    "/glottalinvertedstroke": "\u01BE",
    "/glottalstop": "\u0294",
    "/glottalstopinverted": "\u0296",
    "/glottalstopmod": "\u02C0",
    "/glottalstopreversed": "\u0295",
    "/glottalstopreversedmod": "\u02C1",
    "/glottalstopreversedsuperior": "\u02E4",
    "/glottalstopstroke": "\u02A1",
    "/glottalstopstrokereversed": "\u02A2",
    "/glottalstopsupreversedmod": "\u02E4",
    "/glowingStar": "\u1F31F",
    "/gmacron": "\u1E21",
    "/gmonospace": "\uFF47",
    "/gmtr:diamondblack": "\u25C6",
    "/gmtr:diamondwhite": "\u25C7",
    "/gnrl:hyphen": "\u2010",
    "/goat": "\u1F410",
    "/gobliquestroke": "\uA7A1",
    "/gohiragana": "\u3054",
    "/gokatakana": "\u30B4",
    "/golfer": "\u1F3CC",
    "/gpafullwidth": "\u33AC",
    "/gparen": "\u24A2",
    "/gparenthesized": "\u24A2",
    "/gpasquare": "\u33AC",
    "/gr:acute": "\u1FFD",
    "/gr:grave": "\u1FEF",
    "/gr:question": "\u037E",
    "/gr:tilde": "\u1FC0",
    "/gradient": "\u2207",
    "/graduationCap": "\u1F393",
    "/grapes": "\u1F347",
    "/grave": "\u0060",
    "/gravebelowcmb": "\u0316",
    "/gravecmb": "\u0300",
    "/gravecomb": "\u0300",
    "/gravedblmiddlemod": "\u02F5",
    "/gravedeva": "\u0953",
    "/gravelowmod": "\u02CE",
    "/gravemiddlemod": "\u02F4",
    "/gravemod": "\u02CB",
    "/gravemonospace": "\uFF40",
    "/gravetonecmb": "\u0340",
    "/greater": "\u003E",
    "/greaterbutnotequal": "\u2269",
    "/greaterbutnotequivalent": "\u22E7",
    "/greaterdot": "\u22D7",
    "/greaterequal": "\u2265",
    "/greaterequalorless": "\u22DB",
    "/greatermonospace": "\uFF1E",
    "/greaterorequivalent": "\u2273",
    "/greaterorless": "\u2277",
    "/greateroverequal": "\u2267",
    "/greatersmall": "\uFE65",
    "/greenApple": "\u1F34F",
    "/greenBook": "\u1F4D7",
    "/greenHeart": "\u1F49A",
    "/grimacingFace": "\u1F62C",
    "/grinningCatFaceWithSmilingEyes": "\u1F638",
    "/grinningFace": "\u1F600",
    "/grinningFaceWithSmilingEyes": "\u1F601",
    "/growingHeart": "\u1F497",
    "/gscript": "\u0261",
    "/gstroke": "\u01E5",
    "/guarani": "\u20B2",
    "/guardsman": "\u1F482",
    "/gueh": "\u06B3",
    "/gueh.fina": "\uFB97",
    "/gueh.init": "\uFB98",
    "/gueh.isol": "\uFB96",
    "/gueh.medi": "\uFB99",
    "/guhiragana": "\u3050",
    "/guillemetleft": "\u00AB",
    "/guillemetright": "\u00BB",
    "/guillemotleft": "\u00AB",
    "/guillemotright": "\u00BB",
    "/guilsinglleft": "\u2039",
    "/guilsinglright": "\u203A",
    "/guitar": "\u1F3B8",
    "/gujr:a": "\u0A85",
    "/gujr:aa": "\u0A86",
    "/gujr:aasign": "\u0ABE",
    "/gujr:abbreviation": "\u0AF0",
    "/gujr:ai": "\u0A90",
    "/gujr:aisign": "\u0AC8",
    "/gujr:anusvara": "\u0A82",
    "/gujr:au": "\u0A94",
    "/gujr:ausign": "\u0ACC",
    "/gujr:avagraha": "\u0ABD",
    "/gujr:ba": "\u0AAC",
    "/gujr:bha": "\u0AAD",
    "/gujr:binducandra": "\u0A81",
    "/gujr:ca": "\u0A9A",
    "/gujr:cha": "\u0A9B",
    "/gujr:circlenuktaabove": "\u0AFE",
    "/gujr:da": "\u0AA6",
    "/gujr:dda": "\u0AA1",
    "/gujr:ddha": "\u0AA2",
    "/gujr:dha": "\u0AA7",
    "/gujr:e": "\u0A8F",
    "/gujr:ecandra": "\u0A8D",
    "/gujr:eight": "\u0AEE",
    "/gujr:esign": "\u0AC7",
    "/gujr:esigncandra": "\u0AC5",
    "/gujr:five": "\u0AEB",
    "/gujr:four": "\u0AEA",
    "/gujr:ga": "\u0A97",
    "/gujr:gha": "\u0A98",
    "/gujr:ha": "\u0AB9",
    "/gujr:i": "\u0A87",
    "/gujr:ii": "\u0A88",
    "/gujr:iisign": "\u0AC0",
    "/gujr:isign": "\u0ABF",
    "/gujr:ja": "\u0A9C",
    "/gujr:jha": "\u0A9D",
    "/gujr:ka": "\u0A95",
    "/gujr:kha": "\u0A96",
    "/gujr:la": "\u0AB2",
    "/gujr:lla": "\u0AB3",
    "/gujr:llvocal": "\u0AE1",
    "/gujr:llvocalsign": "\u0AE3",
    "/gujr:lvocal": "\u0A8C",
    "/gujr:lvocalsign": "\u0AE2",
    "/gujr:ma": "\u0AAE",
    "/gujr:maddah": "\u0AFC",
    "/gujr:na": "\u0AA8",
    "/gujr:nga": "\u0A99",
    "/gujr:nine": "\u0AEF",
    "/gujr:nna": "\u0AA3",
    "/gujr:nukta": "\u0ABC",
    "/gujr:nya": "\u0A9E",
    "/gujr:o": "\u0A93",
    "/gujr:ocandra": "\u0A91",
    "/gujr:om": "\u0AD0",
    "/gujr:one": "\u0AE7",
    "/gujr:osign": "\u0ACB",
    "/gujr:osigncandra": "\u0AC9",
    "/gujr:pa": "\u0AAA",
    "/gujr:pha": "\u0AAB",
    "/gujr:ra": "\u0AB0",
    "/gujr:rrvocal": "\u0AE0",
    "/gujr:rrvocalsign": "\u0AC4",
    "/gujr:rupee": "\u0AF1",
    "/gujr:rvocal": "\u0A8B",
    "/gujr:rvocalsign": "\u0AC3",
    "/gujr:sa": "\u0AB8",
    "/gujr:seven": "\u0AED",
    "/gujr:sha": "\u0AB6",
    "/gujr:shadda": "\u0AFB",
    "/gujr:six": "\u0AEC",
    "/gujr:ssa": "\u0AB7",
    "/gujr:sukun": "\u0AFA",
    "/gujr:ta": "\u0AA4",
    "/gujr:tha": "\u0AA5",
    "/gujr:three": "\u0AE9",
    "/gujr:three-dotnuktaabove": "\u0AFD",
    "/gujr:tta": "\u0A9F",
    "/gujr:ttha": "\u0AA0",
    "/gujr:two": "\u0AE8",
    "/gujr:two-circlenuktaabove": "\u0AFF",
    "/gujr:u": "\u0A89",
    "/gujr:usign": "\u0AC1",
    "/gujr:uu": "\u0A8A",
    "/gujr:uusign": "\u0AC2",
    "/gujr:va": "\u0AB5",
    "/gujr:virama": "\u0ACD",
    "/gujr:visarga": "\u0A83",
    "/gujr:ya": "\u0AAF",
    "/gujr:zero": "\u0AE6",
    "/gujr:zha": "\u0AF9",
    "/gukatakana": "\u30B0",
    "/guramusquare": "\u3318",
    "/guramutonsquare": "\u3319",
    "/guru:a": "\u0A05",
    "/guru:aa": "\u0A06",
    "/guru:aasign": "\u0A3E",
    "/guru:adakbindisign": "\u0A01",
    "/guru:addak": "\u0A71",
    "/guru:ai": "\u0A10",
    "/guru:aisign": "\u0A48",
    "/guru:au": "\u0A14",
    "/guru:ausign": "\u0A4C",
    "/guru:ba": "\u0A2C",
    "/guru:bha": "\u0A2D",
    "/guru:bindisign": "\u0A02",
    "/guru:ca": "\u0A1A",
    "/guru:cha": "\u0A1B",
    "/guru:da": "\u0A26",
    "/guru:dda": "\u0A21",
    "/guru:ddha": "\u0A22",
    "/guru:dha": "\u0A27",
    "/guru:ee": "\u0A0F",
    "/guru:eesign": "\u0A47",
    "/guru:eight": "\u0A6E",
    "/guru:ekonkar": "\u0A74",
    "/guru:fa": "\u0A5E",
    "/guru:five": "\u0A6B",
    "/guru:four": "\u0A6A",
    "/guru:ga": "\u0A17",
    "/guru:gha": "\u0A18",
    "/guru:ghha": "\u0A5A",
    "/guru:ha": "\u0A39",
    "/guru:i": "\u0A07",
    "/guru:ii": "\u0A08",
    "/guru:iisign": "\u0A40",
    "/guru:iri": "\u0A72",
    "/guru:isign": "\u0A3F",
    "/guru:ja": "\u0A1C",
    "/guru:jha": "\u0A1D",
    "/guru:ka": "\u0A15",
    "/guru:kha": "\u0A16",
    "/guru:khha": "\u0A59",
    "/guru:la": "\u0A32",
    "/guru:lla": "\u0A33",
    "/guru:ma": "\u0A2E",
    "/guru:na": "\u0A28",
    "/guru:nga": "\u0A19",
    "/guru:nine": "\u0A6F",
    "/guru:nna": "\u0A23",
    "/guru:nukta": "\u0A3C",
    "/guru:nya": "\u0A1E",
    "/guru:one": "\u0A67",
    "/guru:oo": "\u0A13",
    "/guru:oosign": "\u0A4B",
    "/guru:pa": "\u0A2A",
    "/guru:pha": "\u0A2B",
    "/guru:ra": "\u0A30",
    "/guru:rra": "\u0A5C",
    "/guru:sa": "\u0A38",
    "/guru:seven": "\u0A6D",
    "/guru:sha": "\u0A36",
    "/guru:six": "\u0A6C",
    "/guru:ta": "\u0A24",
    "/guru:tha": "\u0A25",
    "/guru:three": "\u0A69",
    "/guru:tippi": "\u0A70",
    "/guru:tta": "\u0A1F",
    "/guru:ttha": "\u0A20",
    "/guru:two": "\u0A68",
    "/guru:u": "\u0A09",
    "/guru:udaatsign": "\u0A51",
    "/guru:ura": "\u0A73",
    "/guru:usign": "\u0A41",
    "/guru:uu": "\u0A0A",
    "/guru:uusign": "\u0A42",
    "/guru:va": "\u0A35",
    "/guru:virama": "\u0A4D",
    "/guru:visarga": "\u0A03",
    "/guru:ya": "\u0A2F",
    "/guru:yakashsign": "\u0A75",
    "/guru:za": "\u0A5B",
    "/guru:zero": "\u0A66",
    "/gyfullwidth": "\u33C9",
    "/gysquare": "\u33C9",
    "/h": "\u0068",
    "/h.inferior": "\u2095",
    "/haabkhasiancyrillic": "\u04A9",
    "/haabkhcyr": "\u04A9",
    "/haaltonearabic": "\u06C1",
    "/habengali": "\u09B9",
    "/hacirclekatakana": "\u32E9",
    "/hacyr": "\u0445",
    "/hadescendercyrillic": "\u04B3",
    "/hadeva": "\u0939",
    "/hafullwidth": "\u33CA",
    "/hagujarati": "\u0AB9",
    "/hagurmukhi": "\u0A39",
    "/hah": "\u062D",
    "/hah.fina": "\uFEA2",
    "/hah.init": "\uFEA3",
    "/hah.init_alefmaksura.fina": "\uFCFF",
    "/hah.init_jeem.fina": "\uFC17",
    "/hah.init_jeem.medi": "\uFCA9",
    "/hah.init_meem.fina": "\uFC18",
    "/hah.init_meem.medi": "\uFCAA",
    "/hah.init_yeh.fina": "\uFD00",
    "/hah.isol": "\uFEA1",
    "/hah.medi": "\uFEA4",
    "/hah.medi_alefmaksura.fina": "\uFD1B",
    "/hah.medi_jeem.medi_yeh.fina": "\uFDBF",
    "/hah.medi_meem.medi_alefmaksura.fina": "\uFD5B",
    "/hah.medi_meem.medi_yeh.fina": "\uFD5A",
    "/hah.medi_yeh.fina": "\uFD1C",
    "/hahDigitFourBelow": "\u077C",
    "/hahSmallTahAbove": "\u0772",
    "/hahSmallTahBelow": "\u076E",
    "/hahSmallTahTwoDots": "\u076F",
    "/hahThreeDotsUpBelow": "\u0758",
    "/hahTwoDotsAbove": "\u0757",
    "/haharabic": "\u062D",
    "/hahfinalarabic": "\uFEA2",
    "/hahhamza": "\u0681",
    "/hahinitialarabic": "\uFEA3",
    "/hahiragana": "\u306F",
    "/hahmedialarabic": "\uFEA4",
    "/hahookcyr": "\u04FD",
    "/hahthreedotsabove": "\u0685",
    "/hahtwodotsvertical": "\u0682",
    "/haircut": "\u1F487",
    "/hairspace": "\u200A",
    "/haitusquare": "\u332A",
    "/hakatakana": "\u30CF",
    "/hakatakanahalfwidth": "\uFF8A",
    "/halantgurmukhi": "\u0A4D",
    "/halfcircleleftblack": "\u25D6",
    "/halfcirclerightblack": "\u25D7",
    "/hamburger": "\u1F354",
    "/hammer": "\u1F528",
    "/hammerAndWrench": "\u1F6E0",
    "/hammerpick": "\u2692",
    "/hammersickle": "\u262D",
    "/hamsterFace": "\u1F439",
    "/hamza": "\u0621",
    "/hamzaIsol": "\uFE80",
    "/hamzaabove": "\u0654",
    "/hamzaarabic": "\u0621",
    "/hamzabelow": "\u0655",
    "/hamzadammaarabic": "\u0621",
    "/hamzadammatanarabic": "\u0621",
    "/hamzafathaarabic": "\u0621",
    "/hamzafathatanarabic": "\u0621",
    "/hamzalowarabic": "\u0621",
    "/hamzalowkasraarabic": "\u0621",
    "/hamzalowkasratanarabic": "\u0621",
    "/hamzasukunarabic": "\u0621",
    "/handbag": "\u1F45C",
    "/handtailfishhookturned": "\u02AF",
    "/hangulchieuchaparen": "\u3217",
    "/hangulchieuchparen": "\u3209",
    "/hangulcieucaparen": "\u3216",
    "/hangulcieucparen": "\u3208",
    "/hangulcieucuparen": "\u321C",
    "/hanguldottonemarkdbl": "\u302F",
    "/hangulfiller": "\u3164",
    "/hangulhieuhaparen": "\u321B",
    "/hangulhieuhparen": "\u320D",
    "/hangulieungaparen": "\u3215",
    "/hangulieungparen": "\u3207",
    "/hangulkhieukhaparen": "\u3218",
    "/hangulkhieukhparen": "\u320A",
    "/hangulkiyeokaparen": "\u320E",
    "/hangulkiyeokparen": "\u3200",
    "/hangulmieumaparen": "\u3212",
    "/hangulmieumparen": "\u3204",
    "/hangulnieunaparen": "\u320F",
    "/hangulnieunparen": "\u3201",
    "/hangulphieuphaparen": "\u321A",
    "/hangulphieuphparen": "\u320C",
    "/hangulpieupaparen": "\u3213",
    "/hangulpieupparen": "\u3205",
    "/hangulrieulaparen": "\u3211",
    "/hangulrieulparen": "\u3203",
    "/hangulsingledottonemark": "\u302E",
    "/hangulsiosaparen": "\u3214",
    "/hangulsiosparen": "\u3206",
    "/hangulthieuthaparen": "\u3219",
    "/hangulthieuthparen": "\u320B",
    "/hangultikeutaparen": "\u3210",
    "/hangultikeutparen": "\u3202",
    "/happyPersonRaisingOneHand": "\u1F64B",
    "/hardDisk": "\u1F5B4",
    "/hardcyr": "\u044A",
    "/hardsigncyrillic": "\u044A",
    "/harpoondownbarbleft": "\u21C3",
    "/harpoondownbarbright": "\u21C2",
    "/harpoonleftbarbdown": "\u21BD",
    "/harpoonleftbarbup": "\u21BC",
    "/harpoonrightbarbdown": "\u21C1",
    "/harpoonrightbarbup": "\u21C0",
    "/harpoonupbarbleft": "\u21BF",
    "/harpoonupbarbright": "\u21BE",
    "/hasquare": "\u33CA",
    "/hastrokecyr": "\u04FF",
    "/hatafPatah:hb": "\u05B2",
    "/hatafQamats:hb": "\u05B3",
    "/hatafSegol:hb": "\u05B1",
    "/hatafpatah": "\u05B2",
    "/hatafpatah16": "\u05B2",
    "/hatafpatah23": "\u05B2",
    "/hatafpatah2f": "\u05B2",
    "/hatafpatahhebrew": "\u05B2",
    "/hatafpatahnarrowhebrew": "\u05B2",
    "/hatafpatahquarterhebrew": "\u05B2",
    "/hatafpatahwidehebrew": "\u05B2",
    "/hatafqamats": "\u05B3",
    "/hatafqamats1b": "\u05B3",
    "/hatafqamats28": "\u05B3",
    "/hatafqamats34": "\u05B3",
    "/hatafqamatshebrew": "\u05B3",
    "/hatafqamatsnarrowhebrew": "\u05B3",
    "/hatafqamatsquarterhebrew": "\u05B3",
    "/hatafqamatswidehebrew": "\u05B3",
    "/hatafsegol": "\u05B1",
    "/hatafsegol17": "\u05B1",
    "/hatafsegol24": "\u05B1",
    "/hatafsegol30": "\u05B1",
    "/hatafsegolhebrew": "\u05B1",
    "/hatafsegolnarrowhebrew": "\u05B1",
    "/hatafsegolquarterhebrew": "\u05B1",
    "/hatafsegolwidehebrew": "\u05B1",
    "/hatchingChick": "\u1F423",
    "/haveideographiccircled": "\u3292",
    "/haveideographicparen": "\u3232",
    "/hbar": "\u0127",
    "/hbopomofo": "\u310F",
    "/hbrevebelow": "\u1E2B",
    "/hcaron": "\u021F",
    "/hcedilla": "\u1E29",
    "/hcircle": "\u24D7",
    "/hcircumflex": "\u0125",
    "/hcsquare": "\u1F1A6",
    "/hdescender": "\u2C68",
    "/hdieresis": "\u1E27",
    "/hdot": "\u1E23",
    "/hdotaccent": "\u1E23",
    "/hdotbelow": "\u1E25",
    "/hdrsquare": "\u1F1A7",
    "/he": "\u05D4",
    "/he:hb": "\u05D4",
    "/headphone": "\u1F3A7",
    "/headstonegraveyard": "\u26FC",
    "/hearNoEvilMonkey": "\u1F649",
    "/heart": "\u2665",
    "/heartArrow": "\u1F498",
    "/heartDecoration": "\u1F49F",
    "/heartRibbon": "\u1F49D",
    "/heartTipOnTheLeft": "\u1F394",
    "/heartblack": "\u2665",
    "/heartsuitblack": "\u2665",
    "/heartsuitwhite": "\u2661",
    "/heartwhite": "\u2661",
    "/heavyDollarSign": "\u1F4B2",
    "/heavyLatinCross": "\u1F547",
    "/heavydbldashhorz": "\u254D",
    "/heavydbldashvert": "\u254F",
    "/heavydn": "\u257B",
    "/heavydnhorz": "\u2533",
    "/heavydnleft": "\u2513",
    "/heavydnright": "\u250F",
    "/heavyhorz": "\u2501",
    "/heavyleft": "\u2578",
    "/heavyleftlightright": "\u257E",
    "/heavyquaddashhorz": "\u2509",
    "/heavyquaddashvert": "\u250B",
    "/heavyright": "\u257A",
    "/heavytrpldashhorz": "\u2505",
    "/heavytrpldashvert": "\u2507",
    "/heavyup": "\u2579",
    "/heavyuphorz": "\u253B",
    "/heavyupleft": "\u251B",
    "/heavyuplightdn": "\u257F",
    "/heavyupright": "\u2517",
    "/heavyvert": "\u2503",
    "/heavyverthorz": "\u254B",
    "/heavyvertleft": "\u252B",
    "/heavyvertright": "\u2523",
    "/hecirclekatakana": "\u32EC",
    "/hedagesh": "\uFB34",
    "/hedageshhebrew": "\uFB34",
    "/hedinterlacedpentagramleft": "\u26E6",
    "/hedinterlacedpentagramright": "\u26E5",
    "/heh": "\u0647",
    "/heh.fina": "\uFEEA",
    "/heh.init": "\uFEEB",
    "/heh.init_alefmaksura.fina": "\uFC53",
    "/heh.init_jeem.fina": "\uFC51",
    "/heh.init_jeem.medi": "\uFCD7",
    "/heh.init_meem.fina": "\uFC52",
    "/heh.init_meem.medi": "\uFCD8",
    "/heh.init_meem.medi_jeem.medi": "\uFD93",
    "/heh.init_meem.medi_meem.medi": "\uFD94",
    "/heh.init_superscriptalef.medi": "\uFCD9",
    "/heh.init_yeh.fina": "\uFC54",
    "/heh.isol": "\uFEE9",
    "/heh.medi": "\uFEEC",
    "/hehaltonearabic": "\u06C1",
    "/heharabic": "\u0647",
    "/hehdoachashmee": "\u06BE",
    "/hehdoachashmee.fina": "\uFBAB",
    "/hehdoachashmee.init": "\uFBAC",
    "/hehdoachashmee.isol": "\uFBAA",
    "/hehdoachashmee.medi": "\uFBAD",
    "/hehebrew": "\u05D4",
    "/hehfinalaltonearabic": "\uFBA7",
    "/hehfinalalttwoarabic": "\uFEEA",
    "/hehfinalarabic": "\uFEEA",
    "/hehgoal": "\u06C1",
    "/hehgoal.fina": "\uFBA7",
    "/hehgoal.init": "\uFBA8",
    "/hehgoal.isol": "\uFBA6",
    "/hehgoal.medi": "\uFBA9",
    "/hehgoalhamza": "\u06C2",
    "/hehhamzaabovefinalarabic": "\uFBA5",
    "/hehhamzaaboveisolatedarabic": "\uFBA4",
    "/hehinitialaltonearabic": "\uFBA8",
    "/hehinitialarabic": "\uFEEB",
    "/hehinvertedV": "\u06FF",
    "/hehiragana": "\u3078",
    "/hehmedialaltonearabic": "\uFBA9",
    "/hehmedialarabic": "\uFEEC",
    "/hehyeh": "\u06C0",
    "/hehyeh.fina": "\uFBA5",
    "/hehyeh.isol": "\uFBA4",
    "/heiseierasquare": "\u337B",
    "/hekatakana": "\u30D8",
    "/hekatakanahalfwidth": "\uFF8D",
    "/hekutaarusquare": "\u3336",
    "/helicopter": "\u1F681",
    "/helm": "\u2388",
    "/helmetcrosswhite": "\u26D1",
    "/heng": "\uA727",
    "/henghook": "\u0267",
    "/herb": "\u1F33F",
    "/hermitianconjugatematrix": "\u22B9",
    "/herutusquare": "\u3339",
    "/het": "\u05D7",
    "/het:hb": "\u05D7",
    "/heta": "\u0371",
    "/hethebrew": "\u05D7",
    "/hewide:hb": "\uFB23",
    "/hewithmapiq:hb": "\uFB34",
    "/hfishhookturned": "\u02AE",
    "/hhalf": "\u2C76",
    "/hhook": "\u0266",
    "/hhooksuperior": "\u02B1",
    "/hhooksupmod": "\u02B1",
    "/hi-ressquare": "\u1F1A8",
    "/hibiscus": "\u1F33A",
    "/hicirclekatakana": "\u32EA",
    "/hieuhacirclekorean": "\u327B",
    "/hieuhaparenkorean": "\u321B",
    "/hieuhcirclekorean": "\u326D",
    "/hieuhkorean": "\u314E",
    "/hieuhparenkorean": "\u320D",
    "/high-heeledShoe": "\u1F460",
    "/highBrightness": "\u1F506",
    "/highSpeedTrain": "\u1F684",
    "/highSpeedTrainWithBulletNose": "\u1F685",
    "/highhamza": "\u0674",
    "/highideographiccircled": "\u32A4",
    "/highvoltage": "\u26A1",
    "/hihiragana": "\u3072",
    "/hikatakana": "\u30D2",
    "/hikatakanahalfwidth": "\uFF8B",
    "/hira:a": "\u3042",
    "/hira:asmall": "\u3041",
    "/hira:ba": "\u3070",
    "/hira:be": "\u3079",
    "/hira:bi": "\u3073",
    "/hira:bo": "\u307C",
    "/hira:bu": "\u3076",
    "/hira:da": "\u3060",
    "/hira:de": "\u3067",
    "/hira:di": "\u3062",
    "/hira:digraphyori": "\u309F",
    "/hira:do": "\u3069",
    "/hira:du": "\u3065",
    "/hira:e": "\u3048",
    "/hira:esmall": "\u3047",
    "/hira:ga": "\u304C",
    "/hira:ge": "\u3052",
    "/hira:gi": "\u304E",
    "/hira:go": "\u3054",
    "/hira:gu": "\u3050",
    "/hira:ha": "\u306F",
    "/hira:he": "\u3078",
    "/hira:hi": "\u3072",
    "/hira:ho": "\u307B",
    "/hira:hu": "\u3075",
    "/hira:i": "\u3044",
    "/hira:ismall": "\u3043",
    "/hira:iterationhiragana": "\u309D",
    "/hira:ka": "\u304B",
    "/hira:kasmall": "\u3095",
    "/hira:ke": "\u3051",
    "/hira:kesmall": "\u3096",
    "/hira:ki": "\u304D",
    "/hira:ko": "\u3053",
    "/hira:ku": "\u304F",
    "/hira:ma": "\u307E",
    "/hira:me": "\u3081",
    "/hira:mi": "\u307F",
    "/hira:mo": "\u3082",
    "/hira:mu": "\u3080",
    "/hira:n": "\u3093",
    "/hira:na": "\u306A",
    "/hira:ne": "\u306D",
    "/hira:ni": "\u306B",
    "/hira:no": "\u306E",
    "/hira:nu": "\u306C",
    "/hira:o": "\u304A",
    "/hira:osmall": "\u3049",
    "/hira:pa": "\u3071",
    "/hira:pe": "\u307A",
    "/hira:pi": "\u3074",
    "/hira:po": "\u307D",
    "/hira:pu": "\u3077",
    "/hira:ra": "\u3089",
    "/hira:re": "\u308C",
    "/hira:ri": "\u308A",
    "/hira:ro": "\u308D",
    "/hira:ru": "\u308B",
    "/hira:sa": "\u3055",
    "/hira:se": "\u305B",
    "/hira:semivoicedmarkkana": "\u309C",
    "/hira:semivoicedmarkkanacmb": "\u309A",
    "/hira:si": "\u3057",
    "/hira:so": "\u305D",
    "/hira:su": "\u3059",
    "/hira:ta": "\u305F",
    "/hira:te": "\u3066",
    "/hira:ti": "\u3061",
    "/hira:to": "\u3068",
    "/hira:tu": "\u3064",
    "/hira:tusmall": "\u3063",
    "/hira:u": "\u3046",
    "/hira:usmall": "\u3045",
    "/hira:voicediterationhiragana": "\u309E",
    "/hira:voicedmarkkana": "\u309B",
    "/hira:voicedmarkkanacmb": "\u3099",
    "/hira:vu": "\u3094",
    "/hira:wa": "\u308F",
    "/hira:wasmall": "\u308E",
    "/hira:we": "\u3091",
    "/hira:wi": "\u3090",
    "/hira:wo": "\u3092",
    "/hira:ya": "\u3084",
    "/hira:yasmall": "\u3083",
    "/hira:yo": "\u3088",
    "/hira:yosmall": "\u3087",
    "/hira:yu": "\u3086",
    "/hira:yusmall": "\u3085",
    "/hira:za": "\u3056",
    "/hira:ze": "\u305C",
    "/hira:zi": "\u3058",
    "/hira:zo": "\u305E",
    "/hira:zu": "\u305A",
    "/hiriq": "\u05B4",
    "/hiriq14": "\u05B4",
    "/hiriq21": "\u05B4",
    "/hiriq2d": "\u05B4",
    "/hiriq:hb": "\u05B4",
    "/hiriqhebrew": "\u05B4",
    "/hiriqnarrowhebrew": "\u05B4",
    "/hiriqquarterhebrew": "\u05B4",
    "/hiriqwidehebrew": "\u05B4",
    "/historicsite": "\u26EC",
    "/hlinebelow": "\u1E96",
    "/hmonospace": "\uFF48",
    "/hoarmenian": "\u0570",
    "/hocho": "\u1F52A",
    "/hocirclekatakana": "\u32ED",
    "/hohipthai": "\u0E2B",
    "/hohiragana": "\u307B",
    "/hokatakana": "\u30DB",
    "/hokatakanahalfwidth": "\uFF8E",
    "/holam": "\u05B9",
    "/holam19": "\u05B9",
    "/holam26": "\u05B9",
    "/holam32": "\u05B9",
    "/holam:hb": "\u05B9",
    "/holamHaser:hb": "\u05BA",
    "/holamhebrew": "\u05B9",
    "/holamnarrowhebrew": "\u05B9",
    "/holamquarterhebrew": "\u05B9",
    "/holamwidehebrew": "\u05B9",
    "/hole": "\u1F573",
    "/homotic": "\u223B",
    "/honeyPot": "\u1F36F",
    "/honeybee": "\u1F41D",
    "/honokhukthai": "\u0E2E",
    "/honsquare": "\u333F",
    "/hook": "\u2440",
    "/hookabovecomb": "\u0309",
    "/hookcmb": "\u0309",
    "/hookpalatalizedbelowcmb": "\u0321",
    "/hookretroflexbelowcmb": "\u0322",
    "/hoonsquare": "\u3342",
    "/hoorusquare": "\u3341",
    "/horicoptic": "\u03E9",
    "/horizontalTrafficLight": "\u1F6A5",
    "/horizontalbar": "\u2015",
    "/horizontalbarwhitearrowonpedestalup": "\u21EC",
    "/horizontalmalestroke": "\u26A9",
    "/horncmb": "\u031B",
    "/horse": "\u1F40E",
    "/horseFace": "\u1F434",
    "/horseRacing": "\u1F3C7",
    "/hospital": "\u1F3E5",
    "/hotDog": "\u1F32D",
    "/hotPepper": "\u1F336",
    "/hotbeverage": "\u2615",
    "/hotel": "\u1F3E8",
    "/hotsprings": "\u2668",
    "/hourglass": "\u231B",
    "/hourglassflowings": "\u23F3",
    "/house": "\u2302",
    "/houseBuilding": "\u1F3E0",
    "/houseBuildings": "\u1F3D8",
    "/houseGarden": "\u1F3E1",
    "/hpafullwidth": "\u3371",
    "/hpalatalhook": "\uA795",
    "/hparen": "\u24A3",
    "/hparenthesized": "\u24A3",
    "/hpfullwidth": "\u33CB",
    "/hryvnia": "\u20B4",
    "/hsuperior": "\u02B0",
    "/hsupmod": "\u02B0",
    "/hturned": "\u0265",
    "/htypeopencircuit": "\u238F",
    "/huaraddosquare": "\u3332",
    "/hucirclekatakana": "\u32EB",
    "/huhiragana": "\u3075",
    "/huiitosquare": "\u3333",
    "/hukatakana": "\u30D5",
    "/hukatakanahalfwidth": "\uFF8C",
    "/hundredPoints": "\u1F4AF",
    "/hundredthousandscmbcyr": "\u0488",
    "/hungarumlaut": "\u02DD",
    "/hungarumlautcmb": "\u030B",
    "/huransquare": "\u3335",
    "/hushedFace": "\u1F62F",
    "/hv": "\u0195",
    "/hwd:a": "\uFFC2",
    "/hwd:ae": "\uFFC3",
    "/hwd:blacksquare": "\uFFED",
    "/hwd:chieuch": "\uFFBA",
    "/hwd:cieuc": "\uFFB8",
    "/hwd:downwardsarrow": "\uFFEC",
    "/hwd:e": "\uFFC7",
    "/hwd:eo": "\uFFC6",
    "/hwd:eu": "\uFFDA",
    "/hwd:formslightvertical": "\uFFE8",
    "/hwd:hangulfiller": "\uFFA0",
    "/hwd:hieuh": "\uFFBE",
    "/hwd:i": "\uFFDC",
    "/hwd:ideographiccomma": "\uFF64",
    "/hwd:ideographicfullstop": "\uFF61",
    "/hwd:ieung": "\uFFB7",
    "/hwd:kata:a": "\uFF71",
    "/hwd:kata:asmall": "\uFF67",
    "/hwd:kata:e": "\uFF74",
    "/hwd:kata:esmall": "\uFF6A",
    "/hwd:kata:ha": "\uFF8A",
    "/hwd:kata:he": "\uFF8D",
    "/hwd:kata:hi": "\uFF8B",
    "/hwd:kata:ho": "\uFF8E",
    "/hwd:kata:hu": "\uFF8C",
    "/hwd:kata:i": "\uFF72",
    "/hwd:kata:ismall": "\uFF68",
    "/hwd:kata:ka": "\uFF76",
    "/hwd:kata:ke": "\uFF79",
    "/hwd:kata:ki": "\uFF77",
    "/hwd:kata:ko": "\uFF7A",
    "/hwd:kata:ku": "\uFF78",
    "/hwd:kata:ma": "\uFF8F",
    "/hwd:kata:me": "\uFF92",
    "/hwd:kata:mi": "\uFF90",
    "/hwd:kata:middledot": "\uFF65",
    "/hwd:kata:mo": "\uFF93",
    "/hwd:kata:mu": "\uFF91",
    "/hwd:kata:n": "\uFF9D",
    "/hwd:kata:na": "\uFF85",
    "/hwd:kata:ne": "\uFF88",
    "/hwd:kata:ni": "\uFF86",
    "/hwd:kata:no": "\uFF89",
    "/hwd:kata:nu": "\uFF87",
    "/hwd:kata:o": "\uFF75",
    "/hwd:kata:osmall": "\uFF6B",
    "/hwd:kata:prolongedkana": "\uFF70",
    "/hwd:kata:ra": "\uFF97",
    "/hwd:kata:re": "\uFF9A",
    "/hwd:kata:ri": "\uFF98",
    "/hwd:kata:ro": "\uFF9B",
    "/hwd:kata:ru": "\uFF99",
    "/hwd:kata:sa": "\uFF7B",
    "/hwd:kata:se": "\uFF7E",
    "/hwd:kata:semi-voiced": "\uFF9F",
    "/hwd:kata:si": "\uFF7C",
    "/hwd:kata:so": "\uFF7F",
    "/hwd:kata:su": "\uFF7D",
    "/hwd:kata:ta": "\uFF80",
    "/hwd:kata:te": "\uFF83",
    "/hwd:kata:ti": "\uFF81",
    "/hwd:kata:to": "\uFF84",
    "/hwd:kata:tu": "\uFF82",
    "/hwd:kata:tusmall": "\uFF6F",
    "/hwd:kata:u": "\uFF73",
    "/hwd:kata:usmall": "\uFF69",
    "/hwd:kata:voiced": "\uFF9E",
    "/hwd:kata:wa": "\uFF9C",
    "/hwd:kata:wo": "\uFF66",
    "/hwd:kata:ya": "\uFF94",
    "/hwd:kata:yasmall": "\uFF6C",
    "/hwd:kata:yo": "\uFF96",
    "/hwd:kata:yosmall": "\uFF6E",
    "/hwd:kata:yu": "\uFF95",
    "/hwd:kata:yusmall": "\uFF6D",
    "/hwd:khieukh": "\uFFBB",
    "/hwd:kiyeok": "\uFFA1",
    "/hwd:kiyeoksios": "\uFFA3",
    "/hwd:leftcornerbracket": "\uFF62",
    "/hwd:leftwardsarrow": "\uFFE9",
    "/hwd:mieum": "\uFFB1",
    "/hwd:nieun": "\uFFA4",
    "/hwd:nieuncieuc": "\uFFA5",
    "/hwd:nieunhieuh": "\uFFA6",
    "/hwd:o": "\uFFCC",
    "/hwd:oe": "\uFFCF",
    "/hwd:phieuph": "\uFFBD",
    "/hwd:pieup": "\uFFB2",
    "/hwd:pieupsios": "\uFFB4",
    "/hwd:rieul": "\uFFA9",
    "/hwd:rieulhieuh": "\uFFB0",
    "/hwd:rieulkiyeok": "\uFFAA",
    "/hwd:rieulmieum": "\uFFAB",
    "/hwd:rieulphieuph": "\uFFAF",
    "/hwd:rieulpieup": "\uFFAC",
    "/hwd:rieulsios": "\uFFAD",
    "/hwd:rieulthieuth": "\uFFAE",
    "/hwd:rightcornerbracket": "\uFF63",
    "/hwd:rightwardsarrow": "\uFFEB",
    "/hwd:sios": "\uFFB5",
    "/hwd:ssangcieuc": "\uFFB9",
    "/hwd:ssangkiyeok": "\uFFA2",
    "/hwd:ssangpieup": "\uFFB3",
    "/hwd:ssangsios": "\uFFB6",
    "/hwd:ssangtikeut": "\uFFA8",
    "/hwd:thieuth": "\uFFBC",
    "/hwd:tikeut": "\uFFA7",
    "/hwd:u": "\uFFD3",
    "/hwd:upwardsarrow": "\uFFEA",
    "/hwd:wa": "\uFFCD",
    "/hwd:wae": "\uFFCE",
    "/hwd:we": "\uFFD5",
    "/hwd:weo": "\uFFD4",
    "/hwd:whitecircle": "\uFFEE",
    "/hwd:wi": "\uFFD6",
    "/hwd:ya": "\uFFC4",
    "/hwd:yae": "\uFFC5",
    "/hwd:ye": "\uFFCB",
    "/hwd:yeo": "\uFFCA",
    "/hwd:yi": "\uFFDB",
    "/hwd:yo": "\uFFD2",
    "/hwd:yu": "\uFFD7",
    "/hyphen": "\u002D",
    "/hyphenationpoint": "\u2027",
    "/hyphenbullet": "\u2043",
    "/hyphendbl": "\u2E40",
    "/hyphendbloblique": "\u2E17",
    "/hyphendieresis": "\u2E1A",
    "/hypheninferior": "\uF6E5",
    "/hyphenminus": "\u002D",
    "/hyphenmonospace": "\uFF0D",
    "/hyphensmall": "\uFE63",
    "/hyphensoft": "\u00AD",
    "/hyphensuperior": "\uF6E6",
    "/hyphentwo": "\u2010",
    "/hypodiastole": "\u2E12",
    "/hysteresis": "\u238E",
    "/hzfullwidth": "\u3390",
    "/i": "\u0069",
    "/i.superior": "\u2071",
    "/iacute": "\u00ED",
    "/iacyrillic": "\u044F",
    "/iaepigraphic": "\uA7FE",
    "/ibengali": "\u0987",
    "/ibopomofo": "\u3127",
    "/ibreve": "\u012D",
    "/icaron": "\u01D0",
    "/iceCream": "\u1F368",
    "/iceHockeyStickAndPuck": "\u1F3D2",
    "/iceskate": "\u26F8",
    "/icircle": "\u24D8",
    "/icirclekatakana": "\u32D1",
    "/icircumflex": "\u00EE",
    "/icyr": "\u0438",
    "/icyrillic": "\u0456",
    "/idblgrave": "\u0209",
    "/idblstruckitalic": "\u2148",
    "/ideographearthcircle": "\u328F",
    "/ideographfirecircle": "\u328B",
    "/ideographicallianceparen": "\u323F",
    "/ideographiccallparen": "\u323A",
    "/ideographiccentrecircle": "\u32A5",
    "/ideographicclose": "\u3006",
    "/ideographiccomma": "\u3001",
    "/ideographiccommaleft": "\uFF64",
    "/ideographiccongratulationparen": "\u3237",
    "/ideographiccorrectcircle": "\u32A3",
    "/ideographicdepartingtonemark": "\u302C",
    "/ideographicearthparen": "\u322F",
    "/ideographicenteringtonemark": "\u302D",
    "/ideographicenterpriseparen": "\u323D",
    "/ideographicexcellentcircle": "\u329D",
    "/ideographicfestivalparen": "\u3240",
    "/ideographicfinancialcircle": "\u3296",
    "/ideographicfinancialparen": "\u3236",
    "/ideographicfireparen": "\u322B",
    "/ideographichalffillspace": "\u303F",
    "/ideographichaveparen": "\u3232",
    "/ideographichighcircle": "\u32A4",
    "/ideographiciterationmark": "\u3005",
    "/ideographiclaborcircle": "\u3298",
    "/ideographiclaborparen": "\u3238",
    "/ideographicleftcircle": "\u32A7",
    "/ideographicleveltonemark": "\u302A",
    "/ideographiclowcircle": "\u32A6",
    "/ideographicmedicinecircle": "\u32A9",
    "/ideographicmetalparen": "\u322E",
    "/ideographicmoonparen": "\u322A",
    "/ideographicnameparen": "\u3234",
    "/ideographicperiod": "\u3002",
    "/ideographicprintcircle": "\u329E",
    "/ideographicreachparen": "\u3243",
    "/ideographicrepresentparen": "\u3239",
    "/ideographicresourceparen": "\u323E",
    "/ideographicrightcircle": "\u32A8",
    "/ideographicrisingtonemark": "\u302B",
    "/ideographicsecretcircle": "\u3299",
    "/ideographicselfparen": "\u3242",
    "/ideographicsocietyparen": "\u3233",
    "/ideographicspace": "\u3000",
    "/ideographicspecialparen": "\u3235",
    "/ideographicstockparen": "\u3231",
    "/ideographicstudyparen": "\u323B",
    "/ideographicsunparen": "\u3230",
    "/ideographicsuperviseparen": "\u323C",
    "/ideographictelegraphlinefeedseparatorsymbol": "\u3037",
    "/ideographictelegraphsymbolforhoureight": "\u3360",
    "/ideographictelegraphsymbolforhoureighteen": "\u336A",
    "/ideographictelegraphsymbolforhoureleven": "\u3363",
    "/ideographictelegraphsymbolforhourfifteen": "\u3367",
    "/ideographictelegraphsymbolforhourfive": "\u335D",
    "/ideographictelegraphsymbolforhourfour": "\u335C",
    "/ideographictelegraphsymbolforhourfourteen": "\u3366",
    "/ideographictelegraphsymbolforhournine": "\u3361",
    "/ideographictelegraphsymbolforhournineteen": "\u336B",
    "/ideographictelegraphsymbolforhourone": "\u3359",
    "/ideographictelegraphsymbolforhourseven": "\u335F",
    "/ideographictelegraphsymbolforhourseventeen": "\u3369",
    "/ideographictelegraphsymbolforhoursix": "\u335E",
    "/ideographictelegraphsymbolforhoursixteen": "\u3368",
    "/ideographictelegraphsymbolforhourten": "\u3362",
    "/ideographictelegraphsymbolforhourthirteen": "\u3365",
    "/ideographictelegraphsymbolforhourthree": "\u335B",
    "/ideographictelegraphsymbolforhourtwelve": "\u3364",
    "/ideographictelegraphsymbolforhourtwenty": "\u336C",
    "/ideographictelegraphsymbolforhourtwentyfour": "\u3370",
    "/ideographictelegraphsymbolforhourtwentyone": "\u336D",
    "/ideographictelegraphsymbolforhourtwentythree": "\u336F",
    "/ideographictelegraphsymbolforhourtwentytwo": "\u336E",
    "/ideographictelegraphsymbolforhourtwo": "\u335A",
    "/ideographictelegraphsymbolforhourzero": "\u3358",
    "/ideographicvariationindicator": "\u303E",
    "/ideographicwaterparen": "\u322C",
    "/ideographicwoodparen": "\u322D",
    "/ideographiczero": "\u3007",
    "/ideographmetalcircle": "\u328E",
    "/ideographmooncircle": "\u328A",
    "/ideographnamecircle": "\u3294",
    "/ideographsuncircle": "\u3290",
    "/ideographwatercircle": "\u328C",
    "/ideographwoodcircle": "\u328D",
    "/ideva": "\u0907",
    "/idieresis": "\u00EF",
    "/idieresisacute": "\u1E2F",
    "/idieresiscyr": "\u04E5",
    "/idieresiscyrillic": "\u04E5",
    "/idotbelow": "\u1ECB",
    "/idsquare": "\u1F194",
    "/iebrevecyr": "\u04D7",
    "/iebrevecyrillic": "\u04D7",
    "/iecyr": "\u0435",
    "/iecyrillic": "\u0435",
    "/iegravecyr": "\u0450",
    "/iepigraphicsideways": "\uA7F7",
    "/ieungacirclekorean": "\u3275",
    "/ieungaparenkorean": "\u3215",
    "/ieungcirclekorean": "\u3267",
    "/ieungkorean": "\u3147",
    "/ieungparenkorean": "\u3207",
    "/ieungucirclekorean": "\u327E",
    "/igrave": "\u00EC",
    "/igravecyr": "\u045D",
    "/igravedbl": "\u0209",
    "/igujarati": "\u0A87",
    "/igurmukhi": "\u0A07",
    "/ihiragana": "\u3044",
    "/ihoi": "\u1EC9",
    "/ihookabove": "\u1EC9",
    "/iibengali": "\u0988",
    "/iicyrillic": "\u0438",
    "/iideva": "\u0908",
    "/iigujarati": "\u0A88",
    "/iigurmukhi": "\u0A08",
    "/iimatragurmukhi": "\u0A40",
    "/iinvertedbreve": "\u020B",
    "/iishortcyrillic": "\u0439",
    "/iivowelsignbengali": "\u09C0",
    "/iivowelsigndeva": "\u0940",
    "/iivowelsigngujarati": "\u0AC0",
    "/ij": "\u0133",
    "/ikatakana": "\u30A4",
    "/ikatakanahalfwidth": "\uFF72",
    "/ikawi": "\uA985",
    "/ikorean": "\u3163",
    "/ilde": "\u02DC",
    "/iluy:hb": "\u05AC",
    "/iluyhebrew": "\u05AC",
    "/imacron": "\u012B",
    "/imacroncyr": "\u04E3",
    "/imacroncyrillic": "\u04E3",
    "/image": "\u22B7",
    "/imageorapproximatelyequal": "\u2253",
    "/imatragurmukhi": "\u0A3F",
    "/imonospace": "\uFF49",
    "/imp": "\u1F47F",
    "/inboxTray": "\u1F4E5",
    "/incomingEnvelope": "\u1F4E8",
    "/increaseFontSize": "\u1F5DA",
    "/increment": "\u2206",
    "/indianrupee": "\u20B9",
    "/infinity": "\u221E",
    "/information": "\u2139",
    "/infullwidth": "\u33CC",
    "/inhibitarabicformshaping": "\u206C",
    "/inhibitsymmetricswapping": "\u206A",
    "/iniarmenian": "\u056B",
    "/iningusquare": "\u3304",
    "/inmationDeskPerson": "\u1F481",
    "/inputLatinCapitalLetters": "\u1F520",
    "/inputLatinLetters": "\u1F524",
    "/inputLatinSmallLetters": "\u1F521",
    "/inputNumbers": "\u1F522",
    "/inputS": "\u1F523",
    "/insertion": "\u2380",
    "/integral": "\u222B",
    "/integralbottom": "\u2321",
    "/integralbt": "\u2321",
    "/integralclockwise": "\u2231",
    "/integralcontour": "\u222E",
    "/integralcontouranticlockwise": "\u2233",
    "/integralcontourclockwise": "\u2232",
    "/integraldbl": "\u222C",
    "/integralex": "\uF8F5",
    "/integralextension": "\u23AE",
    "/integralsurface": "\u222F",
    "/integraltop": "\u2320",
    "/integraltp": "\u2320",
    "/integraltpl": "\u222D",
    "/integralvolume": "\u2230",
    "/intercalate": "\u22BA",
    "/interlinearanchor": "\uFFF9",
    "/interlinearseparator": "\uFFFA",
    "/interlinearterminator": "\uFFFB",
    "/interlockedfemalemale": "\u26A4",
    "/interrobang": "\u203D",
    "/interrobanginverted": "\u2E18",
    "/intersection": "\u2229",
    "/intersectionarray": "\u22C2",
    "/intersectiondbl": "\u22D2",
    "/intisquare": "\u3305",
    "/invbullet": "\u25D8",
    "/invcircle": "\u25D9",
    "/inverteddamma": "\u0657",
    "/invertedfork": "\u2443",
    "/invertedpentagram": "\u26E7",
    "/invertedundertie": "\u2054",
    "/invisibleplus": "\u2064",
    "/invisibleseparator": "\u2063",
    "/invisibletimes": "\u2062",
    "/invsmileface": "\u263B",
    "/iocyr": "\u0451",
    "/iocyrillic": "\u0451",
    "/iogonek": "\u012F",
    "/iota": "\u03B9",
    "/iotaacute": "\u1F77",
    "/iotaadscript": "\u1FBE",
    "/iotaasper": "\u1F31",
    "/iotaasperacute": "\u1F35",
    "/iotaaspergrave": "\u1F33",
    "/iotaaspertilde": "\u1F37",
    "/iotabreve": "\u1FD0",
    "/iotadieresis": "\u03CA",
    "/iotadieresisacute": "\u1FD3",
    "/iotadieresisgrave": "\u1FD2",
    "/iotadieresistilde": "\u1FD7",
    "/iotadieresistonos": "\u0390",
    "/iotafunc": "\u2373",
    "/iotagrave": "\u1F76",
    "/iotalatin": "\u0269",
    "/iotalenis": "\u1F30",
    "/iotalenisacute": "\u1F34",
    "/iotalenisgrave": "\u1F32",
    "/iotalenistilde": "\u1F36",
    "/iotasub": "\u037A",
    "/iotatilde": "\u1FD6",
    "/iotatonos": "\u03AF",
    "/iotaturned": "\u2129",
    "/iotaunderlinefunc": "\u2378",
    "/iotawithmacron": "\u1FD1",
    "/ipa:Ismall": "\u026A",
    "/ipa:alpha": "\u0251",
    "/ipa:ereversed": "\u0258",
    "/ipa:esh": "\u0283",
    "/ipa:gamma": "\u0263",
    "/ipa:glottalstop": "\u0294",
    "/ipa:gscript": "\u0261",
    "/ipa:iota": "\u0269",
    "/ipa:phi": "\u0278",
    "/ipa:rtail": "\u027D",
    "/ipa:schwa": "\u0259",
    "/ipa:upsilon": "\u028A",
    "/iparen": "\u24A4",
    "/iparenthesized": "\u24A4",
    "/irigurmukhi": "\u0A72",
    "/is": "\uA76D",
    "/isen-isenpada": "\uA9DF",
    "/ishortcyr": "\u0439",
    "/ishortsharptailcyr": "\u048B",
    "/ismallhiragana": "\u3043",
    "/ismallkatakana": "\u30A3",
    "/ismallkatakanahalfwidth": "\uFF68",
    "/issharbengali": "\u09FA",
    "/istroke": "\u0268",
    "/isuperior": "\uF6ED",
    "/itemideographiccircled": "\u32A0",
    "/iterationhiragana": "\u309D",
    "/iterationkatakana": "\u30FD",
    "/itilde": "\u0129",
    "/itildebelow": "\u1E2D",
    "/iubopomofo": "\u3129",
    "/iucyrillic": "\u044E",
    "/iufullwidth": "\u337A",
    "/iukrcyr": "\u0456",
    "/ivowelsignbengali": "\u09BF",
    "/ivowelsigndeva": "\u093F",
    "/ivowelsigngujarati": "\u0ABF",
    "/izakayaLantern": "\u1F3EE",
    "/izhitsacyr": "\u0475",
    "/izhitsacyrillic": "\u0475",
    "/izhitsadblgravecyrillic": "\u0477",
    "/izhitsagravedblcyr": "\u0477",
    "/j": "\u006A",
    "/j.inferior": "\u2C7C",
    "/jaarmenian": "\u0571",
    "/jabengali": "\u099C",
    "/jackOLantern": "\u1F383",
    "/jadeva": "\u091C",
    "/jagujarati": "\u0A9C",
    "/jagurmukhi": "\u0A1C",
    "/jamahaprana": "\uA999",
    "/januarytelegraph": "\u32C0",
    "/japaneseBeginner": "\u1F530",
    "/japaneseCastle": "\u1F3EF",
    "/japaneseDolls": "\u1F38E",
    "/japaneseGoblin": "\u1F47A",
    "/japaneseOgre": "\u1F479",
    "/japanesePostOffice": "\u1F3E3",
    "/japanesebank": "\u26FB",
    "/java:a": "\uA984",
    "/java:ai": "\uA98D",
    "/java:ba": "\uA9A7",
    "/java:ca": "\uA995",
    "/java:da": "\uA9A2",
    "/java:dda": "\uA99D",
    "/java:e": "\uA98C",
    "/java:eight": "\uA9D8",
    "/java:five": "\uA9D5",
    "/java:four": "\uA9D4",
    "/java:ga": "\uA992",
    "/java:ha": "\uA9B2",
    "/java:i": "\uA986",
    "/java:ii": "\uA987",
    "/java:ja": "\uA997",
    "/java:ka": "\uA98F",
    "/java:la": "\uA9AD",
    "/java:ma": "\uA9A9",
    "/java:na": "\uA9A4",
    "/java:nga": "\uA994",
    "/java:nine": "\uA9D9",
    "/java:nya": "\uA99A",
    "/java:o": "\uA98E",
    "/java:one": "\uA9D1",
    "/java:pa": "\uA9A5",
    "/java:ra": "\uA9AB",
    "/java:sa": "\uA9B1",
    "/java:seven": "\uA9D7",
    "/java:six": "\uA9D6",
    "/java:ta": "\uA9A0",
    "/java:three": "\uA9D3",
    "/java:tta": "\uA99B",
    "/java:two": "\uA9D2",
    "/java:u": "\uA988",
    "/java:wa": "\uA9AE",
    "/java:ya": "\uA9AA",
    "/java:zero": "\uA9D0",
    "/jbopomofo": "\u3110",
    "/jcaron": "\u01F0",
    "/jcircle": "\u24D9",
    "/jcircumflex": "\u0135",
    "/jcrossedtail": "\u029D",
    "/jdblstruckitalic": "\u2149",
    "/jdotlessstroke": "\u025F",
    "/jeans": "\u1F456",
    "/jecyr": "\u0458",
    "/jecyrillic": "\u0458",
    "/jeem": "\u062C",
    "/jeem.fina": "\uFE9E",
    "/jeem.init": "\uFE9F",
    "/jeem.init_alefmaksura.fina": "\uFD01",
    "/jeem.init_hah.fina": "\uFC15",
    "/jeem.init_hah.medi": "\uFCA7",
    "/jeem.init_meem.fina": "\uFC16",
    "/jeem.init_meem.medi": "\uFCA8",
    "/jeem.init_meem.medi_hah.medi": "\uFD59",
    "/jeem.init_yeh.fina": "\uFD02",
    "/jeem.isol": "\uFE9D",
    "/jeem.medi": "\uFEA0",
    "/jeem.medi_alefmaksura.fina": "\uFD1D",
    "/jeem.medi_hah.medi_alefmaksura.fina": "\uFDA6",
    "/jeem.medi_hah.medi_yeh.fina": "\uFDBE",
    "/jeem.medi_meem.medi_alefmaksura.fina": "\uFDA7",
    "/jeem.medi_meem.medi_hah.fina": "\uFD58",
    "/jeem.medi_meem.medi_yeh.fina": "\uFDA5",
    "/jeem.medi_yeh.fina": "\uFD1E",
    "/jeemabove": "\u06DA",
    "/jeemarabic": "\u062C",
    "/jeemfinalarabic": "\uFE9E",
    "/jeeminitialarabic": "\uFE9F",
    "/jeemmedialarabic": "\uFEA0",
    "/jeh": "\u0698",
    "/jeh.fina": "\uFB8B",
    "/jeh.isol": "\uFB8A",
    "/jeharabic": "\u0698",
    "/jehfinalarabic": "\uFB8B",
    "/jhabengali": "\u099D",
    "/jhadeva": "\u091D",
    "/jhagujarati": "\u0A9D",
    "/jhagurmukhi": "\u0A1D",
    "/jheharmenian": "\u057B",
    "/jis": "\u3004",
    "/jiterup": "\u2643",
    "/jmonospace": "\uFF4A",
    "/jotdiaeresisfunc": "\u2364",
    "/jotunderlinefunc": "\u235B",
    "/joystick": "\u1F579",
    "/jparen": "\u24A5",
    "/jparenthesized": "\u24A5",
    "/jstroke": "\u0249",
    "/jsuperior": "\u02B2",
    "/jsupmod": "\u02B2",
    "/jueuicircle": "\u327D",
    "/julytelegraph": "\u32C6",
    "/junetelegraph": "\u32C5",
    "/juno": "\u26B5",
    "/k": "\u006B",
    "/k.inferior": "\u2096",
    "/kaaba": "\u1F54B",
    "/kaaleutcyr": "\u051F",
    "/kabashkcyr": "\u04A1",
    "/kabashkircyrillic": "\u04A1",
    "/kabengali": "\u0995",
    "/kacirclekatakana": "\u32D5",
    "/kacute": "\u1E31",
    "/kacyr": "\u043A",
    "/kacyrillic": "\u043A",
    "/kadescendercyrillic": "\u049B",
    "/kadeva": "\u0915",
    "/kaf": "\u05DB",
    "/kaf.fina": "\uFEDA",
    "/kaf.init": "\uFEDB",
    "/kaf.init_alef.fina": "\uFC37",
    "/kaf.init_alefmaksura.fina": "\uFC3D",
    "/kaf.init_hah.fina": "\uFC39",
    "/kaf.init_hah.medi": "\uFCC5",
    "/kaf.init_jeem.fina": "\uFC38",
    "/kaf.init_jeem.medi": "\uFCC4",
    "/kaf.init_khah.fina": "\uFC3A",
    "/kaf.init_khah.medi": "\uFCC6",
    "/kaf.init_lam.fina": "\uFC3B",
    "/kaf.init_lam.medi": "\uFCC7",
    "/kaf.init_meem.fina": "\uFC3C",
    "/kaf.init_meem.medi": "\uFCC8",
    "/kaf.init_meem.medi_meem.medi": "\uFDC3",
    "/kaf.init_yeh.fina": "\uFC3E",
    "/kaf.isol": "\uFED9",
    "/kaf.medi": "\uFEDC",
    "/kaf.medi_alef.fina": "\uFC80",
    "/kaf.medi_alefmaksura.fina": "\uFC83",
    "/kaf.medi_lam.fina": "\uFC81",
    "/kaf.medi_lam.medi": "\uFCEB",
    "/kaf.medi_meem.fina": "\uFC82",
    "/kaf.medi_meem.medi": "\uFCEC",
    "/kaf.medi_meem.medi_meem.fina": "\uFDBB",
    "/kaf.medi_meem.medi_yeh.fina": "\uFDB7",
    "/kaf.medi_yeh.fina": "\uFC84",
    "/kaf:hb": "\u05DB",
    "/kafTwoDotsAbove": "\u077F",
    "/kafarabic": "\u0643",
    "/kafdagesh": "\uFB3B",
    "/kafdageshhebrew": "\uFB3B",
    "/kafdotabove": "\u06AC",
    "/kaffinalarabic": "\uFEDA",
    "/kafhebrew": "\u05DB",
    "/kafinitialarabic": "\uFEDB",
    "/kafmedialarabic": "\uFEDC",
    "/kafrafehebrew": "\uFB4D",
    "/kafring": "\u06AB",
    "/kafswash": "\u06AA",
    "/kafthreedotsbelow": "\u06AE",
    "/kafullwidth": "\u3384",
    "/kafwide:hb": "\uFB24",
    "/kafwithdagesh:hb": "\uFB3B",
    "/kafwithrafe:hb": "\uFB4D",
    "/kagujarati": "\u0A95",
    "/kagurmukhi": "\u0A15",
    "/kahiragana": "\u304B",
    "/kahookcyr": "\u04C4",
    "/kahookcyrillic": "\u04C4",
    "/kairisquare": "\u330B",
    "/kaisymbol": "\u03D7",
    "/kakatakana": "\u30AB",
    "/kakatakanahalfwidth": "\uFF76",
    "/kamurda": "\uA991",
    "/kappa": "\u03BA",
    "/kappa.math": "\u03F0",
    "/kappasymbolgreek": "\u03F0",
    "/kapyeounmieumkorean": "\u3171",
    "/kapyeounphieuphkorean": "\u3184",
    "/kapyeounpieupkorean": "\u3178",
    "/kapyeounssangpieupkorean": "\u3179",
    "/karattosquare": "\u330C",
    "/karoriisquare": "\u330D",
    "/kasasak": "\uA990",
    "/kashida": "\u0640",
    "/kashidaFina": "\uFE73",
    "/kashidaautoarabic": "\u0640",
    "/kashidaautonosidebearingarabic": "\u0640",
    "/kashmiriyeh": "\u0620",
    "/kasmallkatakana": "\u30F5",
    "/kasquare": "\u3384",
    "/kasra": "\u0650",
    "/kasraIsol": "\uFE7A",
    "/kasraMedi": "\uFE7B",
    "/kasraarabic": "\u0650",
    "/kasrasmall": "\u061A",
    "/kasratan": "\u064D",
    "/kasratanIsol": "\uFE74",
    "/kasratanarabic": "\u064D",
    "/kastrokecyr": "\u049F",
    "/kastrokecyrillic": "\u049F",
    "/kata:a": "\u30A2",
    "/kata:asmall": "\u30A1",
    "/kata:ba": "\u30D0",
    "/kata:be": "\u30D9",
    "/kata:bi": "\u30D3",
    "/kata:bo": "\u30DC",
    "/kata:bu": "\u30D6",
    "/kata:da": "\u30C0",
    "/kata:de": "\u30C7",
    "/kata:di": "\u30C2",
    "/kata:digraphkoto": "\u30FF",
    "/kata:do": "\u30C9",
    "/kata:doublehyphenkana": "\u30A0",
    "/kata:du": "\u30C5",
    "/kata:e": "\u30A8",
    "/kata:esmall": "\u30A7",
    "/kata:ga": "\u30AC",
    "/kata:ge": "\u30B2",
    "/kata:gi": "\u30AE",
    "/kata:go": "\u30B4",
    "/kata:gu": "\u30B0",
    "/kata:ha": "\u30CF",
    "/kata:he": "\u30D8",
    "/kata:hi": "\u30D2",
    "/kata:ho": "\u30DB",
    "/kata:hu": "\u30D5",
    "/kata:i": "\u30A4",
    "/kata:ismall": "\u30A3",
    "/kata:iteration": "\u30FD",
    "/kata:ka": "\u30AB",
    "/kata:kasmall": "\u30F5",
    "/kata:ke": "\u30B1",
    "/kata:kesmall": "\u30F6",
    "/kata:ki": "\u30AD",
    "/kata:ko": "\u30B3",
    "/kata:ku": "\u30AF",
    "/kata:ma": "\u30DE",
    "/kata:me": "\u30E1",
    "/kata:mi": "\u30DF",
    "/kata:middledot": "\u30FB",
    "/kata:mo": "\u30E2",
    "/kata:mu": "\u30E0",
    "/kata:n": "\u30F3",
    "/kata:na": "\u30CA",
    "/kata:ne": "\u30CD",
    "/kata:ni": "\u30CB",
    "/kata:no": "\u30CE",
    "/kata:nu": "\u30CC",
    "/kata:o": "\u30AA",
    "/kata:osmall": "\u30A9",
    "/kata:pa": "\u30D1",
    "/kata:pe": "\u30DA",
    "/kata:pi": "\u30D4",
    "/kata:po": "\u30DD",
    "/kata:prolongedkana": "\u30FC",
    "/kata:pu": "\u30D7",
    "/kata:ra": "\u30E9",
    "/kata:re": "\u30EC",
    "/kata:ri": "\u30EA",
    "/kata:ro": "\u30ED",
    "/kata:ru": "\u30EB",
    "/kata:sa": "\u30B5",
    "/kata:se": "\u30BB",
    "/kata:si": "\u30B7",
    "/kata:so": "\u30BD",
    "/kata:su": "\u30B9",
    "/kata:ta": "\u30BF",
    "/kata:te": "\u30C6",
    "/kata:ti": "\u30C1",
    "/kata:to": "\u30C8",
    "/kata:tu": "\u30C4",
    "/kata:tusmall": "\u30C3",
    "/kata:u": "\u30A6",
    "/kata:usmall": "\u30A5",
    "/kata:va": "\u30F7",
    "/kata:ve": "\u30F9",
    "/kata:vi": "\u30F8",
    "/kata:vo": "\u30FA",
    "/kata:voicediteration": "\u30FE",
    "/kata:vu": "\u30F4",
    "/kata:wa": "\u30EF",
    "/kata:wasmall": "\u30EE",
    "/kata:we": "\u30F1",
    "/kata:wi": "\u30F0",
    "/kata:wo": "\u30F2",
    "/kata:ya": "\u30E4",
    "/kata:yasmall": "\u30E3",
    "/kata:yo": "\u30E8",
    "/kata:yosmall": "\u30E7",
    "/kata:yu": "\u30E6",
    "/kata:yusmall": "\u30E5",
    "/kata:za": "\u30B6",
    "/kata:ze": "\u30BC",
    "/kata:zi": "\u30B8",
    "/kata:zo": "\u30BE",
    "/kata:zu": "\u30BA",
    "/katahiraprolongmarkhalfwidth": "\uFF70",
    "/katailcyr": "\u049B",
    "/kaverticalstrokecyr": "\u049D",
    "/kaverticalstrokecyrillic": "\u049D",
    "/kavykainvertedlow": "\u2E45",
    "/kavykalow": "\u2E47",
    "/kavykawithdotlow": "\u2E48",
    "/kavykawithkavykaaboveinvertedlow": "\u2E46",
    "/kbfullwidth": "\u3385",
    "/kbopomofo": "\u310E",
    "/kcalfullwidth": "\u3389",
    "/kcalsquare": "\u3389",
    "/kcaron": "\u01E9",
    "/kcedilla": "\u0137",
    "/kcircle": "\u24DA",
    "/kcommaaccent": "\u0137",
    "/kdescender": "\u2C6A",
    "/kdiagonalstroke": "\uA743",
    "/kdotbelow": "\u1E33",
    "/kecirclekatakana": "\u32D8",
    "/keesusquare": "\u331C",
    "/keharmenian": "\u0584",
    "/keheh": "\u06A9",
    "/keheh.fina": "\uFB8F",
    "/keheh.init": "\uFB90",
    "/keheh.isol": "\uFB8E",
    "/keheh.medi": "\uFB91",
    "/kehehDotAbove": "\u0762",
    "/kehehThreeDotsAbove": "\u0763",
    "/kehehThreeDotsUpBelow": "\u0764",
    "/kehehthreedotsbelow": "\u063C",
    "/kehehtwodotsabove": "\u063B",
    "/kehiragana": "\u3051",
    "/kekatakana": "\u30B1",
    "/kekatakanahalfwidth": "\uFF79",
    "/kelvin": "\u212A",
    "/kenarmenian": "\u056F",
    "/keretconsonant": "\uA9BD",
    "/kesmallkatakana": "\u30F6",
    "/key": "\u1F511",
    "/keyboardAndMouse": "\u1F5A6",
    "/keycapTen": "\u1F51F",
    "/kgfullwidth": "\u338F",
    "/kgreenlandic": "\u0138",
    "/khabengali": "\u0996",
    "/khacyrillic": "\u0445",
    "/khadeva": "\u0916",
    "/khagujarati": "\u0A96",
    "/khagurmukhi": "\u0A16",
    "/khah": "\u062E",
    "/khah.fina": "\uFEA6",
    "/khah.init": "\uFEA7",
    "/khah.init_alefmaksura.fina": "\uFD03",
    "/khah.init_hah.fina": "\uFC1A",
    "/khah.init_jeem.fina": "\uFC19",
    "/khah.init_jeem.medi": "\uFCAB",
    "/khah.init_meem.fina": "\uFC1B",
    "/khah.init_meem.medi": "\uFCAC",
    "/khah.init_yeh.fina": "\uFD04",
    "/khah.isol": "\uFEA5",
    "/khah.medi": "\uFEA8",
    "/khah.medi_alefmaksura.fina": "\uFD1F",
    "/khah.medi_yeh.fina": "\uFD20",
    "/khaharabic": "\u062E",
    "/khahfinalarabic": "\uFEA6",
    "/khahinitialarabic": "\uFEA7",
    "/khahmedialarabic": "\uFEA8",
    "/kheicoptic": "\u03E7",
    "/khhadeva": "\u0959",
    "/khhagurmukhi": "\u0A59",
    "/khieukhacirclekorean": "\u3278",
    "/khieukhaparenkorean": "\u3218",
    "/khieukhcirclekorean": "\u326A",
    "/khieukhkorean": "\u314B",
    "/khieukhparenkorean": "\u320A",
    "/khokhaithai": "\u0E02",
    "/khokhonthai": "\u0E05",
    "/khokhuatthai": "\u0E03",
    "/khokhwaithai": "\u0E04",
    "/khomutthai": "\u0E5B",
    "/khook": "\u0199",
    "/khorakhangthai": "\u0E06",
    "/khzfullwidth": "\u3391",
    "/khzsquare": "\u3391",
    "/kicirclekatakana": "\u32D6",
    "/kihiragana": "\u304D",
    "/kikatakana": "\u30AD",
    "/kikatakanahalfwidth": "\uFF77",
    "/kimono": "\u1F458",
    "/kindergartenideographiccircled": "\u3245",
    "/kingblack": "\u265A",
    "/kingwhite": "\u2654",
    "/kip": "\u20AD",
    "/kiroguramusquare": "\u3315",
    "/kiromeetorusquare": "\u3316",
    "/kirosquare": "\u3314",
    "/kirowattosquare": "\u3317",
    "/kiss": "\u1F48F",
    "/kissMark": "\u1F48B",
    "/kissingCatFaceWithClosedEyes": "\u1F63D",
    "/kissingFace": "\u1F617",
    "/kissingFaceWithClosedEyes": "\u1F61A",
    "/kissingFaceWithSmilingEyes": "\u1F619",
    "/kiyeokacirclekorean": "\u326E",
    "/kiyeokaparenkorean": "\u320E",
    "/kiyeokcirclekorean": "\u3260",
    "/kiyeokkorean": "\u3131",
    "/kiyeokparenkorean": "\u3200",
    "/kiyeoksioskorean": "\u3133",
    "/kjecyr": "\u045C",
    "/kjecyrillic": "\u045C",
    "/kkfullwidth": "\u33CD",
    "/klfullwidth": "\u3398",
    "/klinebelow": "\u1E35",
    "/klsquare": "\u3398",
    "/km2fullwidth": "\u33A2",
    "/km3fullwidth": "\u33A6",
    "/kmcapitalfullwidth": "\u33CE",
    "/kmcubedsquare": "\u33A6",
    "/kmfullwidth": "\u339E",
    "/kmonospace": "\uFF4B",
    "/kmsquaredsquare": "\u33A2",
    "/knda:a": "\u0C85",
    "/knda:aa": "\u0C86",
    "/knda:aasign": "\u0CBE",
    "/knda:ai": "\u0C90",
    "/knda:ailength": "\u0CD6",
    "/knda:aisign": "\u0CC8",
    "/knda:anusvara": "\u0C82",
    "/knda:au": "\u0C94",
    "/knda:ausign": "\u0CCC",
    "/knda:avagraha": "\u0CBD",
    "/knda:ba": "\u0CAC",
    "/knda:bha": "\u0CAD",
    "/knda:ca": "\u0C9A",
    "/knda:cha": "\u0C9B",
    "/knda:da": "\u0CA6",
    "/knda:dda": "\u0CA1",
    "/knda:ddha": "\u0CA2",
    "/knda:dha": "\u0CA7",
    "/knda:e": "\u0C8E",
    "/knda:ee": "\u0C8F",
    "/knda:eesign": "\u0CC7",
    "/knda:eight": "\u0CEE",
    "/knda:esign": "\u0CC6",
    "/knda:fa": "\u0CDE",
    "/knda:five": "\u0CEB",
    "/knda:four": "\u0CEA",
    "/knda:ga": "\u0C97",
    "/knda:gha": "\u0C98",
    "/knda:ha": "\u0CB9",
    "/knda:i": "\u0C87",
    "/knda:ii": "\u0C88",
    "/knda:iisign": "\u0CC0",
    "/knda:isign": "\u0CBF",
    "/knda:ja": "\u0C9C",
    "/knda:jha": "\u0C9D",
    "/knda:jihvamuliya": "\u0CF1",
    "/knda:ka": "\u0C95",
    "/knda:kha": "\u0C96",
    "/knda:la": "\u0CB2",
    "/knda:length": "\u0CD5",
    "/knda:lla": "\u0CB3",
    "/knda:llvocal": "\u0CE1",
    "/knda:llvocalsign": "\u0CE3",
    "/knda:lvocal": "\u0C8C",
    "/knda:lvocalsign": "\u0CE2",
    "/knda:ma": "\u0CAE",
    "/knda:na": "\u0CA8",
    "/knda:nga": "\u0C99",
    "/knda:nine": "\u0CEF",
    "/knda:nna": "\u0CA3",
    "/knda:nukta": "\u0CBC",
    "/knda:nya": "\u0C9E",
    "/knda:o": "\u0C92",
    "/knda:one": "\u0CE7",
    "/knda:oo": "\u0C93",
    "/knda:oosign": "\u0CCB",
    "/knda:osign": "\u0CCA",
    "/knda:pa": "\u0CAA",
    "/knda:pha": "\u0CAB",
    "/knda:ra": "\u0CB0",
    "/knda:rra": "\u0CB1",
    "/knda:rrvocal": "\u0CE0",
    "/knda:rrvocalsign": "\u0CC4",
    "/knda:rvocal": "\u0C8B",
    "/knda:rvocalsign": "\u0CC3",
    "/knda:sa": "\u0CB8",
    "/knda:seven": "\u0CED",
    "/knda:sha": "\u0CB6",
    "/knda:signcandrabindu": "\u0C81",
    "/knda:signspacingcandrabindu": "\u0C80",
    "/knda:six": "\u0CEC",
    "/knda:ssa": "\u0CB7",
    "/knda:ta": "\u0CA4",
    "/knda:tha": "\u0CA5",
    "/knda:three": "\u0CE9",
    "/knda:tta": "\u0C9F",
    "/knda:ttha": "\u0CA0",
    "/knda:two": "\u0CE8",
    "/knda:u": "\u0C89",
    "/knda:upadhmaniya": "\u0CF2",
    "/knda:usign": "\u0CC1",
    "/knda:uu": "\u0C8A",
    "/knda:uusign": "\u0CC2",
    "/knda:va": "\u0CB5",
    "/knda:virama": "\u0CCD",
    "/knda:visarga": "\u0C83",
    "/knda:ya": "\u0CAF",
    "/knda:zero": "\u0CE6",
    "/knightblack": "\u265E",
    "/knightwhite": "\u2658",
    "/ko:a": "\u314F",
    "/ko:ae": "\u3150",
    "/ko:aejungseong": "\u1162",
    "/ko:aeujungseong": "\u11A3",
    "/ko:ajungseong": "\u1161",
    "/ko:aojungseong": "\u1176",
    "/ko:araea": "\u318D",
    "/ko:araeae": "\u318E",
    "/ko:araeaeojungseong": "\u119F",
    "/ko:araeaijungseong": "\u11A1",
    "/ko:araeajungseong": "\u119E",
    "/ko:araeaujungseong": "\u11A0",
    "/ko:aujungseong": "\u1177",
    "/ko:ceongchieumchieuchchoseong": "\u1155",
    "/ko:ceongchieumcieucchoseong": "\u1150",
    "/ko:ceongchieumsioschoseong": "\u113E",
    "/ko:ceongchieumssangcieucchoseong": "\u1151",
    "/ko:ceongchieumssangsioschoseong": "\u113F",
    "/ko:chieuch": "\u314A",
    "/ko:chieuchchoseong": "\u110E",
    "/ko:chieuchhieuhchoseong": "\u1153",
    "/ko:chieuchjongseong": "\u11BE",
    "/ko:chieuchkhieukhchoseong": "\u1152",
    "/ko:chitueumchieuchchoseong": "\u1154",
    "/ko:chitueumcieucchoseong": "\u114E",
    "/ko:chitueumsioschoseong": "\u113C",
    "/ko:chitueumssangcieucchoseong": "\u114F",
    "/ko:chitueumssangsioschoseong": "\u113D",
    "/ko:cieuc": "\u3148",
    "/ko:cieucchoseong": "\u110C",
    "/ko:cieucieungchoseong": "\u114D",
    "/ko:cieucjongseong": "\u11BD",
    "/ko:e": "\u3154",
    "/ko:ejungseong": "\u1166",
    "/ko:eo": "\u3153",
    "/ko:eo_eujungseong": "\u117C",
    "/ko:eojungseong": "\u1165",
    "/ko:eoojungseong": "\u117A",
    "/ko:eoujungseong": "\u117B",
    "/ko:eu": "\u3161",
    "/ko:eueujungseong": "\u1196",
    "/ko:eujungseong": "\u1173",
    "/ko:euujungseong": "\u1195",
    "/ko:filler": "\u3164",
    "/ko:fillerchoseong": "\u115F",
    "/ko:fillerjungseong": "\u1160",
    "/ko:hieuh": "\u314E",
    "/ko:hieuhchoseong": "\u1112",
    "/ko:hieuhjongseong": "\u11C2",
    "/ko:hieuhmieumjongseong": "\u11F7",
    "/ko:hieuhnieunjongseong": "\u11F5",
    "/ko:hieuhpieupjongseong": "\u11F8",
    "/ko:hieuhrieuljongseong": "\u11F6",
    "/ko:i": "\u3163",
    "/ko:iajungseong": "\u1198",
    "/ko:iaraeajungseong": "\u119D",
    "/ko:ieujungseong": "\u119C",
    "/ko:ieung": "\u3147",
    "/ko:ieungchieuchchoseong": "\u1149",
    "/ko:ieungchoseong": "\u110B",
    "/ko:ieungcieucchoseong": "\u1148",
    "/ko:ieungjongseong": "\u11BC",
    "/ko:ieungkhieukhjongseong": "\u11EF",
    "/ko:ieungkiyeokchoseong": "\u1141",
    "/ko:ieungkiyeokjongseong": "\u11EC",
    "/ko:ieungmieumchoseong": "\u1143",
    "/ko:ieungpansioschoseong": "\u1146",
    "/ko:ieungphieuphchoseong": "\u114B",
    "/ko:ieungpieupchoseong": "\u1144",
    "/ko:ieungsioschoseong": "\u1145",
    "/ko:ieungssangkiyeokjongseong": "\u11ED",
    "/ko:ieungthieuthchoseong": "\u114A",
    "/ko:ieungtikeutchoseong": "\u1142",
    "/ko:ijungseong": "\u1175",
    "/ko:iojungseong": "\u119A",
    "/ko:iujungseong": "\u119B",
    "/ko:iyajungseong": "\u1199",
    "/ko:kapyeounmieum": "\u3171",
    "/ko:kapyeounmieumchoseong": "\u111D",
    "/ko:kapyeounmieumjongseong": "\u11E2",
    "/ko:kapyeounphieuph": "\u3184",
    "/ko:kapyeounphieuphchoseong": "\u1157",
    "/ko:kapyeounphieuphjongseong": "\u11F4",
    "/ko:kapyeounpieup": "\u3178",
    "/ko:kapyeounpieupchoseong": "\u112B",
    "/ko:kapyeounpieupjongseong": "\u11E6",
    "/ko:kapyeounrieulchoseong": "\u111B",
    "/ko:kapyeounssangpieup": "\u3179",
    "/ko:kapyeounssangpieupchoseong": "\u112C",
    "/ko:khieukh": "\u314B",
    "/ko:khieukhchoseong": "\u110F",
    "/ko:khieukhjongseong": "\u11BF",
    "/ko:kiyeok": "\u3131",
    "/ko:kiyeokchieuchjongseong": "\u11FC",
    "/ko:kiyeokchoseong": "\u1100",
    "/ko:kiyeokhieuhjongseong": "\u11FE",
    "/ko:kiyeokjongseong": "\u11A8",
    "/ko:kiyeokkhieukhjongseong": "\u11FD",
    "/ko:kiyeoknieunjongseong": "\u11FA",
    "/ko:kiyeokpieupjongseong": "\u11FB",
    "/ko:kiyeokrieuljongseong": "\u11C3",
    "/ko:kiyeoksios": "\u3133",
    "/ko:kiyeoksiosjongseong": "\u11AA",
    "/ko:kiyeoksioskiyeokjongseong": "\u11C4",
    "/ko:kiyeoktikeutchoseong": "\u115A",
    "/ko:mieum": "\u3141",
    "/ko:mieumchieuchjongseong": "\u11E0",
    "/ko:mieumchoseong": "\u1106",
    "/ko:mieumhieuhjongseong": "\u11E1",
    "/ko:mieumjongseong": "\u11B7",
    "/ko:mieumkiyeokjongseong": "\u11DA",
    "/ko:mieumpansios": "\u3170",
    "/ko:mieumpansiosjongseong": "\u11DF",
    "/ko:mieumpieup": "\u316E",
    "/ko:mieumpieupchoseong": "\u111C",
    "/ko:mieumpieupjongseong": "\u11DC",
    "/ko:mieumrieuljongseong": "\u11DB",
    "/ko:mieumsios": "\u316F",
    "/ko:mieumsiosjongseong": "\u11DD",
    "/ko:mieumssangsiosjongseong": "\u11DE",
    "/ko:nieun": "\u3134",
    "/ko:nieunchoseong": "\u1102",
    "/ko:nieuncieuc": "\u3135",
    "/ko:nieuncieucchoseong": "\u115C",
    "/ko:nieuncieucjongseong": "\u11AC",
    "/ko:nieunhieuh": "\u3136",
    "/ko:nieunhieuhchoseong": "\u115D",
    "/ko:nieunhieuhjongseong": "\u11AD",
    "/ko:nieunjongseong": "\u11AB",
    "/ko:nieunkiyeokchoseong": "\u1113",
    "/ko:nieunkiyeokjongseong": "\u11C5",
    "/ko:nieunpansios": "\u3168",
    "/ko:nieunpansiosjongseong": "\u11C8",
    "/ko:nieunpieupchoseong": "\u1116",
    "/ko:nieunsios": "\u3167",
    "/ko:nieunsioschoseong": "\u115B",
    "/ko:nieunsiosjongseong": "\u11C7",
    "/ko:nieunthieuthjongseong": "\u11C9",
    "/ko:nieuntikeut": "\u3166",
    "/ko:nieuntikeutchoseong": "\u1115",
    "/ko:nieuntikeutjongseong": "\u11C6",
    "/ko:o": "\u3157",
    "/ko:o_ejungseong": "\u1180",
    "/ko:o_eojungseong": "\u117F",
    "/ko:oe": "\u315A",
    "/ko:oejungseong": "\u116C",
    "/ko:ojungseong": "\u1169",
    "/ko:oojungseong": "\u1182",
    "/ko:oujungseong": "\u1183",
    "/ko:oyaejungseong": "\u11A7",
    "/ko:oyajungseong": "\u11A6",
    "/ko:oyejungseong": "\u1181",
    "/ko:pansios": "\u317F",
    "/ko:pansioschoseong": "\u1140",
    "/ko:pansiosjongseong": "\u11EB",
    "/ko:phieuph": "\u314D",
    "/ko:phieuphchoseong": "\u1111",
    "/ko:phieuphjongseong": "\u11C1",
    "/ko:phieuphpieupchoseong": "\u1156",
    "/ko:phieuphpieupjongseong": "\u11F3",
    "/ko:pieup": "\u3142",
    "/ko:pieupchieuchchoseong": "\u1128",
    "/ko:pieupchoseong": "\u1107",
    "/ko:pieupcieuc": "\u3176",
    "/ko:pieupcieucchoseong": "\u1127",
    "/ko:pieuphieuhjongseong": "\u11E5",
    "/ko:pieupjongseong": "\u11B8",
    "/ko:pieupkiyeok": "\u3172",
    "/ko:pieupkiyeokchoseong": "\u111E",
    "/ko:pieupnieunchoseong": "\u111F",
    "/ko:pieupphieuphchoseong": "\u112A",
    "/ko:pieupphieuphjongseong": "\u11E4",
    "/ko:pieuprieuljongseong": "\u11E3",
    "/ko:pieupsios": "\u3144",
    "/ko:pieupsioschoseong": "\u1121",
    "/ko:pieupsioscieucchoseong": "\u1126",
    "/ko:pieupsiosjongseong": "\u11B9",
    "/ko:pieupsioskiyeok": "\u3174",
    "/ko:pieupsioskiyeokchoseong": "\u1122",
    "/ko:pieupsiospieupchoseong": "\u1124",
    "/ko:pieupsiostikeut": "\u3175",
    "/ko:pieupsiostikeutchoseong": "\u1123",
    "/ko:pieupssangsioschoseong": "\u1125",
    "/ko:pieupthieuth": "\u3177",
    "/ko:pieupthieuthchoseong": "\u1129",
    "/ko:pieuptikeut": "\u3173",
    "/ko:pieuptikeutchoseong": "\u1120",
    "/ko:rieul": "\u3139",
    "/ko:rieulchoseong": "\u1105",
    "/ko:rieulhieuh": "\u3140",
    "/ko:rieulhieuhchoseong": "\u111A",
    "/ko:rieulhieuhjongseong": "\u11B6",
    "/ko:rieuljongseong": "\u11AF",
    "/ko:rieulkapyeounpieupjongseong": "\u11D5",
    "/ko:rieulkhieukhjongseong": "\u11D8",
    "/ko:rieulkiyeok": "\u313A",
    "/ko:rieulkiyeokjongseong": "\u11B0",
    "/ko:rieulkiyeoksios": "\u3169",
    "/ko:rieulkiyeoksiosjongseong": "\u11CC",
    "/ko:rieulmieum": "\u313B",
    "/ko:rieulmieumjongseong": "\u11B1",
    "/ko:rieulmieumkiyeokjongseong": "\u11D1",
    "/ko:rieulmieumsiosjongseong": "\u11D2",
    "/ko:rieulnieunchoseong": "\u1118",
    "/ko:rieulnieunjongseong": "\u11CD",
    "/ko:rieulpansios": "\u316C",
    "/ko:rieulpansiosjongseong": "\u11D7",
    "/ko:rieulphieuph": "\u313F",
    "/ko:rieulphieuphjongseong": "\u11B5",
    "/ko:rieulpieup": "\u313C",
    "/ko:rieulpieuphieuhjongseong": "\u11D4",
    "/ko:rieulpieupjongseong": "\u11B2",
    "/ko:rieulpieupsios": "\u316B",
    "/ko:rieulpieupsiosjongseong": "\u11D3",
    "/ko:rieulsios": "\u313D",
    "/ko:rieulsiosjongseong": "\u11B3",
    "/ko:rieulssangsiosjongseong": "\u11D6",
    "/ko:rieulthieuth": "\u313E",
    "/ko:rieulthieuthjongseong": "\u11B4",
    "/ko:rieultikeut": "\u316A",
    "/ko:rieultikeuthieuhjongseong": "\u11CF",
    "/ko:rieultikeutjongseong": "\u11CE",
    "/ko:rieulyeorinhieuh": "\u316D",
    "/ko:rieulyeorinhieuhjongseong": "\u11D9",
    "/ko:sios": "\u3145",
    "/ko:sioschieuchchoseong": "\u1137",
    "/ko:sioschoseong": "\u1109",
    "/ko:sioscieuc": "\u317E",
    "/ko:sioscieucchoseong": "\u1136",
    "/ko:sioshieuhchoseong": "\u113B",
    "/ko:siosieungchoseong": "\u1135",
    "/ko:siosjongseong": "\u11BA",
    "/ko:sioskhieukhchoseong": "\u1138",
    "/ko:sioskiyeok": "\u317A",
    "/ko:sioskiyeokchoseong": "\u112D",
    "/ko:sioskiyeokjongseong": "\u11E7",
    "/ko:siosmieumchoseong": "\u1131",
    "/ko:siosnieun": "\u317B",
    "/ko:siosnieunchoseong": "\u112E",
    "/ko:siosphieuphchoseong": "\u113A",
    "/ko:siospieup": "\u317D",
    "/ko:siospieupchoseong": "\u1132",
    "/ko:siospieupjongseong": "\u11EA",
    "/ko:siospieupkiyeokchoseong": "\u1133",
    "/ko:siosrieulchoseong": "\u1130",
    "/ko:siosrieuljongseong": "\u11E9",
    "/ko:siosssangsioschoseong": "\u1134",
    "/ko:siosthieuthchoseong": "\u1139",
    "/ko:siostikeut": "\u317C",
    "/ko:siostikeutchoseong": "\u112F",
    "/ko:siostikeutjongseong": "\u11E8",
    "/ko:ssangaraeajungseong": "\u11A2",
    "/ko:ssangcieuc": "\u3149",
    "/ko:ssangcieucchoseong": "\u110D",
    "/ko:ssanghieuh": "\u3185",
    "/ko:ssanghieuhchoseong": "\u1158",
    "/ko:ssangieung": "\u3180",
    "/ko:ssangieungchoseong": "\u1147",
    "/ko:ssangieungjongseong": "\u11EE",
    "/ko:ssangkiyeok": "\u3132",
    "/ko:ssangkiyeokchoseong": "\u1101",
    "/ko:ssangkiyeokjongseong": "\u11A9",
    "/ko:ssangnieun": "\u3165",
    "/ko:ssangnieunchoseong": "\u1114",
    "/ko:ssangnieunjongseong": "\u11FF",
    "/ko:ssangpieup": "\u3143",
    "/ko:ssangpieupchoseong": "\u1108",
    "/ko:ssangrieulchoseong": "\u1119",
    "/ko:ssangrieuljongseong": "\u11D0",
    "/ko:ssangsios": "\u3146",
    "/ko:ssangsioschoseong": "\u110A",
    "/ko:ssangsiosjongseong": "\u11BB",
    "/ko:ssangtikeut": "\u3138",
    "/ko:ssangtikeutchoseong": "\u1104",
    "/ko:thieuth": "\u314C",
    "/ko:thieuthchoseong": "\u1110",
    "/ko:thieuthjongseong": "\u11C0",
    "/ko:tikeut": "\u3137",
    "/ko:tikeutchoseong": "\u1103",
    "/ko:tikeutjongseong": "\u11AE",
    "/ko:tikeutkiyeokchoseong": "\u1117",
    "/ko:tikeutkiyeokjongseong": "\u11CA",
    "/ko:tikeutrieulchoseong": "\u115E",
    "/ko:tikeutrieuljongseong": "\u11CB",
    "/ko:u": "\u315C",
    "/ko:uaejungseong": "\u118A",
    "/ko:uajungseong": "\u1189",
    "/ko:ueo_eujungseong": "\u118B",
    "/ko:ujungseong": "\u116E",
    "/ko:uujungseong": "\u118D",
    "/ko:uyejungseong": "\u118C",
    "/ko:wa": "\u3158",
    "/ko:wae": "\u3159",
    "/ko:waejungseong": "\u116B",
    "/ko:wajungseong": "\u116A",
    "/ko:we": "\u315E",
    "/ko:wejungseong": "\u1170",
    "/ko:weo": "\u315D",
    "/ko:weojungseong": "\u116F",
    "/ko:wi": "\u315F",
    "/ko:wijungseong": "\u1171",
    "/ko:ya": "\u3151",
    "/ko:yae": "\u3152",
    "/ko:yaejungseong": "\u1164",
    "/ko:yajungseong": "\u1163",
    "/ko:yaojungseong": "\u1178",
    "/ko:yaujungseong": "\u11A4",
    "/ko:yayojungseong": "\u1179",
    "/ko:ye": "\u3156",
    "/ko:yejungseong": "\u1168",
    "/ko:yeo": "\u3155",
    "/ko:yeojungseong": "\u1167",
    "/ko:yeoojungseong": "\u117D",
    "/ko:yeorinhieuh": "\u3186",
    "/ko:yeorinhieuhchoseong": "\u1159",
    "/ko:yeorinhieuhjongseong": "\u11F9",
    "/ko:yeoujungseong": "\u117E",
    "/ko:yeoyajungseong": "\u11A5",
    "/ko:yesieung": "\u3181",
    "/ko:yesieungchoseong": "\u114C",
    "/ko:yesieungjongseong": "\u11F0",
    "/ko:yesieungpansios": "\u3183",
    "/ko:yesieungpansiosjongseong": "\u11F2",
    "/ko:yesieungsios": "\u3182",
    "/ko:yesieungsiosjongseong": "\u11F1",
    "/ko:yi": "\u3162",
    "/ko:yijungseong": "\u1174",
    "/ko:yiujungseong": "\u1197",
    "/ko:yo": "\u315B",
    "/ko:yoi": "\u3189",
    "/ko:yoijungseong": "\u1188",
    "/ko:yojungseong": "\u116D",
    "/ko:yoojungseong": "\u1187",
    "/ko:yoya": "\u3187",
    "/ko:yoyae": "\u3188",
    "/ko:yoyaejungseong": "\u1185",
    "/ko:yoyajungseong": "\u1184",
    "/ko:yoyeojungseong": "\u1186",
    "/ko:yu": "\u3160",
    "/ko:yuajungseong": "\u118E",
    "/ko:yuejungseong": "\u1190",
    "/ko:yueojungseong": "\u118F",
    "/ko:yui": "\u318C",
    "/ko:yuijungseong": "\u1194",
    "/ko:yujungseong": "\u1172",
    "/ko:yuujungseong": "\u1193",
    "/ko:yuye": "\u318B",
    "/ko:yuyejungseong": "\u1192",
    "/ko:yuyeo": "\u318A",
    "/ko:yuyeojungseong": "\u1191",
    "/koala": "\u1F428",
    "/kobliquestroke": "\uA7A3",
    "/kocirclekatakana": "\u32D9",
    "/kohiragana": "\u3053",
    "/kohmfullwidth": "\u33C0",
    "/kohmsquare": "\u33C0",
    "/kokaithai": "\u0E01",
    "/kokatakana": "\u30B3",
    "/kokatakanahalfwidth": "\uFF7A",
    "/kooposquare": "\u331E",
    "/koppa": "\u03DF",
    "/koppaarchaic": "\u03D9",
    "/koppacyr": "\u0481",
    "/koppacyrillic": "\u0481",
    "/koreanstandardsymbol": "\u327F",
    "/koroniscmb": "\u0343",
    "/korunasquare": "\u331D",
    "/kotoideographiccircled": "\u3247",
    "/kpafullwidth": "\u33AA",
    "/kparen": "\u24A6",
    "/kparenthesized": "\u24A6",
    "/kpasquare": "\u33AA",
    "/kra": "\u0138",
    "/ksicyr": "\u046F",
    "/ksicyrillic": "\u046F",
    "/kstroke": "\uA741",
    "/kstrokediagonalstroke": "\uA745",
    "/ktfullwidth": "\u33CF",
    "/ktsquare": "\u33CF",
    "/kturned": "\u029E",
    "/kucirclekatakana": "\u32D7",
    "/kuhiragana": "\u304F",
    "/kukatakana": "\u30AF",
    "/kukatakanahalfwidth": "\uFF78",
    "/kuroonesquare": "\u331B",
    "/kuruzeirosquare": "\u331A",
    "/kvfullwidth": "\u33B8",
    "/kvsquare": "\u33B8",
    "/kwfullwidth": "\u33BE",
    "/kwsquare": "\u33BE",
    "/kyuriisquare": "\u3312",
    "/l": "\u006C",
    "/l.inferior": "\u2097",
    "/label": "\u1F3F7",
    "/labengali": "\u09B2",
    "/laborideographiccircled": "\u3298",
    "/laborideographicparen": "\u3238",
    "/lacute": "\u013A",
    "/ladeva": "\u0932",
    "/ladyBeetle": "\u1F41E",
    "/lagujarati": "\u0AB2",
    "/lagurmukhi": "\u0A32",
    "/lakkhangyaothai": "\u0E45",
    "/lam": "\u0644",
    "/lam.fina": "\uFEDE",
    "/lam.init": "\uFEDF",
    "/lam.init_alef.fina": "\uFEFB",
    "/lam.init_alef.medi_hamzaabove.fina": "\uFEF7",
    "/lam.init_alef.medi_hamzabelow.fina": "\uFEF9",
    "/lam.init_alef.medi_maddaabove.fina": "\uFEF5",
    "/lam.init_alefmaksura.fina": "\uFC43",
    "/lam.init_hah.fina": "\uFC40",
    "/lam.init_hah.medi": "\uFCCA",
    "/lam.init_hah.medi_meem.medi": "\uFDB5",
    "/lam.init_heh.medi": "\uFCCD",
    "/lam.init_jeem.fina": "\uFC3F",
    "/lam.init_jeem.medi": "\uFCC9",
    "/lam.init_jeem.medi_jeem.medi": "\uFD83",
    "/lam.init_jeem.medi_meem.medi": "\uFDBA",
    "/lam.init_khah.fina": "\uFC41",
    "/lam.init_khah.medi": "\uFCCB",
    "/lam.init_khah.medi_meem.medi": "\uFD86",
    "/lam.init_meem.fina": "\uFC42",
    "/lam.init_meem.medi": "\uFCCC",
    "/lam.init_meem.medi_hah.medi": "\uFD88",
    "/lam.init_yeh.fina": "\uFC44",
    "/lam.isol": "\uFEDD",
    "/lam.medi": "\uFEE0",
    "/lam.medi_alef.fina": "\uFEFC",
    "/lam.medi_alef.medi_hamzaabove.fina": "\uFEF8",
    "/lam.medi_alef.medi_hamzabelow.fina": "\uFEFA",
    "/lam.medi_alef.medi_maddaabove.fina": "\uFEF6",
    "/lam.medi_alefmaksura.fina": "\uFC86",
    "/lam.medi_hah.medi_alefmaksura.fina": "\uFD82",
    "/lam.medi_hah.medi_meem.fina": "\uFD80",
    "/lam.medi_hah.medi_yeh.fina": "\uFD81",
    "/lam.medi_jeem.medi_jeem.fina": "\uFD84",
    "/lam.medi_jeem.medi_meem.fina": "\uFDBC",
    "/lam.medi_jeem.medi_yeh.fina": "\uFDAC",
    "/lam.medi_khah.medi_meem.fina": "\uFD85",
    "/lam.medi_meem.fina": "\uFC85",
    "/lam.medi_meem.medi": "\uFCED",
    "/lam.medi_meem.medi_hah.fina": "\uFD87",
    "/lam.medi_meem.medi_yeh.fina": "\uFDAD",
    "/lam.medi_yeh.fina": "\uFC87",
    "/lamBar": "\u076A",
    "/lamVabove": "\u06B5",
    "/lamalefabove": "\u06D9",
    "/lamaleffinalarabic": "\uFEFC",
    "/lamalefhamzaabovefinalarabic": "\uFEF8",
    "/lamalefhamzaaboveisolatedarabic": "\uFEF7",
    "/lamalefhamzabelowfinalarabic": "\uFEFA",
    "/lamalefhamzabelowisolatedarabic": "\uFEF9",
    "/lamalefisolatedarabic": "\uFEFB",
    "/lamalefmaddaabovefinalarabic": "\uFEF6",
    "/lamalefmaddaaboveisolatedarabic": "\uFEF5",
    "/lamarabic": "\u0644",
    "/lambda": "\u03BB",
    "/lambdastroke": "\u019B",
    "/lamdotabove": "\u06B6",
    "/lamed": "\u05DC",
    "/lamed:hb": "\u05DC",
    "/lameddagesh": "\uFB3C",
    "/lameddageshhebrew": "\uFB3C",
    "/lamedhebrew": "\u05DC",
    "/lamedholam": "\u05DC",
    "/lamedholamdagesh": "\u05DC",
    "/lamedholamdageshhebrew": "\u05DC",
    "/lamedholamhebrew": "\u05DC",
    "/lamedwide:hb": "\uFB25",
    "/lamedwithdagesh:hb": "\uFB3C",
    "/lamfinalarabic": "\uFEDE",
    "/lamhahinitialarabic": "\uFCCA",
    "/laminitialarabic": "\uFEDF",
    "/lamjeeminitialarabic": "\uFCC9",
    "/lamkhahinitialarabic": "\uFCCB",
    "/lamlamhehisolatedarabic": "\uFDF2",
    "/lammedialarabic": "\uFEE0",
    "/lammeemhahinitialarabic": "\uFD88",
    "/lammeeminitialarabic": "\uFCCC",
    "/lammeemjeeminitialarabic": "\uFEDF",
    "/lammeemkhahinitialarabic": "\uFEDF",
    "/lamthreedotsabove": "\u06B7",
    "/lamthreedotsbelow": "\u06B8",
    "/lanemergeleftblack": "\u26D8",
    "/lanemergeleftwhite": "\u26D9",
    "/largeBlueCircle": "\u1F535",
    "/largeBlueDiamond": "\u1F537",
    "/largeOrangeDiamond": "\u1F536",
    "/largeRedCircle": "\u1F534",
    "/largecircle": "\u25EF",
    "/largetackdown": "\u27D9",
    "/largetackup": "\u27D8",
    "/lari": "\u20BE",
    "/lastQuarterMoon": "\u1F317",
    "/lastQuarterMoonFace": "\u1F31C",
    "/lastquartermoon": "\u263E",
    "/layar": "\uA982",
    "/lazysinverted": "\u223E",
    "/lbar": "\u019A",
    "/lbbar": "\u2114",
    "/lbelt": "\u026C",
    "/lbeltretroflex": "\uA78E",
    "/lbopomofo": "\u310C",
    "/lbroken": "\uA747",
    "/lcaron": "\u013E",
    "/lcedilla": "\u013C",
    "/lcircle": "\u24DB",
    "/lcircumflexbelow": "\u1E3D",
    "/lcommaaccent": "\u013C",
    "/lcurl": "\u0234",
    "/ldblbar": "\u2C61",
    "/ldot": "\u0140",
    "/ldotaccent": "\u0140",
    "/ldotbelow": "\u1E37",
    "/ldotbelowmacron": "\u1E39",
    "/leafFlutteringInWind": "\u1F343",
    "/ledger": "\u1F4D2",
    "/left-pointingMagnifyingGlass": "\u1F50D",
    "/leftAngerBubble": "\u1F5EE",
    "/leftFiveEighthsBlock": "\u258B",
    "/leftHalfBlock": "\u258C",
    "/leftHandTelephoneReceiver": "\u1F57B",
    "/leftLuggage": "\u1F6C5",
    "/leftOneEighthBlock": "\u258F",
    "/leftOneQuarterBlock": "\u258E",
    "/leftSevenEighthsBlock": "\u2589",
    "/leftSpeechBubble": "\u1F5E8",
    "/leftThoughtBubble": "\u1F5EC",
    "/leftThreeEighthsBlock": "\u258D",
    "/leftThreeQuartersBlock": "\u258A",
    "/leftWritingHand": "\u1F58E",
    "/leftangleabovecmb": "\u031A",
    "/leftarrowoverrightarrow": "\u21C6",
    "/leftdnheavyrightuplight": "\u2545",
    "/leftharpoonoverrightharpoon": "\u21CB",
    "/leftheavyrightdnlight": "\u252D",
    "/leftheavyrightuplight": "\u2535",
    "/leftheavyrightvertlight": "\u253D",
    "/leftideographiccircled": "\u32A7",
    "/leftlightrightdnheavy": "\u2532",
    "/leftlightrightupheavy": "\u253A",
    "/leftlightrightvertheavy": "\u254A",
    "/lefttackbelowcmb": "\u0318",
    "/lefttorightembed": "\u202A",
    "/lefttorightisolate": "\u2066",
    "/lefttorightmark": "\u200E",
    "/lefttorightoverride": "\u202D",
    "/leftupheavyrightdnlight": "\u2543",
    "/lemon": "\u1F34B",
    "/lenis": "\u1FBF",
    "/lenisacute": "\u1FCE",
    "/lenisgrave": "\u1FCD",
    "/lenistilde": "\u1FCF",
    "/leo": "\u264C",
    "/leopard": "\u1F406",
    "/less": "\u003C",
    "/lessbutnotequal": "\u2268",
    "/lessbutnotequivalent": "\u22E6",
    "/lessdot": "\u22D6",
    "/lessequal": "\u2264",
    "/lessequalorgreater": "\u22DA",
    "/lessmonospace": "\uFF1C",
    "/lessorequivalent": "\u2272",
    "/lessorgreater": "\u2276",
    "/lessoverequal": "\u2266",
    "/lesssmall": "\uFE64",
    "/levelSlider": "\u1F39A",
    "/lezh": "\u026E",
    "/lfblock": "\u258C",
    "/lhacyr": "\u0515",
    "/lhookretroflex": "\u026D",
    "/libra": "\u264E",
    "/ligaturealeflamed:hb": "\uFB4F",
    "/ligatureoemod": "\uA7F9",
    "/lightCheckMark": "\u1F5F8",
    "/lightRail": "\u1F688",
    "/lightShade": "\u2591",
    "/lightarcdnleft": "\u256E",
    "/lightarcdnright": "\u256D",
    "/lightarcupleft": "\u256F",
    "/lightarcupright": "\u2570",
    "/lightdbldashhorz": "\u254C",
    "/lightdbldashvert": "\u254E",
    "/lightdiagcross": "\u2573",
    "/lightdiagupleftdnright": "\u2572",
    "/lightdiaguprightdnleft": "\u2571",
    "/lightdn": "\u2577",
    "/lightdnhorz": "\u252C",
    "/lightdnleft": "\u2510",
    "/lightdnright": "\u250C",
    "/lighthorz": "\u2500",
    "/lightleft": "\u2574",
    "/lightleftheavyright": "\u257C",
    "/lightning": "\u2607",
    "/lightningMood": "\u1F5F2",
    "/lightningMoodBubble": "\u1F5F1",
    "/lightquaddashhorz": "\u2508",
    "/lightquaddashvert": "\u250A",
    "/lightright": "\u2576",
    "/lighttrpldashhorz": "\u2504",
    "/lighttrpldashvert": "\u2506",
    "/lightup": "\u2575",
    "/lightupheavydn": "\u257D",
    "/lightuphorz": "\u2534",
    "/lightupleft": "\u2518",
    "/lightupright": "\u2514",
    "/lightvert": "\u2502",
    "/lightverthorz": "\u253C",
    "/lightvertleft": "\u2524",
    "/lightvertright": "\u251C",
    "/lineextensionhorizontal": "\u23AF",
    "/lineextensionvertical": "\u23D0",
    "/linemiddledotvertical": "\u237F",
    "/lineseparator": "\u2028",
    "/lingsapada": "\uA9C8",
    "/link": "\u1F517",
    "/linkedPaperclips": "\u1F587",
    "/lips": "\u1F5E2",
    "/lipstick": "\u1F484",
    "/lira": "\u20A4",
    "/litre": "\u2113",
    "/livretournois": "\u20B6",
    "/liwnarmenian": "\u056C",
    "/lj": "\u01C9",
    "/ljecyr": "\u0459",
    "/ljecyrillic": "\u0459",
    "/ljekomicyr": "\u0509",
    "/ll": "\uF6C0",
    "/lladeva": "\u0933",
    "/llagujarati": "\u0AB3",
    "/llinebelow": "\u1E3B",
    "/llladeva": "\u0934",
    "/llvocalicbengali": "\u09E1",
    "/llvocalicdeva": "\u0961",
    "/llvocalicvowelsignbengali": "\u09E3",
    "/llvocalicvowelsigndeva": "\u0963",
    "/llwelsh": "\u1EFB",
    "/lmacrondot": "\u1E39",
    "/lmfullwidth": "\u33D0",
    "/lmiddletilde": "\u026B",
    "/lmonospace": "\uFF4C",
    "/lmsquare": "\u33D0",
    "/lnfullwidth": "\u33D1",
    "/lochulathai": "\u0E2C",
    "/lock": "\u1F512",
    "/lockInkPen": "\u1F50F",
    "/logfullwidth": "\u33D2",
    "/logicaland": "\u2227",
    "/logicalandarray": "\u22C0",
    "/logicalnot": "\u00AC",
    "/logicalnotreversed": "\u2310",
    "/logicalor": "\u2228",
    "/logicalorarray": "\u22C1",
    "/lolingthai": "\u0E25",
    "/lollipop": "\u1F36D",
    "/longdivision": "\u27CC",
    "/longovershortmetrical": "\u23D2",
    "/longovertwoshortsmetrical": "\u23D4",
    "/longs": "\u017F",
    "/longs_t": "\uFB05",
    "/longsdot": "\u1E9B",
    "/longswithdiagonalstroke": "\u1E9C",
    "/longswithhighstroke": "\u1E9D",
    "/longtackleft": "\u27DE",
    "/longtackright": "\u27DD",
    "/losslesssquare": "\u1F1A9",
    "/loudlyCryingFace": "\u1F62D",
    "/loveHotel": "\u1F3E9",
    "/loveLetter": "\u1F48C",
    "/lowBrightness": "\u1F505",
    "/lowasterisk": "\u204E",
    "/lowerFiveEighthsBlock": "\u2585",
    "/lowerHalfBlock": "\u2584",
    "/lowerLeftBallpointPen": "\u1F58A",
    "/lowerLeftCrayon": "\u1F58D",
    "/lowerLeftFountainPen": "\u1F58B",
    "/lowerLeftPaintbrush": "\u1F58C",
    "/lowerLeftPencil": "\u1F589",
    "/lowerOneEighthBlock": "\u2581",
    "/lowerOneQuarterBlock": "\u2582",
    "/lowerRightShadowedWhiteCircle": "\u1F53E",
    "/lowerSevenEighthsBlock": "\u2587",
    "/lowerThreeEighthsBlock": "\u2583",
    "/lowerThreeQuartersBlock": "\u2586",
    "/lowercornerdotright": "\u27D3",
    "/lowerhalfcircle": "\u25E1",
    "/lowerhalfcircleinversewhite": "\u25DB",
    "/lowerquadrantcirculararcleft": "\u25DF",
    "/lowerquadrantcirculararcright": "\u25DE",
    "/lowertriangleleft": "\u25FA",
    "/lowertriangleleftblack": "\u25E3",
    "/lowertriangleright": "\u25FF",
    "/lowertrianglerightblack": "\u25E2",
    "/lowideographiccircled": "\u32A6",
    "/lowlinecenterline": "\uFE4E",
    "/lowlinecmb": "\u0332",
    "/lowlinedashed": "\uFE4D",
    "/lownumeralsign": "\u0375",
    "/lowquotedblprime": "\u301F",
    "/lozenge": "\u25CA",
    "/lozengedividedbyrulehorizontal": "\u27E0",
    "/lozengesquare": "\u2311",
    "/lparen": "\u24A7",
    "/lparenthesized": "\u24A7",
    "/lretroflex": "\u026D",
    "/ls": "\u02AA",
    "/lslash": "\u0142",
    "/lsquare": "\u2113",
    "/lstroke": "\uA749",
    "/lsuperior": "\uF6EE",
    "/lsupmod": "\u02E1",
    "/lt:Alpha": "\u2C6D",
    "/lt:Alphaturned": "\u2C70",
    "/lt:Beta": "\uA7B4",
    "/lt:Chi": "\uA7B3",
    "/lt:Gamma": "\u0194",
    "/lt:Iota": "\u0196",
    "/lt:Omega": "\uA7B6",
    "/lt:Upsilon": "\u01B1",
    "/lt:beta": "\uA7B5",
    "/lt:delta": "\u1E9F",
    "/lt:omega": "\uA7B7",
    "/ltshade": "\u2591",
    "/lttr:bet": "\u2136",
    "/lttr:dalet": "\u2138",
    "/lttr:gimel": "\u2137",
    "/lttr:gscript": "\u210A",
    "/lturned": "\uA781",
    "/ltypeopencircuit": "\u2390",
    "/luhurpada": "\uA9C5",
    "/lum": "\uA772",
    "/lungsipada": "\uA9C9",
    "/luthai": "\u0E26",
    "/lvocalicbengali": "\u098C",
    "/lvocalicdeva": "\u090C",
    "/lvocalicvowelsignbengali": "\u09E2",
    "/lvocalicvowelsigndeva": "\u0962",
    "/lxfullwidth": "\u33D3",
    "/lxsquare": "\u33D3",
    "/lzed": "\u02AB",
    "/m": "\u006D",
    "/m.inferior": "\u2098",
    "/m2fullwidth": "\u33A1",
    "/m3fullwidth": "\u33A5",
    "/mabengali": "\u09AE",
    "/macirclekatakana": "\u32EE",
    "/macron": "\u00AF",
    "/macronbelowcmb": "\u0331",
    "/macroncmb": "\u0304",
    "/macronlowmod": "\u02CD",
    "/macronmod": "\u02C9",
    "/macronmonospace": "\uFFE3",
    "/macute": "\u1E3F",
    "/madda": "\u0653",
    "/maddaabove": "\u06E4",
    "/madeva": "\u092E",
    "/madyapada": "\uA9C4",
    "/mafullwidth": "\u3383",
    "/magujarati": "\u0AAE",
    "/magurmukhi": "\u0A2E",
    "/mahapakhhebrew": "\u05A4",
    "/mahapakhlefthebrew": "\u05A4",
    "/mahhasquare": "\u3345",
    "/mahiragana": "\u307E",
    "/mahpach:hb": "\u05A4",
    "/maichattawalowleftthai": "\uF895",
    "/maichattawalowrightthai": "\uF894",
    "/maichattawathai": "\u0E4B",
    "/maichattawaupperleftthai": "\uF893",
    "/maieklowleftthai": "\uF88C",
    "/maieklowrightthai": "\uF88B",
    "/maiekthai": "\u0E48",
    "/maiekupperleftthai": "\uF88A",
    "/maihanakatleftthai": "\uF884",
    "/maihanakatthai": "\u0E31",
    "/maikurosquare": "\u3343",
    "/mairusquare": "\u3344",
    "/maitaikhuleftthai": "\uF889",
    "/maitaikhuthai": "\u0E47",
    "/maitholowleftthai": "\uF88F",
    "/maitholowrightthai": "\uF88E",
    "/maithothai": "\u0E49",
    "/maithoupperleftthai": "\uF88D",
    "/maitrilowleftthai": "\uF892",
    "/maitrilowrightthai": "\uF891",
    "/maitrithai": "\u0E4A",
    "/maitriupperleftthai": "\uF890",
    "/maiyamokthai": "\u0E46",
    "/makatakana": "\u30DE",
    "/makatakanahalfwidth": "\uFF8F",
    "/male": "\u2642",
    "/malefemale": "\u26A5",
    "/maleideographiccircled": "\u329A",
    "/malestroke": "\u26A6",
    "/malestrokemalefemale": "\u26A7",
    "/man": "\u1F468",
    "/manAndWomanHoldingHands": "\u1F46B",
    "/manDancing": "\u1F57A",
    "/manGuaPiMao": "\u1F472",
    "/manInBusinessSuitLevitating": "\u1F574",
    "/manTurban": "\u1F473",
    "/manat": "\u20BC",
    "/mansShoe": "\u1F45E",
    "/mansyonsquare": "\u3347",
    "/mantelpieceClock": "\u1F570",
    "/mapleLeaf": "\u1F341",
    "/maplighthouse": "\u26EF",
    "/maqaf:hb": "\u05BE",
    "/maqafhebrew": "\u05BE",
    "/marchtelegraph": "\u32C2",
    "/mark": "\u061C",
    "/markerdottedraisedinterpolation": "\u2E07",
    "/markerdottedtransposition": "\u2E08",
    "/markerraisedinterpolation": "\u2E06",
    "/marknoonghunna": "\u0658",
    "/marksChapter": "\u1F545",
    "/marriage": "\u26AD",
    "/mars": "\u2642",
    "/marukusquare": "\u3346",
    "/masoraCircle:hb": "\u05AF",
    "/masoracirclehebrew": "\u05AF",
    "/masquare": "\u3383",
    "/masumark": "\u303C",
    "/math:bowtie": "\u22C8",
    "/math:cuberoot": "\u221B",
    "/math:fourthroot": "\u221C",
    "/maximize": "\u1F5D6",
    "/maytelegraph": "\u32C4",
    "/mbfullwidth": "\u3386",
    "/mbopomofo": "\u3107",
    "/mbsmallfullwidth": "\u33D4",
    "/mbsquare": "\u33D4",
    "/mcircle": "\u24DC",
    "/mcubedsquare": "\u33A5",
    "/mdot": "\u1E41",
    "/mdotaccent": "\u1E41",
    "/mdotbelow": "\u1E43",
    "/measuredangle": "\u2221",
    "/measuredby": "\u225E",
    "/meatOnBone": "\u1F356",
    "/mecirclekatakana": "\u32F1",
    "/medicineideographiccircled": "\u32A9",
    "/mediumShade": "\u2592",
    "/mediumcircleblack": "\u26AB",
    "/mediumcirclewhite": "\u26AA",
    "/mediummathematicalspace": "\u205F",
    "/mediumsmallcirclewhite": "\u26AC",
    "/meem": "\u0645",
    "/meem.fina": "\uFEE2",
    "/meem.init": "\uFEE3",
    "/meem.init_alefmaksura.fina": "\uFC49",
    "/meem.init_hah.fina": "\uFC46",
    "/meem.init_hah.medi": "\uFCCF",
    "/meem.init_hah.medi_jeem.medi": "\uFD89",
    "/meem.init_hah.medi_meem.medi": "\uFD8A",
    "/meem.init_jeem.fina": "\uFC45",
    "/meem.init_jeem.medi": "\uFCCE",
    "/meem.init_jeem.medi_hah.medi": "\uFD8C",
    "/meem.init_jeem.medi_khah.medi": "\uFD92",
    "/meem.init_jeem.medi_meem.medi": "\uFD8D",
    "/meem.init_khah.fina": "\uFC47",
    "/meem.init_khah.medi": "\uFCD0",
    "/meem.init_khah.medi_jeem.medi": "\uFD8E",
    "/meem.init_khah.medi_meem.medi": "\uFD8F",
    "/meem.init_meem.fina": "\uFC48",
    "/meem.init_meem.medi": "\uFCD1",
    "/meem.init_yeh.fina": "\uFC4A",
    "/meem.isol": "\uFEE1",
    "/meem.medi": "\uFEE4",
    "/meem.medi_alef.fina": "\uFC88",
    "/meem.medi_hah.medi_yeh.fina": "\uFD8B",
    "/meem.medi_jeem.medi_yeh.fina": "\uFDC0",
    "/meem.medi_khah.medi_yeh.fina": "\uFDB9",
    "/meem.medi_meem.fina": "\uFC89",
    "/meem.medi_meem.medi_yeh.fina": "\uFDB1",
    "/meemDotAbove": "\u0765",
    "/meemDotBelow": "\u0766",
    "/meemabove": "\u06E2",
    "/meemabove.init": "\u06D8",
    "/meemarabic": "\u0645",
    "/meembelow": "\u06ED",
    "/meemfinalarabic": "\uFEE2",
    "/meeminitialarabic": "\uFEE3",
    "/meemmedialarabic": "\uFEE4",
    "/meemmeeminitialarabic": "\uFCD1",
    "/meemmeemisolatedarabic": "\uFC48",
    "/meetorusquare": "\u334D",
    "/megasquare": "\u334B",
    "/megatonsquare": "\u334C",
    "/mehiragana": "\u3081",
    "/meizierasquare": "\u337E",
    "/mekatakana": "\u30E1",
    "/mekatakanahalfwidth": "\uFF92",
    "/melon": "\u1F348",
    "/mem": "\u05DE",
    "/mem:hb": "\u05DE",
    "/memdagesh": "\uFB3E",
    "/memdageshhebrew": "\uFB3E",
    "/memhebrew": "\u05DE",
    "/memo": "\u1F4DD",
    "/memwithdagesh:hb": "\uFB3E",
    "/menarmenian": "\u0574",
    "/menorahNineBranches": "\u1F54E",
    "/menpostSindhi": "\u06FE",
    "/mens": "\u1F6B9",
    "/mepigraphicinverted": "\uA7FD",
    "/mercha:hb": "\u05A5",
    "/merchaKefulah:hb": "\u05A6",
    "/mercury": "\u263F",
    "/merkhahebrew": "\u05A5",
    "/merkhakefulahebrew": "\u05A6",
    "/merkhakefulalefthebrew": "\u05A6",
    "/merkhalefthebrew": "\u05A5",
    "/metalideographiccircled": "\u328E",
    "/metalideographicparen": "\u322E",
    "/meteg:hb": "\u05BD",
    "/metro": "\u1F687",
    "/mgfullwidth": "\u338E",
    "/mhook": "\u0271",
    "/mhzfullwidth": "\u3392",
    "/mhzsquare": "\u3392",
    "/micirclekatakana": "\u32EF",
    "/microphone": "\u1F3A4",
    "/microscope": "\u1F52C",
    "/middledotkatakanahalfwidth": "\uFF65",
    "/middot": "\u00B7",
    "/mieumacirclekorean": "\u3272",
    "/mieumaparenkorean": "\u3212",
    "/mieumcirclekorean": "\u3264",
    "/mieumkorean": "\u3141",
    "/mieumpansioskorean": "\u3170",
    "/mieumparenkorean": "\u3204",
    "/mieumpieupkorean": "\u316E",
    "/mieumsioskorean": "\u316F",
    "/mihiragana": "\u307F",
    "/mikatakana": "\u30DF",
    "/mikatakanahalfwidth": "\uFF90",
    "/mikuronsquare": "\u3348",
    "/milfullwidth": "\u33D5",
    "/militaryMedal": "\u1F396",
    "/milkyWay": "\u1F30C",
    "/mill": "\u20A5",
    "/millionscmbcyr": "\u0489",
    "/millisecond": "\u2034",
    "/millisecondreversed": "\u2037",
    "/minibus": "\u1F690",
    "/minidisc": "\u1F4BD",
    "/minimize": "\u1F5D5",
    "/minus": "\u2212",
    "/minus.inferior": "\u208B",
    "/minus.superior": "\u207B",
    "/minusbelowcmb": "\u0320",
    "/minuscircle": "\u2296",
    "/minusmod": "\u02D7",
    "/minusplus": "\u2213",
    "/minussignmod": "\u02D7",
    "/minustilde": "\u2242",
    "/minute": "\u2032",
    "/minutereversed": "\u2035",
    "/miribaarusquare": "\u334A",
    "/mirisquare": "\u3349",
    "/misc:baby": "\u1F476",
    "/misc:bell": "\u1F514",
    "/misc:dash": "\u1F4A8",
    "/misc:decimalseparator": "\u2396",
    "/misc:diamondblack": "\u2666",
    "/misc:diamondwhite": "\u2662",
    "/misc:ear": "\u1F442",
    "/misc:om": "\u1F549",
    "/misc:ring": "\u1F48D",
    "/misra": "\u060F",
    "/mlfullwidth": "\u3396",
    "/mlonglegturned": "\u0270",
    "/mlsquare": "\u3396",
    "/mlym:a": "\u0D05",
    "/mlym:aa": "\u0D06",
    "/mlym:aasign": "\u0D3E",
    "/mlym:ai": "\u0D10",
    "/mlym:aisign": "\u0D48",
    "/mlym:anusvarasign": "\u0D02",
    "/mlym:archaicii": "\u0D5F",
    "/mlym:au": "\u0D14",
    "/mlym:aulength": "\u0D57",
    "/mlym:ausign": "\u0D4C",
    "/mlym:avagrahasign": "\u0D3D",
    "/mlym:ba": "\u0D2C",
    "/mlym:bha": "\u0D2D",
    "/mlym:ca": "\u0D1A",
    "/mlym:candrabindusign": "\u0D01",
    "/mlym:cha": "\u0D1B",
    "/mlym:circularviramasign": "\u0D3C",
    "/mlym:combininganusvaraabovesign": "\u0D00",
    "/mlym:da": "\u0D26",
    "/mlym:date": "\u0D79",
    "/mlym:dda": "\u0D21",
    "/mlym:ddha": "\u0D22",
    "/mlym:dha": "\u0D27",
    "/mlym:dotreph": "\u0D4E",
    "/mlym:e": "\u0D0E",
    "/mlym:ee": "\u0D0F",
    "/mlym:eesign": "\u0D47",
    "/mlym:eight": "\u0D6E",
    "/mlym:esign": "\u0D46",
    "/mlym:five": "\u0D6B",
    "/mlym:four": "\u0D6A",
    "/mlym:ga": "\u0D17",
    "/mlym:gha": "\u0D18",
    "/mlym:ha": "\u0D39",
    "/mlym:i": "\u0D07",
    "/mlym:ii": "\u0D08",
    "/mlym:iisign": "\u0D40",
    "/mlym:isign": "\u0D3F",
    "/mlym:ja": "\u0D1C",
    "/mlym:jha": "\u0D1D",
    "/mlym:ka": "\u0D15",
    "/mlym:kchillu": "\u0D7F",
    "/mlym:kha": "\u0D16",
    "/mlym:la": "\u0D32",
    "/mlym:lchillu": "\u0D7D",
    "/mlym:lla": "\u0D33",
    "/mlym:llchillu": "\u0D7E",
    "/mlym:llla": "\u0D34",
    "/mlym:lllchillu": "\u0D56",
    "/mlym:llvocal": "\u0D61",
    "/mlym:llvocalsign": "\u0D63",
    "/mlym:lvocal": "\u0D0C",
    "/mlym:lvocalsign": "\u0D62",
    "/mlym:ma": "\u0D2E",
    "/mlym:mchillu": "\u0D54",
    "/mlym:na": "\u0D28",
    "/mlym:nchillu": "\u0D7B",
    "/mlym:nga": "\u0D19",
    "/mlym:nine": "\u0D6F",
    "/mlym:nna": "\u0D23",
    "/mlym:nnchillu": "\u0D7A",
    "/mlym:nnna": "\u0D29",
    "/mlym:nya": "\u0D1E",
    "/mlym:o": "\u0D12",
    "/mlym:one": "\u0D67",
    "/mlym:oneeighth": "\u0D77",
    "/mlym:onefifth": "\u0D5E",
    "/mlym:onefortieth": "\u0D59",
    "/mlym:onehalf": "\u0D74",
    "/mlym:onehundred": "\u0D71",
    "/mlym:oneone-hundred-and-sixtieth": "\u0D58",
    "/mlym:onequarter": "\u0D73",
    "/mlym:onesixteenth": "\u0D76",
    "/mlym:onetenth": "\u0D5C",
    "/mlym:onethousand": "\u0D72",
    "/mlym:onetwentieth": "\u0D5B",
    "/mlym:oo": "\u0D13",
    "/mlym:oosign": "\u0D4B",
    "/mlym:osign": "\u0D4A",
    "/mlym:pa": "\u0D2A",
    "/mlym:parasign": "\u0D4F",
    "/mlym:pha": "\u0D2B",
    "/mlym:ra": "\u0D30",
    "/mlym:rra": "\u0D31",
    "/mlym:rrchillu": "\u0D7C",
    "/mlym:rrvocal": "\u0D60",
    "/mlym:rrvocalsign": "\u0D44",
    "/mlym:rvocal": "\u0D0B",
    "/mlym:rvocalsign": "\u0D43",
    "/mlym:sa": "\u0D38",
    "/mlym:seven": "\u0D6D",
    "/mlym:sha": "\u0D36",
    "/mlym:six": "\u0D6C",
    "/mlym:ssa": "\u0D37",
    "/mlym:ta": "\u0D24",
    "/mlym:ten": "\u0D70",
    "/mlym:tha": "\u0D25",
    "/mlym:three": "\u0D69",
    "/mlym:threeeightieths": "\u0D5A",
    "/mlym:threequarters": "\u0D75",
    "/mlym:threesixteenths": "\u0D78",
    "/mlym:threetwentieths": "\u0D5D",
    "/mlym:tta": "\u0D1F",
    "/mlym:ttha": "\u0D20",
    "/mlym:ttta": "\u0D3A",
    "/mlym:two": "\u0D68",
    "/mlym:u": "\u0D09",
    "/mlym:usign": "\u0D41",
    "/mlym:uu": "\u0D0A",
    "/mlym:uusign": "\u0D42",
    "/mlym:va": "\u0D35",
    "/mlym:verticalbarviramasign": "\u0D3B",
    "/mlym:viramasign": "\u0D4D",
    "/mlym:visargasign": "\u0D03",
    "/mlym:ya": "\u0D2F",
    "/mlym:ychillu": "\u0D55",
    "/mlym:zero": "\u0D66",
    "/mm2fullwidth": "\u339F",
    "/mm3fullwidth": "\u33A3",
    "/mmcubedsquare": "\u33A3",
    "/mmfullwidth": "\u339C",
    "/mmonospace": "\uFF4D",
    "/mmsquaredsquare": "\u339F",
    "/mobilePhone": "\u1F4F1",
    "/mobilePhoneOff": "\u1F4F4",
    "/mobilePhoneRightwardsArrowAtLeft": "\u1F4F2",
    "/mocirclekatakana": "\u32F2",
    "/models": "\u22A7",
    "/mohiragana": "\u3082",
    "/mohmfullwidth": "\u33C1",
    "/mohmsquare": "\u33C1",
    "/mokatakana": "\u30E2",
    "/mokatakanahalfwidth": "\uFF93",
    "/molfullwidth": "\u33D6",
    "/molsquare": "\u33D6",
    "/momathai": "\u0E21",
    "/moneyBag": "\u1F4B0",
    "/moneyWings": "\u1F4B8",
    "/mong:a": "\u1820",
    "/mong:aaligali": "\u1887",
    "/mong:ahaligali": "\u1897",
    "/mong:ang": "\u1829",
    "/mong:angsibe": "\u1862",
    "/mong:angtodo": "\u184A",
    "/mong:anusvaraonealigali": "\u1880",
    "/mong:ba": "\u182A",
    "/mong:baludaaligali": "\u1885",
    "/mong:baludaaligalithree": "\u1886",
    "/mong:batodo": "\u184B",
    "/mong:bhamanchualigali": "\u18A8",
    "/mong:birga": "\u1800",
    "/mong:caaligali": "\u188B",
    "/mong:camanchualigali": "\u189C",
    "/mong:cha": "\u1834",
    "/mong:chasibe": "\u1871",
    "/mong:chatodo": "\u1852",
    "/mong:chi": "\u1842",
    "/mong:colon": "\u1804",
    "/mong:comma": "\u1802",
    "/mong:commamanchu": "\u1808",
    "/mong:cyamanchualigali": "\u18A3",
    "/mong:da": "\u1833",
    "/mong:daaligali": "\u1891",
    "/mong:dagalgaaligali": "\u18A9",
    "/mong:damarualigali": "\u1882",
    "/mong:dasibe": "\u1869",
    "/mong:datodo": "\u1851",
    "/mong:ddaaligali": "\u188E",
    "/mong:ddhamanchualigali": "\u189F",
    "/mong:dhamanchualigali": "\u18A1",
    "/mong:dzatodo": "\u185C",
    "/mong:e": "\u1821",
    "/mong:ee": "\u1827",
    "/mong:eight": "\u1818",
    "/mong:ellipsis": "\u1801",
    "/mong:esibe": "\u185D",
    "/mong:etodo": "\u1844",
    "/mong:fa": "\u1839",
    "/mong:famanchu": "\u1876",
    "/mong:fasibe": "\u186B",
    "/mong:five": "\u1815",
    "/mong:four": "\u1814",
    "/mong:fourdots": "\u1805",
    "/mong:freevariationselectorone": "\u180B",
    "/mong:freevariationselectorthree": "\u180D",
    "/mong:freevariationselectortwo": "\u180C",
    "/mong:ga": "\u182D",
    "/mong:gaasibe": "\u186C",
    "/mong:gaatodo": "\u1858",
    "/mong:gasibe": "\u1864",
    "/mong:gatodo": "\u184E",
    "/mong:ghamanchualigali": "\u189A",
    "/mong:haa": "\u183E",
    "/mong:haasibe": "\u186D",
    "/mong:haatodo": "\u1859",
    "/mong:hasibe": "\u1865",
    "/mong:i": "\u1822",
    "/mong:ialigali": "\u1888",
    "/mong:imanchu": "\u1873",
    "/mong:isibe": "\u185E",
    "/mong:itodo": "\u1845",
    "/mong:iysibe": "\u185F",
    "/mong:ja": "\u1835",
    "/mong:jasibe": "\u186A",
    "/mong:jatodo": "\u1853",
    "/mong:jhamanchualigali": "\u189D",
    "/mong:jiatodo": "\u185A",
    "/mong:ka": "\u183A",
    "/mong:kaaligali": "\u1889",
    "/mong:kamanchu": "\u1874",
    "/mong:kasibe": "\u1863",
    "/mong:katodo": "\u1857",
    "/mong:kha": "\u183B",
    "/mong:la": "\u182F",
    "/mong:lha": "\u1840",
    "/mong:lhamanchualigali": "\u18AA",
    "/mong:longvowelsigntodo": "\u1843",
    "/mong:ma": "\u182E",
    "/mong:matodo": "\u184F",
    "/mong:na": "\u1828",
    "/mong:ngaaligali": "\u188A",
    "/mong:ngamanchualigali": "\u189B",
    "/mong:niatodo": "\u185B",
    "/mong:nine": "\u1819",
    "/mong:nirugu": "\u180A",
    "/mong:nnaaligali": "\u188F",
    "/mong:o": "\u1823",
    "/mong:oe": "\u1825",
    "/mong:oetodo": "\u1848",
    "/mong:one": "\u1811",
    "/mong:otodo": "\u1846",
    "/mong:pa": "\u182B",
    "/mong:paaligali": "\u1892",
    "/mong:pasibe": "\u1866",
    "/mong:patodo": "\u184C",
    "/mong:period": "\u1803",
    "/mong:periodmanchu": "\u1809",
    "/mong:phaaligali": "\u1893",
    "/mong:qa": "\u182C",
    "/mong:qatodo": "\u184D",
    "/mong:ra": "\u1837",
    "/mong:raasibe": "\u1870",
    "/mong:ramanchu": "\u1875",
    "/mong:sa": "\u1830",
    "/mong:seven": "\u1817",
    "/mong:sha": "\u1831",
    "/mong:shasibe": "\u1867",
    "/mong:six": "\u1816",
    "/mong:softhyphentodo": "\u1806",
    "/mong:ssaaligali": "\u1894",
    "/mong:ssamanchualigali": "\u18A2",
    "/mong:syllableboundarymarkersibe": "\u1807",
    "/mong:ta": "\u1832",
    "/mong:taaligali": "\u1890",
    "/mong:tamanchualigali": "\u18A0",
    "/mong:tasibe": "\u1868",
    "/mong:tatodo": "\u1850",
    "/mong:tatodoaligali": "\u1898",
    "/mong:three": "\u1813",
    "/mong:tsa": "\u183C",
    "/mong:tsasibe": "\u186E",
    "/mong:tsatodo": "\u1854",
    "/mong:ttaaligali": "\u188C",
    "/mong:ttamanchualigali": "\u189E",
    "/mong:tthaaligali": "\u188D",
    "/mong:two": "\u1812",
    "/mong:u": "\u1824",
    "/mong:ualigalihalf": "\u18A6",
    "/mong:ubadamaaligali": "\u1883",
    "/mong:ubadamaaligaliinverted": "\u1884",
    "/mong:ue": "\u1826",
    "/mong:uesibe": "\u1860",
    "/mong:uetodo": "\u1849",
    "/mong:usibe": "\u1861",
    "/mong:utodo": "\u1847",
    "/mong:visargaonealigali": "\u1881",
    "/mong:vowelseparator": "\u180E",
    "/mong:wa": "\u1838",
    "/mong:watodo": "\u1856",
    "/mong:ya": "\u1836",
    "/mong:yaaligalihalf": "\u18A7",
    "/mong:yatodo": "\u1855",
    "/mong:za": "\u183D",
    "/mong:zaaligali": "\u1896",
    "/mong:zamanchualigali": "\u18A5",
    "/mong:zasibe": "\u186F",
    "/mong:zero": "\u1810",
    "/mong:zhaaligali": "\u1895",
    "/mong:zhamanchu": "\u1877",
    "/mong:zhamanchualigali": "\u18A4",
    "/mong:zhasibe": "\u1872",
    "/mong:zhatodoaligali": "\u1899",
    "/mong:zhi": "\u1841",
    "/mong:zra": "\u183F",
    "/monkey": "\u1F412",
    "/monkeyFace": "\u1F435",
    "/monogramyang": "\u268A",
    "/monogramyin": "\u268B",
    "/monorail": "\u1F69D",
    "/monostable": "\u238D",
    "/moodBubble": "\u1F5F0",
    "/moonViewingCeremony": "\u1F391",
    "/moonideographiccircled": "\u328A",
    "/moonideographicparen": "\u322A",
    "/moonlilithblack": "\u26B8",
    "/mosque": "\u1F54C",
    "/motorBoat": "\u1F6E5",
    "/motorScooter": "\u1F6F5",
    "/motorway": "\u1F6E3",
    "/mountFuji": "\u1F5FB",
    "/mountain": "\u26F0",
    "/mountainBicyclist": "\u1F6B5",
    "/mountainCableway": "\u1F6A0",
    "/mountainRailway": "\u1F69E",
    "/mouse": "\u1F401",
    "/mouseFace": "\u1F42D",
    "/mouth": "\u1F444",
    "/movers2fullwidth": "\u33A8",
    "/moversfullwidth": "\u33A7",
    "/moverssquare": "\u33A7",
    "/moverssquaredsquare": "\u33A8",
    "/movieCamera": "\u1F3A5",
    "/moyai": "\u1F5FF",
    "/mpafullwidth": "\u33AB",
    "/mparen": "\u24A8",
    "/mparenthesized": "\u24A8",
    "/mpasquare": "\u33AB",
    "/msfullwidth": "\u33B3",
    "/mssquare": "\u33B3",
    "/msuperior": "\uF6EF",
    "/mturned": "\u026F",
    "/mu": "\u00B5",
    "/mu.math": "\u00B5",
    "/mu1": "\u00B5",
    "/muafullwidth": "\u3382",
    "/muasquare": "\u3382",
    "/muchgreater": "\u226B",
    "/muchless": "\u226A",
    "/mucirclekatakana": "\u32F0",
    "/muffullwidth": "\u338C",
    "/mufsquare": "\u338C",
    "/mugfullwidth": "\u338D",
    "/mugreek": "\u03BC",
    "/mugsquare": "\u338D",
    "/muhiragana": "\u3080",
    "/mukatakana": "\u30E0",
    "/mukatakanahalfwidth": "\uFF91",
    "/mulfullwidth": "\u3395",
    "/mulsquare": "\u3395",
    "/multimap": "\u22B8",
    "/multimapleft": "\u27DC",
    "/multipleMusicalNotes": "\u1F3B6",
    "/multiply": "\u00D7",
    "/multiset": "\u228C",
    "/multisetmultiplication": "\u228D",
    "/multisetunion": "\u228E",
    "/mum": "\uA773",
    "/mumfullwidth": "\u339B",
    "/mumsquare": "\u339B",
    "/munach:hb": "\u05A3",
    "/munahhebrew": "\u05A3",
    "/munahlefthebrew": "\u05A3",
    "/musfullwidth": "\u33B2",
    "/mushroom": "\u1F344",
    "/musicalKeyboard": "\u1F3B9",
    "/musicalKeyboardJacks": "\u1F398",
    "/musicalNote": "\u1F3B5",
    "/musicalScore": "\u1F3BC",
    "/musicalnote": "\u266A",
    "/musicalnotedbl": "\u266B",
    "/musicflat": "\u266D",
    "/musicflatsign": "\u266D",
    "/musicnatural": "\u266E",
    "/musicsharp": "\u266F",
    "/musicsharpsign": "\u266F",
    "/mussquare": "\u33B2",
    "/muvfullwidth": "\u33B6",
    "/muvsquare": "\u33B6",
    "/muwfullwidth": "\u33BC",
    "/muwsquare": "\u33BC",
    "/mvfullwidth": "\u33B7",
    "/mvmegafullwidth": "\u33B9",
    "/mvmegasquare": "\u33B9",
    "/mvsquare": "\u33B7",
    "/mwfullwidth": "\u33BD",
    "/mwmegafullwidth": "\u33BF",
    "/mwmegasquare": "\u33BF",
    "/mwsquare": "\u33BD",
    "/n": "\u006E",
    "/n.inferior": "\u2099",
    "/n.superior": "\u207F",
    "/nabengali": "\u09A8",
    "/nabla": "\u2207",
    "/nacirclekatakana": "\u32E4",
    "/nacute": "\u0144",
    "/nadeva": "\u0928",
    "/nafullwidth": "\u3381",
    "/nagujarati": "\u0AA8",
    "/nagurmukhi": "\u0A28",
    "/nahiragana": "\u306A",
    "/nailPolish": "\u1F485",
    "/naira": "\u20A6",
    "/nakatakana": "\u30CA",
    "/nakatakanahalfwidth": "\uFF85",
    "/nameBadge": "\u1F4DB",
    "/nameideographiccircled": "\u3294",
    "/nameideographicparen": "\u3234",
    "/namurda": "\uA99F",
    "/nand": "\u22BC",
    "/nanosquare": "\u3328",
    "/napostrophe": "\u0149",
    "/narrownobreakspace": "\u202F",
    "/nasquare": "\u3381",
    "/nationalPark": "\u1F3DE",
    "/nationaldigitshapes": "\u206E",
    "/nbopomofo": "\u310B",
    "/nbspace": "\u00A0",
    "/ncaron": "\u0148",
    "/ncedilla": "\u0146",
    "/ncircle": "\u24DD",
    "/ncircumflexbelow": "\u1E4B",
    "/ncommaaccent": "\u0146",
    "/ncurl": "\u0235",
    "/ndescender": "\uA791",
    "/ndot": "\u1E45",
    "/ndotaccent": "\u1E45",
    "/ndotbelow": "\u1E47",
    "/necirclekatakana": "\u32E7",
    "/necktie": "\u1F454",
    "/negatedturnstiledblverticalbarright": "\u22AF",
    "/nehiragana": "\u306D",
    "/neirapproximatelynoractuallyequal": "\u2247",
    "/neirasersetnorequalup": "\u2289",
    "/neirasubsetnorequal": "\u2288",
    "/neirgreaternorequal": "\u2271",
    "/neirgreaternorequivalent": "\u2275",
    "/neirgreaternorless": "\u2279",
    "/neirlessnorequal": "\u2270",
    "/neirlessnorequivalent": "\u2274",
    "/neirlessnorgreater": "\u2278",
    "/nekatakana": "\u30CD",
    "/nekatakanahalfwidth": "\uFF88",
    "/neptune": "\u2646",
    "/neuter": "\u26B2",
    "/neutralFace": "\u1F610",
    "/newMoon": "\u1F311",
    "/newMoonFace": "\u1F31A",
    "/newsheqel": "\u20AA",
    "/newsheqelsign": "\u20AA",
    "/newspaper": "\u1F4F0",
    "/newsquare": "\u1F195",
    "/nextpage": "\u2398",
    "/nffullwidth": "\u338B",
    "/nfsquare": "\u338B",
    "/ng.fina": "\uFBD4",
    "/ng.init": "\uFBD5",
    "/ng.isol": "\uFBD3",
    "/ng.medi": "\uFBD6",
    "/ngabengali": "\u0999",
    "/ngadeva": "\u0919",
    "/ngagujarati": "\u0A99",
    "/ngagurmukhi": "\u0A19",
    "/ngalelet": "\uA98A",
    "/ngaleletraswadi": "\uA98B",
    "/ngoeh": "\u06B1",
    "/ngoeh.fina": "\uFB9B",
    "/ngoeh.init": "\uFB9C",
    "/ngoeh.isol": "\uFB9A",
    "/ngoeh.medi": "\uFB9D",
    "/ngonguthai": "\u0E07",
    "/ngrave": "\u01F9",
    "/ngsquare": "\u1F196",
    "/nhiragana": "\u3093",
    "/nhookleft": "\u0272",
    "/nhookretroflex": "\u0273",
    "/nicirclekatakana": "\u32E5",
    "/nieunacirclekorean": "\u326F",
    "/nieunaparenkorean": "\u320F",
    "/nieuncieuckorean": "\u3135",
    "/nieuncirclekorean": "\u3261",
    "/nieunhieuhkorean": "\u3136",
    "/nieunkorean": "\u3134",
    "/nieunpansioskorean": "\u3168",
    "/nieunparenkorean": "\u3201",
    "/nieunsioskorean": "\u3167",
    "/nieuntikeutkorean": "\u3166",
    "/nightStars": "\u1F303",
    "/nightideographiccircled": "\u32B0",
    "/nihiragana": "\u306B",
    "/nikatakana": "\u30CB",
    "/nikatakanahalfwidth": "\uFF86",
    "/nikhahitleftthai": "\uF899",
    "/nikhahitthai": "\u0E4D",
    "/nine": "\u0039",
    "/nine.inferior": "\u2089",
    "/nine.roman": "\u2168",
    "/nine.romansmall": "\u2178",
    "/nine.superior": "\u2079",
    "/ninearabic": "\u0669",
    "/ninebengali": "\u09EF",
    "/ninecircle": "\u2468",
    "/ninecircledbl": "\u24FD",
    "/ninecircleinversesansserif": "\u2792",
    "/ninecomma": "\u1F10A",
    "/ninedeva": "\u096F",
    "/ninefar": "\u06F9",
    "/ninegujarati": "\u0AEF",
    "/ninegurmukhi": "\u0A6F",
    "/ninehackarabic": "\u0669",
    "/ninehangzhou": "\u3029",
    "/nineideographiccircled": "\u3288",
    "/nineideographicparen": "\u3228",
    "/nineinferior": "\u2089",
    "/ninemonospace": "\uFF19",
    "/nineoldstyle": "\uF739",
    "/nineparen": "\u247C",
    "/nineparenthesized": "\u247C",
    "/nineperiod": "\u2490",
    "/ninepersian": "\u06F9",
    "/nineroman": "\u2178",
    "/ninesuperior": "\u2079",
    "/nineteencircle": "\u2472",
    "/nineteencircleblack": "\u24F3",
    "/nineteenparen": "\u2486",
    "/nineteenparenthesized": "\u2486",
    "/nineteenperiod": "\u249A",
    "/ninethai": "\u0E59",
    "/nj": "\u01CC",
    "/njecyr": "\u045A",
    "/njecyrillic": "\u045A",
    "/njekomicyr": "\u050B",
    "/nkatakana": "\u30F3",
    "/nkatakanahalfwidth": "\uFF9D",
    "/nlegrightlong": "\u019E",
    "/nlinebelow": "\u1E49",
    "/nlongrightleg": "\u019E",
    "/nmbr:oneeighth": "\u215B",
    "/nmbr:onefifth": "\u2155",
    "/nmbr:onetenth": "\u2152",
    "/nmfullwidth": "\u339A",
    "/nmonospace": "\uFF4E",
    "/nmsquare": "\u339A",
    "/nnabengali": "\u09A3",
    "/nnadeva": "\u0923",
    "/nnagujarati": "\u0AA3",
    "/nnagurmukhi": "\u0A23",
    "/nnnadeva": "\u0929",
    "/noBicycles": "\u1F6B3",
    "/noEntrySign": "\u1F6AB",
    "/noMobilePhones": "\u1F4F5",
    "/noOneUnderEighteen": "\u1F51E",
    "/noPedestrians": "\u1F6B7",
    "/noPiracy": "\u1F572",
    "/noSmoking": "\u1F6AD",
    "/nobliquestroke": "\uA7A5",
    "/nocirclekatakana": "\u32E8",
    "/nodeascending": "\u260A",
    "/nodedescending": "\u260B",
    "/noentry": "\u26D4",
    "/nohiragana": "\u306E",
    "/nokatakana": "\u30CE",
    "/nokatakanahalfwidth": "\uFF89",
    "/nominaldigitshapes": "\u206F",
    "/nonPotableWater": "\u1F6B1",
    "/nonbreakinghyphen": "\u2011",
    "/nonbreakingspace": "\u00A0",
    "/nonenthai": "\u0E13",
    "/nonuthai": "\u0E19",
    "/noon": "\u0646",
    "/noon.fina": "\uFEE6",
    "/noon.init": "\uFEE7",
    "/noon.init_alefmaksura.fina": "\uFC4F",
    "/noon.init_hah.fina": "\uFC4C",
    "/noon.init_hah.medi": "\uFCD3",
    "/noon.init_hah.medi_meem.medi": "\uFD95",
    "/noon.init_heh.medi": "\uFCD6",
    "/noon.init_jeem.fina": "\uFC4B",
    "/noon.init_jeem.medi": "\uFCD2",
    "/noon.init_jeem.medi_hah.medi": "\uFDB8",
    "/noon.init_jeem.medi_meem.medi": "\uFD98",
    "/noon.init_khah.fina": "\uFC4D",
    "/noon.init_khah.medi": "\uFCD4",
    "/noon.init_meem.fina": "\uFC4E",
    "/noon.init_meem.medi": "\uFCD5",
    "/noon.init_yeh.fina": "\uFC50",
    "/noon.isol": "\uFEE5",
    "/noon.medi": "\uFEE8",
    "/noon.medi_alefmaksura.fina": "\uFC8E",
    "/noon.medi_hah.medi_alefmaksura.fina": "\uFD96",
    "/noon.medi_hah.medi_yeh.fina": "\uFDB3",
    "/noon.medi_heh.medi": "\uFCEF",
    "/noon.medi_jeem.medi_alefmaksura.fina": "\uFD99",
    "/noon.medi_jeem.medi_hah.fina": "\uFDBD",
    "/noon.medi_jeem.medi_meem.fina": "\uFD97",
    "/noon.medi_jeem.medi_yeh.fina": "\uFDC7",
    "/noon.medi_meem.fina": "\uFC8C",
    "/noon.medi_meem.medi": "\uFCEE",
    "/noon.medi_meem.medi_alefmaksura.fina": "\uFD9B",
    "/noon.medi_meem.medi_yeh.fina": "\uFD9A",
    "/noon.medi_noon.fina": "\uFC8D",
    "/noon.medi_reh.fina": "\uFC8A",
    "/noon.medi_yeh.fina": "\uFC8F",
    "/noon.medi_zain.fina": "\uFC8B",
    "/noonSmallTah": "\u0768",
    "/noonSmallV": "\u0769",
    "/noonTwoDotsBelow": "\u0767",
    "/noonabove": "\u06E8",
    "/noonarabic": "\u0646",
    "/noondotbelow": "\u06B9",
    "/noonfinalarabic": "\uFEE6",
    "/noonghunna": "\u06BA",
    "/noonghunna.fina": "\uFB9F",
    "/noonghunna.isol": "\uFB9E",
    "/noonghunnaarabic": "\u06BA",
    "/noonghunnafinalarabic": "\uFB9F",
    "/noonhehinitialarabic": "\uFEE7",
    "/nooninitialarabic": "\uFEE7",
    "/noonjeeminitialarabic": "\uFCD2",
    "/noonjeemisolatedarabic": "\uFC4B",
    "/noonmedialarabic": "\uFEE8",
    "/noonmeeminitialarabic": "\uFCD5",
    "/noonmeemisolatedarabic": "\uFC4E",
    "/noonnoonfinalarabic": "\uFC8D",
    "/noonring": "\u06BC",
    "/noonthreedotsabove": "\u06BD",
    "/nor": "\u22BD",
    "/nordicmark": "\u20BB",
    "/normalfacrsemidirectproductleft": "\u22C9",
    "/normalfacrsemidirectproductright": "\u22CA",
    "/normalsubgroorequalup": "\u22B4",
    "/normalsubgroup": "\u22B2",
    "/northeastPointingAirplane": "\u1F6EA",
    "/nose": "\u1F443",
    "/notalmostequal": "\u2249",
    "/notasersetup": "\u2285",
    "/notasympticallyequal": "\u2244",
    "/notcheckmark": "\u237B",
    "/notchedLeftSemicircleThreeDots": "\u1F543",
    "/notchedRightSemicircleThreeDots": "\u1F544",
    "/notcontains": "\u220C",
    "/note": "\u1F5C8",
    "/notePad": "\u1F5CA",
    "/notePage": "\u1F5C9",
    "/notebook": "\u1F4D3",
    "/notebookDecorativeCover": "\u1F4D4",
    "/notelement": "\u2209",
    "/notelementof": "\u2209",
    "/notequal": "\u2260",
    "/notequivalent": "\u226D",
    "/notexistential": "\u2204",
    "/notgreater": "\u226F",
    "/notgreaternorequal": "\u2271",
    "/notgreaternorless": "\u2279",
    "/notidentical": "\u2262",
    "/notless": "\u226E",
    "/notlessnorequal": "\u2270",
    "/notnormalsubgroorequalup": "\u22EC",
    "/notnormalsubgroup": "\u22EA",
    "/notparallel": "\u2226",
    "/notprecedes": "\u2280",
    "/notsignturned": "\u2319",
    "/notsquareimageorequal": "\u22E2",
    "/notsquareoriginalorequal": "\u22E3",
    "/notsubset": "\u2284",
    "/notsucceeds": "\u2281",
    "/notsuperset": "\u2285",
    "/nottilde": "\u2241",
    "/nottosquare": "\u3329",
    "/nottrue": "\u22AD",
    "/novembertelegraph": "\u32CA",
    "/nowarmenian": "\u0576",
    "/nparen": "\u24A9",
    "/nparenthesized": "\u24A9",
    "/nretroflex": "\u0273",
    "/nsfullwidth": "\u33B1",
    "/nssquare": "\u33B1",
    "/nsuperior": "\u207F",
    "/ntilde": "\u00F1",
    "/nu": "\u03BD",
    "/nucirclekatakana": "\u32E6",
    "/nuhiragana": "\u306C",
    "/nukatakana": "\u30CC",
    "/nukatakanahalfwidth": "\uFF87",
    "/nuktabengali": "\u09BC",
    "/nuktadeva": "\u093C",
    "/nuktagujarati": "\u0ABC",
    "/nuktagurmukhi": "\u0A3C",
    "/num": "\uA774",
    "/numbermarkabove": "\u0605",
    "/numbersign": "\u0023",
    "/numbersignmonospace": "\uFF03",
    "/numbersignsmall": "\uFE5F",
    "/numeralsign": "\u0374",
    "/numeralsigngreek": "\u0374",
    "/numeralsignlowergreek": "\u0375",
    "/numero": "\u2116",
    "/nun": "\u05E0",
    "/nun:hb": "\u05E0",
    "/nunHafukha:hb": "\u05C6",
    "/nundagesh": "\uFB40",
    "/nundageshhebrew": "\uFB40",
    "/nunhebrew": "\u05E0",
    "/nunwithdagesh:hb": "\uFB40",
    "/nutAndBolt": "\u1F529",
    "/nvfullwidth": "\u33B5",
    "/nvsquare": "\u33B5",
    "/nwfullwidth": "\u33BB",
    "/nwsquare": "\u33BB",
    "/nyabengali": "\u099E",
    "/nyadeva": "\u091E",
    "/nyagujarati": "\u0A9E",
    "/nyagurmukhi": "\u0A1E",
    "/nyamurda": "\uA998",
    "/nyeh": "\u0683",
    "/nyeh.fina": "\uFB77",
    "/nyeh.init": "\uFB78",
    "/nyeh.isol": "\uFB76",
    "/nyeh.medi": "\uFB79",
    "/o": "\u006F",
    "/o.inferior": "\u2092",
    "/oacute": "\u00F3",
    "/oangthai": "\u0E2D",
    "/obarcyr": "\u04E9",
    "/obardieresiscyr": "\u04EB",
    "/obarred": "\u0275",
    "/obarredcyrillic": "\u04E9",
    "/obarreddieresiscyrillic": "\u04EB",
    "/obelosdotted": "\u2E13",
    "/obengali": "\u0993",
    "/obopomofo": "\u311B",
    "/obreve": "\u014F",
    "/observereye": "\u23FF",
    "/ocandradeva": "\u0911",
    "/ocandragujarati": "\u0A91",
    "/ocandravowelsigndeva": "\u0949",
    "/ocandravowelsigngujarati": "\u0AC9",
    "/ocaron": "\u01D2",
    "/ocircle": "\u24DE",
    "/ocirclekatakana": "\u32D4",
    "/ocircumflex": "\u00F4",
    "/ocircumflexacute": "\u1ED1",
    "/ocircumflexdotbelow": "\u1ED9",
    "/ocircumflexgrave": "\u1ED3",
    "/ocircumflexhoi": "\u1ED5",
    "/ocircumflexhookabove": "\u1ED5",
    "/ocircumflextilde": "\u1ED7",
    "/ocr:bowtie": "\u2445",
    "/ocr:dash": "\u2448",
    "/octagonalSign": "\u1F6D1",
    "/octobertelegraph": "\u32C9",
    "/octopus": "\u1F419",
    "/ocyr": "\u043E",
    "/ocyrillic": "\u043E",
    "/odblacute": "\u0151",
    "/odblgrave": "\u020D",
    "/oden": "\u1F362",
    "/odeva": "\u0913",
    "/odieresis": "\u00F6",
    "/odieresiscyr": "\u04E7",
    "/odieresiscyrillic": "\u04E7",
    "/odieresismacron": "\u022B",
    "/odot": "\u022F",
    "/odotbelow": "\u1ECD",
    "/odotmacron": "\u0231",
    "/oe": "\u0153",
    "/oe.fina": "\uFBDA",
    "/oe.isol": "\uFBD9",
    "/oekirghiz": "\u06C5",
    "/oekirghiz.fina": "\uFBE1",
    "/oekirghiz.isol": "\uFBE0",
    "/oekorean": "\u315A",
    "/officeBuilding": "\u1F3E2",
    "/ogonek": "\u02DB",
    "/ogonekcmb": "\u0328",
    "/ograve": "\u00F2",
    "/ogravedbl": "\u020D",
    "/ogujarati": "\u0A93",
    "/oharmenian": "\u0585",
    "/ohiragana": "\u304A",
    "/ohm": "\u2126",
    "/ohminverted": "\u2127",
    "/ohoi": "\u1ECF",
    "/ohookabove": "\u1ECF",
    "/ohorn": "\u01A1",
    "/ohornacute": "\u1EDB",
    "/ohorndotbelow": "\u1EE3",
    "/ohorngrave": "\u1EDD",
    "/ohornhoi": "\u1EDF",
    "/ohornhookabove": "\u1EDF",
    "/ohorntilde": "\u1EE1",
    "/ohungarumlaut": "\u0151",
    "/ohuparen": "\u321E",
    "/oi": "\u01A3",
    "/oilDrum": "\u1F6E2",
    "/oinvertedbreve": "\u020F",
    "/ojeonparen": "\u321D",
    "/okHandSign": "\u1F44C",
    "/okatakana": "\u30AA",
    "/okatakanahalfwidth": "\uFF75",
    "/okorean": "\u3157",
    "/oksquare": "\u1F197",
    "/oldKey": "\u1F5DD",
    "/oldPersonalComputer": "\u1F5B3",
    "/olderMan": "\u1F474",
    "/olderWoman": "\u1F475",
    "/ole:hb": "\u05AB",
    "/olehebrew": "\u05AB",
    "/oloop": "\uA74D",
    "/olowringinside": "\u2C7A",
    "/omacron": "\u014D",
    "/omacronacute": "\u1E53",
    "/omacrongrave": "\u1E51",
    "/omdeva": "\u0950",
    "/omega": "\u03C9",
    "/omega1": "\u03D6",
    "/omegaacute": "\u1F7D",
    "/omegaacuteiotasub": "\u1FF4",
    "/omegaasper": "\u1F61",
    "/omegaasperacute": "\u1F65",
    "/omegaasperacuteiotasub": "\u1FA5",
    "/omegaaspergrave": "\u1F63",
    "/omegaaspergraveiotasub": "\u1FA3",
    "/omegaasperiotasub": "\u1FA1",
    "/omegaaspertilde": "\u1F67",
    "/omegaaspertildeiotasub": "\u1FA7",
    "/omegaclosed": "\u0277",
    "/omegacyr": "\u0461",
    "/omegacyrillic": "\u0461",
    "/omegafunc": "\u2375",
    "/omegagrave": "\u1F7C",
    "/omegagraveiotasub": "\u1FF2",
    "/omegaiotasub": "\u1FF3",
    "/omegalatinclosed": "\u0277",
    "/omegalenis": "\u1F60",
    "/omegalenisacute": "\u1F64",
    "/omegalenisacuteiotasub": "\u1FA4",
    "/omegalenisgrave": "\u1F62",
    "/omegalenisgraveiotasub": "\u1FA2",
    "/omegalenisiotasub": "\u1FA0",
    "/omegalenistilde": "\u1F66",
    "/omegalenistildeiotasub": "\u1FA6",
    "/omegaroundcyr": "\u047B",
    "/omegaroundcyrillic": "\u047B",
    "/omegatilde": "\u1FF6",
    "/omegatildeiotasub": "\u1FF7",
    "/omegatitlocyr": "\u047D",
    "/omegatitlocyrillic": "\u047D",
    "/omegatonos": "\u03CE",
    "/omegaunderlinefunc": "\u2379",
    "/omgujarati": "\u0AD0",
    "/omicron": "\u03BF",
    "/omicronacute": "\u1F79",
    "/omicronasper": "\u1F41",
    "/omicronasperacute": "\u1F45",
    "/omicronaspergrave": "\u1F43",
    "/omicrongrave": "\u1F78",
    "/omicronlenis": "\u1F40",
    "/omicronlenisacute": "\u1F44",
    "/omicronlenisgrave": "\u1F42",
    "/omicrontonos": "\u03CC",
    "/omonospace": "\uFF4F",
    "/onExclamationMarkLeftRightArrowAbove": "\u1F51B",
    "/oncomingAutomobile": "\u1F698",
    "/oncomingBus": "\u1F68D",
    "/oncomingFireEngine": "\u1F6F1",
    "/oncomingPoliceCar": "\u1F694",
    "/oncomingTaxi": "\u1F696",
    "/one": "\u0031",
    "/one.inferior": "\u2081",
    "/one.roman": "\u2160",
    "/one.romansmall": "\u2170",
    "/oneButtonMouse": "\u1F5AF",
    "/onearabic": "\u0661",
    "/onebengali": "\u09E7",
    "/onecircle": "\u2460",
    "/onecircledbl": "\u24F5",
    "/onecircleinversesansserif": "\u278A",
    "/onecomma": "\u1F102",
    "/onedeva": "\u0967",
    "/onedotenleader": "\u2024",
    "/onedotovertwodots": "\u2E2B",
    "/oneeighth": "\u215B",
    "/onefar": "\u06F1",
    "/onefitted": "\uF6DC",
    "/onefraction": "\u215F",
    "/onegujarati": "\u0AE7",
    "/onegurmukhi": "\u0A67",
    "/onehackarabic": "\u0661",
    "/onehalf": "\u00BD",
    "/onehangzhou": "\u3021",
    "/onehundred.roman": "\u216D",
    "/onehundred.romansmall": "\u217D",
    "/onehundredthousand.roman": "\u2188",
    "/onehundredtwentypsquare": "\u1F1A4",
    "/oneideographiccircled": "\u3280",
    "/oneideographicparen": "\u3220",
    "/oneinferior": "\u2081",
    "/onemonospace": "\uFF11",
    "/oneninth": "\u2151",
    "/onenumeratorbengali": "\u09F4",
    "/oneoldstyle": "\uF731",
    "/oneparen": "\u2474",
    "/oneparenthesized": "\u2474",
    "/oneperiod": "\u2488",
    "/onepersian": "\u06F1",
    "/onequarter": "\u00BC",
    "/oneroman": "\u2170",
    "/oneseventh": "\u2150",
    "/onesixth": "\u2159",
    "/onesuperior": "\u00B9",
    "/onethai": "\u0E51",
    "/onethird": "\u2153",
    "/onethousand.roman": "\u216F",
    "/onethousand.romansmall": "\u217F",
    "/onethousandcd.roman": "\u2180",
    "/onsusquare": "\u3309",
    "/oo": "\uA74F",
    "/oogonek": "\u01EB",
    "/oogonekmacron": "\u01ED",
    "/oogurmukhi": "\u0A13",
    "/oomatragurmukhi": "\u0A4B",
    "/oomusquare": "\u330A",
    "/oopen": "\u0254",
    "/oparen": "\u24AA",
    "/oparenthesized": "\u24AA",
    "/openBook": "\u1F4D6",
    "/openFileFolder": "\u1F4C2",
    "/openFolder": "\u1F5C1",
    "/openHandsSign": "\u1F450",
    "/openLock": "\u1F513",
    "/openMailboxLoweredFlag": "\u1F4ED",
    "/openMailboxRaisedFlag": "\u1F4EC",
    "/openbullet": "\u25E6",
    "/openheadarrowleft": "\u21FD",
    "/openheadarrowleftright": "\u21FF",
    "/openheadarrowright": "\u21FE",
    "/opensubset": "\u27C3",
    "/opensuperset": "\u27C4",
    "/ophiuchus": "\u26CE",
    "/opposition": "\u260D",
    "/opticalDisc": "\u1F4BF",
    "/opticalDiscIcon": "\u1F5B8",
    "/option": "\u2325",
    "/orangeBook": "\u1F4D9",
    "/ordfeminine": "\u00AA",
    "/ordmasculine": "\u00BA",
    "/ordotinside": "\u27C7",
    "/original": "\u22B6",
    "/ornateleftparenthesis": "\uFD3E",
    "/ornaterightparenthesis": "\uFD3F",
    "/orthodoxcross": "\u2626",
    "/orthogonal": "\u221F",
    "/orya:a": "\u0B05",
    "/orya:aa": "\u0B06",
    "/orya:aasign": "\u0B3E",
    "/orya:ai": "\u0B10",
    "/orya:ailengthmark": "\u0B56",
    "/orya:aisign": "\u0B48",
    "/orya:anusvara": "\u0B02",
    "/orya:au": "\u0B14",
    "/orya:aulengthmark": "\u0B57",
    "/orya:ausign": "\u0B4C",
    "/orya:avagraha": "\u0B3D",
    "/orya:ba": "\u0B2C",
    "/orya:bha": "\u0B2D",
    "/orya:ca": "\u0B1A",
    "/orya:candrabindu": "\u0B01",
    "/orya:cha": "\u0B1B",
    "/orya:da": "\u0B26",
    "/orya:dda": "\u0B21",
    "/orya:ddha": "\u0B22",
    "/orya:dha": "\u0B27",
    "/orya:e": "\u0B0F",
    "/orya:eight": "\u0B6E",
    "/orya:esign": "\u0B47",
    "/orya:five": "\u0B6B",
    "/orya:four": "\u0B6A",
    "/orya:fractiononeeighth": "\u0B76",
    "/orya:fractiononehalf": "\u0B73",
    "/orya:fractiononequarter": "\u0B72",
    "/orya:fractiononesixteenth": "\u0B75",
    "/orya:fractionthreequarters": "\u0B74",
    "/orya:fractionthreesixteenths": "\u0B77",
    "/orya:ga": "\u0B17",
    "/orya:gha": "\u0B18",
    "/orya:ha": "\u0B39",
    "/orya:i": "\u0B07",
    "/orya:ii": "\u0B08",
    "/orya:iisign": "\u0B40",
    "/orya:isign": "\u0B3F",
    "/orya:isshar": "\u0B70",
    "/orya:ja": "\u0B1C",
    "/orya:jha": "\u0B1D",
    "/orya:ka": "\u0B15",
    "/orya:kha": "\u0B16",
    "/orya:la": "\u0B32",
    "/orya:lla": "\u0B33",
    "/orya:llvocal": "\u0B61",
    "/orya:llvocalsign": "\u0B63",
    "/orya:lvocal": "\u0B0C",
    "/orya:lvocalsign": "\u0B62",
    "/orya:ma": "\u0B2E",
    "/orya:na": "\u0B28",
    "/orya:nga": "\u0B19",
    "/orya:nine": "\u0B6F",
    "/orya:nna": "\u0B23",
    "/orya:nukta": "\u0B3C",
    "/orya:nya": "\u0B1E",
    "/orya:o": "\u0B13",
    "/orya:one": "\u0B67",
    "/orya:osign": "\u0B4B",
    "/orya:pa": "\u0B2A",
    "/orya:pha": "\u0B2B",
    "/orya:ra": "\u0B30",
    "/orya:rha": "\u0B5D",
    "/orya:rra": "\u0B5C",
    "/orya:rrvocal": "\u0B60",
    "/orya:rrvocalsign": "\u0B44",
    "/orya:rvocal": "\u0B0B",
    "/orya:rvocalsign": "\u0B43",
    "/orya:sa": "\u0B38",
    "/orya:seven": "\u0B6D",
    "/orya:sha": "\u0B36",
    "/orya:six": "\u0B6C",
    "/orya:ssa": "\u0B37",
    "/orya:ta": "\u0B24",
    "/orya:tha": "\u0B25",
    "/orya:three": "\u0B69",
    "/orya:tta": "\u0B1F",
    "/orya:ttha": "\u0B20",
    "/orya:two": "\u0B68",
    "/orya:u": "\u0B09",
    "/orya:usign": "\u0B41",
    "/orya:uu": "\u0B0A",
    "/orya:uusign": "\u0B42",
    "/orya:va": "\u0B35",
    "/orya:virama": "\u0B4D",
    "/orya:visarga": "\u0B03",
    "/orya:wa": "\u0B71",
    "/orya:ya": "\u0B2F",
    "/orya:yya": "\u0B5F",
    "/orya:zero": "\u0B66",
    "/oscript": "\u2134",
    "/oshortdeva": "\u0912",
    "/oshortvowelsigndeva": "\u094A",
    "/oslash": "\u00F8",
    "/oslashacute": "\u01FF",
    "/osmallhiragana": "\u3049",
    "/osmallkatakana": "\u30A9",
    "/osmallkatakanahalfwidth": "\uFF6B",
    "/ostroke": "\uA74B",
    "/ostrokeacute": "\u01FF",
    "/osuperior": "\uF6F0",
    "/otcyr": "\u047F",
    "/otcyrillic": "\u047F",
    "/otilde": "\u00F5",
    "/otildeacute": "\u1E4D",
    "/otildedieresis": "\u1E4F",
    "/otildemacron": "\u022D",
    "/ou": "\u0223",
    "/oubopomofo": "\u3121",
    "/ounce": "\u2125",
    "/outboxTray": "\u1F4E4",
    "/outerjoinfull": "\u27D7",
    "/outerjoinleft": "\u27D5",
    "/outerjoinright": "\u27D6",
    "/outputpassiveup": "\u2392",
    "/overlap": "\u1F5D7",
    "/overline": "\u203E",
    "/overlinecenterline": "\uFE4A",
    "/overlinecmb": "\u0305",
    "/overlinedashed": "\uFE49",
    "/overlinedblwavy": "\uFE4C",
    "/overlinewavy": "\uFE4B",
    "/overscore": "\u00AF",
    "/ovfullwidth": "\u3375",
    "/ovowelsignbengali": "\u09CB",
    "/ovowelsigndeva": "\u094B",
    "/ovowelsigngujarati": "\u0ACB",
    "/ox": "\u1F402",
    "/p": "\u0070",
    "/p.inferior": "\u209A",
    "/paampsfullwidth": "\u3380",
    "/paampssquare": "\u3380",
    "/paasentosquare": "\u332B",
    "/paatusquare": "\u332C",
    "/pabengali": "\u09AA",
    "/pacerek": "\uA989",
    "/package": "\u1F4E6",
    "/pacute": "\u1E55",
    "/padeva": "\u092A",
    "/pafullwidth": "\u33A9",
    "/page": "\u1F5CF",
    "/pageCircledText": "\u1F5DF",
    "/pageCurl": "\u1F4C3",
    "/pageFacingUp": "\u1F4C4",
    "/pagedown": "\u21DF",
    "/pager": "\u1F4DF",
    "/pages": "\u1F5D0",
    "/pageup": "\u21DE",
    "/pagoda": "\u1F6D4",
    "/pagujarati": "\u0AAA",
    "/pagurmukhi": "\u0A2A",
    "/pahiragana": "\u3071",
    "/paiyannoithai": "\u0E2F",
    "/pakatakana": "\u30D1",
    "/palatalizationcyrilliccmb": "\u0484",
    "/palatcmbcyr": "\u0484",
    "/pallas": "\u26B4",
    "/palmTree": "\u1F334",
    "/palmbranch": "\u2E19",
    "/palochkacyr": "\u04CF",
    "/palochkacyrillic": "\u04C0",
    "/pamurda": "\uA9A6",
    "/pandaFace": "\u1F43C",
    "/pangkatpada": "\uA9C7",
    "/pangkon": "\uA9C0",
    "/pangrangkep": "\uA9CF",
    "/pansioskorean": "\u317F",
    "/panyangga": "\uA980",
    "/paperclip": "\u1F4CE",
    "/paragraph": "\u00B6",
    "/paragraphos": "\u2E0F",
    "/paragraphosforked": "\u2E10",
    "/paragraphosforkedreversed": "\u2E11",
    "/paragraphseparator": "\u2029",
    "/parallel": "\u2225",
    "/parallelogramblack": "\u25B0",
    "/parallelogramwhite": "\u25B1",
    "/parenbottom": "\u23DD",
    "/parendblleft": "\u2E28",
    "/parendblright": "\u2E29",
    "/parenextensionleft": "\u239C",
    "/parenextensionright": "\u239F",
    "/parenflatleft": "\u27EE",
    "/parenflatright": "\u27EF",
    "/parenhookupleft": "\u239B",
    "/parenhookupright": "\u239E",
    "/parenleft": "\u0028",
    "/parenleft.inferior": "\u208D",
    "/parenleft.superior": "\u207D",
    "/parenleftaltonearabic": "\uFD3E",
    "/parenleftbt": "\uF8ED",
    "/parenleftex": "\uF8EC",
    "/parenleftinferior": "\u208D",
    "/parenleftmonospace": "\uFF08",
    "/parenleftsmall": "\uFE59",
    "/parenleftsuperior": "\u207D",
    "/parenlefttp": "\uF8EB",
    "/parenleftvertical": "\uFE35",
    "/parenlowerhookleft": "\u239D",
    "/parenlowerhookright": "\u23A0",
    "/parenright": "\u0029",
    "/parenright.inferior": "\u208E",
    "/parenright.superior": "\u207E",
    "/parenrightaltonearabic": "\uFD3F",
    "/parenrightbt": "\uF8F8",
    "/parenrightex": "\uF8F7",
    "/parenrightinferior": "\u208E",
    "/parenrightmonospace": "\uFF09",
    "/parenrightsmall": "\uFE5A",
    "/parenrightsuperior": "\u207E",
    "/parenrighttp": "\uF8F6",
    "/parenrightvertical": "\uFE36",
    "/parentop": "\u23DC",
    "/partalternationmark": "\u303D",
    "/partialdiff": "\u2202",
    "/partnership": "\u3250",
    "/partyPopper": "\u1F389",
    "/paseq:hb": "\u05C0",
    "/paseqhebrew": "\u05C0",
    "/pashta:hb": "\u0599",
    "/pashtahebrew": "\u0599",
    "/pasquare": "\u33A9",
    "/passengerShip": "\u1F6F3",
    "/passivedown": "\u2391",
    "/passportControl": "\u1F6C2",
    "/patah": "\u05B7",
    "/patah11": "\u05B7",
    "/patah1d": "\u05B7",
    "/patah2a": "\u05B7",
    "/patah:hb": "\u05B7",
    "/patahhebrew": "\u05B7",
    "/patahnarrowhebrew": "\u05B7",
    "/patahquarterhebrew": "\u05B7",
    "/patahwidehebrew": "\u05B7",
    "/pawPrints": "\u1F43E",
    "/pawnblack": "\u265F",
    "/pawnwhite": "\u2659",
    "/pazer:hb": "\u05A1",
    "/pazerhebrew": "\u05A1",
    "/pbopomofo": "\u3106",
    "/pcfullwidth": "\u3376",
    "/pcircle": "\u24DF",
    "/pdot": "\u1E57",
    "/pdotaccent": "\u1E57",
    "/pe": "\u05E4",
    "/pe:hb": "\u05E4",
    "/peace": "\u262E",
    "/peach": "\u1F351",
    "/pear": "\u1F350",
    "/pecyr": "\u043F",
    "/pecyrillic": "\u043F",
    "/pedagesh": "\uFB44",
    "/pedageshhebrew": "\uFB44",
    "/pedestrian": "\u1F6B6",
    "/peezisquare": "\u333B",
    "/pefinaldageshhebrew": "\uFB43",
    "/peh.fina": "\uFB57",
    "/peh.init": "\uFB58",
    "/peh.isol": "\uFB56",
    "/peh.medi": "\uFB59",
    "/peharabic": "\u067E",
    "/peharmenian": "\u057A",
    "/pehebrew": "\u05E4",
    "/peheh": "\u06A6",
    "/peheh.fina": "\uFB6F",
    "/peheh.init": "\uFB70",
    "/peheh.isol": "\uFB6E",
    "/peheh.medi": "\uFB71",
    "/pehfinalarabic": "\uFB57",
    "/pehinitialarabic": "\uFB58",
    "/pehiragana": "\u307A",
    "/pehmedialarabic": "\uFB59",
    "/pehookcyr": "\u04A7",
    "/pekatakana": "\u30DA",
    "/pemiddlehookcyrillic": "\u04A7",
    "/penOverStampedEnvelope": "\u1F586",
    "/pengkalconsonant": "\uA9BE",
    "/penguin": "\u1F427",
    "/penihisquare": "\u3338",
    "/pensiveFace": "\u1F614",
    "/pensusquare": "\u333A",
    "/pentagram": "\u26E4",
    "/pentasememetrical": "\u23D9",
    "/pepetvowel": "\uA9BC",
    "/per": "\u214C",
    "/perafehebrew": "\uFB4E",
    "/percent": "\u0025",
    "/percentarabic": "\u066A",
    "/percentmonospace": "\uFF05",
    "/percentsmall": "\uFE6A",
    "/percussivebidental": "\u02AD",
    "/percussivebilabial": "\u02AC",
    "/performingArts": "\u1F3AD",
    "/period": "\u002E",
    "/periodarmenian": "\u0589",
    "/periodcentered": "\u00B7",
    "/periodhalfwidth": "\uFF61",
    "/periodinferior": "\uF6E7",
    "/periodmonospace": "\uFF0E",
    "/periodsmall": "\uFE52",
    "/periodsuperior": "\uF6E8",
    "/periodurdu": "\u06D4",
    "/perispomenigreekcmb": "\u0342",
    "/permanentpaper": "\u267E",
    "/permille": "\u0609",
    "/perpendicular": "\u22A5",
    "/perseveringFace": "\u1F623",
    "/personBlondHair": "\u1F471",
    "/personBowingDeeply": "\u1F647",
    "/personFrowning": "\u1F64D",
    "/personRaisingBothHandsInCelebration": "\u1F64C",
    "/personWithFoldedHands": "\u1F64F",
    "/personWithPoutingFace": "\u1F64E",
    "/personalComputer": "\u1F4BB",
    "/personball": "\u26F9",
    "/perspective": "\u2306",
    "/pertenthousandsign": "\u2031",
    "/perthousand": "\u2030",
    "/peseta": "\u20A7",
    "/peso": "\u20B1",
    "/pesosquare": "\u3337",
    "/petailcyr": "\u0525",
    "/pewithdagesh:hb": "\uFB44",
    "/pewithrafe:hb": "\uFB4E",
    "/pffullwidth": "\u338A",
    "/pflourish": "\uA753",
    "/pfsquare": "\u338A",
    "/phabengali": "\u09AB",
    "/phadeva": "\u092B",
    "/phagujarati": "\u0AAB",
    "/phagurmukhi": "\u0A2B",
    "/pharyngealvoicedfricative": "\u0295",
    "/phfullwidth": "\u33D7",
    "/phi": "\u03C6",
    "/phi.math": "\u03D5",
    "/phi1": "\u03D5",
    "/phieuphacirclekorean": "\u327A",
    "/phieuphaparenkorean": "\u321A",
    "/phieuphcirclekorean": "\u326C",
    "/phieuphkorean": "\u314D",
    "/phieuphparenkorean": "\u320C",
    "/philatin": "\u0278",
    "/phinthuthai": "\u0E3A",
    "/phisymbolgreek": "\u03D5",
    "/phitailless": "\u2C77",
    "/phon:AEsmall": "\u1D01",
    "/phon:Aemod": "\u1D2D",
    "/phon:Amod": "\u1D2C",
    "/phon:Asmall": "\u1D00",
    "/phon:Bbarmod": "\u1D2F",
    "/phon:Bbarsmall": "\u1D03",
    "/phon:Bmod": "\u1D2E",
    "/phon:Csmall": "\u1D04",
    "/phon:Dmod": "\u1D30",
    "/phon:Dsmall": "\u1D05",
    "/phon:ENcyrmod": "\u1D78",
    "/phon:Elsmallcyr": "\u1D2B",
    "/phon:Emod": "\u1D31",
    "/phon:Ereversedmod": "\u1D32",
    "/phon:Esmall": "\u1D07",
    "/phon:Ethsmall": "\u1D06",
    "/phon:Ezhsmall": "\u1D23",
    "/phon:Gmod": "\u1D33",
    "/phon:Hmod": "\u1D34",
    "/phon:Imod": "\u1D35",
    "/phon:Ismallmod": "\u1DA6",
    "/phon:Ismallstroke": "\u1D7B",
    "/phon:Istrokesmallmod": "\u1DA7",
    "/phon:Jmod": "\u1D36",
    "/phon:Jsmall": "\u1D0A",
    "/phon:Kmod": "\u1D37",
    "/phon:Ksmall": "\u1D0B",
    "/phon:Lmod": "\u1D38",
    "/phon:Lsmallmod": "\u1DAB",
    "/phon:Lsmallstroke": "\u1D0C",
    "/phon:Mmod": "\u1D39",
    "/phon:Msmall": "\u1D0D",
    "/phon:Nmod": "\u1D3A",
    "/phon:Nreversedmod": "\u1D3B",
    "/phon:Nsmallmod": "\u1DB0",
    "/phon:Nsmallreversed": "\u1D0E",
    "/phon:OUsmall": "\u1D15",
    "/phon:Omod": "\u1D3C",
    "/phon:Oopensmall": "\u1D10",
    "/phon:Osmall": "\u1D0F",
    "/phon:Oumod": "\u1D3D",
    "/phon:Pmod": "\u1D3E",
    "/phon:Psmall": "\u1D18",
    "/phon:Rmod": "\u1D3F",
    "/phon:Rsmallreversed": "\u1D19",
    "/phon:Rsmallturned": "\u1D1A",
    "/phon:Tmod": "\u1D40",
    "/phon:Tsmall": "\u1D1B",
    "/phon:Umod": "\u1D41",
    "/phon:Usmall": "\u1D1C",
    "/phon:Usmallmod": "\u1DB8",
    "/phon:Usmallstroke": "\u1D7E",
    "/phon:Vsmall": "\u1D20",
    "/phon:Wmod": "\u1D42",
    "/phon:Wsmall": "\u1D21",
    "/phon:Zsmall": "\u1D22",
    "/phon:aeturned": "\u1D02",
    "/phon:aeturnedmod": "\u1D46",
    "/phon:ain": "\u1D25",
    "/phon:ainmod": "\u1D5C",
    "/phon:alphamod": "\u1D45",
    "/phon:alpharetroflexhook": "\u1D90",
    "/phon:alphaturnedmod": "\u1D9B",
    "/phon:amod": "\u1D43",
    "/phon:aretroflexhook": "\u1D8F",
    "/phon:aturnedmod": "\u1D44",
    "/phon:betamod": "\u1D5D",
    "/phon:bmiddletilde": "\u1D6C",
    "/phon:bmod": "\u1D47",
    "/phon:bpalatalhook": "\u1D80",
    "/phon:ccurlmod": "\u1D9D",
    "/phon:chimod": "\u1D61",
    "/phon:cmod": "\u1D9C",
    "/phon:deltamod": "\u1D5F",
    "/phon:dhooktail": "\u1D91",
    "/phon:dmiddletilde": "\u1D6D",
    "/phon:dmod": "\u1D48",
    "/phon:dotlessjstrokemod": "\u1DA1",
    "/phon:dpalatalhook": "\u1D81",
    "/phon:emod": "\u1D49",
    "/phon:engmod": "\u1D51",
    "/phon:eopenmod": "\u1D4B",
    "/phon:eopenretroflexhook": "\u1D93",
    "/phon:eopenreversedmod": "\u1D9F",
    "/phon:eopenreversedretroflexhook": "\u1D94",
    "/phon:eopenturned": "\u1D08",
    "/phon:eopenturnedmod": "\u1D4C",
    "/phon:eretroflexhook": "\u1D92",
    "/phon:eshmod": "\u1DB4",
    "/phon:eshpalatalhook": "\u1D8B",
    "/phon:eshretroflexhook": "\u1D98",
    "/phon:ethmod": "\u1D9E",
    "/phon:ezhmod": "\u1DBE",
    "/phon:ezhretroflexhook": "\u1D9A",
    "/phon:fmiddletilde": "\u1D6E",
    "/phon:fmod": "\u1DA0",
    "/phon:fpalatalhook": "\u1D82",
    "/phon:ginsular": "\u1D79",
    "/phon:gmod": "\u1D4D",
    "/phon:gpalatalhook": "\u1D83",
    "/phon:gr:Gammasmall": "\u1D26",
    "/phon:gr:Lambdasmall": "\u1D27",
    "/phon:gr:Pismall": "\u1D28",
    "/phon:gr:Psismall": "\u1D2A",
    "/phon:gr:RsmallHO": "\u1D29",
    "/phon:gr:betasubscript": "\u1D66",
    "/phon:gr:chisubscript": "\u1D6A",
    "/phon:gr:gammamod": "\u1D5E",
    "/phon:gr:gammasubscript": "\u1D67",
    "/phon:gr:phimod": "\u1D60",
    "/phon:gr:phisubscript": "\u1D69",
    "/phon:gr:rhosubscript": "\u1D68",
    "/phon:gscriptmod": "\u1DA2",
    "/phon:gturned": "\u1D77",
    "/phon:hturnedmod": "\u1DA3",
    "/phon:iotamod": "\u1DA5",
    "/phon:iotastroke": "\u1D7C",
    "/phon:iretroflexhook": "\u1D96",
    "/phon:istrokemod": "\u1DA4",
    "/phon:isubscript": "\u1D62",
    "/phon:iturned": "\u1D09",
    "/phon:iturnedmod": "\u1D4E",
    "/phon:jcrossedtailmod": "\u1DA8",
    "/phon:kmod": "\u1D4F",
    "/phon:kpalatalhook": "\u1D84",
    "/phon:lpalatalhook": "\u1D85",
    "/phon:lpalatalhookmod": "\u1DAA",
    "/phon:lretroflexhookmod": "\u1DA9",
    "/phon:mhookmod": "\u1DAC",
    "/phon:mlonglegturnedmod": "\u1DAD",
    "/phon:mmiddletilde": "\u1D6F",
    "/phon:mmod": "\u1D50",
    "/phon:mpalatalhook": "\u1D86",
    "/phon:mturnedmod": "\u1D5A",
    "/phon:mturnedsideways": "\u1D1F",
    "/phon:nlefthookmod": "\u1DAE",
    "/phon:nmiddletilde": "\u1D70",
    "/phon:npalatalhook": "\u1D87",
    "/phon:nretroflexhookmod": "\u1DAF",
    "/phon:obarmod": "\u1DB1",
    "/phon:obottomhalf": "\u1D17",
    "/phon:obottomhalfmod": "\u1D55",
    "/phon:oeturned": "\u1D14",
    "/phon:omod": "\u1D52",
    "/phon:oopenmod": "\u1D53",
    "/phon:oopenretroflexhook": "\u1D97",
    "/phon:oopensideways": "\u1D12",
    "/phon:osideways": "\u1D11",
    "/phon:ostrokesideways": "\u1D13",
    "/phon:otophalf": "\u1D16",
    "/phon:otophalfmod": "\u1D54",
    "/phon:phimod": "\u1DB2",
    "/phon:pmiddletilde": "\u1D71",
    "/phon:pmod": "\u1D56",
    "/phon:ppalatalhook": "\u1D88",
    "/phon:pstroke": "\u1D7D",
    "/phon:rfishmiddletilde": "\u1D73",
    "/phon:rmiddletilde": "\u1D72",
    "/phon:rpalatalhook": "\u1D89",
    "/phon:rsubscript": "\u1D63",
    "/phon:schwamod": "\u1D4A",
    "/phon:schwaretroflexhook": "\u1D95",
    "/phon:shookmod": "\u1DB3",
    "/phon:smiddletilde": "\u1D74",
    "/phon:spalatalhook": "\u1D8A",
    "/phon:spirantvoicedlaryngeal": "\u1D24",
    "/phon:thetamod": "\u1DBF",
    "/phon:thstrike": "\u1D7A",
    "/phon:tmiddletilde": "\u1D75",
    "/phon:tmod": "\u1D57",
    "/phon:tpalatalhookmod": "\u1DB5",
    "/phon:ubarmod": "\u1DB6",
    "/phon:ue": "\u1D6B",
    "/phon:umod": "\u1D58",
    "/phon:upsilonmod": "\u1DB7",
    "/phon:upsilonstroke": "\u1D7F",
    "/phon:uretroflexhook": "\u1D99",
    "/phon:usideways": "\u1D1D",
    "/phon:usidewaysdieresised": "\u1D1E",
    "/phon:usidewaysmod": "\u1D59",
    "/phon:usubscript": "\u1D64",
    "/phon:vhookmod": "\u1DB9",
    "/phon:vmod": "\u1D5B",
    "/phon:vpalatalhook": "\u1D8C",
    "/phon:vsubscript": "\u1D65",
    "/phon:vturnedmod": "\u1DBA",
    "/phon:xpalatalhook": "\u1D8D",
    "/phon:zcurlmod": "\u1DBD",
    "/phon:zmiddletilde": "\u1D76",
    "/phon:zmod": "\u1DBB",
    "/phon:zpalatalhook": "\u1D8E",
    "/phon:zretroflexhookmod": "\u1DBC",
    "/phook": "\u01A5",
    "/phophanthai": "\u0E1E",
    "/phophungthai": "\u0E1C",
    "/phosamphaothai": "\u0E20",
    "/pi": "\u03C0",
    "/pi.math": "\u03D6",
    "/piasutorusquare": "\u332E",
    "/pick": "\u26CF",
    "/pidblstruck": "\u213C",
    "/pieupacirclekorean": "\u3273",
    "/pieupaparenkorean": "\u3213",
    "/pieupcieuckorean": "\u3176",
    "/pieupcirclekorean": "\u3265",
    "/pieupkiyeokkorean": "\u3172",
    "/pieupkorean": "\u3142",
    "/pieupparenkorean": "\u3205",
    "/pieupsioskiyeokkorean": "\u3174",
    "/pieupsioskorean": "\u3144",
    "/pieupsiostikeutkorean": "\u3175",
    "/pieupthieuthkorean": "\u3177",
    "/pieuptikeutkorean": "\u3173",
    "/pig": "\u1F416",
    "/pigFace": "\u1F437",
    "/pigNose": "\u1F43D",
    "/pihiragana": "\u3074",
    "/pikatakana": "\u30D4",
    "/pikosquare": "\u3330",
    "/pikurusquare": "\u332F",
    "/pilcrowsignreversed": "\u204B",
    "/pileOfPoo": "\u1F4A9",
    "/pill": "\u1F48A",
    "/pineDecoration": "\u1F38D",
    "/pineapple": "\u1F34D",
    "/pisces": "\u2653",
    "/piselehpada": "\uA9CC",
    "/pistol": "\u1F52B",
    "/pisymbolgreek": "\u03D6",
    "/pitchfork": "\u22D4",
    "/piwrarmenian": "\u0583",
    "/placeOfWorship": "\u1F6D0",
    "/placeofinterestsign": "\u2318",
    "/planck": "\u210E",
    "/plancktwopi": "\u210F",
    "/plus": "\u002B",
    "/plus.inferior": "\u208A",
    "/plus.superior": "\u207A",
    "/plusbelowcmb": "\u031F",
    "/pluscircle": "\u2295",
    "/plusminus": "\u00B1",
    "/plusmod": "\u02D6",
    "/plusmonospace": "\uFF0B",
    "/plussignalt:hb": "\uFB29",
    "/plussignmod": "\u02D6",
    "/plussmall": "\uFE62",
    "/plussuperior": "\u207A",
    "/pluto": "\u2647",
    "/pmfullwidth": "\u33D8",
    "/pmonospace": "\uFF50",
    "/pmsquare": "\u33D8",
    "/pocketCalculator": "\u1F5A9",
    "/poeticverse": "\u060E",
    "/pohiragana": "\u307D",
    "/pointerleftblack": "\u25C4",
    "/pointerleftwhite": "\u25C5",
    "/pointerrightblack": "\u25BA",
    "/pointerrightwhite": "\u25BB",
    "/pointingindexdownwhite": "\u261F",
    "/pointingindexleftblack": "\u261A",
    "/pointingindexleftwhite": "\u261C",
    "/pointingindexrightblack": "\u261B",
    "/pointingindexrightwhite": "\u261E",
    "/pointingindexupwhite": "\u261D",
    "/pointingtriangledownheavywhite": "\u26DB",
    "/pointosquare": "\u333D",
    "/pointring": "\u2E30",
    "/pokatakana": "\u30DD",
    "/pokrytiecmbcyr": "\u0487",
    "/policeCar": "\u1F693",
    "/policeCarsRevolvingLight": "\u1F6A8",
    "/policeOfficer": "\u1F46E",
    "/pondosquare": "\u3340",
    "/poodle": "\u1F429",
    "/popcorn": "\u1F37F",
    "/popdirectionalformatting": "\u202C",
    "/popdirectionalisolate": "\u2069",
    "/poplathai": "\u0E1B",
    "/portableStereo": "\u1F4FE",
    "/positionindicator": "\u2316",
    "/postalHorn": "\u1F4EF",
    "/postalmark": "\u3012",
    "/postalmarkface": "\u3020",
    "/postbox": "\u1F4EE",
    "/potOfFood": "\u1F372",
    "/potableWater": "\u1F6B0",
    "/pouch": "\u1F45D",
    "/poultryLeg": "\u1F357",
    "/poutingCatFace": "\u1F63E",
    "/poutingFace": "\u1F621",
    "/power": "\u23FB",
    "/poweron": "\u23FD",
    "/poweronoff": "\u23FC",
    "/powersleep": "\u23FE",
    "/pparen": "\u24AB",
    "/pparenthesized": "\u24AB",
    "/ppmfullwidth": "\u33D9",
    "/prayerBeads": "\u1F4FF",
    "/precedes": "\u227A",
    "/precedesbutnotequivalent": "\u22E8",
    "/precedesorequal": "\u227C",
    "/precedesorequivalent": "\u227E",
    "/precedesunderrelation": "\u22B0",
    "/prescription": "\u211E",
    "/preversedepigraphic": "\uA7FC",
    "/previouspage": "\u2397",
    "/prfullwidth": "\u33DA",
    "/primedblmod": "\u02BA",
    "/primemod": "\u02B9",
    "/primereversed": "\u2035",
    "/princess": "\u1F478",
    "/printer": "\u1F5A8",
    "/printerIcon": "\u1F5B6",
    "/printideographiccircled": "\u329E",
    "/printscreen": "\u2399",
    "/product": "\u220F",
    "/prohibitedSign": "\u1F6C7",
    "/projective": "\u2305",
    "/prolongedkana": "\u30FC",
    "/propellor": "\u2318",
    "/propersubset": "\u2282",
    "/propersuperset": "\u2283",
    "/propertyline": "\u214A",
    "/proportion": "\u2237",
    "/proportional": "\u221D",
    "/psfullwidth": "\u33B0",
    "/psi": "\u03C8",
    "/psicyr": "\u0471",
    "/psicyrillic": "\u0471",
    "/psilicmbcyr": "\u0486",
    "/psilipneumatacyrilliccmb": "\u0486",
    "/pssquare": "\u33B0",
    "/pstrokedescender": "\uA751",
    "/ptail": "\uA755",
    "/publicAddressLoudspeaker": "\u1F4E2",
    "/puhiragana": "\u3077",
    "/pukatakana": "\u30D7",
    "/punctuationspace": "\u2008",
    "/purpleHeart": "\u1F49C",
    "/purse": "\u1F45B",
    "/pushpin": "\u1F4CC",
    "/putLitterInItsPlace": "\u1F6AE",
    "/pvfullwidth": "\u33B4",
    "/pvsquare": "\u33B4",
    "/pwfullwidth": "\u33BA",
    "/pwsquare": "\u33BA",
    "/q": "\u0071",
    "/qacyr": "\u051B",
    "/qadeva": "\u0958",
    "/qadma:hb": "\u05A8",
    "/qadmahebrew": "\u05A8",
    "/qaf": "\u0642",
    "/qaf.fina": "\uFED6",
    "/qaf.init": "\uFED7",
    "/qaf.init_alefmaksura.fina": "\uFC35",
    "/qaf.init_hah.fina": "\uFC33",
    "/qaf.init_hah.medi": "\uFCC2",
    "/qaf.init_meem.fina": "\uFC34",
    "/qaf.init_meem.medi": "\uFCC3",
    "/qaf.init_meem.medi_hah.medi": "\uFDB4",
    "/qaf.init_yeh.fina": "\uFC36",
    "/qaf.isol": "\uFED5",
    "/qaf.medi": "\uFED8",
    "/qaf.medi_alefmaksura.fina": "\uFC7E",
    "/qaf.medi_meem.medi_hah.fina": "\uFD7E",
    "/qaf.medi_meem.medi_meem.fina": "\uFD7F",
    "/qaf.medi_meem.medi_yeh.fina": "\uFDB2",
    "/qaf.medi_yeh.fina": "\uFC7F",
    "/qaf_lam_alefmaksuraabove": "\u06D7",
    "/qafarabic": "\u0642",
    "/qafdotabove": "\u06A7",
    "/qaffinalarabic": "\uFED6",
    "/qafinitialarabic": "\uFED7",
    "/qafmedialarabic": "\uFED8",
    "/qafthreedotsabove": "\u06A8",
    "/qamats": "\u05B8",
    "/qamats10": "\u05B8",
    "/qamats1a": "\u05B8",
    "/qamats1c": "\u05B8",
    "/qamats27": "\u05B8",
    "/qamats29": "\u05B8",
    "/qamats33": "\u05B8",
    "/qamats:hb": "\u05B8",
    "/qamatsQatan:hb": "\u05C7",
    "/qamatsde": "\u05B8",
    "/qamatshebrew": "\u05B8",
    "/qamatsnarrowhebrew": "\u05B8",
    "/qamatsqatanhebrew": "\u05B8",
    "/qamatsqatannarrowhebrew": "\u05B8",
    "/qamatsqatanquarterhebrew": "\u05B8",
    "/qamatsqatanwidehebrew": "\u05B8",
    "/qamatsquarterhebrew": "\u05B8",
    "/qamatswidehebrew": "\u05B8",
    "/qarneFarah:hb": "\u059F",
    "/qarneyparahebrew": "\u059F",
    "/qbopomofo": "\u3111",
    "/qcircle": "\u24E0",
    "/qdiagonalstroke": "\uA759",
    "/qhook": "\u02A0",
    "/qhooktail": "\u024B",
    "/qmonospace": "\uFF51",
    "/qof": "\u05E7",
    "/qof:hb": "\u05E7",
    "/qofdagesh": "\uFB47",
    "/qofdageshhebrew": "\uFB47",
    "/qofhatafpatah": "\u05E7",
    "/qofhatafpatahhebrew": "\u05E7",
    "/qofhatafsegol": "\u05E7",
    "/qofhatafsegolhebrew": "\u05E7",
    "/qofhebrew": "\u05E7",
    "/qofhiriq": "\u05E7",
    "/qofhiriqhebrew": "\u05E7",
    "/qofholam": "\u05E7",
    "/qofholamhebrew": "\u05E7",
    "/qofpatah": "\u05E7",
    "/qofpatahhebrew": "\u05E7",
    "/qofqamats": "\u05E7",
    "/qofqamatshebrew": "\u05E7",
    "/qofqubuts": "\u05E7",
    "/qofqubutshebrew": "\u05E7",
    "/qofsegol": "\u05E7",
    "/qofsegolhebrew": "\u05E7",
    "/qofsheva": "\u05E7",
    "/qofshevahebrew": "\u05E7",
    "/qoftsere": "\u05E7",
    "/qoftserehebrew": "\u05E7",
    "/qofwithdagesh:hb": "\uFB47",
    "/qparen": "\u24AC",
    "/qparenthesized": "\u24AC",
    "/qpdigraph": "\u0239",
    "/qstrokedescender": "\uA757",
    "/quadarrowdownfunc": "\u2357",
    "/quadarrowleftfunc": "\u2347",
    "/quadarrowrightfunc": "\u2348",
    "/quadarrowupfunc": "\u2350",
    "/quadbackslashfunc": "\u2342",
    "/quadcaretdownfunc": "\u234C",
    "/quadcaretupfunc": "\u2353",
    "/quadcirclefunc": "\u233C",
    "/quadcolonfunc": "\u2360",
    "/quaddelfunc": "\u2354",
    "/quaddeltafunc": "\u234D",
    "/quaddiamondfunc": "\u233A",
    "/quaddividefunc": "\u2339",
    "/quadequalfunc": "\u2338",
    "/quadfunc": "\u2395",
    "/quadgreaterfunc": "\u2344",
    "/quadjotfunc": "\u233B",
    "/quadlessfunc": "\u2343",
    "/quadnotequalfunc": "\u236F",
    "/quadquestionfunc": "\u2370",
    "/quadrantLowerLeft": "\u2596",
    "/quadrantLowerRight": "\u2597",
    "/quadrantUpperLeft": "\u2598",
    "/quadrantUpperLeftAndLowerLeftAndLowerRight": "\u2599",
    "/quadrantUpperLeftAndLowerRight": "\u259A",
    "/quadrantUpperLeftAndUpperRightAndLowerLeft": "\u259B",
    "/quadrantUpperLeftAndUpperRightAndLowerRight": "\u259C",
    "/quadrantUpperRight": "\u259D",
    "/quadrantUpperRightAndLowerLeft": "\u259E",
    "/quadrantUpperRightAndLowerLeftAndLowerRight": "\u259F",
    "/quadrupleminute": "\u2057",
    "/quadslashfunc": "\u2341",
    "/quarternote": "\u2669",
    "/qubuts": "\u05BB",
    "/qubuts18": "\u05BB",
    "/qubuts25": "\u05BB",
    "/qubuts31": "\u05BB",
    "/qubuts:hb": "\u05BB",
    "/qubutshebrew": "\u05BB",
    "/qubutsnarrowhebrew": "\u05BB",
    "/qubutsquarterhebrew": "\u05BB",
    "/qubutswidehebrew": "\u05BB",
    "/queenblack": "\u265B",
    "/queenwhite": "\u2655",
    "/question": "\u003F",
    "/questionarabic": "\u061F",
    "/questionarmenian": "\u055E",
    "/questiondbl": "\u2047",
    "/questiondown": "\u00BF",
    "/questiondownsmall": "\uF7BF",
    "/questionedequal": "\u225F",
    "/questionexclamationmark": "\u2048",
    "/questiongreek": "\u037E",
    "/questionideographiccircled": "\u3244",
    "/questionmonospace": "\uFF1F",
    "/questionreversed": "\u2E2E",
    "/questionsmall": "\uF73F",
    "/quincunx": "\u26BB",
    "/quotedbl": "\u0022",
    "/quotedblbase": "\u201E",
    "/quotedblleft": "\u201C",
    "/quotedbllowreversed": "\u2E42",
    "/quotedblmonospace": "\uFF02",
    "/quotedblprime": "\u301E",
    "/quotedblprimereversed": "\u301D",
    "/quotedblreversed": "\u201F",
    "/quotedblright": "\u201D",
    "/quoteleft": "\u2018",
    "/quoteleftreversed": "\u201B",
    "/quotequadfunc": "\u235E",
    "/quotereversed": "\u201B",
    "/quoteright": "\u2019",
    "/quoterightn": "\u0149",
    "/quotesinglbase": "\u201A",
    "/quotesingle": "\u0027",
    "/quotesinglemonospace": "\uFF07",
    "/quoteunderlinefunc": "\u2358",
    "/r": "\u0072",
    "/raagung": "\uA9AC",
    "/raarmenian": "\u057C",
    "/rabbit": "\u1F407",
    "/rabbitFace": "\u1F430",
    "/rabengali": "\u09B0",
    "/racingCar": "\u1F3CE",
    "/racingMotorcycle": "\u1F3CD",
    "/racirclekatakana": "\u32F6",
    "/racute": "\u0155",
    "/radeva": "\u0930",
    "/radfullwidth": "\u33AD",
    "/radical": "\u221A",
    "/radicalbottom": "\u23B7",
    "/radicalex": "\uF8E5",
    "/radio": "\u1F4FB",
    "/radioButton": "\u1F518",
    "/radioactive": "\u2622",
    "/radovers2fullwidth": "\u33AF",
    "/radoversfullwidth": "\u33AE",
    "/radoverssquare": "\u33AE",
    "/radoverssquaredsquare": "\u33AF",
    "/radsquare": "\u33AD",
    "/rafe": "\u05BF",
    "/rafe:hb": "\u05BF",
    "/rafehebrew": "\u05BF",
    "/ragujarati": "\u0AB0",
    "/ragurmukhi": "\u0A30",
    "/rahiragana": "\u3089",
    "/railwayCar": "\u1F683",
    "/railwayTrack": "\u1F6E4",
    "/rain": "\u26C6",
    "/rainbow": "\u1F308",
    "/raisedHandFingersSplayed": "\u1F590",
    "/raisedHandPartBetweenMiddleAndRingFingers": "\u1F596",
    "/raisedmcsign": "\u1F16A",
    "/raisedmdsign": "\u1F16B",
    "/rakatakana": "\u30E9",
    "/rakatakanahalfwidth": "\uFF97",
    "/ralowerdiagonalbengali": "\u09F1",
    "/ram": "\u1F40F",
    "/ramiddlediagonalbengali": "\u09F0",
    "/ramshorn": "\u0264",
    "/rat": "\u1F400",
    "/ratio": "\u2236",
    "/ray": "\u0608",
    "/rbopomofo": "\u3116",
    "/rcaron": "\u0159",
    "/rcedilla": "\u0157",
    "/rcircle": "\u24E1",
    "/rcommaaccent": "\u0157",
    "/rdblgrave": "\u0211",
    "/rdot": "\u1E59",
    "/rdotaccent": "\u1E59",
    "/rdotbelow": "\u1E5B",
    "/rdotbelowmacron": "\u1E5D",
    "/reachideographicparen": "\u3243",
    "/recirclekatakana": "\u32F9",
    "/recreationalVehicle": "\u1F699",
    "/rectangleblack": "\u25AC",
    "/rectangleverticalblack": "\u25AE",
    "/rectangleverticalwhite": "\u25AF",
    "/rectanglewhite": "\u25AD",
    "/recycledpaper": "\u267C",
    "/recyclefiveplastics": "\u2677",
    "/recyclefourplastics": "\u2676",
    "/recyclegeneric": "\u267A",
    "/recycleoneplastics": "\u2673",
    "/recyclepartiallypaper": "\u267D",
    "/recyclesevenplastics": "\u2679",
    "/recyclesixplastics": "\u2678",
    "/recyclethreeplastics": "\u2675",
    "/recycletwoplastics": "\u2674",
    "/recycleuniversal": "\u2672",
    "/recycleuniversalblack": "\u267B",
    "/redApple": "\u1F34E",
    "/redTriangleDOwn": "\u1F53B",
    "/redTriangleUp": "\u1F53A",
    "/referencemark": "\u203B",
    "/reflexsubset": "\u2286",
    "/reflexsuperset": "\u2287",
    "/regionalindicatorsymbollettera": "\u1F1E6",
    "/regionalindicatorsymbolletterb": "\u1F1E7",
    "/regionalindicatorsymbolletterc": "\u1F1E8",
    "/regionalindicatorsymbolletterd": "\u1F1E9",
    "/regionalindicatorsymbollettere": "\u1F1EA",
    "/regionalindicatorsymbolletterf": "\u1F1EB",
    "/regionalindicatorsymbolletterg": "\u1F1EC",
    "/regionalindicatorsymbolletterh": "\u1F1ED",
    "/regionalindicatorsymbolletteri": "\u1F1EE",
    "/regionalindicatorsymbolletterj": "\u1F1EF",
    "/regionalindicatorsymbolletterk": "\u1F1F0",
    "/regionalindicatorsymbolletterl": "\u1F1F1",
    "/regionalindicatorsymbolletterm": "\u1F1F2",
    "/regionalindicatorsymbollettern": "\u1F1F3",
    "/regionalindicatorsymbollettero": "\u1F1F4",
    "/regionalindicatorsymbolletterp": "\u1F1F5",
    "/regionalindicatorsymbolletterq": "\u1F1F6",
    "/regionalindicatorsymbolletterr": "\u1F1F7",
    "/regionalindicatorsymbolletters": "\u1F1F8",
    "/regionalindicatorsymbollettert": "\u1F1F9",
    "/regionalindicatorsymbolletteru": "\u1F1FA",
    "/regionalindicatorsymbolletterv": "\u1F1FB",
    "/regionalindicatorsymbolletterw": "\u1F1FC",
    "/regionalindicatorsymbolletterx": "\u1F1FD",
    "/regionalindicatorsymbollettery": "\u1F1FE",
    "/regionalindicatorsymbolletterz": "\u1F1FF",
    "/registered": "\u00AE",
    "/registersans": "\uF8E8",
    "/registerserif": "\uF6DA",
    "/reh.fina": "\uFEAE",
    "/reh.init_superscriptalef.fina": "\uFC5C",
    "/reh.isol": "\uFEAD",
    "/rehHamzaAbove": "\u076C",
    "/rehSmallTahTwoDots": "\u0771",
    "/rehStroke": "\u075B",
    "/rehTwoDotsVerticallyAbove": "\u076B",
    "/rehVabove": "\u0692",
    "/rehVbelow": "\u0695",
    "/reharabic": "\u0631",
    "/reharmenian": "\u0580",
    "/rehdotbelow": "\u0694",
    "/rehdotbelowdotabove": "\u0696",
    "/rehfinalarabic": "\uFEAE",
    "/rehfourdotsabove": "\u0699",
    "/rehinvertedV": "\u06EF",
    "/rehiragana": "\u308C",
    "/rehring": "\u0693",
    "/rehtwodotsabove": "\u0697",
    "/rehyehaleflamarabic": "\u0631",
    "/rekatakana": "\u30EC",
    "/rekatakanahalfwidth": "\uFF9A",
    "/relievedFace": "\u1F60C",
    "/religionideographiccircled": "\u32AA",
    "/reminderRibbon": "\u1F397",
    "/remusquare": "\u3355",
    "/rentogensquare": "\u3356",
    "/replacementchar": "\uFFFD",
    "/replacementcharobj": "\uFFFC",
    "/representideographicparen": "\u3239",
    "/rerengganleft": "\uA9C1",
    "/rerengganright": "\uA9C2",
    "/resh": "\u05E8",
    "/resh:hb": "\u05E8",
    "/reshdageshhebrew": "\uFB48",
    "/reshhatafpatah": "\u05E8",
    "/reshhatafpatahhebrew": "\u05E8",
    "/reshhatafsegol": "\u05E8",
    "/reshhatafsegolhebrew": "\u05E8",
    "/reshhebrew": "\u05E8",
    "/reshhiriq": "\u05E8",
    "/reshhiriqhebrew": "\u05E8",
    "/reshholam": "\u05E8",
    "/reshholamhebrew": "\u05E8",
    "/reshpatah": "\u05E8",
    "/reshpatahhebrew": "\u05E8",
    "/reshqamats": "\u05E8",
    "/reshqamatshebrew": "\u05E8",
    "/reshqubuts": "\u05E8",
    "/reshqubutshebrew": "\u05E8",
    "/reshsegol": "\u05E8",
    "/reshsegolhebrew": "\u05E8",
    "/reshsheva": "\u05E8",
    "/reshshevahebrew": "\u05E8",
    "/reshtsere": "\u05E8",
    "/reshtserehebrew": "\u05E8",
    "/reshwide:hb": "\uFB27",
    "/reshwithdagesh:hb": "\uFB48",
    "/resourceideographiccircled": "\u32AE",
    "/resourceideographicparen": "\u323E",
    "/response": "\u211F",
    "/restideographiccircled": "\u32A1",
    "/restideographicparen": "\u3241",
    "/restrictedentryoneleft": "\u26E0",
    "/restrictedentrytwoleft": "\u26E1",
    "/restroom": "\u1F6BB",
    "/return": "\u23CE",
    "/reversedHandMiddleFingerExtended": "\u1F595",
    "/reversedRaisedHandFingersSplayed": "\u1F591",
    "/reversedThumbsDownSign": "\u1F593",
    "/reversedThumbsUpSign": "\u1F592",
    "/reversedVictoryHand": "\u1F594",
    "/reversedonehundred.roman": "\u2183",
    "/reversedtilde": "\u223D",
    "/reversedzecyr": "\u0511",
    "/revia:hb": "\u0597",
    "/reviahebrew": "\u0597",
    "/reviamugrashhebrew": "\u0597",
    "/revlogicalnot": "\u2310",
    "/revolvingHearts": "\u1F49E",
    "/rfishhook": "\u027E",
    "/rfishhookreversed": "\u027F",
    "/rgravedbl": "\u0211",
    "/rhabengali": "\u09DD",
    "/rhacyr": "\u0517",
    "/rhadeva": "\u095D",
    "/rho": "\u03C1",
    "/rhoasper": "\u1FE5",
    "/rhofunc": "\u2374",
    "/rholenis": "\u1FE4",
    "/rhook": "\u027D",
    "/rhookturned": "\u027B",
    "/rhookturnedsuperior": "\u02B5",
    "/rhookturnedsupmod": "\u02B5",
    "/rhostrokesymbol": "\u03FC",
    "/rhosymbol": "\u03F1",
    "/rhosymbolgreek": "\u03F1",
    "/rhotichookmod": "\u02DE",
    "/rial": "\uFDFC",
    "/ribbon": "\u1F380",
    "/riceBall": "\u1F359",
    "/riceCracker": "\u1F358",
    "/ricirclekatakana": "\u32F7",
    "/rieulacirclekorean": "\u3271",
    "/rieulaparenkorean": "\u3211",
    "/rieulcirclekorean": "\u3263",
    "/rieulhieuhkorean": "\u3140",
    "/rieulkiyeokkorean": "\u313A",
    "/rieulkiyeoksioskorean": "\u3169",
    "/rieulkorean": "\u3139",
    "/rieulmieumkorean": "\u313B",
    "/rieulpansioskorean": "\u316C",
    "/rieulparenkorean": "\u3203",
    "/rieulphieuphkorean": "\u313F",
    "/rieulpieupkorean": "\u313C",
    "/rieulpieupsioskorean": "\u316B",
    "/rieulsioskorean": "\u313D",
    "/rieulthieuthkorean": "\u313E",
    "/rieultikeutkorean": "\u316A",
    "/rieulyeorinhieuhkorean": "\u316D",
    "/right-pointingMagnifyingGlass": "\u1F50E",
    "/rightAngerBubble": "\u1F5EF",
    "/rightHalfBlock": "\u2590",
    "/rightHandTelephoneReceiver": "\u1F57D",
    "/rightOneEighthBlock": "\u2595",
    "/rightSpeaker": "\u1F568",
    "/rightSpeakerOneSoundWave": "\u1F569",
    "/rightSpeakerThreeSoundWaves": "\u1F56A",
    "/rightSpeechBubble": "\u1F5E9",
    "/rightThoughtBubble": "\u1F5ED",
    "/rightangle": "\u221F",
    "/rightarrowoverleftarrow": "\u21C4",
    "/rightdnheavyleftuplight": "\u2546",
    "/rightharpoonoverleftharpoon": "\u21CC",
    "/rightheavyleftdnlight": "\u252E",
    "/rightheavyleftuplight": "\u2536",
    "/rightheavyleftvertlight": "\u253E",
    "/rightideographiccircled": "\u32A8",
    "/rightlightleftdnheavy": "\u2531",
    "/rightlightleftupheavy": "\u2539",
    "/rightlightleftvertheavy": "\u2549",
    "/righttackbelowcmb": "\u0319",
    "/righttoleftembed": "\u202B",
    "/righttoleftisolate": "\u2067",
    "/righttoleftmark": "\u200F",
    "/righttoleftoverride": "\u202E",
    "/righttriangle": "\u22BF",
    "/rightupheavyleftdnlight": "\u2544",
    "/rihiragana": "\u308A",
    "/rikatakana": "\u30EA",
    "/rikatakanahalfwidth": "\uFF98",
    "/ring": "\u02DA",
    "/ringbelowcmb": "\u0325",
    "/ringcmb": "\u030A",
    "/ringequal": "\u2257",
    "/ringhalfleft": "\u02BF",
    "/ringhalfleftarmenian": "\u0559",
    "/ringhalfleftbelowcmb": "\u031C",
    "/ringhalfleftcentered": "\u02D3",
    "/ringhalfleftcentredmod": "\u02D3",
    "/ringhalfleftmod": "\u02BF",
    "/ringhalfright": "\u02BE",
    "/ringhalfrightbelowcmb": "\u0339",
    "/ringhalfrightcentered": "\u02D2",
    "/ringhalfrightcentredmod": "\u02D2",
    "/ringhalfrightmod": "\u02BE",
    "/ringinequal": "\u2256",
    "/ringingBell": "\u1F56D",
    "/ringlowmod": "\u02F3",
    "/ringoperator": "\u2218",
    "/rinsular": "\uA783",
    "/rinvertedbreve": "\u0213",
    "/rirasquare": "\u3352",
    "/risingdiagonal": "\u27CB",
    "/rittorusquare": "\u3351",
    "/rlinebelow": "\u1E5F",
    "/rlongleg": "\u027C",
    "/rlonglegturned": "\u027A",
    "/rmacrondot": "\u1E5D",
    "/rmonospace": "\uFF52",
    "/rnoon": "\u06BB",
    "/rnoon.fina": "\uFBA1",
    "/rnoon.init": "\uFBA2",
    "/rnoon.isol": "\uFBA0",
    "/rnoon.medi": "\uFBA3",
    "/roastedSweetPotato": "\u1F360",
    "/robliquestroke": "\uA7A7",
    "/rocirclekatakana": "\u32FA",
    "/rocket": "\u1F680",
    "/rohiragana": "\u308D",
    "/rokatakana": "\u30ED",
    "/rokatakanahalfwidth": "\uFF9B",
    "/rolled-upNewspaper": "\u1F5DE",
    "/rollerCoaster": "\u1F3A2",
    "/rookblack": "\u265C",
    "/rookwhite": "\u2656",
    "/rooster": "\u1F413",
    "/roruathai": "\u0E23",
    "/rose": "\u1F339",
    "/rosette": "\u1F3F5",
    "/roundPushpin": "\u1F4CD",
    "/roundedzeroabove": "\u06DF",
    "/rowboat": "\u1F6A3",
    "/rparen": "\u24AD",
    "/rparenthesized": "\u24AD",
    "/rrabengali": "\u09DC",
    "/rradeva": "\u0931",
    "/rragurmukhi": "\u0A5C",
    "/rreh": "\u0691",
    "/rreh.fina": "\uFB8D",
    "/rreh.isol": "\uFB8C",
    "/rreharabic": "\u0691",
    "/rrehfinalarabic": "\uFB8D",
    "/rrotunda": "\uA75B",
    "/rrvocalicbengali": "\u09E0",
    "/rrvocalicdeva": "\u0960",
    "/rrvocalicgujarati": "\u0AE0",
    "/rrvocalicvowelsignbengali": "\u09C4",
    "/rrvocalicvowelsigndeva": "\u0944",
    "/rrvocalicvowelsigngujarati": "\u0AC4",
    "/rstroke": "\u024D",
    "/rsuperior": "\uF6F1",
    "/rsupmod": "\u02B3",
    "/rtailturned": "\u2C79",
    "/rtblock": "\u2590",
    "/rturned": "\u0279",
    "/rturnedsuperior": "\u02B4",
    "/rturnedsupmod": "\u02B4",
    "/ruble": "\u20BD",
    "/rucirclekatakana": "\u32F8",
    "/rugbyFootball": "\u1F3C9",
    "/ruhiragana": "\u308B",
    "/rukatakana": "\u30EB",
    "/rukatakanahalfwidth": "\uFF99",
    "/rum": "\uA775",
    "/rumrotunda": "\uA75D",
    "/runner": "\u1F3C3",
    "/runningShirtSash": "\u1F3BD",
    "/rupeemarkbengali": "\u09F2",
    "/rupeesignbengali": "\u09F3",
    "/rupiah": "\uF6DD",
    "/rupiisquare": "\u3353",
    "/ruthai": "\u0E24",
    "/ruuburusquare": "\u3354",
    "/rvocalicbengali": "\u098B",
    "/rvocalicdeva": "\u090B",
    "/rvocalicgujarati": "\u0A8B",
    "/rvocalicvowelsignbengali": "\u09C3",
    "/rvocalicvowelsigndeva": "\u0943",
    "/rvocalicvowelsigngujarati": "\u0AC3",
    "/s": "\u0073",
    "/s.inferior": "\u209B",
    "/s_t": "\uFB06",
    "/sabengali": "\u09B8",
    "/sacirclekatakana": "\u32DA",
    "/sacute": "\u015B",
    "/sacutedotaccent": "\u1E65",
    "/sad": "\u0635",
    "/sad.fina": "\uFEBA",
    "/sad.init": "\uFEBB",
    "/sad.init_alefmaksura.fina": "\uFD05",
    "/sad.init_hah.fina": "\uFC20",
    "/sad.init_hah.medi": "\uFCB1",
    "/sad.init_hah.medi_hah.medi": "\uFD65",
    "/sad.init_khah.medi": "\uFCB2",
    "/sad.init_meem.fina": "\uFC21",
    "/sad.init_meem.medi": "\uFCB3",
    "/sad.init_meem.medi_meem.medi": "\uFDC5",
    "/sad.init_reh.fina": "\uFD0F",
    "/sad.init_yeh.fina": "\uFD06",
    "/sad.isol": "\uFEB9",
    "/sad.medi": "\uFEBC",
    "/sad.medi_alefmaksura.fina": "\uFD21",
    "/sad.medi_hah.medi_hah.fina": "\uFD64",
    "/sad.medi_hah.medi_yeh.fina": "\uFDA9",
    "/sad.medi_meem.medi_meem.fina": "\uFD66",
    "/sad.medi_reh.fina": "\uFD2B",
    "/sad.medi_yeh.fina": "\uFD22",
    "/sad_lam_alefmaksuraabove": "\u06D6",
    "/sadarabic": "\u0635",
    "/sadeva": "\u0938",
    "/sadfinalarabic": "\uFEBA",
    "/sadinitialarabic": "\uFEBB",
    "/sadmedialarabic": "\uFEBC",
    "/sadthreedotsabove": "\u069E",
    "/sadtwodotsbelow": "\u069D",
    "/sagittarius": "\u2650",
    "/sagujarati": "\u0AB8",
    "/sagurmukhi": "\u0A38",
    "/sahiragana": "\u3055",
    "/saikurusquare": "\u331F",
    "/sailboat": "\u26F5",
    "/sakatakana": "\u30B5",
    "/sakatakanahalfwidth": "\uFF7B",
    "/sakeBottleAndCup": "\u1F376",
    "/sallallahoualayhewasallamarabic": "\uFDFA",
    "/saltillo": "\uA78C",
    "/saltire": "\u2613",
    "/samahaprana": "\uA9B0",
    "/samekh": "\u05E1",
    "/samekh:hb": "\u05E1",
    "/samekhdagesh": "\uFB41",
    "/samekhdageshhebrew": "\uFB41",
    "/samekhhebrew": "\u05E1",
    "/samekhwithdagesh:hb": "\uFB41",
    "/sampi": "\u03E1",
    "/sampiarchaic": "\u0373",
    "/samurda": "\uA9AF",
    "/samvat": "\u0604",
    "/san": "\u03FB",
    "/santiimusquare": "\u3320",
    "/saraaathai": "\u0E32",
    "/saraaethai": "\u0E41",
    "/saraaimaimalaithai": "\u0E44",
    "/saraaimaimuanthai": "\u0E43",
    "/saraamthai": "\u0E33",
    "/saraathai": "\u0E30",
    "/saraethai": "\u0E40",
    "/saraiileftthai": "\uF886",
    "/saraiithai": "\u0E35",
    "/saraileftthai": "\uF885",
    "/saraithai": "\u0E34",
    "/saraothai": "\u0E42",
    "/saraueeleftthai": "\uF888",
    "/saraueethai": "\u0E37",
    "/saraueleftthai": "\uF887",
    "/sarauethai": "\u0E36",
    "/sarauthai": "\u0E38",
    "/sarauuthai": "\u0E39",
    "/satellite": "\u1F6F0",
    "/satelliteAntenna": "\u1F4E1",
    "/saturn": "\u2644",
    "/saxophone": "\u1F3B7",
    "/sbopomofo": "\u3119",
    "/scales": "\u2696",
    "/scanninehorizontal": "\u23BD",
    "/scanonehorizontal": "\u23BA",
    "/scansevenhorizontal": "\u23BC",
    "/scanthreehorizontal": "\u23BB",
    "/scaron": "\u0161",
    "/scarondot": "\u1E67",
    "/scarondotaccent": "\u1E67",
    "/scedilla": "\u015F",
    "/school": "\u1F3EB",
    "/schoolSatchel": "\u1F392",
    "/schoolideographiccircled": "\u3246",
    "/schwa": "\u0259",
    "/schwa.inferior": "\u2094",
    "/schwacyr": "\u04D9",
    "/schwacyrillic": "\u04D9",
    "/schwadieresiscyr": "\u04DB",
    "/schwadieresiscyrillic": "\u04DB",
    "/schwahook": "\u025A",
    "/scircle": "\u24E2",
    "/scircumflex": "\u015D",
    "/scommaaccent": "\u0219",
    "/scooter": "\u1F6F4",
    "/scorpius": "\u264F",
    "/screen": "\u1F5B5",
    "/scroll": "\u1F4DC",
    "/scruple": "\u2108",
    "/sdot": "\u1E61",
    "/sdotaccent": "\u1E61",
    "/sdotbelow": "\u1E63",
    "/sdotbelowdotabove": "\u1E69",
    "/sdotbelowdotaccent": "\u1E69",
    "/seagullbelowcmb": "\u033C",
    "/seat": "\u1F4BA",
    "/secirclekatakana": "\u32DD",
    "/second": "\u2033",
    "/secondreversed": "\u2036",
    "/secondscreensquare": "\u1F19C",
    "/secondtonechinese": "\u02CA",
    "/secretideographiccircled": "\u3299",
    "/section": "\u00A7",
    "/sectionsignhalftop": "\u2E39",
    "/sector": "\u2314",
    "/seeNoEvilMonkey": "\u1F648",
    "/seedling": "\u1F331",
    "/seen": "\u0633",
    "/seen.fina": "\uFEB2",
    "/seen.init": "\uFEB3",
    "/seen.init_alefmaksura.fina": "\uFCFB",
    "/seen.init_hah.fina": "\uFC1D",
    "/seen.init_hah.medi": "\uFCAE",
    "/seen.init_hah.medi_jeem.medi": "\uFD5C",
    "/seen.init_heh.medi": "\uFD31",
    "/seen.init_jeem.fina": "\uFC1C",
    "/seen.init_jeem.medi": "\uFCAD",
    "/seen.init_jeem.medi_hah.medi": "\uFD5D",
    "/seen.init_khah.fina": "\uFC1E",
    "/seen.init_khah.medi": "\uFCAF",
    "/seen.init_meem.fina": "\uFC1F",
    "/seen.init_meem.medi": "\uFCB0",
    "/seen.init_meem.medi_hah.medi": "\uFD60",
    "/seen.init_meem.medi_jeem.medi": "\uFD61",
    "/seen.init_meem.medi_meem.medi": "\uFD63",
    "/seen.init_reh.fina": "\uFD0E",
    "/seen.init_yeh.fina": "\uFCFC",
    "/seen.isol": "\uFEB1",
    "/seen.medi": "\uFEB4",
    "/seen.medi_alefmaksura.fina": "\uFD17",
    "/seen.medi_hah.medi": "\uFD35",
    "/seen.medi_heh.medi": "\uFCE8",
    "/seen.medi_jeem.medi": "\uFD34",
    "/seen.medi_jeem.medi_alefmaksura.fina": "\uFD5E",
    "/seen.medi_khah.medi": "\uFD36",
    "/seen.medi_khah.medi_alefmaksura.fina": "\uFDA8",
    "/seen.medi_khah.medi_yeh.fina": "\uFDC6",
    "/seen.medi_meem.medi": "\uFCE7",
    "/seen.medi_meem.medi_hah.fina": "\uFD5F",
    "/seen.medi_meem.medi_meem.fina": "\uFD62",
    "/seen.medi_reh.fina": "\uFD2A",
    "/seen.medi_yeh.fina": "\uFD18",
    "/seenDigitFourAbove": "\u077D",
    "/seenFourDotsAbove": "\u075C",
    "/seenInvertedV": "\u077E",
    "/seenSmallTahTwoDots": "\u0770",
    "/seenTwoDotsVerticallyAbove": "\u076D",
    "/seenabove": "\u06DC",
    "/seenarabic": "\u0633",
    "/seendotbelowdotabove": "\u069A",
    "/seenfinalarabic": "\uFEB2",
    "/seeninitialarabic": "\uFEB3",
    "/seenlow": "\u06E3",
    "/seenmedialarabic": "\uFEB4",
    "/seenthreedotsbelow": "\u069B",
    "/seenthreedotsbelowthreedotsabove": "\u069C",
    "/segment": "\u2313",
    "/segol": "\u05B6",
    "/segol13": "\u05B6",
    "/segol1f": "\u05B6",
    "/segol2c": "\u05B6",
    "/segol:hb": "\u05B6",
    "/segolhebrew": "\u05B6",
    "/segolnarrowhebrew": "\u05B6",
    "/segolquarterhebrew": "\u05B6",
    "/segolta:hb": "\u0592",
    "/segoltahebrew": "\u0592",
    "/segolwidehebrew": "\u05B6",
    "/seharmenian": "\u057D",
    "/sehiragana": "\u305B",
    "/sekatakana": "\u30BB",
    "/sekatakanahalfwidth": "\uFF7E",
    "/selfideographicparen": "\u3242",
    "/semicolon": "\u003B",
    "/semicolonarabic": "\u061B",
    "/semicolonmonospace": "\uFF1B",
    "/semicolonreversed": "\u204F",
    "/semicolonsmall": "\uFE54",
    "/semicolonunderlinefunc": "\u236E",
    "/semidirectproductleft": "\u22CB",
    "/semidirectproductright": "\u22CC",
    "/semisextile": "\u26BA",
    "/semisoftcyr": "\u048D",
    "/semivoicedmarkkana": "\u309C",
    "/semivoicedmarkkanahalfwidth": "\uFF9F",
    "/sentisquare": "\u3322",
    "/sentosquare": "\u3323",
    "/septembertelegraph": "\u32C8",
    "/sersetdblup": "\u22D1",
    "/sersetnotequalup": "\u228B",
    "/servicemark": "\u2120",
    "/sesamedot": "\uFE45",
    "/sesquiquadrate": "\u26BC",
    "/setminus": "\u2216",
    "/seven": "\u0037",
    "/seven.inferior": "\u2087",
    "/seven.roman": "\u2166",
    "/seven.romansmall": "\u2176",
    "/seven.superior": "\u2077",
    "/sevenarabic": "\u0667",
    "/sevenbengali": "\u09ED",
    "/sevencircle": "\u2466",
    "/sevencircledbl": "\u24FB",
    "/sevencircleinversesansserif": "\u2790",
    "/sevencomma": "\u1F108",
    "/sevendeva": "\u096D",
    "/seveneighths": "\u215E",
    "/sevenfar": "\u06F7",
    "/sevengujarati": "\u0AED",
    "/sevengurmukhi": "\u0A6D",
    "/sevenhackarabic": "\u0667",
    "/sevenhangzhou": "\u3027",
    "/sevenideographiccircled": "\u3286",
    "/sevenideographicparen": "\u3226",
    "/seveninferior": "\u2087",
    "/sevenmonospace": "\uFF17",
    "/sevenoldstyle": "\uF737",
    "/sevenparen": "\u247A",
    "/sevenparenthesized": "\u247A",
    "/sevenperiod": "\u248E",
    "/sevenpersian": "\u06F7",
    "/sevenpointonesquare": "\u1F1A1",
    "/sevenroman": "\u2176",
    "/sevensuperior": "\u2077",
    "/seventeencircle": "\u2470",
    "/seventeencircleblack": "\u24F1",
    "/seventeenparen": "\u2484",
    "/seventeenparenthesized": "\u2484",
    "/seventeenperiod": "\u2498",
    "/seventhai": "\u0E57",
    "/seventycirclesquare": "\u324E",
    "/sextile": "\u26B9",
    "/sfthyphen": "\u00AD",
    "/shaarmenian": "\u0577",
    "/shabengali": "\u09B6",
    "/shacyr": "\u0448",
    "/shacyrillic": "\u0448",
    "/shaddaAlefIsol": "\uFC63",
    "/shaddaDammaIsol": "\uFC61",
    "/shaddaDammaMedi": "\uFCF3",
    "/shaddaDammatanIsol": "\uFC5E",
    "/shaddaFathaIsol": "\uFC60",
    "/shaddaFathaMedi": "\uFCF2",
    "/shaddaIsol": "\uFE7C",
    "/shaddaKasraIsol": "\uFC62",
    "/shaddaKasraMedi": "\uFCF4",
    "/shaddaKasratanIsol": "\uFC5F",
    "/shaddaMedi": "\uFE7D",
    "/shaddaarabic": "\u0651",
    "/shaddadammaarabic": "\uFC61",
    "/shaddadammatanarabic": "\uFC5E",
    "/shaddafathaarabic": "\uFC60",
    "/shaddafathatanarabic": "\u0651",
    "/shaddakasraarabic": "\uFC62",
    "/shaddakasratanarabic": "\uFC5F",
    "/shade": "\u2592",
    "/shadedark": "\u2593",
    "/shadelight": "\u2591",
    "/shademedium": "\u2592",
    "/shadeva": "\u0936",
    "/shagujarati": "\u0AB6",
    "/shagurmukhi": "\u0A36",
    "/shalshelet:hb": "\u0593",
    "/shalshelethebrew": "\u0593",
    "/shamrock": "\u2618",
    "/shavedIce": "\u1F367",
    "/shbopomofo": "\u3115",
    "/shchacyr": "\u0449",
    "/shchacyrillic": "\u0449",
    "/sheen": "\u0634",
    "/sheen.fina": "\uFEB6",
    "/sheen.init": "\uFEB7",
    "/sheen.init_alefmaksura.fina": "\uFCFD",
    "/sheen.init_hah.fina": "\uFD0A",
    "/sheen.init_hah.medi": "\uFD2E",
    "/sheen.init_hah.medi_meem.medi": "\uFD68",
    "/sheen.init_heh.medi": "\uFD32",
    "/sheen.init_jeem.fina": "\uFD09",
    "/sheen.init_jeem.medi": "\uFD2D",
    "/sheen.init_khah.fina": "\uFD0B",
    "/sheen.init_khah.medi": "\uFD2F",
    "/sheen.init_meem.fina": "\uFD0C",
    "/sheen.init_meem.medi": "\uFD30",
    "/sheen.init_meem.medi_khah.medi": "\uFD6B",
    "/sheen.init_meem.medi_meem.medi": "\uFD6D",
    "/sheen.init_reh.fina": "\uFD0D",
    "/sheen.init_yeh.fina": "\uFCFE",
    "/sheen.isol": "\uFEB5",
    "/sheen.medi": "\uFEB8",
    "/sheen.medi_alefmaksura.fina": "\uFD19",
    "/sheen.medi_hah.fina": "\uFD26",
    "/sheen.medi_hah.medi": "\uFD38",
    "/sheen.medi_hah.medi_meem.fina": "\uFD67",
    "/sheen.medi_hah.medi_yeh.fina": "\uFDAA",
    "/sheen.medi_heh.medi": "\uFCEA",
    "/sheen.medi_jeem.fina": "\uFD25",
    "/sheen.medi_jeem.medi": "\uFD37",
    "/sheen.medi_jeem.medi_yeh.fina": "\uFD69",
    "/sheen.medi_khah.fina": "\uFD27",
    "/sheen.medi_khah.medi": "\uFD39",
    "/sheen.medi_meem.fina": "\uFD28",
    "/sheen.medi_meem.medi": "\uFCE9",
    "/sheen.medi_meem.medi_khah.fina": "\uFD6A",
    "/sheen.medi_meem.medi_meem.fina": "\uFD6C",
    "/sheen.medi_reh.fina": "\uFD29",
    "/sheen.medi_yeh.fina": "\uFD1A",
    "/sheenarabic": "\u0634",
    "/sheendotbelow": "\u06FA",
    "/sheenfinalarabic": "\uFEB6",
    "/sheeninitialarabic": "\uFEB7",
    "/sheenmedialarabic": "\uFEB8",
    "/sheep": "\u1F411",
    "/sheicoptic": "\u03E3",
    "/shelfmod": "\u02FD",
    "/shelfopenmod": "\u02FE",
    "/sheqel": "\u20AA",
    "/sheqelhebrew": "\u20AA",
    "/sheva": "\u05B0",
    "/sheva115": "\u05B0",
    "/sheva15": "\u05B0",
    "/sheva22": "\u05B0",
    "/sheva2e": "\u05B0",
    "/sheva:hb": "\u05B0",
    "/shevahebrew": "\u05B0",
    "/shevanarrowhebrew": "\u05B0",
    "/shevaquarterhebrew": "\u05B0",
    "/shevawidehebrew": "\u05B0",
    "/shhacyr": "\u04BB",
    "/shhacyrillic": "\u04BB",
    "/shhatailcyr": "\u0527",
    "/shield": "\u1F6E1",
    "/shimacoptic": "\u03ED",
    "/shin": "\u05E9",
    "/shin:hb": "\u05E9",
    "/shinDot:hb": "\u05C1",
    "/shindagesh": "\uFB49",
    "/shindageshhebrew": "\uFB49",
    "/shindageshshindot": "\uFB2C",
    "/shindageshshindothebrew": "\uFB2C",
    "/shindageshsindot": "\uFB2D",
    "/shindageshsindothebrew": "\uFB2D",
    "/shindothebrew": "\u05C1",
    "/shinhebrew": "\u05E9",
    "/shinshindot": "\uFB2A",
    "/shinshindothebrew": "\uFB2A",
    "/shinsindot": "\uFB2B",
    "/shinsindothebrew": "\uFB2B",
    "/shintoshrine": "\u26E9",
    "/shinwithdagesh:hb": "\uFB49",
    "/shinwithdageshandshinDot:hb": "\uFB2C",
    "/shinwithdageshandsinDot:hb": "\uFB2D",
    "/shinwithshinDot:hb": "\uFB2A",
    "/shinwithsinDot:hb": "\uFB2B",
    "/ship": "\u1F6A2",
    "/sho": "\u03F8",
    "/shoejotupfunc": "\u235D",
    "/shoestiledownfunc": "\u2366",
    "/shoestileleftfunc": "\u2367",
    "/shogipieceblack": "\u2617",
    "/shogipiecewhite": "\u2616",
    "/shook": "\u0282",
    "/shootingStar": "\u1F320",
    "/shoppingBags": "\u1F6CD",
    "/shoppingTrolley": "\u1F6D2",
    "/shortcake": "\u1F370",
    "/shortequalsmod": "\uA78A",
    "/shortoverlongmetrical": "\u23D3",
    "/shoulderedopenbox": "\u237D",
    "/shower": "\u1F6BF",
    "/shvsquare": "\u1F1AA",
    "/sicirclekatakana": "\u32DB",
    "/sidewaysBlackDownPointingIndex": "\u1F5A1",
    "/sidewaysBlackLeftPointingIndex": "\u1F59A",
    "/sidewaysBlackRightPointingIndex": "\u1F59B",
    "/sidewaysBlackUpPointingIndex": "\u1F5A0",
    "/sidewaysWhiteDownPointingIndex": "\u1F59F",
    "/sidewaysWhiteLeftPointingIndex": "\u1F598",
    "/sidewaysWhiteRightPointingIndex": "\u1F599",
    "/sidewaysWhiteUpPointingIndex": "\u1F59E",
    "/sigma": "\u03C3",
    "/sigma1": "\u03C2",
    "/sigmafinal": "\u03C2",
    "/sigmalunatedottedreversedsymbol": "\u037D",
    "/sigmalunatedottedsymbol": "\u037C",
    "/sigmalunatereversedsymbol": "\u037B",
    "/sigmalunatesymbol": "\u03F2",
    "/sigmalunatesymbolgreek": "\u03F2",
    "/sihiragana": "\u3057",
    "/sikatakana": "\u30B7",
    "/sikatakanahalfwidth": "\uFF7C",
    "/silhouetteOfJapan": "\u1F5FE",
    "/siluqhebrew": "\u05BD",
    "/siluqlefthebrew": "\u05BD",
    "/similar": "\u223C",
    "/sinDot:hb": "\u05C2",
    "/sindothebrew": "\u05C2",
    "/sinewave": "\u223F",
    "/sinh:a": "\u0D85",
    "/sinh:aa": "\u0D86",
    "/sinh:aae": "\u0D88",
    "/sinh:aaesign": "\u0DD1",
    "/sinh:aasign": "\u0DCF",
    "/sinh:ae": "\u0D87",
    "/sinh:aesign": "\u0DD0",
    "/sinh:ai": "\u0D93",
    "/sinh:aisign": "\u0DDB",
    "/sinh:anusvara": "\u0D82",
    "/sinh:au": "\u0D96",
    "/sinh:ausign": "\u0DDE",
    "/sinh:ba": "\u0DB6",
    "/sinh:bha": "\u0DB7",
    "/sinh:ca": "\u0DA0",
    "/sinh:cha": "\u0DA1",
    "/sinh:da": "\u0DAF",
    "/sinh:dda": "\u0DA9",
    "/sinh:ddha": "\u0DAA",
    "/sinh:dha": "\u0DB0",
    "/sinh:e": "\u0D91",
    "/sinh:ee": "\u0D92",
    "/sinh:eesign": "\u0DDA",
    "/sinh:esign": "\u0DD9",
    "/sinh:fa": "\u0DC6",
    "/sinh:ga": "\u0D9C",
    "/sinh:gha": "\u0D9D",
    "/sinh:ha": "\u0DC4",
    "/sinh:i": "\u0D89",
    "/sinh:ii": "\u0D8A",
    "/sinh:iisign": "\u0DD3",
    "/sinh:isign": "\u0DD2",
    "/sinh:ja": "\u0DA2",
    "/sinh:jha": "\u0DA3",
    "/sinh:jnya": "\u0DA5",
    "/sinh:ka": "\u0D9A",
    "/sinh:kha": "\u0D9B",
    "/sinh:kunddaliya": "\u0DF4",
    "/sinh:la": "\u0DBD",
    "/sinh:litheight": "\u0DEE",
    "/sinh:lithfive": "\u0DEB",
    "/sinh:lithfour": "\u0DEA",
    "/sinh:lithnine": "\u0DEF",
    "/sinh:lithone": "\u0DE7",
    "/sinh:lithseven": "\u0DED",
    "/sinh:lithsix": "\u0DEC",
    "/sinh:liththree": "\u0DE9",
    "/sinh:lithtwo": "\u0DE8",
    "/sinh:lithzero": "\u0DE6",
    "/sinh:lla": "\u0DC5",
    "/sinh:llvocal": "\u0D90",
    "/sinh:llvocalsign": "\u0DF3",
    "/sinh:lvocal": "\u0D8F",
    "/sinh:lvocalsign": "\u0DDF",
    "/sinh:ma": "\u0DB8",
    "/sinh:mba": "\u0DB9",
    "/sinh:na": "\u0DB1",
    "/sinh:nda": "\u0DB3",
    "/sinh:nga": "\u0D9E",
    "/sinh:nna": "\u0DAB",
    "/sinh:nndda": "\u0DAC",
    "/sinh:nnga": "\u0D9F",
    "/sinh:nya": "\u0DA4",
    "/sinh:nyja": "\u0DA6",
    "/sinh:o": "\u0D94",
    "/sinh:oo": "\u0D95",
    "/sinh:oosign": "\u0DDD",
    "/sinh:osign": "\u0DDC",
    "/sinh:pa": "\u0DB4",
    "/sinh:pha": "\u0DB5",
    "/sinh:ra": "\u0DBB",
    "/sinh:rrvocal": "\u0D8E",
    "/sinh:rrvocalsign": "\u0DF2",
    "/sinh:rvocal": "\u0D8D",
    "/sinh:rvocalsign": "\u0DD8",
    "/sinh:sa": "\u0DC3",
    "/sinh:sha": "\u0DC1",
    "/sinh:ssa": "\u0DC2",
    "/sinh:ta": "\u0DAD",
    "/sinh:tha": "\u0DAE",
    "/sinh:tta": "\u0DA7",
    "/sinh:ttha": "\u0DA8",
    "/sinh:u": "\u0D8B",
    "/sinh:usign": "\u0DD4",
    "/sinh:uu": "\u0D8C",
    "/sinh:uusign": "\u0DD6",
    "/sinh:va": "\u0DC0",
    "/sinh:virama": "\u0DCA",
    "/sinh:visarga": "\u0D83",
    "/sinh:ya": "\u0DBA",
    "/sinologicaldot": "\uA78F",
    "/sinsular": "\uA785",
    "/siosacirclekorean": "\u3274",
    "/siosaparenkorean": "\u3214",
    "/sioscieuckorean": "\u317E",
    "/sioscirclekorean": "\u3266",
    "/sioskiyeokkorean": "\u317A",
    "/sioskorean": "\u3145",
    "/siosnieunkorean": "\u317B",
    "/siosparenkorean": "\u3206",
    "/siospieupkorean": "\u317D",
    "/siostikeutkorean": "\u317C",
    "/siringusquare": "\u3321",
    "/six": "\u0036",
    "/six.inferior": "\u2086",
    "/six.roman": "\u2165",
    "/six.romansmall": "\u2175",
    "/six.superior": "\u2076",
    "/sixPointedStarMiddleDot": "\u1F52F",
    "/sixarabic": "\u0666",
    "/sixbengali": "\u09EC",
    "/sixcircle": "\u2465",
    "/sixcircledbl": "\u24FA",
    "/sixcircleinversesansserif": "\u278F",
    "/sixcomma": "\u1F107",
    "/sixdeva": "\u096C",
    "/sixdotsvertical": "\u2E3D",
    "/sixfar": "\u06F6",
    "/sixgujarati": "\u0AEC",
    "/sixgurmukhi": "\u0A6C",
    "/sixhackarabic": "\u0666",
    "/sixhangzhou": "\u3026",
    "/sixideographiccircled": "\u3285",
    "/sixideographicparen": "\u3225",
    "/sixinferior": "\u2086",
    "/sixlateform.roman": "\u2185",
    "/sixmonospace": "\uFF16",
    "/sixoldstyle": "\uF736",
    "/sixparen": "\u2479",
    "/sixparenthesized": "\u2479",
    "/sixperemspace": "\u2006",
    "/sixperiod": "\u248D",
    "/sixpersian": "\u06F6",
    "/sixroman": "\u2175",
    "/sixsuperior": "\u2076",
    "/sixteencircle": "\u246F",
    "/sixteencircleblack": "\u24F0",
    "/sixteencurrencydenominatorbengali": "\u09F9",
    "/sixteenparen": "\u2483",
    "/sixteenparenthesized": "\u2483",
    "/sixteenperiod": "\u2497",
    "/sixthai": "\u0E56",
    "/sixtycirclesquare": "\u324D",
    "/sixtypsquare": "\u1F1A3",
    "/sjekomicyr": "\u050D",
    "/skiAndSkiBoot": "\u1F3BF",
    "/skier": "\u26F7",
    "/skull": "\u1F480",
    "/skullcrossbones": "\u2620",
    "/slash": "\u002F",
    "/slashbarfunc": "\u233F",
    "/slashmonospace": "\uFF0F",
    "/sled": "\u1F6F7",
    "/sleeping": "\u1F4A4",
    "/sleepingAccommodation": "\u1F6CC",
    "/sleepingFace": "\u1F634",
    "/sleepyFace": "\u1F62A",
    "/sleuthOrSpy": "\u1F575",
    "/sliceOfPizza": "\u1F355",
    "/slightlyFrowningFace": "\u1F641",
    "/slightlySmilingFace": "\u1F642",
    "/slong": "\u017F",
    "/slongdotaccent": "\u1E9B",
    "/slope": "\u2333",
    "/slotMachine": "\u1F3B0",
    "/smallAirplane": "\u1F6E9",
    "/smallBlueDiamond": "\u1F539",
    "/smallOrangeDiamond": "\u1F538",
    "/smallRedTriangleDOwn": "\u1F53D",
    "/smallRedTriangleUp": "\u1F53C",
    "/smile": "\u2323",
    "/smileface": "\u263A",
    "/smilingCatFaceWithHeartShapedEyes": "\u1F63B",
    "/smilingCatFaceWithOpenMouth": "\u1F63A",
    "/smilingFaceWithHalo": "\u1F607",
    "/smilingFaceWithHeartShapedEyes": "\u1F60D",
    "/smilingFaceWithHorns": "\u1F608",
    "/smilingFaceWithOpenMouth": "\u1F603",
    "/smilingFaceWithOpenMouthAndColdSweat": "\u1F605",
    "/smilingFaceWithOpenMouthAndSmilingEyes": "\u1F604",
    "/smilingFaceWithOpenMouthAndTightlyClosedEyes": "\u1F606",
    "/smilingFaceWithSmilingEyes": "\u1F60A",
    "/smilingFaceWithSunglasses": "\u1F60E",
    "/smilingfaceblack": "\u263B",
    "/smilingfacewhite": "\u263A",
    "/smirkingFace": "\u1F60F",
    "/smll:ampersand": "\uFE60",
    "/smll:asterisk": "\uFE61",
    "/smll:backslash": "\uFE68",
    "/smll:braceleft": "\uFE5B",
    "/smll:braceright": "\uFE5C",
    "/smll:colon": "\uFE55",
    "/smll:comma": "\uFE50",
    "/smll:dollar": "\uFE69",
    "/smll:emdash": "\uFE58",
    "/smll:equal": "\uFE66",
    "/smll:exclam": "\uFE57",
    "/smll:greater": "\uFE65",
    "/smll:hyphen": "\uFE63",
    "/smll:ideographiccomma": "\uFE51",
    "/smll:less": "\uFE64",
    "/smll:numbersign": "\uFE5F",
    "/smll:parenthesisleft": "\uFE59",
    "/smll:parenthesisright": "\uFE5A",
    "/smll:percent": "\uFE6A",
    "/smll:period": "\uFE52",
    "/smll:plus": "\uFE62",
    "/smll:question": "\uFE56",
    "/smll:semicolon": "\uFE54",
    "/smll:tortoiseshellbracketleft": "\uFE5D",
    "/smll:tortoiseshellbracketright": "\uFE5E",
    "/smoking": "\u1F6AC",
    "/smonospace": "\uFF53",
    "/snail": "\u1F40C",
    "/snake": "\u1F40D",
    "/snowboarder": "\u1F3C2",
    "/snowcappedMountain": "\u1F3D4",
    "/snowman": "\u2603",
    "/snowmanblack": "\u26C7",
    "/snowmanoutsnow": "\u26C4",
    "/sobliquestroke": "\uA7A9",
    "/soccerball": "\u26BD",
    "/societyideographiccircled": "\u3293",
    "/societyideographicparen": "\u3233",
    "/socirclekatakana": "\u32DE",
    "/sofPasuq:hb": "\u05C3",
    "/sofpasuqhebrew": "\u05C3",
    "/softIceCream": "\u1F366",
    "/softShellFloppyDisk": "\u1F5AC",
    "/softcyr": "\u044C",
    "/softhyphen": "\u00AD",
    "/softsigncyrillic": "\u044C",
    "/softwarefunction": "\u2394",
    "/sohiragana": "\u305D",
    "/sokatakana": "\u30BD",
    "/sokatakanahalfwidth": "\uFF7F",
    "/soliduslongoverlaycmb": "\u0338",
    "/solidusshortoverlaycmb": "\u0337",
    "/solidussubsetreversepreceding": "\u27C8",
    "/solidussupersetpreceding": "\u27C9",
    "/soonRightwardsArrowAbove": "\u1F51C",
    "/sorusithai": "\u0E29",
    "/sosalathai": "\u0E28",
    "/sosothai": "\u0E0B",
    "/sossquare": "\u1F198",
    "/sosuathai": "\u0E2A",
    "/soundcopyright": "\u2117",
    "/space": "\u0020",
    "/spacehackarabic": "\u0020",
    "/spade": "\u2660",
    "/spadeblack": "\u2660",
    "/spadesuitblack": "\u2660",
    "/spadesuitwhite": "\u2664",
    "/spadewhite": "\u2664",
    "/spaghetti": "\u1F35D",
    "/sparen": "\u24AE",
    "/sparenthesized": "\u24AE",
    "/sparklingHeart": "\u1F496",
    "/speakNoEvilMonkey": "\u1F64A",
    "/speaker": "\u1F508",
    "/speakerCancellationStroke": "\u1F507",
    "/speakerOneSoundWave": "\u1F509",
    "/speakerThreeSoundWaves": "\u1F50A",
    "/speakingHeadInSilhouette": "\u1F5E3",
    "/specialideographiccircled": "\u3295",
    "/specialideographicparen": "\u3235",
    "/speechBalloon": "\u1F4AC",
    "/speedboat": "\u1F6A4",
    "/spesmilo": "\u20B7",
    "/sphericalangle": "\u2222",
    "/spider": "\u1F577",
    "/spiderWeb": "\u1F578",
    "/spiralCalendarPad": "\u1F5D3",
    "/spiralNotePad": "\u1F5D2",
    "/spiralShell": "\u1F41A",
    "/splashingSweat": "\u1F4A6",
    "/sportsMedal": "\u1F3C5",
    "/spoutingWhale": "\u1F433",
    "/sppl:tildevertical": "\u2E2F",
    "/squarebelowcmb": "\u033B",
    "/squareblack": "\u25A0",
    "/squarebracketleftvertical": "\uFE47",
    "/squarebracketrightvertical": "\uFE48",
    "/squarecap": "\u2293",
    "/squarecc": "\u33C4",
    "/squarecm": "\u339D",
    "/squarecup": "\u2294",
    "/squareddotoperator": "\u22A1",
    "/squarediagonalcrosshatchfill": "\u25A9",
    "/squaredj": "\u1F190",
    "/squaredkey": "\u26BF",
    "/squaredminus": "\u229F",
    "/squaredplus": "\u229E",
    "/squaredsaltire": "\u26DD",
    "/squaredtimes": "\u22A0",
    "/squarefourcorners": "\u26F6",
    "/squarehalfleftblack": "\u25E7",
    "/squarehalfrightblack": "\u25E8",
    "/squarehorizontalfill": "\u25A4",
    "/squareimage": "\u228F",
    "/squareimageorequal": "\u2291",
    "/squareimageornotequal": "\u22E4",
    "/squarekg": "\u338F",
    "/squarekm": "\u339E",
    "/squarekmcapital": "\u33CE",
    "/squareln": "\u33D1",
    "/squarelog": "\u33D2",
    "/squarelowerdiagonalhalfrightblack": "\u25EA",
    "/squaremediumblack": "\u25FC",
    "/squaremediumwhite": "\u25FB",
    "/squaremg": "\u338E",
    "/squaremil": "\u33D5",
    "/squaremm": "\u339C",
    "/squaremsquared": "\u33A1",
    "/squareoriginal": "\u2290",
    "/squareoriginalorequal": "\u2292",
    "/squareoriginalornotequal": "\u22E5",
    "/squareorthogonalcrosshatchfill": "\u25A6",
    "/squareraised": "\u2E0B",
    "/squaresmallblack": "\u25AA",
    "/squaresmallmediumblack": "\u25FE",
    "/squaresmallmediumwhite": "\u25FD",
    "/squaresmallwhite": "\u25AB",
    "/squareupperdiagonalhalfleftblack": "\u25E9",
    "/squareupperlefttolowerrightfill": "\u25A7",
    "/squareupperrighttolowerleftfill": "\u25A8",
    "/squareverticalfill": "\u25A5",
    "/squarewhite": "\u25A1",
    "/squarewhitebisectinglinevertical": "\u25EB",
    "/squarewhitelowerquadrantleft": "\u25F1",
    "/squarewhitelowerquadrantright": "\u25F2",
    "/squarewhiteround": "\u25A2",
    "/squarewhiteupperquadrantleft": "\u25F0",
    "/squarewhiteupperquadrantright": "\u25F3",
    "/squarewhitewithsmallblack": "\u25A3",
    "/squarewhitewithsquaresmallblack": "\u25A3",
    "/squishquadfunc": "\u2337",
    "/srfullwidth": "\u33DB",
    "/srsquare": "\u33DB",
    "/ssabengali": "\u09B7",
    "/ssadeva": "\u0937",
    "/ssagujarati": "\u0AB7",
    "/ssangcieuckorean": "\u3149",
    "/ssanghieuhkorean": "\u3185",
    "/ssangieungkorean": "\u3180",
    "/ssangkiyeokkorean": "\u3132",
    "/ssangnieunkorean": "\u3165",
    "/ssangpieupkorean": "\u3143",
    "/ssangsioskorean": "\u3146",
    "/ssangtikeutkorean": "\u3138",
    "/ssuperior": "\uF6F2",
    "/ssupmod": "\u02E2",
    "/sswashtail": "\u023F",
    "/stackedcommadbl": "\u2E49",
    "/stadium": "\u1F3DF",
    "/staffofaesculapius": "\u2695",
    "/staffofhermes": "\u269A",
    "/stampedEnvelope": "\u1F583",
    "/star": "\u22C6",
    "/starblack": "\u2605",
    "/starcrescent": "\u262A",
    "/stardiaeresisfunc": "\u2363",
    "/starequals": "\u225B",
    "/staroperator": "\u22C6",
    "/staroutlinedwhite": "\u269D",
    "/starwhite": "\u2606",
    "/station": "\u1F689",
    "/statueOfLiberty": "\u1F5FD",
    "/steamLocomotive": "\u1F682",
    "/steamingBowl": "\u1F35C",
    "/stenographicfullstop": "\u2E3C",
    "/sterling": "\u00A3",
    "/sterlingmonospace": "\uFFE1",
    "/stigma": "\u03DB",
    "/stiletildefunc": "\u236D",
    "/stockChart": "\u1F5E0",
    "/stockideographiccircled": "\u3291",
    "/stockideographicparen": "\u3231",
    "/stopabove": "\u06EB",
    "/stopbelow": "\u06EA",
    "/straightRuler": "\u1F4CF",
    "/straightness": "\u23E4",
    "/strawberry": "\u1F353",
    "/stresslowtonemod": "\uA721",
    "/stresstonemod": "\uA720",
    "/strictlyequivalent": "\u2263",
    "/strokelongoverlaycmb": "\u0336",
    "/strokeshortoverlaycmb": "\u0335",
    "/studioMicrophone": "\u1F399",
    "/studyideographiccircled": "\u32AB",
    "/studyideographicparen": "\u323B",
    "/stupa": "\u1F6D3",
    "/subscriptalef": "\u0656",
    "/subset": "\u2282",
    "/subsetdbl": "\u22D0",
    "/subsetnotequal": "\u228A",
    "/subsetorequal": "\u2286",
    "/succeeds": "\u227B",
    "/succeedsbutnotequivalent": "\u22E9",
    "/succeedsorequal": "\u227D",
    "/succeedsorequivalent": "\u227F",
    "/succeedsunderrelation": "\u22B1",
    "/suchthat": "\u220B",
    "/sucirclekatakana": "\u32DC",
    "/suhiragana": "\u3059",
    "/suitableideographiccircled": "\u329C",
    "/sukatakana": "\u30B9",
    "/sukatakanahalfwidth": "\uFF7D",
    "/sukumendutvowel": "\uA9B9",
    "/sukunIsol": "\uFE7E",
    "/sukunMedi": "\uFE7F",
    "/sukunarabic": "\u0652",
    "/sukuvowel": "\uA9B8",
    "/summation": "\u2211",
    "/summationbottom": "\u23B3",
    "/summationdblstruck": "\u2140",
    "/summationtop": "\u23B2",
    "/sun": "\u263C",
    "/sunFace": "\u1F31E",
    "/sunbehindcloud": "\u26C5",
    "/sunflower": "\u1F33B",
    "/sunideographiccircled": "\u3290",
    "/sunideographicparen": "\u3230",
    "/sunraysblack": "\u2600",
    "/sunrayswhite": "\u263C",
    "/sunrise": "\u1F305",
    "/sunriseOverMountains": "\u1F304",
    "/sunsetOverBuildings": "\u1F307",
    "/superset": "\u2283",
    "/supersetnotequal": "\u228B",
    "/supersetorequal": "\u2287",
    "/superviseideographiccircled": "\u32AC",
    "/superviseideographicparen": "\u323C",
    "/surfer": "\u1F3C4",
    "/sushi": "\u1F363",
    "/suspensionRailway": "\u1F69F",
    "/suspensiondbl": "\u2E44",
    "/svfullwidth": "\u33DC",
    "/svsquare": "\u33DC",
    "/swatchtop": "\u23F1",
    "/swimmer": "\u1F3CA",
    "/swungdash": "\u2053",
    "/symbolabovethreedotsabove": "\uFBB6",
    "/symbolbelowthreedotsabove": "\uFBB7",
    "/symboldotabove": "\uFBB2",
    "/symboldotbelow": "\uFBB3",
    "/symboldoubleverticalbarbelow": "\uFBBC",
    "/symbolfourdotsabove": "\uFBBA",
    "/symbolfourdotsbelow": "\uFBBB",
    "/symbolpointingabovedownthreedotsabove": "\uFBB8",
    "/symbolpointingbelowdownthreedotsabove": "\uFBB9",
    "/symbolring": "\uFBBF",
    "/symboltahabovesmall": "\uFBC0",
    "/symboltahbelowsmall": "\uFBC1",
    "/symboltwodotsabove": "\uFBB4",
    "/symboltwodotsbelow": "\uFBB5",
    "/symboltwodotsverticallyabove": "\uFBBD",
    "/symboltwodotsverticallybelow": "\uFBBE",
    "/symmetry": "\u232F",
    "/synagogue": "\u1F54D",
    "/syouwaerasquare": "\u337C",
    "/syringe": "\u1F489",
    "/t": "\u0074",
    "/t-shirt": "\u1F455",
    "/t.inferior": "\u209C",
    "/tabengali": "\u09A4",
    "/tableTennisPaddleAndBall": "\u1F3D3",
    "/tacirclekatakana": "\u32DF",
    "/tackcircleaboveup": "\u27DF",
    "/tackdiaeresisupfunc": "\u2361",
    "/tackdown": "\u22A4",
    "/tackdownmod": "\u02D5",
    "/tackjotdownfunc": "\u234E",
    "/tackjotupfunc": "\u2355",
    "/tackleft": "\u22A3",
    "/tackleftright": "\u27DB",
    "/tackoverbarupfunc": "\u2351",
    "/tackright": "\u22A2",
    "/tackunderlinedownfunc": "\u234A",
    "/tackup": "\u22A5",
    "/tackupmod": "\u02D4",
    "/taco": "\u1F32E",
    "/tadeva": "\u0924",
    "/tagujarati": "\u0AA4",
    "/tagurmukhi": "\u0A24",
    "/tah": "\u0637",
    "/tah.fina": "\uFEC2",
    "/tah.init": "\uFEC3",
    "/tah.init_alefmaksura.fina": "\uFCF5",
    "/tah.init_hah.fina": "\uFC26",
    "/tah.init_hah.medi": "\uFCB8",
    "/tah.init_meem.fina": "\uFC27",
    "/tah.init_meem.medi": "\uFD33",
    "/tah.init_meem.medi_hah.medi": "\uFD72",
    "/tah.init_meem.medi_meem.medi": "\uFD73",
    "/tah.init_yeh.fina": "\uFCF6",
    "/tah.isol": "\uFEC1",
    "/tah.medi": "\uFEC4",
    "/tah.medi_alefmaksura.fina": "\uFD11",
    "/tah.medi_meem.medi": "\uFD3A",
    "/tah.medi_meem.medi_hah.fina": "\uFD71",
    "/tah.medi_meem.medi_yeh.fina": "\uFD74",
    "/tah.medi_yeh.fina": "\uFD12",
    "/tahabove": "\u0615",
    "/taharabic": "\u0637",
    "/tahfinalarabic": "\uFEC2",
    "/tahinitialarabic": "\uFEC3",
    "/tahiragana": "\u305F",
    "/tahmedialarabic": "\uFEC4",
    "/tahthreedotsabove": "\u069F",
    "/taisyouerasquare": "\u337D",
    "/takatakana": "\u30BF",
    "/takatakanahalfwidth": "\uFF80",
    "/takhallus": "\u0614",
    "/talingvowel": "\uA9BA",
    "/taml:a": "\u0B85",
    "/taml:aa": "\u0B86",
    "/taml:aasign": "\u0BBE",
    "/taml:ai": "\u0B90",
    "/taml:aisign": "\u0BC8",
    "/taml:anusvarasign": "\u0B82",
    "/taml:asabovesign": "\u0BF8",
    "/taml:au": "\u0B94",
    "/taml:aulengthmark": "\u0BD7",
    "/taml:ausign": "\u0BCC",
    "/taml:ca": "\u0B9A",
    "/taml:creditsign": "\u0BF7",
    "/taml:daysign": "\u0BF3",
    "/taml:debitsign": "\u0BF6",
    "/taml:e": "\u0B8E",
    "/taml:ee": "\u0B8F",
    "/taml:eesign": "\u0BC7",
    "/taml:eight": "\u0BEE",
    "/taml:esign": "\u0BC6",
    "/taml:five": "\u0BEB",
    "/taml:four": "\u0BEA",
    "/taml:ha": "\u0BB9",
    "/taml:i": "\u0B87",
    "/taml:ii": "\u0B88",
    "/taml:iisign": "\u0BC0",
    "/taml:isign": "\u0BBF",
    "/taml:ja": "\u0B9C",
    "/taml:ka": "\u0B95",
    "/taml:la": "\u0BB2",
    "/taml:lla": "\u0BB3",
    "/taml:llla": "\u0BB4",
    "/taml:ma": "\u0BAE",
    "/taml:monthsign": "\u0BF4",
    "/taml:na": "\u0BA8",
    "/taml:nga": "\u0B99",
    "/taml:nine": "\u0BEF",
    "/taml:nna": "\u0BA3",
    "/taml:nnna": "\u0BA9",
    "/taml:nya": "\u0B9E",
    "/taml:o": "\u0B92",
    "/taml:om": "\u0BD0",
    "/taml:one": "\u0BE7",
    "/taml:onehundred": "\u0BF1",
    "/taml:onethousand": "\u0BF2",
    "/taml:oo": "\u0B93",
    "/taml:oosign": "\u0BCB",
    "/taml:osign": "\u0BCA",
    "/taml:pa": "\u0BAA",
    "/taml:ra": "\u0BB0",
    "/taml:rra": "\u0BB1",
    "/taml:rupeesign": "\u0BF9",
    "/taml:sa": "\u0BB8",
    "/taml:seven": "\u0BED",
    "/taml:sha": "\u0BB6",
    "/taml:sign": "\u0BFA",
    "/taml:six": "\u0BEC",
    "/taml:ssa": "\u0BB7",
    "/taml:ta": "\u0BA4",
    "/taml:ten": "\u0BF0",
    "/taml:three": "\u0BE9",
    "/taml:tta": "\u0B9F",
    "/taml:two": "\u0BE8",
    "/taml:u": "\u0B89",
    "/taml:usign": "\u0BC1",
    "/taml:uu": "\u0B8A",
    "/taml:uusign": "\u0BC2",
    "/taml:va": "\u0BB5",
    "/taml:viramasign": "\u0BCD",
    "/taml:visargasign": "\u0B83",
    "/taml:ya": "\u0BAF",
    "/taml:yearsign": "\u0BF5",
    "/taml:zero": "\u0BE6",
    "/tamurda": "\uA9A1",
    "/tanabataTree": "\u1F38B",
    "/tangerine": "\u1F34A",
    "/tapeCartridge": "\u1F5AD",
    "/tarungvowel": "\uA9B4",
    "/tatweelFathatanAbove": "\uFE71",
    "/tatweelarabic": "\u0640",
    "/tau": "\u03C4",
    "/taurus": "\u2649",
    "/tav": "\u05EA",
    "/tav:hb": "\u05EA",
    "/tavdages": "\uFB4A",
    "/tavdagesh": "\uFB4A",
    "/tavdageshhebrew": "\uFB4A",
    "/tavhebrew": "\u05EA",
    "/tavwide:hb": "\uFB28",
    "/tavwithdagesh:hb": "\uFB4A",
    "/taxi": "\u1F695",
    "/tbar": "\u0167",
    "/tbopomofo": "\u310A",
    "/tcaron": "\u0165",
    "/tccurl": "\u02A8",
    "/tcedilla": "\u0163",
    "/tcheh": "\u0686",
    "/tcheh.fina": "\uFB7B",
    "/tcheh.init": "\uFB7C",
    "/tcheh.isol": "\uFB7A",
    "/tcheh.medi": "\uFB7D",
    "/tcheharabic": "\u0686",
    "/tchehdotabove": "\u06BF",
    "/tcheheh": "\u0687",
    "/tcheheh.fina": "\uFB7F",
    "/tcheheh.init": "\uFB80",
    "/tcheheh.isol": "\uFB7E",
    "/tcheheh.medi": "\uFB81",
    "/tchehfinalarabic": "\uFB7B",
    "/tchehinitialarabic": "\uFB7C",
    "/tchehmedialarabic": "\uFB7D",
    "/tchehmeeminitialarabic": "\uFB7C",
    "/tcircle": "\u24E3",
    "/tcircumflexbelow": "\u1E71",
    "/tcommaaccent": "\u0163",
    "/tcurl": "\u0236",
    "/tdieresis": "\u1E97",
    "/tdot": "\u1E6B",
    "/tdotaccent": "\u1E6B",
    "/tdotbelow": "\u1E6D",
    "/teacupOutHandle": "\u1F375",
    "/tear-offCalendar": "\u1F4C6",
    "/tecirclekatakana": "\u32E2",
    "/tecyr": "\u0442",
    "/tecyrillic": "\u0442",
    "/tedescendercyrillic": "\u04AD",
    "/teh": "\u062A",
    "/teh.fina": "\uFE96",
    "/teh.init": "\uFE97",
    "/teh.init_alefmaksura.fina": "\uFC0F",
    "/teh.init_hah.fina": "\uFC0C",
    "/teh.init_hah.medi": "\uFCA2",
    "/teh.init_hah.medi_jeem.medi": "\uFD52",
    "/teh.init_hah.medi_meem.medi": "\uFD53",
    "/teh.init_heh.medi": "\uFCA5",
    "/teh.init_jeem.fina": "\uFC0B",
    "/teh.init_jeem.medi": "\uFCA1",
    "/teh.init_jeem.medi_meem.medi": "\uFD50",
    "/teh.init_khah.fina": "\uFC0D",
    "/teh.init_khah.medi": "\uFCA3",
    "/teh.init_khah.medi_meem.medi": "\uFD54",
    "/teh.init_meem.fina": "\uFC0E",
    "/teh.init_meem.medi": "\uFCA4",
    "/teh.init_meem.medi_hah.medi": "\uFD56",
    "/teh.init_meem.medi_jeem.medi": "\uFD55",
    "/teh.init_meem.medi_khah.medi": "\uFD57",
    "/teh.init_yeh.fina": "\uFC10",
    "/teh.isol": "\uFE95",
    "/teh.medi": "\uFE98",
    "/teh.medi_alefmaksura.fina": "\uFC74",
    "/teh.medi_hah.medi_jeem.fina": "\uFD51",
    "/teh.medi_heh.medi": "\uFCE4",
    "/teh.medi_jeem.medi_alefmaksura.fina": "\uFDA0",
    "/teh.medi_jeem.medi_yeh.fina": "\uFD9F",
    "/teh.medi_khah.medi_alefmaksura.fina": "\uFDA2",
    "/teh.medi_khah.medi_yeh.fina": "\uFDA1",
    "/teh.medi_meem.fina": "\uFC72",
    "/teh.medi_meem.medi": "\uFCE3",
    "/teh.medi_meem.medi_alefmaksura.fina": "\uFDA4",
    "/teh.medi_meem.medi_yeh.fina": "\uFDA3",
    "/teh.medi_noon.fina": "\uFC73",
    "/teh.medi_reh.fina": "\uFC70",
    "/teh.medi_yeh.fina": "\uFC75",
    "/teh.medi_zain.fina": "\uFC71",
    "/teharabic": "\u062A",
    "/tehdownthreedotsabove": "\u067D",
    "/teheh": "\u067F",
    "/teheh.fina": "\uFB63",
    "/teheh.init": "\uFB64",
    "/teheh.isol": "\uFB62",
    "/teheh.medi": "\uFB65",
    "/tehfinalarabic": "\uFE96",
    "/tehhahinitialarabic": "\uFCA2",
    "/tehhahisolatedarabic": "\uFC0C",
    "/tehinitialarabic": "\uFE97",
    "/tehiragana": "\u3066",
    "/tehjeeminitialarabic": "\uFCA1",
    "/tehjeemisolatedarabic": "\uFC0B",
    "/tehmarbuta": "\u0629",
    "/tehmarbuta.fina": "\uFE94",
    "/tehmarbuta.isol": "\uFE93",
    "/tehmarbutaarabic": "\u0629",
    "/tehmarbutafinalarabic": "\uFE94",
    "/tehmarbutagoal": "\u06C3",
    "/tehmedialarabic": "\uFE98",
    "/tehmeeminitialarabic": "\uFCA4",
    "/tehmeemisolatedarabic": "\uFC0E",
    "/tehnoonfinalarabic": "\uFC73",
    "/tehring": "\u067C",
    "/tekatakana": "\u30C6",
    "/tekatakanahalfwidth": "\uFF83",
    "/telephone": "\u2121",
    "/telephoneOnTopOfModem": "\u1F580",
    "/telephoneReceiver": "\u1F4DE",
    "/telephoneReceiverPage": "\u1F57C",
    "/telephoneblack": "\u260E",
    "/telephonerecorder": "\u2315",
    "/telephonewhite": "\u260F",
    "/telescope": "\u1F52D",
    "/television": "\u1F4FA",
    "/telishaGedolah:hb": "\u05A0",
    "/telishaQetannah:hb": "\u05A9",
    "/telishagedolahebrew": "\u05A0",
    "/telishaqetanahebrew": "\u05A9",
    "/telu:a": "\u0C05",
    "/telu:aa": "\u0C06",
    "/telu:aasign": "\u0C3E",
    "/telu:ai": "\u0C10",
    "/telu:ailengthmark": "\u0C56",
    "/telu:aisign": "\u0C48",
    "/telu:anusvarasign": "\u0C02",
    "/telu:au": "\u0C14",
    "/telu:ausign": "\u0C4C",
    "/telu:avagrahasign": "\u0C3D",
    "/telu:ba": "\u0C2C",
    "/telu:bha": "\u0C2D",
    "/telu:bindusigncandra": "\u0C01",
    "/telu:ca": "\u0C1A",
    "/telu:cha": "\u0C1B",
    "/telu:combiningbinduabovesigncandra": "\u0C00",
    "/telu:da": "\u0C26",
    "/telu:dda": "\u0C21",
    "/telu:ddha": "\u0C22",
    "/telu:dha": "\u0C27",
    "/telu:dza": "\u0C59",
    "/telu:e": "\u0C0E",
    "/telu:ee": "\u0C0F",
    "/telu:eesign": "\u0C47",
    "/telu:eight": "\u0C6E",
    "/telu:esign": "\u0C46",
    "/telu:five": "\u0C6B",
    "/telu:four": "\u0C6A",
    "/telu:fractiononeforevenpowersoffour": "\u0C7C",
    "/telu:fractiononeforoddpowersoffour": "\u0C79",
    "/telu:fractionthreeforevenpowersoffour": "\u0C7E",
    "/telu:fractionthreeforoddpowersoffour": "\u0C7B",
    "/telu:fractiontwoforevenpowersoffour": "\u0C7D",
    "/telu:fractiontwoforoddpowersoffour": "\u0C7A",
    "/telu:fractionzeroforoddpowersoffour": "\u0C78",
    "/telu:ga": "\u0C17",
    "/telu:gha": "\u0C18",
    "/telu:ha": "\u0C39",
    "/telu:i": "\u0C07",
    "/telu:ii": "\u0C08",
    "/telu:iisign": "\u0C40",
    "/telu:isign": "\u0C3F",
    "/telu:ja": "\u0C1C",
    "/telu:jha": "\u0C1D",
    "/telu:ka": "\u0C15",
    "/telu:kha": "\u0C16",
    "/telu:la": "\u0C32",
    "/telu:lengthmark": "\u0C55",
    "/telu:lla": "\u0C33",
    "/telu:llla": "\u0C34",
    "/telu:llsignvocal": "\u0C63",
    "/telu:llvocal": "\u0C61",
    "/telu:lsignvocal": "\u0C62",
    "/telu:lvocal": "\u0C0C",
    "/telu:ma": "\u0C2E",
    "/telu:na": "\u0C28",
    "/telu:nga": "\u0C19",
    "/telu:nine": "\u0C6F",
    "/telu:nna": "\u0C23",
    "/telu:nya": "\u0C1E",
    "/telu:o": "\u0C12",
    "/telu:one": "\u0C67",
    "/telu:oo": "\u0C13",
    "/telu:oosign": "\u0C4B",
    "/telu:osign": "\u0C4A",
    "/telu:pa": "\u0C2A",
    "/telu:pha": "\u0C2B",
    "/telu:ra": "\u0C30",
    "/telu:rra": "\u0C31",
    "/telu:rrra": "\u0C5A",
    "/telu:rrsignvocal": "\u0C44",
    "/telu:rrvocal": "\u0C60",
    "/telu:rsignvocal": "\u0C43",
    "/telu:rvocal": "\u0C0B",
    "/telu:sa": "\u0C38",
    "/telu:seven": "\u0C6D",
    "/telu:sha": "\u0C36",
    "/telu:six": "\u0C6C",
    "/telu:ssa": "\u0C37",
    "/telu:ta": "\u0C24",
    "/telu:tha": "\u0C25",
    "/telu:three": "\u0C69",
    "/telu:tsa": "\u0C58",
    "/telu:tta": "\u0C1F",
    "/telu:ttha": "\u0C20",
    "/telu:tuumusign": "\u0C7F",
    "/telu:two": "\u0C68",
    "/telu:u": "\u0C09",
    "/telu:usign": "\u0C41",
    "/telu:uu": "\u0C0A",
    "/telu:uusign": "\u0C42",
    "/telu:va": "\u0C35",
    "/telu:viramasign": "\u0C4D",
    "/telu:visargasign": "\u0C03",
    "/telu:ya": "\u0C2F",
    "/telu:zero": "\u0C66",
    "/ten.roman": "\u2169",
    "/ten.romansmall": "\u2179",
    "/tencircle": "\u2469",
    "/tencircledbl": "\u24FE",
    "/tencirclesquare": "\u3248",
    "/tenge": "\u20B8",
    "/tenhangzhou": "\u3038",
    "/tenideographiccircled": "\u3289",
    "/tenideographicparen": "\u3229",
    "/tennisRacquetAndBall": "\u1F3BE",
    "/tenparen": "\u247D",
    "/tenparenthesized": "\u247D",
    "/tenperiod": "\u2491",
    "/tenroman": "\u2179",
    "/tent": "\u26FA",
    "/tenthousand.roman": "\u2182",
    "/tesh": "\u02A7",
    "/tet": "\u05D8",
    "/tet:hb": "\u05D8",
    "/tetailcyr": "\u04AD",
    "/tetdagesh": "\uFB38",
    "/tetdageshhebrew": "\uFB38",
    "/tethebrew": "\u05D8",
    "/tetrasememetrical": "\u23D8",
    "/tetsecyr": "\u04B5",
    "/tetsecyrillic": "\u04B5",
    "/tetwithdagesh:hb": "\uFB38",
    "/tevir:hb": "\u059B",
    "/tevirhebrew": "\u059B",
    "/tevirlefthebrew": "\u059B",
    "/thabengali": "\u09A5",
    "/thadeva": "\u0925",
    "/thagujarati": "\u0AA5",
    "/thagurmukhi": "\u0A25",
    "/thai:angkhankhu": "\u0E5A",
    "/thai:baht": "\u0E3F",
    "/thai:bobaimai": "\u0E1A",
    "/thai:chochan": "\u0E08",
    "/thai:chochang": "\u0E0A",
    "/thai:choching": "\u0E09",
    "/thai:chochoe": "\u0E0C",
    "/thai:dochada": "\u0E0E",
    "/thai:dodek": "\u0E14",
    "/thai:eight": "\u0E58",
    "/thai:five": "\u0E55",
    "/thai:fofa": "\u0E1D",
    "/thai:fofan": "\u0E1F",
    "/thai:fongman": "\u0E4F",
    "/thai:four": "\u0E54",
    "/thai:hohip": "\u0E2B",
    "/thai:honokhuk": "\u0E2E",
    "/thai:khokhai": "\u0E02",
    "/thai:khokhon": "\u0E05",
    "/thai:khokhuat": "\u0E03",
    "/thai:khokhwai": "\u0E04",
    "/thai:khomut": "\u0E5B",
    "/thai:khorakhang": "\u0E06",
    "/thai:kokai": "\u0E01",
    "/thai:lakkhangyao": "\u0E45",
    "/thai:lochula": "\u0E2C",
    "/thai:loling": "\u0E25",
    "/thai:lu": "\u0E26",
    "/thai:maichattawa": "\u0E4B",
    "/thai:maiek": "\u0E48",
    "/thai:maihan-akat": "\u0E31",
    "/thai:maitaikhu": "\u0E47",
    "/thai:maitho": "\u0E49",
    "/thai:maitri": "\u0E4A",
    "/thai:maiyamok": "\u0E46",
    "/thai:moma": "\u0E21",
    "/thai:ngongu": "\u0E07",
    "/thai:nikhahit": "\u0E4D",
    "/thai:nine": "\u0E59",
    "/thai:nonen": "\u0E13",
    "/thai:nonu": "\u0E19",
    "/thai:oang": "\u0E2D",
    "/thai:one": "\u0E51",
    "/thai:paiyannoi": "\u0E2F",
    "/thai:phinthu": "\u0E3A",
    "/thai:phophan": "\u0E1E",
    "/thai:phophung": "\u0E1C",
    "/thai:phosamphao": "\u0E20",
    "/thai:popla": "\u0E1B",
    "/thai:rorua": "\u0E23",
    "/thai:ru": "\u0E24",
    "/thai:saraa": "\u0E30",
    "/thai:saraaa": "\u0E32",
    "/thai:saraae": "\u0E41",
    "/thai:saraaimaimalai": "\u0E44",
    "/thai:saraaimaimuan": "\u0E43",
    "/thai:saraam": "\u0E33",
    "/thai:sarae": "\u0E40",
    "/thai:sarai": "\u0E34",
    "/thai:saraii": "\u0E35",
    "/thai:sarao": "\u0E42",
    "/thai:sarau": "\u0E38",
    "/thai:saraue": "\u0E36",
    "/thai:sarauee": "\u0E37",
    "/thai:sarauu": "\u0E39",
    "/thai:seven": "\u0E57",
    "/thai:six": "\u0E56",
    "/thai:sorusi": "\u0E29",
    "/thai:sosala": "\u0E28",
    "/thai:soso": "\u0E0B",
    "/thai:sosua": "\u0E2A",
    "/thai:thanthakhat": "\u0E4C",
    "/thai:thonangmontho": "\u0E11",
    "/thai:thophuthao": "\u0E12",
    "/thai:thothahan": "\u0E17",
    "/thai:thothan": "\u0E10",
    "/thai:thothong": "\u0E18",
    "/thai:thothung": "\u0E16",
    "/thai:three": "\u0E53",
    "/thai:topatak": "\u0E0F",
    "/thai:totao": "\u0E15",
    "/thai:two": "\u0E52",
    "/thai:wowaen": "\u0E27",
    "/thai:yamakkan": "\u0E4E",
    "/thai:yoyak": "\u0E22",
    "/thai:yoying": "\u0E0D",
    "/thai:zero": "\u0E50",
    "/thal": "\u0630",
    "/thal.fina": "\uFEAC",
    "/thal.init_superscriptalef.fina": "\uFC5B",
    "/thal.isol": "\uFEAB",
    "/thalarabic": "\u0630",
    "/thalfinalarabic": "\uFEAC",
    "/thanthakhatlowleftthai": "\uF898",
    "/thanthakhatlowrightthai": "\uF897",
    "/thanthakhatthai": "\u0E4C",
    "/thanthakhatupperleftthai": "\uF896",
    "/theh": "\u062B",
    "/theh.fina": "\uFE9A",
    "/theh.init": "\uFE9B",
    "/theh.init_alefmaksura.fina": "\uFC13",
    "/theh.init_jeem.fina": "\uFC11",
    "/theh.init_meem.fina": "\uFC12",
    "/theh.init_meem.medi": "\uFCA6",
    "/theh.init_yeh.fina": "\uFC14",
    "/theh.isol": "\uFE99",
    "/theh.medi": "\uFE9C",
    "/theh.medi_alefmaksura.fina": "\uFC7A",
    "/theh.medi_heh.medi": "\uFCE6",
    "/theh.medi_meem.fina": "\uFC78",
    "/theh.medi_meem.medi": "\uFCE5",
    "/theh.medi_noon.fina": "\uFC79",
    "/theh.medi_reh.fina": "\uFC76",
    "/theh.medi_yeh.fina": "\uFC7B",
    "/theh.medi_zain.fina": "\uFC77",
    "/theharabic": "\u062B",
    "/thehfinalarabic": "\uFE9A",
    "/thehinitialarabic": "\uFE9B",
    "/thehmedialarabic": "\uFE9C",
    "/thereexists": "\u2203",
    "/therefore": "\u2234",
    "/thermometer": "\u1F321",
    "/theta": "\u03B8",
    "/theta.math": "\u03D1",
    "/theta1": "\u03D1",
    "/thetasymbolgreek": "\u03D1",
    "/thieuthacirclekorean": "\u3279",
    "/thieuthaparenkorean": "\u3219",
    "/thieuthcirclekorean": "\u326B",
    "/thieuthkorean": "\u314C",
    "/thieuthparenkorean": "\u320B",
    "/thinspace": "\u2009",
    "/thirteencircle": "\u246C",
    "/thirteencircleblack": "\u24ED",
    "/thirteenparen": "\u2480",
    "/thirteenparenthesized": "\u2480",
    "/thirteenperiod": "\u2494",
    "/thirtycircle": "\u325A",
    "/thirtycirclesquare": "\u324A",
    "/thirtyeightcircle": "\u32B3",
    "/thirtyfivecircle": "\u325F",
    "/thirtyfourcircle": "\u325E",
    "/thirtyhangzhou": "\u303A",
    "/thirtyninecircle": "\u32B4",
    "/thirtyonecircle": "\u325B",
    "/thirtysevencircle": "\u32B2",
    "/thirtysixcircle": "\u32B1",
    "/thirtythreecircle": "\u325D",
    "/thirtytwocircle": "\u325C",
    "/thonangmonthothai": "\u0E11",
    "/thook": "\u01AD",
    "/thophuthaothai": "\u0E12",
    "/thorn": "\u00FE",
    "/thornstroke": "\uA765",
    "/thornstrokedescender": "\uA767",
    "/thothahanthai": "\u0E17",
    "/thothanthai": "\u0E10",
    "/thothongthai": "\u0E18",
    "/thothungthai": "\u0E16",
    "/thoughtBalloon": "\u1F4AD",
    "/thousandcyrillic": "\u0482",
    "/thousandscyr": "\u0482",
    "/thousandsseparator": "\u066C",
    "/thousandsseparatorarabic": "\u066C",
    "/thousandsseparatorpersian": "\u066C",
    "/three": "\u0033",
    "/three.inferior": "\u2083",
    "/three.roman": "\u2162",
    "/three.romansmall": "\u2172",
    "/threeButtonMouse": "\u1F5B1",
    "/threeNetworkedComputers": "\u1F5A7",
    "/threeRaysAbove": "\u1F5E4",
    "/threeRaysBelow": "\u1F5E5",
    "/threeRaysLeft": "\u1F5E6",
    "/threeRaysRight": "\u1F5E7",
    "/threeSpeechBubbles": "\u1F5EB",
    "/threearabic": "\u0663",
    "/threebengali": "\u09E9",
    "/threecircle": "\u2462",
    "/threecircledbl": "\u24F7",
    "/threecircleinversesansserif": "\u278C",
    "/threecomma": "\u1F104",
    "/threedeva": "\u0969",
    "/threedimensionalangle": "\u27C0",
    "/threedotpunctuation": "\u2056",
    "/threedotsaboveabove": "\u06DB",
    "/threedsquare": "\u1F19B",
    "/threeeighths": "\u215C",
    "/threefar": "\u06F3",
    "/threefifths": "\u2157",
    "/threegujarati": "\u0AE9",
    "/threegurmukhi": "\u0A69",
    "/threehackarabic": "\u0663",
    "/threehangzhou": "\u3023",
    "/threeideographiccircled": "\u3282",
    "/threeideographicparen": "\u3222",
    "/threeinferior": "\u2083",
    "/threelinesconvergingleft": "\u269F",
    "/threelinesconvergingright": "\u269E",
    "/threemonospace": "\uFF13",
    "/threenumeratorbengali": "\u09F6",
    "/threeoldstyle": "\uF733",
    "/threeparen": "\u2476",
    "/threeparenthesized": "\u2476",
    "/threeperemspace": "\u2004",
    "/threeperiod": "\u248A",
    "/threepersian": "\u06F3",
    "/threequarters": "\u00BE",
    "/threequartersemdash": "\uF6DE",
    "/threerightarrows": "\u21F6",
    "/threeroman": "\u2172",
    "/threesuperior": "\u00B3",
    "/threethai": "\u0E53",
    "/thumbsDownSign": "\u1F44E",
    "/thumbsUpSign": "\u1F44D",
    "/thundercloudrain": "\u26C8",
    "/thunderstorm": "\u2608",
    "/thzfullwidth": "\u3394",
    "/thzsquare": "\u3394",
    "/tibt:AA": "\u0F60",
    "/tibt:a": "\u0F68",
    "/tibt:aavowelsign": "\u0F71",
    "/tibt:angkhanggyasmark": "\u0F3D",
    "/tibt:angkhanggyonmark": "\u0F3C",
    "/tibt:astrologicalkhyudpasign": "\u0F18",
    "/tibt:astrologicalsdongtshugssign": "\u0F19",
    "/tibt:astrologicalsgragcancharrtagssign": "\u0F17",
    "/tibt:asubjoined": "\u0FB8",
    "/tibt:ba": "\u0F56",
    "/tibt:basubjoined": "\u0FA6",
    "/tibt:bha": "\u0F57",
    "/tibt:bhasubjoined": "\u0FA7",
    "/tibt:bkashogyigmgomark": "\u0F0A",
    "/tibt:brdarnyingyigmgomdunmainitialmark": "\u0FD3",
    "/tibt:brdarnyingyigmgosgabmaclosingmark": "\u0FD4",
    "/tibt:bsdusrtagsmark": "\u0F34",
    "/tibt:bskashoggimgorgyanmark": "\u0FD0",
    "/tibt:bskuryigmgomark": "\u0F09",
    "/tibt:ca": "\u0F45",
    "/tibt:cangteucantillationsign": "\u0FC2",
    "/tibt:caretdzudrtagsbzhimigcanmark": "\u0F36",
    "/tibt:caretdzudrtagsmelongcanmark": "\u0F13",
    "/tibt:caretyigmgophurshadmamark": "\u0F06",
    "/tibt:casubjoined": "\u0F95",
    "/tibt:cha": "\u0F46",
    "/tibt:chadrtagslogotypesign": "\u0F15",
    "/tibt:chasubjoined": "\u0F96",
    "/tibt:chemgomark": "\u0F38",
    "/tibt:da": "\u0F51",
    "/tibt:dasubjoined": "\u0FA1",
    "/tibt:dda": "\u0F4C",
    "/tibt:ddasubjoined": "\u0F9C",
    "/tibt:ddha": "\u0F4D",
    "/tibt:ddhasubjoined": "\u0F9D",
    "/tibt:delimitertshegbstarmark": "\u0F0C",
    "/tibt:dha": "\u0F52",
    "/tibt:dhasubjoined": "\u0FA2",
    "/tibt:drilbusymbol": "\u0FC4",
    "/tibt:dza": "\u0F5B",
    "/tibt:dzasubjoined": "\u0FAB",
    "/tibt:dzha": "\u0F5C",
    "/tibt:dzhasubjoined": "\u0FAC",
    "/tibt:eevowelsign": "\u0F7B",
    "/tibt:eight": "\u0F28",
    "/tibt:evowelsign": "\u0F7A",
    "/tibt:five": "\u0F25",
    "/tibt:four": "\u0F24",
    "/tibt:ga": "\u0F42",
    "/tibt:gasubjoined": "\u0F92",
    "/tibt:gha": "\u0F43",
    "/tibt:ghasubjoined": "\u0F93",
    "/tibt:grucanrgyingssign": "\u0F8A",
    "/tibt:grumedrgyingssign": "\u0F8B",
    "/tibt:gtertshegmark": "\u0F14",
    "/tibt:gteryigmgotruncatedamark": "\u0F01",
    "/tibt:gteryigmgoumgtertshegmamark": "\u0F03",
    "/tibt:gteryigmgoumrnambcadmamark": "\u0F02",
    "/tibt:gugrtagsgyasmark": "\u0F3B",
    "/tibt:gugrtagsgyonmark": "\u0F3A",
    "/tibt:ha": "\u0F67",
    "/tibt:halantamark": "\u0F84",
    "/tibt:halfeight": "\u0F31",
    "/tibt:halffive": "\u0F2E",
    "/tibt:halffour": "\u0F2D",
    "/tibt:halfnine": "\u0F32",
    "/tibt:halfone": "\u0F2A",
    "/tibt:halfseven": "\u0F30",
    "/tibt:halfsix": "\u0F2F",
    "/tibt:halfthree": "\u0F2C",
    "/tibt:halftwo": "\u0F2B",
    "/tibt:halfzero": "\u0F33",
    "/tibt:hasubjoined": "\u0FB7",
    "/tibt:heavybeatcantillationsign": "\u0FC0",
    "/tibt:iivowelsign": "\u0F73",
    "/tibt:intersyllabictshegmark": "\u0F0B",
    "/tibt:invertedmchucansign": "\u0F8C",
    "/tibt:invertedmchucansubjoinedsign": "\u0F8F",
    "/tibt:ivowelsign": "\u0F72",
    "/tibt:ja": "\u0F47",
    "/tibt:jasubjoined": "\u0F97",
    "/tibt:ka": "\u0F40",
    "/tibt:kasubjoined": "\u0F90",
    "/tibt:kha": "\u0F41",
    "/tibt:khasubjoined": "\u0F91",
    "/tibt:kka": "\u0F6B",
    "/tibt:kssa": "\u0F69",
    "/tibt:kssasubjoined": "\u0FB9",
    "/tibt:kurukha": "\u0FBE",
    "/tibt:kurukhabzhimigcan": "\u0FBF",
    "/tibt:la": "\u0F63",
    "/tibt:lasubjoined": "\u0FB3",
    "/tibt:lcetsacansign": "\u0F88",
    "/tibt:lcetsacansubjoinedsign": "\u0F8D",
    "/tibt:lcirtagssign": "\u0F86",
    "/tibt:leadingmchanrtagsmark": "\u0FD9",
    "/tibt:lhagrtagslogotypesign": "\u0F16",
    "/tibt:lightbeatcantillationsign": "\u0FC1",
    "/tibt:llvocalicvowelsign": "\u0F79",
    "/tibt:lvocalicvowelsign": "\u0F78",
    "/tibt:ma": "\u0F58",
    "/tibt:martshessign": "\u0F3F",
    "/tibt:masubjoined": "\u0FA8",
    "/tibt:mchucansign": "\u0F89",
    "/tibt:mchucansubjoinedsign": "\u0F8E",
    "/tibt:mnyamyiggimgorgyanmark": "\u0FD1",
    "/tibt:na": "\u0F53",
    "/tibt:nasubjoined": "\u0FA3",
    "/tibt:nga": "\u0F44",
    "/tibt:ngasbzungnyizlamark": "\u0F35",
    "/tibt:ngasbzungsgorrtagsmark": "\u0F37",
    "/tibt:ngasubjoined": "\u0F94",
    "/tibt:nine": "\u0F29",
    "/tibt:nna": "\u0F4E",
    "/tibt:nnasubjoined": "\u0F9E",
    "/tibt:norbubzhikhyilsymbol": "\u0FCC",
    "/tibt:norbugsumkhyilsymbol": "\u0FCB",
    "/tibt:norbunyiskhyilsymbol": "\u0FCA",
    "/tibt:norbusymbol": "\u0FC9",
    "/tibt:nya": "\u0F49",
    "/tibt:nyasubjoined": "\u0F99",
    "/tibt:nyisshadmark": "\u0F0E",
    "/tibt:nyistshegmark": "\u0FD2",
    "/tibt:nyistshegshadmark": "\u0F10",
    "/tibt:nyizlanaadasign": "\u0F82",
    "/tibt:omsyllable": "\u0F00",
    "/tibt:one": "\u0F21",
    "/tibt:oovowelsign": "\u0F7D",
    "/tibt:ovowelsign": "\u0F7C",
    "/tibt:pa": "\u0F54",
    "/tibt:padmagdansymbol": "\u0FC6",
    "/tibt:palutamark": "\u0F85",
    "/tibt:pasubjoined": "\u0FA4",
    "/tibt:pha": "\u0F55",
    "/tibt:phasubjoined": "\u0FA5",
    "/tibt:phurpasymbol": "\u0FC8",
    "/tibt:ra": "\u0F62",
    "/tibt:rafixed": "\u0F6A",
    "/tibt:rasubjoined": "\u0FB2",
    "/tibt:rasubjoinedfixed": "\u0FBC",
    "/tibt:rdeldkargcigsign": "\u0F1A",
    "/tibt:rdeldkargnyissign": "\u0F1B",
    "/tibt:rdeldkargsumsign": "\u0F1C",
    "/tibt:rdeldkarrdelnagsign": "\u0F1F",
    "/tibt:rdelnaggcigsign": "\u0F1D",
    "/tibt:rdelnaggnyissign": "\u0F1E",
    "/tibt:rdelnaggsumsign": "\u0FCF",
    "/tibt:rdelnagrdeldkarsign": "\u0FCE",
    "/tibt:rdorjergyagramsymbol": "\u0FC7",
    "/tibt:rdorjesymbol": "\u0FC5",
    "/tibt:reversediivowelsign": "\u0F81",
    "/tibt:reversedivowelsign": "\u0F80",
    "/tibt:rgyagramshadmark": "\u0F12",
    "/tibt:rinchenspungsshadmark": "\u0F11",
    "/tibt:rjessungarosign": "\u0F7E",
    "/tibt:rnambcadsign": "\u0F7F",
    "/tibt:rra": "\u0F6C",
    "/tibt:rrvocalicvowelsign": "\u0F77",
    "/tibt:rvocalicvowelsign": "\u0F76",
    "/tibt:sa": "\u0F66",
    "/tibt:sasubjoined": "\u0FB6",
    "/tibt:sbrulshadmark": "\u0F08",
    "/tibt:sbubchalcantillationsign": "\u0FC3",
    "/tibt:seven": "\u0F27",
    "/tibt:sha": "\u0F64",
    "/tibt:shadmark": "\u0F0D",
    "/tibt:shasubjoined": "\u0FB4",
    "/tibt:six": "\u0F26",
    "/tibt:snaldansign": "\u0F83",
    "/tibt:ssa": "\u0F65",
    "/tibt:ssasubjoined": "\u0FB5",
    "/tibt:subjoinedAA": "\u0FB0",
    "/tibt:svastileft": "\u0FD6",
    "/tibt:svastileftdot": "\u0FD8",
    "/tibt:svastiright": "\u0FD5",
    "/tibt:svastirightdot": "\u0FD7",
    "/tibt:ta": "\u0F4F",
    "/tibt:tasubjoined": "\u0F9F",
    "/tibt:tha": "\u0F50",
    "/tibt:thasubjoined": "\u0FA0",
    "/tibt:three": "\u0F23",
    "/tibt:trailingmchanrtagsmark": "\u0FDA",
    "/tibt:tsa": "\u0F59",
    "/tibt:tsaphrumark": "\u0F39",
    "/tibt:tsasubjoined": "\u0FA9",
    "/tibt:tsha": "\u0F5A",
    "/tibt:tshasubjoined": "\u0FAA",
    "/tibt:tshegshadmark": "\u0F0F",
    "/tibt:tta": "\u0F4A",
    "/tibt:ttasubjoined": "\u0F9A",
    "/tibt:ttha": "\u0F4B",
    "/tibt:tthasubjoined": "\u0F9B",
    "/tibt:two": "\u0F22",
    "/tibt:uuvowelsign": "\u0F75",
    "/tibt:uvowelsign": "\u0F74",
    "/tibt:wa": "\u0F5D",
    "/tibt:wasubjoined": "\u0FAD",
    "/tibt:wasubjoinedfixed": "\u0FBA",
    "/tibt:ya": "\u0F61",
    "/tibt:yangrtagssign": "\u0F87",
    "/tibt:yartshessign": "\u0F3E",
    "/tibt:yasubjoined": "\u0FB1",
    "/tibt:yasubjoinedfixed": "\u0FBB",
    "/tibt:yigmgomdunmainitialmark": "\u0F04",
    "/tibt:yigmgosgabmaclosingmark": "\u0F05",
    "/tibt:yigmgotshegshadmamark": "\u0F07",
    "/tibt:za": "\u0F5F",
    "/tibt:zasubjoined": "\u0FAF",
    "/tibt:zero": "\u0F20",
    "/tibt:zha": "\u0F5E",
    "/tibt:zhasubjoined": "\u0FAE",
    "/ticirclekatakana": "\u32E0",
    "/tickconvavediamondleftwhite": "\u27E2",
    "/tickconvavediamondrightwhite": "\u27E3",
    "/ticket": "\u1F3AB",
    "/tickleftwhitesquare": "\u27E4",
    "/tickrightwhitesquare": "\u27E5",
    "/tifcha:hb": "\u0596",
    "/tiger": "\u1F405",
    "/tigerFace": "\u1F42F",
    "/tihiragana": "\u3061",
    "/tikatakana": "\u30C1",
    "/tikatakanahalfwidth": "\uFF81",
    "/tikeutacirclekorean": "\u3270",
    "/tikeutaparenkorean": "\u3210",
    "/tikeutcirclekorean": "\u3262",
    "/tikeutkorean": "\u3137",
    "/tikeutparenkorean": "\u3202",
    "/tilde": "\u02DC",
    "/tildebelowcmb": "\u0330",
    "/tildecmb": "\u0303",
    "/tildecomb": "\u0303",
    "/tildediaeresisfunc": "\u2368",
    "/tildedotaccent": "\u2E1E",
    "/tildedotbelow": "\u2E1F",
    "/tildedoublecmb": "\u0360",
    "/tildeequalsreversed": "\u22CD",
    "/tildelowmod": "\u02F7",
    "/tildeoperator": "\u223C",
    "/tildeoverlaycmb": "\u0334",
    "/tildereversed": "\u223D",
    "/tildering": "\u2E1B",
    "/tildetpl": "\u224B",
    "/tildeverticalcmb": "\u033E",
    "/timerclock": "\u23F2",
    "/timescircle": "\u2297",
    "/tinsular": "\uA787",
    "/tipehahebrew": "\u0596",
    "/tipehalefthebrew": "\u0596",
    "/tippigurmukhi": "\u0A70",
    "/tiredFace": "\u1F62B",
    "/tironiansignet": "\u204A",
    "/tirtatumetespada": "\uA9DE",
    "/titlocmbcyr": "\u0483",
    "/titlocyrilliccmb": "\u0483",
    "/tiwnarmenian": "\u057F",
    "/tjekomicyr": "\u050F",
    "/tlinebelow": "\u1E6F",
    "/tmonospace": "\uFF54",
    "/toarmenian": "\u0569",
    "/tocirclekatakana": "\u32E3",
    "/tocornerarrowNW": "\u21F1",
    "/tocornerarrowSE": "\u21F2",
    "/tohiragana": "\u3068",
    "/toilet": "\u1F6BD",
    "/tokatakana": "\u30C8",
    "/tokatakanahalfwidth": "\uFF84",
    "/tokyoTower": "\u1F5FC",
    "/tolongvowel": "\uA9B5",
    "/tomato": "\u1F345",
    "/tonebarextrahighmod": "\u02E5",
    "/tonebarextralowmod": "\u02E9",
    "/tonebarhighmod": "\u02E6",
    "/tonebarlowmod": "\u02E8",
    "/tonebarmidmod": "\u02E7",
    "/tonefive": "\u01BD",
    "/tonehighbeginmod": "\u02F9",
    "/tonehighendmod": "\u02FA",
    "/tonelowbeginmod": "\u02FB",
    "/tonelowendmod": "\u02FC",
    "/tonesix": "\u0185",
    "/tonetwo": "\u01A8",
    "/tongue": "\u1F445",
    "/tonos": "\u0384",
    "/tonsquare": "\u3327",
    "/topHat": "\u1F3A9",
    "/topUpwardsArrowAbove": "\u1F51D",
    "/topatakthai": "\u0E0F",
    "/tortoiseshellbracketleft": "\u3014",
    "/tortoiseshellbracketleftsmall": "\uFE5D",
    "/tortoiseshellbracketleftvertical": "\uFE39",
    "/tortoiseshellbracketright": "\u3015",
    "/tortoiseshellbracketrightsmall": "\uFE5E",
    "/tortoiseshellbracketrightvertical": "\uFE3A",
    "/totalrunout": "\u2330",
    "/totaothai": "\u0E15",
    "/tpalatalhook": "\u01AB",
    "/tparen": "\u24AF",
    "/tparenthesized": "\u24AF",
    "/trackball": "\u1F5B2",
    "/tractor": "\u1F69C",
    "/trademark": "\u2122",
    "/trademarksans": "\uF8EA",
    "/trademarkserif": "\uF6DB",
    "/train": "\u1F686",
    "/tram": "\u1F68A",
    "/tramCar": "\u1F68B",
    "/trapeziumwhite": "\u23E2",
    "/tresillo": "\uA72B",
    "/tretroflex": "\u0288",
    "/tretroflexhook": "\u0288",
    "/triagdn": "\u25BC",
    "/triaglf": "\u25C4",
    "/triagrt": "\u25BA",
    "/triagup": "\u25B2",
    "/triangleWithRoundedCorners": "\u1F6C6",
    "/triangledotupwhite": "\u25EC",
    "/triangledownblack": "\u25BC",
    "/triangledownsmallblack": "\u25BE",
    "/triangledownsmallwhite": "\u25BF",
    "/triangledownwhite": "\u25BD",
    "/trianglehalfupleftblack": "\u25ED",
    "/trianglehalfuprightblack": "\u25EE",
    "/triangleleftblack": "\u25C0",
    "/triangleleftsmallblack": "\u25C2",
    "/triangleleftsmallwhite": "\u25C3",
    "/triangleleftwhite": "\u25C1",
    "/triangleright": "\u22BF",
    "/trianglerightblack": "\u25B6",
    "/trianglerightsmallblack": "\u25B8",
    "/trianglerightsmallwhite": "\u25B9",
    "/trianglerightwhite": "\u25B7",
    "/triangleupblack": "\u25B2",
    "/triangleupsmallblack": "\u25B4",
    "/triangleupsmallwhite": "\u25B5",
    "/triangleupwhite": "\u25B3",
    "/triangularFlagOnPost": "\u1F6A9",
    "/triangularRuler": "\u1F4D0",
    "/triangularbullet": "\u2023",
    "/tricolon": "\u205D",
    "/tricontainingtriwhiteanglesmall": "\u27C1",
    "/tridentEmblem": "\u1F531",
    "/trigramearth": "\u2637",
    "/trigramfire": "\u2632",
    "/trigramheaven": "\u2630",
    "/trigramlake": "\u2631",
    "/trigrammountain": "\u2636",
    "/trigramthunder": "\u2633",
    "/trigramwater": "\u2635",
    "/trigramwind": "\u2634",
    "/triplearrowleft": "\u21DA",
    "/triplearrowright": "\u21DB",
    "/tripledot": "\u061E",
    "/trisememetrical": "\u23D7",
    "/trns:baby": "\u1F6BC",
    "/trolleybus": "\u1F68E",
    "/trophy": "\u1F3C6",
    "/tropicalDrink": "\u1F379",
    "/tropicalFish": "\u1F420",
    "/truckblack": "\u26DF",
    "/true": "\u22A8",
    "/trumpet": "\u1F3BA",
    "/ts": "\u02A6",
    "/tsadi": "\u05E6",
    "/tsadi:hb": "\u05E6",
    "/tsadidagesh": "\uFB46",
    "/tsadidageshhebrew": "\uFB46",
    "/tsadihebrew": "\u05E6",
    "/tsadiwithdagesh:hb": "\uFB46",
    "/tsecyr": "\u0446",
    "/tsecyrillic": "\u0446",
    "/tsere": "\u05B5",
    "/tsere12": "\u05B5",
    "/tsere1e": "\u05B5",
    "/tsere2b": "\u05B5",
    "/tsere:hb": "\u05B5",
    "/tserehebrew": "\u05B5",
    "/tserenarrowhebrew": "\u05B5",
    "/tserequarterhebrew": "\u05B5",
    "/tserewidehebrew": "\u05B5",
    "/tshecyr": "\u045B",
    "/tshecyrillic": "\u045B",
    "/tsinnorit:hb": "\u05AE",
    "/tstroke": "\u2C66",
    "/tsuperior": "\uF6F3",
    "/ttabengali": "\u099F",
    "/ttadeva": "\u091F",
    "/ttagujarati": "\u0A9F",
    "/ttagurmukhi": "\u0A1F",
    "/ttamahaprana": "\uA99C",
    "/tteh": "\u0679",
    "/tteh.fina": "\uFB67",
    "/tteh.init": "\uFB68",
    "/tteh.isol": "\uFB66",
    "/tteh.medi": "\uFB69",
    "/tteharabic": "\u0679",
    "/tteheh": "\u067A",
    "/tteheh.fina": "\uFB5F",
    "/tteheh.init": "\uFB60",
    "/tteheh.isol": "\uFB5E",
    "/tteheh.medi": "\uFB61",
    "/ttehfinalarabic": "\uFB67",
    "/ttehinitialarabic": "\uFB68",
    "/ttehmedialarabic": "\uFB69",
    "/tthabengali": "\u09A0",
    "/tthadeva": "\u0920",
    "/tthagujarati": "\u0AA0",
    "/tthagurmukhi": "\u0A20",
    "/tturned": "\u0287",
    "/tucirclekatakana": "\u32E1",
    "/tugrik": "\u20AE",
    "/tuhiragana": "\u3064",
    "/tukatakana": "\u30C4",
    "/tukatakanahalfwidth": "\uFF82",
    "/tulip": "\u1F337",
    "/tum": "\uA777",
    "/turkishlira": "\u20BA",
    "/turnedOkHandSign": "\u1F58F",
    "/turnedcomma": "\u2E32",
    "/turneddagger": "\u2E38",
    "/turneddigitthree": "\u218B",
    "/turneddigittwo": "\u218A",
    "/turnedpiselehpada": "\uA9CD",
    "/turnedsemicolon": "\u2E35",
    "/turnedshogipieceblack": "\u26CA",
    "/turnedshogipiecewhite": "\u26C9",
    "/turnstiledblverticalbarright": "\u22AB",
    "/turnstileleftrightdbl": "\u27DA",
    "/turnstiletplverticalbarright": "\u22AA",
    "/turtle": "\u1F422",
    "/tusmallhiragana": "\u3063",
    "/tusmallkatakana": "\u30C3",
    "/tusmallkatakanahalfwidth": "\uFF6F",
    "/twelve.roman": "\u216B",
    "/twelve.romansmall": "\u217B",
    "/twelvecircle": "\u246B",
    "/twelvecircleblack": "\u24EC",
    "/twelveparen": "\u247F",
    "/twelveparenthesized": "\u247F",
    "/twelveperiod": "\u2493",
    "/twelveroman": "\u217B",
    "/twenty-twopointtwosquare": "\u1F1A2",
    "/twentycircle": "\u2473",
    "/twentycircleblack": "\u24F4",
    "/twentycirclesquare": "\u3249",
    "/twentyeightcircle": "\u3258",
    "/twentyfivecircle": "\u3255",
    "/twentyfourcircle": "\u3254",
    "/twentyhangzhou": "\u5344",
    "/twentyninecircle": "\u3259",
    "/twentyonecircle": "\u3251",
    "/twentyparen": "\u2487",
    "/twentyparenthesized": "\u2487",
    "/twentyperiod": "\u249B",
    "/twentysevencircle": "\u3257",
    "/twentysixcircle": "\u3256",
    "/twentythreecircle": "\u3253",
    "/twentytwocircle": "\u3252",
    "/twistedRightwardsArrows": "\u1F500",
    "/two": "\u0032",
    "/two.inferior": "\u2082",
    "/two.roman": "\u2161",
    "/two.romansmall": "\u2171",
    "/twoButtonMouse": "\u1F5B0",
    "/twoHearts": "\u1F495",
    "/twoMenHoldingHands": "\u1F46C",
    "/twoSpeechBubbles": "\u1F5EA",
    "/twoWomenHoldingHands": "\u1F46D",
    "/twoarabic": "\u0662",
    "/twoasterisksalignedvertically": "\u2051",
    "/twobengali": "\u09E8",
    "/twocircle": "\u2461",
    "/twocircledbl": "\u24F6",
    "/twocircleinversesansserif": "\u278B",
    "/twocomma": "\u1F103",
    "/twodeva": "\u0968",
    "/twodotenleader": "\u2025",
    "/twodotleader": "\u2025",
    "/twodotleadervertical": "\uFE30",
    "/twodotpunctuation": "\u205A",
    "/twodotsoveronedot": "\u2E2A",
    "/twofar": "\u06F2",
    "/twofifths": "\u2156",
    "/twogujarati": "\u0AE8",
    "/twogurmukhi": "\u0A68",
    "/twohackarabic": "\u0662",
    "/twohangzhou": "\u3022",
    "/twoideographiccircled": "\u3281",
    "/twoideographicparen": "\u3221",
    "/twoinferior": "\u2082",
    "/twoksquare": "\u1F19D",
    "/twomonospace": "\uFF12",
    "/twonumeratorbengali": "\u09F5",
    "/twooldstyle": "\uF732",
    "/twoparen": "\u2475",
    "/twoparenthesized": "\u2475",
    "/twoperiod": "\u2489",
    "/twopersian": "\u06F2",
    "/tworoman": "\u2171",
    "/twoshortsjoinedmetrical": "\u23D6",
    "/twoshortsoverlongmetrical": "\u23D5",
    "/twostroke": "\u01BB",
    "/twosuperior": "\u00B2",
    "/twothai": "\u0E52",
    "/twothirds": "\u2154",
    "/twowayleftwaytrafficblack": "\u26D6",
    "/twowayleftwaytrafficwhite": "\u26D7",
    "/tz": "\uA729",
    "/u": "\u0075",
    "/u.fina": "\uFBD8",
    "/u.isol": "\uFBD7",
    "/uacute": "\u00FA",
    "/uacutedblcyr": "\u04F3",
    "/ubar": "\u0289",
    "/ubengali": "\u0989",
    "/ubopomofo": "\u3128",
    "/ubracketleft": "\u2E26",
    "/ubracketright": "\u2E27",
    "/ubreve": "\u016D",
    "/ucaron": "\u01D4",
    "/ucircle": "\u24E4",
    "/ucirclekatakana": "\u32D2",
    "/ucircumflex": "\u00FB",
    "/ucircumflexbelow": "\u1E77",
    "/ucyr": "\u0443",
    "/ucyrillic": "\u0443",
    "/udattadeva": "\u0951",
    "/udblacute": "\u0171",
    "/udblgrave": "\u0215",
    "/udeva": "\u0909",
    "/udieresis": "\u00FC",
    "/udieresisacute": "\u01D8",
    "/udieresisbelow": "\u1E73",
    "/udieresiscaron": "\u01DA",
    "/udieresiscyr": "\u04F1",
    "/udieresiscyrillic": "\u04F1",
    "/udieresisgrave": "\u01DC",
    "/udieresismacron": "\u01D6",
    "/udotbelow": "\u1EE5",
    "/ugrave": "\u00F9",
    "/ugravedbl": "\u0215",
    "/ugujarati": "\u0A89",
    "/ugurmukhi": "\u0A09",
    "/uhamza": "\u0677",
    "/uhamza.isol": "\uFBDD",
    "/uhdsquare": "\u1F1AB",
    "/uhiragana": "\u3046",
    "/uhoi": "\u1EE7",
    "/uhookabove": "\u1EE7",
    "/uhorn": "\u01B0",
    "/uhornacute": "\u1EE9",
    "/uhorndotbelow": "\u1EF1",
    "/uhorngrave": "\u1EEB",
    "/uhornhoi": "\u1EED",
    "/uhornhookabove": "\u1EED",
    "/uhorntilde": "\u1EEF",
    "/uhungarumlaut": "\u0171",
    "/uhungarumlautcyrillic": "\u04F3",
    "/uighurkazakhkirghizalefmaksura.init": "\uFBE8",
    "/uighurkazakhkirghizalefmaksura.medi": "\uFBE9",
    "/uighurkirghizyeh.init_hamzaabove.medi_alefmaksura.fina": "\uFBF9",
    "/uighurkirghizyeh.init_hamzaabove.medi_alefmaksura.medi": "\uFBFB",
    "/uighurkirghizyeh.medi_hamzaabove.medi_alefmaksura.fina": "\uFBFA",
    "/uinvertedbreve": "\u0217",
    "/ukatakana": "\u30A6",
    "/ukatakanahalfwidth": "\uFF73",
    "/ukcyr": "\u0479",
    "/ukcyrillic": "\u0479",
    "/ukorean": "\u315C",
    "/um": "\uA778",
    "/umacron": "\u016B",
    "/umacroncyr": "\u04EF",
    "/umacroncyrillic": "\u04EF",
    "/umacrondieresis": "\u1E7B",
    "/umatragurmukhi": "\u0A41",
    "/umbrella": "\u2602",
    "/umbrellaonground": "\u26F1",
    "/umbrellaraindrops": "\u2614",
    "/umonospace": "\uFF55",
    "/unamusedFace": "\u1F612",
    "/unaspiratedmod": "\u02ED",
    "/underscore": "\u005F",
    "/underscorecenterline": "\uFE4E",
    "/underscoredashed": "\uFE4D",
    "/underscoredbl": "\u2017",
    "/underscoremonospace": "\uFF3F",
    "/underscorevertical": "\uFE33",
    "/underscorewavy": "\uFE4F",
    "/underscorewavyvertical": "\uFE34",
    "/undertie": "\u203F",
    "/undo": "\u238C",
    "/union": "\u222A",
    "/unionarray": "\u22C3",
    "/uniondbl": "\u22D3",
    "/universal": "\u2200",
    "/unmarriedpartnership": "\u26AF",
    "/uogonek": "\u0173",
    "/uonsquare": "\u3306",
    "/upPointingAirplane": "\u1F6E7",
    "/upPointingMilitaryAirplane": "\u1F6E6",
    "/upPointingSmallAirplane": "\u1F6E8",
    "/uparen": "\u24B0",
    "/uparenthesized": "\u24B0",
    "/uparrowleftofdownarrow": "\u21C5",
    "/upblock": "\u2580",
    "/updblhorzsng": "\u2568",
    "/updblleftsng": "\u255C",
    "/updblrightsng": "\u2559",
    "/upheavydnhorzlight": "\u2540",
    "/upheavyhorzlight": "\u2538",
    "/upheavyleftdnlight": "\u2526",
    "/upheavyleftlight": "\u251A",
    "/upheavyrightdnlight": "\u251E",
    "/upheavyrightlight": "\u2516",
    "/uplightdnhorzheavy": "\u2548",
    "/uplighthorzheavy": "\u2537",
    "/uplightleftdnheavy": "\u252A",
    "/uplightleftheavy": "\u2519",
    "/uplightrightdnheavy": "\u2522",
    "/uplightrightheavy": "\u2515",
    "/upperHalfBlock": "\u2580",
    "/upperOneEighthBlock": "\u2594",
    "/upperRightShadowedWhiteCircle": "\u1F53F",
    "/upperdothebrew": "\u05C4",
    "/upperhalfcircle": "\u25E0",
    "/upperhalfcircleinversewhite": "\u25DA",
    "/upperquadrantcirculararcleft": "\u25DC",
    "/upperquadrantcirculararcright": "\u25DD",
    "/uppertriangleleft": "\u25F8",
    "/uppertriangleleftblack": "\u25E4",
    "/uppertriangleright": "\u25F9",
    "/uppertrianglerightblack": "\u25E5",
    "/upsideDownFace": "\u1F643",
    "/upsilon": "\u03C5",
    "/upsilonacute": "\u1F7B",
    "/upsilonasper": "\u1F51",
    "/upsilonasperacute": "\u1F55",
    "/upsilonaspergrave": "\u1F53",
    "/upsilonaspertilde": "\u1F57",
    "/upsilonbreve": "\u1FE0",
    "/upsilondieresis": "\u03CB",
    "/upsilondieresisacute": "\u1FE3",
    "/upsilondieresisgrave": "\u1FE2",
    "/upsilondieresistilde": "\u1FE7",
    "/upsilondieresistonos": "\u03B0",
    "/upsilongrave": "\u1F7A",
    "/upsilonlatin": "\u028A",
    "/upsilonlenis": "\u1F50",
    "/upsilonlenisacute": "\u1F54",
    "/upsilonlenisgrave": "\u1F52",
    "/upsilonlenistilde": "\u1F56",
    "/upsilontilde": "\u1FE6",
    "/upsilontonos": "\u03CD",
    "/upsilonwithmacron": "\u1FE1",
    "/upsnghorzdbl": "\u2567",
    "/upsngleftdbl": "\u255B",
    "/upsngrightdbl": "\u2558",
    "/uptackbelowcmb": "\u031D",
    "/uptackmod": "\u02D4",
    "/upwithexclamationmarksquare": "\u1F199",
    "/uragurmukhi": "\u0A73",
    "/uranus": "\u2645",
    "/uring": "\u016F",
    "/ushortcyr": "\u045E",
    "/ushortcyrillic": "\u045E",
    "/usmallhiragana": "\u3045",
    "/usmallkatakana": "\u30A5",
    "/usmallkatakanahalfwidth": "\uFF69",
    "/usmod": "\uA770",
    "/ustraightcyr": "\u04AF",
    "/ustraightcyrillic": "\u04AF",
    "/ustraightstrokecyr": "\u04B1",
    "/ustraightstrokecyrillic": "\u04B1",
    "/utilde": "\u0169",
    "/utildeacute": "\u1E79",
    "/utildebelow": "\u1E75",
    "/uubengali": "\u098A",
    "/uudeva": "\u090A",
    "/uugujarati": "\u0A8A",
    "/uugurmukhi": "\u0A0A",
    "/uumatragurmukhi": "\u0A42",
    "/uuvowelsignbengali": "\u09C2",
    "/uuvowelsigndeva": "\u0942",
    "/uuvowelsigngujarati": "\u0AC2",
    "/uvowelsignbengali": "\u09C1",
    "/uvowelsigndeva": "\u0941",
    "/uvowelsigngujarati": "\u0AC1",
    "/v": "\u0076",
    "/vadeva": "\u0935",
    "/vagujarati": "\u0AB5",
    "/vagurmukhi": "\u0A35",
    "/vakatakana": "\u30F7",
    "/vanedownfunc": "\u2356",
    "/vaneleftfunc": "\u2345",
    "/vanerightfunc": "\u2346",
    "/vaneupfunc": "\u234F",
    "/varikajudeospanish:hb": "\uFB1E",
    "/vav": "\u05D5",
    "/vav:hb": "\u05D5",
    "/vav_vav:hb": "\u05F0",
    "/vav_yod:hb": "\u05F1",
    "/vavdagesh": "\uFB35",
    "/vavdagesh65": "\uFB35",
    "/vavdageshhebrew": "\uFB35",
    "/vavhebrew": "\u05D5",
    "/vavholam": "\uFB4B",
    "/vavholamhebrew": "\uFB4B",
    "/vavvavhebrew": "\u05F0",
    "/vavwithdagesh:hb": "\uFB35",
    "/vavwithholam:hb": "\uFB4B",
    "/vavyodhebrew": "\u05F1",
    "/vcircle": "\u24E5",
    "/vcurl": "\u2C74",
    "/vdiagonalstroke": "\uA75F",
    "/vdotbelow": "\u1E7F",
    "/ve.fina": "\uFBDF",
    "/ve.isol": "\uFBDE",
    "/ve:abovetonecandra": "\u1CF4",
    "/ve:anusvaraantargomukhasign": "\u1CE9",
    "/ve:anusvarabahirgomukhasign": "\u1CEA",
    "/ve:anusvarasignlong": "\u1CEF",
    "/ve:anusvaraubhayatomukhasign": "\u1CF1",
    "/ve:anusvaravamagomukhasign": "\u1CEB",
    "/ve:anusvaravamagomukhawithtailsign": "\u1CEC",
    "/ve:ardhavisargasign": "\u1CF2",
    "/ve:atharvaindependentsvaritatone": "\u1CE1",
    "/ve:atikramasign": "\u1CF7",
    "/ve:belowtonecandra": "\u1CD8",
    "/ve:dotbelowtone": "\u1CDD",
    "/ve:hexiformanusvarasignlong": "\u1CEE",
    "/ve:jihvamuliyasign": "\u1CF5",
    "/ve:karshanatone": "\u1CD0",
    "/ve:kathakaanudattatone": "\u1CDC",
    "/ve:nihshvasasign": "\u1CD3",
    "/ve:prenkhatone": "\u1CD2",
    "/ve:rigkashmiriindependentsvaritatone": "\u1CE0",
    "/ve:ringabovetone": "\u1CF8",
    "/ve:ringabovetonedbl": "\u1CF9",
    "/ve:rotatedardhavisargasign": "\u1CF3",
    "/ve:rthanganusvarasignlong": "\u1CF0",
    "/ve:sharatone": "\u1CD1",
    "/ve:svaritatonedbl": "\u1CDA",
    "/ve:svaritatonetpl": "\u1CDB",
    "/ve:threedotsbelowtone": "\u1CDF",
    "/ve:tiryaksign": "\u1CED",
    "/ve:twodotsbelowtone": "\u1CDE",
    "/ve:upadhmaniyasign": "\u1CF6",
    "/ve:visargaanudattasign": "\u1CE5",
    "/ve:visargaanudattasignreversed": "\u1CE6",
    "/ve:visargaanudattawithtailsign": "\u1CE8",
    "/ve:visargasvaritasign": "\u1CE2",
    "/ve:visargaudattasign": "\u1CE3",
    "/ve:visargaudattasignreversed": "\u1CE4",
    "/ve:visargaudattawithtailsign": "\u1CE7",
    "/ve:yajuraggravatedindependentsvaritatone": "\u1CD5",
    "/ve:yajurindependentsvaritatone": "\u1CD6",
    "/ve:yajurkathakaindependentsvaritaschroedertone": "\u1CD9",
    "/ve:yajurkathakaindependentsvaritatone": "\u1CD7",
    "/ve:yajurmidlinesvaritasign": "\u1CD4",
    "/vecyr": "\u0432",
    "/vecyrillic": "\u0432",
    "/veh": "\u06A4",
    "/veh.fina": "\uFB6B",
    "/veh.init": "\uFB6C",
    "/veh.isol": "\uFB6A",
    "/veh.medi": "\uFB6D",
    "/veharabic": "\u06A4",
    "/vehfinalarabic": "\uFB6B",
    "/vehinitialarabic": "\uFB6C",
    "/vehmedialarabic": "\uFB6D",
    "/vekatakana": "\u30F9",
    "/vend": "\uA769",
    "/venus": "\u2640",
    "/versicle": "\u2123",
    "/vert:bracketwhiteleft": "\uFE17",
    "/vert:brakcetwhiteright": "\uFE18",
    "/vert:colon": "\uFE13",
    "/vert:comma": "\uFE10",
    "/vert:ellipsishor": "\uFE19",
    "/vert:exclam": "\uFE15",
    "/vert:ideographiccomma": "\uFE11",
    "/vert:ideographicfullstop": "\uFE12",
    "/vert:question": "\uFE16",
    "/vert:semicolon": "\uFE14",
    "/vertdblhorzsng": "\u256B",
    "/vertdblleftsng": "\u2562",
    "/vertdblrightsng": "\u255F",
    "/vertheavyhorzlight": "\u2542",
    "/vertheavyleftlight": "\u2528",
    "/vertheavyrightlight": "\u2520",
    "/verticalTrafficLight": "\u1F6A6",
    "/verticalbar": "\u007C",
    "/verticalbardbl": "\u2016",
    "/verticalbarhorizontalstroke": "\u27CA",
    "/verticalbarwhitearrowonpedestalup": "\u21ED",
    "/verticalfourdots": "\u205E",
    "/verticalideographiciterationmark": "\u303B",
    "/verticalkanarepeatmark": "\u3031",
    "/verticalkanarepeatmarklowerhalf": "\u3035",
    "/verticalkanarepeatmarkupperhalf": "\u3033",
    "/verticalkanarepeatwithvoicedsoundmark": "\u3032",
    "/verticalkanarepeatwithvoicedsoundmarkupperhalf": "\u3034",
    "/verticallineabovecmb": "\u030D",
    "/verticallinebelowcmb": "\u0329",
    "/verticallinelowmod": "\u02CC",
    "/verticallinemod": "\u02C8",
    "/verticalmalestroke": "\u26A8",
    "/verticalsdbltrokearrowleft": "\u21FA",
    "/verticalsdbltrokearrowleftright": "\u21FC",
    "/verticalsdbltrokearrowright": "\u21FB",
    "/verticalstrokearrowleft": "\u21F7",
    "/verticalstrokearrowleftright": "\u21F9",
    "/verticalstrokearrowright": "\u21F8",
    "/vertlighthorzheavy": "\u253F",
    "/vertlightleftheavy": "\u2525",
    "/vertlightrightheavy": "\u251D",
    "/vertsnghorzdbl": "\u256A",
    "/vertsngleftdbl": "\u2561",
    "/vertsngrightdbl": "\u255E",
    "/verymuchgreater": "\u22D9",
    "/verymuchless": "\u22D8",
    "/vesta": "\u26B6",
    "/vewarmenian": "\u057E",
    "/vhook": "\u028B",
    "/vibrationMode": "\u1F4F3",
    "/videoCamera": "\u1F4F9",
    "/videoGame": "\u1F3AE",
    "/videocassette": "\u1F4FC",
    "/viewdatasquare": "\u2317",
    "/vikatakana": "\u30F8",
    "/violin": "\u1F3BB",
    "/viramabengali": "\u09CD",
    "/viramadeva": "\u094D",
    "/viramagujarati": "\u0ACD",
    "/virgo": "\u264D",
    "/visargabengali": "\u0983",
    "/visargadeva": "\u0903",
    "/visargagujarati": "\u0A83",
    "/visigothicz": "\uA763",
    "/vmonospace": "\uFF56",
    "/voarmenian": "\u0578",
    "/vodsquare": "\u1F1AC",
    "/voicediterationhiragana": "\u309E",
    "/voicediterationkatakana": "\u30FE",
    "/voicedmarkkana": "\u309B",
    "/voicedmarkkanahalfwidth": "\uFF9E",
    "/voicingmod": "\u02EC",
    "/vokatakana": "\u30FA",
    "/volapukae": "\uA79B",
    "/volapukoe": "\uA79D",
    "/volapukue": "\uA79F",
    "/volcano": "\u1F30B",
    "/volleyball": "\u1F3D0",
    "/vovermfullwidth": "\u33DE",
    "/vowelVabove": "\u065A",
    "/voweldotbelow": "\u065C",
    "/vowelinvertedVabove": "\u065B",
    "/vparen": "\u24B1",
    "/vparenthesized": "\u24B1",
    "/vrighthook": "\u2C71",
    "/vssquare": "\u1F19A",
    "/vtilde": "\u1E7D",
    "/vturned": "\u028C",
    "/vuhiragana": "\u3094",
    "/vukatakana": "\u30F4",
    "/vwelsh": "\u1EFD",
    "/vy": "\uA761",
    "/w": "\u0077",
    "/wacirclekatakana": "\u32FB",
    "/wacute": "\u1E83",
    "/waekorean": "\u3159",
    "/wahiragana": "\u308F",
    "/wakatakana": "\u30EF",
    "/wakatakanahalfwidth": "\uFF9C",
    "/wakorean": "\u3158",
    "/waningCrescentMoon": "\u1F318",
    "/waningGibbousMoon": "\u1F316",
    "/warning": "\u26A0",
    "/wasmallhiragana": "\u308E",
    "/wasmallkatakana": "\u30EE",
    "/wastebasket": "\u1F5D1",
    "/watch": "\u231A",
    "/waterBuffalo": "\u1F403",
    "/waterCloset": "\u1F6BE",
    "/waterWave": "\u1F30A",
    "/waterideographiccircled": "\u328C",
    "/waterideographicparen": "\u322C",
    "/watermelon": "\u1F349",
    "/wattosquare": "\u3357",
    "/wavedash": "\u301C",
    "/wavingBlackFlag": "\u1F3F4",
    "/wavingHandSign": "\u1F44B",
    "/wavingWhiteFlag": "\u1F3F3",
    "/wavydash": "\u3030",
    "/wavyhamzabelow": "\u065F",
    "/wavyline": "\u2307",
    "/wavyunderscorevertical": "\uFE34",
    "/waw": "\u0648",
    "/waw.fina": "\uFEEE",
    "/waw.isol": "\uFEED",
    "/wawDigitThreeAbove": "\u0779",
    "/wawDigitTwoAbove": "\u0778",
    "/wawarabic": "\u0648",
    "/wawdotabove": "\u06CF",
    "/wawfinalarabic": "\uFEEE",
    "/wawhamza": "\u0624",
    "/wawhamza.fina": "\uFE86",
    "/wawhamza.isol": "\uFE85",
    "/wawhamzaabovearabic": "\u0624",
    "/wawhamzaabovefinalarabic": "\uFE86",
    "/wawhighhamza": "\u0676",
    "/wawring": "\u06C4",
    "/wawsmall": "\u06E5",
    "/wawtwodotsabove": "\u06CA",
    "/waxingCrescentMoon": "\u1F312",
    "/waxingGibbousMoon": "\u1F314",
    "/wbfullwidth": "\u33DD",
    "/wbsquare": "\u33DD",
    "/wcircle": "\u24E6",
    "/wcircumflex": "\u0175",
    "/wcsquare": "\u1F14F",
    "/wcsquareblack": "\u1F18F",
    "/wdieresis": "\u1E85",
    "/wdot": "\u1E87",
    "/wdotaccent": "\u1E87",
    "/wdotbelow": "\u1E89",
    "/wearyCatFace": "\u1F640",
    "/wearyFace": "\u1F629",
    "/wecirclekatakana": "\u32FD",
    "/wecyr": "\u051D",
    "/wedding": "\u1F492",
    "/wehiragana": "\u3091",
    "/weierstrass": "\u2118",
    "/weightLifter": "\u1F3CB",
    "/wekatakana": "\u30F1",
    "/wekorean": "\u315E",
    "/weokorean": "\u315D",
    "/westsyriaccross": "\u2670",
    "/wgrave": "\u1E81",
    "/whale": "\u1F40B",
    "/wheelchair": "\u267F",
    "/wheelofdharma": "\u2638",
    "/whiteDownPointingBackhandIndex": "\u1F447",
    "/whiteDownPointingLeftHandIndex": "\u1F597",
    "/whiteFlower": "\u1F4AE",
    "/whiteHardShellFloppyDisk": "\u1F5AB",
    "/whiteLatinCross": "\u1F546",
    "/whiteLeftPointingBackhandIndex": "\u1F448",
    "/whitePennant": "\u1F3F1",
    "/whiteRightPointingBackhandIndex": "\u1F449",
    "/whiteSquareButton": "\u1F533",
    "/whiteSun": "\u1F323",
    "/whiteSunBehindCloud": "\u1F325",
    "/whiteSunBehindCloudRain": "\u1F326",
    "/whiteSunSmallCloud": "\u1F324",
    "/whiteTouchtoneTelephone": "\u1F57E",
    "/whiteUpPointingBackhandIndex": "\u1F446",
    "/whitearrowdown": "\u21E9",
    "/whitearrowfromwallright": "\u21F0",
    "/whitearrowleft": "\u21E6",
    "/whitearrowonpedestalup": "\u21EB",
    "/whitearrowright": "\u21E8",
    "/whitearrowup": "\u21E7",
    "/whitearrowupdown": "\u21F3",
    "/whitearrowupfrombar": "\u21EA",
    "/whitebullet": "\u25E6",
    "/whitecircle": "\u25CB",
    "/whitecircleinverse": "\u25D9",
    "/whitecornerbracketleft": "\u300E",
    "/whitecornerbracketleftvertical": "\uFE43",
    "/whitecornerbracketright": "\u300F",
    "/whitecornerbracketrightvertical": "\uFE44",
    "/whitedblarrowonpedestalup": "\u21EF",
    "/whitedblarrowup": "\u21EE",
    "/whitediamond": "\u25C7",
    "/whitediamondcontainingblacksmalldiamond": "\u25C8",
    "/whitedownpointingsmalltriangle": "\u25BF",
    "/whitedownpointingtriangle": "\u25BD",
    "/whiteleftpointingsmalltriangle": "\u25C3",
    "/whiteleftpointingtriangle": "\u25C1",
    "/whitelenticularbracketleft": "\u3016",
    "/whitelenticularbracketright": "\u3017",
    "/whiterightpointingsmalltriangle": "\u25B9",
    "/whiterightpointingtriangle": "\u25B7",
    "/whitesesamedot": "\uFE46",
    "/whitesmallsquare": "\u25AB",
    "/whitesmilingface": "\u263A",
    "/whitesquare": "\u25A1",
    "/whitesquarebracketleft": "\u301A",
    "/whitesquarebracketright": "\u301B",
    "/whitestar": "\u2606",
    "/whitetelephone": "\u260F",
    "/whitetortoiseshellbracketleft": "\u3018",
    "/whitetortoiseshellbracketright": "\u3019",
    "/whiteuppointingsmalltriangle": "\u25B5",
    "/whiteuppointingtriangle": "\u25B3",
    "/whook": "\u2C73",
    "/wicirclekatakana": "\u32FC",
    "/wigglylinevertical": "\u2E3E",
    "/wignyan": "\uA983",
    "/wihiragana": "\u3090",
    "/wikatakana": "\u30F0",
    "/wikorean": "\u315F",
    "/windBlowingFace": "\u1F32C",
    "/windChime": "\u1F390",
    "/windupada": "\uA9C6",
    "/wineGlass": "\u1F377",
    "/winkingFace": "\u1F609",
    "/wiredKeyboard": "\u1F5AE",
    "/wmonospace": "\uFF57",
    "/wocirclekatakana": "\u32FE",
    "/wohiragana": "\u3092",
    "/wokatakana": "\u30F2",
    "/wokatakanahalfwidth": "\uFF66",
    "/wolfFace": "\u1F43A",
    "/woman": "\u1F469",
    "/womanBunnyEars": "\u1F46F",
    "/womansBoots": "\u1F462",
    "/womansClothes": "\u1F45A",
    "/womansHat": "\u1F452",
    "/womansSandal": "\u1F461",
    "/womens": "\u1F6BA",
    "/won": "\u20A9",
    "/wonmonospace": "\uFFE6",
    "/woodideographiccircled": "\u328D",
    "/woodideographicparen": "\u322D",
    "/wordjoiner": "\u2060",
    "/wordseparatormiddledot": "\u2E31",
    "/worldMap": "\u1F5FA",
    "/worriedFace": "\u1F61F",
    "/wowaenthai": "\u0E27",
    "/wparen": "\u24B2",
    "/wparenthesized": "\u24B2",
    "/wrappedPresent": "\u1F381",
    "/wreathproduct": "\u2240",
    "/wrench": "\u1F527",
    "/wring": "\u1E98",
    "/wsuperior": "\u02B7",
    "/wsupmod": "\u02B7",
    "/wturned": "\u028D",
    "/wulumelikvowel": "\uA9B7",
    "/wuluvowel": "\uA9B6",
    "/wynn": "\u01BF",
    "/x": "\u0078",
    "/x.inferior": "\u2093",
    "/xabovecmb": "\u033D",
    "/xatailcyr": "\u04B3",
    "/xbopomofo": "\u3112",
    "/xcircle": "\u24E7",
    "/xdieresis": "\u1E8D",
    "/xdot": "\u1E8B",
    "/xdotaccent": "\u1E8B",
    "/xeharmenian": "\u056D",
    "/xi": "\u03BE",
    "/xmonospace": "\uFF58",
    "/xor": "\u22BB",
    "/xparen": "\u24B3",
    "/xparenthesized": "\u24B3",
    "/xsuperior": "\u02E3",
    "/xsupmod": "\u02E3",
    "/y": "\u0079",
    "/yaadosquare": "\u334E",
    "/yaarusquare": "\u334F",
    "/yabengali": "\u09AF",
    "/yacirclekatakana": "\u32F3",
    "/yacute": "\u00FD",
    "/yacyr": "\u044F",
    "/yadeva": "\u092F",
    "/yaecyr": "\u0519",
    "/yaekorean": "\u3152",
    "/yagujarati": "\u0AAF",
    "/yagurmukhi": "\u0A2F",
    "/yahiragana": "\u3084",
    "/yakatakana": "\u30E4",
    "/yakatakanahalfwidth": "\uFF94",
    "/yakorean": "\u3151",
    "/yamakkanthai": "\u0E4E",
    "/yangtonemod": "\u02EB",
    "/yasmallhiragana": "\u3083",
    "/yasmallkatakana": "\u30E3",
    "/yasmallkatakanahalfwidth": "\uFF6C",
    "/yatcyr": "\u0463",
    "/yatcyrillic": "\u0463",
    "/ycircle": "\u24E8",
    "/ycircumflex": "\u0177",
    "/ydieresis": "\u00FF",
    "/ydot": "\u1E8F",
    "/ydotaccent": "\u1E8F",
    "/ydotbelow": "\u1EF5",
    "/yeh": "\u064A",
    "/yeh.fina": "\uFEF2",
    "/yeh.init": "\uFEF3",
    "/yeh.init_alefmaksura.fina": "\uFC59",
    "/yeh.init_hah.fina": "\uFC56",
    "/yeh.init_hah.medi": "\uFCDB",
    "/yeh.init_hamzaabove.medi_ae.fina": "\uFBEC",
    "/yeh.init_hamzaabove.medi_alef.fina": "\uFBEA",
    "/yeh.init_hamzaabove.medi_alefmaksura.fina": "\uFC03",
    "/yeh.init_hamzaabove.medi_e.fina": "\uFBF6",
    "/yeh.init_hamzaabove.medi_e.medi": "\uFBF8",
    "/yeh.init_hamzaabove.medi_hah.fina": "\uFC01",
    "/yeh.init_hamzaabove.medi_hah.medi": "\uFC98",
    "/yeh.init_hamzaabove.medi_heh.medi": "\uFC9B",
    "/yeh.init_hamzaabove.medi_jeem.fina": "\uFC00",
    "/yeh.init_hamzaabove.medi_jeem.medi": "\uFC97",
    "/yeh.init_hamzaabove.medi_khah.medi": "\uFC99",
    "/yeh.init_hamzaabove.medi_meem.fina": "\uFC02",
    "/yeh.init_hamzaabove.medi_meem.medi": "\uFC9A",
    "/yeh.init_hamzaabove.medi_oe.fina": "\uFBF2",
    "/yeh.init_hamzaabove.medi_u.fina": "\uFBF0",
    "/yeh.init_hamzaabove.medi_waw.fina": "\uFBEE",
    "/yeh.init_hamzaabove.medi_yeh.fina": "\uFC04",
    "/yeh.init_hamzaabove.medi_yu.fina": "\uFBF4",
    "/yeh.init_heh.medi": "\uFCDE",
    "/yeh.init_jeem.fina": "\uFC55",
    "/yeh.init_jeem.medi": "\uFCDA",
    "/yeh.init_khah.fina": "\uFC57",
    "/yeh.init_khah.medi": "\uFCDC",
    "/yeh.init_meem.fina": "\uFC58",
    "/yeh.init_meem.medi": "\uFCDD",
    "/yeh.init_meem.medi_meem.medi": "\uFD9D",
    "/yeh.init_yeh.fina": "\uFC5A",
    "/yeh.isol": "\uFEF1",
    "/yeh.medi": "\uFEF4",
    "/yeh.medi_alefmaksura.fina": "\uFC95",
    "/yeh.medi_hah.medi_yeh.fina": "\uFDAE",
    "/yeh.medi_hamzaabove.medi_ae.fina": "\uFBED",
    "/yeh.medi_hamzaabove.medi_alef.fina": "\uFBEB",
    "/yeh.medi_hamzaabove.medi_alefmaksura.fina": "\uFC68",
    "/yeh.medi_hamzaabove.medi_e.fina": "\uFBF7",
    "/yeh.medi_hamzaabove.medi_heh.medi": "\uFCE0",
    "/yeh.medi_hamzaabove.medi_meem.fina": "\uFC66",
    "/yeh.medi_hamzaabove.medi_meem.medi": "\uFCDF",
    "/yeh.medi_hamzaabove.medi_noon.fina": "\uFC67",
    "/yeh.medi_hamzaabove.medi_oe.fina": "\uFBF3",
    "/yeh.medi_hamzaabove.medi_reh.fina": "\uFC64",
    "/yeh.medi_hamzaabove.medi_u.fina": "\uFBF1",
    "/yeh.medi_hamzaabove.medi_waw.fina": "\uFBEF",
    "/yeh.medi_hamzaabove.medi_yeh.fina": "\uFC69",
    "/yeh.medi_hamzaabove.medi_yu.fina": "\uFBF5",
    "/yeh.medi_hamzaabove.medi_zain.fina": "\uFC65",
    "/yeh.medi_heh.medi": "\uFCF1",
    "/yeh.medi_jeem.medi_yeh.fina": "\uFDAF",
    "/yeh.medi_meem.fina": "\uFC93",
    "/yeh.medi_meem.medi": "\uFCF0",
    "/yeh.medi_meem.medi_meem.fina": "\uFD9C",
    "/yeh.medi_meem.medi_yeh.fina": "\uFDB0",
    "/yeh.medi_noon.fina": "\uFC94",
    "/yeh.medi_reh.fina": "\uFC91",
    "/yeh.medi_yeh.fina": "\uFC96",
    "/yeh.medi_zain.fina": "\uFC92",
    "/yehBarreeDigitThreeAbove": "\u077B",
    "/yehBarreeDigitTwoAbove": "\u077A",
    "/yehVabove": "\u06CE",
    "/yehabove": "\u06E7",
    "/yeharabic": "\u064A",
    "/yehbarree": "\u06D2",
    "/yehbarree.fina": "\uFBAF",
    "/yehbarree.isol": "\uFBAE",
    "/yehbarreearabic": "\u06D2",
    "/yehbarreefinalarabic": "\uFBAF",
    "/yehbarreehamza": "\u06D3",
    "/yehbarreehamza.fina": "\uFBB1",
    "/yehbarreehamza.isol": "\uFBB0",
    "/yehfarsi": "\u06CC",
    "/yehfarsi.fina": "\uFBFD",
    "/yehfarsi.init": "\uFBFE",
    "/yehfarsi.isol": "\uFBFC",
    "/yehfarsi.medi": "\uFBFF",
    "/yehfarsiinvertedV": "\u063D",
    "/yehfarsithreedotsabove": "\u063F",
    "/yehfarsitwodotsabove": "\u063E",
    "/yehfinalarabic": "\uFEF2",
    "/yehhamza": "\u0626",
    "/yehhamza.fina": "\uFE8A",
    "/yehhamza.init": "\uFE8B",
    "/yehhamza.isol": "\uFE89",
    "/yehhamza.medi": "\uFE8C",
    "/yehhamzaabovearabic": "\u0626",
    "/yehhamzaabovefinalarabic": "\uFE8A",
    "/yehhamzaaboveinitialarabic": "\uFE8B",
    "/yehhamzaabovemedialarabic": "\uFE8C",
    "/yehhighhamza": "\u0678",
    "/yehinitialarabic": "\uFEF3",
    "/yehmedialarabic": "\uFEF4",
    "/yehmeeminitialarabic": "\uFCDD",
    "/yehmeemisolatedarabic": "\uFC58",
    "/yehnoonfinalarabic": "\uFC94",
    "/yehsmall": "\u06E6",
    "/yehtail": "\u06CD",
    "/yehthreedotsbelow": "\u06D1",
    "/yehthreedotsbelowarabic": "\u06D1",
    "/yekorean": "\u3156",
    "/yellowHeart": "\u1F49B",
    "/yen": "\u00A5",
    "/yenmonospace": "\uFFE5",
    "/yeokorean": "\u3155",
    "/yeorinhieuhkorean": "\u3186",
    "/yerachBenYomo:hb": "\u05AA",
    "/yerahbenyomohebrew": "\u05AA",
    "/yerahbenyomolefthebrew": "\u05AA",
    "/yericyrillic": "\u044B",
    "/yerudieresiscyrillic": "\u04F9",
    "/yesieungkorean": "\u3181",
    "/yesieungpansioskorean": "\u3183",
    "/yesieungsioskorean": "\u3182",
    "/yetiv:hb": "\u059A",
    "/yetivhebrew": "\u059A",
    "/ygrave": "\u1EF3",
    "/yhoi": "\u1EF7",
    "/yhook": "\u01B4",
    "/yhookabove": "\u1EF7",
    "/yiarmenian": "\u0575",
    "/yicyrillic": "\u0457",
    "/yikorean": "\u3162",
    "/yintonemod": "\u02EA",
    "/yinyang": "\u262F",
    "/yiwnarmenian": "\u0582",
    "/ylongcyr": "\u044B",
    "/ylongdieresiscyr": "\u04F9",
    "/yloop": "\u1EFF",
    "/ymacron": "\u0233",
    "/ymonospace": "\uFF59",
    "/yocirclekatakana": "\u32F5",
    "/yod": "\u05D9",
    "/yod:hb": "\u05D9",
    "/yod_yod:hb": "\u05F2",
    "/yod_yod_patah:hb": "\uFB1F",
    "/yoddagesh": "\uFB39",
    "/yoddageshhebrew": "\uFB39",
    "/yodhebrew": "\u05D9",
    "/yodwithdagesh:hb": "\uFB39",
    "/yodwithhiriq:hb": "\uFB1D",
    "/yodyodhebrew": "\u05F2",
    "/yodyodpatahhebrew": "\uFB1F",
    "/yogh": "\u021D",
    "/yohiragana": "\u3088",
    "/yoikorean": "\u3189",
    "/yokatakana": "\u30E8",
    "/yokatakanahalfwidth": "\uFF96",
    "/yokorean": "\u315B",
    "/yosmallhiragana": "\u3087",
    "/yosmallkatakana": "\u30E7",
    "/yosmallkatakanahalfwidth": "\uFF6E",
    "/yot": "\u03F3",
    "/yotgreek": "\u03F3",
    "/yoyaekorean": "\u3188",
    "/yoyakorean": "\u3187",
    "/yoyakthai": "\u0E22",
    "/yoyingthai": "\u0E0D",
    "/yparen": "\u24B4",
    "/yparenthesized": "\u24B4",
    "/ypogegrammeni": "\u037A",
    "/ypogegrammenigreekcmb": "\u0345",
    "/yr": "\u01A6",
    "/yring": "\u1E99",
    "/ystroke": "\u024F",
    "/ysuperior": "\u02B8",
    "/ysupmod": "\u02B8",
    "/ytilde": "\u1EF9",
    "/yturned": "\u028E",
    "/yu.fina": "\uFBDC",
    "/yu.isol": "\uFBDB",
    "/yuansquare": "\u3350",
    "/yucirclekatakana": "\u32F4",
    "/yucyr": "\u044E",
    "/yuhiragana": "\u3086",
    "/yuikorean": "\u318C",
    "/yukatakana": "\u30E6",
    "/yukatakanahalfwidth": "\uFF95",
    "/yukirghiz": "\u06C9",
    "/yukirghiz.fina": "\uFBE3",
    "/yukirghiz.isol": "\uFBE2",
    "/yukorean": "\u3160",
    "/yukrcyr": "\u0457",
    "/yusbigcyr": "\u046B",
    "/yusbigcyrillic": "\u046B",
    "/yusbigiotifiedcyr": "\u046D",
    "/yusbigiotifiedcyrillic": "\u046D",
    "/yuslittlecyr": "\u0467",
    "/yuslittlecyrillic": "\u0467",
    "/yuslittleiotifiedcyr": "\u0469",
    "/yuslittleiotifiedcyrillic": "\u0469",
    "/yusmallhiragana": "\u3085",
    "/yusmallkatakana": "\u30E5",
    "/yusmallkatakanahalfwidth": "\uFF6D",
    "/yuyekorean": "\u318B",
    "/yuyeokorean": "\u318A",
    "/yyabengali": "\u09DF",
    "/yyadeva": "\u095F",
    "/z": "\u007A",
    "/zaarmenian": "\u0566",
    "/zacute": "\u017A",
    "/zadeva": "\u095B",
    "/zagurmukhi": "\u0A5B",
    "/zah": "\u0638",
    "/zah.fina": "\uFEC6",
    "/zah.init": "\uFEC7",
    "/zah.init_meem.fina": "\uFC28",
    "/zah.init_meem.medi": "\uFCB9",
    "/zah.isol": "\uFEC5",
    "/zah.medi": "\uFEC8",
    "/zah.medi_meem.medi": "\uFD3B",
    "/zaharabic": "\u0638",
    "/zahfinalarabic": "\uFEC6",
    "/zahinitialarabic": "\uFEC7",
    "/zahiragana": "\u3056",
    "/zahmedialarabic": "\uFEC8",
    "/zain": "\u0632",
    "/zain.fina": "\uFEB0",
    "/zain.isol": "\uFEAF",
    "/zainabove": "\u0617",
    "/zainarabic": "\u0632",
    "/zainfinalarabic": "\uFEB0",
    "/zakatakana": "\u30B6",
    "/zaqefGadol:hb": "\u0595",
    "/zaqefQatan:hb": "\u0594",
    "/zaqefgadolhebrew": "\u0595",
    "/zaqefqatanhebrew": "\u0594",
    "/zarqa:hb": "\u0598",
    "/zarqahebrew": "\u0598",
    "/zayin": "\u05D6",
    "/zayin:hb": "\u05D6",
    "/zayindagesh": "\uFB36",
    "/zayindageshhebrew": "\uFB36",
    "/zayinhebrew": "\u05D6",
    "/zayinwithdagesh:hb": "\uFB36",
    "/zbopomofo": "\u3117",
    "/zcaron": "\u017E",
    "/zcircle": "\u24E9",
    "/zcircumflex": "\u1E91",
    "/zcurl": "\u0291",
    "/zdescender": "\u2C6C",
    "/zdot": "\u017C",
    "/zdotaccent": "\u017C",
    "/zdotbelow": "\u1E93",
    "/zecyr": "\u0437",
    "/zecyrillic": "\u0437",
    "/zedescendercyrillic": "\u0499",
    "/zedieresiscyr": "\u04DF",
    "/zedieresiscyrillic": "\u04DF",
    "/zehiragana": "\u305C",
    "/zekatakana": "\u30BC",
    "/zero": "\u0030",
    "/zero.inferior": "\u2080",
    "/zero.superior": "\u2070",
    "/zeroarabic": "\u0660",
    "/zerobengali": "\u09E6",
    "/zerocircle": "\u24EA",
    "/zerocircleblack": "\u24FF",
    "/zerocomma": "\u1F101",
    "/zerodeva": "\u0966",
    "/zerofar": "\u06F0",
    "/zerofullstop": "\u1F100",
    "/zerogujarati": "\u0AE6",
    "/zerogurmukhi": "\u0A66",
    "/zerohackarabic": "\u0660",
    "/zeroinferior": "\u2080",
    "/zeromonospace": "\uFF10",
    "/zerooldstyle": "\uF730",
    "/zeropersian": "\u06F0",
    "/zerosquareabove": "\u06E0",
    "/zerosuperior": "\u2070",
    "/zerothai": "\u0E50",
    "/zerothirds": "\u2189",
    "/zerowidthjoiner": "\uFEFF",
    "/zerowidthnobreakspace": "\uFEFF",
    "/zerowidthnonjoiner": "\u200C",
    "/zerowidthspace": "\u200B",
    "/zeta": "\u03B6",
    "/zetailcyr": "\u0499",
    "/zhbopomofo": "\u3113",
    "/zhearmenian": "\u056A",
    "/zhebrevecyr": "\u04C2",
    "/zhebrevecyrillic": "\u04C2",
    "/zhecyr": "\u0436",
    "/zhecyrillic": "\u0436",
    "/zhedescendercyrillic": "\u0497",
    "/zhedieresiscyr": "\u04DD",
    "/zhedieresiscyrillic": "\u04DD",
    "/zhetailcyr": "\u0497",
    "/zhook": "\u0225",
    "/zihiragana": "\u3058",
    "/zikatakana": "\u30B8",
    "/zildefunc": "\u236C",
    "/zinorhebrew": "\u05AE",
    "/zjekomicyr": "\u0505",
    "/zlinebelow": "\u1E95",
    "/zmonospace": "\uFF5A",
    "/znotationbagmembership": "\u22FF",
    "/zohiragana": "\u305E",
    "/zokatakana": "\u30BE",
    "/zparen": "\u24B5",
    "/zparenthesized": "\u24B5",
    "/zretroflex": "\u0290",
    "/zretroflexhook": "\u0290",
    "/zstroke": "\u01B6",
    "/zswashtail": "\u0240",
    "/zuhiragana": "\u305A",
    "/zukatakana": "\u30BA",
    "/zwarakay": "\u0659",
    # manually added from
    # https://github.com/serviceprototypinglab/latex-pdfa/blob/master/glyphtounicode-cmr.tex
    "/angbracketleftBig": "\u28E8",
    "/angbracketleftBigg": "\u27E8",
    "/angbracketleftbig": "\u27E8",
    "/angbracketleftbigg": "\u27E8",
    "/angbracketrightBig": "\u27E9",
    "/angbracketrightBigg": "\u27E9",
    "/angbracketrightbig": "\u27E9",
    "/angbracketrightbigg": "\u27E9",
    "/arrowbt": "\u2193",
    "/arrowdblbt": "\u21D3",
    "/arrowdbltp": "\u21D1",
    "/arrowhookleft": "\u21AA",
    "/arrowhookright": "\u21A9",
    "/arrowtp": "\u2191",
    # diff : "/arrowvertex": "\u23D0",
    "/arrowvertexdbl": "\uED12",
    "/backslashBig": "\u005C",
    "/backslashBigg": "\u005C",
    "/backslashbig": "\u005C",
    "/backslashbigg": "\u005C",
    # diff : "/braceex": "\u23AA",
    "/bracehtipdownleft": "\uED17",
    "/bracehtipdownright": "\uED18",
    "/bracehtipupleft": "\uED19",
    "/bracehtipupright": "\uED1A",
    "/braceleftBig": "\u007B",
    "/braceleftBigg": "\u007B",
    "/braceleftbig": "\u007B",
    "/braceleftbigg": "\u007B",
    # diff : "/braceleftbt": "\u23A9",
    # diff : "/braceleftmid": "\u23A8",
    # diff : "/bracelefttp": "\u23A7",
    "/bracerightBig": "\u007D",
    "/bracerightBigg": "\u007D",
    "/bracerightbig": "\u007D",
    "/bracerightbigg": "\u007D",
    # diff : "/bracerightbt": "\u23AD",
    # diff : "/bracerightmid": "\u23AC",
    # diff : "/bracerighttp": "\u23AB",
    "/bracketleftBig": "\u005B",
    "/bracketleftBigg": "\u005B",
    "/bracketleftbig": "\u005B",
    "/bracketleftbigg": "\u005B",
    # diff : "/bracketleftbt": "\u23A3",
    # diff : "/bracketleftex": "\u23A2",
    # diff : "/bracketlefttp": "\u23A1",
    "/bracketrightBig": "\u005D",
    "/bracketrightBigg": "\u005D",
    "/bracketrightbig": "\u005D",
    "/bracketrightbigg": "\u005D",
    # diff : "/bracketrightbt": "\u23A6",
    # diff : "/bracketrightex": "\u23A5",
    # diff : "/bracketrighttp": "\u23A4",
    "/ceilingleftBig": "\u2308",
    "/ceilingleftBigg": "\u2308",
    "/ceilingleftbig": "\u2308",
    "/ceilingleftbigg": "\u2308",
    "/ceilingrightBig": "\u2309",
    "/ceilingrightBigg": "\u2309",
    "/ceilingrightbig": "\u2309",
    "/ceilingrightbigg": "\u2309",
    "/circledotdisplay": "\u2A00",
    "/circledottext": "\u2A00",
    "/circlemultiplydisplay": "\u2A02",
    "/circlemultiplytext": "\u2A02",
    "/circleplusdisplay": "\u2A01",
    "/circleplustext": "\u2A01",
    "/contintegraldisplay": "\u222E",
    "/contintegraltext": "\u222E",
    "/coproductdisplay": "\u2210",
    "/coproducttext": "\u2210",
    "/floorleftBig": "\u230A",
    "/floorleftBigg": "\u230A",
    "/floorleftbig": "\u230A",
    "/floorleftbigg": "\u230A",
    "/floorrightBig": "\u230B",
    "/floorrightBigg": "\u230B",
    "/floorrightbig": "\u230B",
    "/floorrightbigg": "\u230B",
    "/hatwide": "\u02C6",
    "/hatwider": "\u02C6",
    "/hatwidest": "\u02C6",
    "/integraldisplay": "\u222B",
    "/integraltext": "\u222B",
    "/intersectiondisplay": "\u22C2",
    "/intersectiontext": "\u22C2",
    "/logicalanddisplay": "\u22C0",
    "/logicalandtext": "\u22C0",
    "/logicalordisplay": "\u22C1",
    "/logicalortext": "\u22C1",
    "/mapsto": "\u21A6",
    "/parenleftBig": "\u0028",
    "/parenleftBigg": "\u0028",
    "/parenleftbig": "\u0028",
    "/parenleftbigg": "\u0028",
    # diff : "/parenleftbt": "\u239D",
    # diff : "/parenleftex": "\u239C",
    # diff : "/parenlefttp": "\u239B",
    "/parenrightBig": "\u0029",
    "/parenrightBigg": "\u0029",
    "/parenrightbig": "\u0029",
    "/parenrightbigg": "\u0029",
    # diff : "/parenrightbt": "\u23A0",
    # diff : "/parenrightex": "\u239F",
    # diff : "/parenrighttp": "\u239E",
    "/productdisplay": "\u220F",
    "/producttext": "\u220F",
    "/radicalBig": "\u221A",
    "/radicalBigg": "\u221A",
    "/radicalbig": "\u221A",
    "/radicalbigg": "\u221A",
    "/radicalbt": "\u221A",
    "/radicaltp": "\uED6A",
    "/radicalvertex": "\uED6B",
    "/slashBig": "\u002F",
    "/slashBigg": "\u002F",
    "/slashbig": "\u002F",
    "/slashbigg": "\u002F",
    "/summationdisplay": "\u2211",
    "/summationtext": "\u2211",
    "/tie": "\u2040",
    "/tildewide": "\u02DC",
    "/tildewider": "\u02DC",
    "/tildewidest": "\u02DC",
    "/uniondisplay": "\u22C3",
    "/unionmultidisplay": "\u2A04",
    "/unionmultitext": "\u2A04",
    "/unionsqdisplay": "\u2A06",
    "/unionsqtext": "\u2A06",
    "/uniontext": "\u22C3",
    "/vextenddouble": "\uED79",
    "/vextendsingle": "\u23D0",
    "/a1": "\u25C1",
    "/a2": "\u22B4",
    "/a3": "\u25B7",
    "/a4": "\u22B5",
    "/a40": "\u02C2",
    "/a41": "\u02C3",
    "/a42": "\u2303",
    "/a43": "\u2304",
    "/a48": "\u2127",
    "/a49": "\u22C8",
    "/a50": "\u25A1",
    "/a51": "\u25C7",
    "/a58": "\u2053",
    "/a59": "\u219D",
    "/a60": "\u228F",
    "/a61": "\u2290",
    "/d0": "\u2199",
    "/d1": "\u2199",
    "/d2": "\u2199",
    "/d3": "\u2199",
    "/d4": "\u2199",
    "/d5": "\u2199",
    "/d6": "\u2199",
    "/d7": "\u2193",
    "/d8": "\u2193",
    "/d9": "\u2193",
    "/d10": "\u2193",
    "/d11": "\u2193",
    "/d12": "\u2193",
    "/d13": "\u2193",
    "/d14": "\u2193",
    "/d15": "\u2193",
    "/d16": "\u2193",
    "/d17": "\u2193",
    "/d18": "\u2193",
    "/d19": "\u2193",
    "/d20": "\u2193",
    "/d21": "\u2193",
    "/d22": "\u2193",
    "/d23": "\u2193",
    "/d24": "\u2198",
    "/d25": "\u2198",
    "/d26": "\u2198",
    "/d27": "\u2198",
    "/d28": "\u2198",
    "/d29": "\u2198",
    "/d30": "\u2198",
    "/d31": "\u2198",
    "/d32": "\u2198",
    "/d33": "\u2198",
    "/d34": "\u2198",
    "/d35": "\u2198",
    "/d36": "\u2198",
    "/d37": "\u2198",
    "/d38": "\u2198",
    "/d39": "\u2192",
    "/d40": "\u2192",
    "/d41": "\u2192",
    "/d42": "\u2192",
    "/d43": "\u2192",
    "/d44": "\u2192",
    "/d45": "\u2192",
    "/d46": "\u2192",
    "/d47": "\u2192",
    "/d48": "\u2192",
    "/d49": "\u2192",
    "/d50": "\u2192",
    "/d51": "\u2192",
    "/d52": "\u2192",
    "/d53": "\u2192",
    "/d54": "\u2192",
    "/d55": "\u2192",
    "/d56": "\u2197",
    "/d57": "\u2197",
    "/d58": "\u2197",
    "/d59": "\u2197",
    "/d60": "\u2197",
    "/d61": "\u2197",
    "/d62": "\u2197",
    "/d63": "\u2197",
    "/d64": "\u2197",
    "/d65": "\u2197",
    "/d66": "\u2197",
    "/d67": "\u2197",
    "/d68": "\u2197",
    "/d69": "\u2197",
    "/d70": "\u2197",
    "/d71": "\u2191",
    "/d72": "\u2191",
    "/d73": "\u2191",
    "/d74": "\u2191",
    "/d75": "\u2191",
    "/d76": "\u2191",
    "/d77": "\u2191",
    "/d78": "\u2191",
    "/d79": "\u2191",
    "/d80": "\u2191",
    "/d81": "\u2191",
    "/d82": "\u2191",
    "/d83": "\u2191",
    "/d84": "\u2191",
    "/d85": "\u2191",
    "/d86": "\u2191",
    "/d87": "\u2191",
    "/d88": "\u2196",
    "/d89": "\u2196",
    "/d90": "\u2196",
    "/d91": "\u2196",
    "/d92": "\u2196",
    "/d93": "\u2196",
    "/d94": "\u2196",
    "/d95": "\u2196",
    "/d96": "\u2196",
    "/d97": "\u2196",
    "/d98": "\u2196",
    "/d99": "\u2196",
    "/d100": "\u2196",
    "/d101": "\u2196",
    "/d102": "\u2196",
    "/d103": "\u2190",
    "/d104": "\u2190",
    "/d105": "\u2190",
    "/d106": "\u2190",
    "/d107": "\u2190",
    "/d108": "\u2190",
    "/d109": "\u2190",
    "/d110": "\u2190",
    "/d111": "\u2190",
    "/d112": "\u2190",
    "/d113": "\u2190",
    "/d114": "\u2190",
    "/d115": "\u2190",
    "/d116": "\u2190",
    "/d117": "\u2190",
    "/d118": "\u2190",
    "/d119": "\u2190",
    "/d120": "\u2199",
    "/d121": "\u2199",
    "/d122": "\u2199",
    "/d123": "\u2199",
    "/d124": "\u2199",
    "/d125": "\u2199",
    "/d126": "\u2199",
    "/d127": "\u2199",
    # manually added from
    # https://github.com/kohler/lcdf-typetools/blob/master/texglyphlist.txt
    "/Ifractur": "\u2111",
    "/FFsmall": "\uF766",
    "/FFIsmall": "\uF766",
    "/FFLsmall": "\uF766",
    "/FIsmall": "\uF766",
    "/FLsmall": "\uF766",
    # diff : "/Germandbls": "\u0053",
    "/Germandblssmall": "\uF773",
    "/Ng": "\u014A",
    "/Rfractur": "\u211C",
    "/SS": "\u0053",
    "/SSsmall": "\uF773",
    "/altselector": "\uD802",
    "/angbracketleft": "\u27E8",
    "/angbracketright": "\u27E9",
    "/arrowbothv": "\u2195",
    "/arrowdblbothv": "\u21D5",
    "/arrowleftbothalf": "\u21BD",
    "/arrowlefttophalf": "\u21BC",
    "/arrownortheast": "\u2197",
    "/arrownorthwest": "\u2196",
    "/arrowrightbothalf": "\u21C1",
    "/arrowrighttophalf": "\u21C0",
    "/arrowsoutheast": "\u2198",
    "/arrowsouthwest": "\u2199",
    "/ascendercompwordmark": "\uD80A",
    "/asteriskcentered": "\u2217",
    "/bardbl": "\u2225",
    "/capitalcompwordmark": "\uD809",
    "/circlecopyrt": "\u20DD",
    "/circledivide": "\u2298",
    "/circleminus": "\u2296",
    "/coproduct": "\u2A3F",
    "/ct": "\u0063",
    "/cwm": "\u200C",
    "/dblbracketleft": "\u27E6",
    "/dblbracketright": "\u27E7",
    # diff : "/diamond": "\u2662",
    "/diamondmath": "\u22C4",
    # diff : "/dotlessj": "\u0237",
    "/emptyslot": "\uD801",
    "/epsilon1": "\u03F5",
    "/epsiloninv": "\u03F6",
    "/equivasymptotic": "\u224D",
    "/flat": "\u266D",
    "/follows": "\u227B",
    "/followsequal": "\u2AB0",
    "/followsorcurly": "\u227D",
    "/greatermuch": "\u226B",
    # diff : "/heart": "\u2661",
    "/interrobangdown": "\u2E18",
    "/intersectionsq": "\u2293",
    "/latticetop": "\u22A4",
    "/lessmuch": "\u226A",
    "/longdbls": "\u017F",
    "/longsh": "\u017F",
    "/longsi": "\u017F",
    "/longsl": "\u017F",
    "/longst": "\uFB05",
    "/lscript": "\u2113",
    "/natural": "\u266E",
    "/negationslash": "\u0338",
    "/ng": "\u014B",
    "/owner": "\u220B",
    "/pertenthousand": "\u2031",
    # diff : "/phi": "\u03D5",
    # diff : "/phi1": "\u03C6",
    "/pi1": "\u03D6",
    "/precedesequal": "\u2AAF",
    "/precedesorcurly": "\u227C",
    "/prime": "\u2032",
    "/rho1": "\u03F1",
    "/ringfitted": "\uD80D",
    "/sharp": "\u266F",
    "/similarequal": "\u2243",
    "/slurabove": "\u2322",
    "/slurbelow": "\u2323",
    "/st": "\uFB06",
    "/subsetsqequal": "\u2291",
    "/supersetsqequal": "\u2292",
    "/triangle": "\u25B3",
    "/triangleinv": "\u25BD",
    "/triangleleft": "\u25C1",
    # diff : "/triangleright": "\u25B7",
    "/turnstileleft": "\u22A2",
    "/turnstileright": "\u22A3",
    "/twelveudash": "\uD80C",
    "/unionmulti": "\u228E",
    "/unionsq": "\u2294",
    "/vector": "\u20D7",
    "/visualspace": "\u2423",
    "/Dbar": "\u0110",
    "/compwordmark": "\u200C",
    "/dbar": "\u0111",
    "/rangedash": "\u2013",
    "/hyphenchar": "\u002D",
    "/punctdash": "\u2014",
    "/visiblespace": "\u2423",
    "/Yen": "\u00A5",
    "/anticlockwise": "\u27F2",
    "/arrowparrleftright": "\u21C6",
    "/arrowparrrightleft": "\u21C4",
    "/arrowtailleft": "\u21A2",
    "/arrowtailright": "\u21A3",
    "/arrowtripleleft": "\u21DA",
    "/arrowtripleright": "\u21DB",
    "/check": "\u2713",
    "/circleR": "\u00AE",
    "/circleS": "\u24C8",
    "/circleasterisk": "\u229B",
    "/circleequal": "\u229C",
    "/circlering": "\u229A",
    "/clockwise": "\u27F3",
    "/curlyleft": "\u21AB",
    "/curlyright": "\u21AC",
    "/dblarrowdwn": "\u21CA",
    "/dblarrowheadleft": "\u219E",
    "/dblarrowheadright": "\u21A0",
    # diff : "/dblarrowup": "\u21C8",
    "/defines": "\u225C",
    "/diamondsolid": "\u2666",
    "/difference": "\u224F",
    "/downfall": "\u22CE",
    "/equaldotleftright": "\u2252",
    "/equaldotrightleft": "\u2253",
    "/equalorfollows": "\u22DF",
    # diff : "/equalorgreater": "\u2A96",
    # diff : "/equalorless": "\u2A95",
    "/equalsdots": "\u2251",
    "/followsorequal": "\u227F",
    "/forcesbar": "\u22AA",
    # diff : "/fork": "\u22D4",
    "/geomequivalent": "\u224E",
    "/greaterdbleqlless": "\u2A8C",
    "/greaterdblequal": "\u2267",
    "/greaterlessequal": "\u22DB",
    "/greaterorapproxeql": "\u2A86",
    "/greaterorequalslant": "\u2A7E",
    "/greaterorsimilar": "\u2273",
    "/harpoondownleft": "\u21C3",
    "/harpoondownright": "\u21C2",
    "/harpoonleftright": "\u21CC",
    "/harpoonrightleft": "\u21CB",
    "/harpoonupleft": "\u21BF",
    "/harpoonupright": "\u21BE",
    "/intercal": "\u22BA",
    "/lessdbleqlgreater": "\u2A8B",
    "/lessdblequal": "\u2266",
    "/lessequalgreater": "\u22DA",
    "/lessorapproxeql": "\u2A85",
    "/lessorequalslant": "\u2A7D",
    "/lessorsimilar": "\u2272",
    "/maltesecross": "\u2720",
    "/multiopenleft": "\u22CB",
    "/multiopenright": "\u22CC",
    "/orunderscore": "\u22BB",
    "/perpcorrespond": "\u2A5E",
    # diff : "/precedesorequal": "\u227E",
    "/primereverse": "\u2035",
    "/revasymptequal": "\u22CD",
    "/revsimilar": "\u223D",
    "/rightanglene": "\u231D",
    "/rightanglenw": "\u231C",
    "/rightanglese": "\u231F",
    "/rightanglesw": "\u231E",
    "/satisfies": "\u22A8",
    "/shiftleft": "\u21B0",
    "/shiftright": "\u21B1",
    "/square": "\u25A1",
    "/squaredot": "\u22A1",
    "/squareminus": "\u229F",
    "/squaremultiply": "\u22A0",
    "/squareplus": "\u229E",
    "/squaresolid": "\u25A0",
    "/squiggleleftright": "\u21AD",
    "/squiggleright": "\u21DD",
    "/subsetdblequal": "\u2AC5",
    "/supersetdbl": "\u22D1",
    "/supersetdblequal": "\u2AC6",
    "/triangledownsld": "\u25BC",
    "/triangleleftequal": "\u22B4",
    "/triangleleftsld": "\u25C0",
    "/trianglerightequal": "\u22B5",
    "/trianglerightsld": "\u25B6",
    "/trianglesolid": "\u25B2",
    "/uprise": "\u22CF",
    # diff : "/Digamma": "\u1D7C",
    "/Finv": "\u2132",
    "/Gmir": "\u2141",
    "/Omegainv": "\u2127",
    "/approxorequal": "\u224A",
    "/archleftdown": "\u21B6",
    "/archrightdown": "\u21B7",
    "/beth": "\u2136",
    "/daleth": "\u2138",
    "/dividemultiply": "\u22C7",
    "/downslope": "\u29F9",
    "/equalorsimilar": "\u2242",
    "/follownotdbleqv": "\u2ABA",
    "/follownotslnteql": "\u2AB6",
    "/followornoteqvlnt": "\u22E9",
    "/greaternotdblequal": "\u2A8A",
    "/greaternotequal": "\u2A88",
    "/greaterornotdbleql": "\u2269",
    "/greaterornotequal": "\u2269",
    "/integerdivide": "\u2216",
    "/lessnotdblequal": "\u2A89",
    "/lessnotequal": "\u2A87",
    "/lessornotdbleql": "\u2268",
    "/lessornotequal": "\u2268",
    "/multicloseleft": "\u22C9",
    "/multicloseright": "\u22CA",
    "/notapproxequal": "\u2247",
    "/notarrowboth": "\u21AE",
    "/notarrowleft": "\u219A",
    "/notarrowright": "\u219B",
    "/notbar": "\u2224",
    "/notdblarrowboth": "\u21CE",
    "/notdblarrowleft": "\u21CD",
    "/notdblarrowright": "\u21CF",
    "/notfollows": "\u2281",
    "/notfollowsoreql": "\u2AB0",
    "/notforces": "\u22AE",
    "/notforcesextra": "\u22AF",
    "/notgreaterdblequal": "\u2267",
    "/notgreaterequal": "\u2271",
    "/notgreaterorslnteql": "\u2A7E",
    "/notlessdblequal": "\u2266",
    "/notlessequal": "\u2270",
    "/notlessorslnteql": "\u2A7D",
    "/notprecedesoreql": "\u2AAF",
    "/notsatisfies": "\u22AD",
    "/notsimilar": "\u2241",
    "/notsubseteql": "\u2288",
    "/notsubsetordbleql": "\u2AC5",
    "/notsubsetoreql": "\u228A",
    "/notsuperseteql": "\u2289",
    "/notsupersetordbleql": "\u2AC6",
    "/notsupersetoreql": "\u228B",
    "/nottriangeqlleft": "\u22EC",
    "/nottriangeqlright": "\u22ED",
    "/nottriangleleft": "\u22EA",
    "/nottriangleright": "\u22EB",
    "/notturnstile": "\u22AC",
    "/planckover2pi": "\u210F",
    "/planckover2pi1": "\u210F",
    "/precedenotdbleqv": "\u2AB9",
    "/precedenotslnteql": "\u2AB5",
    "/precedeornoteqvlnt": "\u22E8",
    "/subsetnoteql": "\u228A",
    "/subsetornotdbleql": "\u2ACB",
    "/supersetnoteql": "\u228B",
    "/supersetornotdbleql": "\u2ACC",
    "/upslope": "\u29F8",
}


def _complete() -> None:
    """
    Add the generic ``/a<N>`` glyph names and ``/.notdef`` to ``adobe_glyphs``.

    Each name ``/a0`` … ``/a255`` is mapped to ``chr(N)``. The assignment is
    unconditional, so an entry with the same name that is already present in
    the table (for example ``/a1``) is replaced by the character-code mapping.
    Finally ``/.notdef`` is mapped to ``"□"``.
    """
    # One bulk update instead of 256 individual item assignments.
    adobe_glyphs.update({f"/a{code}": chr(code) for code in range(256)})
    adobe_glyphs["/.notdef"] = "□"


# Run once at import time so the generated "/a<N>" and "/.notdef" entries are
# part of the table as soon as this module has been loaded.
_complete()


================================================
FILE: pypdf/_codecs/core_font_metrics.py
================================================
# This file is based upon the 14 core AFM files provided by Adobe/Macromedia at
# https://download.macromedia.com/pub/developer/opentype/tech-notes/Core14_AFMs.zip
# The original copyright follows:
#
# -----------------------------------------------------------------------------------------------
# Core 14 AFM Files - ReadMe
#
# This file and the 14 PostScript(R) AFM files it accompanies may be used, copied, and
# distributed for any purpose and without charge, with or without modification, provided that all
# copyright notices are retained; that the AFM files are not distributed without this file; that
# all modifications to this file or any of the AFM files are prominently noted in the modified
# file(s); and that this paragraph is not modified. Adobe Systems has no responsibility or
# obligation to support the use of the AFM files.
# -----------------------------------------------------------------------------------------------


from pypdf._font import CoreFontMetrics, FontDescriptor

CORE_FONT_METRICS: dict[str, CoreFontMetrics] = {
    # Generated from Courier.afm
    # Copyright (c) 1989, 1990, 1991, 1992, 1993, 1997 Adobe Systems Incorporated.  All Rights
    # Reserved.
    "Courier": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Courier",
            family="Courier",
            weight="Medium",
            ascent=629,
            descent=-157,
            cap_height=562,
            x_height=426,
            italic_angle=0,
            flags=33,
            bbox=(-23.0, -250.0, 715.0, 805.0),
        ),
        character_widths={
            " ": 600,
            "default": 600,
            "!": 600,
            '"': 600,
            "#": 600,
            "$": 600,
            "%": 600,
            "&": 600,
            "\u2019": 600,
            "(": 600,
            ")": 600,
            "*": 600,
            "+": 600,
            ",": 600,
            "-": 600,
            ".": 600,
            "/": 600,
            "0": 600,
            "1": 600,
            "2": 600,
            "3": 600,
            "4": 600,
            "5": 600,
            "6": 600,
            "7": 600,
            "8": 600,
            "9": 600,
            ":": 600,
            ";": 600,
            "<": 600,
            "=": 600,
            ">": 600,
            "?": 600,
            "@": 600,
            "A": 600,
            "B": 600,
            "C": 600,
            "D": 600,
            "E": 600,
            "F": 600,
            "G": 600,
            "H": 600,
            "I": 600,
            "J": 600,
            "K": 600,
            "L": 600,
            "M": 600,
            "N": 600,
            "O": 600,
            "P": 600,
            "Q": 600,
            "R": 600,
            "S": 600,
            "T": 600,
            "U": 600,
            "V": 600,
            "W": 600,
            "X": 600,
            "Y": 600,
            "Z": 600,
            "[": 600,
            "\\": 600,
            "]": 600,
            "^": 600,
            "_": 600,
            "\u2018": 600,
            "a": 600,
            "b": 600,
            "c": 600,
            "d": 600,
            "e": 600,
            "f": 600,
            "g": 600,
            "h": 600,
            "i": 600,
            "j": 600,
            "k": 600,
            "l": 600,
            "m": 600,
            "n": 600,
            "o": 600,
            "p": 600,
            "q": 600,
            "r": 600,
            "s": 600,
            "t": 600,
            "u": 600,
            "v": 600,
            "w": 600,
            "x": 600,
            "y": 600,
            "z": 600,
            "{": 600,
            "|": 600,
            "}": 600,
            "~": 600,
            "\xa1": 600,
            "\xa2": 600,
            "\xa3": 600,
            "\u2044": 600,
            "\xa5": 600,
            "\u0192": 600,
            "\xa7": 600,
            "\xa4": 600,
            "'": 600,
            "\u201c": 600,
            "\xab": 600,
            "\u2039": 600,
            "\u203a": 600,
            "\ufb01": 600,
            "\ufb02": 600,
            "\u2013": 600,
            "\u2020": 600,
            "\u2021": 600,
            "\xb7": 600,
            "\xb6": 600,
            "\u2022": 600,
            "\u201a": 600,
            "\u201e": 600,
            "\u201d": 600,
            "\xbb": 600,
            "\u2026": 600,
            "\u2030": 600,
            "\xbf": 600,
            "`": 600,
            "\xb4": 600,
            "\u02c6": 600,
            "\u02dc": 600,
            "\xaf": 600,
            "\u02d8": 600,
            "\u02d9": 600,
            "\xa8": 600,
            "\u02da": 600,
            "\xb8": 600,
            "\u02dd": 600,
            "\u02db": 600,
            "\u02c7": 600,
            "\u2014": 600,
            "\xc6": 600,
            "\xaa": 600,
            "\u0141": 600,
            "\xd8": 600,
            "\u0152": 600,
            "\xba": 600,
            "\xe6": 600,
            "\u0131": 600,
            "\u0142": 600,
            "\xf8": 600,
            "\u0153": 600,
            "\xdf": 600,
            "\xcf": 600,
            "\xe9": 600,
            "\u0103": 600,
            "\u0171": 600,
            "\u011b": 600,
            "\u0178": 600,
            "\xf7": 600,
            "\xdd": 600,
            "\xc2": 600,
            "\xe1": 600,
            "\xdb": 600,
            "\xfd": 600,
            "\u0219": 600,
            "\xea": 600,
            "\u016e": 600,
            "\xdc": 600,
            "\u0105": 600,
            "\xda": 600,
            "\u0173": 600,
            "\xcb": 600,
            "\u0110": 600,
            "\uf6c3": 600,
            "\xa9": 600,
            "\u0112": 600,
            "\u010d": 600,
            "\xe5": 600,
            "\u0145": 600,
            "\u013a": 600,
            "\xe0": 600,
            "\u0162": 600,
            "\u0106": 600,
            "\xe3": 600,
            "\u0116": 600,
            "\u0161": 600,
            "\u015f": 600,
            "\xed": 600,
            "\u25ca": 600,
            "\u0158": 600,
            "\u0122": 600,
            "\xfb": 600,
            "\xe2": 600,
            "\u0100": 600,
            "\u0159": 600,
            "\xe7": 600,
            "\u017b": 600,
            "\xde": 600,
            "\u014c": 600,
            "\u0154": 600,
            "\u015a": 600,
            "\u010f": 600,
            "\u016a": 600,
            "\u016f": 600,
            "\xb3": 600,
            "\xd2": 600,
            "\xc0": 600,
            "\u0102": 600,
            "\xd7": 600,
            "\xfa": 600,
            "\u0164": 600,
            "\u2202": 600,
            "\xff": 600,
            "\u0143": 600,
            "\xee": 600,
            "\xca": 600,
            "\xe4": 600,
            "\xeb": 600,
            "\u0107": 600,
            "\u0144": 600,
            "\u016b": 600,
            "\u0147": 600,
            "\xcd": 600,
            "\xb1": 600,
            "\xa6": 600,
            "\xae": 600,
            "\u011e": 600,
            "\u0130": 600,
            "\u2211": 600,
            "\xc8": 600,
            "\u0155": 600,
            "\u014d": 600,
            "\u0179": 600,
            "\u017d": 600,
            "\u2265": 600,
            "\xd0": 600,
            "\xc7": 600,
            "\u013c": 600,
            "\u0165": 600,
            "\u0119": 600,
            "\u0172": 600,
            "\xc1": 600,
            "\xc4": 600,
            "\xe8": 600,
            "\u017a": 600,
            "\u012f": 600,
            "\xd3": 600,
            "\xf3": 600,
            "\u0101": 600,
            "\u015b": 600,
            "\xef": 600,
            "\xd4": 600,
            "\xd9": 600,
            "\u2206": 600,
            "\xfe": 600,
            "\xb2": 600,
            "\xd6": 600,
            "\xb5": 600,
            "\xec": 600,
            "\u0151": 600,
            "\u0118": 600,
            "\u0111": 600,
            "\xbe": 600,
            "\u015e": 600,
            "\u013e": 600,
            "\u0136": 600,
            "\u0139": 600,
            "\u2122": 600,
            "\u0117": 600,
            "\xcc": 600,
            "\u012a": 600,
            "\u013d": 600,
            "\xbd": 600,
            "\u2264": 600,
            "\xf4": 600,
            "\xf1": 600,
            "\u0170": 600,
            "\xc9": 600,
            "\u0113": 600,
            "\u011f": 600,
            "\xbc": 600,
            "\u0160": 600,
            "\u0218": 600,
            "\u0150": 600,
            "\xb0": 600,
            "\xf2": 600,
            "\u010c": 600,
            "\xf9": 600,
            "\u221a": 600,
            "\u010e": 600,
            "\u0157": 600,
            "\xd1": 600,
            "\xf5": 600,
            "\u0156": 600,
            "\u013b": 600,
            "\xc3": 600,
            "\u0104": 600,
            "\xc5": 600,
            "\xd5": 600,
            "\u017c": 600,
            "\u011a": 600,
            "\u012e": 600,
            "\u0137": 600,
            "\u2212": 600,
            "\xce": 600,
            "\u0148": 600,
            "\u0163": 600,
            "\xac": 600,
            "\xf6": 600,
            "\xfc": 600,
            "\u2260": 600,
            "\u0123": 600,
            "\xf0": 600,
            "\u017e": 600,
            "\u0146": 600,
            "\xb9": 600,
            "\u012b": 600,
            "\u20ac": 600,
        },
    ),
    # Generated from Courier-Bold.afm
    # Copyright (c) 1989, 1990, 1991, 1993, 1997 Adobe Systems Incorporated.  All Rights Reserved.
    "Courier-Bold": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Courier-Bold",
            family="Courier",
            weight="Bold",
            ascent=629,
            descent=-157,
            cap_height=562,
            x_height=439,
            italic_angle=0,
            flags=33,
            bbox=(-113.0, -250.0, 749.0, 801.0),
        ),
        character_widths={
            " ": 600,
            "default": 600,
            "!": 600,
            '"': 600,
            "#": 600,
            "$": 600,
            "%": 600,
            "&": 600,
            "\u2019": 600,
            "(": 600,
            ")": 600,
            "*": 600,
            "+": 600,
            ",": 600,
            "-": 600,
            ".": 600,
            "/": 600,
            "0": 600,
            "1": 600,
            "2": 600,
            "3": 600,
            "4": 600,
            "5": 600,
            "6": 600,
            "7": 600,
            "8": 600,
            "9": 600,
            ":": 600,
            ";": 600,
            "<": 600,
            "=": 600,
            ">": 600,
            "?": 600,
            "@": 600,
            "A": 600,
            "B": 600,
            "C": 600,
            "D": 600,
            "E": 600,
            "F": 600,
            "G": 600,
            "H": 600,
            "I": 600,
            "J": 600,
            "K": 600,
            "L": 600,
            "M": 600,
            "N": 600,
            "O": 600,
            "P": 600,
            "Q": 600,
            "R": 600,
            "S": 600,
            "T": 600,
            "U": 600,
            "V": 600,
            "W": 600,
            "X": 600,
            "Y": 600,
            "Z": 600,
            "[": 600,
            "\\": 600,
            "]": 600,
            "^": 600,
            "_": 600,
            "\u2018": 600,
            "a": 600,
            "b": 600,
            "c": 600,
            "d": 600,
            "e": 600,
            "f": 600,
            "g": 600,
            "h": 600,
            "i": 600,
            "j": 600,
            "k": 600,
            "l": 600,
            "m": 600,
            "n": 600,
            "o": 600,
            "p": 600,
            "q": 600,
            "r": 600,
            "s": 600,
            "t": 600,
            "u": 600,
            "v": 600,
            "w": 600,
            "x": 600,
            "y": 600,
            "z": 600,
            "{": 600,
            "|": 600,
            "}": 600,
            "~": 600,
            "\xa1": 600,
            "\xa2": 600,
            "\xa3": 600,
            "\u2044": 600,
            "\xa5": 600,
            "\u0192": 600,
            "\xa7": 600,
            "\xa4": 600,
            "'": 600,
            "\u201c": 600,
            "\xab": 600,
            "\u2039": 600,
            "\u203a": 600,
            "\ufb01": 600,
            "\ufb02": 600,
            "\u2013": 600,
            "\u2020": 600,
            "\u2021": 600,
            "\xb7": 600,
            "\xb6": 600,
            "\u2022": 600,
            "\u201a": 600,
            "\u201e": 600,
            "\u201d": 600,
            "\xbb": 600,
            "\u2026": 600,
            "\u2030": 600,
            "\xbf": 600,
            "`": 600,
            "\xb4": 600,
            "\u02c6": 600,
            "\u02dc": 600,
            "\xaf": 600,
            "\u02d8": 600,
            "\u02d9": 600,
            "\xa8": 600,
            "\u02da": 600,
            "\xb8": 600,
            "\u02dd": 600,
            "\u02db": 600,
            "\u02c7": 600,
            "\u2014": 600,
            "\xc6": 600,
            "\xaa": 600,
            "\u0141": 600,
            "\xd8": 600,
            "\u0152": 600,
            "\xba": 600,
            "\xe6": 600,
            "\u0131": 600,
            "\u0142": 600,
            "\xf8": 600,
            "\u0153": 600,
            "\xdf": 600,
            "\xcf": 600,
            "\xe9": 600,
            "\u0103": 600,
            "\u0171": 600,
            "\u011b": 600,
            "\u0178": 600,
            "\xf7": 600,
            "\xdd": 600,
            "\xc2": 600,
            "\xe1": 600,
            "\xdb": 600,
            "\xfd": 600,
            "\u0219": 600,
            "\xea": 600,
            "\u016e": 600,
            "\xdc": 600,
            "\u0105": 600,
            "\xda": 600,
            "\u0173": 600,
            "\xcb": 600,
            "\u0110": 600,
            "\uf6c3": 600,
            "\xa9": 600,
            "\u0112": 600,
            "\u010d": 600,
            "\xe5": 600,
            "\u0145": 600,
            "\u013a": 600,
            "\xe0": 600,
            "\u0162": 600,
            "\u0106": 600,
            "\xe3": 600,
            "\u0116": 600,
            "\u0161": 600,
            "\u015f": 600,
            "\xed": 600,
            "\u25ca": 600,
            "\u0158": 600,
            "\u0122": 600,
            "\xfb": 600,
            "\xe2": 600,
            "\u0100": 600,
            "\u0159": 600,
            "\xe7": 600,
            "\u017b": 600,
            "\xde": 600,
            "\u014c": 600,
            "\u0154": 600,
            "\u015a": 600,
            "\u010f": 600,
            "\u016a": 600,
            "\u016f": 600,
            "\xb3": 600,
            "\xd2": 600,
            "\xc0": 600,
            "\u0102": 600,
            "\xd7": 600,
            "\xfa": 600,
            "\u0164": 600,
            "\u2202": 600,
            "\xff": 600,
            "\u0143": 600,
            "\xee": 600,
            "\xca": 600,
            "\xe4": 600,
            "\xeb": 600,
            "\u0107": 600,
            "\u0144": 600,
            "\u016b": 600,
            "\u0147": 600,
            "\xcd": 600,
            "\xb1": 600,
            "\xa6": 600,
            "\xae": 600,
            "\u011e": 600,
            "\u0130": 600,
            "\u2211": 600,
            "\xc8": 600,
            "\u0155": 600,
            "\u014d": 600,
            "\u0179": 600,
            "\u017d": 600,
            "\u2265": 600,
            "\xd0": 600,
            "\xc7": 600,
            "\u013c": 600,
            "\u0165": 600,
            "\u0119": 600,
            "\u0172": 600,
            "\xc1": 600,
            "\xc4": 600,
            "\xe8": 600,
            "\u017a": 600,
            "\u012f": 600,
            "\xd3": 600,
            "\xf3": 600,
            "\u0101": 600,
            "\u015b": 600,
            "\xef": 600,
            "\xd4": 600,
            "\xd9": 600,
            "\u2206": 600,
            "\xfe": 600,
            "\xb2": 600,
            "\xd6": 600,
            "\xb5": 600,
            "\xec": 600,
            "\u0151": 600,
            "\u0118": 600,
            "\u0111": 600,
            "\xbe": 600,
            "\u015e": 600,
            "\u013e": 600,
            "\u0136": 600,
            "\u0139": 600,
            "\u2122": 600,
            "\u0117": 600,
            "\xcc": 600,
            "\u012a": 600,
            "\u013d": 600,
            "\xbd": 600,
            "\u2264": 600,
            "\xf4": 600,
            "\xf1": 600,
            "\u0170": 600,
            "\xc9": 600,
            "\u0113": 600,
            "\u011f": 600,
            "\xbc": 600,
            "\u0160": 600,
            "\u0218": 600,
            "\u0150": 600,
            "\xb0": 600,
            "\xf2": 600,
            "\u010c": 600,
            "\xf9": 600,
            "\u221a": 600,
            "\u010e": 600,
            "\u0157": 600,
            "\xd1": 600,
            "\xf5": 600,
            "\u0156": 600,
            "\u013b": 600,
            "\xc3": 600,
            "\u0104": 600,
            "\xc5": 600,
            "\xd5": 600,
            "\u017c": 600,
            "\u011a": 600,
            "\u012e": 600,
            "\u0137": 600,
            "\u2212": 600,
            "\xce": 600,
            "\u0148": 600,
            "\u0163": 600,
            "\xac": 600,
            "\xf6": 600,
            "\xfc": 600,
            "\u2260": 600,
            "\u0123": 600,
            "\xf0": 600,
            "\u017e": 600,
            "\u0146": 600,
            "\xb9": 600,
            "\u012b": 600,
            "\u20ac": 600,
        },
    ),
    # Generated from Courier-BoldOblique.afm
    # Copyright (c) 1989, 1990, 1991, 1993, 1997 Adobe Systems Incorporated.  All Rights Reserved.
    "Courier-BoldOblique": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Courier-BoldOblique",
            family="Courier",
            weight="Bold",
            ascent=629,
            descent=-157,
            cap_height=562,
            x_height=439,
            italic_angle=-12,
            flags=97,
            bbox=(-57.0, -250.0, 869.0, 801.0),
        ),
        character_widths={
            " ": 600,
            "default": 600,
            "!": 600,
            '"': 600,
            "#": 600,
            "$": 600,
            "%": 600,
            "&": 600,
            "\u2019": 600,
            "(": 600,
            ")": 600,
            "*": 600,
            "+": 600,
            ",": 600,
            "-": 600,
            ".": 600,
            "/": 600,
            "0": 600,
            "1": 600,
            "2": 600,
            "3": 600,
            "4": 600,
            "5": 600,
            "6": 600,
            "7": 600,
            "8": 600,
            "9": 600,
            ":": 600,
            ";": 600,
            "<": 600,
            "=": 600,
            ">": 600,
            "?": 600,
            "@": 600,
            "A": 600,
            "B": 600,
            "C": 600,
            "D": 600,
            "E": 600,
            "F": 600,
            "G": 600,
            "H": 600,
            "I": 600,
            "J": 600,
            "K": 600,
            "L": 600,
            "M": 600,
            "N": 600,
            "O": 600,
            "P": 600,
            "Q": 600,
            "R": 600,
            "S": 600,
            "T": 600,
            "U": 600,
            "V": 600,
            "W": 600,
            "X": 600,
            "Y": 600,
            "Z": 600,
            "[": 600,
            "\\": 600,
            "]": 600,
            "^": 600,
            "_": 600,
            "\u2018": 600,
            "a": 600,
            "b": 600,
            "c": 600,
            "d": 600,
            "e": 600,
            "f": 600,
            "g": 600,
            "h": 600,
            "i": 600,
            "j": 600,
            "k": 600,
            "l": 600,
            "m": 600,
            "n": 600,
            "o": 600,
            "p": 600,
            "q": 600,
            "r": 600,
            "s": 600,
            "t": 600,
            "u": 600,
            "v": 600,
            "w": 600,
            "x": 600,
            "y": 600,
            "z": 600,
            "{": 600,
            "|": 600,
            "}": 600,
            "~": 600,
            "\xa1": 600,
            "\xa2": 600,
            "\xa3": 600,
            "\u2044": 600,
            "\xa5": 600,
            "\u0192": 600,
            "\xa7": 600,
            "\xa4": 600,
            "'": 600,
            "\u201c": 600,
            "\xab": 600,
            "\u2039": 600,
            "\u203a": 600,
            "\ufb01": 600,
            "\ufb02": 600,
            "\u2013": 600,
            "\u2020": 600,
            "\u2021": 600,
            "\xb7": 600,
            "\xb6": 600,
            "\u2022": 600,
            "\u201a": 600,
            "\u201e": 600,
            "\u201d": 600,
            "\xbb": 600,
            "\u2026": 600,
            "\u2030": 600,
            "\xbf": 600,
            "`": 600,
            "\xb4": 600,
            "\u02c6": 600,
            "\u02dc": 600,
            "\xaf": 600,
            "\u02d8": 600,
            "\u02d9": 600,
            "\xa8": 600,
            "\u02da": 600,
            "\xb8": 600,
            "\u02dd": 600,
            "\u02db": 600,
            "\u02c7": 600,
            "\u2014": 600,
            "\xc6": 600,
            "\xaa": 600,
            "\u0141": 600,
            "\xd8": 600,
            "\u0152": 600,
            "\xba": 600,
            "\xe6": 600,
            "\u0131": 600,
            "\u0142": 600,
            "\xf8": 600,
            "\u0153": 600,
            "\xdf": 600,
            "\xcf": 600,
            "\xe9": 600,
            "\u0103": 600,
            "\u0171": 600,
            "\u011b": 600,
            "\u0178": 600,
            "\xf7": 600,
            "\xdd": 600,
            "\xc2": 600,
            "\xe1": 600,
            "\xdb": 600,
            "\xfd": 600,
            "\u0219": 600,
            "\xea": 600,
            "\u016e": 600,
            "\xdc": 600,
            "\u0105": 600,
            "\xda": 600,
            "\u0173": 600,
            "\xcb": 600,
            "\u0110": 600,
            "\uf6c3": 600,
            "\xa9": 600,
            "\u0112": 600,
            "\u010d": 600,
            "\xe5": 600,
            "\u0145": 600,
            "\u013a": 600,
            "\xe0": 600,
            "\u0162": 600,
            "\u0106": 600,
            "\xe3": 600,
            "\u0116": 600,
            "\u0161": 600,
            "\u015f": 600,
            "\xed": 600,
            "\u25ca": 600,
            "\u0158": 600,
            "\u0122": 600,
            "\xfb": 600,
            "\xe2": 600,
            "\u0100": 600,
            "\u0159": 600,
            "\xe7": 600,
            "\u017b": 600,
            "\xde": 600,
            "\u014c": 600,
            "\u0154": 600,
            "\u015a": 600,
            "\u010f": 600,
            "\u016a": 600,
            "\u016f": 600,
            "\xb3": 600,
            "\xd2": 600,
            "\xc0": 600,
            "\u0102": 600,
            "\xd7": 600,
            "\xfa": 600,
            "\u0164": 600,
            "\u2202": 600,
            "\xff": 600,
            "\u0143": 600,
            "\xee": 600,
            "\xca": 600,
            "\xe4": 600,
            "\xeb": 600,
            "\u0107": 600,
            "\u0144": 600,
            "\u016b": 600,
            "\u0147": 600,
            "\xcd": 600,
            "\xb1": 600,
            "\xa6": 600,
            "\xae": 600,
            "\u011e": 600,
            "\u0130": 600,
            "\u2211": 600,
            "\xc8": 600,
            "\u0155": 600,
            "\u014d": 600,
            "\u0179": 600,
            "\u017d": 600,
            "\u2265": 600,
            "\xd0": 600,
            "\xc7": 600,
            "\u013c": 600,
            "\u0165": 600,
            "\u0119": 600,
            "\u0172": 600,
            "\xc1": 600,
            "\xc4": 600,
            "\xe8": 600,
            "\u017a": 600,
            "\u012f": 600,
            "\xd3": 600,
            "\xf3": 600,
            "\u0101": 600,
            "\u015b": 600,
            "\xef": 600,
            "\xd4": 600,
            "\xd9": 600,
            "\u2206": 600,
            "\xfe": 600,
            "\xb2": 600,
            "\xd6": 600,
            "\xb5": 600,
            "\xec": 600,
            "\u0151": 600,
            "\u0118": 600,
            "\u0111": 600,
            "\xbe": 600,
            "\u015e": 600,
            "\u013e": 600,
            "\u0136": 600,
            "\u0139": 600,
            "\u2122": 600,
            "\u0117": 600,
            "\xcc": 600,
            "\u012a": 600,
            "\u013d": 600,
            "\xbd": 600,
            "\u2264": 600,
            "\xf4": 600,
            "\xf1": 600,
            "\u0170": 600,
            "\xc9": 600,
            "\u0113": 600,
            "\u011f": 600,
            "\xbc": 600,
            "\u0160": 600,
            "\u0218": 600,
            "\u0150": 600,
            "\xb0": 600,
            "\xf2": 600,
            "\u010c": 600,
            "\xf9": 600,
            "\u221a": 600,
            "\u010e": 600,
            "\u0157": 600,
            "\xd1": 600,
            "\xf5": 600,
            "\u0156": 600,
            "\u013b": 600,
            "\xc3": 600,
            "\u0104": 600,
            "\xc5": 600,
            "\xd5": 600,
            "\u017c": 600,
            "\u011a": 600,
            "\u012e": 600,
            "\u0137": 600,
            "\u2212": 600,
            "\xce": 600,
            "\u0148": 600,
            "\u0163": 600,
            "\xac": 600,
            "\xf6": 600,
            "\xfc": 600,
            "\u2260": 600,
            "\u0123": 600,
            "\xf0": 600,
            "\u017e": 600,
            "\u0146": 600,
            "\xb9": 600,
            "\u012b": 600,
            "\u20ac": 600,
        },
    ),
    # Generated from Courier-Oblique.afm
    # Copyright (c) 1989, 1990, 1991, 1992, 1993, 1997 Adobe Systems Incorporated.  All Rights
    # Reserved.
    "Courier-Oblique": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Courier-Oblique",
            family="Courier",
            weight="Medium",
            ascent=629,
            descent=-157,
            cap_height=562,
            x_height=426,
            italic_angle=-12,
            flags=97,
            bbox=(-27.0, -250.0, 849.0, 805.0),
        ),
        character_widths={
            " ": 600,
            "default": 600,
            "!": 600,
            '"': 600,
            "#": 600,
            "$": 600,
            "%": 600,
            "&": 600,
            "\u2019": 600,
            "(": 600,
            ")": 600,
            "*": 600,
            "+": 600,
            ",": 600,
            "-": 600,
            ".": 600,
            "/": 600,
            "0": 600,
            "1": 600,
            "2": 600,
            "3": 600,
            "4": 600,
            "5": 600,
            "6": 600,
            "7": 600,
            "8": 600,
            "9": 600,
            ":": 600,
            ";": 600,
            "<": 600,
            "=": 600,
            ">": 600,
            "?": 600,
            "@": 600,
            "A": 600,
            "B": 600,
            "C": 600,
            "D": 600,
            "E": 600,
            "F": 600,
            "G": 600,
            "H": 600,
            "I": 600,
            "J": 600,
            "K": 600,
            "L": 600,
            "M": 600,
            "N": 600,
            "O": 600,
            "P": 600,
            "Q": 600,
            "R": 600,
            "S": 600,
            "T": 600,
            "U": 600,
            "V": 600,
            "W": 600,
            "X": 600,
            "Y": 600,
            "Z": 600,
            "[": 600,
            "\\": 600,
            "]": 600,
            "^": 600,
            "_": 600,
            "\u2018": 600,
            "a": 600,
            "b": 600,
            "c": 600,
            "d": 600,
            "e": 600,
            "f": 600,
            "g": 600,
            "h": 600,
            "i": 600,
            "j": 600,
            "k": 600,
            "l": 600,
            "m": 600,
            "n": 600,
            "o": 600,
            "p": 600,
            "q": 600,
            "r": 600,
            "s": 600,
            "t": 600,
            "u": 600,
            "v": 600,
            "w": 600,
            "x": 600,
            "y": 600,
            "z": 600,
            "{": 600,
            "|": 600,
            "}": 600,
            "~": 600,
            "\xa1": 600,
            "\xa2": 600,
            "\xa3": 600,
            "\u2044": 600,
            "\xa5": 600,
            "\u0192": 600,
            "\xa7": 600,
            "\xa4": 600,
            "'": 600,
            "\u201c": 600,
            "\xab": 600,
            "\u2039": 600,
            "\u203a": 600,
            "\ufb01": 600,
            "\ufb02": 600,
            "\u2013": 600,
            "\u2020": 600,
            "\u2021": 600,
            "\xb7": 600,
            "\xb6": 600,
            "\u2022": 600,
            "\u201a": 600,
            "\u201e": 600,
            "\u201d": 600,
            "\xbb": 600,
            "\u2026": 600,
            "\u2030": 600,
            "\xbf": 600,
            "`": 600,
            "\xb4": 600,
            "\u02c6": 600,
            "\u02dc": 600,
            "\xaf": 600,
            "\u02d8": 600,
            "\u02d9": 600,
            "\xa8": 600,
            "\u02da": 600,
            "\xb8": 600,
            "\u02dd": 600,
            "\u02db": 600,
            "\u02c7": 600,
            "\u2014": 600,
            "\xc6": 600,
            "\xaa": 600,
            "\u0141": 600,
            "\xd8": 600,
            "\u0152": 600,
            "\xba": 600,
            "\xe6": 600,
            "\u0131": 600,
            "\u0142": 600,
            "\xf8": 600,
            "\u0153": 600,
            "\xdf": 600,
            "\xcf": 600,
            "\xe9": 600,
            "\u0103": 600,
            "\u0171": 600,
            "\u011b": 600,
            "\u0178": 600,
            "\xf7": 600,
            "\xdd": 600,
            "\xc2": 600,
            "\xe1": 600,
            "\xdb": 600,
            "\xfd": 600,
            "\u0219": 600,
            "\xea": 600,
            "\u016e": 600,
            "\xdc": 600,
            "\u0105": 600,
            "\xda": 600,
            "\u0173": 600,
            "\xcb": 600,
            "\u0110": 600,
            "\uf6c3": 600,
            "\xa9": 600,
            "\u0112": 600,
            "\u010d": 600,
            "\xe5": 600,
            "\u0145": 600,
            "\u013a": 600,
            "\xe0": 600,
            "\u0162": 600,
            "\u0106": 600,
            "\xe3": 600,
            "\u0116": 600,
            "\u0161": 600,
            "\u015f": 600,
            "\xed": 600,
            "\u25ca": 600,
            "\u0158": 600,
            "\u0122": 600,
            "\xfb": 600,
            "\xe2": 600,
            "\u0100": 600,
            "\u0159": 600,
            "\xe7": 600,
            "\u017b": 600,
            "\xde": 600,
            "\u014c": 600,
            "\u0154": 600,
            "\u015a": 600,
            "\u010f": 600,
            "\u016a": 600,
            "\u016f": 600,
            "\xb3": 600,
            "\xd2": 600,
            "\xc0": 600,
            "\u0102": 600,
            "\xd7": 600,
            "\xfa": 600,
            "\u0164": 600,
            "\u2202": 600,
            "\xff": 600,
            "\u0143": 600,
            "\xee": 600,
            "\xca": 600,
            "\xe4": 600,
            "\xeb": 600,
            "\u0107": 600,
            "\u0144": 600,
            "\u016b": 600,
            "\u0147": 600,
            "\xcd": 600,
            "\xb1": 600,
            "\xa6": 600,
            "\xae": 600,
            "\u011e": 600,
            "\u0130": 600,
            "\u2211": 600,
            "\xc8": 600,
            "\u0155": 600,
            "\u014d": 600,
            "\u0179": 600,
            "\u017d": 600,
            "\u2265": 600,
            "\xd0": 600,
            "\xc7": 600,
            "\u013c": 600,
            "\u0165": 600,
            "\u0119": 600,
            "\u0172": 600,
            "\xc1": 600,
            "\xc4": 600,
            "\xe8": 600,
            "\u017a": 600,
            "\u012f": 600,
            "\xd3": 600,
            "\xf3": 600,
            "\u0101": 600,
            "\u015b": 600,
            "\xef": 600,
            "\xd4": 600,
            "\xd9": 600,
            "\u2206": 600,
            "\xfe": 600,
            "\xb2": 600,
            "\xd6": 600,
            "\xb5": 600,
            "\xec": 600,
            "\u0151": 600,
            "\u0118": 600,
            "\u0111": 600,
            "\xbe": 600,
            "\u015e": 600,
            "\u013e": 600,
            "\u0136": 600,
            "\u0139": 600,
            "\u2122": 600,
            "\u0117": 600,
            "\xcc": 600,
            "\u012a": 600,
            "\u013d": 600,
            "\xbd": 600,
            "\u2264": 600,
            "\xf4": 600,
            "\xf1": 600,
            "\u0170": 600,
            "\xc9": 600,
            "\u0113": 600,
            "\u011f": 600,
            "\xbc": 600,
            "\u0160": 600,
            "\u0218": 600,
            "\u0150": 600,
            "\xb0": 600,
            "\xf2": 600,
            "\u010c": 600,
            "\xf9": 600,
            "\u221a": 600,
            "\u010e": 600,
            "\u0157": 600,
            "\xd1": 600,
            "\xf5": 600,
            "\u0156": 600,
            "\u013b": 600,
            "\xc3": 600,
            "\u0104": 600,
            "\xc5": 600,
            "\xd5": 600,
            "\u017c": 600,
            "\u011a": 600,
            "\u012e": 600,
            "\u0137": 600,
            "\u2212": 600,
            "\xce": 600,
            "\u0148": 600,
            "\u0163": 600,
            "\xac": 600,
            "\xf6": 600,
            "\xfc": 600,
            "\u2260": 600,
            "\u0123": 600,
            "\xf0": 600,
            "\u017e": 600,
            "\u0146": 600,
            "\xb9": 600,
            "\u012b": 600,
            "\u20ac": 600,
        },
    ),
    # Generated from Helvetica.afm
    # Copyright (c) 1985, 1987, 1989, 1990, 1997 Adobe Systems Incorporated.  All Rights Reserved.
    # Helvetica is a trademark of Linotype-Hell AG and/or its subsidiaries.
    "Helvetica": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Helvetica",
            family="Helvetica",
            weight="Medium",
            ascent=718,
            descent=-207,
            cap_height=718,
            x_height=523,
            italic_angle=0,
            flags=32,
            bbox=(-166.0, -225.0, 1000.0, 931.0),
        ),
        character_widths={
            " ": 278,
            "default": 556,
            "!": 278,
            '"': 355,
            "#": 556,
            "$": 556,
            "%": 889,
            "&": 667,
            "\u2019": 222,
            "(": 333,
            ")": 333,
            "*": 389,
            "+": 584,
            ",": 278,
            "-": 333,
            ".": 278,
            "/": 278,
            "0": 556,
            "1": 556,
            "2": 556,
            "3": 556,
            "4": 556,
            "5": 556,
            "6": 556,
            "7": 556,
            "8": 556,
            "9": 556,
            ":": 278,
            ";": 278,
            "<": 584,
            "=": 584,
            ">": 584,
            "?": 556,
            "@": 1015,
            "A": 667,
            "B": 667,
            "C": 722,
            "D": 722,
            "E": 667,
            "F": 611,
            "G": 778,
            "H": 722,
            "I": 278,
            "J": 500,
            "K": 667,
            "L": 556,
            "M": 833,
            "N": 722,
            "O": 778,
            "P": 667,
            "Q": 778,
            "R": 722,
            "S": 667,
            "T": 611,
            "U": 722,
            "V": 667,
            "W": 944,
            "X": 667,
            "Y": 667,
            "Z": 611,
            "[": 278,
            "\\": 278,
            "]": 278,
            "^": 469,
            "_": 556,
            "\u2018": 222,
            "a": 556,
            "b": 556,
            "c": 500,
            "d": 556,
            "e": 556,
            "f": 278,
            "g": 556,
            "h": 556,
            "i": 222,
            "j": 222,
            "k": 500,
            "l": 222,
            "m": 833,
            "n": 556,
            "o": 556,
            "p": 556,
            "q": 556,
            "r": 333,
            "s": 500,
            "t": 278,
            "u": 556,
            "v": 500,
            "w": 722,
            "x": 500,
            "y": 500,
            "z": 500,
            "{": 334,
            "|": 260,
            "}": 334,
            "~": 584,
            "\xa1": 333,
            "\xa2": 556,
            "\xa3": 556,
            "\u2044": 167,
            "\xa5": 556,
            "\u0192": 556,
            "\xa7": 556,
            "\xa4": 556,
            "'": 191,
            "\u201c": 333,
            "\xab": 556,
            "\u2039": 333,
            "\u203a": 333,
            "\ufb01": 500,
            "\ufb02": 500,
            "\u2013": 556,
            "\u2020": 556,
            "\u2021": 556,
            "\xb7": 278,
            "\xb6": 537,
            "\u2022": 350,
            "\u201a": 222,
            "\u201e": 333,
            "\u201d": 333,
            "\xbb": 556,
            "\u2026": 1000,
            "\u2030": 1000,
            "\xbf": 611,
            "`": 333,
            "\xb4": 333,
            "\u02c6": 333,
            "\u02dc": 333,
            "\xaf": 333,
            "\u02d8": 333,
            "\u02d9": 333,
            "\xa8": 333,
            "\u02da": 333,
            "\xb8": 333,
            "\u02dd": 333,
            "\u02db": 333,
            "\u02c7": 333,
            "\u2014": 1000,
            "\xc6": 1000,
            "\xaa": 370,
            "\u0141": 556,
            "\xd8": 778,
            "\u0152": 1000,
            "\xba": 365,
            "\xe6": 889,
            "\u0131": 278,
            "\u0142": 222,
            "\xf8": 611,
            "\u0153": 944,
            "\xdf": 611,
            "\xcf": 278,
            "\xe9": 556,
            "\u0103": 556,
            "\u0171": 556,
            "\u011b": 556,
            "\u0178": 667,
            "\xf7": 584,
            "\xdd": 667,
            "\xc2": 667,
            "\xe1": 556,
            "\xdb": 722,
            "\xfd": 500,
            "\u0219": 500,
            "\xea": 556,
            "\u016e": 722,
            "\xdc": 722,
            "\u0105": 556,
            "\xda": 722,
            "\u0173": 556,
            "\xcb": 667,
            "\u0110": 722,
            "\uf6c3": 250,
            "\xa9": 737,
            "\u0112": 667,
            "\u010d": 500,
            "\xe5": 556,
            "\u0145": 722,
            "\u013a": 222,
            "\xe0": 556,
            "\u0162": 611,
            "\u0106": 722,
            "\xe3": 556,
            "\u0116": 667,
            "\u0161": 500,
            "\u015f": 500,
            "\xed": 278,
            "\u25ca": 471,
            "\u0158": 722,
            "\u0122": 778,
            "\xfb": 556,
            "\xe2": 556,
            "\u0100": 667,
            "\u0159": 333,
            "\xe7": 500,
            "\u017b": 611,
            "\xde": 667,
            "\u014c": 778,
            "\u0154": 722,
            "\u015a": 667,
            "\u010f": 643,
            "\u016a": 722,
            "\u016f": 556,
            "\xb3": 333,
            "\xd2": 778,
            "\xc0": 667,
            "\u0102": 667,
            "\xd7": 584,
            "\xfa": 556,
            "\u0164": 611,
            "\u2202": 476,
            "\xff": 500,
            "\u0143": 722,
            "\xee": 278,
            "\xca": 667,
            "\xe4": 556,
            "\xeb": 556,
            "\u0107": 500,
            "\u0144": 556,
            "\u016b": 556,
            "\u0147": 722,
            "\xcd": 278,
            "\xb1": 584,
            "\xa6": 260,
            "\xae": 737,
            "\u011e": 778,
            "\u0130": 278,
            "\u2211": 600,
            "\xc8": 667,
            "\u0155": 333,
            "\u014d": 556,
            "\u0179": 611,
            "\u017d": 611,
            "\u2265": 549,
            "\xd0": 722,
            "\xc7": 722,
            "\u013c": 222,
            "\u0165": 317,
            "\u0119": 556,
            "\u0172": 722,
            "\xc1": 667,
            "\xc4": 667,
            "\xe8": 556,
            "\u017a": 500,
            "\u012f": 222,
            "\xd3": 778,
            "\xf3": 556,
            "\u0101": 556,
            "\u015b": 500,
            "\xef": 278,
            "\xd4": 778,
            "\xd9": 722,
            "\u2206": 612,
            "\xfe": 556,
            "\xb2": 333,
            "\xd6": 778,
            "\xb5": 556,
            "\xec": 278,
            "\u0151": 556,
            "\u0118": 667,
            "\u0111": 556,
            "\xbe": 834,
            "\u015e": 667,
            "\u013e": 299,
            "\u0136": 667,
            "\u0139": 556,
            "\u2122": 1000,
            "\u0117": 556,
            "\xcc": 278,
            "\u012a": 278,
            "\u013d": 556,
            "\xbd": 834,
            "\u2264": 549,
            "\xf4": 556,
            "\xf1": 556,
            "\u0170": 722,
            "\xc9": 667,
            "\u0113": 556,
            "\u011f": 556,
            "\xbc": 834,
            "\u0160": 667,
            "\u0218": 667,
            "\u0150": 778,
            "\xb0": 400,
            "\xf2": 556,
            "\u010c": 722,
            "\xf9": 556,
            "\u221a": 453,
            "\u010e": 722,
            "\u0157": 333,
            "\xd1": 722,
            "\xf5": 556,
            "\u0156": 722,
            "\u013b": 556,
            "\xc3": 667,
            "\u0104": 667,
            "\xc5": 667,
            "\xd5": 778,
            "\u017c": 500,
            "\u011a": 667,
            "\u012e": 278,
            "\u0137": 500,
            "\u2212": 584,
            "\xce": 278,
            "\u0148": 556,
            "\u0163": 278,
            "\xac": 584,
            "\xf6": 556,
            "\xfc": 556,
            "\u2260": 549,
            "\u0123": 556,
            "\xf0": 556,
            "\u017e": 500,
            "\u0146": 556,
            "\xb9": 333,
            "\u012b": 278,
            "\u20ac": 556,
        },
    ),
    # Generated from Helvetica-Bold.afm
    # Copyright (c) 1985, 1987, 1989, 1990, 1997 Adobe Systems Incorporated.  All Rights Reserved.
    # Helvetica is a trademark of Linotype-Hell AG and/or its subsidiaries.
    "Helvetica-Bold": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Helvetica-Bold",
            family="Helvetica",
            weight="Bold",
            ascent=718,
            descent=-207,
            cap_height=718,
            x_height=532,
            italic_angle=0,
            flags=32,
            bbox=(-170.0, -228.0, 1003.0, 962.0),
        ),
        character_widths={
            " ": 278,
            "default": 556,
            "!": 333,
            '"': 474,
            "#": 556,
            "$": 556,
            "%": 889,
            "&": 722,
            "\u2019": 278,
            "(": 333,
            ")": 333,
            "*": 389,
            "+": 584,
            ",": 278,
            "-": 333,
            ".": 278,
            "/": 278,
            "0": 556,
            "1": 556,
            "2": 556,
            "3": 556,
            "4": 556,
            "5": 556,
            "6": 556,
            "7": 556,
            "8": 556,
            "9": 556,
            ":": 333,
            ";": 333,
            "<": 584,
            "=": 584,
            ">": 584,
            "?": 611,
            "@": 975,
            "A": 722,
            "B": 722,
            "C": 722,
            "D": 722,
            "E": 667,
            "F": 611,
            "G": 778,
            "H": 722,
            "I": 278,
            "J": 556,
            "K": 722,
            "L": 611,
            "M": 833,
            "N": 722,
            "O": 778,
            "P": 667,
            "Q": 778,
            "R": 722,
            "S": 667,
            "T": 611,
            "U": 722,
            "V": 667,
            "W": 944,
            "X": 667,
            "Y": 667,
            "Z": 611,
            "[": 333,
            "\\": 278,
            "]": 333,
            "^": 584,
            "_": 556,
            "\u2018": 278,
            "a": 556,
            "b": 611,
            "c": 556,
            "d": 611,
            "e": 556,
            "f": 333,
            "g": 611,
            "h": 611,
            "i": 278,
            "j": 278,
            "k": 556,
            "l": 278,
            "m": 889,
            "n": 611,
            "o": 611,
            "p": 611,
            "q": 611,
            "r": 389,
            "s": 556,
            "t": 333,
            "u": 611,
            "v": 556,
            "w": 778,
            "x": 556,
            "y": 556,
            "z": 500,
            "{": 389,
            "|": 280,
            "}": 389,
            "~": 584,
            "\xa1": 333,
            "\xa2": 556,
            "\xa3": 556,
            "\u2044": 167,
            "\xa5": 556,
            "\u0192": 556,
            "\xa7": 556,
            "\xa4": 556,
            "'": 238,
            "\u201c": 500,
            "\xab": 556,
            "\u2039": 333,
            "\u203a": 333,
            "\ufb01": 611,
            "\ufb02": 611,
            "\u2013": 556,
            "\u2020": 556,
            "\u2021": 556,
            "\xb7": 278,
            "\xb6": 556,
            "\u2022": 350,
            "\u201a": 278,
            "\u201e": 500,
            "\u201d": 500,
            "\xbb": 556,
            "\u2026": 1000,
            "\u2030": 1000,
            "\xbf": 611,
            "`": 333,
            "\xb4": 333,
            "\u02c6": 333,
            "\u02dc": 333,
            "\xaf": 333,
            "\u02d8": 333,
            "\u02d9": 333,
            "\xa8": 333,
            "\u02da": 333,
            "\xb8": 333,
            "\u02dd": 333,
            "\u02db": 333,
            "\u02c7": 333,
            "\u2014": 1000,
            "\xc6": 1000,
            "\xaa": 370,
            "\u0141": 611,
            "\xd8": 778,
            "\u0152": 1000,
            "\xba": 365,
            "\xe6": 889,
            "\u0131": 278,
            "\u0142": 278,
            "\xf8": 611,
            "\u0153": 944,
            "\xdf": 611,
            "\xcf": 278,
            "\xe9": 556,
            "\u0103": 556,
            "\u0171": 611,
            "\u011b": 556,
            "\u0178": 667,
            "\xf7": 584,
            "\xdd": 667,
            "\xc2": 722,
            "\xe1": 556,
            "\xdb": 722,
            "\xfd": 556,
            "\u0219": 556,
            "\xea": 556,
            "\u016e": 722,
            "\xdc": 722,
            "\u0105": 556,
            "\xda": 722,
            "\u0173": 611,
            "\xcb": 667,
            "\u0110": 722,
            "\uf6c3": 250,
            "\xa9": 737,
            "\u0112": 667,
            "\u010d": 556,
            "\xe5": 556,
            "\u0145": 722,
            "\u013a": 278,
            "\xe0": 556,
            "\u0162": 611,
            "\u0106": 722,
            "\xe3": 556,
            "\u0116": 667,
            "\u0161": 556,
            "\u015f": 556,
            "\xed": 278,
            "\u25ca": 494,
            "\u0158": 722,
            "\u0122": 778,
            "\xfb": 611,
            "\xe2": 556,
            "\u0100": 722,
            "\u0159": 389,
            "\xe7": 556,
            "\u017b": 611,
            "\xde": 667,
            "\u014c": 778,
            "\u0154": 722,
            "\u015a": 667,
            "\u010f": 743,
            "\u016a": 722,
            "\u016f": 611,
            "\xb3": 333,
            "\xd2": 778,
            "\xc0": 722,
            "\u0102": 722,
            "\xd7": 584,
            "\xfa": 611,
            "\u0164": 611,
            "\u2202": 494,
            "\xff": 556,
            "\u0143": 722,
            "\xee": 278,
            "\xca": 667,
            "\xe4": 556,
            "\xeb": 556,
            "\u0107": 556,
            "\u0144": 611,
            "\u016b": 611,
            "\u0147": 722,
            "\xcd": 278,
            "\xb1": 584,
            "\xa6": 280,
            "\xae": 737,
            "\u011e": 778,
            "\u0130": 278,
            "\u2211": 600,
            "\xc8": 667,
            "\u0155": 389,
            "\u014d": 611,
            "\u0179": 611,
            "\u017d": 611,
            "\u2265": 549,
            "\xd0": 722,
            "\xc7": 722,
            "\u013c": 278,
            "\u0165": 389,
            "\u0119": 556,
            "\u0172": 722,
            "\xc1": 722,
            "\xc4": 722,
            "\xe8": 556,
            "\u017a": 500,
            "\u012f": 278,
            "\xd3": 778,
            "\xf3": 611,
            "\u0101": 556,
            "\u015b": 556,
            "\xef": 278,
            "\xd4": 778,
            "\xd9": 722,
            "\u2206": 612,
            "\xfe": 611,
            "\xb2": 333,
            "\xd6": 778,
            "\xb5": 611,
            "\xec": 278,
            "\u0151": 611,
            "\u0118": 667,
            "\u0111": 611,
            "\xbe": 834,
            "\u015e": 667,
            "\u013e": 400,
            "\u0136": 722,
            "\u0139": 611,
            "\u2122": 1000,
            "\u0117": 556,
            "\xcc": 278,
            "\u012a": 278,
            "\u013d": 611,
            "\xbd": 834,
            "\u2264": 549,
            "\xf4": 611,
            "\xf1": 611,
            "\u0170": 722,
            "\xc9": 667,
            "\u0113": 556,
            "\u011f": 611,
            "\xbc": 834,
            "\u0160": 667,
            "\u0218": 667,
            "\u0150": 778,
            "\xb0": 400,
            "\xf2": 611,
            "\u010c": 722,
            "\xf9": 611,
            "\u221a": 549,
            "\u010e": 722,
            "\u0157": 389,
            "\xd1": 722,
            "\xf5": 611,
            "\u0156": 722,
            "\u013b": 611,
            "\xc3": 722,
            "\u0104": 722,
            "\xc5": 722,
            "\xd5": 778,
            "\u017c": 500,
            "\u011a": 667,
            "\u012e": 278,
            "\u0137": 556,
            "\u2212": 584,
            "\xce": 278,
            "\u0148": 611,
            "\u0163": 333,
            "\xac": 584,
            "\xf6": 611,
            "\xfc": 611,
            "\u2260": 549,
            "\u0123": 611,
            "\xf0": 611,
            "\u017e": 500,
            "\u0146": 611,
            "\xb9": 333,
            "\u012b": 278,
            "\u20ac": 556,
        },
    ),
    # Generated from Helvetica-BoldOblique.afm
    # Copyright (c) 1985, 1987, 1989, 1990, 1997 Adobe Systems Incorporated.  All Rights Reserved.
    # Helvetica is a trademark of Linotype-Hell AG and/or its subsidiaries.
    "Helvetica-BoldOblique": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Helvetica-BoldOblique",
            family="Helvetica",
            weight="Bold",
            ascent=718,
            descent=-207,
            cap_height=718,
            x_height=532,
            italic_angle=-12,
            flags=96,
            bbox=(-174.0, -228.0, 1114.0, 962.0),
        ),
        character_widths={
            " ": 278,
            "default": 556,
            "!": 333,
            '"': 474,
            "#": 556,
            "$": 556,
            "%": 889,
            "&": 722,
            "\u2019": 278,
            "(": 333,
            ")": 333,
            "*": 389,
            "+": 584,
            ",": 278,
            "-": 333,
            ".": 278,
            "/": 278,
            "0": 556,
            "1": 556,
            "2": 556,
            "3": 556,
            "4": 556,
            "5": 556,
            "6": 556,
            "7": 556,
            "8": 556,
            "9": 556,
            ":": 333,
            ";": 333,
            "<": 584,
            "=": 584,
            ">": 584,
            "?": 611,
            "@": 975,
            "A": 722,
            "B": 722,
            "C": 722,
            "D": 722,
            "E": 667,
            "F": 611,
            "G": 778,
            "H": 722,
            "I": 278,
            "J": 556,
            "K": 722,
            "L": 611,
            "M": 833,
            "N": 722,
            "O": 778,
            "P": 667,
            "Q": 778,
            "R": 722,
            "S": 667,
            "T": 611,
            "U": 722,
            "V": 667,
            "W": 944,
            "X": 667,
            "Y": 667,
            "Z": 611,
            "[": 333,
            "\\": 278,
            "]": 333,
            "^": 584,
            "_": 556,
            "\u2018": 278,
            "a": 556,
            "b": 611,
            "c": 556,
            "d": 611,
            "e": 556,
            "f": 333,
            "g": 611,
            "h": 611,
            "i": 278,
            "j": 278,
            "k": 556,
            "l": 278,
            "m": 889,
            "n": 611,
            "o": 611,
            "p": 611,
            "q": 611,
            "r": 389,
            "s": 556,
            "t": 333,
            "u": 611,
            "v": 556,
            "w": 778,
            "x": 556,
            "y": 556,
            "z": 500,
            "{": 389,
            "|": 280,
            "}": 389,
            "~": 584,
            "\xa1": 333,
            "\xa2": 556,
            "\xa3": 556,
            "\u2044": 167,
            "\xa5": 556,
            "\u0192": 556,
            "\xa7": 556,
            "\xa4": 556,
            "'": 238,
            "\u201c": 500,
            "\xab": 556,
            "\u2039": 333,
            "\u203a": 333,
            "\ufb01": 611,
            "\ufb02": 611,
            "\u2013": 556,
            "\u2020": 556,
            "\u2021": 556,
            "\xb7": 278,
            "\xb6": 556,
            "\u2022": 350,
            "\u201a": 278,
            "\u201e": 500,
            "\u201d": 500,
            "\xbb": 556,
            "\u2026": 1000,
            "\u2030": 1000,
            "\xbf": 611,
            "`": 333,
            "\xb4": 333,
            "\u02c6": 333,
            "\u02dc": 333,
            "\xaf": 333,
            "\u02d8": 333,
            "\u02d9": 333,
            "\xa8": 333,
            "\u02da": 333,
            "\xb8": 333,
            "\u02dd": 333,
            "\u02db": 333,
            "\u02c7": 333,
            "\u2014": 1000,
            "\xc6": 1000,
            "\xaa": 370,
            "\u0141": 611,
            "\xd8": 778,
            "\u0152": 1000,
            "\xba": 365,
            "\xe6": 889,
            "\u0131": 278,
            "\u0142": 278,
            "\xf8": 611,
            "\u0153": 944,
            "\xdf": 611,
            "\xcf": 278,
            "\xe9": 556,
            "\u0103": 556,
            "\u0171": 611,
            "\u011b": 556,
            "\u0178": 667,
            "\xf7": 584,
            "\xdd": 667,
            "\xc2": 722,
            "\xe1": 556,
            "\xdb": 722,
            "\xfd": 556,
            "\u0219": 556,
            "\xea": 556,
            "\u016e": 722,
            "\xdc": 722,
            "\u0105": 556,
            "\xda": 722,
            "\u0173": 611,
            "\xcb": 667,
            "\u0110": 722,
            "\uf6c3": 250,
            "\xa9": 737,
            "\u0112": 667,
            "\u010d": 556,
            "\xe5": 556,
            "\u0145": 722,
            "\u013a": 278,
            "\xe0": 556,
            "\u0162": 611,
            "\u0106": 722,
            "\xe3": 556,
            "\u0116": 667,
            "\u0161": 556,
            "\u015f": 556,
            "\xed": 278,
            "\u25ca": 494,
            "\u0158": 722,
            "\u0122": 778,
            "\xfb": 611,
            "\xe2": 556,
            "\u0100": 722,
            "\u0159": 389,
            "\xe7": 556,
            "\u017b": 611,
            "\xde": 667,
            "\u014c": 778,
            "\u0154": 722,
            "\u015a": 667,
            "\u010f": 743,
            "\u016a": 722,
            "\u016f": 611,
            "\xb3": 333,
            "\xd2": 778,
            "\xc0": 722,
            "\u0102": 722,
            "\xd7": 584,
            "\xfa": 611,
            "\u0164": 611,
            "\u2202": 494,
            "\xff": 556,
            "\u0143": 722,
            "\xee": 278,
            "\xca": 667,
            "\xe4": 556,
            "\xeb": 556,
            "\u0107": 556,
            "\u0144": 611,
            "\u016b": 611,
            "\u0147": 722,
            "\xcd": 278,
            "\xb1": 584,
            "\xa6": 280,
            "\xae": 737,
            "\u011e": 778,
            "\u0130": 278,
            "\u2211": 600,
            "\xc8": 667,
            "\u0155": 389,
            "\u014d": 611,
            "\u0179": 611,
            "\u017d": 611,
            "\u2265": 549,
            "\xd0": 722,
            "\xc7": 722,
            "\u013c": 278,
            "\u0165": 389,
            "\u0119": 556,
            "\u0172": 722,
            "\xc1": 722,
            "\xc4": 722,
            "\xe8": 556,
            "\u017a": 500,
            "\u012f": 278,
            "\xd3": 778,
            "\xf3": 611,
            "\u0101": 556,
            "\u015b": 556,
            "\xef": 278,
            "\xd4": 778,
            "\xd9": 722,
            "\u2206": 612,
            "\xfe": 611,
            "\xb2": 333,
            "\xd6": 778,
            "\xb5": 611,
            "\xec": 278,
            "\u0151": 611,
            "\u0118": 667,
            "\u0111": 611,
            "\xbe": 834,
            "\u015e": 667,
            "\u013e": 400,
            "\u0136": 722,
            "\u0139": 611,
            "\u2122": 1000,
            "\u0117": 556,
            "\xcc": 278,
            "\u012a": 278,
            "\u013d": 611,
            "\xbd": 834,
            "\u2264": 549,
            "\xf4": 611,
            "\xf1": 611,
            "\u0170": 722,
            "\xc9": 667,
            "\u0113": 556,
            "\u011f": 611,
            "\xbc": 834,
            "\u0160": 667,
            "\u0218": 667,
            "\u0150": 778,
            "\xb0": 400,
            "\xf2": 611,
            "\u010c": 722,
            "\xf9": 611,
            "\u221a": 549,
            "\u010e": 722,
            "\u0157": 389,
            "\xd1": 722,
            "\xf5": 611,
            "\u0156": 722,
            "\u013b": 611,
            "\xc3": 722,
            "\u0104": 722,
            "\xc5": 722,
            "\xd5": 778,
            "\u017c": 500,
            "\u011a": 667,
            "\u012e": 278,
            "\u0137": 556,
            "\u2212": 584,
            "\xce": 278,
            "\u0148": 611,
            "\u0163": 333,
            "\xac": 584,
            "\xf6": 611,
            "\xfc": 611,
            "\u2260": 549,
            "\u0123": 611,
            "\xf0": 611,
            "\u017e": 500,
            "\u0146": 611,
            "\xb9": 333,
            "\u012b": 278,
            "\u20ac": 556,
        },
    ),
    # Generated from Helvetica-Oblique.afm
    # Copyright (c) 1985, 1987, 1989, 1990, 1997 Adobe Systems Incorporated.  All Rights Reserved.
    # Helvetica is a trademark of Linotype-Hell AG and/or its subsidiaries.
    "Helvetica-Oblique": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Helvetica-Oblique",
            family="Helvetica",
            weight="Medium",
            ascent=718,
            descent=-207,
            cap_height=718,
            x_height=523,
            italic_angle=-12,
            flags=96,
            bbox=(-170.0, -225.0, 1116.0, 931.0),
        ),
        character_widths={
            " ": 278,
            "default": 556,
            "!": 278,
            '"': 355,
            "#": 556,
            "$": 556,
            "%": 889,
            "&": 667,
            "\u2019": 222,
            "(": 333,
            ")": 333,
            "*": 389,
            "+": 584,
            ",": 278,
            "-": 333,
            ".": 278,
            "/": 278,
            "0": 556,
            "1": 556,
            "2": 556,
            "3": 556,
            "4": 556,
            "5": 556,
            "6": 556,
            "7": 556,
            "8": 556,
            "9": 556,
            ":": 278,
            ";": 278,
            "<": 584,
            "=": 584,
            ">": 584,
            "?": 556,
            "@": 1015,
            "A": 667,
            "B": 667,
            "C": 722,
            "D": 722,
            "E": 667,
            "F": 611,
            "G": 778,
            "H": 722,
            "I": 278,
            "J": 500,
            "K": 667,
            "L": 556,
            "M": 833,
            "N": 722,
            "O": 778,
            "P": 667,
            "Q": 778,
            "R": 722,
            "S": 667,
            "T": 611,
            "U": 722,
            "V": 667,
            "W": 944,
            "X": 667,
            "Y": 667,
            "Z": 611,
            "[": 278,
            "\\": 278,
            "]": 278,
            "^": 469,
            "_": 556,
            "\u2018": 222,
            "a": 556,
            "b": 556,
            "c": 500,
            "d": 556,
            "e": 556,
            "f": 278,
            "g": 556,
            "h": 556,
            "i": 222,
            "j": 222,
            "k": 500,
            "l": 222,
            "m": 833,
            "n": 556,
            "o": 556,
            "p": 556,
            "q": 556,
            "r": 333,
            "s": 500,
            "t": 278,
            "u": 556,
            "v": 500,
            "w": 722,
            "x": 500,
            "y": 500,
            "z": 500,
            "{": 334,
            "|": 260,
            "}": 334,
            "~": 584,
            "\xa1": 333,
            "\xa2": 556,
            "\xa3": 556,
            "\u2044": 167,
            "\xa5": 556,
            "\u0192": 556,
            "\xa7": 556,
            "\xa4": 556,
            "'": 191,
            "\u201c": 333,
            "\xab": 556,
            "\u2039": 333,
            "\u203a": 333,
            "\ufb01": 500,
            "\ufb02": 500,
            "\u2013": 556,
            "\u2020": 556,
            "\u2021": 556,
            "\xb7": 278,
            "\xb6": 537,
            "\u2022": 350,
            "\u201a": 222,
            "\u201e": 333,
            "\u201d": 333,
            "\xbb": 556,
            "\u2026": 1000,
            "\u2030": 1000,
            "\xbf": 611,
            "`": 333,
            "\xb4": 333,
            "\u02c6": 333,
            "\u02dc": 333,
            "\xaf": 333,
            "\u02d8": 333,
            "\u02d9": 333,
            "\xa8": 333,
            "\u02da": 333,
            "\xb8": 333,
            "\u02dd": 333,
            "\u02db": 333,
            "\u02c7": 333,
            "\u2014": 1000,
            "\xc6": 1000,
            "\xaa": 370,
            "\u0141": 556,
            "\xd8": 778,
            "\u0152": 1000,
            "\xba": 365,
            "\xe6": 889,
            "\u0131": 278,
            "\u0142": 222,
            "\xf8": 611,
            "\u0153": 944,
            "\xdf": 611,
            "\xcf": 278,
            "\xe9": 556,
            "\u0103": 556,
            "\u0171": 556,
            "\u011b": 556,
            "\u0178": 667,
            "\xf7": 584,
            "\xdd": 667,
            "\xc2": 667,
            "\xe1": 556,
            "\xdb": 722,
            "\xfd": 500,
            "\u0219": 500,
            "\xea": 556,
            "\u016e": 722,
            "\xdc": 722,
            "\u0105": 556,
            "\xda": 722,
            "\u0173": 556,
            "\xcb": 667,
            "\u0110": 722,
            "\uf6c3": 250,
            "\xa9": 737,
            "\u0112": 667,
            "\u010d": 500,
            "\xe5": 556,
            "\u0145": 722,
            "\u013a": 222,
            "\xe0": 556,
            "\u0162": 611,
            "\u0106": 722,
            "\xe3": 556,
            "\u0116": 667,
            "\u0161": 500,
            "\u015f": 500,
            "\xed": 278,
            "\u25ca": 471,
            "\u0158": 722,
            "\u0122": 778,
            "\xfb": 556,
            "\xe2": 556,
            "\u0100": 667,
            "\u0159": 333,
            "\xe7": 500,
            "\u017b": 611,
            "\xde": 667,
            "\u014c": 778,
            "\u0154": 722,
            "\u015a": 667,
            "\u010f": 643,
            "\u016a": 722,
            "\u016f": 556,
            "\xb3": 333,
            "\xd2": 778,
            "\xc0": 667,
            "\u0102": 667,
            "\xd7": 584,
            "\xfa": 556,
            "\u0164": 611,
            "\u2202": 476,
            "\xff": 500,
            "\u0143": 722,
            "\xee": 278,
            "\xca": 667,
            "\xe4": 556,
            "\xeb": 556,
            "\u0107": 500,
            "\u0144": 556,
            "\u016b": 556,
            "\u0147": 722,
            "\xcd": 278,
            "\xb1": 584,
            "\xa6": 260,
            "\xae": 737,
            "\u011e": 778,
            "\u0130": 278,
            "\u2211": 600,
            "\xc8": 667,
            "\u0155": 333,
            "\u014d": 556,
            "\u0179": 611,
            "\u017d": 611,
            "\u2265": 549,
            "\xd0": 722,
            "\xc7": 722,
            "\u013c": 222,
            "\u0165": 317,
            "\u0119": 556,
            "\u0172": 722,
            "\xc1": 667,
            "\xc4": 667,
            "\xe8": 556,
            "\u017a": 500,
            "\u012f": 222,
            "\xd3": 778,
            "\xf3": 556,
            "\u0101": 556,
            "\u015b": 500,
            "\xef": 278,
            "\xd4": 778,
            "\xd9": 722,
            "\u2206": 612,
            "\xfe": 556,
            "\xb2": 333,
            "\xd6": 778,
            "\xb5": 556,
            "\xec": 278,
            "\u0151": 556,
            "\u0118": 667,
            "\u0111": 556,
            "\xbe": 834,
            "\u015e": 667,
            "\u013e": 299,
            "\u0136": 667,
            "\u0139": 556,
            "\u2122": 1000,
            "\u0117": 556,
            "\xcc": 278,
            "\u012a": 278,
            "\u013d": 556,
            "\xbd": 834,
            "\u2264": 549,
            "\xf4": 556,
            "\xf1": 556,
            "\u0170": 722,
            "\xc9": 667,
            "\u0113": 556,
            "\u011f": 556,
            "\xbc": 834,
            "\u0160": 667,
            "\u0218": 667,
            "\u0150": 778,
            "\xb0": 400,
            "\xf2": 556,
            "\u010c": 722,
            "\xf9": 556,
            "\u221a": 453,
            "\u010e": 722,
            "\u0157": 333,
            "\xd1": 722,
            "\xf5": 556,
            "\u0156": 722,
            "\u013b": 556,
            "\xc3": 667,
            "\u0104": 667,
            "\xc5": 667,
            "\xd5": 778,
            "\u017c": 500,
            "\u011a": 667,
            "\u012e": 278,
            "\u0137": 500,
            "\u2212": 584,
            "\xce": 278,
            "\u0148": 556,
            "\u0163": 278,
            "\xac": 584,
            "\xf6": 556,
            "\xfc": 556,
            "\u2260": 549,
            "\u0123": 556,
            "\xf0": 556,
            "\u017e": 500,
            "\u0146": 556,
            "\xb9": 333,
            "\u012b": 278,
            "\u20ac": 556,
        },
    ),
    # Generated from Symbol.afm
    # Copyright (c) 1985, 1987, 1989, 1990, 1997 Adobe Systems Incorporated. All rights reserved.
    "Symbol": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Symbol",
            family="Symbol",
            weight="Medium",
            ascent=0.0,
            descent=0.0,
            cap_height=0.0,
            x_height=0.0,
            italic_angle=0,
            flags=4,
            bbox=(-180.0, -293.0, 1090.0, 1010.0),
        ),
        character_widths={
            " ": 250,
            "default": 500,
            "!": 333,
            "\u2200": 713,
            "#": 500,
            "\u2203": 549,
            "%": 833,
            "&": 778,
            "\u220b": 439,
            "(": 333,
            ")": 333,
            "\u2217": 500,
            "+": 549,
            ",": 250,
            "\u2212": 549,
            ".": 250,
            "/": 278,
            "0": 500,
            "1": 500,
            "2": 500,
            "3": 500,
            "4": 500,
            "5": 500,
            "6": 500,
            "7": 500,
            "8": 500,
            "9": 500,
            ":": 278,
            ";": 278,
            "<": 549,
            "=": 549,
            ">": 549,
            "?": 444,
            "\u2245": 549,
            "\u0391": 722,
            "\u0392": 667,
            "\u03a7": 722,
            "\u2206": 612,
            "\u0395": 611,
            "\u03a6": 763,
            "\u0393": 603,
            "\u0397": 722,
            "\u0399": 333,
            "\u03d1": 631,
            "\u039a": 722,
            "\u039b": 686,
            "\u039c": 889,
            "\u039d": 722,
            "\u039f": 722,
            "\u03a0": 768,
            "\u0398": 741,
            "\u03a1": 556,
            "\u03a3": 592,
            "\u03a4": 611,
            "\u03a5": 690,
            "\u03c2": 439,
            "\u2126": 768,
            "\u039e": 645,
            "\u03a8": 795,
            "\u0396": 611,
            "[": 333,
            "\u2234": 863,
            "]": 333,
            "\u22a5": 658,
            "_": 500,
            "\uf8e5": 500,
            "\u03b1": 631,
            "\u03b2": 549,
            "\u03c7": 549,
            "\u03b4": 494,
            "\u03b5": 439,
            "\u03c6": 521,
            "\u03b3": 411,
            "\u03b7": 603,
            "\u03b9": 329,
            "\u03d5": 603,
            "\u03ba": 549,
            "\u03bb": 549,
            "\xb5": 576,
            "\u03bd": 521,
            "\u03bf": 549,
            "\u03c0": 549,
            "\u03b8": 521,
            "\u03c1": 549,
            "\u03c3": 603,
            "\u03c4": 439,
            "\u03c5": 576,
            "\u03d6": 713,
            "\u03c9": 686,
            "\u03be": 493,
            "\u03c8": 686,
            "\u03b6": 494,
            "{": 480,
            "|": 200,
            "}": 480,
            "\u223c": 549,
            "\u20ac": 750,
            "\u03d2": 620,
            "\u2032": 247,
            "\u2264": 549,
            "\u2044": 167,
            "\u221e": 713,
            "\u0192": 500,
            "\u2663": 753,
            "\u2666": 753,
            "\u2665": 753,
            "\u2660": 753,
            "\u2194": 1042,
            "\u2190": 987,
            "\u2191": 603,
            "\u2192": 987,
            "\u2193": 603,
            "\xb0": 400,
            "\xb1": 549,
            "\u2033": 411,
            "\u2265": 549,
            "\xd7": 549,
            "\u221d": 713,
            "\u2202": 494,
            "\u2022": 460,
            "\xf7": 549,
            "\u2260": 549,
            "\u2261": 549,
            "\u2248": 549,
            "\u2026": 1000,
            "\uf8e6": 603,
            "\uf8e7": 1000,
            "\u21b5": 658,
            "\u2135": 823,
            "\u2111": 686,
            "\u211c": 795,
            "\u2118": 987,
            "\u2297": 768,
            "\u2295": 768,
            "\u2205": 823,
            "\u2229": 768,
            "\u222a": 768,
            "\u2283": 713,
            "\u2287": 713,
            "\u2284": 713,
            "\u2282": 713,
            "\u2286": 713,
            "\u2208": 713,
            "\u2209": 713,
            "\u2220": 768,
            "\u2207": 713,
            "\uf6da": 790,
            "\uf6d9": 790,
            "\uf6db": 890,
            "\u220f": 823,
            "\u221a": 549,
            "\u22c5": 250,
            "\xac": 713,
            "\u2227": 603,
            "\u2228": 603,
            "\u21d4": 1042,
            "\u21d0": 987,
            "\u21d1": 603,
            "\u21d2": 987,
            "\u21d3": 603,
            "\u25ca": 494,
            "\u2329": 329,
            "\uf8e8": 790,
            "\uf8e9": 790,
            "\uf8ea": 786,
            "\u2211": 713,
            "\uf8eb": 384,
            "\uf8ec": 384,
            "\uf8ed": 384,
            "\uf8ee": 384,
            "\uf8ef": 384,
            "\uf8f0": 384,
            "\uf8f1": 494,
            "\uf8f2": 494,
            "\uf8f3": 494,
            "\uf8f4": 494,
            "\u232a": 329,
            "\u222b": 274,
            "\u2320": 686,
            "\uf8f5": 686,
            "\u2321": 686,
            "\uf8f6": 384,
            "\uf8f7": 384,
            "\uf8f8": 384,
            "\uf8f9": 384,
            "\uf8fa": 384,
            "\uf8fb": 384,
            "\uf8fc": 494,
            "\uf8fd": 494,
            "\uf8fe": 494,
            "\uf8ff": 790,
        },
    ),
    # Generated from Times-Bold.afm
    # Copyright (c) 1985, 1987, 1989, 1990, 1993, 1997 Adobe Systems Incorporated.  All Rights
    # Reserved.  Times is a trademark of Linotype-Hell AG and/or its subsidiaries.
    "Times-Bold": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Times-Bold",
            family="Times",
            weight="Bold",
            ascent=683,
            descent=-217,
            cap_height=676,
            x_height=461,
            italic_angle=0,
            flags=34,
            bbox=(-168.0, -218.0, 1000.0, 935.0),
        ),
        character_widths={
            " ": 250,
            "default": 500,
            "!": 333,
            '"': 555,
            "#": 500,
            "$": 500,
            "%": 1000,
            "&": 833,
            "\u2019": 333,
            "(": 333,
            ")": 333,
            "*": 500,
            "+": 570,
            ",": 250,
            "-": 333,
            ".": 250,
            "/": 278,
            "0": 500,
            "1": 500,
            "2": 500,
            "3": 500,
            "4": 500,
            "5": 500,
            "6": 500,
            "7": 500,
            "8": 500,
            "9": 500,
            ":": 333,
            ";": 333,
            "<": 570,
            "=": 570,
            ">": 570,
            "?": 500,
            "@": 930,
            "A": 722,
            "B": 667,
            "C": 722,
            "D": 722,
            "E": 667,
            "F": 611,
            "G": 778,
            "H": 778,
            "I": 389,
            "J": 500,
            "K": 778,
            "L": 667,
            "M": 944,
            "N": 722,
            "O": 778,
            "P": 611,
            "Q": 778,
            "R": 722,
            "S": 556,
            "T": 667,
            "U": 722,
            "V": 722,
            "W": 1000,
            "X": 722,
            "Y": 722,
            "Z": 667,
            "[": 333,
            "\\": 278,
            "]": 333,
            "^": 581,
            "_": 500,
            "\u2018": 333,
            "a": 500,
            "b": 556,
            "c": 444,
            "d": 556,
            "e": 444,
            "f": 333,
            "g": 500,
            "h": 556,
            "i": 278,
            "j": 333,
            "k": 556,
            "l": 278,
            "m": 833,
            "n": 556,
            "o": 500,
            "p": 556,
            "q": 556,
            "r": 444,
            "s": 389,
            "t": 333,
            "u": 556,
            "v": 500,
            "w": 722,
            "x": 500,
            "y": 500,
            "z": 444,
            "{": 394,
            "|": 220,
            "}": 394,
            "~": 520,
            "\xa1": 333,
            "\xa2": 500,
            "\xa3": 500,
            "\u2044": 167,
            "\xa5": 500,
            "\u0192": 500,
            "\xa7": 500,
            "\xa4": 500,
            "'": 278,
            "\u201c": 500,
            "\xab": 500,
            "\u2039": 333,
            "\u203a": 333,
            "\ufb01": 556,
            "\ufb02": 556,
            "\u2013": 500,
            "\u2020": 500,
            "\u2021": 500,
            "\xb7": 250,
            "\xb6": 540,
            "\u2022": 350,
            "\u201a": 333,
            "\u201e": 500,
            "\u201d": 500,
            "\xbb": 500,
            "\u2026": 1000,
            "\u2030": 1000,
            "\xbf": 500,
            "`": 333,
            "\xb4": 333,
            "\u02c6": 333,
            "\u02dc": 333,
            "\xaf": 333,
            "\u02d8": 333,
            "\u02d9": 333,
            "\xa8": 333,
            "\u02da": 333,
            "\xb8": 333,
            "\u02dd": 333,
            "\u02db": 333,
            "\u02c7": 333,
            "\u2014": 1000,
            "\xc6": 1000,
            "\xaa": 300,
            "\u0141": 667,
            "\xd8": 778,
            "\u0152": 1000,
            "\xba": 330,
            "\xe6": 722,
            "\u0131": 278,
            "\u0142": 278,
            "\xf8": 500,
            "\u0153": 722,
            "\xdf": 556,
            "\xcf": 389,
            "\xe9": 444,
            "\u0103": 500,
            "\u0171": 556,
            "\u011b": 444,
            "\u0178": 722,
            "\xf7": 570,
            "\xdd": 722,
            "\xc2": 722,
            "\xe1": 500,
            "\xdb": 722,
            "\xfd": 500,
            "\u0219": 389,
            "\xea": 444,
            "\u016e": 722,
            "\xdc": 722,
            "\u0105": 500,
            "\xda": 722,
            "\u0173": 556,
            "\xcb": 667,
            "\u0110": 722,
            "\uf6c3": 250,
            "\xa9": 747,
            "\u0112": 667,
            "\u010d": 444,
            "\xe5": 500,
            "\u0145": 722,
            "\u013a": 278,
            "\xe0": 500,
            "\u0162": 667,
            "\u0106": 722,
            "\xe3": 500,
            "\u0116": 667,
            "\u0161": 389,
            "\u015f": 389,
            "\xed": 278,
            "\u25ca": 494,
            "\u0158": 722,
            "\u0122": 778,
            "\xfb": 556,
            "\xe2": 500,
            "\u0100": 722,
            "\u0159": 444,
            "\xe7": 444,
            "\u017b": 667,
            "\xde": 611,
            "\u014c": 778,
            "\u0154": 722,
            "\u015a": 556,
            "\u010f": 672,
            "\u016a": 722,
            "\u016f": 556,
            "\xb3": 300,
            "\xd2": 778,
            "\xc0": 722,
            "\u0102": 722,
            "\xd7": 570,
            "\xfa": 556,
            "\u0164": 667,
            "\u2202": 494,
            "\xff": 500,
            "\u0143": 722,
            "\xee": 278,
            "\xca": 667,
            "\xe4": 500,
            "\xeb": 444,
            "\u0107": 444,
            "\u0144": 556,
            "\u016b": 556,
            "\u0147": 722,
            "\xcd": 389,
            "\xb1": 570,
            "\xa6": 220,
            "\xae": 747,
            "\u011e": 778,
            "\u0130": 389,
            "\u2211": 600,
            "\xc8": 667,
            "\u0155": 444,
            "\u014d": 500,
            "\u0179": 667,
            "\u017d": 667,
            "\u2265": 549,
            "\xd0": 722,
            "\xc7": 722,
            "\u013c": 278,
            "\u0165": 416,
            "\u0119": 444,
            "\u0172": 722,
            "\xc1": 722,
            "\xc4": 722,
            "\xe8": 444,
            "\u017a": 444,
            "\u012f": 278,
            "\xd3": 778,
            "\xf3": 500,
            "\u0101": 500,
            "\u015b": 389,
            "\xef": 278,
            "\xd4": 778,
            "\xd9": 722,
            "\u2206": 612,
            "\xfe": 556,
            "\xb2": 300,
            "\xd6": 778,
            "\xb5": 556,
            "\xec": 278,
            "\u0151": 500,
            "\u0118": 667,
            "\u0111": 556,
            "\xbe": 750,
            "\u015e": 556,
            "\u013e": 394,
            "\u0136": 778,
            "\u0139": 667,
            "\u2122": 1000,
            "\u0117": 444,
            "\xcc": 389,
            "\u012a": 389,
            "\u013d": 667,
            "\xbd": 750,
            "\u2264": 549,
            "\xf4": 500,
            "\xf1": 556,
            "\u0170": 722,
            "\xc9": 667,
            "\u0113": 444,
            "\u011f": 500,
            "\xbc": 750,
            "\u0160": 556,
            "\u0218": 556,
            "\u0150": 778,
            "\xb0": 400,
            "\xf2": 500,
            "\u010c": 722,
            "\xf9": 556,
            "\u221a": 549,
            "\u010e": 722,
            "\u0157": 444,
            "\xd1": 722,
            "\xf5": 500,
            "\u0156": 722,
            "\u013b": 667,
            "\xc3": 722,
            "\u0104": 722,
            "\xc5": 722,
            "\xd5": 778,
            "\u017c": 444,
            "\u011a": 667,
            "\u012e": 389,
            "\u0137": 556,
            "\u2212": 570,
            "\xce": 389,
            "\u0148": 556,
            "\u0163": 333,
            "\xac": 570,
            "\xf6": 500,
            "\xfc": 556,
            "\u2260": 549,
            "\u0123": 500,
            "\xf0": 500,
            "\u017e": 444,
            "\u0146": 556,
            "\xb9": 300,
            "\u012b": 278,
            "\u20ac": 500,
        },
    ),
    # Generated from Times-BoldItalic.afm
    # Copyright (c) 1985, 1987, 1989, 1990, 1993, 1997 Adobe Systems Incorporated.  All Rights
    # Reserved.  Times is a trademark of Linotype-Hell AG and/or its subsidiaries.
    "Times-BoldItalic": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Times-BoldItalic",
            family="Times",
            weight="Bold",
            ascent=683,
            descent=-217,
            cap_height=669,
            x_height=462,
            italic_angle=-15,
            flags=98,
            bbox=(-200.0, -218.0, 996.0, 921.0),
        ),
        character_widths={
            " ": 250,
            "default": 500,
            "!": 389,
            '"': 555,
            "#": 500,
            "$": 500,
            "%": 833,
            "&": 778,
            "\u2019": 333,
            "(": 333,
            ")": 333,
            "*": 500,
            "+": 570,
            ",": 250,
            "-": 333,
            ".": 250,
            "/": 278,
            "0": 500,
            "1": 500,
            "2": 500,
            "3": 500,
            "4": 500,
            "5": 500,
            "6": 500,
            "7": 500,
            "8": 500,
            "9": 500,
            ":": 333,
            ";": 333,
            "<": 570,
            "=": 570,
            ">": 570,
            "?": 500,
            "@": 832,
            "A": 667,
            "B": 667,
            "C": 667,
            "D": 722,
            "E": 667,
            "F": 667,
            "G": 722,
            "H": 778,
            "I": 389,
            "J": 500,
            "K": 667,
            "L": 611,
            "M": 889,
            "N": 722,
            "O": 722,
            "P": 611,
            "Q": 722,
            "R": 667,
            "S": 556,
            "T": 611,
            "U": 722,
            "V": 667,
            "W": 889,
            "X": 667,
            "Y": 611,
            "Z": 611,
            "[": 333,
            "\\": 278,
            "]": 333,
            "^": 570,
            "_": 500,
            "\u2018": 333,
            "a": 500,
            "b": 500,
            "c": 444,
            "d": 500,
            "e": 444,
            "f": 333,
            "g": 500,
            "h": 556,
            "i": 278,
            "j": 278,
            "k": 500,
            "l": 278,
            "m": 778,
            "n": 556,
            "o": 500,
            "p": 500,
            "q": 500,
            "r": 389,
            "s": 389,
            "t": 278,
            "u": 556,
            "v": 444,
            "w": 667,
            "x": 500,
            "y": 444,
            "z": 389,
            "{": 348,
            "|": 220,
            "}": 348,
            "~": 570,
            "\xa1": 389,
            "\xa2": 500,
            "\xa3": 500,
            "\u2044": 167,
            "\xa5": 500,
            "\u0192": 500,
            "\xa7": 500,
            "\xa4": 500,
            "'": 278,
            "\u201c": 500,
            "\xab": 500,
            "\u2039": 333,
            "\u203a": 333,
            "\ufb01": 556,
            "\ufb02": 556,
            "\u2013": 500,
            "\u2020": 500,
            "\u2021": 500,
            "\xb7": 250,
            "\xb6": 500,
            "\u2022": 350,
            "\u201a": 333,
            "\u201e": 500,
            "\u201d": 500,
            "\xbb": 500,
            "\u2026": 1000,
            "\u2030": 1000,
            "\xbf": 500,
            "`": 333,
            "\xb4": 333,
            "\u02c6": 333,
            "\u02dc": 333,
            "\xaf": 333,
            "\u02d8": 333,
            "\u02d9": 333,
            "\xa8": 333,
            "\u02da": 333,
            "\xb8": 333,
            "\u02dd": 333,
            "\u02db": 333,
            "\u02c7": 333,
            "\u2014": 1000,
            "\xc6": 944,
            "\xaa": 266,
            "\u0141": 611,
            "\xd8": 722,
            "\u0152": 944,
            "\xba": 300,
            "\xe6": 722,
            "\u0131": 278,
            "\u0142": 278,
            "\xf8": 500,
            "\u0153": 722,
            "\xdf": 500,
            "\xcf": 389,
            "\xe9": 444,
            "\u0103": 500,
            "\u0171": 556,
            "\u011b": 444,
            "\u0178": 611,
            "\xf7": 570,
            "\xdd": 611,
            "\xc2": 667,
            "\xe1": 500,
            "\xdb": 722,
            "\xfd": 444,
            "\u0219": 389,
            "\xea": 444,
            "\u016e": 722,
            "\xdc": 722,
            "\u0105": 500,
            "\xda": 722,
            "\u0173": 556,
            "\xcb": 667,
            "\u0110": 722,
            "\uf6c3": 250,
            "\xa9": 747,
            "\u0112": 667,
            "\u010d": 444,
            "\xe5": 500,
            "\u0145": 722,
            "\u013a": 278,
            "\xe0": 500,
            "\u0162": 611,
            "\u0106": 667,
            "\xe3": 500,
            "\u0116": 667,
            "\u0161": 389,
            "\u015f": 389,
            "\xed": 278,
            "\u25ca": 494,
            "\u0158": 667,
            "\u0122": 722,
            "\xfb": 556,
            "\xe2": 500,
            "\u0100": 667,
            "\u0159": 389,
            "\xe7": 444,
            "\u017b": 611,
            "\xde": 611,
            "\u014c": 722,
            "\u0154": 667,
            "\u015a": 556,
            "\u010f": 608,
            "\u016a": 722,
            "\u016f": 556,
            "\xb3": 300,
            "\xd2": 722,
            "\xc0": 667,
            "\u0102": 667,
            "\xd7": 570,
            "\xfa": 556,
            "\u0164": 611,
            "\u2202": 494,
            "\xff": 444,
            "\u0143": 722,
            "\xee": 278,
            "\xca": 667,
            "\xe4": 500,
            "\xeb": 444,
            "\u0107": 444,
            "\u0144": 556,
            "\u016b": 556,
            "\u0147": 722,
            "\xcd": 389,
            "\xb1": 570,
            "\xa6": 220,
            "\xae": 747,
            "\u011e": 722,
            "\u0130": 389,
            "\u2211": 600,
            "\xc8": 667,
            "\u0155": 389,
            "\u014d": 500,
            "\u0179": 611,
            "\u017d": 611,
            "\u2265": 549,
            "\xd0": 722,
            "\xc7": 667,
            "\u013c": 278,
            "\u0165": 366,
            "\u0119": 444,
            "\u0172": 722,
            "\xc1": 667,
            "\xc4": 667,
            "\xe8": 444,
            "\u017a": 389,
            "\u012f": 278,
            "\xd3": 722,
            "\xf3": 500,
            "\u0101": 500,
            "\u015b": 389,
            "\xef": 278,
            "\xd4": 722,
            "\xd9": 722,
            "\u2206": 612,
            "\xfe": 500,
            "\xb2": 300,
            "\xd6": 722,
            "\xb5": 576,
            "\xec": 278,
            "\u0151": 500,
            "\u0118": 667,
            "\u0111": 500,
            "\xbe": 750,
            "\u015e": 556,
            "\u013e": 382,
            "\u0136": 667,
            "\u0139": 611,
            "\u2122": 1000,
            "\u0117": 444,
            "\xcc": 389,
            "\u012a": 389,
            "\u013d": 611,
            "\xbd": 750,
            "\u2264": 549,
            "\xf4": 500,
            "\xf1": 556,
            "\u0170": 722,
            "\xc9": 667,
            "\u0113": 444,
            "\u011f": 500,
            "\xbc": 750,
            "\u0160": 556,
            "\u0218": 556,
            "\u0150": 722,
            "\xb0": 400,
            "\xf2": 500,
            "\u010c": 667,
            "\xf9": 556,
            "\u221a": 549,
            "\u010e": 722,
            "\u0157": 389,
            "\xd1": 722,
            "\xf5": 500,
            "\u0156": 667,
            "\u013b": 611,
            "\xc3": 667,
            "\u0104": 667,
            "\xc5": 667,
            "\xd5": 722,
            "\u017c": 389,
            "\u011a": 667,
            "\u012e": 389,
            "\u0137": 500,
            "\u2212": 606,
            "\xce": 389,
            "\u0148": 556,
            "\u0163": 278,
            "\xac": 606,
            "\xf6": 500,
            "\xfc": 556,
            "\u2260": 549,
            "\u0123": 500,
            "\xf0": 500,
            "\u017e": 389,
            "\u0146": 556,
            "\xb9": 300,
            "\u012b": 278,
            "\u20ac": 500,
        },
    ),
    # Generated from Times-Italic.afm
    # Copyright (c) 1985, 1987, 1989, 1990, 1993, 1997 Adobe Systems Incorporated.  All Rights
    # Reserved.  Times is a trademark of Linotype-Hell AG and/or its subsidiaries.
    "Times-Italic": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Times-Italic",
            family="Times",
            weight="Medium",
            ascent=683,
            descent=-217,
            cap_height=653,
            x_height=441,
            italic_angle=-15.5,
            flags=98,
            bbox=(-169.0, -217.0, 1010.0, 883.0),
        ),
        character_widths={
            " ": 250,
            "default": 500,
            "!": 333,
            '"': 420,
            "#": 500,
            "$": 500,
            "%": 833,
            "&": 778,
            "\u2019": 333,
            "(": 333,
            ")": 333,
            "*": 500,
            "+": 675,
            ",": 250,
            "-": 333,
            ".": 250,
            "/": 278,
            "0": 500,
            "1": 500,
            "2": 500,
            "3": 500,
            "4": 500,
            "5": 500,
            "6": 500,
            "7": 500,
            "8": 500,
            "9": 500,
            ":": 333,
            ";": 333,
            "<": 675,
            "=": 675,
            ">": 675,
            "?": 500,
            "@": 920,
            "A": 611,
            "B": 611,
            "C": 667,
            "D": 722,
            "E": 611,
            "F": 611,
            "G": 722,
            "H": 722,
            "I": 333,
            "J": 444,
            "K": 667,
            "L": 556,
            "M": 833,
            "N": 667,
            "O": 722,
            "P": 611,
            "Q": 722,
            "R": 611,
            "S": 500,
            "T": 556,
            "U": 722,
            "V": 611,
            "W": 833,
            "X": 611,
            "Y": 556,
            "Z": 556,
            "[": 389,
            "\\": 278,
            "]": 389,
            "^": 422,
            "_": 500,
            "\u2018": 333,
            "a": 500,
            "b": 500,
            "c": 444,
            "d": 500,
            "e": 444,
            "f": 278,
            "g": 500,
            "h": 500,
            "i": 278,
            "j": 278,
            "k": 444,
            "l": 278,
            "m": 722,
            "n": 500,
            "o": 500,
            "p": 500,
            "q": 500,
            "r": 389,
            "s": 389,
            "t": 278,
            "u": 500,
            "v": 444,
            "w": 667,
            "x": 444,
            "y": 444,
            "z": 389,
            "{": 400,
            "|": 275,
            "}": 400,
            "~": 541,
            "\xa1": 389,
            "\xa2": 500,
            "\xa3": 500,
            "\u2044": 167,
            "\xa5": 500,
            "\u0192": 500,
            "\xa7": 500,
            "\xa4": 500,
            "'": 214,
            "\u201c": 556,
            "\xab": 500,
            "\u2039": 333,
            "\u203a": 333,
            "\ufb01": 500,
            "\ufb02": 500,
            "\u2013": 500,
            "\u2020": 500,
            "\u2021": 500,
            "\xb7": 250,
            "\xb6": 523,
            "\u2022": 350,
            "\u201a": 333,
            "\u201e": 556,
            "\u201d": 556,
            "\xbb": 500,
            "\u2026": 889,
            "\u2030": 1000,
            "\xbf": 500,
            "`": 333,
            "\xb4": 333,
            "\u02c6": 333,
            "\u02dc": 333,
            "\xaf": 333,
            "\u02d8": 333,
            "\u02d9": 333,
            "\xa8": 333,
            "\u02da": 333,
            "\xb8": 333,
            "\u02dd": 333,
            "\u02db": 333,
            "\u02c7": 333,
            "\u2014": 889,
            "\xc6": 889,
            "\xaa": 276,
            "\u0141": 556,
            "\xd8": 722,
            "\u0152": 944,
            "\xba": 310,
            "\xe6": 667,
            "\u0131": 278,
            "\u0142": 278,
            "\xf8": 500,
            "\u0153": 667,
            "\xdf": 500,
            "\xcf": 333,
            "\xe9": 444,
            "\u0103": 500,
            "\u0171": 500,
            "\u011b": 444,
            "\u0178": 556,
            "\xf7": 675,
            "\xdd": 556,
            "\xc2": 611,
            "\xe1": 500,
            "\xdb": 722,
            "\xfd": 444,
            "\u0219": 389,
            "\xea": 444,
            "\u016e": 722,
            "\xdc": 722,
            "\u0105": 500,
            "\xda": 722,
            "\u0173": 500,
            "\xcb": 611,
            "\u0110": 722,
            "\uf6c3": 250,
            "\xa9": 760,
            "\u0112": 611,
            "\u010d": 444,
            "\xe5": 500,
            "\u0145": 667,
            "\u013a": 278,
            "\xe0": 500,
            "\u0162": 556,
            "\u0106": 667,
            "\xe3": 500,
            "\u0116": 611,
            "\u0161": 389,
            "\u015f": 389,
            "\xed": 278,
            "\u25ca": 471,
            "\u0158": 611,
            "\u0122": 722,
            "\xfb": 500,
            "\xe2": 500,
            "\u0100": 611,
            "\u0159": 389,
            "\xe7": 444,
            "\u017b": 556,
            "\xde": 611,
            "\u014c": 722,
            "\u0154": 611,
            "\u015a": 500,
            "\u010f": 544,
            "\u016a": 722,
            "\u016f": 500,
            "\xb3": 300,
            "\xd2": 722,
            "\xc0": 611,
            "\u0102": 611,
            "\xd7": 675,
            "\xfa": 500,
            "\u0164": 556,
            "\u2202": 476,
            "\xff": 444,
            "\u0143": 667,
            "\xee": 278,
            "\xca": 611,
            "\xe4": 500,
            "\xeb": 444,
            "\u0107": 444,
            "\u0144": 500,
            "\u016b": 500,
            "\u0147": 667,
            "\xcd": 333,
            "\xb1": 675,
            "\xa6": 275,
            "\xae": 760,
            "\u011e": 722,
            "\u0130": 333,
            "\u2211": 600,
            "\xc8": 611,
            "\u0155": 389,
            "\u014d": 500,
            "\u0179": 556,
            "\u017d": 556,
            "\u2265": 549,
            "\xd0": 722,
            "\xc7": 667,
            "\u013c": 278,
            "\u0165": 300,
            "\u0119": 444,
            "\u0172": 722,
            "\xc1": 611,
            "\xc4": 611,
            "\xe8": 444,
            "\u017a": 389,
            "\u012f": 278,
            "\xd3": 722,
            "\xf3": 500,
            "\u0101": 500,
            "\u015b": 389,
            "\xef": 278,
            "\xd4": 722,
            "\xd9": 722,
            "\u2206": 612,
            "\xfe": 500,
            "\xb2": 300,
            "\xd6": 722,
            "\xb5": 500,
            "\xec": 278,
            "\u0151": 500,
            "\u0118": 611,
            "\u0111": 500,
            "\xbe": 750,
            "\u015e": 500,
            "\u013e": 300,
            "\u0136": 667,
            "\u0139": 556,
            "\u2122": 980,
            "\u0117": 444,
            "\xcc": 333,
            "\u012a": 333,
            "\u013d": 611,
            "\xbd": 750,
            "\u2264": 549,
            "\xf4": 500,
            "\xf1": 500,
            "\u0170": 722,
            "\xc9": 611,
            "\u0113": 444,
            "\u011f": 500,
            "\xbc": 750,
            "\u0160": 500,
            "\u0218": 500,
            "\u0150": 722,
            "\xb0": 400,
            "\xf2": 500,
            "\u010c": 667,
            "\xf9": 500,
            "\u221a": 453,
            "\u010e": 722,
            "\u0157": 389,
            "\xd1": 667,
            "\xf5": 500,
            "\u0156": 611,
            "\u013b": 556,
            "\xc3": 611,
            "\u0104": 611,
            "\xc5": 611,
            "\xd5": 722,
            "\u017c": 389,
            "\u011a": 611,
            "\u012e": 333,
            "\u0137": 444,
            "\u2212": 675,
            "\xce": 333,
            "\u0148": 500,
            "\u0163": 278,
            "\xac": 675,
            "\xf6": 500,
            "\xfc": 500,
            "\u2260": 549,
            "\u0123": 500,
            "\xf0": 500,
            "\u017e": 389,
            "\u0146": 500,
            "\xb9": 300,
            "\u012b": 278,
            "\u20ac": 500,
        },
    ),
    # Generated from Times-Roman.afm
    # Copyright (c) 1985, 1987, 1989, 1990, 1993, 1997 Adobe Systems Incorporated.  All Rights
    # Reserved.  Times is a trademark of Linotype-Hell AG and/or its subsidiaries.
    "Times-Roman": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="Times-Roman",
            family="Times",
            weight="Roman",
            ascent=683,
            descent=-217,
            cap_height=662,
            x_height=450,
            italic_angle=0,
            flags=34,
            bbox=(-168.0, -218.0, 1000.0, 898.0),
        ),
        character_widths={
            " ": 250,
            "default": 500,
            "!": 333,
            '"': 408,
            "#": 500,
            "$": 500,
            "%": 833,
            "&": 778,
            "\u2019": 333,
            "(": 333,
            ")": 333,
            "*": 500,
            "+": 564,
            ",": 250,
            "-": 333,
            ".": 250,
            "/": 278,
            "0": 500,
            "1": 500,
            "2": 500,
            "3": 500,
            "4": 500,
            "5": 500,
            "6": 500,
            "7": 500,
            "8": 500,
            "9": 500,
            ":": 278,
            ";": 278,
            "<": 564,
            "=": 564,
            ">": 564,
            "?": 444,
            "@": 921,
            "A": 722,
            "B": 667,
            "C": 667,
            "D": 722,
            "E": 611,
            "F": 556,
            "G": 722,
            "H": 722,
            "I": 333,
            "J": 389,
            "K": 722,
            "L": 611,
            "M": 889,
            "N": 722,
            "O": 722,
            "P": 556,
            "Q": 722,
            "R": 667,
            "S": 556,
            "T": 611,
            "U": 722,
            "V": 722,
            "W": 944,
            "X": 722,
            "Y": 722,
            "Z": 611,
            "[": 333,
            "\\": 278,
            "]": 333,
            "^": 469,
            "_": 500,
            "\u2018": 333,
            "a": 444,
            "b": 500,
            "c": 444,
            "d": 500,
            "e": 444,
            "f": 333,
            "g": 500,
            "h": 500,
            "i": 278,
            "j": 278,
            "k": 500,
            "l": 278,
            "m": 778,
            "n": 500,
            "o": 500,
            "p": 500,
            "q": 500,
            "r": 333,
            "s": 389,
            "t": 278,
            "u": 500,
            "v": 500,
            "w": 722,
            "x": 500,
            "y": 500,
            "z": 444,
            "{": 480,
            "|": 200,
            "}": 480,
            "~": 541,
            "\xa1": 333,
            "\xa2": 500,
            "\xa3": 500,
            "\u2044": 167,
            "\xa5": 500,
            "\u0192": 500,
            "\xa7": 500,
            "\xa4": 500,
            "'": 180,
            "\u201c": 444,
            "\xab": 500,
            "\u2039": 333,
            "\u203a": 333,
            "\ufb01": 556,
            "\ufb02": 556,
            "\u2013": 500,
            "\u2020": 500,
            "\u2021": 500,
            "\xb7": 250,
            "\xb6": 453,
            "\u2022": 350,
            "\u201a": 333,
            "\u201e": 444,
            "\u201d": 444,
            "\xbb": 500,
            "\u2026": 1000,
            "\u2030": 1000,
            "\xbf": 444,
            "`": 333,
            "\xb4": 333,
            "\u02c6": 333,
            "\u02dc": 333,
            "\xaf": 333,
            "\u02d8": 333,
            "\u02d9": 333,
            "\xa8": 333,
            "\u02da": 333,
            "\xb8": 333,
            "\u02dd": 333,
            "\u02db": 333,
            "\u02c7": 333,
            "\u2014": 1000,
            "\xc6": 889,
            "\xaa": 276,
            "\u0141": 611,
            "\xd8": 722,
            "\u0152": 889,
            "\xba": 310,
            "\xe6": 667,
            "\u0131": 278,
            "\u0142": 278,
            "\xf8": 500,
            "\u0153": 722,
            "\xdf": 500,
            "\xcf": 333,
            "\xe9": 444,
            "\u0103": 444,
            "\u0171": 500,
            "\u011b": 444,
            "\u0178": 722,
            "\xf7": 564,
            "\xdd": 722,
            "\xc2": 722,
            "\xe1": 444,
            "\xdb": 722,
            "\xfd": 500,
            "\u0219": 389,
            "\xea": 444,
            "\u016e": 722,
            "\xdc": 722,
            "\u0105": 444,
            "\xda": 722,
            "\u0173": 500,
            "\xcb": 611,
            "\u0110": 722,
            "\uf6c3": 250,
            "\xa9": 760,
            "\u0112": 611,
            "\u010d": 444,
            "\xe5": 444,
            "\u0145": 722,
            "\u013a": 278,
            "\xe0": 444,
            "\u0162": 611,
            "\u0106": 667,
            "\xe3": 444,
            "\u0116": 611,
            "\u0161": 389,
            "\u015f": 389,
            "\xed": 278,
            "\u25ca": 471,
            "\u0158": 667,
            "\u0122": 722,
            "\xfb": 500,
            "\xe2": 444,
            "\u0100": 722,
            "\u0159": 333,
            "\xe7": 444,
            "\u017b": 611,
            "\xde": 556,
            "\u014c": 722,
            "\u0154": 667,
            "\u015a": 556,
            "\u010f": 588,
            "\u016a": 722,
            "\u016f": 500,
            "\xb3": 300,
            "\xd2": 722,
            "\xc0": 722,
            "\u0102": 722,
            "\xd7": 564,
            "\xfa": 500,
            "\u0164": 611,
            "\u2202": 476,
            "\xff": 500,
            "\u0143": 722,
            "\xee": 278,
            "\xca": 611,
            "\xe4": 444,
            "\xeb": 444,
            "\u0107": 444,
            "\u0144": 500,
            "\u016b": 500,
            "\u0147": 722,
            "\xcd": 333,
            "\xb1": 564,
            "\xa6": 200,
            "\xae": 760,
            "\u011e": 722,
            "\u0130": 333,
            "\u2211": 600,
            "\xc8": 611,
            "\u0155": 333,
            "\u014d": 500,
            "\u0179": 611,
            "\u017d": 611,
            "\u2265": 549,
            "\xd0": 722,
            "\xc7": 667,
            "\u013c": 278,
            "\u0165": 326,
            "\u0119": 444,
            "\u0172": 722,
            "\xc1": 722,
            "\xc4": 722,
            "\xe8": 444,
            "\u017a": 444,
            "\u012f": 278,
            "\xd3": 722,
            "\xf3": 500,
            "\u0101": 444,
            "\u015b": 389,
            "\xef": 278,
            "\xd4": 722,
            "\xd9": 722,
            "\u2206": 612,
            "\xfe": 500,
            "\xb2": 300,
            "\xd6": 722,
            "\xb5": 500,
            "\xec": 278,
            "\u0151": 500,
            "\u0118": 611,
            "\u0111": 500,
            "\xbe": 750,
            "\u015e": 556,
            "\u013e": 344,
            "\u0136": 722,
            "\u0139": 611,
            "\u2122": 980,
            "\u0117": 444,
            "\xcc": 333,
            "\u012a": 333,
            "\u013d": 611,
            "\xbd": 750,
            "\u2264": 549,
            "\xf4": 500,
            "\xf1": 500,
            "\u0170": 722,
            "\xc9": 611,
            "\u0113": 444,
            "\u011f": 500,
            "\xbc": 750,
            "\u0160": 556,
            "\u0218": 556,
            "\u0150": 722,
            "\xb0": 400,
            "\xf2": 500,
            "\u010c": 667,
            "\xf9": 500,
            "\u221a": 453,
            "\u010e": 722,
            "\u0157": 333,
            "\xd1": 722,
            "\xf5": 500,
            "\u0156": 667,
            "\u013b": 611,
            "\xc3": 722,
            "\u0104": 722,
            "\xc5": 722,
            "\xd5": 722,
            "\u017c": 444,
            "\u011a": 611,
            "\u012e": 333,
            "\u0137": 500,
            "\u2212": 564,
            "\xce": 333,
            "\u0148": 500,
            "\u0163": 278,
            "\xac": 564,
            "\xf6": 500,
            "\xfc": 500,
            "\u2260": 549,
            "\u0123": 500,
            "\xf0": 500,
            "\u017e": 444,
            "\u0146": 500,
            "\xb9": 300,
            "\u012b": 278,
            "\u20ac": 500,
        },
    ),
    # Generated from ZapfDingbats.afm
    # Copyright (c) 1985, 1987, 1988, 1989, 1997 Adobe Systems Incorporated. All Rights Reserved.
    # ITC Zapf Dingbats is a registered trademark of International Typeface Corporation.
    "ZapfDingbats": CoreFontMetrics(
        font_descriptor=FontDescriptor(
            name="ZapfDingbats",
            family="ZapfDingbats",
            weight="Medium",
            ascent=0.0,
            descent=0.0,
            cap_height=0.0,
            x_height=0.0,
            italic_angle=0,
            flags=4,
            bbox=(-1.0, -143.0, 981.0, 820.0),
        ),
        character_widths={
            " ": 790,
            "default": 1580,
            "\x01": 974,
            "\x02": 961,
            "\xca": 974,
            "\x03": 980,
            "\x04": 719,
            "\x05": 789,
            "w": 790,
            "v": 791,
            "u": 690,
            "\x0b": 960,
            "\x0c": 939,
            "\r": 549,
            "\x0e": 855,
            "\x0f": 911,
            "\x10": 933,
            "i": 911,
            "\x11": 945,
            "\x12": 974,
            "\x13": 755,
            "\x14": 846,
            "\x15": 762,
            "\x16": 761,
            "\x17": 571,
            "\x18": 677,
            "\x19": 763,
            "\x1a": 760,
            "\x1b": 759,
            "\x1c": 754,
            "\x06": 494,
            "\x07": 552,
            "\x08": 537,
            "\t": 577,
            "\n": 692,
            "\x1d": 786,
            "\x1e": 788,
            "\x1f": 788,
            "!": 793,
            '"': 794,
            "#": 816,
            "$": 823,
            "%": 789,
            "&": 841,
            "'": 823,
            "(": 833,
            ")": 816,
            "*": 831,
            "+": 923,
            ",": 744,
            "-": 723,
            ".": 749,
            "/": 790,
            "0": 792,
            "1": 695,
            "2": 776,
            "3": 768,
            "4": 792,
            "5": 759,
            "6": 707,
            "7": 708,
            "8": 682,
            "9": 701,
            ":": 826,
            ";": 815,
            "<": 789,
            "=": 789,
            ">": 707,
            "?": 687,
            "@": 696,
            "A": 689,
            "B": 786,
            "C": 787,
            "D": 713,
            "E": 791,
            "F": 785,
            "G": 791,
            "H": 873,
            "I": 761,
            "J": 762,
            "\xcb": 762,
            "K": 759,
            "\xcc": 759,
            "L": 892,
            "M": 892,
            "N": 788,
            "O": 784,
            "Q": 438,
            "R": 138,
            "S": 277,
            "T": 415,
            "a": 392,
            "b": 392,
            "c": 668,
            "d": 668,
            "Y": 390,
            "Z": 390,
            "]": 317,
            "^": 317,
            "[": 276,
            "\\": 276,
            "\xcd": 509,
            "U": 509,
            "\xce": 410,
            "V": 410,
            "W": 234,
            "X": 234,
            "_": 334,
            "`": 334,
            "e": 732,
            "f": 544,
            "g": 544,
            "h": 910,
            "j": 667,
            "k": 760,
            "l": 760,
            "p": 776,
            "o": 595,
            "n": 694,
            "m": 626,
            "x": 788,
            "y": 788,
            "z": 788,
            "{": 788,
            "|": 788,
            "}": 788,
            "~": 788,
            "\x7f": 788,
            "\x80": 788,
            "\x81": 788,
            "\x82": 788,
            "\x83": 788,
            "\x84": 788,
            "\x85": 788,
            "\x86": 788,
            "\x87": 788,
            "\x88": 788,
            "\x89": 788,
            "\x8a": 788,
            "\x8b": 788,
            "\x8c": 788,
            "\x8d": 788,
            "\x8e": 788,
            "\x8f": 788,
            "\x90": 788,
            "\x91": 788,
            "\x92": 788,
            "\x93": 788,
            "\x94": 788,
            "\x95": 788,
            "\x96": 788,
            "\x97": 788,
            "\x98": 788,
            "\x99": 788,
            "\x9a": 788,
            "\x9b": 788,
            "\x9c": 788,
            "\x9d": 788,
            "\x9e": 788,
            "\x9f": 788,
            "\xa0": 894,
            "\xa1": 838,
            "\xa3": 1016,
            "\xa4": 458,
            "\xc4": 748,
            "\xa5": 924,
            "\xc0": 748,
            "\xa6": 918,
            "\xa7": 927,
            "\xa8": 928,
            "\xa9": 928,
            "\xaa": 834,
            "\xab": 873,
            "\xac": 828,
            "\xad": 924,
            "\xa2": 924,
            "\xae": 917,
            "\xaf": 930,
            "\xb0": 931,
            "\xb1": 463,
            "\xb2": 883,
            "\xb3": 836,
            "\xc1": 836,
            "\xb4": 867,
            "\xc7": 867,
            "\xb5": 696,
            "\xc8": 696,
            "\xb6": 874,
            "\xc9": 874,
            "\xb7": 760,
            "\xb8": 946,
            "\xc5": 771,
            "\xb9": 865,
            "\xc2": 771,
            "\xc6": 888,
            "\xba": 967,
            "\xc3": 888,
            "\xbb": 831,
            "\xbc": 873,
            "\xbd": 927,
            "\xbe": 970,
            "\xbf": 918,
        },
    ),
}


# Register the alternative font names from table H.3 on pp. 1109-1110 of the
# PDF 1.7 reference. Every alias points at the very same metrics object as the
# standard font it stands for (no copy is made).
CORE_FONT_METRICS.update(
    {
        alias: CORE_FONT_METRICS[standard_name]
        for alias, standard_name in (
            ("Arial", "Helvetica"),
            ("Arial,Italic", "Helvetica-Oblique"),
            ("Arial,Bold", "Helvetica-Bold"),
            ("Arial,BoldItalic", "Helvetica-BoldOblique"),
            ("CourierNew", "Courier"),
            ("CourierNew,Italic", "Courier-Oblique"),
            ("CourierNew,Bold", "Courier-Bold"),
            ("CourierNew,BoldItalic", "Courier-BoldOblique"),
            ("TimesNewRoman", "Times-Roman"),
            ("TimesNewRoman,Italic", "Times-Italic"),
            ("TimesNewRoman,Bold", "Times-Bold"),
            ("TimesNewRoman,BoldItalic", "Times-BoldItalic"),
        )
    }
)


================================================
FILE: pypdf/_codecs/pdfdoc.py
================================================
# PDFDocEncoding Character Set: Table D.2 of PDF Reference 1.7
# C.1 Predefined encodings sorted by character name of another PDF reference
#
# The list is indexed by byte value (0 - 255); each entry is the single
# character that byte maps to.  Runs where the character's code point equals
# the byte value are generated with ``chr``; everything else is spelled out.
#
# Some indices have '\u0000' although they should have something else:
# 22: should be '\u0017'
# Besides index 0, the indices 22, 127, 159 and 173 hold '\u0000'.
_pdfdoc_encoding = [
    *map(chr, range(0x16)),  # 0 - 21
    "\u0000",  # 22
    "\u0017",  # 23
    "\u02d8", "\u02c7", "\u02c6", "\u02d9", "\u02dd", "\u02db", "\u02da", "\u02dc",  # 24 - 31
    *map(chr, range(0x20, 0x7F)),  # 32 - 126
    "\u0000",  # 127
    "\u2022", "\u2020", "\u2021", "\u2026", "\u2014", "\u2013", "\u0192", "\u2044",  # 128 - 135
    "\u2039", "\u203a", "\u2212", "\u2030", "\u201e", "\u201c", "\u201d", "\u2018",  # 136 - 143
    "\u2019", "\u201a", "\u2122", "\ufb01", "\ufb02", "\u0141", "\u0152", "\u0160",  # 144 - 151
    "\u0178", "\u017d", "\u0131", "\u0142", "\u0153", "\u0161", "\u017e", "\u0000",  # 152 - 159
    "\u20ac",  # 160
    *map(chr, range(0xA1, 0xAD)),  # 161 - 172
    "\u0000",  # 173
    *map(chr, range(0xAE, 0x100)),  # 174 - 255
]

# One entry per possible byte value.
assert len(_pdfdoc_encoding) == 256


================================================
FILE: pypdf/_codecs/std.py
================================================
# Lookup table indexed by byte value (0 - 255); each entry is the character
# that byte maps to.  Up to 0xA0 every byte maps to the code point of the same
# value, except 0x27 and 0x60 which map to the curly single quotes.  The
# remaining entries (0xA1 - 0xFF) are listed explicitly, eight per row.
_std_encoding = [
    *map(chr, range(0x27)),  # 0x00 - 0x26
    "’",  # 0x27
    *map(chr, range(0x28, 0x60)),  # 0x28 - 0x5F
    "‘",  # 0x60
    *map(chr, range(0x61, 0xA1)),  # 0x61 - 0xA0
    "¡", "¢", "£", "⁄", "¥", "ƒ", "§",  # 0xA1 - 0xA7
    "¤", "'", "“", "«", "‹", "›", "ﬁ", "ﬂ",  # 0xA8 - 0xAF
    "°", "–", "†", "‡", "·", "µ", "¶", "•",  # 0xB0 - 0xB7
    "‚", "„", "”", "»", "…", "‰", "¾", "¿",  # 0xB8 - 0xBF
    "À", "`", "´", "ˆ", "˜", "¯", "˘", "˙",  # 0xC0 - 0xC7
    "¨", "É", "˚", "¸", "Ì", "˝", "˛", "ˇ",  # 0xC8 - 0xCF
    "—", "Ñ", "Ò", "Ó", "Ô", "Õ", "Ö", "×",  # 0xD0 - 0xD7
    "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "Þ", "ß",  # 0xD8 - 0xDF
    "à", "Æ", "â", "ª", "ä", "å", "æ", "ç",  # 0xE0 - 0xE7
    "Ł", "Ø", "Œ", "º", "ì", "í", "î", "ï",  # 0xE8 - 0xEF
    "ð", "æ", "ò", "ó", "ô", "ı", "ö", "÷",  # 0xF0 - 0xF7
    "ł", "ø", "œ", "ß", "ü", "ý", "þ", "ÿ",  # 0xF8 - 0xFF
]

# Same sanity check the PDFDoc, Symbol and ZapfDingbats tables perform:
# there must be exactly one entry per byte value.
assert len(_std_encoding) == 256


================================================
FILE: pypdf/_codecs/symbol.py
================================================
# manually generated from https://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/symbol.txt
#
# Indexed by byte value (0 - 255).  Bytes 0x00 - 0x1F and 0x80 - 0x9F map to
# the code point of the same value; the two printable halves are listed
# explicitly, eight entries per row.
_symbol_encoding = [
    *map(chr, range(0x20)),  # 0x00 - 0x1F
    "\u0020", "\u0021", "\u2200", "\u0023", "\u2203", "\u0025", "\u0026", "\u220B",  # 0x20
    "\u0028", "\u0029", "\u2217", "\u002B", "\u002C", "\u2212", "\u002E", "\u002F",  # 0x28
    "\u0030", "\u0031", "\u0032", "\u0033", "\u0034", "\u0035", "\u0036", "\u0037",  # 0x30
    "\u0038", "\u0039", "\u003A", "\u003B", "\u003C", "\u003D", "\u003E", "\u003F",  # 0x38
    "\u2245", "\u0391", "\u0392", "\u03A7", "\u0394", "\u0395", "\u03A6", "\u0393",  # 0x40
    "\u0397", "\u0399", "\u03D1", "\u039A", "\u039B", "\u039C", "\u039D", "\u039F",  # 0x48
    "\u03A0", "\u0398", "\u03A1", "\u03A3", "\u03A4", "\u03A5", "\u03C2", "\u03A9",  # 0x50
    "\u039E", "\u03A8", "\u0396", "\u005B", "\u2234", "\u005D", "\u22A5", "\u005F",  # 0x58
    "\uF8E5", "\u03B1", "\u03B2", "\u03C7", "\u03B4", "\u03B5", "\u03C6", "\u03B3",  # 0x60
    "\u03B7", "\u03B9", "\u03D5", "\u03BA", "\u03BB", "\u00B5", "\u03BD", "\u03BF",  # 0x68
    "\u03C0", "\u03B8", "\u03C1", "\u03C3", "\u03C4", "\u03C5", "\u03D6", "\u03C9",  # 0x70
    "\u03BE", "\u03C8", "\u03B6", "\u007B", "\u007C", "\u007D", "\u223C", "\u007F",  # 0x78
    *map(chr, range(0x80, 0xA0)),  # 0x80 - 0x9F
    "\u20AC", "\u03D2", "\u2032", "\u2264", "\u2044", "\u221E", "\u0192", "\u2663",  # 0xA0
    "\u2666", "\u2665", "\u2660", "\u2194", "\u2190", "\u2191", "\u2192", "\u2193",  # 0xA8
    "\u00B0", "\u00B1", "\u2033", "\u2265", "\u00D7", "\u221D", "\u2202", "\u2022",  # 0xB0
    "\u00F7", "\u2260", "\u2261", "\u2248", "\u2026", "\uF8E6", "\uF8E7", "\u21B5",  # 0xB8
    "\u2135", "\u2111", "\u211C", "\u2118", "\u2297", "\u2295", "\u2205", "\u2229",  # 0xC0
    "\u222A", "\u2283", "\u2287", "\u2284", "\u2282", "\u2286", "\u2208", "\u2209",  # 0xC8
    "\u2220", "\u2207", "\uF6DA", "\uF6D9", "\uF6DB", "\u220F", "\u221A", "\u22C5",  # 0xD0
    "\u00AC", "\u2227", "\u2228", "\u21D4", "\u21D0", "\u21D1", "\u21D2", "\u21D3",  # 0xD8
    "\u25CA", "\u2329", "\uF8E8", "\uF8E9", "\uF8EA", "\u2211", "\uF8EB", "\uF8EC",  # 0xE0
    "\uF8ED", "\uF8EE", "\uF8EF", "\uF8F0", "\uF8F1", "\uF8F2", "\uF8F3", "\uF8F4",  # 0xE8
    "\u00F0", "\u232A", "\u222B", "\u2320", "\uF8F5", "\u2321", "\uF8F6", "\uF8F7",  # 0xF0
    "\uF8F8", "\uF8F9", "\uF8FA", "\uF8FB", "\uF8FC", "\uF8FD", "\uF8FE", "\u00FF",  # 0xF8
]
# One entry per possible byte value.
assert len(_symbol_encoding) == 256


================================================
FILE: pypdf/_codecs/zapfding.py
================================================
#  manually generated from https://www.unicode.org/Public/MAPPINGS/VENDORS/ADOBE/zdingbat.txt
#
# Indexed by byte value (0 - 255).  Bytes 0x00 - 0x1F and 0x90 - 0x9F map to
# the code point of the same value; all other entries are listed explicitly,
# eight per row.

_zapfding_encoding = [
    *map(chr, range(0x20)),  # 0x00 - 0x1F
    "\u0020", "\u2701", "\u2702", "\u2703", "\u2704", "\u260E", "\u2706", "\u2707",  # 0x20
    "\u2708", "\u2709", "\u261B", "\u261E", "\u270C", "\u270D", "\u270E", "\u270F",  # 0x28
    "\u2710", "\u2711", "\u2712", "\u2713", "\u2714", "\u2715", "\u2716", "\u2717",  # 0x30
    "\u2718", "\u2719", "\u271A", "\u271B", "\u271C", "\u271D", "\u271E", "\u271F",  # 0x38
    "\u2720", "\u2721", "\u2722", "\u2723", "\u2724", "\u2725", "\u2726", "\u2727",  # 0x40
    "\u2605", "\u2729", "\u272A", "\u272B", "\u272C", "\u272D", "\u272E", "\u272F",  # 0x48
    "\u2730", "\u2731", "\u2732", "\u2733", "\u2734", "\u2735", "\u2736", "\u2737",  # 0x50
    "\u2738", "\u2739", "\u273A", "\u273B", "\u273C", "\u273D", "\u273E", "\u273F",  # 0x58
    "\u2740", "\u2741", "\u2742", "\u2743", "\u2744", "\u2745", "\u2746", "\u2747",  # 0x60
    "\u2748", "\u2749", "\u274A", "\u274B", "\u25CF", "\u274D", "\u25A0", "\u274F",  # 0x68
    "\u2750", "\u2751", "\u2752", "\u25B2", "\u25BC", "\u25C6", "\u2756", "\u25D7",  # 0x70
    "\u2758", "\u2759", "\u275A", "\u275B", "\u275C", "\u275D", "\u275E", "\u007F",  # 0x78
    "\uF8D7", "\uF8D8", "\uF8D9", "\uF8DA", "\uF8DB", "\uF8DC", "\uF8DD", "\uF8DE",  # 0x80
    "\uF8DF", "\uF8E0", "\uF8E1", "\uF8E2", "\uF8E3", "\uF8E4", "\u008E", "\u008F",  # 0x88
    *map(chr, range(0x90, 0xA0)),  # 0x90 - 0x9F
    "\u00A0", "\u2761", "\u2762", "\u2763", "\u2764", "\u2765", "\u2766", "\u2767",  # 0xA0
    "\u2663", "\u2666", "\u2665", "\u2660", "\u2460", "\u2461", "\u2462", "\u2463",  # 0xA8
    "\u2464", "\u2465", "\u2466", "\u2467", "\u2468", "\u2469", "\u2776", "\u2777",  # 0xB0
    "\u2778", "\u2779", "\u277A", "\u277B", "\u277C", "\u277D", "\u277E", "\u277F",  # 0xB8
    "\u2780", "\u2781", "\u2782", "\u2783", "\u2784", "\u2785", "\u2786", "\u2787",  # 0xC0
    "\u2788", "\u2789", "\u278A", "\u278B", "\u278C", "\u278D", "\u278E", "\u278F",  # 0xC8
    "\u2790", "\u2791", "\u2792", "\u2793", "\u2794", "\u2192", "\u2194", "\u2195",  # 0xD0
    "\u2798", "\u2799", "\u279A", "\u279B", "\u279C", "\u279D", "\u279E", "\u279F",  # 0xD8
    "\u27A0", "\u27A1", "\u27A2", "\u27A3", "\u27A4", "\u27A5", "\u27A6", "\u27A7",  # 0xE0
    "\u27A8", "\u27A9", "\u27AA", "\u27AB", "\u27AC", "\u27AD", "\u27AE", "\u27AF",  # 0xE8
    "\u00F0", "\u27B1", "\u27B2", "\u27B3", "\u27B4", "\u27B5", "\u27B6", "\u27B7",  # 0xF0
    "\u27B8", "\u27B9", "\u27BA", "\u27BB", "\u27BC", "\u27BD", "\u27BE", "\u00FF",  # 0xF8
]
# One entry per possible byte value.
assert len(_zapfding_encoding) == 256


================================================
FILE: pypdf/_crypt_providers/__init__.py
================================================
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from pypdf._crypt_providers._base import CryptBase, CryptIdentity

# Backend selection: the first importable provider wins. The order is
# cryptography, then pycryptodome, then the built-in fallback module.
# Every provider module exposes the same set of names, so the rest of the
# package can import them from here without knowing which backend is active.
try:
    from pypdf._crypt_providers._cryptography import (
        CryptAES,
        CryptRC4,
        aes_cbc_decrypt,
        aes_cbc_encrypt,
        aes_ecb_decrypt,
        aes_ecb_encrypt,
        crypt_provider,
        rc4_decrypt,
        rc4_encrypt,
    )
    from pypdf._utils import Version

    # crypt_provider is a (name, version) tuple; index 1 is the version string.
    if Version(crypt_provider[1]) <= Version("3.0"):
        # This is due to the backend parameter being required back then:
        # https://cryptography.io/en/latest/changelog/#v3-1
        # Raising ImportError here reuses the handler below, so a too-old
        # cryptography is treated exactly like a missing one.
        raise ImportError("cryptography<=3.0 is not supported")  # pragma: no cover
except ImportError:
    try:
        from pypdf._crypt_providers._pycryptodome import (  # type: ignore
            CryptAES,
            CryptRC4,
            aes_cbc_decrypt,
            aes_cbc_encrypt,
            aes_ecb_decrypt,
            aes_ecb_encrypt,
            crypt_provider,
            rc4_decrypt,
            rc4_encrypt,
        )
    except ImportError:
        # Last resort: the fallback module shipped with this package.
        from pypdf._crypt_providers._fallback import (  # type: ignore
            CryptAES,
            CryptRC4,
            aes_cbc_decrypt,
            aes_cbc_encrypt,
            aes_ecb_decrypt,
            aes_ecb_encrypt,
            crypt_provider,
            rc4_decrypt,
            rc4_encrypt,
        )

# Names re-exported by this package, whichever backend supplied them.
__all__ = [
    "CryptAES",
    "CryptBase",
    "CryptIdentity",
    "CryptRC4",
    "aes_cbc_decrypt",
    "aes_cbc_encrypt",
    "aes_ecb_decrypt",
    "aes_ecb_encrypt",
    "crypt_provider",
    "rc4_decrypt",
    "rc4_encrypt",
]


================================================
FILE: pypdf/_crypt_providers/_base.py
================================================
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


class CryptBase:
    """Base cipher interface whose default operations pass the data through untouched."""

    def encrypt(self, data: bytes) -> bytes:  # pragma: no cover
        """Return ``data`` exactly as received."""
        return data

    def decrypt(self, data: bytes) -> bytes:  # pragma: no cover
        """Return ``data`` exactly as received."""
        return data


class CryptIdentity(CryptBase):
    """Cipher that adds nothing to ``CryptBase``: both methods are inherited unchanged."""


================================================
FILE: pypdf/_crypt_providers/_cryptography.py
================================================
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import secrets

from cryptography import __version__
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers.algorithms import AES

try:
    # 43.0.0 - https://cryptography.io/en/latest/changelog/#v43-0-0
    from cryptography.hazmat.decrepit.ciphers.algorithms import ARC4
except ImportError:
    from cryptography.hazmat.primitives.ciphers.algorithms import ARC4
from cryptography.hazmat.primitives.ciphers.base import Cipher
from cryptography.hazmat.primitives.ciphers.modes import CBC, ECB

from pypdf._crypt_providers._base import CryptBase

# (backend name, backend version) pair; the version is cryptography's own ``__version__``.
crypt_provider = ("cryptography", __version__)


class CryptRC4(CryptBase):
    """RC4 cipher built on ``cryptography``'s ``ARC4`` algorithm."""

    def __init__(self, key: bytes) -> None:
        """Create the ``Cipher`` object for ``key``; it is reused by every call."""
        algorithm = ARC4(key)
        self.cipher = Cipher(algorithm, mode=None)

    def encrypt(self, data: bytes) -> bytes:
        """Encrypt ``data`` with a newly created encryptor context."""
        context = self.cipher.encryptor()
        produced = context.update(data)
        return produced + context.finalize()

    def decrypt(self, data: bytes) -> bytes:
        """Decrypt ``data`` with a newly created decryptor context."""
        context = self.cipher.decryptor()
        produced = context.update(data)
        return produced + context.finalize()


class CryptAES(CryptBase):
    """
    AES in CBC mode with PKCS7 padding.

    The 16-byte IV travels in front of the ciphertext: ``encrypt`` prepends a
    random one, ``decrypt`` reads it from the first 16 bytes of its input.
    """

    def __init__(self, key: bytes) -> None:
        """Wrap ``key`` in the ``AES`` algorithm object shared by all calls."""
        self.alg = AES(key)

    def encrypt(self, data: bytes) -> bytes:
        """Pad ``data`` to the block size, encrypt it and return ``iv + ciphertext``."""
        iv = secrets.token_bytes(16)
        padder = padding.PKCS7(128).padder()
        padded = padder.update(data) + padder.finalize()

        encryptor = Cipher(self.alg, CBC(iv)).encryptor()
        return iv + encryptor.update(padded) + encryptor.finalize()

    def decrypt(self, data: bytes) -> bytes:
        """
        Split off the IV, decrypt the remainder and strip the trailing padding.

        The last plaintext byte is taken as the pad length and that many bytes
        are removed from the end. Nothing beyond the IV means empty output.
        """
        iv, body = data[:16], data[16:]
        # for empty encrypted data
        if not body:
            return body

        # just for robustness, it does not happen under normal circumstances
        if len(body) % 16 != 0:
            padder = padding.PKCS7(128).padder()
            body = padder.update(body) + padder.finalize()

        decryptor = Cipher(self.alg, CBC(iv)).decryptor()
        plain = decryptor.update(body) + decryptor.finalize()
        # NOTE(review): a final byte of 0 makes this slice ``plain[:0]``, i.e. empty.
        pad_length = plain[-1]
        return plain[:-pad_length]


def rc4_encrypt(key: bytes, data: bytes) -> bytes:
    """Encrypt ``data`` with RC4 under ``key`` using a one-off cipher object."""
    context = Cipher(ARC4(key), mode=None).encryptor()
    produced = context.update(data)
    return produced + context.finalize()


def rc4_decrypt(key: bytes, data: bytes) -> bytes:
    """Decrypt ``data`` with RC4 under ``key`` using a one-off cipher object."""
    context = Cipher(ARC4(key), mode=None).decryptor()
    produced = context.update(data)
    return produced + context.finalize()


def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
    """Encrypt ``data`` with AES in ECB mode; ``data`` is handed to the cipher as given."""
    context = Cipher(AES(key), mode=ECB()).encryptor()
    produced = context.update(data)
    return produced + context.finalize()


def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
    """Decrypt ``data`` with AES in ECB mode; the output is returned without unpadding."""
    context = Cipher(AES(key), mode=ECB()).decryptor()
    produced = context.update(data)
    return produced + context.finalize()


def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """Encrypt ``data`` with AES-CBC using ``iv``; neither padding nor the IV is added to the output."""
    context = Cipher(AES(key), mode=CBC(iv)).encryptor()
    produced = context.update(data)
    return produced + context.finalize()


def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """Decrypt ``data`` with AES-CBC using ``iv``; the output is returned without unpadding."""
    context = Cipher(AES(key), mode=CBC(iv)).decryptor()
    produced = context.update(data)
    return produced + context.finalize()


================================================
FILE: pypdf/_crypt_providers/_fallback.py
================================================
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from pypdf._crypt_providers._base import CryptBase
from pypdf.errors import DependencyError

# Message carried by the DependencyError that every AES entry point in this
# module raises.
_DEPENDENCY_ERROR_STR = "cryptography>=3.1 is required for AES algorithm"


# (backend name, version) pair identifying this module as the active provider.
crypt_provider = ("local_crypt_fallback", "0.0.0")


class CryptRC4(CryptBase):
    """Pure-Python RC4 used when no external crypto backend could be imported."""

    def __init__(self, key: bytes) -> None:
        """Derive the keyed 256-entry permutation from ``key`` and store it in ``self.s``."""
        self.s = bytearray(range(256))
        state = self.s  # same object, shorter name
        j = 0
        for i in range(256):
            j = (j + state[i] + key[i % len(key)]) % 256
            state[i], state[j] = state[j], state[i]

    def encrypt(self, data: bytes) -> bytes:
        """XOR ``data`` with the keystream generated from a copy of the keyed state."""
        # Work on a copy so the stored state stays reusable for later calls.
        box = bytearray(self.s)
        result = []
        i = j = 0
        for byte in data:
            i = (i + 1) % 256
            j = (j + box[i]) % 256
            box[i], box[j] = box[j], box[i]
            keystream_byte = box[(box[i] + box[j]) % 256]
            result.append(byte ^ keystream_byte)
        return bytes(result)

    def decrypt(self, data: bytes) -> bytes:
        """Decrypt ``data``; this simply delegates to :meth:`encrypt`."""
        return self.encrypt(data)


class CryptAES(CryptBase):
    """AES placeholder for the fallback provider: every operation raises ``DependencyError``."""

    def __init__(self, key: bytes) -> None:
        """Accept ``key`` for interface compatibility; it is not stored."""

    def encrypt(self, data: bytes) -> bytes:
        """Always raise ``DependencyError``."""
        raise DependencyError(_DEPENDENCY_ERROR_STR)

    def decrypt(self, data: bytes) -> bytes:
        """Always raise ``DependencyError``."""
        raise DependencyError(_DEPENDENCY_ERROR_STR)


def rc4_encrypt(key: bytes, data: bytes) -> bytes:
    """Encrypt ``data`` with a ``CryptRC4`` instance created for ``key``."""
    cipher = CryptRC4(key)
    return cipher.encrypt(data)


def rc4_decrypt(key: bytes, data: bytes) -> bytes:
    """Decrypt ``data`` with a ``CryptRC4`` instance created for ``key``."""
    cipher = CryptRC4(key)
    return cipher.decrypt(data)


def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
    """Not available in the fallback provider: always raises ``DependencyError``."""
    raise DependencyError(_DEPENDENCY_ERROR_STR)


def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
    """Not available in the fallback provider: always raises ``DependencyError``."""
    raise DependencyError(_DEPENDENCY_ERROR_STR)


def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """Not available in the fallback provider: always raises ``DependencyError``."""
    raise DependencyError(_DEPENDENCY_ERROR_STR)


def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """Not available in the fallback provider: always raises ``DependencyError``."""
    raise DependencyError(_DEPENDENCY_ERROR_STR)


================================================
FILE: pypdf/_crypt_providers/_pycryptodome.py
================================================
# Copyright (c) 2023, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import secrets

from Crypto import __version__
from Crypto.Cipher import AES, ARC4
from Crypto.Util.Padding import pad

from pypdf._crypt_providers._base import CryptBase

# (backend name, backend version) pair; the version is the ``Crypto`` package's ``__version__``.
crypt_provider = ("pycryptodome", __version__)


class CryptRC4(CryptBase):
    """RC4 cipher backed by pycryptodome; a new cipher object is built for every call."""

    def __init__(self, key: bytes) -> None:
        """Remember ``key``; no cipher object is created yet."""
        self.key = key

    def encrypt(self, data: bytes) -> bytes:
        """Encrypt ``data`` with a cipher freshly keyed from ``self.key``."""
        cipher = ARC4.ARC4Cipher(self.key)
        return cipher.encrypt(data)

    def decrypt(self, data: bytes) -> bytes:
        """Decrypt ``data`` with a cipher freshly keyed from ``self.key``."""
        cipher = ARC4.ARC4Cipher(self.key)
        return cipher.decrypt(data)


class CryptAES(CryptBase):
    """
    AES in CBC mode backed by pycryptodome.

    The 16-byte IV travels in front of the ciphertext: ``encrypt`` prepends a
    random one, ``decrypt`` reads it from the first 16 bytes of its input.
    """

    def __init__(self, key: bytes) -> None:
        """Remember ``key``; cipher objects are created per call."""
        self.key = key

    def encrypt(self, data: bytes) -> bytes:
        """Pad ``data`` to a multiple of 16 bytes, encrypt it and return ``iv + ciphertext``."""
        iv = secrets.token_bytes(16)
        padded = pad(data, 16)
        cipher = AES.new(self.key, AES.MODE_CBC, iv)
        return iv + cipher.encrypt(padded)

    def decrypt(self, data: bytes) -> bytes:
        """
        Split off the IV, decrypt the remainder and strip the trailing padding.

        The last plaintext byte is taken as the pad length and that many bytes
        are removed from the end. Nothing beyond the IV means empty output.
        """
        iv, body = data[:16], data[16:]
        # for empty encrypted data
        if not body:
            return body

        # just for robustness, it does not happen under normal circumstances
        if len(body) % 16 != 0:
            body = pad(body, 16)

        cipher = AES.new(self.key, AES.MODE_CBC, iv)
        plain = cipher.decrypt(body)
        # NOTE(review): a final byte of 0 makes this slice ``plain[:0]``, i.e. empty.
        pad_length = plain[-1]
        return plain[:-pad_length]


def rc4_encrypt(key: bytes, data: bytes) -> bytes:
    """Encrypt ``data`` with RC4 under ``key`` using a one-off cipher object."""
    cipher = ARC4.ARC4Cipher(key)
    return cipher.encrypt(data)


def rc4_decrypt(key: bytes, data: bytes) -> bytes:
    """Decrypt ``data`` with RC4 under ``key`` using a one-off cipher object."""
    cipher = ARC4.ARC4Cipher(key)
    return cipher.decrypt(data)


def aes_ecb_encrypt(key: bytes, data: bytes) -> bytes:
    """Encrypt ``data`` with AES in ECB mode; ``data`` is handed to the cipher as given."""
    cipher = AES.new(key, AES.MODE_ECB)
    return cipher.encrypt(data)


def aes_ecb_decrypt(key: bytes, data: bytes) -> bytes:
    """Decrypt ``data`` with AES in ECB mode; the output is returned without unpadding."""
    cipher = AES.new(key, AES.MODE_ECB)
    return cipher.decrypt(data)


def aes_cbc_encrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """Encrypt ``data`` with AES-CBC using ``iv``; neither padding nor the IV is added to the output."""
    cipher = AES.new(key, AES.MODE_CBC, iv)
    return cipher.encrypt(data)


def aes_cbc_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """Decrypt ``data`` with AES-CBC using ``iv``; the output is returned without unpadding."""
    cipher = AES.new(key, AES.MODE_CBC, iv)
    return cipher.decrypt(data)


================================================
FILE: pypdf/_doc_common.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
# Copyright (c) 2024, Pubpub-ZZ
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import struct
from abc import abstractmethod
from collections.abc import Generator, Iterable, Iterator, Mapping
from datetime import datetime
from typing import (
    Any,
    Optional,
    Union,
    cast,
)

from ._encryption import Encryption
from ._page import PageObject, _VirtualList
from ._page_labels import index2label as page_index2page_label
from ._utils import (
    deprecation_with_replacement,
    logger_warning,
    parse_iso8824_date,
)
from .constants import CatalogAttributes as CA
from .constants import CatalogDictionary as CD
from .constants import (
    CheckboxRadioButtonAttributes,
    GoToActionArguments,
    PagesAttributes,
    UserAccessPermissions,
)
from .constants import Core as CO
from .constants import DocumentInformationAttributes as DI
from .constants import FieldDictionaryAttributes as FA
from .constants import PageAttributes as PG
from .errors import PdfReadError, PyPdfError
from .filters import _decompress_with_limit
from .generic import (
    ArrayObject,
    BooleanObject,
    ByteStringObject,
    Destination,
    DictionaryObject,
    EncodedStreamObject,
    Field,
    Fit,
    FloatObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    PdfObject,
    TextStringObject,
    TreeObject,
    ViewerPreferences,
    create_string_object,
    is_null_or_none,
)
from .generic._files import EmbeddedFile
from .types import OutlineType, PagemodeType
from .xmp import XmpInformation


def convert_to_int(d: bytes, size: int) -> Union[int, tuple[Any, ...]]:
    """
    Decode a big-endian byte string as a signed 64-bit integer.

    Args:
        d: The raw bytes. Shorter inputs are left-padded with zero bytes;
            of longer inputs only the trailing eight bytes are decoded.
        size: The declared width in bytes. It is only validated and does
            not influence the decoding itself.

    Returns:
        The decoded integer.

    Raises:
        PdfReadError: If ``size`` is larger than eight.

    """
    if size > 8:
        raise PdfReadError("Invalid size in convert_to_int")
    # Prepend eight zero bytes so that at least eight bytes are available,
    # then keep exactly the last eight for the ">q" format.
    window = (bytes(8) + d)[-8:]
    (value,) = struct.unpack(">q", window)
    return value


class DocumentInformation(DictionaryObject):
    """
    Dictionary-backed view of the basic document metadata of a PDF file.

    Instances are obtained through
    :py:class:`PdfReader.metadata<pypdf.PdfReader.metadata>`.

    Every textual entry is exposed *twice*, e.g. as ``author`` and
    ``author_raw``. The plain property is meant for display purposes. The
    ``*_raw`` property hands back the stored entry unchanged, which can be a
    ``ByteStringObject`` if pypdf was unable to decode the string's text
    encoding; callers have to cope with that themselves, which is why the raw
    variants are used less often.
    """

    def __init__(self) -> None:
        super().__init__()

    def _get_text(self, key: str) -> Optional[str]:
        """
        Fetch the entry stored under *key* as text.

        Args:
            key: Name of the entry in the information dictionary.

        Returns:
            The entry itself if it is a ``TextStringObject``, ``str(entry)``
            if it is a ``ByteStringObject`` and ``None`` in all other cases
            (including a missing entry).

        """
        entry = self.get(key)
        if isinstance(entry, TextStringObject):
            return entry
        return str(entry) if isinstance(entry, ByteStringObject) else None

    @property
    def title(self) -> Optional[str]:
        """
        Read-only property accessing the document's title.

        Returns ``None`` if the title is missing or empty. Otherwise the text
        of the entry is returned; if the entry yields no text, the resolved
        stored object is handed back instead.
        """
        stored = self.get(DI.TITLE)
        if not stored:
            return None
        return self._get_text(DI.TITLE) or stored.get_object()  # type: ignore

    @property
    def title_raw(self) -> Optional[str]:
        """The stored title entry as is; can be a ``ByteStringObject``."""
        return self.get(DI.TITLE)

    @property
    def author(self) -> Optional[str]:
        """
        Read-only property accessing the document's author.

        ``None`` is returned if no author text is available.
        """
        return self._get_text(DI.AUTHOR)

    @property
    def author_raw(self) -> Optional[str]:
        """The stored author entry as is; can be a ``ByteStringObject``."""
        return self.get(DI.AUTHOR)

    @property
    def subject(self) -> Optional[str]:
        """
        Read-only property accessing the document's subject.

        ``None`` is returned if no subject text is available.
        """
        return self._get_text(DI.SUBJECT)

    @property
    def subject_raw(self) -> Optional[str]:
        """The stored subject entry as is; can be a ``ByteStringObject``."""
        return self.get(DI.SUBJECT)

    @property
    def creator(self) -> Optional[str]:
        """
        Read-only property accessing the document's creator.

        For documents converted to PDF from another format, this names the
        application (e.g. OpenOffice) that created the original document.
        ``None`` is returned if no creator text is available.
        """
        return self._get_text(DI.CREATOR)

    @property
    def creator_raw(self) -> Optional[str]:
        """The stored creator entry as is; can be a ``ByteStringObject``."""
        return self.get(DI.CREATOR)

    @property
    def producer(self) -> Optional[str]:
        """
        Read-only property accessing the document's producer.

        For documents converted to PDF from another format, this names the
        application (for example, macOS Quartz) that performed the
        conversion. ``None`` is returned if no producer text is available.
        """
        return self._get_text(DI.PRODUCER)

    @property
    def producer_raw(self) -> Optional[str]:
        """The stored producer entry as is; can be a ``ByteStringObject``."""
        return self.get(DI.PRODUCER)

    @property
    def creation_date(self) -> Optional[datetime]:
        """
        Read-only property accessing the document's creation date.

        The text of the entry is passed through ``parse_iso8824_date``.
        """
        date_text = self._get_text(DI.CREATION_DATE)
        return parse_iso8824_date(date_text)

    @property
    def creation_date_raw(self) -> Optional[str]:
        """
        The stored creation date entry as is; can be a ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        """
        return self.get(DI.CREATION_DATE)

    @property
    def modification_date(self) -> Optional[datetime]:
        """
        Read-only property accessing the document's modification date.

        This is the date and time the document was most recently modified.
        The text of the entry is passed through ``parse_iso8824_date``.
        """
        date_text = self._get_text(DI.MOD_DATE)
        return parse_iso8824_date(date_text)

    @property
    def modification_date_raw(self) -> Optional[str]:
        """
        The stored modification date entry as is; can be a
        ``ByteStringObject``.

        Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix
        is the offset from UTC.
        """
        return self.get(DI.MOD_DATE)

    @property
    def keywords(self) -> Optional[str]:
        """
        Read-only property accessing the document's keywords.

        ``None`` is returned if no keywords text is available.
        """
        return self._get_text(DI.KEYWORDS)

    @property
    def keywords_raw(self) -> Optional[str]:
        """The stored keywords entry as is; can be a ``ByteStringObject``."""
        return self.get(DI.KEYWORDS)


class PdfDocCommon:
    """
    Common functions from PdfWriter and PdfReader objects.

    This root class is strongly abstracted.
    """

    strict: bool = False  # default

    flattened_pages: Optional[list[PageObject]] = None

    _encryption: Optional[Encryption] = None

    _readonly: bool = False

    @property
    @abstractmethod
    def root_object(self) -> DictionaryObject:
        """
        Dictionary that the methods of this class use as the document
        catalog, e.g. to look up ``/Pages`` or ``/OpenAction``.

        Has to be implemented by the concrete subclass.
        """
        ...  # pragma: no cover

    @property
    @abstractmethod
    def pdf_header(self) -> str:
        """
        Header of the PDF document as a string.

        Has to be implemented by the concrete subclass.
        NOTE(review): presumably of the form ``%PDF-x.y`` - confirm against
        the implementations.
        """
        ...  # pragma: no cover

    @abstractmethod
    def get_object(
        self, indirect_reference: Union[int, IndirectObject]
    ) -> Optional[PdfObject]:
        """
        Look up an object of the document.

        Has to be implemented by the concrete subclass.

        Args:
            indirect_reference: An integer or an ``IndirectObject``
                identifying the object.

        Returns:
            The object or ``None``.
            NOTE(review): presumably ``None`` means that the object could not
            be found - confirm against the implementations.

        """
        ...  # pragma: no cover

    @abstractmethod
    def _replace_object(self, indirect: IndirectObject, obj: PdfObject) -> PdfObject:
        """
        Store *obj* in place of the object referenced by *indirect*.

        Has to be implemented by the concrete subclass. Within this class it
        is used by ``viewer_preferences`` to swap in the wrapped dictionary.

        Args:
            indirect: Reference of the object to replace.
            obj: The replacement.

        Returns:
            A ``PdfObject``.
            NOTE(review): presumably the object that has been stored -
            confirm against the implementations.

        """
        ...  # pragma: no cover

    @property
    @abstractmethod
    def _info(self) -> Optional[DictionaryObject]:
        """
        Dictionary that ``metadata`` copies its entries from, or ``None``
        if there is none.

        Has to be implemented by the concrete subclass.
        """
        ...  # pragma: no cover

    @property
    def metadata(self) -> Optional[DocumentInformation]:
        """
        Retrieve the PDF file's document information dictionary, if it exists.

        The entries of ``_info`` are copied into a fresh
        :class:`DocumentInformation` instance on every access.

        Note that some PDF files use metadata streams instead of document
        information dictionaries, and these metadata streams will not be
        accessed by this function.
        """
        info = self._info
        if info is None:
            return None
        document_information = DocumentInformation()
        document_information.update(info)
        return document_information

    @property
    def xmp_metadata(self) -> Optional[XmpInformation]:
        """
        XMP metadata of the document.

        The body in this class is only a placeholder, so the property
        evaluates to ``None`` here.
        NOTE(review): presumably overridden by the concrete subclasses -
        confirm.
        """
        ...  # pragma: no cover

    @property
    def viewer_preferences(self) -> Optional[ViewerPreferences]:
        """
        Return the existing ViewerPreferences as an overloaded dictionary.

        A plain entry is wrapped into a ``ViewerPreferences`` object first.
        The wrapper then replaces the original: through its indirect
        reference if it has one, otherwise directly inside the catalog.
        ``None`` is returned if the catalog has no such entry.
        """
        raw = self.root_object.get(CD.VIEWER_PREFERENCES, None)
        if raw is None:
            return None
        resolved = raw.get_object()
        if isinstance(resolved, ViewerPreferences):
            return resolved

        preferences = ViewerPreferences(resolved)
        reference = getattr(preferences, "indirect_reference", None)
        if reference is not None:
            self._replace_object(reference, preferences)
        else:
            self.root_object[NameObject(CD.VIEWER_PREFERENCES)] = preferences
        return preferences

    def get_num_pages(self) -> int:
        """
        Calculate the number of pages in this PDF file.

        Returns:
            The number of pages of the parsed PDF file.

        Raises:
            PdfReadError: If restrictions prevent this action.

        """
        if not self.is_encrypted:
            # Regular case: count the flattened pages, building the list
            # first if this has not happened yet.
            if self.flattened_pages is None:
                self._flatten(self._readonly)
            pages = self.flattened_pages
            assert pages is not None
            return len(pages)
        # Flattened pages will not work on an encrypted PDF; rely on the
        # page count stored in the page tree root instead.
        page_tree = self.root_object["/Pages"]
        return page_tree["/Count"]  # type: ignore

    def get_page(self, page_number: int) -> PageObject:
        """
        Retrieve a page by number from this PDF file.
        Most of the time ``.pages[page_number]`` is preferred.

        The list of flattened pages is built on first use.

        Args:
            page_number: The page number to retrieve
                (pages begin at zero)

        Returns:
            A :class:`PageObject<pypdf._page.PageObject>` instance.

        """
        if self.flattened_pages is None:
            self._flatten(self._readonly)
        pages = self.flattened_pages
        assert pages is not None, "hint for mypy"
        return pages[page_number]

    def _get_page_in_node(
        self,
        page_number: int,
    ) -> tuple[DictionaryObject, int]:
        """
        Locate the page tree node whose ``/Kids`` directly holds the page.

        Args:
            page_number: Zero-based number of the page to look for.

        Returns:
            The node and the position of the page inside its ``/Kids``.
            If ``page_number`` is beyond the page count of the top node,
            the top node and ``-1`` are returned.

        Raises:
            PyPdfError: If the kids of a node which should contain the page
                have been visited without finding it.

        """
        root_pages = cast(DictionaryObject, self.root_object["/Pages"])

        def locate(
            current: DictionaryObject, first_index: int
        ) -> tuple[Optional[PdfObject], int]:
            # ``first_index`` is the number of the first page below ``current``.
            page_count = cast(int, current.get("/Count", 1))  # default 1 for /Page types
            if current["/Type"] == "/Page":
                if page_number == first_index:
                    # Found; -1 tells the caller that this is a direct kid.
                    return current, -1
                return None, first_index + 1
            if (page_number - first_index) >= page_count:
                # The requested page is not below this node.
                if current == root_pages:
                    return root_pages, -1
                return None, first_index + page_count
            for position, child in enumerate(cast(ArrayObject, current["/Kids"])):
                child = cast(DictionaryObject, child.get_object())
                found, marker = locate(child, first_index)
                if found is not None:
                    if marker < 0:
                        # The page is a direct kid of the current node.
                        return current, position
                    # The page has been located further down.
                    return found, marker
                # Not in this kid: continue with the updated page index.
                first_index = marker
            raise PyPdfError("Unexpectedly cannot find the node.")

        parent_node, kid_index = locate(root_pages, 0)
        assert isinstance(parent_node, DictionaryObject), "mypy"
        return parent_node, kid_index

    @property
    def named_destinations(self) -> dict[str, Destination]:
        """
        A read-only dictionary which maps names to destinations.

        The dictionary is rebuilt through ``_get_named_destinations`` on
        every access.
        """
        return self._get_named_destinations()

    def get_named_dest_root(self) -> ArrayObject:
        """
        Return the ``/Names`` array of the destinations name tree root.

        An existing array is returned as is. A missing array is added to an
        existing destinations dictionary. Missing dictionaries are only
        created and registered if the object provides ``_add_object``;
        otherwise a detached, empty array is returned.

        Returns:
            The array holding the named destinations.

        """
        named_dest = ArrayObject()
        catalog = self.root_object
        names: Optional[DictionaryObject] = None

        if CA.NAMES in catalog and isinstance(catalog[CA.NAMES], DictionaryObject):
            names = cast(DictionaryObject, catalog[CA.NAMES])
            if CA.DESTS in names and isinstance(names[CA.DESTS], DictionaryObject):
                # §3.6.3 Name Dictionary (PDF spec 1.7)
                dests = cast(DictionaryObject, names[CA.DESTS])
                if CA.NAMES in dests:
                    # §7.9.6, entries in a name tree node dictionary
                    return cast(ArrayObject, dests[CA.NAMES])
                dests[NameObject(CA.NAMES)] = named_dest
                return named_dest

        # From here on, new objects have to be registered with the document.
        if not hasattr(self, "_add_object"):
            return named_dest

        if names is None:
            names = DictionaryObject()
            catalog[NameObject(CA.NAMES)] = self._add_object(names)
        dests = DictionaryObject()
        names[NameObject(CA.DESTS)] = self._add_object(dests)
        dests[NameObject(CA.NAMES)] = named_dest
        return named_dest

    ## common
    def _get_named_destinations(
        self,
        tree: Union[TreeObject, None] = None,
        retval: Optional[dict[str, Destination]] = None,
    ) -> dict[str, Destination]:
        """
        Retrieve the named destinations present in the document.

        A call without ``retval`` is the entry point: the tree to start with
        is looked up in the catalog, either directly under ``CA.DESTS`` or,
        failing that, under ``CA.DESTS`` inside the ``CA.NAMES`` dictionary.
        Nested calls receive the node to inspect and the shared result.

        Args:
            tree: The current tree.
            retval: The previously retrieved destinations for nested calls.

        Returns:
            A dictionary which maps names to destinations.

        """
        if retval is None:
            # Entry point: start with an empty result and locate the tree.
            retval = {}
            catalog = self.root_object

            # get the name tree
            if CA.DESTS in catalog:
                tree = cast(TreeObject, catalog[CA.DESTS])
            elif CA.NAMES in catalog:
                names = cast(DictionaryObject, catalog[CA.NAMES])
                if CA.DESTS in names:
                    tree = cast(TreeObject, names[CA.DESTS])

        # Nothing to inspect: hand back what has been collected so far.
        if is_null_or_none(tree):
            return retval
        assert tree is not None, "mypy"

        if PagesAttributes.KIDS in tree:
            # recurse down the tree
            for kid in cast(ArrayObject, tree[PagesAttributes.KIDS]):
                self._get_named_destinations(kid.get_object(), retval)
        # §7.9.6, entries in a name tree node dictionary
        elif CA.NAMES in tree:  # /Kids and /Names are exclusives (§7.9.6)
            # Despite the cast, the entry is indexed by position below:
            # keys and values alternate (key, value, key, value, ...).
            names = cast(DictionaryObject, tree[CA.NAMES])
            i = 0
            while i < len(names):
                key = names[i].get_object()
                i += 1
                if not isinstance(key, (bytes, str)):
                    # Not a usable key: advance by one entry only, so the
                    # next entry is tried as a key.
                    continue
                try:
                    value = names[i].get_object()
                except IndexError:
                    # Trailing key without a value: stop here.
                    break
                i += 1
                if isinstance(value, DictionaryObject):
                    # A dictionary carries the actual destination in "/D";
                    # dictionaries without it are skipped.
                    if "/D" in value:
                        value = value["/D"]
                    else:
                        continue
                dest = self._build_destination(key, value)
                if dest is not None:
                    # Registered under the destination's title ...
                    retval[cast(str, dest["/Title"])] = dest
                    # Remain backwards-compatible.
                    retval[str(key)] = dest
        else:  # case where Dests is in root catalog (PDF 1.7 specs, §2 about PDF 1.1)
            # The tree is handled as a plain mapping from name to destination.
            for k__, v__ in tree.items():
                val = v__.get_object()
                if isinstance(val, DictionaryObject):
                    # Same unwrapping of "/D" as for the name tree entries.
                    if "/D" in val:
                        val = val["/D"].get_object()
                    else:
                        continue
                dest = self._build_destination(k__, val)
                if dest is not None:
                    retval[k__] = dest
        return retval

    # A select group of relevant field attributes. For the complete list,
    # see §12.3.2 of the PDF 1.7 or PDF 2.0 specification.

    def get_fields(
        self,
        tree: Optional[TreeObject] = None,
        retval: Optional[dict[Any, Any]] = None,
        fileobj: Optional[Any] = None,
        stack: Optional[list[PdfObject]] = None,
    ) -> Optional[dict[str, Any]]:
        """
        Extract field data if this PDF contains interactive form fields.

        The *tree*, *retval*, *stack* parameters are for recursive use.

        Args:
            tree: Current object to parse.
            retval: In-progress list of fields.
            fileobj: A file object (usually a text file) to write
                a report to on all interactive form fields found.
            stack: List of already parsed objects.

        Returns:
            A dictionary where each key is a field name, and each
            value is a :class:`Field<pypdf.generic.Field>` object. By
            default, the mapping name is used for keys.
            ``None`` if form data could not be located.

        """
        attribute_names = FA.attributes_dict()
        attribute_names.update(CheckboxRadioButtonAttributes.attributes_dict())

        if retval is None:
            # Outermost call: start at the AcroForm entry of the catalog.
            catalog = self.root_object
            if CD.ACRO_FORM not in catalog:
                return None
            retval = {}
            stack = []
            tree = cast(Optional[TreeObject], catalog[CD.ACRO_FORM])

        if tree is None:
            return retval
        assert stack is not None

        if "/Fields" in tree:
            for reference in cast(ArrayObject, tree["/Fields"]):
                self._build_field(
                    reference.get_object(), retval, fileobj, attribute_names, stack
                )
        elif any(name in tree for name in attribute_names):
            # The tree itself is a field.
            self._build_field(tree, retval, fileobj, attribute_names, stack)
        return retval

    def _get_qualified_field_name(self, parent: DictionaryObject) -> str:
        """
        Build the name under which a field is reported.

        ``/TM`` is used as is when present. Otherwise the ``/T`` values
        (empty string if missing) are joined with dots while walking up the
        ``/Parent`` chain.

        Args:
            parent: The field dictionary to name.

        Returns:
            The name of the field.

        """
        if "/TM" in parent:
            return cast(str, parent["/TM"])
        partial_name = cast(str, parent.get("/T", ""))
        if "/Parent" not in parent:
            return partial_name
        ancestor = cast(DictionaryObject, parent["/Parent"])
        return self._get_qualified_field_name(ancestor) + "." + partial_name

    def _build_field(
        self,
        field: Union[TreeObject, DictionaryObject],
        retval: dict[Any, Any],
        fileobj: Any,
        field_attributes: Any,
        stack: list[PdfObject],
    ) -> None:
        """
        Register *field* in *retval* and descend into its kids.

        Objects having neither ``/T`` nor ``/TM`` are ignored. For choice
        fields and buttons, the possible states are stored under the extra
        ``/_States_`` key of the registered ``Field``.

        Args:
            field: The field dictionary to register.
            retval: The dictionary collecting the fields by name.
            fileobj: If truthy, a report about the field is written to it.
            field_attributes: Mapping of attribute keys to readable names,
                used for the report.
            stack: List of already parsed objects.

        """
        if "/T" not in field and "/TM" not in field:
            return
        key = self._get_qualified_field_name(field)
        if fileobj:
            self._write_field(fileobj, field, field_attributes)
            fileobj.write("\n")

        entry = Field(field)
        retval[key] = entry
        obj = entry.indirect_reference.get_object()  # to get the full object
        field_type = obj.get(FA.FT, "")
        states_key = NameObject("/_States_")

        if field_type == "/Ch" and obj.get(NameObject(FA.Opt)):
            entry[states_key] = obj[NameObject(FA.Opt)]

        if field_type == "/Btn" and "/AP" in obj:
            # Checkbox: the states are the names of the normal appearances.
            entry[states_key] = ArrayObject(list(obj["/AP"]["/N"].keys()))
            if "/Off" not in entry["/_States_"]:
                entry[states_key].append(NameObject("/Off"))
        elif field_type == "/Btn" and obj.get(FA.Ff, 0) & FA.FfBits.Radio != 0:
            # Radio button: gather the states of all kids, without duplicates.
            states: list[str] = []
            entry[states_key] = ArrayObject(states)
            for kid in obj.get(FA.Kids, {}):
                kid = kid.get_object()
                for state in list(kid["/AP"]["/N"].keys()):
                    if state not in states:
                        states.append(state)
                entry[states_key] = ArrayObject(states)
            no_toggle_to_off = obj.get(FA.Ff, 0) & FA.FfBits.NoToggleToOff != 0
            if no_toggle_to_off and "/Off" in entry["/_States_"]:
                del entry["/_States_"][entry["/_States_"].index("/Off")]

        # Kids come last to keep the order of the fields.
        self._check_kids(field, retval, fileobj, stack)

    def _check_kids(
        self,
        tree: Union[TreeObject, DictionaryObject],
        retval: Any,
        fileobj: Any,
        stack: list[PdfObject],
    ) -> None:
        """
        Process the kids of *tree*, unless *tree* has been handled before.

        Args:
            tree: The node whose ``/Kids`` shall be parsed.
            retval: The dictionary collecting the fields by name.
            fileobj: Optional file object receiving the report.
            stack: List of already parsed objects; *tree* gets appended.

        """
        already_parsed = tree in stack
        if already_parsed:
            logger_warning(
                f"{self._get_qualified_field_name(tree)} already parsed", __name__
            )
            return
        stack.append(tree)
        if PagesAttributes.KIDS not in tree:
            return
        # recurse down the tree
        for child in tree[PagesAttributes.KIDS]:  # type: ignore
            self.get_fields(child.get_object(), retval, fileobj, stack)

    def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
        """
        Write a report of the attributes of *field* to *fileobj*.

        One ``name: value`` line is written per attribute present in the
        field. Kids and additional actions are left out, the field type is
        spelled out and the parent is reported by its name only.

        Args:
            fileobj: The file object to write to.
            field: The field to report on.
            field_attributes: Mapping of attribute keys to readable names.

        """
        readable_types = {
            "/Btn": "Button",
            "/Tx": "Text",
            "/Ch": "Choice",
            "/Sig": "Signature",
        }
        skipped = (FA.Kids, FA.AA)
        all_attributes = FA.attributes() + CheckboxRadioButtonAttributes.attributes()

        for attr in all_attributes:
            if attr in skipped:
                continue
            label = field_attributes[attr]
            try:
                if attr == FA.FT:
                    # Make the field type value clearer
                    type_name = field[attr]
                    if type_name in readable_types:
                        fileobj.write(f"{label}: {readable_types[type_name]}\n")
                elif attr == FA.Parent:
                    # Let's just write the name of the parent
                    parent = field[attr]
                    try:
                        name = parent[FA.TM]
                    except KeyError:
                        name = parent[FA.T]
                    fileobj.write(f"{label}: {name}\n")
                else:
                    fileobj.write(f"{label}: {field[attr]}\n")
            except KeyError:
                # Field attribute is N/A or unknown, so don't write anything
                pass

    def get_form_text_fields(self, full_qualified_name: bool = False) -> dict[str, Any]:
        """
        Retrieve form fields from the document with textual data.

        Args:
            full_qualified_name: to get full name

        Returns:
            A dictionary. The key is the name of the form field,
            the value is the content of the field.

            If the document contains multiple form fields with the same name, the
            second and following will get the suffix .2, .3, ...

        """

        def unique_name(name: str, taken: dict[Any, Any]) -> str:
            # A name which is still free is used unchanged; otherwise a
            # counter based on the already suffixed names is appended.
            if name not in taken:
                return name
            prefix = name + "."
            suffixed = sum(1 for existing in taken if existing.startswith(prefix))
            return prefix + str(suffixed + 2)

        # Retrieve document form fields
        all_fields = self.get_fields()
        if all_fields is None:
            return {}

        text_fields = {}
        for qualified_name, field in all_fields.items():
            if field.get("/FT") == "/Tx":
                content = field.get("/V")
                if full_qualified_name:
                    text_fields[qualified_name] = content
                else:
                    short_name = unique_name(cast(str, field["/T"]), text_fields)
                    text_fields[short_name] = content
        return text_fields

    def get_pages_showing_field(
        self, field: Union[Field, PdfObject, IndirectObject]
    ) -> list[PageObject]:
        """
        Provides list of pages where the field is called.

        Args:
            field: Field Object, PdfObject or IndirectObject referencing a Field

        Returns:
            List of pages:
                - Empty list:
                    The field has no widgets attached
                    (either hidden field or ancestor field).
                - Single page list:
                    Page where the widget is present
                    (most common).
                - Multi-page list:
                    Field with multiple kids widgets
                    (example: radio buttons, field repeated on multiple pages).

        Raises:
            ValueError: If the field cannot be resolved through its indirect
                reference or if no ``/FT`` is found on the field or one of
                its parents.

        """

        def _get_inherited(obj: DictionaryObject, key: str) -> Any:
            # Look up *key* on the object itself, then along the /Parent chain.
            if key in obj:
                return obj[key]
            if "/Parent" in obj:
                return _get_inherited(
                    cast(DictionaryObject, obj["/Parent"].get_object()), key
                )
            return None

        def _pages_of_widget(widget: DictionaryObject) -> list[Any]:
            # Prefer the page recorded in /P; otherwise search the /Annots of
            # every page. The default has to be an empty tuple: testing an
            # indirect reference for membership in a string raises TypeError,
            # which made the search fail on any page without /Annots.
            if "/P" in widget:
                return [widget["/P"].get_object()]
            return [
                p
                for p in self.pages
                if widget.indirect_reference in p.get("/Annots", ())
            ]

        try:
            # to cope with all types
            field = cast(DictionaryObject, field.indirect_reference.get_object())  # type: ignore
        except Exception as exc:
            raise ValueError("Field type is invalid") from exc
        if is_null_or_none(_get_inherited(field, "/FT")):
            raise ValueError("Field is not valid")

        ret: list[Any] = []
        if field.get("/Subtype", "") == "/Widget":
            # The field dictionary is its own widget.
            ret = _pages_of_widget(field)
        else:
            kids = field.get("/Kids", ())
            for k in kids:
                k = k.get_object()
                if (k.get("/Subtype", "") == "/Widget") and ("/T" not in k):
                    # Kid that is just a widget, not a field:
                    ret += _pages_of_widget(k)

        # Entries which are not PageObject instances yet are mapped to the
        # corresponding entry of ``self.pages``.
        return [
            x
            if isinstance(x, PageObject)
            else (self.pages[self._get_page_number_by_indirect(x.indirect_reference)])  # type: ignore
            for x in ret
        ]

    @property
    def open_destination(
        self,
    ) -> Union[None, Destination, TextStringObject, ByteStringObject]:
        """
        Property to access the opening destination (``/OpenAction`` entry in
        the PDF catalog). It returns ``None`` if the entry does not exist
        or is not set.

        A string entry is returned as a string object, an array entry is
        converted into a :class:`Destination` named ``OpenAction``. Entries
        of any other kind result in ``None``.

        Raises:
            Exception: If a destination is invalid. The original error is
                attached as the cause.

        """
        if "/OpenAction" not in self.root_object:
            return None
        oa: Any = self.root_object["/OpenAction"]
        if isinstance(oa, bytes):  # pragma: no cover
            oa = oa.decode()
        if isinstance(oa, str):
            return create_string_object(oa)
        if isinstance(oa, ArrayObject):
            try:
                # Layout of the array: page, fit type, then the fit arguments.
                page, typ, *array = oa
                fit = Fit(typ, tuple(array))
                return Destination("OpenAction", page, fit)
            except Exception as exc:
                # Chain explicitly, so the original failure is reported as
                # the direct cause of this error.
                raise Exception(f"Invalid Destination {oa}: {exc}") from exc
        else:
            return None

    @open_destination.setter
    def open_destination(self, dest: Union[None, str, Destination, PageObject]) -> None:
        """Setting is not supported by this class; always raises ``NotImplementedError``."""
        raise NotImplementedError("No setter for open_destination")

    @property
    def outline(self) -> OutlineType:
        """
        Read-only property for the outline present in the document
        (i.e., a collection of 'outline items' which are also known as
        'bookmarks').

        The outline is rebuilt through ``_get_outline`` on every access.
        """
        return self._get_outline()

    def _get_outline(
        self,
        node: Optional[DictionaryObject] = None,
        outline: Optional[Any] = None,
        visited: Optional[set[int]] = None,
    ) -> OutlineType:
        """
        Collect the outline items, starting at *node*.

        A call without *outline* is the entry point: the first item is taken
        from the outline dictionary of the catalog and the named destinations
        are stored in ``self._named_destinations``. Siblings are followed
        through ``/Next``; the items below ``/First`` are gathered by a nested
        call and appended as a nested list.

        Args:
            node: The outline item to start with.
            outline: The list receiving the items; for nested calls.
            visited: Identities of the items handled so far, used to stop
                on cycles; for nested calls.

        Returns:
            The list of outline items, with nested lists for sub-outlines.

        """
        if outline is None:
            outline = []
            catalog = self.root_object

            # get the outline dictionary and named destinations
            if CO.OUTLINES in catalog:
                outlines_root = cast(DictionaryObject, catalog[CO.OUTLINES])

                if isinstance(outlines_root, NullObject):
                    return outline

                # §12.3.3 Document outline, entries in the outline dictionary
                if not is_null_or_none(outlines_root) and "/First" in outlines_root:
                    node = cast(DictionaryObject, outlines_root["/First"])
            self._named_destinations = self._get_named_destinations()

        if node is None:
            return outline

        seen = set() if visited is None else visited
        current = node
        # Walk along the siblings until the chain ends or repeats itself.
        while True:
            marker = id(current)
            if marker in seen:
                logger_warning(f"Detected cycle in outline structure for {current}", __name__)
                break
            seen.add(marker)

            item = self._build_outline_item(current)
            if item:
                outline.append(item)

            # check for sub-outline
            if "/First" in current:
                children: list[Any] = []
                # Pass a copy to allow multiple outer entries to reference the same inner one.
                self._get_outline(
                    node=cast(DictionaryObject, current["/First"]),
                    outline=children,
                    visited=seen.copy(),
                )
                if children:
                    outline.append(children)

            if "/Next" not in current:
                break
            current = cast(DictionaryObject, current["/Next"])

        return outline

    @property
    def threads(self) -> Optional[ArrayObject]:
        """
        Read-only property for the list of threads.

        See §12.4.3 from the PDF 1.7 or 2.0 specification.

        It is an array of dictionaries with "/F" (the first bead in the thread)
        and "/I" (a thread information dictionary containing information about
        the thread, such as its title, author, and creation date) properties or
        None if there are no articles.

        Since PDF 2.0 it can also contain an indirect reference to a metadata
        stream containing information about the thread, such as its title,
        author, and creation date.
        """
        catalog = self.root_object
        if CO.THREADS not in catalog:
            return None
        return cast("ArrayObject", catalog[CO.THREADS])

    @abstractmethod
    def _get_page_number_by_indirect(
        self, indirect_reference: Union[None, int, NullObject, IndirectObject]
    ) -> Optional[int]:
        """
        Map an indirect reference to a page number.

        Has to be implemented by the concrete subclass. Within this class it
        backs ``get_page_number`` and ``get_destination_page_number``.

        Args:
            indirect_reference: Reference identifying the page.

        Returns:
            The page number or ``None``.
            NOTE(review): presumably ``None`` means that no page matches -
            confirm against the implementations.

        """
        ...  # pragma: no cover

    def get_page_number(self, page: PageObject) -> Optional[int]:
        """
        Retrieve page number of a given PageObject.

        The lookup is delegated to ``_get_page_number_by_indirect`` with the
        indirect reference of the page.

        Args:
            page: The page to get page number. Should be
                an instance of :class:`PageObject<pypdf._page.PageObject>`

        Returns:
            The page number or None if page is not found

        """
        return self._get_page_number_by_indirect(page.indirect_reference)

    def get_destination_page_number(self, destination: Destination) -> Optional[int]:
        """
        Retrieve page number of a given Destination object.

        The lookup is delegated to ``_get_page_number_by_indirect`` with the
        ``page`` attribute of the destination.

        Args:
            destination: The destination to get page number.

        Returns:
            The page number or None if page is not found

        """
        return self._get_page_number_by_indirect(destination.page)

    def _build_destination(
        self,
        title: Union[str, bytes],
        array: Optional[
            list[
                Union[NumberObject, IndirectObject, None, NullObject, DictionaryObject]
            ]
        ],
    ) -> Destination:
        """
        Create a :class:`Destination` from a destination array.

        Args:
            title: The title of the destination.
            array: The page, the fit type and the fit arguments, in this
                order. A missing, null, string or empty value yields a
                destination pointing to a null page.

        Returns:
            The destination. If the fit cannot be built and ``strict`` is
            off, a destination to the first page is returned instead.

        Raises:
            PdfReadError: If the fit cannot be built and ``strict`` is on.

        """
        # handle outline items with missing or invalid destination
        unusable = (
            array is None
            or isinstance(array, (NullObject, str))
            or (isinstance(array, ArrayObject) and len(array) == 0)
        )
        if unusable:
            return Destination(title, NullObject(), Fit.fit())

        page, typ, *fit_args = array  # type: ignore
        try:
            return Destination(title, page, Fit(fit_type=typ, fit_args=fit_args))  # type: ignore
        except PdfReadError:
            logger_warning(f"Unknown destination: {title!r} {fit_args}", __name__)
            if self.strict:
                raise
            # create a link to first Page
            first_page_reference = self.pages[0].indirect_reference
            target = NullObject() if first_page_reference is None else first_page_reference
            return Destination(title, target, Fit.fit())

    def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]:
        """
        Turn an outline item dictionary into a Destination.

        The target is taken from a ``/GoTo`` action (``/A``) or, failing
        that, from ``/Dest``. Named destinations are looked up in
        ``self._named_destinations``. Color (``/C``), style flags (``/F``)
        and child count (``/Count``) are copied over when present, and
        ``/%is_open%`` records whether the item counts as open.

        Args:
            node: The outline item dictionary.

        Returns:
            The Destination built for the outline item, with ``node`` (and
            its indirect reference, if it has one) attached.

        Raises:
            PdfReadError: In strict mode, when ``/Title`` is missing, a
                ``/GoTo`` action has no ``/D``, or the destination has an
                unexpected type.

        """
        # title required for valid outline
        # §12.3.3, entries in an outline item dictionary
        try:
            label = cast("str", node["/Title"])
        except KeyError:
            if self.strict:
                raise PdfReadError(f"Outline Entry Missing /Title attribute: {node!r}")
            label = ""

        target = None
        if "/A" in node:
            # Action, PDF 1.7 and PDF 2.0 §12.6 (only type GoTo supported)
            action = cast(DictionaryObject, node["/A"])
            kind = cast(NameObject, action[GoToActionArguments.S])
            if kind == "/GoTo":
                if GoToActionArguments.D in action:
                    target = action[GoToActionArguments.D]
                elif self.strict:
                    raise PdfReadError(f"Outline Action Missing /D attribute: {node!r}")
        elif "/Dest" in node:
            # Destination, PDF 1.7 and PDF 2.0 §12.3.2
            target = node["/Dest"]
            # if array was referenced in another object, will be a dict w/ key "/D"
            if isinstance(target, DictionaryObject) and "/D" in target:
                target = target["/D"]

        if isinstance(target, ArrayObject) or target is None:
            # explicit destination array, or no destination at all: an outline
            # item is not required to have destination or action
            # (PDFv1.7 Table 153)
            item = self._build_destination(label, target)
        elif isinstance(target, str):
            # named destination, addresses NameObject Issue #193
            # TODO: Keep named destination instead of replacing it?
            try:
                item = self._build_destination(
                    label, self._named_destinations[target].dest_array
                )
            except KeyError:
                # named destination not found in Name Dict
                item = self._build_destination(label, None)
        else:
            if self.strict:
                raise PdfReadError(f"Unexpected destination {target!r}")
            logger_warning(
                f"Removed unexpected destination {target!r} from destination",
                __name__,
            )
            item = self._build_destination(label, None)

        # if outline item created, add color, format, and child count if present
        if item:
            if "/C" in node:
                # Color of outline item font in (R, G, B) with values ranging 0.0-1.0
                rgb = ArrayObject(FloatObject(c) for c in node["/C"])  # type: ignore
                item[NameObject("/C")] = rgb
            # /F: style characteristics bold and/or italic
            #     with 1=italic, 2=bold, 3=both
            # /Count: absolute value = num. visible children
            #     with positive = open/unfolded, negative = closed/folded
            for copied_key in ("/F", "/Count"):
                if copied_key in node:
                    item[NameObject(copied_key)] = node[copied_key]
            #  if count is 0 we will consider it as open (to have available is_open)
            is_open = node.get("/Count", 0) >= 0
            item[NameObject("/%is_open%")] = BooleanObject(is_open)
        item.node = node
        try:
            item.indirect_reference = node.indirect_reference
        except AttributeError:
            # the node has no indirect reference to carry over
            pass
        return item

    @property
    def pages(self) -> list[PageObject]:
        """
        List-like view of the :class:`PageObject<pypdf._page.PageObject>` items of the document.
        A single page or a range of pages can be retrieved through it.

        Note:
            For PdfWriter only: Provides the capability to remove a page/range of
            page from the list (using the del operator). Remember: Only the page
            entry is removed, as the objects beneath can be used elsewhere. A
            solution to completely remove them - if they are not used anywhere - is
            to write to a buffer/temporary file and then load it into a new
            PdfWriter.

        """
        # The view is backed by the page-count and page-getter callables;
        # a new view object is created on every access.
        page_view = _VirtualList(self.get_num_pages, self.get_page)
        return page_view  # type: ignore

    @property
    def page_labels(self) -> list[str]:
        """
        The labels of all pages in this document, in page order.

        This property is read-only; the list is rebuilt on every access.
        """
        page_count = len(self.pages)
        labels = []
        for page_index in range(page_count):
            labels.append(page_index2page_label(self, page_index))
        return labels

    @property
    def page_layout(self) -> Optional[str]:
        """
        The page layout stored in the document catalog, or None if unset.

        .. list-table:: Valid ``layout`` values
           :widths: 50 200

           * - /NoLayout
             - Layout explicitly not specified
           * - /SinglePage
             - Show one page at a time
           * - /OneColumn
             - Show one column at a time
           * - /TwoColumnLeft
             - Show pages in two columns, odd-numbered pages on the left
           * - /TwoColumnRight
             - Show pages in two columns, odd-numbered pages on the right
           * - /TwoPageLeft
             - Show two pages at a time, odd-numbered pages on the left
           * - /TwoPageRight
             - Show two pages at a time, odd-numbered pages on the right
        """
        try:
            layout = self.root_object[CD.PAGE_LAYOUT]
        except KeyError:
            # the catalog has no page layout entry
            return None
        return cast(NameObject, layout)

    @property
    def page_mode(self) -> Optional[PagemodeType]:
        """
        The page mode stored in the document catalog, or None if unset.

        .. list-table:: Valid ``mode`` values
           :widths: 50 200

           * - /UseNone
             - Do not show outline or thumbnails panels
           * - /UseOutlines
             - Show outline (aka bookmarks) panel
           * - /UseThumbs
             - Show page thumbnails panel
           * - /FullScreen
             - Fullscreen view
           * - /UseOC
             - Show Optional Content Group (OCG) panel
           * - /UseAttachments
             - Show attachments panel
        """
        try:
            mode = self.root_object["/PageMode"]
        except KeyError:
            # the catalog has no /PageMode entry
            return None
        return mode  # type: ignore

    def _flatten(
        self,
        list_only: bool = False,
        pages: Union[None, DictionaryObject, PageObject] = None,
        inherit: Optional[dict[str, Any]] = None,
        indirect_reference: Optional[IndirectObject] = None,
    ) -> None:
        """
        Process the document pages to ease searching.

        Attributes of a page may inherit from ancestor nodes
        in the page tree. Flattening means moving
        any inheritance data into descendant nodes,
        effectively removing the inheritance dependency.

        Note: It is distinct from another use of "flattening" applied to PDFs.
        Flattening a PDF also means combining all the contents into one single layer
        and making the file less editable.

        Args:
            list_only: When True, the ``PageObject`` entries appended to
                ``flattened_pages`` are not filled with the page dictionary
                content.
            pages: The page tree node to process. ``None`` starts at the
                ``/Pages`` entry of the catalog and resets ``flattened_pages``.
            inherit: Inheritable attributes collected from the ancestors of
                ``pages``. The passed dictionary is not modified.
            indirect_reference: Used recursively to flatten the /Pages object.

        Raises:
            PdfReadError: The ``/Pages`` entry is not a dictionary, a node
                lists itself as its own kid, or the tree is nested deeper
                than the recursion limit.

        """
        inheritable_page_attributes = (
            NameObject(PG.RESOURCES),
            NameObject(PG.MEDIABOX),
            NameObject(PG.CROPBOX),
            NameObject(PG.ROTATE),
        )
        if inherit is None:
            inherit = {}
        if pages is None:
            # Fix issue 327: set flattened_pages attribute only for
            # decrypted file
            catalog = self.root_object
            pages = catalog.get("/Pages").get_object()  # type: ignore
            if not isinstance(pages, DictionaryObject):
                raise PdfReadError("Invalid object in /Pages")
            self.flattened_pages = []

        if PagesAttributes.TYPE in pages:
            t = cast(str, pages[PagesAttributes.TYPE])
        # if the page tree node has no /Type, consider as a page if /Kids is also missing
        elif PagesAttributes.KIDS not in pages:
            t = "/Page"
        else:
            t = "/Pages"

        if t == "/Pages":
            # Work on a copy: attributes defined on this node apply to its own
            # descendants only. Updating the shared dictionary in place would
            # leak them into sibling subtrees that are processed later.
            inherit = dict(inherit)
            for attr in inheritable_page_attributes:
                if attr in pages:
                    inherit[attr] = pages[attr]
            # a fresh object() never compares equal, so nodes without an
            # indirect reference cannot trigger the cycle check
            pages_reference = getattr(pages, "indirect_reference", object())
            for page in cast(ArrayObject, pages[PagesAttributes.KIDS]):
                if getattr(page, "indirect_reference", object()) == pages_reference:
                    raise PdfReadError("Detected cyclic page references.")

                addt = {}
                if isinstance(page, IndirectObject):
                    addt["indirect_reference"] = page
                obj = page.get_object()
                if obj:
                    # damaged file may have invalid child in /Pages
                    try:
                        self._flatten(list_only, obj, inherit, **addt)
                    except RecursionError:
                        raise PdfReadError(
                            "Maximum recursion depth reached during page flattening."
                        )
        elif t == "/Page":
            for attr_in, value in inherit.items():
                # if the page has its own value, it does not inherit the
                # parent's value
                if attr_in not in pages:
                    pages[attr_in] = value
            page_obj = PageObject(self, indirect_reference)
            if not list_only:
                page_obj.update(pages)

            # TODO: Could flattened_pages be None at this point?
            self.flattened_pages.append(page_obj)  # type: ignore

    def remove_page(
        self,
        page: Union[int, PageObject, IndirectObject],
        clean: bool = False,
    ) -> None:
        """
        Remove page from pages list.

        A warning is logged and nothing is removed when the page cannot be
        identified or its number is out of range.

        Args:
            page:
                * :class:`int`: Page number to be removed.
                * :class:`~pypdf._page.PageObject`: page to be removed. If the page appears many times
                  only the first one will be removed.
                * :class:`~pypdf.generic.IndirectObject`: Reference to page to be removed.

            clean: replace PageObject with NullObject to prevent annotations
                or destinations to reference a detached page.

        """
        # make sure the flat page list exists before searching it
        if self.flattened_pages is None:
            self._flatten(self._readonly)
        assert self.flattened_pages is not None

        if isinstance(page, IndirectObject):
            resolved = page.get_object()
            if not isinstance(resolved, PageObject):
                logger_warning("IndirectObject is not referencing a page", __name__)
                return
            page = resolved

        if isinstance(page, int):
            page_index = page
        else:
            try:
                page_index = self.flattened_pages.index(page)
            except ValueError:
                logger_warning("Cannot find page in pages", __name__)
                return

        if page_index < 0 or page_index >= len(self.flattened_pages):
            logger_warning("Page number is out of range", __name__)
            return

        # remember the reference first: it is needed after the page is gone
        removed_reference = self.pages[page_index].indirect_reference
        del self.pages[page_index]
        if clean and removed_reference is not None:
            self._replace_object(removed_reference, NullObject())

    def _get_indirect_object(self, num: int, gen: int) -> Optional[PdfObject]:
        """
        Development helper: fetch an object by its number and generation.

        Shorthand for ``generic.IndirectObject(num, gen, self).get_object()``.

        Args:
            num: The object number of the indirect object.
            gen: The generation number of the indirect object.

        Returns:
            A PdfObject

        """
        reference = IndirectObject(num, gen, self)
        return reference.get_object()

    def decode_permissions(
        self, permissions_code: int
    ) -> dict[str, bool]:  # pragma: no cover
        """
        Take the permissions as an integer, return the allowed access.

        Deprecated: the call goes through ``deprecation_with_replacement``,
        which names ``user_access_permissions`` as the replacement.

        Args:
            permissions_code: The permission flags as an integer bit field.

        Returns:
            A dictionary mapping each permission name to whether its flag
            is set in ``permissions_code``.

        """
        deprecation_with_replacement(
            old_name="decode_permissions",
            new_name="user_access_permissions",
            removed_in="5.0.0",
        )

        named_flags = (
            ("print", UserAccessPermissions.PRINT),
            ("modify", UserAccessPermissions.MODIFY),
            ("copy", UserAccessPermissions.EXTRACT),
            ("annotations", UserAccessPermissions.ADD_OR_MODIFY),
            ("forms", UserAccessPermissions.FILL_FORM_FIELDS),
            # Do not fix typo, as part of official, but deprecated API.
            ("accessability", UserAccessPermissions.EXTRACT_TEXT_AND_GRAPHICS),
            ("assemble", UserAccessPermissions.ASSEMBLE_DOC),
            ("print_high_quality", UserAccessPermissions.PRINT_TO_REPRESENTATION),
        )

        allowed: dict[str, bool] = {}
        for permission_name, flag in named_flags:
            allowed[permission_name] = (permissions_code & flag) != 0
        return allowed

    @property
    def user_access_permissions(self) -> Optional[UserAccessPermissions]:
        """
        The user access permissions of an encrypted document.
        None is returned if the document is not encrypted.

        .. warning::

            For AES-256 encrypted documents (R=5/R=6), the returned
            permissions are derived from the ``/P`` field, which is
            only trustworthy if the ``/Perms`` integrity check passed.
            Check :attr:`are_permissions_valid` to verify.
        """
        encryption = self._encryption
        if encryption is not None:
            return UserAccessPermissions(encryption.P)
        return None

    @property
    def are_permissions_valid(self) -> Optional[bool]:
        """
        Outcome of the ``/Perms`` integrity check for this document.

        For AES-256 encrypted documents (R=5/R=6), the ``/Perms`` field
        is an encrypted copy of the permissions that can be verified
        independently. ``False`` means this check failed (the ``/P``
        permissions may have been tampered with).

        ``None`` is returned if the document is not encrypted or has not yet
        been decrypted via :meth:`decrypt()<pypdf.PdfReader.decrypt>`.
        ``True`` is returned for non-AES-256 encryption (no ``/Perms`` to check).
        """
        encryption = self._encryption
        # no verdict exists without encryption or before decryption
        if encryption is None or not encryption.is_decrypted():
            return None
        return encryption._are_permissions_valid

    @property
    @abstractmethod
    def is_encrypted(self) -> bool:
        """
        Read-only boolean property showing whether this PDF file is encrypted.

        Note that this property, if true, will remain true even after the
        :meth:`decrypt()<pypdf.PdfReader.decrypt>` method is called.

        This is an abstract property: it has no implementation here and
        must be provided by concrete subclasses.
        """
        ...  # pragma: no cover

    @property
    def xfa(self) -> Optional[dict[str, Any]]:
        """
        The XFA entries found in the document's ``/AcroForm``.

        The ``/XFA`` value is read as a flat sequence of alternating tags and
        values. A value is only taken into account when it is an indirect
        reference resolving to an object; that object's data is passed
        through ``_decompress_with_limit`` and stored under its tag. A
        trailing tag that has no value (odd number of elements, i.e. a
        malformed file) is ignored.

        Returns:
            None if the catalog has no (or an empty) ``/AcroForm`` entry,
            otherwise a dictionary mapping each tag to its decompressed
            data. The dictionary is empty when there is no ``/XFA`` entry.

        """
        catalog = self.root_object

        if "/AcroForm" not in catalog or not catalog["/AcroForm"]:
            return None

        retval: dict[str, Any] = {}
        tree = cast("TreeObject", catalog["/AcroForm"])

        if "/XFA" in tree:
            fields = cast("ArrayObject", tree["/XFA"])
            # Pulling twice from the same iterator yields (tag, value) pairs.
            # Unlike a bare next() inside the loop, zip() ends cleanly on an
            # odd number of elements instead of leaking StopIteration.
            entries = iter(fields)
            for tag, value in zip(entries, entries):
                if isinstance(value, IndirectObject):
                    field = cast("Optional[EncodedStreamObject]", value.get_object())
                    if field:
                        retval[tag] = _decompress_with_limit(field._data)
        return retval

    @property
    def attachments(self) -> Mapping[str, list[bytes]]:
        """
        Mapping of attachment filenames to their content.

        The mapping is lazy: each filename is stored together with the
        loader to call, so content is only fetched when a key is looked up.
        """
        lazy_entries = {}
        for attachment_name in self._list_attachments():
            lazy_entries[attachment_name] = (self._get_attachment_list, attachment_name)
        return LazyDict(lazy_entries)

    @property
    def attachment_list(self) -> Generator[EmbeddedFile, None, None]:
        """Iterable of attachment objects, loaded from the document catalog."""
        embedded_files = EmbeddedFile._load(self.root_object)
        yield from embedded_files

    def _list_attachments(self) -> list[str]:
        """
        Collect the filenames of all file attachments.

        Every attachment contributes its name; its alternative name is
        added as well when it is non-empty and differs from the name.

        Returns:
            list of filenames

        """
        filenames = []
        for attachment in self.attachment_list:
            filenames.append(attachment.name)
            alternative = attachment.alternative_name
            if alternative != attachment.name and alternative:
                filenames.append(alternative)
        return filenames

    def _get_attachment_list(self, name: str) -> list[bytes]:
        """
        Return the content of the attachment(s) called ``name`` as a list.

        A single content value is wrapped into a one-element list, so
        the result is always a list.
        """
        content = self._get_attachments(name)[name]
        return content if isinstance(content, list) else [content]

    def _get_attachments(
        self, filename: Optional[str] = None
    ) -> dict[str, Union[bytes, list[bytes]]]:
        """
        Collect all or selected file attachments of the PDF as a dictionary
        mapping file names to the file data as a bytestring.

        An attachment is registered under its name and under its alternative
        name. Names that are ``None`` are skipped.

        Args:
            filename: If filename is None, then a dictionary of all attachments
                will be returned, where the key is the filename and the value
                is the content. Otherwise, a dictionary with just a single key
                - the filename - and its content will be returned.

        Returns:
            dictionary of filename -> Union[bytestring or List[ByteString]]
            If the filename exists multiple times a list of the different versions will be provided.

        """
        collected: dict[str, Union[bytes, list[bytes]]] = {}
        for attachment in self.attachment_list:
            alternative = attachment.alternative_name
            if filename is None:
                # no filter: register under every name the attachment has
                wanted = {attachment.name, alternative}
            elif filename in {attachment.name, alternative}:
                # register only under the name that was asked for
                wanted = {
                    attachment.name if filename == attachment.name else alternative
                }
            else:
                continue

            for key in wanted:
                if key is None:
                    continue
                if key not in collected:
                    collected[key] = attachment.content
                    continue
                # repeated name: switch to (or extend) a list of versions
                if not isinstance(collected[key], list):
                    collected[key] = [collected[key]]  # type:ignore
                collected[key].append(attachment.content)  # type:ignore
        return collected

    @abstractmethod
    def _repr_mimebundle_(
        self,
        include: Union[None, Iterable[str]] = None,
        exclude: Union[None, Iterable[str]] = None,
    ) -> dict[str, Any]:
        """
        Integration into Jupyter Notebooks.

        This method returns a dictionary that maps a mime-type to its
        representation.

        Abstract: there is no implementation here, concrete subclasses
        must provide one.

        Args:
            include: Optional iterable of strings; presumably the mime-types
                to restrict the result to, per the IPython protocol linked
                below - confirm against the implementations.
            exclude: Optional iterable of strings; presumably the mime-types
                to leave out - confirm against the implementations.

        Returns:
            A dictionary that maps a mime-type to its representation.

        .. seealso::

            https://ipython.readthedocs.io/en/stable/config/integrating.html
        """
        ...  # pragma: no cover


class LazyDict(Mapping[Any, Any]):
    """
    Read-only mapping that computes its values on access.

    Every stored value is a ``(func, arg)`` pair. Looking up a key calls
    ``func(arg)`` and returns the result; the result is not kept, so each
    lookup calls the function again.
    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Takes the same arguments as ``dict``.
        self._raw_dict = dict(*args, **kwargs)

    def __len__(self) -> int:
        return len(self._raw_dict)

    def __iter__(self) -> Iterator[Any]:
        return iter(self._raw_dict)

    def __getitem__(self, key: str) -> Any:
        # A missing key raises KeyError before anything is called.
        producer, argument = self._raw_dict[key]
        return producer(argument)

    def __str__(self) -> str:
        # Only the keys are shown, so printing never triggers a computation.
        key_list = list(self.keys())
        return f"LazyDict(keys={key_list})"


================================================
FILE: pypdf/_encryption.py
================================================
# Copyright (c) 2022, exiledkingcc
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import hashlib
import secrets
import struct
from enum import Enum, IntEnum
from typing import Any, Optional, Union, cast

from pypdf._crypt_providers import (
    CryptAES,
    CryptBase,
    CryptIdentity,
    CryptRC4,
    aes_cbc_decrypt,
    aes_cbc_encrypt,
    aes_ecb_decrypt,
    aes_ecb_encrypt,
    rc4_decrypt,
    rc4_encrypt,
)

from ._utils import logger_warning
from .generic import (
    ArrayObject,
    ByteStringObject,
    DictionaryObject,
    NameObject,
    NumberObject,
    PdfObject,
    StreamObject,
    TextStringObject,
    create_string_object,
)


class CryptFilter:
    """
    Holds the crypt providers for streams, strings and embedded files and
    applies them recursively to PDF objects.

    Within this class, ``str_crypt`` handles string objects and
    ``stm_crypt`` handles stream data; ``ef_crypt`` is only stored.
    """

    def __init__(
        self,
        stm_crypt: CryptBase,
        str_crypt: CryptBase,
        ef_crypt: CryptBase,
    ) -> None:
        self.stm_crypt = stm_crypt
        self.str_crypt = str_crypt
        self.ef_crypt = ef_crypt

    def encrypt_object(self, obj: PdfObject) -> PdfObject:
        """
        Return an encrypted counterpart of ``obj``.

        Strings, streams, dictionaries and arrays are rebuilt as new objects
        with their content encrypted (containers recursively). Any other
        object is returned unchanged.
        """
        if isinstance(obj, ByteStringObject):
            return ByteStringObject(self.str_crypt.encrypt(obj.original_bytes))
        if isinstance(obj, TextStringObject):
            return ByteStringObject(self.str_crypt.encrypt(obj.get_encoded_bytes()))
        if isinstance(obj, StreamObject):
            encrypted_stream = StreamObject()
            encrypted_stream.update(obj)
            encrypted_stream.set_data(self.stm_crypt.encrypt(obj._data))
            for key, value in obj.items():  # Dont forget the Stream dict.
                encrypted_stream[key] = self.encrypt_object(value)
            return encrypted_stream
        if isinstance(obj, DictionaryObject):
            encrypted_dict = DictionaryObject()
            for key, value in obj.items():
                encrypted_dict[key] = self.encrypt_object(value)
            return encrypted_dict
        if isinstance(obj, ArrayObject):
            return ArrayObject(self.encrypt_object(item) for item in obj)
        return obj

    def decrypt_object(self, obj: PdfObject) -> PdfObject:
        """
        Decrypt ``obj`` and return the result.

        Strings are replaced by a new string object. Streams, dictionaries
        and arrays are decrypted in place (recursively) and returned
        themselves. Any other object is returned unchanged.
        """
        if isinstance(obj, (ByteStringObject, TextStringObject)):
            plain = self.str_crypt.decrypt(obj.original_bytes)
            return create_string_object(plain)
        if isinstance(obj, StreamObject):
            obj._data = self.stm_crypt.decrypt(obj._data)
            for key, value in obj.items():  # Dont forget the Stream dict.
                obj[key] = self.decrypt_object(value)
            return obj
        if isinstance(obj, DictionaryObject):
            # only existing keys are reassigned while iterating
            for key, value in obj.items():
                obj[key] = self.decrypt_object(value)
            return obj
        if isinstance(obj, ArrayObject):
            for position in range(len(obj)):
                obj[position] = self.decrypt_object(obj[position])
        return obj


# The 32-byte password padding string; the same byte sequence is quoted in
# the "Algorithm 2" description in the docstring of AlgV4.compute_key.
_PADDING = bytes.fromhex(
    "28 BF 4E 5E 4E 75 8A 41 64 00 4E 56 FF FA 01 08"
    " 2E 2E 00 B6 D0 68 3E 80 2F 0C A9 FE 64 53 69 7A"
)


def _padding(data: bytes) -> bytes:
    """Return ``data`` truncated, or filled up from ``_PADDING``, to 32 bytes."""
    head = data[:32]
    # complete with the beginning of the padding string
    return head + _PADDING[: 32 - len(head)]


class AlgV4:
    """
    Password and key algorithms that branch on security handler revision
    2, 3 and 4 (Algorithms 2 to 7 as quoted in the method docstrings).

    All methods are static. ``key_size`` is always the key length in *bits*;
    the methods divide it by 8 wherever a byte count is needed.
    """

    @staticmethod
    def compute_key(
        password: bytes,
        rev: int,
        key_size: int,
        o_entry: bytes,
        P: int,
        id1_entry: bytes,
        metadata_encrypted: bool,
    ) -> bytes:
        """
        Algorithm 2: Computing an encryption key.

        a) Pad or truncate the password string to exactly 32 bytes. If the
           password string is more than 32 bytes long, use only its first
           32 bytes; if it is less than 32 bytes long, pad it by appending
           the required number of additional bytes from the beginning of the
           following padding string:
                < 28 BF 4E 5E 4E 75 8A 41 64 00 4E 56 FF FA 01 08
                2E 2E 00 B6 D0 68 3E 80 2F 0C A9 FE 64 53 69 7A >
           That is, if the password string is n bytes long, append the first
           32 - n bytes of the padding string to the end of the password
           string. If the password string is empty (zero-length), meaning
           there is no user password, substitute the entire padding string
           in its place.
        b) Initialize the MD5 hash function and pass the result of step (a)
           as input to this function.
        c) Pass the value of the encryption dictionary's O entry to the MD5
           hash function. ("Algorithm 3: Computing the encryption
           dictionary's O (owner password) value" shows how the O value is
           computed.)
        d) Convert the integer value of the P entry to a 32-bit unsigned
           binary number and pass these bytes to the MD5 hash function,
           low-order byte first.
        e) Pass the first element of the file's file identifier array (the
           value of the ID entry in the document's trailer dictionary; see
           Table 15) to the MD5 hash function.
        f) (Security handlers of revision 4 or greater) If document metadata
           is not being encrypted, pass 4 bytes with the value 0xFFFFFFFF to
           the MD5 hash function.
        g) Finish the hash.
        h) (Security handlers of revision 3 or greater) Do the following
           50 times: Take the output from the previous MD5 hash and pass the
           first n bytes of the output as input into a new MD5 hash, where n
           is the number of bytes of the encryption key as defined by the
           value of the encryption dictionary's Length entry.
        i) Set the encryption key to the first n bytes of the output from
           the final MD5 hash, where n shall always be 5 for security
           handlers of revision 2 but, for security handlers of revision 3
           or greater, shall depend on the value of the encryption
           dictionary's Length entry.

        Args:
            password: The encryption secret as a bytes-string
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits (``key_size // 8`` bytes
                of the digest are used)
            o_entry: The owner entry
            P: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. If bit 2 is set
                to 1, all other bits are ignored and all operations are
                permitted. If bit 2 is set to 0, permission for operations
                are based on the values of the remaining flags defined in
                Table 24. Both the signed and the unsigned 32-bit form are
                accepted.
            id1_entry: The first element of the file identifier array
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            The first ``key_size // 8`` bytes of the final MD5 digest

        """
        md5 = hashlib.md5(_padding(password))  # steps (a) and (b)
        md5.update(o_entry)  # step (c)
        # Step (d): a negative (signed) P is reduced modulo 2**32 so that it
        # packs as its unsigned bit pattern instead of raising struct.error.
        md5.update(struct.pack("<I", P & 0xFFFFFFFF))
        md5.update(id1_entry)  # step (e)
        if rev >= 4 and not metadata_encrypted:
            md5.update(b"\xff\xff\xff\xff")  # step (f)
        digest = md5.digest()  # step (g)
        length = key_size // 8
        if rev >= 3:
            for _ in range(50):  # step (h)
                digest = hashlib.md5(digest[:length]).digest()
        return digest[:length]  # step (i)

    @staticmethod
    def compute_O_value_key(owner_password: bytes, rev: int, key_size: int) -> bytes:
        """
        Algorithm 3: Computing the encryption dictionary's O (owner password)
        value. This method performs steps (a) to (d); :func:`compute_O_value`
        performs steps (e) to (h).

        a) Pad or truncate the owner password string as described in step (a)
           of "Algorithm 2: Computing an encryption key". If there is no
           owner password, use the user password instead.
        b) Initialize the MD5 hash function and pass the result of step (a)
           as input to this function.
        c) (Security handlers of revision 3 or greater) Do the following
           50 times: Take the output from the previous MD5 hash and pass it
           as input into a new MD5 hash.
        d) Create an RC4 encryption key using the first n bytes of the output
           from the final MD5 hash, where n shall always be 5 for security
           handlers of revision 2 but, for security handlers of revision 3 or
           greater, shall depend on the value of the encryption dictionary's
           Length entry.
        e) Pad or truncate the user password string as described in step (a)
           of "Algorithm 2: Computing an encryption key".
        f) Encrypt the result of step (e), using an RC4 encryption function
           with the encryption key obtained in step (d).
        g) (Security handlers of revision 3 or greater) Do the following
           19 times: Take the output from the previous invocation of the RC4
           function and pass it as input to a new invocation of the function;
           use an encryption key generated by taking each byte of the
           encryption key obtained in step (d) and performing an XOR
           (exclusive or) operation between that byte and the single-byte
           value of the iteration counter (from 1 to 19).
        h) Store the output from the final invocation of the RC4 function as
           the value of the O entry in the encryption dictionary.

        Args:
            owner_password: The owner password as bytes. The substitution of
                the user password mentioned in step (a) is not done here.
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits

        Returns:
            The RC4 key (``key_size // 8`` bytes)

        """
        digest = hashlib.md5(_padding(owner_password)).digest()
        if rev >= 3:
            for _ in range(50):
                digest = hashlib.md5(digest).digest()
        return digest[: key_size // 8]

    @staticmethod
    def compute_O_value(rc4_key: bytes, user_password: bytes, rev: int) -> bytes:
        """
        Steps (e) to (h) of Algorithm 3; see :func:`compute_O_value_key`.

        Args:
            rc4_key: The key returned by :func:`compute_O_value_key`
            user_password: The user password as bytes
            rev: The encryption revision (see PDF standard)

        Returns:
            The RC4-encrypted, padded user password, i.e. the O value

        """
        encrypted = rc4_encrypt(rc4_key, _padding(user_password))
        if rev >= 3:
            for i in range(1, 20):
                round_key = bytes(x ^ i for x in rc4_key)
                encrypted = rc4_encrypt(round_key, encrypted)
        return encrypted

    @staticmethod
    def compute_U_value(key: bytes, rev: int, id1_entry: bytes) -> bytes:
        """
        Algorithms 4 and 5: Computing the encryption dictionary's U
        (user password) value.

        Algorithm 4 (security handlers of revision 2):

        a) Create an encryption key based on the user password string, as
           described in "Algorithm 2: Computing an encryption key".
        b) Encrypt the 32-byte padding string shown in step (a) of
           "Algorithm 2: Computing an encryption key", using an RC4
           encryption function with the encryption key from the preceding
           step.
        c) Store the result of step (b) as the value of the U entry in the
           encryption dictionary.

        Algorithm 5 (security handlers of revision 3 or greater):

        a) Create an encryption key based on the user password string, as
           described in "Algorithm 2: Computing an encryption key".
        b) Initialize the MD5 hash function and pass the 32-byte padding
           string shown in step (a) of "Algorithm 2: Computing an encryption
           key" as input to this function.
        c) Pass the first element of the file's file identifier array (the
           value of the ID entry in the document's trailer dictionary; see
           Table 15) to the hash function and finish the hash.
        d) Encrypt the 16-byte result of the hash, using an RC4 encryption
           function with the encryption key from step (a).
        e) Do the following 19 times: Take the output from the previous
           invocation of the RC4 function and pass it as input to a new
           invocation of the function; use an encryption key generated by
           taking each byte of the original encryption key obtained in
           step (a) and performing an XOR (exclusive or) operation between
           that byte and the single-byte value of the iteration counter
           (from 1 to 19).
        f) Append 16 bytes of arbitrary padding to the output from the final
           invocation of the RC4 function and store the 32-byte result as
           the value of the U entry in the encryption dictionary.

        Args:
            key: The encryption key from :func:`compute_key` (step (a))
            rev: The encryption revision (see PDF standard)
            id1_entry: The first element of the file identifier array

        Returns:
            The U value

        """
        if rev <= 2:
            return rc4_encrypt(key, _PADDING)

        md5 = hashlib.md5(_PADDING)
        md5.update(id1_entry)
        encrypted = rc4_encrypt(key, md5.digest())
        for i in range(1, 20):
            round_key = bytes(x ^ i for x in key)
            encrypted = rc4_encrypt(round_key, encrypted)
        # Step (f): the "arbitrary padding" used here is the start of _PADDING.
        return _padding(encrypted)

    @staticmethod
    def verify_user_password(
        user_password: bytes,
        rev: int,
        key_size: int,
        o_entry: bytes,
        u_entry: bytes,
        P: int,
        id1_entry: bytes,
        metadata_encrypted: bool,
    ) -> bytes:
        """
        Algorithm 6: Authenticating the user password.

        a) Perform all but the last step of "Algorithm 4: Computing the
           encryption dictionary's U (user password) value (Security handlers
           of revision 2)" or "Algorithm 5: Computing the encryption
           dictionary's U (user password) value (Security handlers of
           revision 3 or greater)" using the supplied password string.
        b) If the result of step (a) is equal to the value of the encryption
           dictionary's U entry (comparing on the first 16 bytes in the case
           of security handlers of revision 3 or greater), the password
           supplied is the correct user password. The key obtained in
           step (a) (that is, in the first step of Algorithm 4 or
           Algorithm 5) shall be used to decrypt the document.

        Args:
            user_password: The user password as a bytes stream
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits
            o_entry: The owner entry
            u_entry: The user entry
            P: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. If bit 2 is set
                to 1, all other bits are ignored and all operations are
                permitted. If bit 2 is set to 0, permission for operations
                are based on the values of the remaining flags defined in
                Table 24.
            id1_entry: The first element of the file identifier array
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            The encryption key if the password matches, ``b""`` otherwise

        """
        key = AlgV4.compute_key(
            user_password, rev, key_size, o_entry, P, id1_entry, metadata_encrypted
        )
        u_value = AlgV4.compute_U_value(key, rev, id1_entry)
        if rev >= 3:
            # Only the first 16 bytes are significant; the rest is padding.
            u_value = u_value[:16]
            u_entry = u_entry[:16]
        if u_value != u_entry:
            return b""
        return key

    @staticmethod
    def verify_owner_password(
        owner_password: bytes,
        rev: int,
        key_size: int,
        o_entry: bytes,
        u_entry: bytes,
        P: int,
        id1_entry: bytes,
        metadata_encrypted: bool,
    ) -> bytes:
        """
        Algorithm 7: Authenticating the owner password.

        a) Compute an encryption key from the supplied password string, as
           described in steps (a) to (d) of "Algorithm 3: Computing the
           encryption dictionary's O (owner password) value".
        b) (Security handlers of revision 2 only) Decrypt the value of the
           encryption dictionary's O entry, using an RC4 encryption function
           with the encryption key computed in step (a).
           (Security handlers of revision 3 or greater) Do the following
           20 times: Decrypt the value of the encryption dictionary's O entry
           (first iteration) or the output from the previous iteration (all
           subsequent iterations), using an RC4 encryption function with a
           different encryption key at each iteration. The key shall be
           generated by taking the original key (obtained in step (a)) and
           performing an XOR (exclusive or) operation between each byte of
           the key and the single-byte value of the iteration counter
           (from 19 to 0).
        c) The result of step (b) purports to be the user password.
           Authenticate this user password using "Algorithm 6: Authenticating
           the user password". If it is correct, the password supplied is the
           correct owner password.

        Args:
            owner_password: The owner password as bytes
            rev: The encryption revision (see PDF standard)
            key_size: The size of the key in bits
            o_entry: The owner entry
            u_entry: The user entry
            P: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. If bit 2 is set
                to 1, all other bits are ignored and all operations are
                permitted. If bit 2 is set to 0, permission for operations
                are based on the values of the remaining flags defined in
                Table 24.
            id1_entry: The first element of the file identifier array
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            The encryption key if the password matches, ``b""`` otherwise

        """
        rc4_key = AlgV4.compute_O_value_key(owner_password, rev, key_size)

        if rev <= 2:
            user_password = rc4_decrypt(rc4_key, o_entry)
        else:
            user_password = o_entry
            for i in range(19, -1, -1):
                round_key = bytes(x ^ i for x in rc4_key)
                user_password = rc4_decrypt(round_key, user_password)
        return AlgV4.verify_user_password(
            user_password,
            rev,
            key_size,
            o_entry,
            u_entry,
            P,
            id1_entry,
            metadata_encrypted,
        )


class AlgV5:
    """
    Password and key algorithms for the 256-bit scheme (Algorithms 3.2a and
    3.8 to 3.10 as quoted in the method docstrings).

    The O and U strings are treated as three sections: a 32-byte hash, an
    8-byte Validation Salt and an 8-byte Key Salt (48 bytes in total).
    All methods are static.
    """

    @staticmethod
    def verify_owner_password(
        R: int, password: bytes, o_value: bytes, oe_value: bytes, u_value: bytes
    ) -> bytes:
        """
        Algorithm 3.2a Computing an encryption key.

        To understand the algorithm below, it is necessary to treat the O and U
        strings in the Encrypt dictionary as made up of three sections.
        The first 32 bytes are a hash value (explained below). The next 8 bytes
        are called the Validation Salt. The final 8 bytes are called the Key Salt.

        1. The password string is generated from Unicode input by processing the
           input string with the SASLprep (IETF RFC 4013) profile of
           stringprep (IETF RFC 3454), and then converting to a UTF-8
           representation.
        2. Truncate the UTF-8 representation to 127 bytes if it is longer than
           127 bytes.
        3. Test the password against the owner key by computing the SHA-256 hash
           of the UTF-8 password concatenated with the 8 bytes of owner
           Validation Salt, concatenated with the 48-byte U string. If the
           32-byte result matches the first 32 bytes of the O string, this is
           the owner password.
           Compute an intermediate owner key by computing the SHA-256 hash of
           the UTF-8 password concatenated with the 8 bytes of owner Key Salt,
           concatenated with the 48-byte U string. The 32-byte result is the
           key used to decrypt the 32-byte OE string using AES-256 in CBC mode
           with no padding and an initialization vector of zero.
           The 32-byte result is the file encryption key.
        4. Test the password against the user key by computing the SHA-256 hash
           of the UTF-8 password concatenated with the 8 bytes of user
           Validation Salt. If the 32 byte result matches the first 32 bytes of
           the U string, this is the user password.
           Compute an intermediate user key by computing the SHA-256 hash of the
           UTF-8 password concatenated with the 8 bytes of user Key Salt.
           The 32-byte result is the key used to decrypt the 32-byte
           UE string using AES-256 in CBC mode with no padding and an
           initialization vector of zero. The 32-byte result is the file
           encryption key.
        5. Decrypt the 16-byte Perms string using AES-256 in ECB mode with an
           initialization vector of zero and the file encryption key as the key.
           Verify that bytes 9-11 of the result are the characters 'a', 'd', 'b'.
           Bytes 0-3 of the decrypted Perms entry, treated as a little-endian
           integer, are the user permissions.
           They should match the value in the P key.

        This method implements step 2 and the owner part (step 3). The hashes
        are computed by :func:`calculate_hash`, which goes beyond plain
        SHA-256 when ``R`` is 6 or greater.

        Args:
            R: A number specifying which revision of the standard security
                handler shall be used to interpret this dictionary
            password: The owner password (already UTF-8 encoded)
            o_value: The O string: 32-byte hash, 8-byte Validation Salt and
                8-byte Key Salt
            oe_value: The OE string holding the encrypted file encryption key
            u_value: The U string; only its first 48 bytes are used

        Returns:
            The file encryption key, or ``b""`` if the password does not match

        """
        password = password[:127]
        u_part = u_value[:48]
        validation_hash = AlgV5.calculate_hash(R, password, o_value[32:40], u_part)
        if validation_hash != o_value[:32]:
            return b""
        intermediate_key = AlgV5.calculate_hash(R, password, o_value[40:48], u_part)
        # bytes(16) is the all-zero 16-byte initialization vector.
        return aes_cbc_decrypt(intermediate_key, bytes(16), oe_value)

    @staticmethod
    def verify_user_password(
        R: int, password: bytes, u_value: bytes, ue_value: bytes
    ) -> bytes:
        """
        User part (step 4) of Algorithm 3.2a; see :func:`verify_owner_password`.

        Args:
            R: A number specifying which revision of the standard security
                handler shall be used to interpret this dictionary
            password: The user password (already UTF-8 encoded)
            u_value: The U string: 32-byte hash, 8-byte Validation Salt and
                8-byte Key Salt
            ue_value: The UE string holding the encrypted file encryption key

        Returns:
            The file encryption key, or ``b""`` if the password does not match

        """
        password = password[:127]
        validation_hash = AlgV5.calculate_hash(R, password, u_value[32:40], b"")
        if validation_hash != u_value[:32]:
            return b""
        intermediate_key = AlgV5.calculate_hash(R, password, u_value[40:48], b"")
        return aes_cbc_decrypt(intermediate_key, bytes(16), ue_value)

    @staticmethod
    def calculate_hash(R: int, password: bytes, salt: bytes, udata: bytes) -> bytes:
        """
        Hash a password with a salt and optional U-string data.

        For ``R < 6`` this is the SHA-256 digest of
        ``password + salt + udata``. For ``R >= 6`` that digest seeds an
        iterated AES/SHA loop, implemented after
        https://github.com/qpdf/qpdf/blob/main/libqpdf/QPDF_encryption.cc

        Args:
            R: The security handler revision
            password: The (truncated) password bytes
            salt: The 8-byte Validation Salt or Key Salt
            udata: The first 48 bytes of the U string for owner hashes,
                ``b""`` for user hashes

        Returns:
            A 32-byte hash

        """
        k = hashlib.sha256(password + salt + udata).digest()
        if R < 6:
            return k
        count = 0
        while True:
            count += 1
            k1 = password + k + udata
            # The first 16 bytes of k act as AES key, the next 16 as IV.
            e = aes_cbc_encrypt(k[:16], k[16:32], k1 * 64)
            # Pick the next hash by the sum of the first 16 bytes of e modulo 3
            # (the same residue as those bytes read as one big-endian integer,
            # because 256 % 3 == 1).
            hash_fn = (
                hashlib.sha256,
                hashlib.sha384,
                hashlib.sha512,
            )[sum(e[:16]) % 3]
            k = hash_fn(e).digest()
            # At least 64 rounds; afterwards stop once the last byte of e is
            # no larger than the round number minus 32.
            if count >= 64 and e[-1] <= count - 32:
                break
        return k[:32]

    @staticmethod
    def verify_perms(
        key: bytes, perms: bytes, p: int, metadata_encrypted: bool
    ) -> bool:
        """
        Check the Perms string against ``p`` and the metadata flag.

        See step 5 of :func:`verify_owner_password` and
        :func:`compute_Perms_value`.

        Args:
            key: The file encryption key
            perms: The 16-byte Perms string from the encryption dictionary
            p: A set of flags specifying which operations shall be permitted
                when the document is opened with user access.
                If bit 2 is set to 1, all other bits are ignored and all
                operations are permitted.
                If bit 2 is set to 0, permission for operations are based on
                the values of the remaining flags defined in Table 24.
                Both the signed and the unsigned 32-bit form are accepted.
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            True if the first 12 decrypted bytes equal the expected block

        """
        b8 = b"T" if metadata_encrypted else b"F"
        # Mask to 32 bits so a negative (signed) p packs without error.
        expected = struct.pack("<I", p & 0xFFFFFFFF) + b"\xff\xff\xff\xff" + b8 + b"adb"
        decrypted = aes_ecb_decrypt(key, perms)
        # Bytes 12-15 are random data and are ignored.
        return expected == decrypted[:12]

    @staticmethod
    def generate_values(
        R: int,
        user_password: bytes,
        owner_password: bytes,
        key: bytes,
        p: int,
        metadata_encrypted: bool,
    ) -> dict[Any, Any]:
        """
        Compute the U, UE, O, OE and Perms values for a file encryption key.

        Both passwords are truncated to 127 bytes first.

        Args:
            R: The security handler revision
            user_password: The user password (already UTF-8 encoded)
            owner_password: The owner password (already UTF-8 encoded)
            key: The file encryption key
            p: The permission flags
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            A dictionary with the keys ``/U``, ``/UE``, ``/O``, ``/OE``
            and ``/Perms``

        """
        user_password = user_password[:127]
        owner_password = owner_password[:127]
        u_value, ue_value = AlgV5.compute_U_value(R, user_password, key)
        o_value, oe_value = AlgV5.compute_O_value(R, owner_password, key, u_value)
        perms = AlgV5.compute_Perms_value(key, p, metadata_encrypted)
        return {
            "/U": u_value,
            "/UE": ue_value,
            "/O": o_value,
            "/OE": oe_value,
            "/Perms": perms,
        }

    @staticmethod
    def compute_U_value(R: int, password: bytes, key: bytes) -> tuple[bytes, bytes]:
        """
        Algorithm 3.8 Computing the encryption dictionary's U (user password)
        and UE (user encryption key) values.

        1. Generate 16 random bytes of data using a strong random number generator.
           The first 8 bytes are the User Validation Salt. The second 8 bytes
           are the User Key Salt. Compute the 32-byte SHA-256 hash of the
           password concatenated with the User Validation Salt. The 48-byte
           string consisting of the 32-byte hash followed by the User
           Validation Salt followed by the User Key Salt is stored as the U key.
        2. Compute the 32-byte SHA-256 hash of the password concatenated with
           the User Key Salt. Using this hash as the key, encrypt the file
           encryption key using AES-256 in CBC mode with no padding and an
           initialization vector of zero. The resulting 32-byte string is stored
           as the UE key.

        The hashes are computed by :func:`calculate_hash`.

        Args:
            R: The security handler revision
            password: The user password (already UTF-8 encoded)
            key: The file encryption key

        Returns:
            A tuple (u-value, ue value)

        """
        random_bytes = secrets.token_bytes(16)
        val_salt = random_bytes[:8]
        key_salt = random_bytes[8:]
        u_value = AlgV5.calculate_hash(R, password, val_salt, b"") + val_salt + key_salt

        intermediate_key = AlgV5.calculate_hash(R, password, key_salt, b"")
        ue_value = aes_cbc_encrypt(intermediate_key, bytes(16), key)
        return u_value, ue_value

    @staticmethod
    def compute_O_value(
        R: int, password: bytes, key: bytes, u_value: bytes
    ) -> tuple[bytes, bytes]:
        """
        Algorithm 3.9 Computing the encryption dictionary's O (owner password)
        and OE (owner encryption key) values.

        1. Generate 16 random bytes of data using a strong random number
           generator. The first 8 bytes are the Owner Validation Salt. The
           second 8 bytes are the Owner Key Salt. Compute the 32-byte SHA-256
           hash of the password concatenated with the Owner Validation Salt and
           then concatenated with the 48-byte U string as generated in
           Algorithm 3.8. The 48-byte string consisting of the 32-byte hash
           followed by the Owner Validation Salt followed by the Owner Key Salt
           is stored as the O key.
        2. Compute the 32-byte SHA-256 hash of the password concatenated with
           the Owner Key Salt and then concatenated with the 48-byte U string as
           generated in Algorithm 3.8. Using this hash as the key,
           encrypt the file encryption key using AES-256 in CBC mode with
           no padding and an initialization vector of zero.
           The resulting 32-byte string is stored as the OE key.

        The hashes are computed by :func:`calculate_hash`.

        Args:
            R: The security handler revision
            password: The owner password (already UTF-8 encoded)
            key: The file encryption key
            u_value: The U string; only its first 48 bytes are used

        Returns:
            A tuple (O value, OE value)

        """
        random_bytes = secrets.token_bytes(16)
        val_salt = random_bytes[:8]
        key_salt = random_bytes[8:]
        # Both hashes use the same 48-byte slice that verify_owner_password
        # uses, so the stored hash verifies even if u_value is longer.
        u_part = u_value[:48]
        o_value = (
            AlgV5.calculate_hash(R, password, val_salt, u_part) + val_salt + key_salt
        )
        intermediate_key = AlgV5.calculate_hash(R, password, key_salt, u_part)
        oe_value = aes_cbc_encrypt(intermediate_key, bytes(16), key)
        return o_value, oe_value

    @staticmethod
    def compute_Perms_value(key: bytes, p: int, metadata_encrypted: bool) -> bytes:
        """
        Algorithm 3.10 Computing the encryption dictionary's Perms
        (permissions) value.

        1. Extend the permissions (contents of the P integer) to 64 bits by
           setting the upper 32 bits to all 1's.
           (This allows for future extension without changing the format.)
        2. Record the 8 bytes of permission in the bytes 0-7 of the block,
           low order byte first.
        3. Set byte 8 to the ASCII value 'T' or 'F' according to the
           EncryptMetadata Boolean.
        4. Set bytes 9-11 to the ASCII characters 'a', 'd', 'b'.
        5. Set bytes 12-15 to 4 bytes of random data, which will be ignored.
        6. Encrypt the 16-byte block using AES-256 in ECB mode with an
           initialization vector of zero, using the file encryption key as the
           key. The result (16 bytes) is stored as the Perms string, and checked
           for validity when the file is opened.

        Args:
            key: The file encryption key
            p: A set of flags specifying which operations shall be permitted
                when the document is opened with user access. If bit 2 is set to 1,
                all other bits are ignored and all operations are permitted.
                If bit 2 is set to 0, permission for operations are based on the
                values of the remaining flags defined in Table 24.
                Both the signed and the unsigned 32-bit form are accepted.
            metadata_encrypted: A boolean indicating if the metadata is encrypted.

        Returns:
            The perms value

        """
        b8 = b"T" if metadata_encrypted else b"F"
        random_tail = secrets.token_bytes(4)
        # Mask to 32 bits so a negative (signed) p packs without error.
        block = (
            struct.pack("<I", p & 0xFFFFFFFF)
            + b"\xff\xff\xff\xff"
            + b8
            + b"adb"
            + random_tail
        )
        return aes_ecb_encrypt(key, block)


class PasswordType(IntEnum):
    """
    Kind of password a document has been unlocked with.

    ``Encryption`` starts out with ``NOT_DECRYPTED``. The other two members
    presumably record whether the user or the owner password matched -
    confirm against the code that assigns them.
    """

    # Initial state set by Encryption.__init__; also the only falsy member (0).
    NOT_DECRYPTED = 0
    USER_PASSWORD = 1
    OWNER_PASSWORD = 2


class EncryptAlgorithm(tuple, Enum):  # type: ignore # noqa: SLOT001
    """
    Named encryption configurations.

    Each member is a ``(V, R, Length)`` tuple, using the same names as the
    ``Encryption`` parameters: ``V`` is the algorithm code, ``R`` the revision
    of the standard security handler and ``Length`` the key length in bits.
    Because of the ``tuple`` mixin a member can be unpacked directly.
    """

    # Member values are ordered as: V, R, Length
    RC4_40 = (1, 2, 40)
    RC4_128 = (2, 3, 128)
    AES_128 = (4, 4, 128)
    # Two members share V=5 and a 256-bit key; they differ only in R (5 vs 6).
    AES_256_R5 = (5, 5, 256)
    AES_256 = (5, 6, 256)


class EncryptionValues:
    """
    Holder for the byte-string values of an encryption dictionary.

    The attribute names match the ``/O``, ``/U``, ``/OE``, ``/UE`` and
    ``/Perms`` keys produced by ``AlgV5.generate_values``. Only annotations
    are declared here, so an attribute exists on an instance only after it
    has been assigned.
    """

    # Owner and user password strings.
    O: bytes  # noqa: E741
    U: bytes
    # Owner and user encryption-key strings (see AlgV5.compute_O_value and
    # AlgV5.compute_U_value).
    OE: bytes
    UE: bytes
    # Encrypted permissions block (see AlgV5.compute_Perms_value).
    Perms: bytes


class Encryption:
    """
    Collects and manages parameters for PDF document encryption and decryption.

    Args:
        V: A code specifying the algorithm to be used in encrypting and
           decrypting the document.
        R: The revision of the standard security handler.
        Length: The length of the encryption key in bits.
        P: A set of flags specifying which operations shall be permitted
           when the document is opened with user access
        entry: The encryption dictionary object.
        EncryptMetadata: Whether to encrypt metadata in the document.
        first_id_entry: The first 16 bytes of the file's original ID.
        StmF: The name of the crypt filter that shall be used by default
              when decrypting streams.
        StrF: The name of the crypt filter that shall be used when decrypting
              all strings in the document.
        EFF: The name of the crypt filter that shall be used when
             encrypting embedded file streams that do not have their own
             crypt filter specifier.
        values: Additional encryption parameters.

    """

    def __init__(
        self,
        V: int,
        R: int,
        Length: int,
        P: int,
        entry: DictionaryObject,
        EncryptMetadata: bool,
        first_id_entry: bytes,
        StmF: str,
        StrF: str,
        EFF: str,
        values: Optional[EncryptionValues],
    ) -> None:
        """
        Store the encryption parameters and reset the decryption state.

        The attribute names mirror the keys of the encryption dictionary
        (§7.6.2, entries common to all encryption dictionaries). ``entry``
        is accepted but not stored.
        """
        self.V, self.R = V, R
        self.Length = Length  # key size in bits
        # P may arrive as a negative (signed) number; fold it into 0..2**32-1.
        self.P = (P + 0x100000000) % 0x100000000
        self.EncryptMetadata = EncryptMetadata
        self.id1_entry = first_id_entry
        self.StmF, self.StrF, self.EFF = StmF, StrF, EFF
        self.values: EncryptionValues = values if values else EncryptionValues()

        # Nothing is decrypted until a password has been verified.
        self._password_type = PasswordType.NOT_DECRYPTED
        self._key: Optional[bytes] = None
        self._are_permissions_valid: bool = True

    def is_decrypted(self) -> bool:
        """Return True once a user or owner password has been accepted."""
        undecrypted = PasswordType.NOT_DECRYPTED
        return self._password_type != undecrypted

    def encrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObject:
        """
        Encrypt ``obj`` with the crypt filter derived for its object identifier.

        Objects of a type that is not subject to encryption are returned
        unchanged, without computing a key.
        """
        if self._is_encryption_object(obj):
            crypt_filter = self._make_crypt_filter(idnum, generation)
            return crypt_filter.encrypt_object(obj)
        return obj

    def decrypt_object(self, obj: PdfObject, idnum: int, generation: int) -> PdfObject:
        """
        Decrypt ``obj`` with the crypt filter derived for its object identifier.

        Objects of a type that is not subject to encryption are returned
        unchanged, without computing a key.
        """
        if self._is_encryption_object(obj):
            crypt_filter = self._make_crypt_filter(idnum, generation)
            return crypt_filter.decrypt_object(obj)
        return obj

    @staticmethod
    def _is_encryption_object(obj: PdfObject) -> bool:
        """Tell whether ``obj`` is a string, stream or container object."""
        candidate_types = (
            ByteStringObject,
            TextStringObject,
            StreamObject,
            ArrayObject,
            DictionaryObject,
        )
        return isinstance(obj, candidate_types)

    def _make_crypt_filter(self, idnum: int, generation: int) -> CryptFilter:
        """
        Build the crypt filter for the object ``idnum generation``.

        Algorithm 1 (encryption of data using the RC4 or AES algorithms):

        a) Take the object number and generation number of the string or
           stream (for a direct object, those of the containing indirect
           object).
        b) Extend the n-byte file encryption key by the low-order 3 bytes of
           the object number and the low-order 2 bytes of the generation
           number, low-order byte first. n is 5 if V is 1, otherwise
           Length / 8. For AES additionally append "sAlT"
           (0x73, 0x41, 0x6C, 0x54).
        c) Hash the result of (b) with MD5.
        d) Use the first n + 5 bytes (at most 16) of the hash as RC4 or AES
           key. AES runs in CBC mode with a random 16-byte initialization
           vector stored in front of the encrypted data.

        Algorithm 3.1a (AES-256): the 32-byte file encryption key is used
        directly, again in CBC mode with a 16-byte random initialization
        vector stored as the first 16 bytes.

        Args:
            idnum: Object number.
            generation: Generation number.

        Returns:
            A crypt filter combining the stream, string and embedded file
            ciphers selected by ``StmF``, ``StrF`` and ``EFF``.

        """
        object_suffix = struct.pack("<i", idnum)[:3] + struct.pack("<i", generation)[:2]

        assert self._key
        file_key = self._key
        n = 5 if self.V == 1 else self.Length // 8
        object_key_length = min(n + 5, 16)
        seed = file_key[:n] + object_suffix

        # RC4 hashes the seed as is, AES-128 hashes the seed plus the salt.
        rc4_key = hashlib.md5(seed).digest()[:object_key_length]
        aes128_key = hashlib.md5(seed + b"sAlT").digest()[:object_key_length]

        # AES-256 uses the file encryption key unchanged.
        stm_crypt, str_crypt, ef_crypt = (
            self._get_crypt(method, rc4_key, aes128_key, file_key)
            for method in (self.StmF, self.StrF, self.EFF)
        )
        return CryptFilter(stm_crypt, str_crypt, ef_crypt)

    @staticmethod
    def _get_crypt(
        method: str, rc4_key: bytes, aes128_key: bytes, aes256_key: bytes
    ) -> CryptBase:
        """
        Create the cipher object for a crypt filter method name.

        "/AESV2" uses the AES-128 key, "/AESV3" the AES-256 key and
        "/Identity" performs no encryption. Every other name falls back
        to RC4.
        """
        if method == "/Identity":
            return CryptIdentity()
        if method in ("/AESV2", "/AESV3"):
            aes_key = aes128_key if method == "/AESV2" else aes256_key
            return CryptAES(aes_key)
        return CryptRC4(rc4_key)

    @staticmethod
    def _encode_password(password: Union[bytes, str]) -> bytes:
        if isinstance(password, str):
            try:
                pwd = password.encode("latin-1")
            except Exception:
                pwd = password.encode("utf-8")
        else:
            pwd = password
        return pwd

    def verify(self, password: Union[bytes, str]) -> PasswordType:
        """
        Check ``password`` and remember the derived key on success.

        Returns:
            The kind of password that matched, or
            ``PasswordType.NOT_DECRYPTED`` if it matched neither. In the
            latter case the stored key and password type are left untouched.

        """
        encoded = self._encode_password(password)
        checker = self.verify_v4 if self.V <= 4 else self.verify_v5
        key, result = checker(encoded)
        if result == PasswordType.NOT_DECRYPTED:
            return result
        self._password_type = result
        self._key = key
        return result

    def verify_v4(self, password: bytes) -> tuple[bytes, PasswordType]:
        """
        Check ``password`` with the ``AlgV4`` routines.

        Returns:
            The key together with the matching password type, or
            ``(b"", PasswordType.NOT_DECRYPTED)`` if neither check succeeds.

        """
        common_args = (
            password,
            self.R,
            self.Length,
            self.values.O,
            self.values.U,
            self.P,
            self.id1_entry,
            self.EncryptMetadata,
        )
        # verify owner password first
        attempts = (
            (AlgV4.verify_owner_password, PasswordType.OWNER_PASSWORD),
            (AlgV4.verify_user_password, PasswordType.USER_PASSWORD),
        )
        for check, matched_type in attempts:
            key = check(*common_args)
            if key:
                return key, matched_type
        return b"", PasswordType.NOT_DECRYPTED

    def verify_v5(self, password: bytes) -> tuple[bytes, PasswordType]:
        """
        Check ``password`` with the ``AlgV5`` routines.

        A failing /Perms verification is recorded in
        ``_are_permissions_valid`` and logged, but does not reject the
        password.

        Returns:
            The key together with the matching password type, or
            ``(b"", PasswordType.NOT_DECRYPTED)`` if neither check succeeds.

        """
        # TODO: use SASLprep process
        stored = self.values
        # verify owner password first
        matched_type = PasswordType.OWNER_PASSWORD
        key = AlgV5.verify_owner_password(
            self.R, password, stored.O, stored.OE, stored.U
        )
        if not key:
            matched_type = PasswordType.USER_PASSWORD
            key = AlgV5.verify_user_password(
                self.R, password, stored.U, stored.UE
            )
            if not key:
                return b"", PasswordType.NOT_DECRYPTED

        # verify Perms
        permissions_ok = AlgV5.verify_perms(key, stored.Perms, self.P, self.EncryptMetadata)
        self._are_permissions_valid = permissions_ok
        if not permissions_ok:
            logger_warning("ignore '/Perms' verify failed", __name__)
        return key, matched_type

    def write_entry(
        self, user_password: str, owner_password: Optional[str]
    ) -> DictionaryObject:
        """
        Compute the password-dependent values and build the encryption dictionary.

        Args:
            user_password: The user password.
            owner_password: The owner password. If empty or ``None``, the
                user password is used instead.

        Returns:
            A new dictionary with /V, /R, /Length, /P, /Filter, /O and /U,
            the crypt filter entries for V >= 4 and /OE, /UE and /Perms
            for V >= 5.

        """
        user_pwd = self._encode_password(user_password)
        owner_pwd = self._encode_password(owner_password) if owner_password else user_pwd

        if self.V > 4:
            # A fresh random file encryption key is generated for every call.
            self._key = secrets.token_bytes(self.Length // 8)
            generated = AlgV5.generate_values(
                self.R, user_pwd, owner_pwd, self._key, self.P, self.EncryptMetadata
            )
            self.values.O = generated["/O"]
            self.values.U = generated["/U"]
            self.values.OE = generated["/OE"]
            self.values.UE = generated["/UE"]
            self.values.Perms = generated["/Perms"]
        else:
            self.compute_values_v4(user_pwd, owner_pwd)

        entry = DictionaryObject()
        for entry_name, number in (
            ("/V", self.V),
            ("/R", self.R),
            ("/Length", self.Length),
            ("/P", self.P),
        ):
            entry[NameObject(entry_name)] = NumberObject(number)
        entry[NameObject("/Filter")] = NameObject("/Standard")
        # /EncryptMetadata is not written.

        for entry_name, raw in (("/O", self.values.O), ("/U", self.values.U)):
            entry[NameObject(entry_name)] = ByteStringObject(raw)

        if self.V >= 4:
            # TODO: allow different method
            std_crypt_filter = DictionaryObject()
            std_crypt_filter[NameObject("/AuthEvent")] = NameObject("/DocOpen")
            std_crypt_filter[NameObject("/CFM")] = NameObject(self.StmF)
            std_crypt_filter[NameObject("/Length")] = NumberObject(self.Length // 8)
            crypt_filters = DictionaryObject()
            crypt_filters[NameObject("/StdCF")] = std_crypt_filter
            entry[NameObject("/CF")] = crypt_filters
            # Streams and strings both use the single standard filter;
            # /EFF is not written.
            entry[NameObject("/StmF")] = NameObject("/StdCF")
            entry[NameObject("/StrF")] = NameObject("/StdCF")

        if self.V >= 5:
            entry[NameObject("/OE")] = ByteStringObject(self.values.OE)
            entry[NameObject("/UE")] = ByteStringObject(self.values.UE)
            entry[NameObject("/Perms")] = ByteStringObject(self.values.Perms)
        return entry

    def compute_values_v4(self, user_password: bytes, owner_password: bytes) -> None:
        """
        Derive the /O and /U values and the key with the ``AlgV4`` routines.

        The results are stored in ``self.values.O``, ``self.values.U`` and
        ``self._key``; nothing is returned.
        """
        owner_rc4_key = AlgV4.compute_O_value_key(owner_password, self.R, self.Length)
        owner_value = AlgV4.compute_O_value(owner_rc4_key, user_password, self.R)

        file_key = AlgV4.compute_key(
            user_password,
            self.R,
            self.Length,
            owner_value,
            self.P,
            self.id1_entry,
            self.EncryptMetadata,
        )
        user_value = AlgV4.compute_U_value(file_key, self.R, self.id1_entry)

        # Only store anything once all values have been computed.
        self._key = file_key
        self.values.O = owner_value
        self.values.U = user_value

    @staticmethod
    def read(encryption_entry: DictionaryObject, first_id_entry: bytes) -> "Encryption":
        """
        Create an ``Encryption`` from an existing encryption dictionary.

        Args:
            encryption_entry: The encryption dictionary of the document.
            first_id_entry: Passed through as ``first_id_entry`` of the
                created object.

        Returns:
            The ``Encryption`` object describing ``encryption_entry``.

        Raises:
            NotImplementedError: If /Filter is not /Standard, a /SubFilter is
                present, /V is not one of 1 to 5, or a crypt filter method
                other than /Identity, /V2, /AESV2 or /AESV3 is used.

        """
        if encryption_entry.get("/Filter") != "/Standard":
            raise NotImplementedError(
                "only Standard PDF encryption handler is available"
            )
        if "/SubFilter" in encryption_entry:
            raise NotImplementedError("/SubFilter NOT supported")

        # Method used for V < 4, where no crypt filters are read.
        stm_filter = "/V2"
        str_filter = "/V2"
        ef_filter = "/V2"

        alg_ver = encryption_entry.get("/V", 0)
        if alg_ver not in (1, 2, 3, 4, 5):
            raise NotImplementedError(f"Encryption V={alg_ver} NOT supported")
        if alg_ver >= 4:
            filters = encryption_entry["/CF"]

            # Missing /StmF and /StrF mean /Identity; a missing /EFF falls
            # back to the name of the stream filter.
            stm_filter = encryption_entry.get("/StmF", "/Identity")
            str_filter = encryption_entry.get("/StrF", "/Identity")
            ef_filter = encryption_entry.get("/EFF", stm_filter)

            # Replace each crypt filter name by the method (/CFM) it selects.
            if stm_filter != "/Identity":
                stm_filter = filters[stm_filter]["/CFM"]  # type: ignore
            if str_filter != "/Identity":
                str_filter = filters[str_filter]["/CFM"]  # type: ignore
            if ef_filter != "/Identity":
                ef_filter = filters[ef_filter]["/CFM"]  # type: ignore

            allowed_methods = ("/Identity", "/V2", "/AESV2", "/AESV3")
            if stm_filter not in allowed_methods:
                raise NotImplementedError(f"StmF Method {stm_filter} NOT supported!")
            if str_filter not in allowed_methods:
                raise NotImplementedError(f"StrF Method {str_filter} NOT supported!")
            if ef_filter not in allowed_methods:
                raise NotImplementedError(f"EFF Method {ef_filter} NOT supported!")

        alg_rev = cast(int, encryption_entry["/R"])
        perm_flags = cast(int, encryption_entry["/P"])
        # Key length in bits; 40 if the dictionary does not specify one.
        key_bits = encryption_entry.get("/Length", 40)
        if alg_ver == 4 and stm_filter == "/AESV2":
            # For V4 with AES the key length is taken from the stream crypt
            # filter instead of the top-level /Length.
            cf_dict = cast(DictionaryObject, filters[encryption_entry["/StmF"]])  # type: ignore[index]
            # CF /Length is in bytes (default 16 for AES-128), convert to bits
            key_bits = cast(int, cf_dict.get("/Length", 16)) * 8
        # A missing /EncryptMetadata is treated as True.
        encrypt_metadata = encryption_entry.get("/EncryptMetadata")
        encrypt_metadata = (
            encrypt_metadata.value if encrypt_metadata is not None else True
        )
        values = EncryptionValues()
        # /O and /U are required; /OE, /UE and /Perms default to an empty
        # byte string when absent.
        values.O = cast(ByteStringObject, encryption_entry["/O"]).original_bytes
        values.U = cast(ByteStringObject, encryption_entry["/U"]).original_bytes
        values.OE = encryption_entry.get("/OE", ByteStringObject()).original_bytes
        values.UE = encryption_entry.get("/UE", ByteStringObject()).original_bytes
        values.Perms = encryption_entry.get("/Perms", ByteStringObject()).original_bytes
        return Encryption(
            V=alg_ver,
            R=alg_rev,
            Length=key_bits,
            P=perm_flags,
            EncryptMetadata=encrypt_metadata,
            first_id_entry=first_id_entry,
            values=values,
            StrF=str_filter,
            StmF=stm_filter,
            EFF=ef_filter,
            entry=encryption_entry,  # Dummy entry for the moment; will get removed
        )

    @staticmethod
    def make(
        alg: EncryptAlgorithm, permissions: int, first_id_entry: bytes
    ) -> "Encryption":
        """
        Create an ``Encryption`` for encrypting a document with ``alg``.

        The same crypt filter method is used for streams, strings and
        embedded files: "/AESV2" for AES-128, "/AESV3" for both AES-256
        variants and "/V2" otherwise. Metadata encryption is always enabled
        and no encryption values are set.
        """
        alg_ver, alg_rev, key_bits = alg

        if alg == EncryptAlgorithm.AES_128:
            crypt_method = "/AESV2"
        elif alg in (EncryptAlgorithm.AES_256_R5, EncryptAlgorithm.AES_256):
            crypt_method = "/AESV3"
        else:
            crypt_method = "/V2"

        return Encryption(
            V=alg_ver,
            R=alg_rev,
            Length=key_bits,
            P=permissions,
            EncryptMetadata=True,
            first_id_entry=first_id_entry,
            values=None,
            StrF=crypt_method,
            StmF=crypt_method,
            EFF=crypt_method,
            entry=DictionaryObject(),  # Dummy entry for the moment; will get removed
        )


================================================
FILE: pypdf/_font.py
================================================
from collections.abc import Sequence
from dataclasses import dataclass, field
from typing import Any, Union, cast

from pypdf.generic import ArrayObject, DictionaryObject, NameObject

from ._cmap import get_encoding
from ._codecs.adobe_glyphs import adobe_glyphs
from ._utils import logger_warning
from .constants import FontFlags


@dataclass(frozen=True)
class FontDescriptor:
    """
    Immutable counterpart of the PDF FontDescriptor dictionary.

    It carries descriptive information (name, family, weight, flags) as well
    as metric information. The default values are derived from the mean
    values of the 14 core fonts, rounded to 100.
    """

    # Descriptive entries
    name: str = "Unknown"
    family: str = "Unknown"
    weight: str = "Unknown"

    # Metric entries
    ascent: float = 700.0
    descent: float = -200.0
    cap_height: float = 600.0
    x_height: float = 500.0
    italic_angle: float = 0.0  # Non-italic
    flags: int = 32  # Non-serif, non-symbolic, not fixed width
    bbox: tuple[float, float, float, float] = field(
        default_factory=lambda: (-100.0, -200.0, 1000.0, 900.0)
    )


@dataclass(frozen=True)
class CoreFontMetrics:
    """Immutable pairing of a font descriptor with a character width table."""

    # Descriptive and metric information of the font.
    font_descriptor: FontDescriptor
    # Width per character.
    character_widths: dict[str, int]


@dataclass
class Font:
    """
    A font object for use during text extraction and for producing
    text appearance streams.

    Attributes:
        name: Font name, derived from font["/BaseFont"]
        character_map: The font's character map
        encoding: Font encoding
        sub_type: The font type, such as Type1, TrueType, or Type3.
        font_descriptor: Font metrics, including a mapping of characters to widths
        character_widths: A mapping of characters to widths
        space_width: The width of a space, or an approximation
        interpretable: Default True. If False, the font glyphs cannot
            be translated to characters, e.g. Type3 fonts that do not define
            a '/ToUnicode' mapping.

    """

    name: str
    encoding: Union[str, dict[int, str]]
    character_map: dict[Any, Any] = field(default_factory=dict)
    sub_type: str = "Unknown"
    font_descriptor: FontDescriptor = field(default_factory=FontDescriptor)
    character_widths: dict[str, int] = field(default_factory=lambda: {"default": 500})
    space_width: Union[float, int] = 250
    interpretable: bool = True

    @staticmethod
    def _collect_tt_t1_character_widths(
        pdf_font_dict: DictionaryObject,
        char_map: dict[Any, Any],
        encoding: Union[str, dict[int, str]],
        current_widths: dict[str, int]
    ) -> None:
        """
        Read the /Widths array of a TrueType or Type1 font into ``current_widths``.

        Entry ``i`` of /Widths belongs to the character code
        ``/FirstChar + i`` (``/FirstChar`` defaults to 0).

        Args:
            pdf_font_dict: The font dictionary; it must contain /Widths.
            char_map: Maps raw characters to their Unicode replacement.
            encoding: Either a codec name or a mapping of character codes
                to characters.
            current_widths: The width table, which is updated in place.

        """
        widths_array = cast(ArrayObject, pdf_font_dict["/Widths"])
        first_char = pdf_font_dict.get("/FirstChar", 0)

        if not isinstance(encoding, str):
            # The encoding is a dict: look the code up directly and fall back
            # to the character with that code point. Widths are stored as is.
            collected = {}
            for offset, width in enumerate(widths_array):
                code = offset + first_char
                collected[encoding.get(code, chr(code))] = width
            current_widths.update(collected)
            return

        # The encoding is a codec name: decode the raw code with it.
        for offset, width in enumerate(widths_array):
            # Often the first entry will denote the .notdef character, but we add it anyway
            raw_char = bytes([offset + first_char]).decode(encoding, "surrogatepass")
            # Prefer the real Unicode character from the character map and
            # keep the raw character if there is no (non-empty) mapping.
            target_char = char_map.get(raw_char) or raw_char
            current_widths[target_char] = int(width)

    @staticmethod
    def _collect_cid_character_widths(
        d_font: DictionaryObject, char_map: dict[Any, Any], current_widths: dict[str, int]
    ) -> None:
        """
        Parses the /W array from a DescendantFont dictionary and updates character widths.

        A /W array that ends in the middle of a width definition is reported
        with a warning instead of raising an ``IndexError``.

        Args:
            d_font: The descendant font dictionary; a missing /W is treated
                as an empty array.
            char_map: Character map; only entries with ``str`` keys are used.
            current_widths: The width table, which is updated in place.

        """
        ord_map = {
            ord(_target): _surrogate
            for _target, _surrogate in char_map.items()
            if isinstance(_target, str)
        }
        # /W width definitions have two valid formats which can be mixed and matched:
        #   (1) A character start index followed by a list of widths, e.g.
        #       `45 [500 600 700]` applies widths 500, 600, 700 to characters 45-47.
        #   (2) A character start index, a character stop index, and a width, e.g.
        #       `45 65 500` applies width 500 to characters 45-65.
        skip_count = 0
        _w = d_font.get("/W", [])
        entry_count = len(_w)
        for idx, w_entry in enumerate(_w):
            w_entry = w_entry.get_object()
            if skip_count:
                skip_count -= 1
                continue
            if not isinstance(w_entry, (int, float)):
                # We should never get here due to skip_count above. But
                # sometimes we do.
                logger_warning(f"Expected numeric value for width, got {w_entry}. Ignoring it.", __name__)
                continue

            # Only look ahead as far as the array actually goes.
            w_next_entry = _w[idx + 1].get_object() if idx + 1 < entry_count else None
            is_valid_definition = False
            # check for format (1): `int [int int int int ...]`
            if isinstance(w_next_entry, Sequence):
                start_idx, width_list = w_entry, w_next_entry
                current_widths.update(
                    {
                        ord_map[_cidx]: _width
                        for _cidx, _width in zip(
                            range(
                                cast(int, start_idx),
                                cast(int, start_idx) + len(width_list),
                                1,
                            ),
                            width_list,
                        )
                        if _cidx in ord_map
                    }
                )
                skip_count = 1
                is_valid_definition = True
            # check for format (2): `int int int`
            elif isinstance(w_next_entry, (int, float)) and idx + 2 < entry_count:
                const_width = _w[idx + 2].get_object()
                if isinstance(const_width, (int, float)):
                    start_idx, stop_idx = w_entry, w_next_entry
                    current_widths.update(
                        {
                            ord_map[_cidx]: const_width
                            for _cidx in range(
                                cast(int, start_idx), cast(int, stop_idx + 1), 1
                            )
                            if _cidx in ord_map
                        }
                    )
                    skip_count = 2
                    is_valid_definition = True

            if not is_valid_definition:
                # Neither format matched. This includes reaching the end of
                # the width definitions while expecting more elements.
                logger_warning(
                    f"Invalid font width definition. Last element: {w_entry}.",
                    __name__
                )

    @staticmethod
    def _add_default_width(current_widths: dict[str, int], flags: int) -> None:
        """
        Set ``current_widths["default"]`` from the widths known so far.

        * No widths at all: 500.
        * Non-zero space width: the space width for fixed pitch fonts,
          otherwise twice the space width.
        * Otherwise: the floored average of all positive widths, or 500 if
          there is none.
        """
        if not current_widths:
            default_width = 500
        elif current_widths.get(" ", 0) != 0:
            space_width = current_widths[" "]
            fixed_pitch = (flags & FontFlags.FIXED_PITCH) == FontFlags.FIXED_PITCH
            default_width = space_width if fixed_pitch else int(2 * space_width)
        else:
            positive_widths = [width for width in current_widths.values() if width > 0]
            if positive_widths:
                default_width = sum(positive_widths) // len(positive_widths)
            else:
                default_width = 500
        current_widths["default"] = default_width

    @staticmethod
    def _parse_font_descriptor(font_descriptor_obj: DictionaryObject) -> dict[str, Any]:
        """
        Translate a PDF font descriptor into ``FontDescriptor`` keyword arguments.

        Only keys present in ``font_descriptor_obj`` are returned. A
        /FontBBox is converted to a tuple of floats, which must have
        exactly four elements.
        """
        key_mapping = (
            ("/FontName", "name"),
            ("/FontFamily", "family"),
            ("/FontWeight", "weight"),
            ("/Ascent", "ascent"),
            ("/Descent", "descent"),
            ("/CapHeight", "cap_height"),
            ("/XHeight", "x_height"),
            ("/ItalicAngle", "italic_angle"),
            ("/Flags", "flags"),
            ("/FontBBox", "bbox"),
        )
        font_descriptor_kwargs: dict[Any, Any] = {
            target_key: font_descriptor_obj[source_key]
            for source_key, target_key in key_mapping
            if source_key in font_descriptor_obj
        }
        # A missing bbox is fine, the FontDescriptor default applies then.
        if "bbox" in font_descriptor_kwargs:
            bbox_tuple = tuple(float(value) for value in font_descriptor_kwargs["bbox"])
            assert len(bbox_tuple) == 4, bbox_tuple
            font_descriptor_kwargs["bbox"] = bbox_tuple
        return font_descriptor_kwargs

    @classmethod
    def from_font_resource(
        cls,
        pdf_font_dict: DictionaryObject,
    ) -> "Font":
        """
        Create a ``Font`` from a PDF font resource dictionary.

        Type1, MMType1, TrueType and Type3 fonts take their widths from
        /Widths or, failing that, from the core font metrics matching their
        name. All other fonts are treated as composite fonts whose widths
        come from the /W arrays of their /DescendantFonts.

        Args:
            pdf_font_dict: The font dictionary.

        Returns:
            The font, including a "default" character width and a space
            width.

        """
        from pypdf._codecs.core_font_metrics import CORE_FONT_METRICS  # noqa: PLC0415

        # Can collect base_font, name and encoding directly from font resource
        name = pdf_font_dict.get("/BaseFont", "Unknown").removeprefix("/")
        sub_type = pdf_font_dict.get("/Subtype", "Unknown").removeprefix("/")
        encoding, character_map = get_encoding(pdf_font_dict)
        font_descriptor = None
        character_widths: dict[str, int] = {}
        interpretable = True

        # Deal with fonts by type; Type1, TrueType and certain Type3
        if pdf_font_dict.get("/Subtype") in ("/Type1", "/MMType1", "/TrueType", "/Type3"):
            # Type3 fonts that do not specify a "/ToUnicode" mapping cannot be
            # reliably converted into character codes unless all named chars
            # in /CharProcs map to a standard adobe glyph. See §9.10.2 of the
            # PDF 1.7 standard.
            if sub_type == "Type3" and "/ToUnicode" not in pdf_font_dict:
                interpretable = all(
                    cname in adobe_glyphs
                    for cname in pdf_font_dict.get("/CharProcs") or []
                )
            if interpretable:  # Save some overhead if font is not interpretable
                if "/Widths" in pdf_font_dict:
                    cls._collect_tt_t1_character_widths(
                        pdf_font_dict, character_map, encoding, character_widths
                    )
                elif name in CORE_FONT_METRICS:
                    font_descriptor = CORE_FONT_METRICS[name].font_descriptor
                    # Work on a copy: the widths are modified below
                    # (/MissingWidth, computed default width), which must not
                    # leak into the shared core font metrics.
                    character_widths = dict(CORE_FONT_METRICS[name].character_widths)
                if "/FontDescriptor" in pdf_font_dict:
                    font_descriptor_obj = pdf_font_dict.get("/FontDescriptor", DictionaryObject()).get_object()
                    if "/MissingWidth" in font_descriptor_obj:
                        character_widths["default"] = cast(int, font_descriptor_obj["/MissingWidth"].get_object())
                    font_descriptor = FontDescriptor(**cls._parse_font_descriptor(font_descriptor_obj))
                elif "/FontBBox" in pdf_font_dict:
                    # For Type3 without Font Descriptor but with FontBBox, see Table 110 in the PDF specification 2.0
                    bbox_tuple = tuple(map(float, cast(ArrayObject, pdf_font_dict["/FontBBox"])))
                    assert len(bbox_tuple) == 4, bbox_tuple
                    font_descriptor = FontDescriptor(name=name, bbox=bbox_tuple)

        else:
            # Composite font or CID font - CID fonts have a /W array mapping character codes
            # to widths stashed in /DescendantFonts. No need to test for /DescendantFonts though,
            # because all other fonts have already been dealt with.
            d_font: DictionaryObject
            for d_font_idx, d_font in enumerate(
                cast(ArrayObject, pdf_font_dict["/DescendantFonts"])
            ):
                d_font = cast(DictionaryObject, d_font.get_object())
                # Store the resolved descendant font back into the array.
                cast(ArrayObject, pdf_font_dict["/DescendantFonts"])[d_font_idx] = d_font
                cls._collect_cid_character_widths(
                    d_font, character_map, character_widths
                )
                if "/DW" in d_font:
                    character_widths["default"] = cast(int, d_font["/DW"].get_object())
                font_descriptor_obj = d_font.get("/FontDescriptor", DictionaryObject()).get_object()
                font_descriptor = FontDescriptor(**cls._parse_font_descriptor(font_descriptor_obj))

        if not font_descriptor:
            font_descriptor = FontDescriptor(name=name)

        # A missing or zero default width is derived from the known widths.
        if character_widths.get("default", 0) == 0:
            cls._add_default_width(character_widths, font_descriptor.flags)
        space_width = character_widths.get(" ", 0)
        if space_width == 0:
            # No usable space width: the default width for fixed pitch fonts,
            # half of it otherwise.
            if (font_descriptor.flags & FontFlags.FIXED_PITCH) == FontFlags.FIXED_PITCH:
                space_width = character_widths["default"]
            else:
                space_width = character_widths["default"] // 2

        return cls(
            name=name,
            sub_type=sub_type,
            encoding=encoding,
            font_descriptor=font_descriptor,
            character_map=character_map,
            character_widths=character_widths,
            space_width=space_width,
            interpretable=interpretable
        )

    def as_font_resource(self) -> DictionaryObject:
        """
        Return a Type1 font resource dictionary for this font.

        For now, the returned resource only works with the 14 Adobe Core
        fonts. It always uses /WinAnsiEncoding.
        """
        entries = {
            NameObject("/Subtype"): NameObject("/Type1"),
            NameObject("/Name"): NameObject(f"/{self.name}"),
            NameObject("/Type"): NameObject("/Font"),
            NameObject("/BaseFont"): NameObject(f"/{self.name}"),
            NameObject("/Encoding"): NameObject("/WinAnsiEncoding"),
        }
        return DictionaryObject(entries)

    def text_width(self, text: str = "") -> float:
        """
        Return the summed width of all characters of ``text``.

        Characters without an own entry in ``character_widths`` count with
        the "default" width. Empty text has a width of 0.0.
        """
        widths = self.character_widths
        return sum((widths.get(char, widths["default"]) for char in text), 0.0)


================================================
FILE: pypdf/_page.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import math
from collections.abc import Iterable, Iterator, Sequence
from copy import deepcopy
from dataclasses import asdict, dataclass
from decimal import Decimal
from io import BytesIO
from pathlib import Path
from typing import (
    Any,
    Callable,
    Literal,
    Optional,
    Union,
    cast,
    overload,
)

from ._font import Font
from ._protocols import PdfCommonDocProtocol
from ._text_extraction import (
    _layout_mode,
)
from ._text_extraction._text_extractor import TextExtraction
from ._utils import (
    CompressedTransformationMatrix,
    TransformationMatrixType,
    _human_readable_bytes,
    deprecate,
    logger_warning,
    matrix_multiply,
)
from .constants import _INLINE_IMAGE_KEY_MAPPING, _INLINE_IMAGE_VALUE_MAPPING
from .constants import AnnotationDictionaryAttributes as ADA
from .constants import ImageAttributes as IA
from .constants import PageAttributes as PG
from .constants import Resources as RES
from .errors import PageSizeNotDefinedError, PdfReadError
from .generic import (
    ArrayObject,
    ContentStream,
    DictionaryObject,
    EncodedStreamObject,
    FloatObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    PdfObject,
    RectangleObject,
    StreamObject,
    is_null_or_none,
)

try:
    from PIL.Image import Image

    pil_not_imported = False
except ImportError:
    Image = object  # type: ignore[assignment,misc,unused-ignore]  # TODO: Remove unused-ignore on Python 3.10
    pil_not_imported = True  # error will be raised only when using images

MERGE_CROP_BOX = "cropbox"  # pypdf <= 3.4.0 used "trimbox"


def _get_rectangle(self: Any, name: str, defaults: Iterable[str]) -> RectangleObject:
    """
    Read the box stored under ``name`` and return it as a ``RectangleObject``.

    Args:
        self: The dictionary-like object (with a ``pdf`` attribute) holding the box.
        name: Key of the requested box.
        defaults: Keys to try, in order, when ``name`` is missing or null.

    Returns:
        The rectangle. Unless the stored value already was a
        ``RectangleObject``, the converted rectangle is written back under
        ``name`` before being returned.

    """
    box: Union[None, RectangleObject, ArrayObject, IndirectObject] = self.get(name)
    if isinstance(box, RectangleObject):
        return box

    if is_null_or_none(box):
        # Walk the fallbacks and stop at the first key that is present.
        for fallback_name in defaults:
            box = self.get(fallback_name)
            if box is not None:
                break

    if isinstance(box, IndirectObject):
        box = self.pdf.get_object(box)

    if isinstance(box, ArrayObject) and len(box) > 4:
        # Tolerate over-long arrays: warn and keep the first four values.
        length = len(box)
        logger_warning(f"Expected four values, got {length}: {box}", __name__)
        rectangle = RectangleObject(tuple(box[:4]))
    else:
        rectangle = RectangleObject(box)  # type: ignore

    _set_rectangle(self, name, rectangle)
    return rectangle


def _set_rectangle(self: Any, name: str, value: Union[RectangleObject, float]) -> None:
    """Store ``value`` in ``self`` under the key ``name``, wrapped as a ``NameObject``."""
    self[NameObject(name)] = value


def _delete_rectangle(self: Any, name: str) -> None:
    """Remove the entry stored under ``name`` from ``self``."""
    del self[name]


def _create_rectangle_accessor(name: str, fallback: Iterable[str]) -> property:
    """
    Build a property giving get/set/delete access to the box stored under ``name``.

    Args:
        name: Key of the box in the owning dictionary.
        fallback: Keys handed to ``_get_rectangle`` as alternatives when
            ``name`` is missing or null.

    Returns:
        A ``property`` delegating to ``_get_rectangle``, ``_set_rectangle``
        and ``_delete_rectangle``.

    """
    # The inner functions deliberately carry no docstring, so that the
    # resulting property keeps ``__doc__`` unset.

    def _getter(self: Any) -> RectangleObject:
        return _get_rectangle(self, name, fallback)

    def _setter(self: Any, value: Any) -> None:
        return _set_rectangle(self, name, value)

    def _deleter(self: Any) -> None:
        return _delete_rectangle(self, name)

    return property(_getter, _setter, _deleter)


class Transformation:
    """
    Affine 2D transformation, stored as the six values of a PDF matrix.

    A transformation from one coordinate system to another is described by
    a 3-by-3 matrix of the following form::

        a b 0
        c d 0
        e f 1

    Only six of its elements can vary, which is why PDF usually writes such
    a matrix as the six-element array [ a b c d e f ].

    Points are row vectors; transforming one is a matrix multiplication::

                                 a b 0
     [ x′ y′ 1 ] = [ x y 1 ] ×   c d 0
                                 e f 1


    Example:
        >>> from pypdf import PdfWriter, Transformation
        >>> page = PdfWriter().add_blank_page(800, 600)
        >>> op = Transformation().scale(sx=2, sy=3).translate(tx=10, ty=20)
        >>> page.add_transformation(op)

    """

    def __init__(self, ctm: CompressedTransformationMatrix = (1, 0, 0, 1, 0, 0)) -> None:
        # The default is the identity transformation.
        self.ctm = ctm

    @property
    def matrix(self) -> TransformationMatrixType:
        """
        The full 3-by-3 matrix as nested tuples:

        ((a, b, 0), (c, d, 0), (e, f, 1))
        """
        values = self.ctm
        first_row = (values[0], values[1], 0)
        second_row = (values[2], values[3], 0)
        third_row = (values[4], values[5], 1)
        return (first_row, second_row, third_row)

    @staticmethod
    def compress(matrix: TransformationMatrixType) -> CompressedTransformationMatrix:
        """
        Reduce a 3-by-3 transformation matrix to its six variable elements.

        Args:
            matrix: The transformation matrix as a tuple of tuples.

        Returns:
            The tuple (a, b, c, d, e, f), i.e. the first two columns read row by row.

        """
        top, middle, bottom = matrix[0], matrix[1], matrix[2]
        return (top[0], top[1], middle[0], middle[1], bottom[0], bottom[1])

    def _to_cm(self) -> str:
        # Content-stream text of the ``cm`` operation for this matrix:
        # the six values with four decimals each, followed by the operator.
        formatted = " ".join(f"{self.ctm[position]:.4f}" for position in range(6))
        return f"{formatted} cm"

    def transform(self, m: "Transformation") -> "Transformation":
        """
        Combine this transformation with another one.

        The result is the matrix product of this matrix with the matrix of ``m``.

        Args:
            m: a Transformation to apply.

        Returns:
            A new ``Transformation`` instance

        Example:
            >>> from pypdf import PdfWriter, Transformation
            >>> height, width = 40, 50
            >>> page = PdfWriter().add_blank_page(800, 600)
            >>> op = Transformation((1, 0, 0, -1, 0, height))  # vertical mirror
            >>> op = op.transform(Transformation((-1, 0, 0, 1, width, 0)))  # horizontal mirror
            >>> page.add_transformation(op)

        """
        product = matrix_multiply(self.matrix, m.matrix)
        return Transformation(Transformation.compress(product))

    def translate(self, tx: float = 0, ty: float = 0) -> "Transformation":
        """
        Shift the contents of a page.

        Args:
            tx: The translation along the x-axis.
            ty: The translation along the y-axis.

        Returns:
            A new ``Transformation`` instance

        """
        current = self.ctm
        # A translation only touches the last two matrix elements.
        shifted = (
            current[0],
            current[1],
            current[2],
            current[3],
            current[4] + tx,
            current[5] + ty,
        )
        return Transformation(ctm=shifted)

    def scale(
        self, sx: Optional[float] = None, sy: Optional[float] = None
    ) -> "Transformation":
        """
        Scale the contents of a page towards the origin of the coordinate system.

        Typically, that is the lower-left corner of the page. That can be
        changed by translating the contents / the page boxes.

        Args:
            sx: The scale factor along the x-axis. Defaults to ``sy`` if omitted.
            sy: The scale factor along the y-axis. Defaults to ``sx`` if omitted.

        Returns:
            A new Transformation instance with the scaled matrix.

        Raises:
            ValueError: If neither ``sx`` nor ``sy`` is given.

        """
        if sx is None:
            if sy is None:
                raise ValueError("Either sx or sy must be specified")
            sx = sy
        elif sy is None:
            sy = sx
        scaling: TransformationMatrixType = ((sx, 0, 0), (0, sy, 0), (0, 0, 1))
        product = matrix_multiply(self.matrix, scaling)
        return Transformation(Transformation.compress(product))

    def rotate(self, rotation: float) -> "Transformation":
        """
        Rotate the contents of a page.

        Args:
            rotation: The angle of rotation in degrees.

        Returns:
            A new ``Transformation`` instance with the rotated matrix.

        """
        angle = math.radians(rotation)
        cosine = math.cos(angle)
        sine = math.sin(angle)
        turning: TransformationMatrixType = (
            (cosine, sine, 0),
            (-sine, cosine, 0),
            (0, 0, 1),
        )
        product = matrix_multiply(self.matrix, turning)
        return Transformation(Transformation.compress(product))

    def __repr__(self) -> str:
        return f"Transformation(ctm={self.ctm})"

    @overload
    def apply_on(self, pt: list[float], as_object: bool = False) -> list[float]:
        ...

    @overload
    def apply_on(
        self, pt: tuple[float, float], as_object: bool = False
    ) -> tuple[float, float]:
        ...

    def apply_on(
        self,
        pt: Union[tuple[float, float], list[float]],
        as_object: bool = False,
    ) -> Union[tuple[float, float], list[float]]:
        """
        Transform a single point with this matrix.

        Args:
            pt: A tuple or list representing the point in the form (x, y).
            as_object: If True, return items as FloatObject, otherwise as plain floats.

        Returns:
            The transformed point (x', y'); a list if ``pt`` was a list,
            otherwise a tuple.

        """
        convert = FloatObject if as_object else float
        x, y = float(pt[0]), float(pt[1])
        matrix = self.ctm
        transformed = (
            convert(x * matrix[0] + y * matrix[2] + matrix[4]),
            convert(x * matrix[1] + y * matrix[3] + matrix[5]),
        )
        if isinstance(pt, list):
            return list(transformed)
        return transformed


@dataclass
class ImageFile:
    """
    Image within the PDF file. *This object is not designed to be built.*

    This object should not be modified except using :func:`ImageFile.replace` to replace the image with a new one.
    """

    name: str = ""
    """
    Filename as identified within the PDF file.
    """

    data: bytes = b""
    """
    Data as bytes.
    """

    image: Optional[Image] = None
    """
    Data as PIL image.
    """

    indirect_reference: Optional[IndirectObject] = None
    """
    Reference to the object storing the stream.
    """

    def replace(self, new_image: Image, **kwargs: Any) -> None:
        """
        Replace the image with a new PIL image.

        The new image is saved as a one-page PDF in memory, the image object
        of that page is stored in place of the current one in the owning
        writer, and ``name``, ``data`` and ``image`` of this instance are
        refreshed from the stored object.

        Args:
            new_image (PIL.Image.Image): The new PIL image to replace the existing image.
            **kwargs: Additional keyword arguments to pass to `Image.save()`.

        Raises:
            ImportError: If pillow is not installed.
            TypeError: If the image is inline or in a PdfReader.
            TypeError: If the image does not belong to a PdfWriter.
            TypeError: If `new_image` is not a PIL Image.

        Note:
            This method replaces the existing image with a new image.
            It is not allowed for inline images or images within a PdfReader.
            The `kwargs` parameter allows passing additional parameters
            to `Image.save()`, such as quality.

        """
        if pil_not_imported:
            raise ImportError(
                "pillow is required to do image extraction. "
                "It can be installed via 'pip install pypdf[image]'"
            )

        from ._reader import PdfReader  # noqa: PLC0415
        from .generic import DictionaryObject, PdfObject  # noqa: PLC0415
        from .generic._image_xobject import _xobj_to_image  # noqa: PLC0415

        if self.indirect_reference is None:
            raise TypeError("Cannot update an inline image.")
        # The presence of ``_id_translated`` is used to recognise a writer.
        if not hasattr(self.indirect_reference.pdf, "_id_translated"):
            raise TypeError("Cannot update an image not belonging to a PdfWriter.")
        if not isinstance(new_image, Image):
            raise TypeError("new_image shall be a PIL Image")
        b = BytesIO()
        new_image.save(b, "PDF", **kwargs)
        reader = PdfReader(b)
        page_image = reader.pages[0].images[0]
        assert page_image.indirect_reference is not None
        # Overwrite the slot of the old image object with the new one ...
        self.indirect_reference.pdf._objects[self.indirect_reference.idnum - 1] = (
            page_image.indirect_reference.get_object()
        )
        # ... and make the new object point back at that slot.
        cast(
            PdfObject, self.indirect_reference.get_object()
        ).indirect_reference = self.indirect_reference
        # change the object attributes
        extension, byte_stream, img = _xobj_to_image(
            cast(DictionaryObject, self.indirect_reference.get_object()),
            pillow_parameters=kwargs,
        )
        assert extension is not None
        # Swap the extension only. When the current name has no dot, keep the
        # whole name instead of cutting off its last character (which is what
        # slicing with the -1 returned by ``rfind`` would do).
        stem, dot, _old_extension = self.name.rpartition(".")
        self.name = (stem if dot else self.name) + extension
        self.data = byte_stream
        self.image = img

    def __str__(self) -> str:
        return f"{self.__class__.__name__}(name={self.name}, data: {_human_readable_bytes(len(self.data))})"

    def __repr__(self) -> str:
        # Same as ``__str__`` with a hash of the data inserted before the closing parenthesis.
        return self.__str__()[:-1] + f", hash: {hash(self.data)})"


class VirtualListImages(Sequence[ImageFile]):
    """
    Provides access to images referenced within a page.
    Only one copy will be returned if the usage is used on the same page multiple times.
    See :func:`PageObject.images` for more details.

    The list is virtual: identifiers are obtained from ``ids_function`` and
    images are built on demand by ``get_function``.
    """

    def __init__(
        self,
        ids_function: Callable[[], list[Union[str, list[str]]]],
        get_function: Callable[[Union[str, list[str], tuple[str]]], ImageFile],
    ) -> None:
        self.ids_function = ids_function
        self.get_function = get_function
        self.current = -1

    def __len__(self) -> int:
        return len(self.ids_function())

    def keys(self) -> list[Union[str, list[str]]]:
        """Return the identifiers of all images."""
        return self.ids_function()

    def items(self) -> list[tuple[Union[str, list[str]], ImageFile]]:
        """Return ``(identifier, image)`` pairs for all images."""
        return [(x, self[x]) for x in self.ids_function()]

    @overload
    def __getitem__(self, index: Union[int, str, list[str]]) -> ImageFile:
        ...

    @overload
    def __getitem__(self, index: slice) -> Sequence[ImageFile]:
        ...

    def __getitem__(
        self, index: Union[int, slice, str, list[str], tuple[str]]
    ) -> Union[ImageFile, Sequence[ImageFile]]:
        """
        Return the image(s) selected by ``index``.

        Args:
            index: An identifier (string, or list/tuple of strings), an
                integer position (negative values count from the end) or a slice.

        Returns:
            One ``ImageFile``, or for a slice a new virtual list restricted
            to the selected identifiers.

        Raises:
            TypeError: If ``index`` has an unsupported type.
            IndexError: If an integer position is out of range.

        """
        if isinstance(index, (str, list, tuple)):
            # Lookup by identifier does not need the list of identifiers,
            # so do not pay for computing it.
            return self.get_function(index)
        lst = self.ids_function()
        if isinstance(index, slice):
            selected = [lst[x] for x in range(*index.indices(len(lst)))]
            cls = type(self)
            return cls((lambda: selected), self.get_function)
        if not isinstance(index, int):
            raise TypeError("Invalid sequence indices type")
        len_self = len(lst)
        if index < 0:
            # support negative indexes
            index += len_self
        if not (0 <= index < len_self):
            raise IndexError("Sequence index out of range")
        return self.get_function(lst[index])

    def __iter__(self) -> Iterator[ImageFile]:
        # Compute the identifiers once instead of once per yielded image.
        for key in self.ids_function():
            yield self.get_function(key)

    def __str__(self) -> str:
        p = [f"Image_{i}={n}" for i, n in enumerate(self.ids_function())]
        return f"[{', '.join(p)}]"


class PageObject(DictionaryObject):
    """
    PageObject represents a single page within a PDF file.

    Typically these objects will be created by accessing the
    :attr:`pages<pypdf.PdfReader.pages>` property of the
    :class:`PdfReader<pypdf.PdfReader>` class, but it is
    also possible to create an empty page with the
    :meth:`create_blank_page()<pypdf._page.PageObject.create_blank_page>` static method.

    Args:
        pdf: PDF file the page belongs to.
        indirect_reference: Stores the original indirect reference to
            this object in its source PDF

    """

    original_page: "PageObject"  # very local use in writer when appending

    def __init__(
        self,
        pdf: Optional[PdfCommonDocProtocol] = None,
        indirect_reference: Optional[IndirectObject] = None,
    ) -> None:
        DictionaryObject.__init__(self)
        self.pdf = pdf
        self.indirect_reference = indirect_reference
        # Inline images are loaded lazily; ``None`` means "not loaded yet".
        self.inline_images: Optional[dict[str, ImageFile]] = None
        self._font_width_maps: dict[str, tuple[dict[str, float], str, float]] = {}
        if is_null_or_none(indirect_reference):
            return
        assert indirect_reference is not None, "mypy"
        # Start from a copy of the entries of the referenced page dictionary.
        referenced = cast(DictionaryObject, indirect_reference.get_object())
        self.update(referenced)

    def hash_bin(self) -> int:
        """
        Compute a hash of the page used to detect modified objects.

        Note: this override hashes with ``DictionaryObject`` as type marker,
        so that a page gets the same result as a plain ``DictionaryObject``
        with the same entries.

        Returns:
            Hash considering type and value.

        """
        entries = [(key, value.hash_bin()) for key, value in self.items()]
        return hash((DictionaryObject, tuple(entries)))

    def hash_value_data(self) -> bytes:
        """Return the parent's hash data followed by the ``id()`` of this page."""
        identity = str(id(self)).encode()
        return super().hash_value_data() + identity

    @property
    def user_unit(self) -> float:
        """
        A read-only positive number giving the size of user space units.

        It is in multiples of 1/72 inch. Hence a value of 1 means a user
        space unit is 1/72 inch, and a value of 3 means that a user
        space unit is 3/72 inch.

        Returns:
            The value stored in the page dictionary, or 1 if the page does
            not define one.
        """
        return self.get(PG.USER_UNIT, 1)

    @staticmethod
    def create_blank_page(
        pdf: Optional[PdfCommonDocProtocol] = None,
        width: Union[float, Decimal, None] = None,
        height: Union[float, Decimal, None] = None,
    ) -> "PageObject":
        """
        Build a new page without any content.

        When ``width`` or ``height`` is ``None``, both dimensions are taken
        from the media box of the last page of *pdf*.

        Args:
            pdf: PDF file the page is within.
            width: The width of the new page expressed in default user
                space units.
            height: The height of the new page expressed in default user
                space units.

        Returns:
            The new blank page

        Raises:
            PageSizeNotDefinedError: if a dimension is missing and ``pdf``
                is ``None`` or contains no page

        """
        page = PageObject(pdf)

        # Minimal page dictionary (cf PDF Reference §7.7.3.3)
        page[NameObject(PG.TYPE)] = NameObject("/Page")
        page[NameObject(PG.PARENT)] = NullObject()
        page[NameObject(PG.RESOURCES)] = DictionaryObject()

        if width is None or height is None:
            if pdf is None or len(pdf.pages) < 1:
                raise PageSizeNotDefinedError
            lastpage = pdf.pages[len(pdf.pages) - 1]
            width = lastpage.mediabox.width
            height = lastpage.mediabox.height

        media_box = RectangleObject((0, 0, width, height))  # type: ignore
        page[NameObject(PG.MEDIABOX)] = media_box
        return page

    def _get_ids_image(
        self,
        obj: Optional[DictionaryObject] = None,
        ancest: Optional[list[str]] = None,
        call_stack: Optional[list[Any]] = None,
    ) -> list[Union[str, list[str]]]:
        """
        Collect the identifiers of the images reachable from ``obj``.

        Args:
            obj: Object whose ``/Resources`` are inspected; the page itself
                when ``None``.
            ancest: Names of the enclosing XObjects leading to ``obj``.
            call_stack: Indirect references of the objects already visited
                in this traversal; used to stop on cyclic references.

        Returns:
            Image XObject names (a plain name at the top level, the list
            ``[*ancest, name]`` for nested ones), followed by the keys of
            the inline images of the page.

        """
        if call_stack is None:
            call_stack = []
        _i = getattr(obj, "indirect_reference", None)
        # An object met a second time within the same traversal is not
        # visited again.
        if _i in call_stack:
            return []
        call_stack.append(_i)
        # Inline images are parsed once and cached on the page.
        if self.inline_images is None:
            self.inline_images = self._get_inline_images()
        if obj is None:
            obj = self
        if ancest is None:
            ancest = []
        lst: list[Union[str, list[str]]] = []
        # Without any XObject resource, only inline images can exist.
        if (
                PG.RESOURCES not in obj or
                is_null_or_none(resources := obj[PG.RESOURCES]) or
                RES.XOBJECT not in cast(DictionaryObject, resources)
        ):
            return [] if self.inline_images is None else list(self.inline_images.keys())

        x_object = resources[RES.XOBJECT].get_object()  # type: ignore
        for o in x_object:
            if not isinstance(x_object[o], StreamObject):
                continue
            if x_object[o][IA.SUBTYPE] == "/Image":
                lst.append(o if len(ancest) == 0 else [*ancest, o])
            else:  # is a form with possible images inside
                lst.extend(self._get_ids_image(x_object[o], [*ancest, o], call_stack))
        assert self.inline_images is not None
        # NOTE(review): this line (and the early return above) also runs for
        # nested calls, so the inline image keys appear to be added once per
        # recursion level - confirm this is intended.
        lst.extend(list(self.inline_images.keys()))
        return lst

    def _get_image(
        self,
        id: Union[str, list[str], tuple[str]],
        obj: Optional[DictionaryObject] = None,
    ) -> ImageFile:
        """
        Build the ``ImageFile`` designated by ``id``.

        Args:
            id: Name of an image XObject, key of an inline image (``~n~``),
                or a list/tuple of names leading through nested XObjects to
                an image.
            obj: Object whose ``/Resources`` are searched; the page itself
                when ``None``.

        Returns:
            The matching image.

        Raises:
            KeyError: If XObject resources are required but missing, or if
                the identifier cannot be found.

        """
        if obj is None:
            obj = cast(DictionaryObject, self)
        if isinstance(id, tuple):
            id = list(id)
        # A path of length one is the same as the plain name.
        if isinstance(id, list) and len(id) == 1:
            id = id[0]

        xobjs: Optional[DictionaryObject] = None
        try:
            resources = cast(DictionaryObject, obj[PG.RESOURCES])
            xobjs = cast(DictionaryObject, resources[RES.XOBJECT])
        except KeyError as exc:
            # Only inline images can be served without XObject resources.
            if id[0] != "~" or id[-1] != "~":
                raise KeyError(
                    f"Cannot access image object {id} without XObject resources"
                ) from exc

        if not isinstance(id, str):
            # in a subobject: descend into the first name with the remaining path
            assert xobjs is not None
            return self._get_image(id[1:], cast(DictionaryObject, xobjs[id[0]]))

        if id[0] == "~" and id[-1] == "~":
            if self.inline_images is None:
                self.inline_images = self._get_inline_images()
            if self.inline_images is None:
                raise KeyError("No inline image can be found")
            return self.inline_images[id]

        assert xobjs is not None
        from .generic._image_xobject import _xobj_to_image  # noqa: PLC0415
        xobject = xobjs[id]
        extension, byte_stream, pil_image = _xobj_to_image(
            cast(DictionaryObject, xobject)
        )
        return ImageFile(
            name=f"{id[1:]}{extension}",
            data=byte_stream,
            image=pil_image,
            indirect_reference=xobject.indirect_reference,
        )

    @property
    def images(self) -> VirtualListImages:
        """
        Read-only property emulating a list of images on a page.

        Get a list of all images on the page. The key can be:

        - A string (for the top object)
        - A tuple or list of strings (for images within XObject forms)
        - An integer (position in the list of images)

        Examples:
            * `reader.pages[0].images[0]`        # return first image
            * `reader.pages[0].images['/I0']`    # return image '/I0'
            * `reader.pages[0].images['/TP1','/Image1']` # return image '/Image1' within '/TP1' XObject form
            * `for img in reader.pages[0].images:` # loops through all objects

        images.keys() and images.items() can be used.

        The ImageFile has the following properties:

            * `.name` : name of the object
            * `.data` : bytes of the object
            * `.image` : PIL Image Object
            * `.indirect_reference` : object reference

        and the following methods:
            `.replace(new_image: PIL.Image.Image, **kwargs)` :
                replace the image in the pdf with the new image
                applying the saving parameters indicated (such as quality)

        Example usage (replacing is only possible for images of a page that
        belongs to a PdfWriter; it raises ``TypeError`` otherwise):

            writer.pages[0].images[0].replace(Image.open("new_image.jpg"), quality=20)

        Inline images are extracted and named ~0~, ~1~, ..., with the
        indirect_reference set to None.

        """
        return VirtualListImages(self._get_ids_image, self._get_image)

    def _translate_value_inline_image(self, k: str, v: PdfObject) -> PdfObject:
        """
        Translate a value used in an inline image dictionary.

        Args:
            k: Key the value belongs to; only used in the error message.
            v: The value to translate.

        Returns:
            The name mapped by ``_INLINE_IMAGE_VALUE_MAPPING`` if ``v`` is
            listed there; for any other name, the entry of that name in the
            ``/ColorSpace`` resources of the page; otherwise ``v`` unchanged.

        Raises:
            PdfReadError: If ``v`` is an unmapped name that cannot be found
                in the ``/ColorSpace`` resources.

        """
        try:
            v = NameObject(_INLINE_IMAGE_VALUE_MAPPING[cast(str, v)])
        except (TypeError, KeyError):
            # TypeError: v cannot be used as a key of the mapping;
            # KeyError: v is not listed in the mapping.
            if isinstance(v, NameObject):
                # It is a custom name, thus we have to look in resources.
                # The only applicable case is for ColorSpace.
                try:
                    res = cast(DictionaryObject, self["/Resources"])["/ColorSpace"]
                    v = cast(DictionaryObject, res)[v]
                except KeyError:  # for res and v
                    raise PdfReadError(f"Cannot find resource entry {v} for {k}")
        return v

    def _get_inline_images(self) -> dict[str, ImageFile]:
        """
        Extract the inline images of the page content.

        Returns:
            The images keyed by ``~0~``, ``~1~``, ... in order of appearance;
            an empty dictionary if the page has no content.

        Raises:
            PdfReadError: If a bare ``BI``, ``ID`` or ``EI`` operator is met
                in the parsed content.

        """
        content = self.get_contents()
        if is_null_or_none(content):
            return {}
        assert content is not None, "mypy"

        # First pass: gather the raw inline images of the whole content.
        raw_images = []
        for operands, operator in content.operations:
            if operator == b"INLINE IMAGE":
                raw_images.append(
                    {
                        "settings": operands["settings"],
                        "__streamdata__": operands["data"],
                    }
                )
                continue
            if operator in (b"BI", b"EI", b"ID"):  # pragma: no cover
                raise PdfReadError(
                    f"{operator!r} operator met whereas not expected, "
                    "please share use case with pypdf dev team"
                )

        # Second pass: turn each of them into a stream object and decode it.
        files: dict[str, ImageFile] = {}
        for num, raw in enumerate(raw_images):
            stream_data = raw["__streamdata__"]
            init = {
                "__streamdata__": stream_data,
                "/Length": len(stream_data),
            }
            for key, value in raw["settings"].items():
                if key in {"/Length", "/L"}:  # no length is expected
                    continue
                if isinstance(value, list):
                    value = ArrayObject(
                        [self._translate_value_inline_image(key, item) for item in value]
                    )
                else:
                    value = self._translate_value_inline_image(key, value)
                mapped_key = NameObject(_INLINE_IMAGE_KEY_MAPPING[key])
                # The first value seen for a key wins.
                init.setdefault(mapped_key, value)
            raw["object"] = EncodedStreamObject.initialize_from_dictionary(init)
            from .generic._image_xobject import _xobj_to_image  # noqa: PLC0415
            extension, byte_stream, img = _xobj_to_image(raw["object"])
            label = f"~{num}~"
            files[label] = ImageFile(
                name=f"{label}{extension}",
                data=byte_stream,
                image=img,
                indirect_reference=None,
            )
        return files

    @property
    def rotation(self) -> int:
        """
        The visual rotation of the page.

        This number has to be a multiple of 90 degrees: 0, 90, 180, or 270 are
        valid values. This property does not affect ``/Contents``.

        When assigned, the value is rounded to the nearest multiple of 90
        and reduced modulo 360.
        """
        stored = self.get(PG.ROTATE, 0)
        if isinstance(stored, int):
            return stored
        return stored.get_object()

    @rotation.setter
    def rotation(self, r: float) -> None:
        # Adding 45 before the floor division rounds to the nearest quarter turn.
        quarter_turns = (int(r) + 45) // 90
        self[NameObject(PG.ROTATE)] = NumberObject((quarter_turns * 90) % 360)

    def transfer_rotation_to_content(self) -> None:
        """
        Move the page rotation into the content and the media/crop/... boxes.

        The rotation of the page is reset to 0; the content and all boxes
        present on the page are transformed instead.

        It is recommended to apply this function before page merging.
        """
        angle = -self.rotation  # rotation to apply is in the otherway
        self.rotation = 0

        media = RectangleObject(self.mediabox)
        center_x = float(media.left + media.width / 2)
        center_y = float(media.bottom + media.height / 2)
        # Rotate around the centre of the media box ...
        trsf = Transformation().translate(-center_x, -center_y).rotate(angle)
        # ... then shift the result by the smallest coordinates of the two
        # transformed media box corners.
        corner_a = trsf.apply_on(media.lower_left)
        corner_b = trsf.apply_on(media.upper_right)
        trsf = trsf.translate(
            -min(corner_a[0], corner_b[0]), -min(corner_a[1], corner_b[1])
        )
        self.add_transformation(trsf, False)

        for box_name in ("/MediaBox", "/CropBox", "/BleedBox", "/TrimBox", "/ArtBox"):
            if box_name not in self:
                continue
            box = RectangleObject(self[box_name])  # type: ignore
            corner_a = trsf.apply_on(box.lower_left)
            corner_b = trsf.apply_on(box.upper_right)
            # Re-normalise, as the transformed corners may have swapped roles.
            x_values = (corner_a[0], corner_b[0])
            y_values = (corner_a[1], corner_b[1])
            self[NameObject(box_name)] = RectangleObject(
                (min(x_values), min(y_values), max(x_values), max(y_values))
            )

    def rotate(self, angle: int) -> "PageObject":
        """
        Rotate a page clockwise by increments of 90 degrees.

        The angle is added to the current rotation of the page.

        Args:
            angle: Angle to rotate the page. Must be an increment of 90 deg.

        Returns:
            The rotated PageObject (the page itself)

        Raises:
            ValueError: If ``angle`` is not a multiple of 90.

        """
        if angle % 90:
            raise ValueError("Rotation angle must be a multiple of 90")
        new_rotation = self.rotation + angle
        self[NameObject(PG.ROTATE)] = NumberObject(new_rotation)
        return self

    def _merge_resources(
        self,
        res1: DictionaryObject,
        res2: DictionaryObject,
        resource: Any,
        new_res1: bool = True,
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """
        Merge the ``resource`` entries of ``res2`` into those of ``res1``.

        Args:
            res1: Resources receiving the entries.
            res2: Resources providing the entries.
            resource: Key of the sub-dictionary to merge.
            new_res1: If True, merge into a new dictionary initialised from
                ``res1[resource]``; if False, merge into ``res1[resource]``
                itself.

        Returns:
            A tuple ``(merged, renamed)``: the merged dictionary, sorted by
            key if ``res2`` contributed any entry, and the mapping from the
            original names of ``res2`` to the names they received when they
            had to be renamed.

        """
        try:
            assert isinstance(self.indirect_reference, IndirectObject)
            pdf = self.indirect_reference.pdf
            is_pdf_writer = hasattr(
                pdf, "_add_object"
            )  # expect isinstance(pdf, PdfWriter)
        except (AssertionError, AttributeError):
            pdf = None
            is_pdf_writer = False

        def compute_unique_key(base_key: str) -> tuple[str, bool]:
            """
            Find a key that either doesn't already exist or has the same value
            (indicated by the bool)

            Args:
                base_key: An index is added to this to get the computed key

            Returns:
                A tuple (computed key, bool) where the boolean indicates
                if there is a resource of the given computed_key with the same
                value.

            """
            value = page2res.raw_get(base_key)
            # TODO: a possible improvement for writer, the indirect_reference
            # cannot be found because translated

            # try the current key first (e.g. "foo"), but otherwise iterate
            # through "foo-0", "foo-1", etc. new_res can contain only finitely
            # many keys, thus this'll eventually end, even if it's been crafted
            # to be maximally annoying.
            computed_key = base_key
            idx = 0
            while computed_key in new_res:
                if new_res.raw_get(computed_key) == value:
                    # there's already a resource of this name, with the exact
                    # same value
                    return computed_key, True
                computed_key = f"{base_key}-{idx}"
                idx += 1
            return computed_key, False

        if new_res1:
            new_res = DictionaryObject()
            new_res.update(res1.get(resource, DictionaryObject()).get_object())
        else:
            new_res = cast(DictionaryObject, res1[resource])
        page2res = cast(
            DictionaryObject, res2.get(resource, DictionaryObject()).get_object()
        )
        rename_res = {}
        for key in page2res:
            unique_key, same_value = compute_unique_key(key)
            newname = NameObject(unique_key)
            if key != unique_key:
                # we have to use a different name for this
                rename_res[key] = newname

            if not same_value:
                if is_pdf_writer:
                    new_res[newname] = page2res.raw_get(key).clone(pdf)
                    # Prefer storing the reference of the clone, if it has one.
                    try:
                        new_res[newname] = new_res[newname].indirect_reference
                    except AttributeError:
                        pass
                else:
                    new_res[newname] = page2res.raw_get(key)

        if page2res:
            # Put the entries in key order. Doing this once after the loop
            # yields the same dictionary as re-sorting after every key,
            # without the repeated sort and rebuild. Nothing is reordered
            # when ``res2`` contributed no entry.
            ordered = sorted(new_res.items())
            new_res.clear()
            for name, value in ordered:
                new_res[name] = value
        return new_res, rename_res

    @staticmethod
    def _content_stream_rename(
        stream: ContentStream,
        rename: dict[Any, Any],
        pdf: Optional[PdfCommonDocProtocol],
    ) -> ContentStream:
        """
        Replace names used as operands in a content stream.

        Args:
            stream: The content stream to process.
            rename: Mapping from old names to new names.
            pdf: Document handed to the new ``ContentStream``.

        Returns:
            ``stream`` itself when ``rename`` is empty, otherwise a new
            ``ContentStream`` with the operands renamed.

        Raises:
            KeyError: If the operands of an operation are neither a list
                nor a dictionary.

        """
        if not rename:
            return stream
        renamed = ContentStream(stream, pdf)
        for operands, _operator in renamed.operations:
            entries: Iterable[tuple[Any, Any]]
            if isinstance(operands, list):
                entries = enumerate(operands)
            elif isinstance(operands, dict):
                entries = operands.items()
            else:
                raise KeyError(f"Type of operands is {type(operands)}")
            # Only existing slots are reassigned, so the container keeps its size.
            for slot, operand in entries:
                if isinstance(operand, NameObject):
                    operands[slot] = rename.get(operand, operand)
        return renamed

    @staticmethod
    def _add_transformation_matrix(
        contents: Any,
        pdf: Optional[PdfCommonDocProtocol],
        ctm: CompressedTransformationMatrix,
    ) -> ContentStream:
        """
        Prepend a ``cm`` operation carrying ``ctm`` to a content stream.

        Args:
            contents: The content used to build the ``ContentStream``.
            pdf: Document handed to the ``ContentStream`` that is created.
            ctm: The values of the transformation matrix.

        Returns:
            A new ``ContentStream`` whose first operation is the ``cm``
            operator with the matrix values as operands.

        """
        stream = ContentStream(contents, pdf)
        matrix_operands = list(map(FloatObject, ctm))
        stream.operations.insert(0, [matrix_operands, b"cm"])
        return stream

    def _get_contents_as_bytes(self) -> Optional[bytes]:
        """
        Return the page contents as bytes.

        Returns:
            The data of the ``/Contents`` object (the data of all parts joined
            together when it is an array), or ``None`` if the entry is missing
            or is a null object.

        """
        if PG.CONTENTS not in self:
            return None
        obj = self[PG.CONTENTS]
        # A null /Contents entry carries no data; treat it like a missing
        # entry, the same way get_contents() does, instead of failing on
        # ``get_data``.
        if is_null_or_none(obj):
            return None
        obj = obj.get_object()
        if isinstance(obj, list):
            return b"".join(x.get_object().get_data() for x in obj)
        return cast(EncodedStreamObject, obj).get_data()

    def get_contents(self) -> Optional[ContentStream]:
        """
        Access the page contents.

        Returns:
            A ``ContentStream`` built from the ``/Contents`` object, or ``None``
            if the entry is missing or null.
            ``/Contents`` is optional, as described in §7.7.3.3 of the PDF Reference.

        """
        if PG.CONTENTS not in self:
            return None
        raw_contents = self[PG.CONTENTS]
        if is_null_or_none(raw_contents):
            return None
        try:
            owner = cast(IndirectObject, self.indirect_reference).pdf
        except AttributeError:
            # No usable indirect reference: build the stream without a document.
            owner = None
        return ContentStream(raw_contents.get_object(), owner)

    def replace_contents(
        self, content: Union[None, ContentStream, EncodedStreamObject, ArrayObject]
    ) -> None:
        """
        Replace the page contents with the new content and nullify old objects.

        For a page without an indirect reference the content is stored
        directly in the page dictionary. Otherwise the objects referenced by
        the previous ``/Contents`` are replaced through the owning document.

        Args:
            content: New content; if ``None``, the ``/Contents`` entry is deleted.

        """
        if not hasattr(self, "indirect_reference") or self.indirect_reference is None:
            # The page is not attached: the content is directly attached.
            if content is None:
                # A plain ``None`` is not a PDF object and must not be written
                # into the page dictionary; honor the documented contract and
                # drop the entry instead.
                if PG.CONTENTS in self:
                    del self[PG.CONTENTS]
            else:
                self[NameObject(PG.CONTENTS)] = content
            # forces recalculation of inline_images
            self.inline_images = None
            return

        from pypdf._writer import PdfWriter  # noqa: PLC0415
        if not isinstance(self.indirect_reference.pdf, PdfWriter):
            deprecate(
                "Calling `PageObject.replace_contents()` for pages not assigned to a writer is deprecated "
                "and will be removed in pypdf 7.0.0. Attach the page to the writer first or use "
                "`PdfWriter(clone_from=...)` directly. The existing approach has proved being unreliable."
            )

        writer = self.indirect_reference.pdf
        if isinstance(self.get(PG.CONTENTS, None), ArrayObject):
            # Nullify every stream referenced by the previous content array.
            content_array = cast(ArrayObject, self[PG.CONTENTS])
            for reference in content_array:
                try:
                    writer._replace_object(indirect_reference=reference.indirect_reference, obj=NullObject())
                except ValueError:
                    # Occurs when called on PdfReader.
                    pass

        if isinstance(content, ArrayObject):
            # Register each part of the new content and keep what _add_object returns.
            content = ArrayObject(writer._add_object(obj) for obj in content)

        if is_null_or_none(content):
            if PG.CONTENTS not in self:
                return
            assert self[PG.CONTENTS].indirect_reference is not None
            writer._replace_object(indirect_reference=self[PG.CONTENTS].indirect_reference, obj=NullObject())
            del self[PG.CONTENTS]
        elif not hasattr(self.get(PG.CONTENTS, None), "indirect_reference"):
            # No previous indirect /Contents object to reuse: add a new one.
            try:
                self[NameObject(PG.CONTENTS)] = writer._add_object(content)
            except AttributeError:
                # applies at least for page not in writer
                # as a backup solution, we put content as an object although not in accordance with pdf ref
                # this will be fixed with the _add_object
                self[NameObject(PG.CONTENTS)] = content
        else:
            assert content is not None, "mypy"
            # Reuse the reference of the previous /Contents object for the new content.
            content.indirect_reference = self[
                PG.CONTENTS
            ].indirect_reference  # TODO: in the future may require generation management
            try:
                writer._replace_object(indirect_reference=content.indirect_reference, obj=content)
            except AttributeError:
                # applies at least for page not in writer
                # as a backup solution, we put content as an object although not in accordance with pdf ref
                # this will be fixed with the _add_object
                self[NameObject(PG.CONTENTS)] = content
        # forces recalculation of inline_images
        self.inline_images = None

    def merge_page(
        self, page2: "PageObject", expand: bool = False, over: bool = True
    ) -> None:
        """
        Merge the content streams of two pages into one.

        Resource references (e.g. fonts) of both pages are kept. The
        mediabox, cropbox, etc. of this page are not altered unless ``expand``
        is set. By default the content stream of ``page2`` is appended after
        this page's content stream, so that it is drawn after, i.e. "on top"
        of, this page.

        Args:
            page2: The page to be merged into this one. Should be
                an instance of :class:`PageObject<PageObject>`.
            over: Put the content of ``page2`` over this page's content if
                True (default), else under it.
            expand: If True, the current page dimensions will be
                expanded to accommodate the dimensions of the page to be merged.

        """
        self._merge_page(page2, expand=expand, over=over)

    def _merge_page(
        self,
        page2: "PageObject",
        page2transformation: Optional[Callable[[Any], ContentStream]] = None,
        ctm: Optional[CompressedTransformationMatrix] = None,
        over: bool = True,
        expand: bool = False,
    ) -> None:
        """
        Merge ``page2`` into this page.

        When this page has an indirect reference whose document exposes
        ``_add_object``, the work is delegated to ``_merge_page_writer``.
        Otherwise new resource, annotation and content objects are built here
        and assigned to this page.

        Args:
            page2: The page to be merged into this one.
            page2transformation: Optional callable applied to the content
                stream of ``page2`` before it is merged.
            ctm: Transformation matrix, used when expanding the mediabox.
            over: Put the content of ``page2`` after (True) or before (False)
                this page's content.
            expand: Whether to expand this page's mediabox to also cover
                ``page2``.

        """
        # NOTE(review): the try also covers the delegated call, so an
        # AssertionError/AttributeError raised inside _merge_page_writer falls
        # through to the code below - confirm this is intended.
        try:
            assert isinstance(self.indirect_reference, IndirectObject)
            if hasattr(
                self.indirect_reference.pdf, "_add_object"
            ):  # to detect PdfWriter
                return self._merge_page_writer(
                    page2, page2transformation, ctm, over, expand
                )
        except (AssertionError, AttributeError):
            pass

        # First we work on merging the resource dictionaries. This allows us
        # to find out what symbols in the content streams we might need to
        # rename.
        new_resources = DictionaryObject()
        rename = {}
        original_resources = cast(DictionaryObject, self.get(PG.RESOURCES, DictionaryObject()).get_object())
        page2resources = cast(DictionaryObject, page2.get(PG.RESOURCES, DictionaryObject()).get_object())
        new_annots = ArrayObject()

        for page in (self, page2):
            if PG.ANNOTS in page:
                annots = page[PG.ANNOTS]
                if isinstance(annots, ArrayObject):
                    new_annots.extend(annots)

        for res in (
            RES.EXT_G_STATE,
            RES.FONT,
            RES.XOBJECT,
            RES.COLOR_SPACE,
            RES.PATTERN,
            RES.SHADING,
            RES.PROPERTIES,
        ):
            new, newrename = self._merge_resources(
                original_resources, page2resources, res
            )
            if new:
                new_resources[NameObject(res)] = new
                rename.update(newrename)

        # Combine /ProcSet sets, making sure there's a consistent order
        new_resources[NameObject(RES.PROC_SET)] = ArrayObject(
            sorted(
                set(
                    original_resources.get(RES.PROC_SET, ArrayObject()).get_object()
                ).union(
                    set(page2resources.get(RES.PROC_SET, ArrayObject()).get_object())
                )
            )
        )

        new_content_array = ArrayObject()
        original_content = self.get_contents()
        if original_content is not None:
            original_content.isolate_graphics_state()
            new_content_array.append(original_content)

        page2content = page2.get_contents()
        if page2content is not None:
            # Clip the merged content to the box of page2 selected by
            # MERGE_CROP_BOX: "re" (rectangle), "W" (clip), "n" (end path).
            rect = getattr(page2, MERGE_CROP_BOX)
            page2content.operations.insert(
                0,
                (
                    # A real list rather than a one-shot ``map`` iterator, so
                    # the operands can be traversed more than once.
                    [
                        FloatObject(value)
                        for value in (
                            rect.left,
                            rect.bottom,
                            rect.width,
                            rect.height,
                        )
                    ],
                    b"re",
                ),
            )
            page2content.operations.insert(1, ([], b"W"))
            page2content.operations.insert(2, ([], b"n"))
            if page2transformation is not None:
                page2content = page2transformation(page2content)
            page2content = PageObject._content_stream_rename(
                page2content, rename, self.pdf
            )
            page2content.isolate_graphics_state()
            if over:
                new_content_array.append(page2content)
            else:
                new_content_array.insert(0, page2content)

        # if expanding the page to fit a new page, calculate the new media box size
        if expand:
            self._expand_mediabox(page2, ctm)

        self.replace_contents(ContentStream(new_content_array, self.pdf))
        self[NameObject(PG.RESOURCES)] = new_resources
        self[NameObject(PG.ANNOTS)] = new_annots

    def _merge_page_writer(
        self,
        page2: "PageObject",
        page2transformation: Optional[Callable[[Any], ContentStream]] = None,
        ctm: Optional[CompressedTransformationMatrix] = None,
        over: bool = True,
        expand: bool = False,
    ) -> None:
        """
        Merge ``page2`` into this page, updating this page's objects in place.

        Used by ``_merge_page`` when this page's indirect reference points to
        a document exposing ``_add_object``. Resources and ``/ProcSet`` of
        ``page2`` are merged into the existing resources of this page, the
        annotations of ``page2`` are cloned into the document (with their
        ``/Rect`` and ``/QuadPoints`` transformed by ``ctm``) and the content
        streams are combined.

        Args:
            page2: The page to be merged into this one.
            page2transformation: Optional callable applied to the content
                stream of ``page2`` before it is merged.
            ctm: Transformation matrix applied to the annotation coordinates
                and used when expanding the mediabox.
            over: Put the content of ``page2`` after (True) or before (False)
                this page's content.
            expand: Whether to expand this page's mediabox to also cover
                ``page2``.

        """
        # First we work on merging the resource dictionaries. This allows us
        # to find which symbols in the content streams we might need to
        # rename.
        assert isinstance(self.indirect_reference, IndirectObject)
        pdf = self.indirect_reference.pdf

        rename = {}
        if PG.RESOURCES not in self:
            self[NameObject(PG.RESOURCES)] = DictionaryObject()
        original_resources = cast(DictionaryObject, self[PG.RESOURCES].get_object())
        if PG.RESOURCES not in page2:
            page2resources = DictionaryObject()
        else:
            page2resources = cast(DictionaryObject, page2[PG.RESOURCES].get_object())

        for res in (
            RES.EXT_G_STATE,
            RES.FONT,
            RES.XOBJECT,
            RES.COLOR_SPACE,
            RES.PATTERN,
            RES.SHADING,
            RES.PROPERTIES,
        ):
            if res in page2resources:
                if res not in original_resources:
                    original_resources[NameObject(res)] = DictionaryObject()
                # The returned dictionary is discarded; presumably the helper
                # updates ``original_resources`` in place when called with
                # ``False`` - TODO confirm against _merge_resources.
                _, newrename = self._merge_resources(
                    original_resources, page2resources, res, False
                )
                rename.update(newrename)
        # Combine /ProcSet sets.
        if RES.PROC_SET in page2resources:
            if RES.PROC_SET not in original_resources:
                original_resources[NameObject(RES.PROC_SET)] = ArrayObject()
            arr = cast(ArrayObject, original_resources[RES.PROC_SET])
            for x in cast(ArrayObject, page2resources[RES.PROC_SET]):
                if x not in arr:
                    arr.append(x)
            arr.sort()

        if PG.ANNOTS in page2:
            if PG.ANNOTS not in self:
                self[NameObject(PG.ANNOTS)] = ArrayObject()
            annots = cast(ArrayObject, self[PG.ANNOTS].get_object())
            if ctm is None:
                trsf = Transformation()
            else:
                trsf = Transformation(ctm)
            # Ensure we are working on a copy of the list. Otherwise, if both pages
            # are the same object, we might run into an infinite loop.
            for a in cast(ArrayObject, deepcopy(page2[PG.ANNOTS])):
                a = a.get_object()
                aa = a.clone(
                    pdf,
                    ignore_fields=("/P", "/StructParent", "/Parent"),
                    force_duplicate=True,
                )
                # Transform two opposite corners and normalize them again to
                # (lower-left, upper-right).
                r = cast(ArrayObject, a["/Rect"])
                pt1 = trsf.apply_on((r[0], r[1]), True)
                pt2 = trsf.apply_on((r[2], r[3]), True)
                aa[NameObject("/Rect")] = ArrayObject(
                    (
                        min(pt1[0], pt2[0]),
                        min(pt1[1], pt2[1]),
                        max(pt1[0], pt2[0]),
                        max(pt1[1], pt2[1]),
                    )
                )
                if "/QuadPoints" in a:
                    q = cast(ArrayObject, a["/QuadPoints"])
                    # /QuadPoints holds 8 x n numbers describing n
                    # quadrilaterals, so every (x, y) pair is transformed,
                    # not only the four points of the first quadrilateral.
                    quad_points: list[Any] = []
                    for i in range(0, len(q) - 1, 2):
                        quad_points.extend(trsf.apply_on((q[i], q[i + 1]), True))
                    aa[NameObject("/QuadPoints")] = ArrayObject(quad_points)
                try:
                    aa["/Popup"][NameObject("/Parent")] = aa.indirect_reference
                except KeyError:
                    pass
                try:
                    aa[NameObject("/P")] = self.indirect_reference
                    annots.append(aa.indirect_reference)
                except AttributeError:
                    pass

        new_content_array = ArrayObject()
        original_content = self.get_contents()
        if original_content is not None:
            original_content.isolate_graphics_state()
            new_content_array.append(original_content)

        page2content = page2.get_contents()
        if page2content is not None:
            # Clip the merged content to the box of page2 selected by
            # MERGE_CROP_BOX: "re" (rectangle), "W" (clip), "n" (end path).
            rect = getattr(page2, MERGE_CROP_BOX)
            page2content.operations.insert(
                0,
                (
                    # A real list rather than a one-shot ``map`` iterator, so
                    # the operands can be traversed more than once.
                    [
                        FloatObject(value)
                        for value in (
                            rect.left,
                            rect.bottom,
                            rect.width,
                            rect.height,
                        )
                    ],
                    b"re",
                ),
            )
            page2content.operations.insert(1, ([], b"W"))
            page2content.operations.insert(2, ([], b"n"))
            if page2transformation is not None:
                page2content = page2transformation(page2content)
            page2content = PageObject._content_stream_rename(
                page2content, rename, self.pdf
            )
            page2content.isolate_graphics_state()
            if over:
                new_content_array.append(page2content)
            else:
                new_content_array.insert(0, page2content)

        # if expanding the page to fit a new page, calculate the new media box size
        if expand:
            self._expand_mediabox(page2, ctm)

        self.replace_contents(new_content_array)

    def _expand_mediabox(
        self, page2: "PageObject", ctm: Optional[CompressedTransformationMatrix]
    ) -> None:
        """
        Grow this page's mediabox so that it also covers the mediabox of ``page2``.

        Args:
            page2: The page whose mediabox has to fit into this page.
            ctm: Optional transformation matrix applied to the four corners of
                the mediabox of ``page2`` before the comparison.

        """
        own_left = self.mediabox.left.as_numeric()
        own_bottom = self.mediabox.bottom.as_numeric()
        own_right = self.mediabox.right.as_numeric()
        own_top = self.mediabox.top.as_numeric()

        other_left = page2.mediabox.left.as_numeric()
        other_bottom = page2.mediabox.bottom.as_numeric()
        other_right = page2.mediabox.right.as_numeric()
        other_top = page2.mediabox.top.as_numeric()
        # The four corners of the mediabox of page2 as (x, y) pairs.
        corner_points = (
            (other_left, other_bottom),
            (other_left, other_top),
            (other_right, other_top),
            (other_right, other_bottom),
        )

        if ctm is None:
            xs = tuple(x for x, _ in corner_points)
            ys = tuple(y for _, y in corner_points)
        else:
            matrix = tuple(float(value) for value in ctm)
            xs = tuple(
                matrix[0] * x + matrix[2] * y + matrix[4] for x, y in corner_points
            )
            ys = tuple(
                matrix[1] * x + matrix[3] * y + matrix[5] for x, y in corner_points
            )

        # Union of this page's box and the bounding box of the corners.
        self.mediabox.lower_left = (
            min(own_left, min(xs)),
            min(own_bottom, min(ys)),
        )
        self.mediabox.upper_right = (
            max(own_right, max(xs)),
            max(own_top, max(ys)),
        )

    def merge_transformed_page(
        self,
        page2: "PageObject",
        ctm: Union[CompressedTransformationMatrix, Transformation],
        over: bool = True,
        expand: bool = False,
    ) -> None:
        """
        Merge ``page2`` into this page like
        :meth:`~pypdf._page.PageObject.merge_page`, after applying a
        transformation matrix to the merged stream.

        Args:
          page2: The page to be merged into this one.
          ctm: A 6-element tuple containing the operands of the
                 transformation matrix, or a ``Transformation`` whose
                 matrix is used.
          over: Put the content of ``page2`` over this page's content if True
            (default), else under it.
          expand: Whether the page should be expanded to fit the dimensions
            of the page to be merged.

        """
        matrix = cast(
            CompressedTransformationMatrix,
            ctm.ctm if isinstance(ctm, Transformation) else ctm,
        )

        def _prepend_matrix(page2_content: Any) -> ContentStream:
            # Put the "cm" operation in front of the content of page2.
            return PageObject._add_transformation_matrix(
                page2_content, page2.pdf, matrix
            )

        self._merge_page(page2, _prepend_matrix, matrix, over, expand)

    def merge_scaled_page(
        self, page2: "PageObject", scale: float, over: bool = True, expand: bool = False
    ) -> None:
        """
        Merge ``page2`` into this page like
        :meth:`~pypdf._page.PageObject.merge_page`, scaling the merged stream
        with a transformation matrix.

        Args:
          page2: The page to be merged into this one.
          scale: The scaling factor, used for both axes.
          over: Put the content of ``page2`` over this page's content if True
            (default), else under it.
          expand: Whether the page should be expanded to fit the
            dimensions of the page to be merged.

        """
        self.merge_transformed_page(
            page2, Transformation().scale(scale, scale), over, expand
        )

    def merge_rotated_page(
        self,
        page2: "PageObject",
        rotation: float,
        over: bool = True,
        expand: bool = False,
    ) -> None:
        """
        Merge ``page2`` into this page like
        :meth:`~pypdf._page.PageObject.merge_page`, rotating the merged stream
        with a transformation matrix.

        Args:
          page2: The page to be merged into this one.
          rotation: The angle of the rotation, in degrees.
          over: Put the content of ``page2`` over this page's content if True
            (default), else under it.
          expand: Whether the page should be expanded to fit the
            dimensions of the page to be merged.

        """
        self.merge_transformed_page(
            page2, Transformation().rotate(rotation), over, expand
        )

    def merge_translated_page(
        self,
        page2: "PageObject",
        tx: float,
        ty: float,
        over: bool = True,
        expand: bool = False,
    ) -> None:
        """
        Merge ``page2`` into this page like
        :meth:`~pypdf._page.PageObject.merge_page`, translating the merged
        stream with a transformation matrix.

        Args:
          page2: The page to be merged into this one.
          tx: The translation on the X axis.
          ty: The translation on the Y axis.
          over: Put the content of ``page2`` over this page's content if True
            (default), else under it.
          expand: Whether the page should be expanded to fit the
            dimensions of the page to be merged.

        """
        self.merge_transformed_page(
            page2, Transformation().translate(tx, ty), over, expand
        )

    def add_transformation(
        self,
        ctm: Union[Transformation, CompressedTransformationMatrix],
        expand: bool = False,
    ) -> None:
        """
        Apply a transformation matrix to the page.

        Args:
            ctm: A 6-element tuple containing the operands of the
                transformation matrix. Alternatively, a
                :py:class:`Transformation<pypdf.Transformation>`
                object can be passed.
            expand: If True, the mediabox is replaced by the bounding box of
                its four transformed corners.

        See :doc:`/user/cropping-and-transforming`.

        """
        matrix = ctm.ctm if isinstance(ctm, Transformation) else ctm

        content = self.get_contents()
        if content is not None:
            transformed = PageObject._add_transformation_matrix(
                content, self.pdf, matrix
            )
            transformed.isolate_graphics_state()
            self.replace_contents(transformed)

        if not expand:
            return

        # Transform the four corners of the mediabox and take their bounding box.
        left = self.mediabox.left.as_numeric()
        bottom = self.mediabox.bottom.as_numeric()
        right = self.mediabox.right.as_numeric()
        top = self.mediabox.top.as_numeric()
        corner_points = ((left, bottom), (left, top), (right, top), (right, bottom))

        coefficients = tuple(float(value) for value in matrix)
        xs = [
            coefficients[0] * x + coefficients[2] * y + coefficients[4]
            for x, y in corner_points
        ]
        ys = [
            coefficients[1] * x + coefficients[3] * y + coefficients[5]
            for x, y in corner_points
        ]

        self.mediabox.lower_left = (min(xs), min(ys))
        self.mediabox.upper_right = (max(xs), max(ys))

    def scale(self, sx: float, sy: float) -> None:
        """
        Scale a page by the given factors by applying a transformation matrix
        to its content and updating the page size.

        This updates the various page boundaries (bleedbox, trimbox, etc.),
        the ``/Rect`` of the page annotations, the ``/BBox`` of every
        viewport in ``/VP`` and the contents of the page.

        Args:
            sx: The scaling factor on horizontal axis.
            sy: The scaling factor on vertical axis.

        """
        self.add_transformation((sx, 0, 0, sy, 0, 0))
        self.bleedbox = self.bleedbox.scale(sx, sy)
        self.trimbox = self.trimbox.scale(sx, sy)
        self.artbox = self.artbox.scale(sx, sy)
        self.cropbox = self.cropbox.scale(sx, sy)
        self.mediabox = self.mediabox.scale(sx, sy)

        if PG.ANNOTS in self:
            annotations = self[PG.ANNOTS]
            if isinstance(annotations, ArrayObject):
                for annotation in annotations:
                    annotation_obj = annotation.get_object()
                    if ADA.Rect in annotation_obj:
                        rectangle = annotation_obj[ADA.Rect]
                        if isinstance(rectangle, ArrayObject):
                            # The rectangle is updated in place.
                            rectangle[0] = FloatObject(float(rectangle[0]) * sx)
                            rectangle[1] = FloatObject(float(rectangle[1]) * sy)
                            rectangle[2] = FloatObject(float(rectangle[2]) * sx)
                            rectangle[3] = FloatObject(float(rectangle[3]) * sy)

        if PG.VP in self:
            viewport = self[PG.VP]
            # /VP is an array of viewport dictionaries; scale every one of
            # them, not only the first. A bare dictionary is still accepted.
            viewports = viewport if isinstance(viewport, ArrayObject) else [viewport]
            for entry in viewports:
                # Resolve the entry so the new box is written to the
                # dictionary itself even when the array holds a reference.
                viewport_obj = entry.get_object()
                if (
                    not isinstance(viewport_obj, DictionaryObject)
                    or "/BBox" not in viewport_obj
                ):
                    continue
                bbox = viewport_obj["/BBox"]
                viewport_obj[NameObject("/BBox")] = RectangleObject(
                    (
                        float(bbox[0]) * sx,
                        float(bbox[1]) * sy,
                        float(bbox[2]) * sx,
                        float(bbox[3]) * sy,
                    )
                )

    def scale_by(self, factor: float) -> None:
        """
        Scale a page uniformly by applying a transformation matrix to its
        content and updating the page size.

        Args:
            factor: The scaling factor, used for both the X and the Y axis.

        """
        self.scale(sx=factor, sy=factor)

    def scale_to(self, width: float, height: float) -> None:
        """
        Scale a page to the given dimensions by applying a transformation
        matrix to its content and updating the page size.

        The factors are derived from the current mediabox width and height.

        Args:
            width: The new width.
            height: The new height.

        """
        horizontal_factor = width / float(self.mediabox.width)
        vertical_factor = height / float(self.mediabox.height)
        self.scale(horizontal_factor, vertical_factor)

    def compress_content_streams(self, level: int = -1) -> None:
        """
        Compress the size of this page by joining all content streams and
        applying a FlateDecode filter.

        However, it is possible that this function will perform no action if
        content stream compression becomes "automatic".

        Args:
            level: The level handed to ``flate_encode`` of the content stream.

        Raises:
            ValueError: If the compressed content can neither be stored
                through the content's own reference nor through a document
                exposing ``_add_object``.

        """
        content = self.get_contents()
        if content is None:
            return
        compressed = content.flate_encode(level)
        try:
            reference = content.indirect_reference
            reference.pdf._objects[reference.idnum - 1] = compressed  # type: ignore
        except AttributeError:
            # No usable reference on the content stream itself: go through the page.
            attached_to_writer = self.indirect_reference is not None and hasattr(
                self.indirect_reference.pdf, "_add_object"
            )
            if not attached_to_writer:
                raise ValueError("Page must be part of a PdfWriter")
            self.replace_contents(compressed)

    @property
    def page_number(self) -> Optional[int]:
        """
        Read-only property giving the index of this page within the PDF file.

        Returns:
            The position of the page in the ``pages`` of its document; None
            if the page has no indirect reference or is not found there.

        """
        reference = self.indirect_reference
        if reference is None:
            return None
        try:
            return reference.pdf.pages.index(self)
        except ValueError:
            return None

    def _debug_for_extract(self) -> str:  # pragma: no cover
        """
        Build a textual dump of the page for debugging text extraction.

        Returns:
            One line per content stream operation, a separator, and then for
            every font resource its name, its representation and, where
            available, its ``/Encoding`` and decoded ``/ToUnicode`` data.
            ``"No Font"`` is appended when the font lookup raises ``KeyError``.

        """
        pieces: list[str] = []
        stream = ContentStream(self["/Contents"].get_object(), self.pdf, "bytes")
        for ope, op in stream.operations:
            # Only the string parts of a TJ array are shown in clear text.
            strings = [x for x in ope[0] if isinstance(x, str)] if op == b"TJ" else []
            pieces.append(
                op.decode("utf-8") + " " + "".join(strings) + repr(ope) + "\n"
            )
        pieces.append("\n=============================\n")
        try:
            fonts = self[PG.RESOURCES]["/Font"]  # type:ignore
            for fo in fonts:
                pieces.append(fo + "\n")
                pieces.append(repr(fonts[fo]) + "\n")
                try:
                    pieces.append(repr(fonts[fo]["/Encoding"]) + "\n")
                except Exception:
                    pass
                try:
                    pieces.append(fonts[fo]["/ToUnicode"].get_data().decode() + "\n")
                except Exception:
                    pass
        except KeyError:
            pieces.append("No Font\n")
        return "".join(pieces)

    def _extract_text(
        self,
        obj: Any,
        pdf: Any,
        orientations: tuple[int, ...] = (0, 90, 180, 270),
        space_width: float = 200.0,
        content_key: Optional[str] = PG.CONTENTS,
        visitor_operand_before: Optional[Callable[[Any, Any, Any, Any], None]] = None,
        visitor_operand_after: Optional[Callable[[Any, Any, Any, Any], None]] = None,
        visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]] = None,
    ) -> str:
        """
        See extract_text for most arguments.

        Args:
            content_key: indicate the default key where to extract data
                None = the object; this allows reusing the function on an XObject
                default = "/Content"

        """
        extractor = TextExtraction()
        font_resources: dict[str, DictionaryObject] = {}
        fonts: dict[str, Font] = {}

        try:
            objr = obj
            while NameObject(PG.RESOURCES) not in objr:
                # /Resources can be inherited so we look to parents
                objr = objr["/Parent"].get_object()
                # If no parents then no /Resources will be available,
                # so an exception will be raised
            resources_dict = cast(DictionaryObject, objr[PG.RESOURCES])
        except Exception:
            # No resources means no text is possible (no font); we consider the
            # file as not damaged, no need to check for TJ or Tj
            return ""

        if (
            not is_null_or_none(resources_dict)
            and "/Font" in resources_dict
            and (font_resources_dict := cast(DictionaryObject, resources_dict["/Font"]))
        ):
            for font_resource in font_resources_dict:
                try:
                    font_resource_object = cast(DictionaryObject, font_resources_dict[font_resource].get_object())
                    font_resources[font_resource] = font_resource_object
                    fonts[font_resource] = Font.from_font_resource(font_resource_object)
                    # Override space width, if applicable
                    if fonts[font_resource].character_widths.get(" ", 0) == 0:
                        fonts[font_resource].space_width = space_width
                except (AttributeError, TypeError):
                    pass

        try:
            content = (
                obj[content_key].get_object() if isinstance(content_key, str) else obj
            )
            if not isinstance(content, ContentStream):
                content = ContentStream(content, pdf, "bytes")
        except (AttributeError, KeyError):  # no content can be extracted (certainly empty page)
            return ""
        # We check all strings are TextStringObjects. ByteStringObjects
        # are strings where the byte->string encoding was unknown, so adding
        # them to the text here would be gibberish.

        # Initialize the extractor with the necessary parameters
        extractor.initialize_extraction(orientations, visitor_text, font_resources, fonts)

        for operands, operator in content.operations:
            if visitor_operand_before is not None:
                visitor_operand_before(operator, operands, extractor.cm_matrix, extractor.tm_matrix)
            # Multiple operators are handled here
            if operator == b"'":
                extractor.process_operation(b"T*", [])
                extractor.process_operation(b"Tj", operands)
            elif operator == b'"':
                extractor.process_operation(b"Tw", [operands[0]])
                extractor.process_operation(b"Tc", [operands[1]])
                extractor.process_operation(b"T*", [])
                extractor.process_operation(b"Tj", operands[2:])
            elif operator == b"TJ":
                # The space width may be smaller than the font width, so the width should be 95%.
                _confirm_space_width = extractor._space_width * 0.95
                if operands:
                    for op in operands[0]:
                        if isinstance(op, (str, bytes)):
                            extractor.process_operation(b"Tj", [op])
                        if isinstance(op, (int, float, NumberObject, FloatObject)) and (
                            abs(float(op)) >= _confirm_space_width
                            and extractor.text
                            and extractor.text[-1] != " "
                        ):
                            extractor.process_operation(b"Tj", [" "])
            elif operator == b"TD":
                extractor.process_operation(b"TL", [-operands[1]])
                extractor.process_operation(b"Td", operands)
            elif operator == b"Do":
                extractor.output += extractor.text
                if visitor_text is not None:
                    visitor_text(
                        extractor.text,
                        extractor.memo_cm,
                        extractor.memo_tm,
                        extractor.font_resource,
                        extractor.font_size,
                    )
                try:
                    if extractor.output[-1] != "\n":
                        extractor.output += "\n"
                        if visitor_text is not None:
                            visitor_text(
                                "\n",
                                extractor.memo_cm,
                                extractor.memo_tm,
                                extractor.font_resource,
                                extractor.font_size,
                            )
                except IndexError:
                    pass
                try:
                    xobj = resources_dict["/XObject"]
                    if xobj[operands[0]]["/Subtype"] != "/Image":  # type: ignore
                        text = self.extract_xform_text(
                            xobj[operands[0]],  # type: ignore
                            orientations,
                            space_width,
                            visitor_operand_before,
                            visitor_operand_after,
                            visitor_text,
                        )
                        extractor.output += text
                        if visitor_text is not None:
                            visitor_text(
                                text,
                                extractor.memo_cm,
                                extractor.memo_tm,
                                extractor.font_resource,
                                extractor.font_size,
                            )
                except Exception as exception:
                    logger_warning(
                        f"Impossible to decode XFormObject {operands[0]}: {exception}",
                        __name__,
                    )
                finally:
                    extractor.text = ""
                    extractor.memo_cm = extractor.cm_matrix.copy()
                    extractor.memo_tm = extractor.tm_matrix.copy()
            else:
                extractor.process_operation(operator, operands)
            if visitor_operand_after is not None:
                visitor_operand_after(operator, operands, extractor.cm_matrix, extractor.tm_matrix)
        extractor.output += extractor.text  # just in case
        if extractor.text != "" and visitor_text is not None:
            visitor_text(
                extractor.text,
                extractor.memo_cm,
                extractor.memo_tm,
                extractor.font_resource,
                extractor.font_size,
            )
        return extractor.output

    def _layout_mode_fonts(self) -> dict[str, Font]:
        """
        Get fonts formatted for "layout" mode text extraction.

        Font resources are collected from the page itself and then from every
        ancestor reached through ``/Parent``. A font name that was already
        found on a closer node is kept, so a font declared on the page is not
        replaced by an inherited font that happens to share its name.

        Returns:
            Dict[str, Font]: dictionary of Font instances keyed by font name

        """
        # Font retrieval logic adapted from pypdf.PageObject._extract_text()
        objr: Any = self
        fonts: dict[str, Font] = {}
        while objr is not None:
            try:
                resources_dict: Any = objr[PG.RESOURCES]
            except KeyError:
                resources_dict = {}
            if "/Font" in resources_dict and self.pdf is not None:
                font_resources = resources_dict["/Font"]
                for font_name in font_resources:
                    # The walk goes from the page up to the root of the page
                    # tree: the first definition seen is the most specific one
                    # and must win over definitions inherited from ancestors.
                    if font_name not in fonts:
                        fonts[font_name] = Font.from_font_resource(font_resources[font_name])
            try:
                objr = objr["/Parent"].get_object()
            except KeyError:
                objr = None

        return fonts

    def _layout_mode_text(
        self,
        space_vertically: bool = True,
        scale_weight: float = 1.25,
        strip_rotated: bool = True,
        debug_path: Optional[Path] = None,
        font_height_weight: float = 1,
    ) -> str:
        """
        Get text preserving fidelity to source PDF text layout.

        Args:
            space_vertically: include blank lines inferred from y distance + font
                height. Defaults to True.
            scale_weight: multiplier for string length when calculating weighted
                average character width. Defaults to 1.25.
            strip_rotated: Removes text that is rotated w.r.t. to the page from
                layout mode output. Defaults to True.
            debug_path (Path | None): if supplied, must target a directory.
                creates the following files with debug information for layout mode
                functions if supplied:
                  - fonts.json: output of self._layout_mode_fonts
                  - tjs.json: individual text render ops with corresponding transform matrices
                  - bts.json: text render ops left justified and grouped by BT/ET operators
                  - bt_groups.json: BT/ET operations grouped by rendered y-coord (aka lines)
                Defaults to None.
            font_height_weight: multiplier for font height when calculating
                blank lines. Defaults to 1.

        Returns:
            str: multiline string containing page text in a fixed width format that
                closely adheres to the rendered layout in the source pdf.

        """
        fonts = self._layout_mode_fonts()
        if debug_path:  # pragma: no cover
            import json  # noqa: PLC0415

            debug_path.joinpath("fonts.json").write_text(
                json.dumps(fonts, indent=2, default=asdict),
                "utf-8"
            )

        ops = iter(
            ContentStream(self["/Contents"].get_object(), self.pdf, "bytes").operations
        )
        bt_groups = _layout_mode.text_show_operations(
            ops, fonts, strip_rotated, debug_path
        )

        if not bt_groups:
            return ""

        ty_groups = _layout_mode.y_coordinate_groups(bt_groups, debug_path)

        char_width = _layout_mode.fixed_char_width(bt_groups, scale_weight)

        return _layout_mode.fixed_width_page(ty_groups, char_width, space_vertically, font_height_weight)

    def extract_text(
        self,
        *args: Any,
        orientations: Union[int, tuple[int, ...]] = (0, 90, 180, 270),
        space_width: float = 200.0,
        visitor_operand_before: Optional[Callable[[Any, Any, Any, Any], None]] = None,
        visitor_operand_after: Optional[Callable[[Any, Any, Any, Any], None]] = None,
        visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]] = None,
        extraction_mode: Literal["plain", "layout"] = "plain",
        **kwargs: Any,
    ) -> str:
        """
        Locate all text drawing commands, in the order they are provided in the
        content stream, and extract the text.

        This works well for some PDF files, but poorly for others, depending on
        the generator used. This will be refined in the future.

        Do not rely on the order of text coming out of this function, as it
        will change if this function is made more sophisticated.

        Arabic and Hebrew are extracted in the correct order.
        If required a custom RTL range of characters can be defined;
        see function set_custom_rtl.

        Additionally you can provide visitor methods to get informed on all
        operations and all text objects.
        For example in some PDF files this can be useful to parse tables.

        Positional arguments are accepted for backward compatibility only:
        either ``(orientations[, space_width])`` or, when the first positional
        argument is a string, ``(str, <ignored>, orientations[, space_width])``.
        They are not evaluated in "layout" mode.

        Args:
            orientations: list of orientations extract_text will look for
                default = (0, 90, 180, 270)
                note: currently only 0 (up),90 (turned left), 180 (upside down),
                270 (turned right)
                Silently ignored in "layout" mode.
            space_width: force default space width
                if not extracted from font (default: 200)
                Silently ignored in "layout" mode.
            visitor_operand_before: function to be called before processing an operation.
                It has four arguments: operator, operand-arguments,
                current transformation matrix and text matrix.
                Ignored with a warning in "layout" mode.
            visitor_operand_after: function to be called after processing an operation.
                It has four arguments: operator, operand-arguments,
                current transformation matrix and text matrix.
                Ignored with a warning in "layout" mode.
            visitor_text: function to be called when extracting some text at some position.
                It has five arguments: text, current transformation matrix,
                text matrix, font-dictionary and font-size.
                The font-dictionary may be None in case of unknown fonts.
                If not None it may e.g. contain key "/BaseFont" with value "/Arial,Bold".
                Ignored with a warning in "layout" mode.
            extraction_mode (Literal["plain", "layout"]): "plain" for legacy functionality,
                "layout" for experimental layout mode functionality.
                NOTE: orientations, space_width, and visitor_* parameters are NOT respected
                in "layout" mode.

        kwargs:
            layout_mode_space_vertically (bool): include blank lines inferred from
                y distance + font height. Defaults to True.
            layout_mode_scale_weight (float): multiplier for string length when calculating
                weighted average character width. Defaults to 1.25.
            layout_mode_strip_rotated (bool): layout mode does not support rotated text.
                Set to False to include rotated text anyway. If rotated text is discovered,
                layout will be degraded and a warning will result. Defaults to True.
            layout_mode_debug_path (Path | None): if supplied, must target a directory.
                creates the following files with debug information for layout mode
                functions if supplied:

                  - fonts.json: output of self._layout_mode_fonts
                  - tjs.json: individual text render ops with corresponding transform matrices
                  - bts.json: text render ops left justified and grouped by BT/ET operators
                  - bt_groups.json: BT/ET operations grouped by rendered y-coord (aka lines)
            layout_mode_font_height_weight (float): multiplier for font height when calculating
                blank lines. Defaults to 1.

        Returns:
            The extracted text

        Raises:
            ValueError: If ``extraction_mode`` is neither "plain" nor "layout".
            TypeError: If a positional argument has an unexpected type ("plain" mode only).

        """
        if extraction_mode not in ("plain", "layout"):
            raise ValueError(f"Invalid text extraction mode '{extraction_mode}'")

        if extraction_mode == "layout":
            # Visitors are not supported by the layout engine: tell the caller
            # about each one that was supplied instead of dropping it silently.
            supplied_visitors = {
                "visitor_operand_before": visitor_operand_before,
                "visitor_operand_after": visitor_operand_after,
                "visitor_text": visitor_text,
            }
            for visitor, callback in supplied_visitors.items():
                if callback:
                    logger_warning(
                        f"Argument {visitor} is ignored in layout mode",
                        __name__,
                    )
            return self._layout_mode_text(
                space_vertically=kwargs.get("layout_mode_space_vertically", True),
                scale_weight=kwargs.get("layout_mode_scale_weight", 1.25),
                strip_rotated=kwargs.get("layout_mode_strip_rotated", True),
                debug_path=kwargs.get("layout_mode_debug_path"),
                font_height_weight=kwargs.get("layout_mode_font_height_weight", 1),
            )

        if args:
            # A leading string shifts the legacy positional layout by two
            # slots; otherwise the orientations come first.
            if isinstance(args[0], str):
                orientations_at = 2
            elif isinstance(args[0], (tuple, int)):
                orientations_at = 0
            else:
                raise TypeError(f"Invalid positional parameter {args[0]}")
            space_width_at = orientations_at + 1

            if len(args) > orientations_at:
                if not isinstance(args[orientations_at], (tuple, int)):
                    raise TypeError(f"Invalid positional parameter {args[orientations_at]}")
                orientations = args[orientations_at]
            if len(args) > space_width_at:
                if not isinstance(args[space_width_at], (float, int)):
                    raise TypeError(f"Invalid positional parameter {args[space_width_at]}")
                space_width = args[space_width_at]

        if isinstance(orientations, int):
            orientations = (orientations,)

        return self._extract_text(
            self,
            self.pdf,
            orientations,
            space_width,
            PG.CONTENTS,
            visitor_operand_before,
            visitor_operand_after,
            visitor_text,
        )

    def extract_xform_text(
        self,
        xform: EncodedStreamObject,
        orientations: tuple[int, ...] = (0, 90, 180, 270),
        space_width: float = 200.0,
        visitor_operand_before: Optional[Callable[[Any, Any, Any, Any], None]] = None,
        visitor_operand_after: Optional[Callable[[Any, Any, Any, Any], None]] = None,
        visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]] = None,
    ) -> str:
        """
        Extract text from an XObject.

        The arguments are forwarded unchanged to ``_extract_text``, with the
        XObject itself used as the content source.

        Args:
            xform: the XObject whose content stream is read
            orientations: orientations to look for; the default
                ``(0, 90, 180, 270)`` matches the default of ``extract_text``
            space_width:  force default space width (if not extracted from font (default 200)
            visitor_operand_before: function called before processing an operation
            visitor_operand_after: function called after processing an operation
            visitor_text: function called when some text is extracted

        Returns:
            The extracted text

        """
        return self._extract_text(
            xform,
            self.pdf,
            orientations,
            space_width,
            None,
            visitor_operand_before,
            visitor_operand_after,
            visitor_text,
        )

    def _get_fonts(self) -> tuple[set[str], set[str]]:
        """
        Collect the font names used by this page, split by embedding status.

        Returns:
            A tuple (set of embedded fonts, set of unembedded fonts)

        """
        page = self.get_object()
        assert isinstance(page, DictionaryObject)
        used, embedded = _get_fonts_walk(page, set(), set())
        # Whatever is used but has no embedded font program is "unembedded".
        return embedded, used - embedded

    # Page boundary boxes. Each accessor is created from the key of the box
    # and a tuple of further keys; presumably those keys are consulted, in
    # order, when the box itself is absent (the cropbox documentation below
    # names the mediabox as its default) -- confirm in
    # _create_rectangle_accessor.
    mediabox = _create_rectangle_accessor(PG.MEDIABOX, ())
    """A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
    default user space units, defining the boundaries of the physical medium on
    which the page is intended to be displayed or printed."""

    cropbox = _create_rectangle_accessor("/CropBox", (PG.MEDIABOX,))
    """
    A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
    default user space units, defining the visible region of default user
    space.

    When the page is displayed or printed, its contents are to be clipped
    (cropped) to this rectangle and then imposed on the output medium in some
    implementation-defined manner. Default value: same as
    :attr:`mediabox<mediabox>`.
    """

    bleedbox = _create_rectangle_accessor("/BleedBox", ("/CropBox", PG.MEDIABOX))
    """A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
    default user space units, defining the region to which the contents of the
    page should be clipped when output in a production environment."""

    trimbox = _create_rectangle_accessor("/TrimBox", ("/CropBox", PG.MEDIABOX))
    """A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
    default user space units, defining the intended dimensions of the finished
    page after trimming."""

    artbox = _create_rectangle_accessor("/ArtBox", ("/CropBox", PG.MEDIABOX))
    """A :class:`RectangleObject<pypdf.generic.RectangleObject>`, expressed in
    default user space units, defining the extent of the page's meaningful
    content as intended by the page's creator."""

    @property
    def annotations(self) -> Optional[ArrayObject]:
        """Return the ``/Annots`` array of the page, or None if the page has none."""
        return cast(ArrayObject, self["/Annots"]) if "/Annots" in self else None

    @annotations.setter
    def annotations(self, value: Optional[ArrayObject]) -> None:
        """
        Replace or remove the annotations array of the page.

        Passing None removes the ``/Annots`` entry, if there is one.

        Typically you do not want to set this value, but append to it.
        If you append to it, remember to add the object first to the writer
        and only add the indirect object.
        """
        if value is not None:
            self[NameObject("/Annots")] = value
        elif "/Annots" in self:
            del self[NameObject("/Annots")]


class _VirtualList(Sequence[PageObject]):
    def __init__(
        self,
        length_function: Callable[[], int],
        get_function: Callable[[int], PageObject],
    ) -> None:
        self.length_function = length_function
        self.get_function = get_function
        self.current = -1

    def __len__(self) -> int:
        return self.length_function()

    @overload
    def __getitem__(self, index: int) -> PageObject:
        ...

    @overload
    def __getitem__(self, index: slice) -> Sequence[PageObject]:
        ...

    def __getitem__(
        self, index: Union[int, slice]
    ) -> Union[PageObject, Sequence[PageObject]]:
        if isinstance(index, slice):
            indices = range(*index.indices(len(self)))
            cls = type(self)
            return cls(indices.__len__, lambda idx: self[indices[idx]])
        if not isinstance(index, int):
            raise TypeError("Sequence indices must be integers")
        len_self = len(self)
        if index < 0:
            # support negative indexes
            index += len_self
        if not (0 <= index < len_self):
            raise IndexError("Sequence index out of range")
        return self.get_function(index)

    def __delitem__(self, index: Union[int, slice]) -> None:
        if isinstance(index, slice):
            r = list(range(*index.indices(len(self))))
            # pages have to be deleted from last to first
            r.sort()
            r.reverse()
            for p in r:
                del self[p]  # recursive call
            return
        if not isinstance(index, int):
            raise TypeError("Index must be integers")
        len_self = len(self)
        if index < 0:
            # support negative indexes
            index += len_self
        if not (0 <= index < len_self):
            raise IndexError("Index out of range")
        ind = self[index].indirect_reference
        assert ind is not None
        parent: Optional[PdfObject] = cast(DictionaryObject, ind.get_object()).get(
            "/Parent", None
        )
        first = True
        while parent is not None:
            parent = cast(DictionaryObject, parent.get_object())
            try:
                i = cast(ArrayObject, parent["/Kids"]).index(ind)
                del cast(ArrayObject, parent["/Kids"])[i]
                first = False
                try:
                    assert ind is not None
                    del ind.pdf.flattened_pages[index]  # case of page in a Reader
                except Exception:  # pragma: no cover
                    pass
                if "/Count" in parent:
                    parent[NameObject("/Count")] = NumberObject(
                        cast(int, parent["/Count"]) - 1
                    )
                if len(cast(ArrayObject, parent["/Kids"])) == 0:
                    # No more objects in this part of this subtree
                    ind = parent.indirect_reference
                parent = parent.get("/Parent", None)
            except ValueError:  # from index
                if first:
                    raise PdfReadError(f"Page not found in page tree: {ind}")
                break

    def __iter__(self) -> Iterator[PageObject]:
        for i in range(len(self)):
            yield self[i]

    def __str__(self) -> str:
        p = [f"PageObject({i})" for i in range(self.length_function())]
        return f"[{', '.join(p)}]"


def _get_fonts_walk(
    obj: DictionaryObject,
    fnt: set[str],
    emb: set[str],
) -> tuple[set[str], set[str]]:
    """
    Get the set of all fonts and all embedded fonts.

    Args:
        obj: Page resources dictionary
        fnt: font
        emb: embedded fonts

    Returns:
        A tuple (fnt, emb)

    If there is a key called 'BaseFont', that is a font that is used in the document.
    If there is a key called 'FontName' and another key in the same dictionary object
    that is called 'FontFilex' (where x is null, 2, or 3), then that fontname is
    embedded.

    We create and add to two sets, fnt = fonts used and emb = fonts embedded.

    """
    fontkeys = ("/FontFile", "/FontFile2", "/FontFile3")

    def process_font(f: DictionaryObject) -> None:
        nonlocal fnt, emb
        f = cast(DictionaryObject, f.get_object())  # to be sure
        if "/BaseFont" in f:
            fnt.add(cast(str, f["/BaseFont"]))

        if (
            ("/CharProcs" in f)
            or (
                "/FontDescriptor" in f
                and any(
                    x in cast(DictionaryObject, f["/FontDescriptor"]) for x in fontkeys
                )
            )
            or (
                "/DescendantFonts" in f
                and "/FontDescriptor"
                in cast(
                    DictionaryObject,
                    cast(ArrayObject, f["/DescendantFonts"])[0].get_object(),
                )
                and any(
                    x
                    in cast(
                        DictionaryObject,
                        cast(
                            DictionaryObject,
                            cast(ArrayObject, f["/DescendantFonts"])[0].get_object(),
                        )["/FontDescriptor"],
                    )
                    for x in fontkeys
                )
            )
        ):
            # the list comprehension ensures there is FontFile
            try:
                emb.add(cast(str, f["/BaseFont"]))
            except KeyError:
                emb.add("(" + cast(str, f["/Subtype"]) + ")")

    if "/DR" in obj and "/Font" in cast(DictionaryObject, obj["/DR"]):
        for f in cast(DictionaryObject, cast(DictionaryObject, obj["/DR"])["/Font"]):
            process_font(f)
    if "/Resources" in obj:
        if "/Font" in cast(DictionaryObject, obj["/Resources"]):
            for f in cast(
                DictionaryObject, cast(DictionaryObject, obj["/Resources"])["/Font"]
            ).values():
                process_font(f)
        if "/XObject" in cast(DictionaryObject, obj["/Resources"]):
            for x in cast(
                DictionaryObject, cast(DictionaryObject, obj["/Resources"])["/XObject"]
            ).values():
                _get_fonts_walk(cast(DictionaryObject, x.get_object()), fnt, emb)
    if "/Annots" in obj:
        for a in cast(ArrayObject, obj["/Annots"]):
            _get_fonts_walk(cast(DictionaryObject, a.get_object()), fnt, emb)
    if "/AP" in obj:
        if (
            cast(DictionaryObject, cast(DictionaryObject, obj["/AP"])["/N"]).get(
                "/Type"
            )
            == "/XObject"
        ):
            _get_fonts_walk(
                cast(DictionaryObject, cast(DictionaryObject, obj["/AP"])["/N"]),
                fnt,
                emb,
            )
        else:
            for a in cast(DictionaryObject, cast(DictionaryObject, obj["/AP"])["/N"]):
                _get_fonts_walk(cast(DictionaryObject, a), fnt, emb)
    return fnt, emb  # return the sets for each page


================================================
FILE: pypdf/_page_labels.py
================================================
"""
Page labels are shown by PDF viewers as "the page number".

A page has a numeric index, starting at 0. Additionally, the page
has a label. In the most simple case:

    label = index + 1

However, the title page and the table of contents might have Roman numerals as
page labels. This makes things more complicated.

Example 1
---------

>>> reader.root_object["/PageLabels"]["/Nums"]
[0, IndirectObject(18, 0, 139929798197504),
 8, IndirectObject(19, 0, 139929798197504)]
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][1])
{'/S': '/r'}
>>> reader.get_object(reader.root_object["/PageLabels"]["/Nums"][3])
{'/S': '/D'}

Example 2
---------
The following is a document with pages labeled
i, ii, iii, iv, 1, 2, 3, A-8, A-9, ...

1 0 obj
    << /Type /Catalog
       /PageLabels << /Nums [
                        0 << /S /r >>
                        4 << /S /D >>
                        7 << /S /D
                             /P ( A- )
                             /St 8
                        >>
                        % A number tree containing
                        % three page label dictionaries
                        ]
                   >>
    ...
    >>
endobj


§12.4.2 PDF Specification 1.7 and 2.0
=====================================

Entries in a page label dictionary
----------------------------------
The /S key:
D       Decimal Arabic numerals
R       Uppercase Roman numerals
r       Lowercase Roman numerals
A       Uppercase letters (A to Z for the first 26 pages,
                           AA to ZZ for the next 26, and so on)
a       Lowercase letters (a to z for the first 26 pages,
                           aa to zz for the next 26, and so on)
"""

from collections.abc import Callable, Iterator
from typing import Optional, cast

from ._protocols import PdfCommonDocProtocol
from ._utils import logger_warning
from .generic import (
    ArrayObject,
    DictionaryObject,
    NullObject,
    NumberObject,
    is_null_or_none,
)


def number2uppercase_roman_numeral(num: int) -> str:
    """
    Return ``num`` written with uppercase Roman numerals, e.g. 1994 -> "MCMXCIV".

    Zero yields an empty string.
    """
    # Ordered from the largest value down, subtractive forms included.
    symbols = (
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    )
    pieces: list[str] = []
    remaining = num
    for value, symbol in symbols:
        count, remaining = divmod(remaining, value)
        pieces.append(symbol * count)
        if remaining <= 0:
            break
    return "".join(pieces)


def number2lowercase_roman_numeral(number: int) -> str:
    """Return ``number`` written with lowercase Roman numerals, e.g. 4 -> "iv"."""
    uppercase = number2uppercase_roman_numeral(number)
    return uppercase.lower()


def number2uppercase_letter(number: int) -> str:
    """
    Return ``number`` written with uppercase letters.

    1..26 give "A".."Z"; larger numbers continue with "AA", "AB", ...

    Raises:
        ValueError: If ``number`` is not positive.
    """
    if number <= 0:
        raise ValueError("Expecting a positive number")
    letters: list[str] = []
    remaining = number
    while remaining > 0:
        # The numbering has no zero digit, hence the shift by one before
        # splitting off the lowest "digit".
        remaining, offset = divmod(remaining - 1, 26)
        letters.append(chr(ord("A") + offset))
    return "".join(reversed(letters))


def number2lowercase_letter(number: int) -> str:
    """Return ``number`` written with lowercase letters, e.g. 1 -> "a"."""
    uppercase = number2uppercase_letter(number)
    return uppercase.lower()


def get_label_from_nums(dictionary_object: DictionaryObject, index: int) -> str:
    """
    Build the label of page ``index`` from the ``/Nums`` array of a number tree node.

    Args:
        dictionary_object: dictionary holding the ``/Nums`` array
        index: The index of the page

    Returns:
        The label of the page; ``str(index + 1)`` when no usable page label
        dictionary is found.

    """
    # [Nums] shall be an array of the form
    #   [ key_1 value_1 key_2 value_2 ... key_n value_n ]
    # where each key_i is an integer and the corresponding
    # value_i shall be the object associated with that key.
    # The keys shall be sorted in numerical order,
    # analogously to the arrangement of keys in a name tree
    # as described in 7.9.6, "Name Trees."
    nums = cast(ArrayObject, dictionary_object["/Nums"])
    label_dict = None
    range_start = 0
    position = 0
    # Advance pair by pair; stop on the last pair or on the pair directly
    # before the first key that lies beyond the requested index.
    while position < len(nums):
        range_start = nums[position]
        label_dict = nums[position + 1].get_object()
        following = position + 2
        if following == len(nums) or nums[following] > index:
            break
        position = following

    # if /Nums array is not following the specification or if /Nums is empty
    if not isinstance(label_dict, dict):
        return str(index + 1)  # Fallback

    numbering_styles: dict[Optional[str], Callable[[int], str]] = {
        None: lambda _: "",
        "/D": str,
        "/R": number2uppercase_roman_numeral,
        "/r": number2lowercase_roman_numeral,
        "/A": number2uppercase_letter,
        "/a": number2lowercase_letter,
    }
    first_number = label_dict.get("/St", 1)
    prefix = label_dict.get("/P", "")
    to_text = numbering_styles[label_dict.get("/S")]
    return prefix + to_text(index - range_start + first_number)


def index2label(reader: PdfCommonDocProtocol, index: int) -> str:
    """
    Determine the label of a page from the ``/PageLabels`` number tree.

    See 7.9.7 "Number Trees".

    Args:
        reader: The PdfReader
        index: The index of the page

    Returns:
        The label of the page, e.g. "iv" or "4". When the document has no page
        labels, or none can be determined, ``str(index + 1)`` is returned.

    """
    root = cast(DictionaryObject, reader.root_object)
    if "/PageLabels" not in root:
        return str(index + 1)  # Fallback
    number_tree = cast(DictionaryObject, root["/PageLabels"].get_object())
    if "/Nums" in number_tree:
        return get_label_from_nums(number_tree, index)
    if "/Kids" in number_tree and not isinstance(number_tree["/Kids"], NullObject):
        # number_tree = {'/Kids': [IndirectObject(7333, 0, 140132998195856), ...]}
        # Limit maximum depth.
        depth = 0
        while depth < 100:
            descended = False
            for kid in cast(list[DictionaryObject], number_tree["/Kids"]):
                # kid = {'/Limits': [0, 63], '/Nums': [0, {'/P': 'C1'}, ...]}
                limits = cast(list[int], kid["/Limits"])
                if not (limits[0] <= index <= limits[1]):
                    continue
                if is_null_or_none(kid.get("/Kids", None)):
                    # Leaf node covering the index: its /Nums holds the answer.
                    return get_label_from_nums(kid, index)
                # Recursive definition.
                depth += 1
                if depth == 100:  # pragma: no cover
                    raise NotImplementedError(
                        "Too deep nesting is not supported."
                    )
                number_tree = kid
                descended = True
                # Continue one level deeper with the next iteration of the
                # `while` loop.
                break
            if not descended:
                # No kid covers the index: leave the `while` loop and continue
                # with the fallback.
                break

    logger_warning(f"Could not reliably determine page label for {index}.", __name__)
    return str(index + 1)  # Fallback if neither /Nums nor /Kids is in the number_tree


def nums_insert(
    key: NumberObject,
    value: DictionaryObject,
    nums: ArrayObject,
) -> None:
    """
    Put a key, value pair into a Nums array, keeping the keys in ascending order.

    An entry that already exists for ``key`` gets its value replaced.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        value: value of the entry
        nums: Nums array to modify

    Raises:
        ValueError: If ``nums`` has an odd number of elements.

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")

    # Walk backwards over the pairs to the first pair whose key is not
    # smaller than the new key.
    position = len(nums)
    while position != 0 and key <= nums[position - 2]:
        position -= 2

    if position < len(nums) and key == nums[position]:
        nums[position + 1] = value
        return
    nums.insert(position, key)
    nums.insert(position + 1, value)


def nums_clear_range(
    key: NumberObject,
    page_index_to: int,
    nums: ArrayObject,
) -> None:
    """
    Remove all entries in a number tree in a range after an entry.

    Every (key, value) pair that follows the entry for ``key`` and whose key
    is less than or equal to ``page_index_to`` is removed. The entry for
    ``key`` itself is kept.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry before the range
        page_index_to: The page index of the upper limit of the range
        nums: Nums array to modify

    Raises:
        ValueError: If ``nums`` has an odd number of elements, if
            ``page_index_to`` is smaller than ``key``, or if ``key`` is not
            contained in ``nums`` (raised by ``nums.index``).

    """
    if len(nums) % 2 != 0:
        raise ValueError("A nums like array must have an even number of elements")
    if page_index_to < key:
        raise ValueError("page_index_to must be greater or equal than key")

    # First pair after the entry for ``key``.
    start = nums.index(key) + 2
    # Find the end of the run of pairs to drop, then delete it in a single
    # slice operation instead of popping element by element (each pop would
    # shift the whole tail of the array).
    stop = start
    while stop < len(nums) and nums[stop] <= page_index_to:
        stop += 2
    del nums[start:stop]


def nums_next(
    key: NumberObject,
    nums: ArrayObject,
) -> tuple[Optional[NumberObject], Optional[DictionaryObject]]:
    """
    Look up the entry that directly follows ``key`` in a Nums array.

    See 7.9.7 "Number Trees".

    Args:
        key: number key of the entry
        nums: Nums array

    Returns:
        The (key, value) pair stored after the entry for ``key``, or
        ``(None, None)`` when ``key`` belongs to the last entry.

    Raises:
        ValueError: If ``nums`` has an odd number of elements or does not
            contain ``key`` (raised by ``nums.index``).

    """
    if len(nums) % 2:
        raise ValueError("A nums like array must have an even number of elements")

    following = nums.index(key) + 2
    if following >= len(nums):
        # ``key`` was the last entry; there is no successor.
        return (None, None)
    return (nums[following], nums[following + 1])


================================================
FILE: pypdf/_protocols.py
================================================
"""Helpers for working with PDF types."""

from abc import abstractmethod
from pathlib import Path
from typing import IO, Any, Optional, Protocol, Union

from ._utils import StrByteType, StreamType


class PdfObjectProtocol(Protocol):
    """
    Structural type listing the members expected from a PDF object.

    All method bodies are placeholders (``...``); the class only declares
    signatures for static type checking.
    """

    # Reference associated with the object; deliberately typed as ``Any``.
    indirect_reference: Any

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Union[tuple[str, ...], list[str], None] = (),
    ) -> Any:
        """
        Declare the cloning entry point.

        Args:
            pdf_dest: Destination passed to the clone operation.
            force_duplicate: Flag defaulting to ``False``.
            ignore_fields: Field names (tuple or list of strings) or ``None``;
                defaults to an empty tuple.

        """
        ...  # pragma: no cover

    def _reference_clone(self, clone: Any, pdf_dest: Any) -> Any:
        """Declare the helper taking a clone and its destination."""
        ...  # pragma: no cover

    def get_object(self) -> Optional["PdfObjectProtocol"]:
        """Declare the accessor returning a PDF object or ``None``."""
        ...  # pragma: no cover

    def hash_value(self) -> bytes:
        """Declare the accessor returning a hash of the object as bytes."""
        ...  # pragma: no cover

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Declare the serialization entry point.

        Args:
            stream: Stream to write to.
            encryption_key: Optional key given as ``str`` or ``bytes``;
                defaults to ``None``.

        """
        ...  # pragma: no cover


class XmpInformationProtocol(PdfObjectProtocol):
    """Type for XMP information objects; adds no members to PdfObjectProtocol."""

    # NOTE(review): ``Protocol`` is not listed again in the bases here, while
    # PdfReaderProtocol and PdfWriterProtocol in this module do list it.
    # Confirm whether this class is meant to be a protocol itself.
    pass


class PdfCommonDocProtocol(Protocol):
    """
    Structural type for the members common to document objects.

    PdfReaderProtocol and PdfWriterProtocol in this module both extend it.
    All bodies are placeholders (``...``).
    """

    @property
    def pdf_header(self) -> str:
        """Declare the PDF header accessor; returns a string."""
        ...  # pragma: no cover

    @property
    def pages(self) -> list[Any]:
        """Declare the pages accessor; returns a list."""
        ...  # pragma: no cover

    @property
    def root_object(self) -> PdfObjectProtocol:
        """Declare the root object accessor; returns a PDF object."""
        ...  # pragma: no cover

    def get_object(self, indirect_reference: Any) -> Optional[PdfObjectProtocol]:
        """Declare the lookup of an object by reference; may return ``None``."""
        ...  # pragma: no cover

    @property
    def strict(self) -> bool:
        """Declare the strict-mode flag accessor."""
        ...  # pragma: no cover


class PdfReaderProtocol(PdfCommonDocProtocol, Protocol):
    """Structural type extending PdfCommonDocProtocol with reader members."""

    @property
    @abstractmethod
    def xref(self) -> dict[int, dict[int, Any]]:
        """Declare the cross-reference accessor: a dict of dicts keyed by ints."""
        ...  # pragma: no cover

    @property
    @abstractmethod
    def trailer(self) -> dict[str, Any]:
        """Declare the trailer accessor: a dict keyed by strings."""
        ...  # pragma: no cover


class PdfWriterProtocol(PdfCommonDocProtocol, Protocol):
    """Structural type extending PdfCommonDocProtocol with writer members."""

    # List of objects held by the writer.
    _objects: list[Any]
    # Nested int-keyed mapping ending in an int.
    # NOTE(review): presumably maps source object ids to translated ids - confirm.
    _id_translated: dict[int, dict[int, int]]

    # Boolean flag declared for writers.
    incremental: bool
    _reader: Any  # PdfReader

    @abstractmethod
    def write(self, stream: Union[Path, StrByteType]) -> tuple[bool, IO[Any]]:
        """
        Declare the write entry point.

        Args:
            stream: A path or a string/byte-stream target.

        Returns:
            A tuple of a boolean and an IO object.

        """
        ...  # pragma: no cover

    @abstractmethod
    def _add_object(self, obj: Any) -> Any:
        """Declare the helper that takes an object to add."""
        ...  # pragma: no cover


================================================
FILE: pypdf/_reader.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import os
import re
import sys
from collections.abc import Iterable
from io import BytesIO, UnsupportedOperation
from pathlib import Path
from types import TracebackType
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Optional,
    Union,
    cast,
)

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

from ._doc_common import PdfDocCommon, convert_to_int
from ._encryption import Encryption, PasswordType
from ._utils import (
    WHITESPACES_AS_BYTES,
    StrByteType,
    StreamType,
    logger_warning,
    read_non_whitespace,
    read_previous_line,
    read_until_whitespace,
    skip_over_comment,
    skip_over_whitespace,
)
from .constants import TrailerKeys as TK
from .errors import (
    EmptyFileError,
    FileNotDecryptedError,
    LimitReachedError,
    PdfReadError,
    PdfStreamError,
    WrongPasswordError,
)
from .generic import (
    ArrayObject,
    ContentStream,
    DecodedStreamObject,
    DictionaryObject,
    EncodedStreamObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    PdfObject,
    StreamObject,
    TextStringObject,
    is_null_or_none,
    read_object,
)
from .xmp import XmpInformation

if TYPE_CHECKING:
    from ._page import PageObject


class PdfReader(PdfDocCommon):
    """
    Initialize a PdfReader object.

    This operation can take some time, as the PDF stream's cross-reference
    tables are read into memory.

    Args:
        stream: A File object or an object that supports the standard read
            and seek methods similar to a File object. Could also be a
            string representing a path to a PDF file.
        strict: Determines whether user should be warned of all
            problems and also causes some correctable problems to be fatal.
            Defaults to ``False``.
        password: Decrypt PDF file at initialization. If the
            password is None, the file will not be decrypted.
            Defaults to ``None``.
        root_object_recovery_limit: The maximum number of objects to query
            for recovering the Root object in non-strict mode. To disable
            this security measure, pass ``None``.

    """

    def __init__(
        self,
        stream: Union[StrByteType, Path],
        strict: bool = False,
        password: Union[None, str, bytes] = None,
        *,
        root_object_recovery_limit: Optional[int] = 10_000,
    ) -> None:
        """
        Set up the reader state, parse the stream and handle encryption.

        Raises:
            PdfReadError: If a password is given for a file that is not
                encrypted.

        """
        self.strict = strict
        self.flattened_pages: Optional[list[PageObject]] = None

        #: Storage of parsed PDF objects.
        self.resolved_objects: dict[tuple[Any, Any], Optional[PdfObject]] = {}

        self._startxref: int = 0
        self.xref_index = 0
        self.xref: dict[int, dict[Any, Any]] = {}
        self.xref_free_entry: dict[int, dict[Any, Any]] = {}
        self.xref_objStm: dict[int, tuple[Any, Any]] = {}
        self.trailer = DictionaryObject()

        # Security parameters.
        self._root_object_recovery_limit = (
            root_object_recovery_limit if isinstance(root_object_recovery_limit, int) else sys.maxsize
        )

        # Map page indirect_reference number to page number
        self._page_id2num: Optional[dict[Any, Any]] = None

        self._validated_root: Optional[DictionaryObject] = None

        # State read by ``get_object``. It is set up before the stream is
        # parsed so that an object lookup triggered while reading does not
        # fail on a missing attribute.
        self._known_objects: set[tuple[int, int]] = set()
        self._override_encryption = False
        self._encryption: Optional[Encryption] = None

        self._initialize_stream(stream)

        if self.is_encrypted:
            self._handle_encryption(password)
        elif password is not None:
            raise PdfReadError("Not an encrypted file")

    def _initialize_stream(self, stream: Union[StrByteType, Path]) -> None:
        """
        Make the input available as ``self.stream`` and parse it.

        A path (``str`` or ``Path``) is read completely into an in-memory
        ``BytesIO``; ``_stream_opened`` records that this reader created the
        stream so that ``close`` releases it.

        Args:
            stream: Path to a PDF file or a file-like object.

        """
        # ``mode`` is not guaranteed to be a string on every file-like object
        # (for example an integer), so only inspect it when it is one.
        mode = getattr(stream, "mode", None)
        if isinstance(mode, str) and "b" not in mode:
            logger_warning(
                "PdfReader stream/file object is not in binary mode. "
                "It may not be read correctly.",
                __name__,
            )
        self._stream_opened = False
        if isinstance(stream, (str, Path)):
            with open(stream, "rb") as fh:
                stream = BytesIO(fh.read())
            self._stream_opened = True
        # Publish the stream before parsing: ``get_object`` accesses
        # ``self.stream`` and must be usable if it is called during ``read``.
        self.stream = stream
        self.read(stream)

    def _handle_encryption(self, password: Optional[Union[str, bytes]]) -> None:
        """
        Build the encryption handler from the trailer and verify the password.

        Args:
            password: Password supplied by the caller, or ``None`` to try the
                empty password only.

        Raises:
            WrongPasswordError: If a password was supplied and verification
                reports that the file is not decrypted.

        """
        # The /Encrypt dictionary itself has to be read without decryption.
        self._override_encryption = True

        # Some documents may not have a /ID, use two empty
        # byte strings instead. Solves
        # https://github.com/py-pdf/pypdf/issues/608
        trailer_id = self.trailer.get(TK.ID)
        if trailer_id:
            first_id = trailer_id[0].get_object().original_bytes
        else:
            first_id = b""
        encrypt_dict = cast(DictionaryObject, self.trailer[TK.ENCRYPT].get_object())
        self._encryption = Encryption.read(encrypt_dict, first_id)

        # Verification always runs; without a supplied password the empty
        # password is tried and a failure is not reported here.
        candidate = b"" if password is None else password
        outcome = self._encryption.verify(candidate)
        if password is not None and outcome == PasswordType.NOT_DECRYPTED:
            raise WrongPasswordError("Wrong password")
        self._override_encryption = False

    def __enter__(self) -> Self:
        """Enter the context manager and return this reader."""
        return self

    def __exit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[TracebackType],
    ) -> None:
        """
        Leave the context manager by calling ``close``.

        The exception arguments are not inspected; returning ``None`` means
        an exception raised inside the ``with`` body is not suppressed.
        """
        self.close()

    def close(self) -> None:
        """
        Release the stream created by this reader and drop parsed state.

        The stream is only closed when ``_stream_opened`` is set; the cached
        pages, resolved objects, trailer and cross-reference data are reset
        in every case.
        """
        if self._stream_opened:
            self.stream.close()

        # Cross-reference data.
        self.xref, self.xref_free_entry, self.xref_objStm = {}, {}, {}
        # Document-level data.
        self.trailer = DictionaryObject()
        self.resolved_objects = {}
        self.flattened_pages = []

    @property
    def root_object(self) -> DictionaryObject:
        """
        Provide access to "/Root". Standardized with PdfWriter.

        The result is stored in ``_validated_root`` and reused on later
        accesses. Lookup proceeds in three stages:

        1. Use the trailer's "/Root" entry if it resolves to a dictionary
           whose "/Type" is "/Catalog".
        2. Otherwise scan object numbers ``1 .. /Size`` for a dictionary
           with "/Type" "/Catalog", up to ``_root_object_recovery_limit``.
        3. Otherwise accept the trailer's "/Root" entry if it contains a
           "/Pages" key.

        Returns:
            The document catalog dictionary.

        Raises:
            LimitReachedError: If the scan reaches the recovery limit.
            PdfReadError: If no candidate can be found.

        """
        # NOTE(review): this is a truthiness test, so an empty (falsy) cached
        # value would not short-circuit - confirm that is acceptable.
        if self._validated_root:
            return self._validated_root
        # Stage 1: trust the trailer entry when it is a proper catalog.
        root = self.trailer.get(TK.ROOT)
        if is_null_or_none(root):
            logger_warning('Cannot find "/Root" key in trailer', __name__)
        elif (
            cast(DictionaryObject, cast(PdfObject, root).get_object()).get("/Type")
            == "/Catalog"
        ):
            self._validated_root = cast(
                DictionaryObject, cast(PdfObject, root).get_object()
            )
        else:
            logger_warning("Invalid Root object in trailer", __name__)
        # Stage 2: search the objects for a catalog dictionary.
        if self._validated_root is None:
            logger_warning('Searching object with "/Catalog" key', __name__)
            number_of_objects = cast(int, self.trailer.get("/Size", 0))
            for i in range(number_of_objects):
                # Bound the amount of work spent on recovery.
                if i >= self._root_object_recovery_limit:
                    raise LimitReachedError("Maximum Root object recovery limit reached.")
                try:
                    # Object numbers are probed starting at 1.
                    obj = self.get_object(i + 1)
                except Exception:  # to be sure to capture all errors
                    obj = None
                if isinstance(obj, DictionaryObject) and obj.get("/Type") == "/Catalog":
                    self._validated_root = obj
                    logger_warning(f"Root found at {obj.indirect_reference!r}", __name__)
                    break
        # Stage 3: fall back to a trailer entry that at least has "/Pages".
        if self._validated_root is None:
            if not is_null_or_none(root) and "/Pages" in cast(DictionaryObject, cast(PdfObject, root).get_object()):
                logger_warning(
                    f"Possible root found at {cast(PdfObject, root).indirect_reference!r}, but missing /Catalog key",
                    __name__
                )
                self._validated_root = cast(
                    DictionaryObject, cast(PdfObject, root).get_object()
                )
            else:
                raise PdfReadError("Cannot find Root object in pdf")
        return self._validated_root

    @property
    def _info(self) -> Optional[DictionaryObject]:
        """
        Give access to the trailer's "/Info" entry. Standardized with PdfWriter.

        Returns:
            /Info Dictionary; None if the entry does not exist

        Raises:
            PdfReadError: If the entry resolves to something other than a
                dictionary.

        """
        entry = self.trailer.get(TK.INFO, None)
        if is_null_or_none(entry):
            return None
        assert entry is not None, "mypy"

        resolved = entry.get_object()
        if isinstance(resolved, DictionaryObject):
            return resolved
        raise PdfReadError(
            "Trailer not found or does not point to a document information dictionary"
        )

    @property
    def _ID(self) -> Optional[ArrayObject]:
        """
        Give access to the trailer's "/ID" entry. Standardized with PdfWriter.

        Returns:
            /ID array; None if the entry does not exist

        """
        id_entry = self.trailer.get(TK.ID, None)
        if is_null_or_none(id_entry):
            return None
        assert id_entry is not None, "mypy"
        resolved = id_entry.get_object()
        return cast(ArrayObject, resolved)

    @property
    def pdf_header(self) -> str:
        """
        The first 8 bytes of the file, decoded as text.

        This is typically something like ``'%PDF-1.6'`` and can be used to
        detect if the file is actually a PDF file and which version it is.
        Bytes that are not valid UTF-8 are rendered as backslash escapes.
        The stream position is restored afterwards.
        """
        # TODO: Make this return a bytes object for consistency
        #       but that needs a deprecation
        saved_position = self.stream.tell()
        self.stream.seek(0, 0)
        raw_header = self.stream.read(8)
        header = raw_header.decode("utf-8", "backslashreplace")
        # Put the stream back where the caller left it.
        self.stream.seek(saved_position, 0)
        return header

    @property
    def xmp_metadata(self) -> Optional[XmpInformation]:
        """
        XMP (Extensible Metadata Platform) data.

        The encryption override flag is raised while the metadata is read
        from the root object and always cleared again afterwards.
        """
        self._override_encryption = True
        try:
            metadata = self.root_object.xmp_metadata
            return cast(XmpInformation, metadata)
        finally:
            self._override_encryption = False

    def _get_page_number_by_indirect(
        self, indirect_reference: Union[None, int, NullObject, IndirectObject]
    ) -> Optional[int]:
        """
        Look up the page number that belongs to an indirect reference.

        The lookup table from object number to page index is built from
        ``self.pages`` on first use and kept in ``_page_id2num``.

        Args:
            indirect_reference: The indirect reference to locate.

        Returns:
            Page number or None.

        """
        if self._page_id2num is None:
            lookup: dict[Any, Any] = {}
            for position, page in enumerate(self.pages):
                lookup[page.indirect_reference.idnum] = position  # type: ignore
            self._page_id2num = lookup

        if is_null_or_none(indirect_reference):
            return None
        assert isinstance(indirect_reference, (int, IndirectObject)), "mypy"

        # A plain integer already is the object number.
        idnum = (
            indirect_reference
            if isinstance(indirect_reference, int)
            else indirect_reference.idnum
        )
        assert self._page_id2num is not None, "hint for mypy"
        return self._page_id2num.get(idnum, None)

    def _get_object_from_stream(
        self, indirect_reference: IndirectObject
    ) -> Union[int, PdfObject, str]:
        """
        Resolve an object that is stored inside an object stream.

        The object stream named by ``xref_objStm`` for the requested object
        number is decoded, its index of ``(object number, offset)`` pairs is
        read, and every listed object is parsed. Parsed objects are cached
        when they are still registered in ``xref_objStm``.

        Args:
            indirect_reference: Reference to the wanted object; its ``idnum``
                must be a key of ``xref_objStm``.

        Returns:
            The requested object, or a ``NullObject`` if the stream index
            does not list it (non-strict mode).

        Raises:
            PdfReadError: In strict mode, if an object cannot be read or the
                requested object is not found in the stream.

        """
        # indirect reference to object in object stream
        # read the entire object stream into memory
        stmnum, _idx = self.xref_objStm[indirect_reference.idnum]
        obj_stm: EncodedStreamObject = IndirectObject(stmnum, 0, self).get_object()  # type: ignore
        # This is an xref to a stream, so its type better be a stream
        assert cast(str, obj_stm["/Type"]) == "/ObjStm"
        # Parse ALL objects in this stream in one pass and cache them.
        # This avoids O(N²) behavior when many objects from the same stream
        # are resolved individually (each call would re-parse the header).
        stream_data = BytesIO(obj_stm.get_data())
        # /N: number of index entries read below; /First: base offset that is
        # added to every per-object offset.
        n = int(obj_stm["/N"])  # type: ignore[call-overload]
        first_offset = int(obj_stm["/First"])  # type: ignore[call-overload]

        # Phase 1: Read the index (objnum, offset) pairs from the header.
        obj_index: list[tuple[int, int]] = []
        for _i in range(n):
            # Each read_non_whitespace/seek(-1) pair positions the stream on
            # the next non-whitespace byte without consuming it.
            read_non_whitespace(stream_data)
            stream_data.seek(-1, 1)
            objnum = NumberObject.read_from_stream(stream_data)
            read_non_whitespace(stream_data)
            stream_data.seek(-1, 1)
            offset = NumberObject.read_from_stream(stream_data)
            read_non_whitespace(stream_data)
            stream_data.seek(-1, 1)
            obj_index.append((int(objnum), int(offset)))

        # Phase 2: Parse each object and cache it.
        # Default result when the requested object is not listed in the index.
        target_obj: Union[int, PdfObject, str] = NullObject()
        found = False
        for i, (obj_num, obj_offset) in enumerate(obj_index):
            # Skip objects already in the cache.
            cached = self.cache_get_indirect_object(0, obj_num)
            if cached is not None:
                if obj_num == indirect_reference.idnum:
                    target_obj = cached
                    found = True
                continue

            stream_data.seek(first_offset + obj_offset, 0)

            # To cope with case where the 'pointer' is on a white space
            read_non_whitespace(stream_data)
            stream_data.seek(-1, 1)

            try:
                obj = read_object(stream_data, self)
            except PdfStreamError as exc:
                # Stream object cannot be read. Normally, a critical error, but
                # Adobe Reader doesn't complain, so continue (in strict mode?)
                logger_warning(
                    f"Invalid stream (index {i}) within object "
                    f"{obj_num} 0: {exc}",
                    __name__,
                )
                if self.strict:  # pragma: no cover
                    raise PdfReadError(
                        f"Cannot read object stream: {exc}"
                    )  # pragma: no cover
                # Non-strict mode: substitute a null object and carry on.
                obj = NullObject()  # pragma: no cover

            # Only cache if this object is still registered in xref_objStm.
            # Incremental updates may override objects originally in the stream;
            # caching those stale versions would shadow the newer xref entry.
            if obj_num in self.xref_objStm:
                self.cache_indirect_object(0, obj_num, obj)  # type: ignore[arg-type]

            if obj_num == indirect_reference.idnum:
                target_obj = obj
                found = True

        if not found and self.strict:  # pragma: no cover
            raise PdfReadError(
                "This is a fatal error in strict mode."
            )  # pragma: no cover
        return target_obj

    def get_object(
        self, indirect_reference: Union[int, IndirectObject]
    ) -> Optional[PdfObject]:
        """
        Resolve an indirect reference to the object it points to.

        Resolution order: the cache of resolved objects, the object-stream
        index (``xref_objStm``), the cross-reference table (``xref``), and
        finally a search of the raw file content for an
        ``<id> <generation> obj`` header.

        Args:
            indirect_reference: The reference to resolve. A plain integer is
                treated as an object number with generation 0.

        Returns:
            The resolved object; a ``NullObject`` for entries marked as free;
            ``None`` if the object cannot be located in non-strict mode.

        Raises:
            PdfReadError: If a reference loop is detected, or in strict mode
                if the object header does not match or the object is missing.
            FileNotDecryptedError: If the file is encrypted and has not been
                decrypted yet.

        """
        if isinstance(indirect_reference, int):
            indirect_reference = IndirectObject(indirect_reference, 0, self)
        retval = self.cache_get_indirect_object(
            indirect_reference.generation, indirect_reference.idnum
        )
        if retval is not None:
            return retval
        if (
            indirect_reference.generation == 0
            and indirect_reference.idnum in self.xref_objStm
        ):
            retval = self._get_object_from_stream(indirect_reference)  # type: ignore
        elif (
            indirect_reference.generation in self.xref
            and indirect_reference.idnum in self.xref[indirect_reference.generation]
        ):
            if self.xref_free_entry.get(indirect_reference.generation, {}).get(
                indirect_reference.idnum, False
            ):
                return NullObject()
            start = self.xref[indirect_reference.generation][indirect_reference.idnum]
            self.stream.seek(start, 0)
            try:
                idnum, generation = self.read_object_header(self.stream)
                if (
                    idnum != indirect_reference.idnum
                    or generation != indirect_reference.generation
                ):
                    raise PdfReadError("Not matching, we parse the file for it")
            except Exception:
                # The xref offset is unusable: look for the object header in
                # the raw file content instead.
                if hasattr(self.stream, "getbuffer"):
                    buf = bytes(self.stream.getbuffer())
                else:
                    p = self.stream.tell()
                    self.stream.seek(0, 0)
                    buf = self.stream.read(-1)
                    self.stream.seek(p, 0)
                m = re.search(
                    rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
                    buf,
                )
                if m is not None:
                    logger_warning(
                        f"Object ID {indirect_reference.idnum},{indirect_reference.generation} ref repaired",
                        __name__,
                    )
                    # The match starts on the whitespace before the object
                    # number, hence the + 1.
                    self.xref[indirect_reference.generation][
                        indirect_reference.idnum
                    ] = (m.start(0) + 1)
                    self.stream.seek(m.start(0) + 1)
                    idnum, generation = self.read_object_header(self.stream)
                else:
                    idnum = -1
                    generation = -1  # exception will be raised below
            if idnum != indirect_reference.idnum and self.xref_index:
                # xref table probably had bad indexes due to not being zero-indexed
                if self.strict:
                    raise PdfReadError(
                        f"Expected object ID ({indirect_reference.idnum} {indirect_reference.generation}) "
                        f"does not match actual ({idnum} {generation}); "
                        "xref table not zero-indexed."
                    )
                # xref table is corrected in non-strict mode
            elif idnum != indirect_reference.idnum and self.strict:
                # some other problem
                raise PdfReadError(
                    f"Expected object ID ({indirect_reference.idnum} {indirect_reference.generation}) "
                    f"does not match actual ({idnum} {generation})."
                )
            if self.strict:
                assert generation == indirect_reference.generation

            current_object = (indirect_reference.idnum, indirect_reference.generation)
            if current_object in self._known_objects:
                raise PdfReadError(f"Detected loop with self reference for {indirect_reference!r}.")
            self._known_objects.add(current_object)
            try:
                retval = read_object(self.stream, self)  # type: ignore
            finally:
                # Always release the in-progress marker. Leaving it behind
                # after a failed read would make every later lookup of this
                # object report a loop.
                self._known_objects.discard(current_object)

            # override encryption is used for the /Encrypt dictionary
            if not self._override_encryption and self._encryption is not None:
                # if we don't have the encryption key:
                if not self._encryption.is_decrypted():
                    raise FileNotDecryptedError("File has not been decrypted")
                # otherwise, decrypt here...
                retval = cast(PdfObject, retval)
                retval = self._encryption.decrypt_object(
                    retval, indirect_reference.idnum, indirect_reference.generation
                )
        else:
            # The object is not listed in any cross-reference data: search
            # the raw file content for its header.
            if hasattr(self.stream, "getbuffer"):
                buf = bytes(self.stream.getbuffer())
            else:
                p = self.stream.tell()
                self.stream.seek(0, 0)
                buf = self.stream.read(-1)
                self.stream.seek(p, 0)
            m = re.search(
                rf"\s{indirect_reference.idnum}\s+{indirect_reference.generation}\s+obj".encode(),
                buf,
            )
            if m is not None:
                logger_warning(
                    f"Object {indirect_reference.idnum} {indirect_reference.generation} found",
                    __name__,
                )
                if indirect_reference.generation not in self.xref:
                    self.xref[indirect_reference.generation] = {}
                self.xref[indirect_reference.generation][indirect_reference.idnum] = (
                    m.start(0) + 1
                )
                # Continue directly after the ``obj`` keyword. The object may
                # start without separating whitespace (``obj<<``), so no byte
                # may be skipped here; leading whitespace is consumed below.
                self.stream.seek(m.end(0))
                skip_over_whitespace(self.stream)
                self.stream.seek(-1, 1)
                retval = read_object(self.stream, self)  # type: ignore

                # override encryption is used for the /Encrypt dictionary
                if not self._override_encryption and self._encryption is not None:
                    # if we don't have the encryption key:
                    if not self._encryption.is_decrypted():
                        raise FileNotDecryptedError("File has not been decrypted")
                    # otherwise, decrypt here...
                    retval = cast(PdfObject, retval)
                    retval = self._encryption.decrypt_object(
                        retval, indirect_reference.idnum, indirect_reference.generation
                    )
            else:
                logger_warning(
                    f"Object {indirect_reference.idnum} {indirect_reference.generation} not defined.",
                    __name__,
                )
                if self.strict:
                    raise PdfReadError("Could not find object.")
        # For ObjStm objects, _get_object_from_stream already cached
        # the result during batch parsing; skip the redundant cache write
        # to avoid "Overwriting cache" warnings. For non-ObjStm objects
        # (including encrypted ones that need decrypted values cached),
        # always write.
        if not (
            indirect_reference.generation == 0
            and indirect_reference.idnum in self.xref_objStm
        ):
            self.cache_indirect_object(
                indirect_reference.generation, indirect_reference.idnum, retval
            )
        return retval

    def read_object_header(self, stream: StreamType) -> tuple[int, int]:
        """
        Read an ``<id> <generation> obj`` header from the current position.

        Leading comments and whitespace are skipped. After the call the
        stream is positioned on the first non-whitespace byte following the
        three bytes read after the generation number.

        Args:
            stream: Stream positioned at (or shortly before) an object header.

        Returns:
            The object number and generation number as integers.

        Raises:
            ValueError: If the tokens read are not valid integers (raised by
                ``int``).

        """
        # Should never be necessary to read out whitespace, since the
        # cross-reference table should put us in the right spot to read the
        # object header. In reality some files have stupid cross-reference
        # tables that are off by whitespace bytes.
        skip_over_comment(stream)
        # ``extra`` accumulates the results of the whitespace skips; it is
        # only used for the warning below.
        extra = skip_over_whitespace(stream)
        stream.seek(-1, 1)
        idnum = read_until_whitespace(stream)
        extra |= skip_over_whitespace(stream)
        stream.seek(-1, 1)
        generation = read_until_whitespace(stream)
        extra |= skip_over_whitespace(stream)
        stream.seek(-1, 1)

        # although it's not used, it might still be necessary to read
        # (consumes three bytes; their content is not checked here)
        _obj = stream.read(3)

        # Step back onto the first non-whitespace byte of the object body.
        read_non_whitespace(stream)
        stream.seek(-1, 1)
        # The warning is only emitted in strict mode.
        if extra and self.strict:
            logger_warning(
                f"Superfluous whitespace found in object header {idnum} {generation}",  # type: ignore
                __name__,
            )
        return int(idnum), int(generation)

    def cache_get_indirect_object(
        self, generation: int, idnum: int
    ) -> Optional[PdfObject]:
        """
        Fetch a previously resolved object from the cache.

        Args:
            generation: Generation number of the object.
            idnum: Object number.

        Returns:
            The cached object, or ``None`` if nothing is stored for the key.

        Raises:
            PdfReadError: If the lookup hits the interpreter recursion limit.

        """
        cache_key = (generation, idnum)
        try:
            return self.resolved_objects.get(cache_key)
        except RecursionError:
            raise PdfReadError("Maximum recursion depth reached.")

    def cache_indirect_object(
        self, generation: int, idnum: int, obj: Optional[PdfObject]
    ) -> Optional[PdfObject]:
        """
        Store a resolved object in the cache and tag it with its reference.

        Args:
            generation: Generation number of the object.
            idnum: Object number.
            obj: The resolved object; ``None`` is stored as-is.

        Returns:
            ``obj`` unchanged, apart from its ``indirect_reference`` being set
            when it is not ``None``.

        Raises:
            PdfReadError: In strict mode, if the key is already cached.

        """
        cache_key = (generation, idnum)
        if cache_key in self.resolved_objects:
            message = f"Overwriting cache for {generation} {idnum}"
            if self.strict:
                raise PdfReadError(message)
            logger_warning(message, __name__)

        self.resolved_objects[cache_key] = obj
        if obj is None:
            return obj
        obj.indirect_reference = IndirectObject(idnum, generation, self)
        return obj

    def _replace_object(self, indirect_reference: IndirectObject, obj: PdfObject) -> PdfObject:
        """
        Swap the cached object behind an existing reference.

        Args:
            indirect_reference: Reference owned by this reader whose object
                is already cached.
            obj: Replacement object.

        Returns:
            ``obj``, with ``indirect_reference`` set to the given reference.

        Raises:
            ValueError: If the reference belongs to another document or no
                object is cached for it.

        """
        # function reserved for future development
        if indirect_reference.pdf != self:
            raise ValueError("Cannot update PdfReader with external object")

        cache_key = (indirect_reference.generation, indirect_reference.idnum)
        if cache_key not in self.resolved_objects:
            raise ValueError("Cannot find referenced object")

        self.resolved_objects[cache_key] = obj
        obj.indirect_reference = indirect_reference
        return obj

    def read(self, stream: StreamType) -> None:
        """
        Read and process the PDF stream, extracting necessary data.

        The steps are: validate the header, locate the end-of-file marker and
        the ``startxref`` position, read the cross-reference tables and
        trailers, and - in non-strict mode - repair or drop cross-reference
        entries that do not point at a readable object header.

        Args:
            stream: The PDF file stream.

        Raises:
            PdfReadError: In strict mode, if the xref table is reported as
                broken.

        """
        self._basic_validation(stream)
        self._find_eof_marker(stream)
        startxref = self._find_startxref_pos(stream)
        self._startxref = startxref

        # check and eventually correct the startxref only if not strict
        xref_issue_nr = self._get_xref_issues(stream, startxref)
        if xref_issue_nr != 0:
            # NOTE(review): the ``and xref_issue_nr`` part is already implied
            # by the enclosing condition.
            if self.strict and xref_issue_nr:
                raise PdfReadError("Broken xref table")
            logger_warning(f"incorrect startxref pointer({xref_issue_nr})", __name__)

        # read all cross-reference tables and their trailers
        self._read_xref_tables_and_trailers(stream, startxref, xref_issue_nr)

        # if not zero-indexed, verify that the table is correct; change it if necessary
        if self.xref_index and not self.strict:
            loc = stream.tell()
            for gen, xref_entry in self.xref.items():
                # Entries under generation 65535 are not checked.
                if gen == 65535:
                    continue
                xref_k = sorted(
                    xref_entry.keys()
                )  # ensure ascending to prevent damage
                for id in xref_k:
                    stream.seek(xref_entry[id], 0)
                    try:
                        pid, _pgen = self.read_object_header(stream)
                    except ValueError:
                        # No parsable header at this offset: rebuild the table
                        # and stop checking this generation.
                        self._rebuild_xref_table(stream)
                        break
                    if pid == id - self.xref_index:
                        # fixing index item per item is required for revised PDF.
                        self.xref[gen][pid] = self.xref[gen][id]
                        del self.xref[gen][id]
                    # if not, then either it's just plain wrong, or the
                    # non-zero-index is actually correct
            stream.seek(loc, 0)  # return to where it was

        # remove wrong objects (not pointing to correct structures) - cf #2326
        if not self.strict:
            loc = stream.tell()
            for gen, xref_entry in self.xref.items():
                if gen == 65535:
                    continue
                # Work on a snapshot of the keys because entries are deleted
                # from ``xref_entry`` inside the loop.
                ids = list(xref_entry.keys())
                for id in ids:
                    stream.seek(xref_entry[id], 0)
                    try:
                        self.read_object_header(stream)
                    except ValueError:
                        logger_warning(
                            f"Ignoring wrong pointing object {id} {gen} (offset {xref_entry[id]})",
                            __name__,
                        )
                        del xref_entry[id]  # we can delete the id, we are parsing ids
            stream.seek(loc, 0)  # return to where it was

    def _basic_validation(self, stream: StreamType) -> None:
        """
        Ensure the stream is not empty and check for the ``%PDF-`` header.

        The stream is rewound before the header is read and is positioned at
        its end when the method returns.

        Args:
            stream: The stream holding the PDF data.

        Raises:
            UnsupportedOperation: If reading the header raises a
                ``UnicodeDecodeError``.
            EmptyFileError: If nothing can be read from the stream.
            PdfReadError: In strict mode, if the first five bytes are not
                ``%PDF-``.

        """
        stream.seek(0, os.SEEK_SET)
        try:
            header_byte = stream.read(5)
        except UnicodeDecodeError as exc:
            raise UnsupportedOperation("cannot read header") from exc
        if header_byte == b"":
            raise EmptyFileError("Cannot read an empty file")
        if header_byte != b"%PDF-":
            if self.strict:
                # A wrong header is often arbitrary binary data. Decode it
                # leniently so that the PdfReadError below is raised instead
                # of an unrelated UnicodeDecodeError.
                shown_header = header_byte.decode("utf8", errors="backslashreplace")
                raise PdfReadError(
                    f"PDF starts with '{shown_header}', "
                    "but '%PDF-' expected"
                )
            logger_warning(f"invalid pdf header: {header_byte}", __name__)
        stream.seek(0, os.SEEK_END)

    def _find_eof_marker(self, stream: StreamType) -> None:
        """
        Walk backwards through the stream until the ``%%EOF`` line is reached.

        According to the specs, the %%EOF marker should be at the very end of
        the file, so for standard-compliant documents only the last part of
        the file has to be read.

        The first non-empty line read is also accepted when it merely ends
        with a prefix of the marker; the file is then considered truncated.

        Args:
            stream: The PDF stream, read backwards from its current position.

        Raises:
            PdfReadError: In strict mode, if the start of the file is reached
                without finding the marker.

        """
        header_size = 8  # to parse whole file, Header is e.g. '%PDF-1.6'
        truncated_markers = (b"%%EO", b"%%E", b"%%", b"%")
        current_line = b""
        awaiting_first_line = True
        while not current_line.startswith(b"%%EOF"):
            if awaiting_first_line and current_line != b"":
                if current_line.strip().endswith(truncated_markers):
                    # Consider the file as truncated while
                    # having enough confidence to carry on.
                    logger_warning("EOF marker seems truncated", __name__)
                    return
                awaiting_first_line = False
            if b"startxref" in current_line:
                logger_warning(
                    "CAUTION: startxref found while searching for %%EOF. "
                    "The file might be truncated and some data might not be read.",
                    __name__,
                )
            if stream.tell() < header_size:
                if self.strict:
                    raise PdfReadError("EOF marker not found")
                logger_warning("EOF marker not found", __name__)
            current_line = read_previous_line(stream)

    def _find_startxref_pos(self, stream: StreamType) -> int:
        """
        Read the ``startxref`` entry, i.e. the location of the xref table.

        The stream is read backwards: the offset is expected on the previous
        line and the ``startxref`` keyword on the line before it. An offset
        written on the same line as the keyword is accepted with a warning.

        Args:
            stream: The PDF stream, positioned after the ``startxref`` entry.

        Returns:
            The bytes offset.

        Raises:
            PdfReadError: If the ``startxref`` keyword is not found.

        """
        offset_line = read_previous_line(stream)
        try:
            startxref = int(offset_line)
        except ValueError:
            # 'startxref' may be on the same line as the location
            if not offset_line.startswith(b"startxref"):
                raise PdfReadError("startxref not found")
            startxref = int(offset_line[9:].strip())
            logger_warning("startxref on same line as offset", __name__)
            return startxref

        # The offset stood on a line of its own: the keyword has to precede it.
        keyword_line = read_previous_line(stream)
        if not keyword_line.startswith(b"startxref"):
            raise PdfReadError("startxref not found")
        return startxref

    def _read_standard_xref_table(self, stream: StreamType) -> None:
        """
        Read a classic (non-stream) cross-reference table from ``stream``.

        Only ``ref`` is checked here, so the leading ``x`` of the ``xref``
        keyword must already have been consumed. Each subsection (first
        object number, entry count, then the entries) is parsed; entries of
        type ``n`` are stored in ``self.xref`` and the free flag of every new
        entry is recorded in ``self.xref_free_entry``. Entries that are
        already known are never overwritten. Reading stops once the
        ``trailer`` keyword has been consumed.

        If the entry count of a subsection is not an integer, the whole table
        is rebuilt with ``_rebuild_xref_table`` and the stream is read to its
        end.

        Args:
            stream: The PDF stream, positioned right after the ``x`` of
                ``xref``.

        Raises:
            PdfReadError: If the keyword is not ``xref`` or if the stream ends
                inside a subsection.

        """
        # standard cross-reference table
        ref = stream.read(3)
        if ref != b"ref":
            raise PdfReadError("xref table read error")
        # presumably skips to the first non-whitespace byte; the seek steps
        # back so that this byte is read again by read_object
        read_non_whitespace(stream)
        stream.seek(-1, 1)
        first_time = True  # check if the first time looking at the xref table
        while True:
            # first object number of the subsection
            num = cast(int, read_object(stream, self))
            if first_time and num != 0:
                self.xref_index = num
                if self.strict:
                    logger_warning(
                        "Xref table not zero-indexed. ID numbers for objects will be corrected.",
                        __name__,
                    )
                    # if table not zero indexed, could be due to error from when PDF was created
                    # which will lead to mismatched indices later on, only warned and corrected if self.strict==True
                    # NOTE(review): the warning is emitted in strict mode only, while the
                    # index correction visible in the calling code runs only when
                    # strict is False - confirm which of the two is intended.
            first_time = False
            read_non_whitespace(stream)
            stream.seek(-1, 1)
            # number of entries in the subsection
            size = cast(int, read_object(stream, self))
            if not isinstance(size, int):
                logger_warning(
                    "Invalid/Truncated xref table. Rebuilding it.",
                    __name__,
                )
                self._rebuild_xref_table(stream)
                # consume the rest of the stream so that nothing is left to read
                stream.read()
                return
            read_non_whitespace(stream)
            stream.seek(-1, 1)
            cnt = 0
            while cnt < size:
                line = stream.read(20)
                if not line:
                    raise PdfReadError("Unexpected empty line in Xref table.")

                # It's very clear in section 3.4.3 of the PDF spec
                # that all cross-reference table lines are a fixed
                # 20 bytes (as of PDF 1.7). However, some files have
                # 21-byte entries (or more) due to the use of \r\n
                # (CRLF) EOL's. Detect that case, and adjust the line
                # until it does not begin with a \r (CR) or \n (LF).
                while line[0] in b"\x0D\x0A":
                    stream.seek(-20 + 1, 1)
                    line = stream.read(20)

                # On the other hand, some malformed PDF files
                # use a single character EOL without a preceding
                # space. Detect that case, and seek the stream
                # back one character (0-9 means we've bled into
                # the next xref entry, t means we've bled into the
                # text "trailer"):
                if line[-1] in b"0123456789t":
                    stream.seek(-1, 1)

                try:
                    offset_b, generation_b = line[:16].split(b" ")
                    entry_type_b = line[17:18]

                    offset, generation = int(offset_b), int(generation_b)
                except Exception:
                    # The entry cannot be parsed: search the whole file for an
                    # object header carrying this object number instead.
                    if hasattr(stream, "getbuffer"):
                        buf = bytes(stream.getbuffer())
                    else:
                        p = stream.tell()
                        stream.seek(0, 0)
                        buf = stream.read(-1)
                        stream.seek(p)

                    f = re.search(rf"{num}\s+(\d+)\s+obj".encode(), buf)
                    if f is None:
                        logger_warning(
                            f"entry {num} in Xref table invalid; object not found",
                            __name__,
                        )
                        # register the entry as free
                        generation = 65535
                        offset = -1
                        entry_type_b = b"f"
                    else:
                        logger_warning(
                            f"entry {num} in Xref table invalid but object found",
                            __name__,
                        )
                        generation = int(f.group(1))
                        offset = f.start()
                        # NOTE(review): entry_type_b is not assigned in this branch. If
                        # the split above failed, it still holds the value of the
                        # previous entry, or is unbound for the very first entry -
                        # confirm whether b"n" should be forced here.

                if generation not in self.xref:
                    self.xref[generation] = {}
                    self.xref_free_entry[generation] = {}
                if num in self.xref[generation]:
                    # It really seems like we should allow the last
                    # xref table in the file to override previous
                    # ones. Since we read the file backwards, assume
                    # any existing key is already set correctly.
                    pass
                else:
                    if entry_type_b == b"n":
                        self.xref[generation][num] = offset
                    try:
                        self.xref_free_entry[generation][num] = entry_type_b == b"f"
                    except Exception:
                        pass
                    try:
                        # fails silently when no entry with generation 65535 has
                        # been registered so far
                        self.xref_free_entry[65535][num] = entry_type_b == b"f"
                    except Exception:
                        pass
                cnt += 1
                num += 1
            read_non_whitespace(stream)
            stream.seek(-1, 1)
            trailer_tag = stream.read(7)
            if trailer_tag != b"trailer":
                # more xrefs!
                stream.seek(-7, 1)
            else:
                break

    def _read_xref_tables_and_trailers(
        self, stream: StreamType, startxref: Optional[int], xref_issue_nr: int
    ) -> None:
        """
        Read the cross-reference tables and trailers in the PDF stream.

        Starting at ``startxref``, the chain of cross-reference sections is
        followed until no further offset is available. ``self.xref``,
        ``self.xref_free_entry``, ``self.xref_objStm`` and ``self.trailer``
        are reset before reading.

        Args:
            stream: The PDF stream.
            startxref: Offset of the first cross-reference section to read;
                ``None`` means that nothing is read.
            xref_issue_nr: A non-zero value requests a rebuild of the table
                when no classic ``xref`` keyword is found at the offset.

        Raises:
            PdfReadError: If a cross-reference stream cannot be read before
                any ``/Root`` entry is known.

        """
        self.xref = {}
        self.xref_free_entry = {}
        self.xref_objStm = {}
        self.trailer = DictionaryObject()
        visited_xref_offsets: set[int] = set()
        while startxref is not None:
            # Detect circular /Prev references in the xref chain
            if startxref in visited_xref_offsets:
                logger_warning(
                    f"Circular xref chain detected at offset {startxref}, stopping",
                    __name__,
                )
                break
            visited_xref_offsets.add(startxref)
            # load the xref table
            stream.seek(startxref, 0)
            x = stream.read(1)
            if x in b"\r\n":
                x = stream.read(1)
            if x == b"x":
                startxref = self._read_xref(stream)
            elif xref_issue_nr:
                try:
                    self._rebuild_xref_table(stream)
                    break
                except Exception:
                    # The rebuild failed: retry the very same offset with the
                    # regular readers. The offset has to be forgotten first,
                    # otherwise the retry would be mistaken for a circular
                    # chain. This cannot loop, as xref_issue_nr stays 0.
                    xref_issue_nr = 0
                    visited_xref_offsets.discard(startxref)
            elif x.isdigit():
                try:
                    xrefstream = self._read_pdf15_xref_stream(stream)
                except Exception as e:
                    if TK.ROOT in self.trailer:
                        logger_warning(
                            f"Previous trailer cannot be read: {e.args}", __name__
                        )
                        break
                    raise PdfReadError(f"Trailer cannot be read: {e!s}") from e
                self._process_xref_stream(xrefstream)
                if "/Prev" in xrefstream:
                    startxref = cast(int, xrefstream["/Prev"])
                else:
                    break
            else:
                startxref = self._read_xref_other_error(stream, startxref)

    def _process_xref_stream(self, xrefstream: DictionaryObject) -> None:
        """
        Merge the trailer information of a cross-reference stream.

        The trailer keys found in ``xrefstream`` are copied to
        ``self.trailer`` unless they are already present there. If the
        dictionary holds an ``/XRefStm`` entry, the cross-reference stream
        it points to is read as well and the position of ``self.stream``
        is restored afterwards.

        Args:
            xrefstream: The dictionary of the cross-reference stream.

        """
        for trailer_key in (TK.ROOT, TK.ENCRYPT, TK.INFO, TK.ID, TK.SIZE):
            if trailer_key not in xrefstream or trailer_key in self.trailer:
                continue
            self.trailer[NameObject(trailer_key)] = xrefstream.raw_get(trailer_key)

        if "/XRefStm" not in xrefstream:
            return
        resume_at = self.stream.tell()
        # +1 because _read_pdf15_xref_stream starts by stepping back one byte
        self.stream.seek(cast(int, xrefstream["/XRefStm"]) + 1, 0)
        self._read_pdf15_xref_stream(self.stream)
        self.stream.seek(resume_at, 0)

    def _read_xref(self, stream: StreamType) -> Optional[int]:
        """
        Read a classic cross-reference table together with its trailer.

        Trailer entries are added to ``self.trailer`` only when the key is
        not present yet. A cross-reference stream referenced by ``/XRefStm``
        is read as well; a failure to read it is only reported as a warning.

        Args:
            stream: The PDF stream, positioned as expected by
                ``_read_standard_xref_table``.

        Returns:
            The ``/Prev`` entry of the trailer, or ``None`` when the trailer
            has no such entry or when nothing follows the table.

        """
        self._read_standard_xref_table(stream)
        if stream.read(1) == b"":
            # Nothing left after the table: there is no trailer to read.
            return None
        stream.seek(-1, 1)
        read_non_whitespace(stream)
        stream.seek(-1, 1)
        trailer_dict = cast(dict[str, Any], read_object(stream, self))
        for name, entry in trailer_dict.items():
            if name in self.trailer:
                continue
            self.trailer[name] = entry

        if "/XRefStm" in trailer_dict:
            resume_at = stream.tell()
            # +1 because _read_pdf15_xref_stream starts by stepping back one byte
            stream.seek(cast(int, trailer_dict["/XRefStm"]) + 1, 0)
            try:
                self._read_pdf15_xref_stream(stream)
            except Exception:
                logger_warning(
                    f"XRef object at {trailer_dict['/XRefStm']} can not be read, some object may be missing",
                    __name__,
                )
            stream.seek(resume_at, 0)

        return trailer_dict["/Prev"] if "/Prev" in trailer_dict else None

    def _read_xref_other_error(
        self, stream: StreamType, startxref: int
    ) -> Optional[int]:
        """
        Recover when neither an xref table nor an xref stream starts at ``startxref``.

        The attempts are made in this order: treat ``/Prev=0`` as "no previous
        table", look for the ``xref`` keyword close to the current stream
        position, look for a digit within the 25 bytes following
        ``startxref``, and finally rebuild the table when ``/Root`` is already
        known and the reader is not strict.

        Args:
            stream: The PDF stream, positioned shortly after ``startxref``.
            startxref: The offset at which no cross-reference data was found.

        Returns:
            The corrected offset to read next, or ``None`` when there is
            nothing more to read.

        Raises:
            PdfReadError: If ``startxref`` is 0 in strict mode, if the rebuild
                fails or if no recovery is possible.

        """
        # some PDFs have /Prev=0 in the trailer, instead of no /Prev
        if startxref == 0:
            if self.strict:
                raise PdfReadError(
                    "/Prev=0 in the trailer (try opening with strict=False)"
                )
            logger_warning(
                "/Prev=0 in the trailer - assuming there is no previous xref table",
                __name__,
            )
            return None

        # bad xref character at startxref. Let's see if we can find
        # the xref table nearby, as we've observed this error with an
        # off-by-one before.
        stream.seek(-11, 1)
        nearby = stream.read(20)
        keyword_at = nearby.find(b"xref")
        if keyword_at != -1:
            return startxref - (10 - keyword_at)

        # No explicit xref table, try finding a cross-reference stream.
        stream.seek(startxref, 0)
        for distance in range(25):  # value extended to cope with more linearized files
            if stream.read(1).isdigit():
                # This is not a standard PDF, consider adding a warning
                return startxref + distance

        # no xref table found at specified location
        if "/Root" not in self.trailer or self.strict:
            raise PdfReadError("Could not find xref table at specified location")

        # if Root has been already found, just raise warning
        logger_warning("Invalid parent xref., rebuild xref", __name__)
        try:
            self._rebuild_xref_table(stream)
        except Exception:
            raise PdfReadError("Cannot rebuild xref")
        return None

    def _read_pdf15_xref_stream(
        self, stream: StreamType
    ) -> Union[ContentStream, EncodedStreamObject, DecodedStreamObject]:
        """
        Read the cross-reference stream for PDF 1.5+.

        The stream is moved back by one byte first, so callers have to
        position it one byte after the start of the object header. The object
        is cached and its entries are added to ``self.xref`` and
        ``self.xref_objStm`` without replacing entries that are already known.

        Args:
            stream: The PDF stream.

        Returns:
            The cross-reference stream object.

        Raises:
            PdfReadError: If the object is not of type ``/XRef``, if ``/Size``
                is missing, if ``/W`` is missing or has fewer than three
                elements, or, in strict mode, if ``/W`` has more than three
                elements.

        """
        stream.seek(-1, 1)
        idnum, generation = self.read_object_header(stream)
        xrefstream = cast(ContentStream, read_object(stream, self))
        if cast(str, xrefstream["/Type"]) != "/XRef":
            raise PdfReadError(f"Unexpected type {xrefstream['/Type']!r}")
        self.cache_indirect_object(generation, idnum, xrefstream)

        # Index pairs specify the subsections in the dictionary.
        # If none, create one subsection that spans everything.
        if "/Size" not in xrefstream:
            # According to table 17 of the PDF 2.0 specification, this key is required.
            raise PdfReadError(f"Size missing from XRef stream {xrefstream!r}!")
        idx_pairs = xrefstream.get("/Index", [0, xrefstream["/Size"]])

        entry_sizes = cast(Any, xrefstream.get("/W"))
        # The value comes from the file: validate it with a real exception
        # instead of an assert, which is removed when Python runs with -O.
        if entry_sizes is None or len(entry_sizes) < 3:
            raise PdfReadError(f"Invalid /W entry in XRef stream: {entry_sizes!r}")
        if self.strict and len(entry_sizes) > 3:
            raise PdfReadError(f"Too many entry sizes: {entry_sizes}")

        stream_data = BytesIO(xrefstream.get_data())

        def get_entry(i: int) -> Union[int, tuple[int, ...]]:
            # Reads the correct number of bytes for each entry. See the
            # discussion of the W parameter in PDF spec table 17.
            if entry_sizes[i] > 0:
                d = stream_data.read(entry_sizes[i])
                return convert_to_int(d, entry_sizes[i])

            # PDF Spec Table 17: A value of zero for an element in the
            # W array indicates...the default value shall be used
            if i == 0:
                return 1  # First value defaults to 1
            return 0

        def used_before(num: int, generation: Union[int, tuple[int, ...]]) -> bool:
            # We move backwards through the xrefs, don't replace any.
            return num in self.xref.get(generation, []) or num in self.xref_objStm  # type: ignore

        # Iterate through each subsection
        self._read_xref_subsections(idx_pairs, get_entry, used_before)
        return xrefstream

    @staticmethod
    def _get_xref_issues(stream: StreamType, startxref: int) -> int:
        """
        Return an int which indicates an issue. 0 means there is no issue.

        Args:
            stream:
            startxref:

        Returns:
            0 means no issue, other values represent specific issues.

        """
        if startxref == 0:
            return 4

        stream.seek(startxref - 1, 0)  # -1 to check character before
        line = stream.read(1)
        if line == b"j":
            line = stream.read(1)
        if line not in b"\r\n \t":
            return 1
        line = stream.read(4)
        if line != b"xref":
            # not a xref so check if it is an XREF object
            line = b""
            while line in b"0123456789 \t":
                line = stream.read(1)
                if line == b"":
                    return 2
            line += stream.read(2)  # 1 char already read, +2 to check "obj"
            if line.lower() != b"obj":
                return 3
        return 0

    @classmethod
    def _find_pdf_objects(cls, data: bytes) -> Iterable[tuple[int, int, int]]:
        """
        Scan raw PDF data for object headers of the form ``<number> <generation> obj``.

        Every occurrence of ``" obj"`` is inspected backwards: optional
        whitespace, the digits of the generation number, optional whitespace
        and the digits of the object number. Occurrences lacking one of the
        two numbers are ignored.

        Args:
            data: The complete content of the PDF file.

        Yields:
            Tuples of object number, generation number and the offset of the
            first digit of the object number.

        """
        marker = b" obj"
        digit_low, digit_high = b"09"  # byte values of "0" and "9"
        search_from = 0
        while (marker_at := data.find(marker, search_from)) != -1:
            search_from = marker_at + len(marker)
            cursor = marker_at - 1

            # Whitespace between the generation number and the marker
            while cursor >= 0 and data[cursor] in WHITESPACES_AS_BYTES:
                cursor -= 1

            # Digits of the generation number
            generation_end = cursor + 1
            while cursor >= 0 and digit_low <= data[cursor] <= digit_high:
                cursor -= 1
            generation_start = cursor + 1

            # Whitespace between the object number and the generation number
            while cursor >= 0 and data[cursor] in WHITESPACES_AS_BYTES:
                cursor -= 1

            # Digits of the object number
            object_end = cursor + 1
            while cursor >= 0 and digit_low <= data[cursor] <= digit_high:
                cursor -= 1
            object_start = cursor + 1

            # Both numbers need at least one digit
            if object_start >= object_end or generation_start >= generation_end:
                continue
            yield (
                int(data[object_start:object_end]),
                int(data[generation_start:generation_end]),
                object_start,
            )

    @classmethod
    def _find_pdf_trailers(cls, data: bytes) -> Iterable[int]:
        """
        Scan raw PDF data for trailer dictionaries.

        An occurrence of ``trailer`` counts only when it is followed, after
        optional whitespace, by ``<<``.

        Args:
            data: The complete content of the PDF file.

        Yields:
            The offset of the ``<<`` starting each trailer dictionary.

        """
        keyword = b"trailer"
        total = len(data)
        search_from = 0
        while (keyword_at := data.find(keyword, search_from)) != -1:
            search_from = keyword_at + len(keyword)

            # Skip the whitespace following the keyword
            candidate = search_from
            while candidate < total and data[candidate] in WHITESPACES_AS_BYTES:
                candidate += 1

            # Must be dictionary start
            if data.startswith(b"<<", candidate):
                yield candidate  # offset of '<<'

    def _rebuild_xref_table(self, stream: StreamType) -> None:
        """
        Rebuild the cross-reference information by scanning the whole file.

        ``self.xref`` is reset and filled from the object headers found in
        the raw data. Every object found is then read to detect object
        streams, whose content is registered in ``self.xref_objStm``. Finally,
        all trailer dictionaries found in the data are merged into
        ``self.trailer``, later ones overriding earlier ones.

        Args:
            stream: The PDF stream; it is read completely and its position is
                changed.

        """
        self.xref = {}
        stream.seek(0, 0)
        stream_data = stream.read(-1)

        # The data is scanned from start to end, so a later definition of the
        # same object replaces an earlier one.
        for object_number, generation_number, object_start in self._find_pdf_objects(stream_data):
            if generation_number not in self.xref:
                self.xref[generation_number] = {}
            self.xref[generation_number][object_number] = object_start

        logger_warning("parsing for Object Streams", __name__)
        for generation_number in self.xref:
            for object_number in self.xref[generation_number]:
                # get_object in manual
                stream.seek(self.xref[generation_number][object_number], 0)
                try:
                    _ = self.read_object_header(stream)
                    obj = cast(StreamObject, read_object(stream, self))
                    if obj.get("/Type", "") != "/ObjStm":
                        continue
                    object_stream = BytesIO(obj.get_data())
                    actual_count = 0
                    # The decoded data starts with pairs of integers; parsing
                    # stops at the first token that is not made of digits.
                    while True:
                        current = read_until_whitespace(object_stream)
                        if not current.isdigit():
                            break
                        inner_object_number = int(current)
                        skip_over_whitespace(object_stream)
                        object_stream.seek(-1, 1)
                        current = read_until_whitespace(object_stream)
                        if not current.isdigit():  # pragma: no cover
                            break  # pragma: no cover
                        # NOTE(review): in the PDF specification the second integer
                        # of each pair is the byte offset of the object within the
                        # stream, not a generation number - confirm that consumers
                        # of xref_objStm cope with this value.
                        inner_generation_number = int(current)
                        self.xref_objStm[inner_object_number] = (object_number, inner_generation_number)
                        actual_count += 1
                    if actual_count != obj.get("/N"):  # pragma: no cover
                        logger_warning(  # pragma: no cover
                            f"found {actual_count} objects within Object({object_number},{generation_number})"
                            f" whereas {obj.get('/N')} expected",
                            __name__,
                        )
                except Exception:  # could be multiple causes
                    # best effort: an object that cannot be read is skipped
                    pass

        stream.seek(0, 0)
        for position in self._find_pdf_trailers(stream_data):
            stream.seek(position, 0)
            new_trailer = cast(dict[Any, Any], read_object(stream, self))
            # Here, we are parsing the file from start to end, the new data have to erase the existing.
            for key, value in new_trailer.items():
                self.trailer[key] = value

    def _read_xref_subsections(
        self,
        idx_pairs: list[int],
        get_entry: Callable[[int], Union[int, tuple[int, ...]]],
        used_before: Callable[[int, Union[int, tuple[int, ...]]], bool],
    ) -> None:
        """Read and process the subsections of the xref."""
        for start, size in self._pairs(idx_pairs):
            # The subsections must increase
            for num in range(start, start + size):
                # The first entry is the type
                xref_type = get_entry(0)
                # The rest of the elements depend on the xref_type
                if xref_type == 0:
                    # linked list of free objects
                    next_free_object = get_entry(1)  # noqa: F841
                    next_generation = get_entry(2)  # noqa: F841
                elif xref_type == 1:
                    # objects that are in use but are not compressed
                    byte_offset = get_entry(1)
                    generation = get_entry(2)
                    if generation not in self.xref:
                        self.xref[generation] = {}  # type: ignore
                    if not used_before(num, generation):
                        self.xref[generation][num] = byte_offset  # type: ignore
                elif xref_type == 2:
                    # compressed objects
                    objstr_num = get_entry(1)
                    obstr_idx = get_entry(2)
                    generation = 0  # PDF spec table 18, generation is 0
                    if not used_before(num, generation):
                        self.xref_objStm[num] = (objstr_num, obstr_idx)
                elif self.strict:
                    raise PdfReadError(f"Unknown xref type: {xref_type}")

    def _pairs(self, array: list[int]) -> Iterable[tuple[int, int]]:
        """Iterate over pairs in the array."""
        i = 0
        while i + 1 < len(array):
            yield array[i], array[i + 1]
            i += 2

    def decrypt(self, password: Union[str, bytes]) -> PasswordType:
        """
        Decrypt a PDF file protected with the PDF Standard encryption handler.

        The given password is checked against both the user password and the
        owner password of the document. If either of them matches, the
        resulting decryption key is stored.

        It does not matter which password was matched. Both passwords provide
        the correct decryption key that will allow the document to be used with
        this library.

        Args:
            password: The password to match.

        Returns:
            An indicator if the document was decrypted and whether it was the
            owner password or the user password.

        Raises:
            PdfReadError: If the file is not encrypted.

        """
        if self._encryption:
            # TODO: raise Exception for wrong password
            return self._encryption.verify(password)
        raise PdfReadError("Not encrypted file")

    @property
    def is_encrypted(self) -> bool:
        """
        Whether this PDF file is encrypted (read-only).

        The value is derived from the trailer, so it stays true even after
        :meth:`decrypt()<pypdf.PdfReader.decrypt>` has been called.
        """
        trailer = self.trailer
        return TK.ENCRYPT in trailer

    def add_form_topname(self, name: str) -> Optional[DictionaryObject]:
        """
        Insert a new top level form field above all existing form fields.

        The new field takes over the current ``/Fields`` array as its
        ``/Kids`` and becomes the only entry of ``/Fields``; every former
        top level field gets the new field as ``/Parent``.

        Args:
            name: text string of the "/T" Attribute of the created object

        Returns:
            The created object. ``None`` means no object was created.

        """
        catalog = self.root_object

        has_acroform = "/AcroForm" in catalog and isinstance(
            catalog["/AcroForm"], DictionaryObject
        )
        if not has_acroform:
            return None
        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
        if "/Fields" not in acroform:
            # TODO: No error but this may be extended for XFA Forms
            return None

        top_field = DictionaryObject()
        top_field[NameObject("/T")] = TextStringObject(name)
        top_field[NameObject("/Kids")] = acroform[NameObject("/Fields")]

        # Register the new field under the number following the highest
        # resolved object number of generation 0.
        next_id = max(i for (g, i) in self.resolved_objects if g == 0) + 1
        self.cache_indirect_object(0, next_id, top_field)

        new_fields = ArrayObject()
        new_fields.append(top_field.indirect_reference)
        acroform[NameObject("/Fields")] = new_fields

        for kid_reference in cast(ArrayObject, top_field["/Kids"]):
            kid = kid_reference.get_object()
            if "/Parent" in kid:
                logger_warning(
                    f"Top Level Form Field {kid.indirect_reference} have a non-expected parent",
                    __name__,
                )
            kid[NameObject("/Parent")] = top_field.indirect_reference
        return top_field

    def rename_form_topname(self, name: str) -> Optional[DictionaryObject]:
        """
        Rename the top level form field that groups all form fields below it.

        The first entry of the ``/Fields`` array of the AcroForm is considered
        to be the top level form field.

        Args:
            name: text string of the "/T" field of the renamed object

        Returns:
            The modified object. ``None`` means no object was modified, which
            is the case when there is no AcroForm dictionary, no ``/Fields``
            entry or when ``/Fields`` is empty.

        """
        catalog = self.root_object

        if "/AcroForm" not in catalog or not isinstance(
            catalog["/AcroForm"], DictionaryObject
        ):
            return None
        acroform = cast(DictionaryObject, catalog[NameObject("/AcroForm")])
        if "/Fields" not in acroform:
            return None

        fields = cast(ArrayObject, acroform[NameObject("/Fields")])
        if len(fields) == 0:
            # Nothing to rename: report it like the other "not modified"
            # cases instead of failing with an IndexError.
            return None

        interim = cast(DictionaryObject, fields[0].get_object())
        interim[NameObject("/T")] = TextStringObject(name)
        return interim

    def _repr_mimebundle_(
        self,
        include: Union[None, Iterable[str]] = None,
        exclude: Union[None, Iterable[str]] = None,
    ) -> dict[str, Any]:
        """
        Integration into Jupyter Notebooks.

        This method returns a dictionary that maps a mime-type to its
        representation.

        .. seealso::

            https://ipython.readthedocs.io/en/stable/config/integrating.html
        """
        self.stream.seek(0)
        pdf_data = self.stream.read()
        data = {
            "application/pdf": pdf_data,
        }

        if include is not None:
            # Filter representations based on include list
            data = {k: v for k, v in data.items() if k in include}

        if exclude is not None:
            # Remove representations based on exclude list
            data = {k: v for k, v in data.items() if k not in exclude}

        return data


================================================
FILE: pypdf/_text_extraction/__init__.py
================================================
"""
Code related to text extraction.

Some parts are still in _page.py. In doubt, they will stay there.
"""

import math
from typing import Any, Callable, Optional, Union

from .._font import Font
from ..generic import DictionaryObject, TextStringObject, encode_pdfdocencoding

# Bounds of an additional range of character codes written right to left.
# Both are changed through set_custom_rtl(); -1 sets no additional range.
CUSTOM_RTL_MIN: int = -1
CUSTOM_RTL_MAX: int = -1
# Character codes of the special characters; changed through set_custom_rtl().
CUSTOM_RTL_SPECIAL_CHARS: list[int] = []
# NOTE(review): not referenced in the visible part of this module; presumably a
# threshold expressed in space widths for the layout mode - confirm.
LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS: int = 5


class OrientationNotFoundError(Exception):
    """Raised when the orientation of a text is not among the requested ones."""


def set_custom_rtl(
    _min: Union[str, int, None] = None,
    _max: Union[str, int, None] = None,
    specials: Union[str, list[int], None] = None,
) -> tuple[int, int, list[int]]:
    """
    Change the Right-To-Left and special characters custom parameters.

    Args:
        _min: The new minimum value for the range of custom characters that
            will be written right to left.
            If set to ``None``, the value will not be changed.
            An integer is used as is; a string has to be a single character
            and is converted to its code point with ``ord``.
            The default value is -1, which sets no additional range to be converted.
        _max: The new maximum value for the range of custom characters that will
            be written right to left.
            If set to ``None``, the value will not be changed.
            An integer is used as is; a string has to be a single character
            and is converted to its code point with ``ord``.
            The default value is -1, which sets no additional range to be converted.
        specials: The new list of special characters to be inserted in the
            current insertion order.
            If set to ``None``, the current value will not be changed.
            A string is converted to the list of the code points of its
            characters; a list is stored as is.
            The default value is an empty list.

    Returns:
        A tuple containing the new values for ``CUSTOM_RTL_MIN``,
        ``CUSTOM_RTL_MAX``, and ``CUSTOM_RTL_SPECIAL_CHARS``.

    """
    global CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS

    # Lower bound of the range
    if isinstance(_min, str):
        CUSTOM_RTL_MIN = ord(_min)
    elif isinstance(_min, int):
        CUSTOM_RTL_MIN = _min

    # Upper bound of the range
    if isinstance(_max, str):
        CUSTOM_RTL_MAX = ord(_max)
    elif isinstance(_max, int):
        CUSTOM_RTL_MAX = _max

    # Special characters
    if isinstance(specials, list):
        CUSTOM_RTL_SPECIAL_CHARS = specials
    elif isinstance(specials, str):
        CUSTOM_RTL_SPECIAL_CHARS = list(map(ord, specials))

    return CUSTOM_RTL_MIN, CUSTOM_RTL_MAX, CUSTOM_RTL_SPECIAL_CHARS


def mult(m: list[float], n: list[float]) -> list[float]:
    """
    Multiply two matrices given as six-element lists.

    Each list ``[a, b, c, d, e, f]`` is used as the matrix with the rows
    ``(a, b, 0)``, ``(c, d, 0)`` and ``(e, f, 1)``; the result is ``m x n``
    in the same six-element form.
    """
    m_a, m_b, m_c, m_d, m_e, m_f = m[0], m[1], m[2], m[3], m[4], m[5]
    n_a, n_b, n_c, n_d, n_e, n_f = n[0], n[1], n[2], n[3], n[4], n[5]
    first_row = [m_a * n_a + m_b * n_c, m_a * n_b + m_b * n_d]
    second_row = [m_c * n_a + m_d * n_c, m_c * n_b + m_d * n_d]
    third_row = [
        m_e * n_a + m_f * n_c + n_e,
        m_e * n_b + m_f * n_d + n_f,
    ]
    return first_row + second_row + third_row


def orient(m: list[float]) -> int:
    """
    Return the orientation (0, 90, 180 or 270) derived from a matrix.

    The element at index 3 decides between 0 and 180 unless it lies within
    1e-6 of zero; in that case the sign of the element at index 1 decides
    between 90 and 270.
    """
    vertical = m[3]
    if abs(vertical) > 1e-6:
        return 0 if vertical > 0 else 180
    return 90 if m[1] > 0 else 270


def crlf_space_check(
    text: str,
    cmtm_prev: tuple[list[float], list[float]],
    cmtm_matrix: tuple[list[float], list[float]],
    memo_cmtm: tuple[list[float], list[float]],
    font_resource: Optional[DictionaryObject],
    orientations: tuple[int, ...],
    output: str,
    font_size: float,
    visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]],
    str_widths: float,
    spacewidth: float,
    str_height: float,
) -> tuple[str, str, list[float], list[float]]:
    """
    Decide whether a line break or a space has to be inserted before new text.

    The previous and the current (cm, tm) pairs are combined with ``mult`` and
    the movement between both positions is compared with the text metrics.

    Args:
        text: pending text that has not been moved to ``output`` yet.
        cmtm_prev: ``(cm, tm)`` pair of the previous position.
        cmtm_matrix: ``(cm, tm)`` pair of the current position.
        memo_cmtm: ``(cm, tm)`` pair handed to ``visitor_text`` when a line
            is flushed.
        font_resource: forwarded unchanged to ``visitor_text``.
        orientations: accepted values of ``orient`` for the current matrix.
        output: text emitted so far.
        font_size: current font size.
        visitor_text: optional callback receiving each flushed line.
        str_widths: width of the pending text.
        spacewidth: width of a space.
        str_height: height of the pending text.

    Returns:
        ``(text, output, cm, tm)`` where ``cm`` and ``tm`` are copies of the
        current matrices, to be used as "previous" values by the next call.

    Raises:
        OrientationNotFoundError: if the current orientation is not listed in
            ``orientations``.

    """
    cm_before = cmtm_prev[0]
    tm_before = cmtm_prev[1]
    cm_now = cmtm_matrix[0]
    tm_now = cmtm_matrix[1]
    flush_cm = memo_cmtm[0]
    flush_tm = memo_cmtm[1]

    before = mult(tm_before, cm_before)
    now = mult(tm_now, cm_now)
    orientation = orient(now)
    delta_x = now[4] - before[4]
    delta_y = now[5] - before[5]
    # Table 108 of the 1.7 reference ("Text positioning operators")
    scale_prev_x = math.sqrt(tm_before[0]**2 + tm_before[1]**2)
    scale_prev_y = math.sqrt(tm_before[2]**2 + tm_before[3]**2)
    scale_y = math.sqrt(tm_now[2]**2 + tm_now[3]**2)

    if orientation not in orientations:
        raise OrientationNotFoundError
    # for 90/270 the roles of the x and y movement are swapped
    if orientation in (0, 180):
        moved_height, moved_width = delta_y, delta_x
    elif orientation in (90, 270):
        moved_height, moved_width = delta_x, delta_y

    # Best effort: any failure in here (e.g. indexing the last character of
    # an empty string) leaves text/output as they are at that point.
    try:
        vertical_jump = abs(moved_height)
        line_limit = 0.8 * min(str_height * scale_prev_y, font_size * scale_y)
        if vertical_jump > line_limit:
            if (output + text)[-1] != "\n":
                flushed = text + "\n"
                output += flushed
                if visitor_text is not None:
                    visitor_text(flushed, flush_cm, flush_tm, font_resource, font_size)
                text = ""
        elif (
            moved_width >= (spacewidth + str_widths) * scale_prev_x
            and (output + text)[-1] != " "
        ):
            text += " "
    except Exception:
        pass
    next_tm_prev = tm_now.copy()
    next_cm_prev = cm_now.copy()
    return text, output, next_cm_prev, next_tm_prev


def get_text_operands(
    operands: list[Union[str, TextStringObject]],
    cm_matrix: list[float],
    tm_matrix: list[float],
    font: Font,
    orientations: tuple[int, ...]
) -> tuple[str, bool]:
    """
    Return the text of the first operand of a text show operation.

    Args:
        operands: operands of the operator; only ``operands[0]`` is used.
        cm_matrix: current transformation matrix.
        tm_matrix: current text matrix.
        font: font whose ``encoding`` (codec name or mapping indexed by byte
            value) is used to decode a non-``str`` operand.
        orientations: accepted values of ``orient`` for the combined matrix.

    Returns:
        ``(text, is_str_operands)``. ``is_str_operands`` is ``True`` only when
        the first operand already was a ``str``. ``("", False)`` is returned
        when the orientation is not accepted or there is no operand.

    """
    orientation = orient(mult(tm_matrix, cm_matrix))
    if orientation not in orientations or len(operands) == 0:
        return ("", False)

    first = operands[0]
    if isinstance(first, str):
        return (first, True)

    # str operands were returned above, what is left is handled as raw bytes
    raw: bytes = first
    encoding = font.encoding
    if not isinstance(encoding, str):
        # apply dict encoding
        decoded = "".join(
            [encoding[code] if code in encoding else bytes((code,)).decode() for code in raw]
        )
        return (decoded, False)

    try:
        decoded = raw.decode(encoding, "surrogatepass")  # apply str encoding
    except Exception:
        # the data does not match the expectation,
        # we use the alternative;
        # text extraction may not be good
        fallback = "utf-16-be" if encoding == "charmap" else "charmap"
        decoded = raw.decode(fallback, "surrogatepass")  # apply str encoding
    return (decoded, False)


def get_display_str(
    text: str,
    cm_matrix: list[float],
    tm_matrix: list[float],
    font_resource: Optional[DictionaryObject],
    font: Font,
    text_operands: str,
    font_size: float,
    rtl_dir: bool,
    visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]]
) -> tuple[str, bool, float]:
    """
    Add the characters of ``text_operands`` to ``text`` in display order.

    Every character is first translated through ``font.character_map``.
    Direction-neutral characters are inserted following the current
    direction; right-to-left and left-to-right characters switch the
    direction when needed. On a switch the text collected so far is handed
    to ``visitor_text`` (if given) and a new run is started.

    Args:
        text: text collected so far for the current run.
        cm_matrix: forwarded unchanged to ``visitor_text``.
        tm_matrix: forwarded unchanged to ``visitor_text``.
        font_resource: forwarded unchanged to ``visitor_text``.
        font: provides ``character_map``, ``space_width`` and ``text_width``.
        text_operands: characters to add.
        font_size: forwarded unchanged to ``visitor_text``.
        rtl_dir: ``True`` if the current run is written right to left.
        visitor_text: optional callback receiving a run when the direction
            changes.

    Returns:
        ``(text, rtl_dir, widths)``: the updated run, the updated direction
        and the summed width of all processed characters.

    """
    # "\u0590 - \u08FF \uFB50 - \uFDFF"
    widths: float = 0.0
    glyphs = [font.character_map.get(char, char) for char in text_operands]
    for glyph in glyphs:
        # a mapped value can be a sequence (ex: habibi.pdf); it then gets the
        # code 1, which keeps the current insertion order
        code = ord(glyph) if len(glyph) == 1 else 1
        # fmt: off
        if (
            # characters for which the current insertion order is kept
            code <= 0x2F                         # control characters, space, ! .. /
            or 0x3A <= code <= 0x40              # : .. @ (the digits x30-39 are excluded)
            or 0x2000 <= code <= 0x206F
            or 0x20A0 <= code <= 0x21FF
            or code in CUSTOM_RTL_SPECIAL_CHARS  # customized
        ):
            text = glyph + text if rtl_dir else text + glyph
        else:
            is_rtl_char = (
                0x0590 <= code <= 0x08FF
                or 0xFB1D <= code <= 0xFDFF
                or 0xFE70 <= code <= 0xFEFF
                or CUSTOM_RTL_MIN <= code <= CUSTOM_RTL_MAX
            )
            if is_rtl_char != bool(rtl_dir):
                # direction change: report the finished run, start a new one
                rtl_dir = is_rtl_char
                if visitor_text is not None:
                    visitor_text(text, cm_matrix, tm_matrix, font_resource, font_size)
                text = ""
            text = glyph + text if is_rtl_char else text + glyph
        # fmt: on
        widths += font.space_width if glyph == " " else font.text_width(glyph)
    return text, rtl_dir, widths


================================================
FILE: pypdf/_text_extraction/_layout_mode/__init__.py
================================================
"""Layout mode text extraction extension for pypdf"""
from ..._font import Font
from ._fixed_width_page import (
    fixed_char_width,
    fixed_width_page,
    text_show_operations,
    y_coordinate_groups,
)

__all__ = [
    "Font",
    "fixed_char_width",
    "fixed_width_page",
    "text_show_operations",
    "y_coordinate_groups",
]


================================================
FILE: pypdf/_text_extraction/_layout_mode/_fixed_width_page.py
================================================
"""Extract PDF text preserving the layout of the source PDF"""

from collections.abc import Iterator
from itertools import groupby
from math import ceil
from pathlib import Path
from typing import Any, Literal, Optional, TypedDict

from ..._font import Font
from ..._utils import logger_warning
from .. import LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS
from ._text_state_manager import TextStateManager
from ._text_state_params import TextStateParams


class BTGroup(TypedDict):
    """
    Dict describing a line of text rendered within a BT/ET operator pair.
    If multiple text show operations render text on the same line, the text
    will be combined into a single BTGroup dict.

    Keys:
        tx: x coordinate of the first text show operation in the BTGroup
        ty: y coordinate of the first text show operation in the BTGroup
        font_size: nominal font size
        font_height: effective font height
        text: rendered text
        displaced_tx: x coordinate immediately following the last rendered
            text of the BTGroup
        flip_sort: -1 if the originating text state is vertically flipped
            (page is upside down), else 1
    """

    # position and font data are copied from the first text show operation
    tx: float
    ty: float
    font_size: float
    font_height: float
    # combined text of all text show operations in the group
    text: str
    displaced_tx: float
    # multiplied with ty when groups are sorted or keyed by y coordinate
    flip_sort: Literal[-1, 1]


def bt_group(tj_op: TextStateParams, rendered_text: str, dispaced_tx: float) -> BTGroup:
    """
    Build a BTGroup dict for one line of rendered text.

    Position and font data are taken from ``tj_op``; the text and the end
    position are supplied by the caller.

    Args:
        tj_op (TextStateParams): text state of the first text show operation
            of the group
        rendered_text (str): rendered text
        dispaced_tx (float): x coordinate following the last character in
            the BTGroup

    Returns:
        BTGroup: the new group dict.

    """
    group: BTGroup = {
        "tx": tj_op.tx,
        "ty": tj_op.ty,
        "font_size": tj_op.font_size,
        "font_height": tj_op.font_height,
        "text": rendered_text,
        "displaced_tx": dispaced_tx,
        "flip_sort": -1 if tj_op.flip_vertical else 1,
    }
    return group


def _bt_groups_from_tj_ops(
    tj_ops: list[TextStateParams], strip_rotated: bool
) -> list[BTGroup]:
    """Combine the text show operations of one BT/ET pair into per-line BTGroups."""
    groups: list[BTGroup] = []
    _text = ""
    bt_idx = 0  # idx of first tj in the group currently being built
    last_displaced_tx = tj_ops[bt_idx].displaced_tx
    last_ty = tj_ops[bt_idx].ty
    for _idx, _tj in enumerate(tj_ops):
        if strip_rotated and _tj.rotated:
            continue
        if not _tj.font.interpretable:  # reported by text_show_operations()
            continue
        # if the y position of the text differs by more than the font height,
        # assume the text is on a new line and start a new group
        if abs(_tj.ty - last_ty) > _tj.font_height:
            if _text.strip():
                groups.append(bt_group(tj_ops[bt_idx], _text, last_displaced_tx))
            bt_idx = _idx
            _text = ""

        # if the x position of the text is less than the last x position by
        # more than LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS space widths, assume the
        # text order should be flipped and start a new group
        if (
            last_displaced_tx - _tj.tx
            > _tj.space_tx * LAYOUT_NEW_BT_GROUP_SPACE_WIDTHS
        ):
            if _text.strip():
                groups.append(bt_group(tj_ops[bt_idx], _text, last_displaced_tx))
            bt_idx = _idx
            last_displaced_tx = _tj.displaced_tx
            _text = ""

        # calculate excess x translation based on ending tx of previous Tj.
        # multiply by bool (_idx != bt_idx) to ensure spaces aren't double
        # applied to the first tj of a BTGroup in fixed_width_page().
        excess_tx = round(_tj.tx - last_displaced_tx, 3) * (_idx != bt_idx)
        # space_tx could be 0 if either Tz or font_size was 0 for this _tj.
        spaces = int(excess_tx // _tj.space_tx) if _tj.space_tx else 0
        new_text = f'{" " * spaces}{_tj.txt}'

        last_ty = _tj.ty
        _text = f"{_text}{new_text}"
        last_displaced_tx = _tj.displaced_tx
    if _text:
        groups.append(bt_group(tj_ops[bt_idx], _text, last_displaced_tx))
    return groups


def recurs_to_target_op(
    ops: Iterator[tuple[list[Any], bytes]],
    text_state_mgr: TextStateManager,
    end_target: Literal[b"Q", b"ET"],
    fonts: dict[str, Font],
    strip_rotated: bool = True,
) -> tuple[list[BTGroup], list[TextStateParams]]:
    """
    Recurse operators between BT/ET and/or q/Q operators managing the transform
    stack and capturing text positioning and rendering data.

    The shared ``ops`` iterator is consumed up to and including the matching
    ``end_target`` operator. If the iterator is exhausted first, a warning
    about unbalanced operations is logged.

    Args:
        ops: iterator of operators in content stream
        text_state_mgr: a TextStateManager instance
        end_target: Either b"Q" (ends b"q" op) or b"ET" (ends b"BT" op)
        fonts: font dictionary as returned by PageObject._layout_mode_fonts()
        strip_rotated: skip text show operations flagged as rotated when
            building the BTGroups. Defaults to True.

    Returns:
        tuple: list of BTGroup dicts + list of TextStateParams dataclass instances.

    """
    # 1 entry per line of text rendered within each BT/ET operation.
    bt_groups: list[BTGroup] = []

    # 1 entry per text show operator (Tj/TJ/'/")
    tj_ops: list[TextStateParams] = []

    if end_target == b"Q":
        # add new q level. cm's added at this level will be popped at next b'Q'
        text_state_mgr.add_q()

    for operands, op in ops:
        # The loop is broken by the end target, or exits normally when there are no more ops.
        if op == end_target:
            if op == b"Q":
                text_state_mgr.remove_q()
            if op == b"ET":
                if tj_ops:
                    bt_groups.extend(_bt_groups_from_tj_ops(tj_ops, strip_rotated))
                # Always drop the text matrices of the closed text object,
                # even when it did not show any text: otherwise Td/Tm values
                # of an empty BT/ET pair would leak into the next BT.
                text_state_mgr.reset_tm()
            break
        if op == b"q":
            bts, tjs = recurs_to_target_op(
                ops, text_state_mgr, b"Q", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"cm":
            text_state_mgr.add_cm(*operands)
        elif op == b"BT":
            bts, tjs = recurs_to_target_op(
                ops, text_state_mgr, b"ET", fonts, strip_rotated
            )
            bt_groups.extend(bts)
            tj_ops.extend(tjs)
        elif op == b"Tj":
            tj_ops.append(text_state_mgr.text_state_params(operands[0]))
        elif op == b"TJ":
            _tj = text_state_mgr.text_state_params()
            for tj_op in operands[0]:
                if isinstance(tj_op, bytes):
                    # a string element: capture it with the current state
                    _tj = text_state_mgr.text_state_params(tj_op)
                    tj_ops.append(_tj)
                else:
                    # a non-string element: position adjustment
                    text_state_mgr.add_trm(_tj.displacement_matrix(td_offset=tj_op))
        elif op == b"'":
            # move to the next line (using the leading TL), then show text
            text_state_mgr.reset_trm()
            text_state_mgr.add_tm([0, -text_state_mgr.TL])
            tj_ops.append(text_state_mgr.text_state_params(operands[0]))
        elif op == b'"':
            # set Tw and Tc, move to the next line, then show text
            text_state_mgr.reset_trm()
            text_state_mgr.set_state_param(b"Tw", operands[0])
            text_state_mgr.set_state_param(b"Tc", operands[1])
            text_state_mgr.add_tm([0, -text_state_mgr.TL])
            tj_ops.append(text_state_mgr.text_state_params(operands[2]))
        elif op in (b"Td", b"Tm", b"TD", b"T*"):
            text_state_mgr.reset_trm()
            if op == b"Tm":
                # Tm replaces the text matrix instead of adding to it
                text_state_mgr.reset_tm()
            elif op == b"TD":
                # TD also sets the leading to the negated y offset
                text_state_mgr.set_state_param(b"TL", -operands[1])
            elif op == b"T*":
                operands = [0, -text_state_mgr.TL]
            text_state_mgr.add_tm(operands)
        elif op == b"Tf":
            text_state_mgr.set_font(fonts[operands[0]], operands[1])
        else:  # handle Tc, Tw, Tz, TL, and Ts operators
            text_state_mgr.set_state_param(op, operands)
    else:
        logger_warning(
            f"Unbalanced target operations, expected {end_target!r}.",
            __name__,
        )
    return bt_groups, tj_ops


def y_coordinate_groups(
    bt_groups: list[BTGroup], debug_path: Optional[Path] = None
) -> dict[int, list[BTGroup]]:
    """
    Group text operations by rendered y coordinate, i.e. the line number.

    Groups are keyed by ``int(ty * flip_sort)`` in order of first appearance
    and sorted by ``tx``. A group is then merged into the preceding one if
    their keys differ by less than the effective font height and they do not
    render non-blank text at the same integer x position.

    Args:
        bt_groups: list of dicts as returned by text_show_operations()
        debug_path (Path, optional): Path to a directory for saving debug output.

    Returns:
        Dict[int, List[BTGroup]]: dict of lists of text rendered by each BT operator
            keyed by y coordinate. Empty if ``bt_groups`` is empty.

    """
    ty_groups: dict[int, list[BTGroup]] = {}
    # groupby() only yields consecutive runs. Runs sharing a key are merged
    # so that no text is dropped if the input is not sorted by y coordinate.
    for ty, grp in groupby(
        bt_groups, key=lambda bt_grp: int(bt_grp["ty"] * bt_grp["flip_sort"])
    ):
        ty_groups.setdefault(ty, []).extend(grp)
    for line in ty_groups.values():
        line.sort(key=lambda x: x["tx"])

    if ty_groups:  # nothing to combine for empty input
        # combine groups whose y coordinates differ by less than the effective font height
        # (accounts for mixed fonts and other minor oddities)
        last_ty = next(iter(ty_groups))
        last_txs = {int(_t["tx"]) for _t in ty_groups[last_ty] if _t["text"].strip()}
        for ty in list(ty_groups)[1:]:
            fsz = min(ty_groups[_y][0]["font_height"] for _y in (ty, last_ty))
            txs = {int(_t["tx"]) for _t in ty_groups[ty] if _t["text"].strip()}
            # prevent merge if both groups are rendering in the same x position.
            no_text_overlap = not (txs & last_txs)
            offset_less_than_font_height = abs(ty - last_ty) < fsz
            if no_text_overlap and offset_less_than_font_height:
                ty_groups[last_ty] = sorted(
                    ty_groups.pop(ty) + ty_groups[last_ty], key=lambda x: x["tx"]
                )
                last_txs |= txs
            else:
                last_ty = ty
                last_txs = txs
    if debug_path:  # pragma: no cover
        import json  # noqa: PLC0415

        debug_path.joinpath("bt_groups.json").write_text(
            json.dumps(ty_groups, indent=2, default=str), "utf-8"
        )
    return ty_groups


def text_show_operations(
    ops: Iterator[tuple[list[Any], bytes]],
    fonts: dict[str, Font],
    strip_rotated: bool = True,
    debug_path: Optional[Path] = None,
) -> list[BTGroup]:
    """
    Extract text from BT/ET operator pairs.

    The resulting groups are left aligned (the smallest ``tx`` becomes 0) and
    sorted by ``(ty * flip_sort, -tx)`` in descending order. Warnings are
    logged if rotated text or an uninterpretable font was encountered.

    Args:
        ops (Iterator[Tuple[List, bytes]]): iterator of operators in content stream
        fonts (Dict[str, Font]): font dictionary
        strip_rotated: Removes text if rotated w.r.t. to the page. Defaults to True.
        debug_path (Path, optional): Path to a directory for saving debug output.

    Returns:
        List[BTGroup]: list of dicts of text rendered by each BT operator

    """
    state_mgr = TextStateManager()  # transformation stack manager
    bt_groups: list[BTGroup] = []  # BT operator dict
    tj_ops: list[TextStateParams] = []  # Tj/TJ operator data
    for operands, op in ops:
        if op == b"Tf":
            state_mgr.set_font(fonts[operands[0]], operands[1])
        elif op in (b"BT", b"q"):
            # the nested call consumes ops up to the matching ET / Q
            end_target: Literal[b"Q", b"ET"] = b"ET" if op == b"BT" else b"Q"
            nested_bts, nested_tjs = recurs_to_target_op(
                ops, state_mgr, end_target, fonts, strip_rotated
            )
            bt_groups.extend(nested_bts)
            tj_ops.extend(nested_tjs)
        else:  # set Tc, Tw, Tz, TL, and Ts if required. ignores all other ops
            state_mgr.set_state_param(op, operands)

    found_rotated = any(tj.rotated for tj in tj_ops)
    if found_rotated and strip_rotated:
        logger_warning(
            "Rotated text discovered. Output will be incomplete.", __name__
        )
    elif found_rotated:
        logger_warning(
            "Rotated text discovered. Layout will be degraded.", __name__
        )
    if any(not tj.font.interpretable for tj in tj_ops):
        logger_warning(
            "PDF contains an uninterpretable font. Output will be incomplete.", __name__
        )

    # left align the data, i.e. decrement all tx values by min(tx)
    min_x = min((x["tx"] for x in bt_groups), default=0.0)
    ordered = sorted(
        bt_groups, key=lambda x: (x["ty"] * x["flip_sort"], -x["tx"]), reverse=True
    )
    bt_groups = [
        {**grp, "tx": grp["tx"] - min_x, "displaced_tx": grp["displaced_tx"] - min_x}  # type: ignore[misc]
        for grp in ordered
    ]

    if debug_path:  # pragma: no cover
        import json  # noqa: PLC0415

        bts_json = json.dumps(bt_groups, indent=2, default=str)
        debug_path.joinpath("bts.json").write_text(bts_json, "utf-8")
        tjs_json = json.dumps(
            tj_ops, indent=2, default=lambda x: getattr(x, "to_dict", str)(x)
        )
        debug_path.joinpath("tjs.json").write_text(tjs_json, "utf-8")
    return bt_groups


def fixed_char_width(bt_groups: list[BTGroup], scale_weight: float = 1.25) -> float:
    """
    Calculate average character width weighted by the length of the rendered
    text in each sample for conversion to fixed-width layout.

    Args:
        bt_groups (List[BTGroup]): List of dicts of text rendered by each
            BT operator
        scale_weight (float): factor applied to the text length of every
            sample. Defaults to 1.25.

    Returns:
        float: fixed character width

    """
    widths: list[float] = []
    weights: list[float] = []
    for group in bt_groups:
        weight = len(group["text"]) * scale_weight
        weights.append(weight)
        widths.append((group["displaced_tx"] - group["tx"]) / weight)
    weighted_total = sum(width * weight for width, weight in zip(widths, weights))
    return weighted_total / sum(weights)


def fixed_width_page(
    ty_groups: dict[int, list[BTGroup]], char_width: float, space_vertically: bool, font_height_weight: float
) -> str:
    """
    Generate page text from text operations grouped by rendered y coordinate.

    Every group of a line is placed at column ``tx // char_width``; missing
    columns are padded with spaces unless the group starts where the previous
    one ended. Characters with code 14 to 31 are replaced by spaces.

    Args:
        ty_groups: dict of text show ops as returned by y_coordinate_groups()
        char_width: fixed character width
        space_vertically: include blank lines inferred from y distance + font height.
        font_height_weight: multiplier for font height when calculating blank lines.

    Returns:
        str: page text in a fixed width format that closely adheres to the rendered
            layout in the source pdf.

    """
    page_lines: list[str] = []
    previous_y = 0
    control_to_space = str.maketrans(dict.fromkeys(range(14, 32), " "))
    for y_coord, groups in ty_groups.items():
        if space_vertically and page_lines:
            font_height = groups[0]["font_height"]
            if font_height != 0:
                # number of whole (weighted) font heights skipped since the last line
                gap = int(abs(y_coord - previous_y) / (font_height * font_height_weight)) - 1
                page_lines.extend([""] * gap)

        pieces = []  # joined once per line instead of repeated concatenation
        column = 0  # number of characters collected in pieces
        previous_end = 0.0
        for group in groups:
            start_x = group["tx"]
            missing = int(start_x // char_width) - column
            # pad only if there is a real gap after the previous group
            if missing > 0 and ceil(previous_end) < int(start_x):
                pieces.append(" " * missing)
                column += missing

            cleaned = group["text"].translate(control_to_space)
            pieces.append(cleaned)
            column += len(cleaned)
            previous_end = group["displaced_tx"]

        line = "".join(pieces).rstrip()
        if line.strip() or (space_vertically and page_lines):
            page_lines.append(line)

        previous_y = y_coord

    return "\n".join(page_lines)


================================================
FILE: pypdf/_text_extraction/_layout_mode/_text_state_manager.py
================================================
"""manage the PDF transform stack during "layout" mode text extraction"""

from collections import ChainMap, Counter
from collections import ChainMap as ChainMapType
from collections import Counter as CounterType
from collections.abc import MutableMapping
from typing import Any, Union

from ..._font import Font
from ...errors import PdfReadError
from .. import mult
from ._text_state_params import TextStateParams

TextStateManagerChainMapType = ChainMapType[Union[int, str], Union[float, bool]]
TextStateManagerDictType = MutableMapping[Union[int, str], Union[float, bool]]


class TextStateManager:
    """
    Tracks the current text state including cm/tm/trm transformation matrices.

    Attributes:
        transform_stack (ChainMap): ChainMap of transformation matrices, the
            most recently added one first. Every map holds the matrix values
            under the keys 0-5 plus the flags 'is_text' and 'is_render'.
        q_queue (Counter[int]): number of cm transforms added per q level
        q_depth (List[int]): list of q operator nesting levels
        Tc (float): character spacing
        Tw (float): word spacing
        Tz (float): horizontal scaling
        TL (float): leading
        Ts (float): text rise
        font_stack (List[Tuple]): (font, font_size) pairs saved by add_q()
        font (Font): font object
        font_size (int | float): font size

    """

    def __init__(self) -> None:
        # transformation bookkeeping
        self.transform_stack: TextStateManagerChainMapType = ChainMap(
            self.new_transform()
        )
        self.q_queue: CounterType[int] = Counter()
        self.q_depth = [0]
        # text state parameters
        self.Tc: float = 0.0
        self.Tw: float = 0.0
        self.Tz: float = 100.0
        self.TL: float = 0.0
        self.Ts: float = 0.0
        # font bookkeeping
        self.font_stack: list[tuple[Union[Font, None], Union[int, float]]] = []
        self.font: Union[Font, None] = None
        self.font_size: Union[int, float] = 0

    def set_state_param(self, op: bytes, value: Union[float, list[Any]]) -> None:
        """
        Set a text state parameter. Supports Tc, Tz, Tw, TL, and Ts operators.

        Args:
            op: operator read from PDF stream as bytes. No action is taken
                for unsupported operators (see supported operators above).
            value (float | List[Any]): new parameter value. If a list,
                value[0] is used.

        """
        if op in (b"Tc", b"Tz", b"Tw", b"TL", b"Ts"):
            new_value = value[0] if isinstance(value, list) else value
            # the attribute carries the same name as the operator
            setattr(self, op.decode(), new_value)

    def set_font(self, font: Font, size: float) -> None:
        """
        Set the current font and font_size.

        Args:
            font (Font): a layout mode Font
            size (float): font size

        """
        self.font, self.font_size = font, size

    def text_state_params(self, value: Union[bytes, str] = "") -> TextStateParams:
        """
        Create a TextStateParams instance to display a text string. Type[bytes] values
        will be decoded implicitly.

        Bytes are decoded with the encoding of the current font (falling back
        to UTF-8 with replacement on a decoding error) and then translated
        through the character map of the font. ``str`` values are used as is.

        Args:
            value (str | bytes): text to associate with the captured state.

        Raises:
            PdfReadError: if font not set (no Tf operator in incoming pdf content stream)

        Returns:
            TextStateParams: current text state parameters

        """
        font = self.font
        if not isinstance(font, Font):
            raise PdfReadError(
                "font not set: is PDF missing a Tf operator?"
            )  # pragma: no cover
        if not isinstance(value, bytes):
            txt = value
        else:
            encoding = font.encoding
            try:
                if isinstance(encoding, str):
                    txt = value.decode(encoding, "surrogatepass")
                else:
                    # mapping indexed by byte value
                    txt = "".join(
                        encoding[code] if code in encoding else bytes((code,)).decode()
                        for code in value
                    )
            except (UnicodeEncodeError, UnicodeDecodeError):
                txt = value.decode("utf-8", "replace")
            char_map = font.character_map
            txt = "".join(char_map.get(char, char) for char in txt)
        return TextStateParams(
            txt=txt,
            font=font,
            font_size=self.font_size,
            Tc=self.Tc,
            Tw=self.Tw,
            Tz=self.Tz,
            TL=self.TL,
            Ts=self.Ts,
            transform=self.effective_transform,
        )

    @staticmethod
    def raw_transform(
        _a: float = 1.0,
        _b: float = 0.0,
        _c: float = 0.0,
        _d: float = 1.0,
        _e: float = 0.0,
        _f: float = 0.0,
    ) -> dict[int, float]:
        """Only a/b/c/d/e/f matrix params"""
        return {
            index: float(coefficient)
            for index, coefficient in enumerate((_a, _b, _c, _d, _e, _f))
        }

    @staticmethod
    def new_transform(
        _a: float = 1.0,
        _b: float = 0.0,
        _c: float = 0.0,
        _d: float = 1.0,
        _e: float = 0.0,
        _f: float = 0.0,
        is_text: bool = False,
        is_render: bool = False,
    ) -> TextStateManagerDictType:
        """Standard a/b/c/d/e/f matrix params + 'is_text' and 'is_render' keys"""
        transform: Any = TextStateManager.raw_transform(_a, _b, _c, _d, _e, _f)
        transform["is_text"] = is_text
        transform["is_render"] = is_render
        return transform

    def reset_tm(self) -> TextStateManagerChainMapType:
        """Clear all transforms from chainmap having is_text==True or is_render==True"""
        newest = self.transform_stack.maps[0]
        while newest["is_text"] or newest["is_render"]:
            self.transform_stack = self.transform_stack.parents
            newest = self.transform_stack.maps[0]
        return self.transform_stack

    def reset_trm(self) -> TextStateManagerChainMapType:
        """Clear all transforms from chainmap having is_render==True"""
        newest = self.transform_stack.maps[0]
        while newest["is_render"]:
            self.transform_stack = self.transform_stack.parents
            newest = self.transform_stack.maps[0]
        return self.transform_stack

    def remove_q(self) -> TextStateManagerChainMapType:
        """Rewind to stack prior state after closing a 'q' with internal 'cm' ops"""
        self.font, self.font_size = self.font_stack.pop()
        self.transform_stack = self.reset_tm()
        closed_level = self.q_depth.pop()
        # number of cm transforms that were added within the closed q level
        cm_count = self.q_queue.pop(closed_level, 0)
        self.transform_stack.maps = self.transform_stack.maps[cm_count:]
        return self.transform_stack

    def add_q(self) -> None:
        """Add another level to q_queue"""
        saved_font = (self.font, self.font_size)
        self.font_stack.append(saved_font)
        next_level = len(self.q_depth)
        self.q_depth.append(next_level)

    def add_cm(self, *args: Any) -> TextStateManagerChainMapType:
        """Concatenate an additional transform matrix"""
        without_text = self.reset_tm()
        # count this cm for the current q level, see remove_q()
        self.q_queue.update(self.q_depth[-1:])
        self.transform_stack = without_text.new_child(self.new_transform(*args))
        return self.transform_stack

    def _complete_matrix(self, operands: list[float]) -> list[float]:
        """Adds a, b, c, and d to an "e/f only" operand set (e.g Td)"""
        if len(operands) != 2:
            return operands
        # this is a Td operator or equivalent
        return [1.0, 0.0, 0.0, 1.0, *operands]

    def add_tm(self, operands: list[float]) -> TextStateManagerChainMapType:
        """Append a text transform matrix"""
        self.transform_stack = self.transform_stack.new_child(
            self.new_transform(  # type: ignore[misc]
                *self._complete_matrix(operands), is_text=True  # type: ignore[arg-type]
            )
        )
        return self.transform_stack

    def add_trm(self, operands: list[float]) -> TextStateManagerChainMapType:
        """Append a text rendering transform matrix"""
        self.transform_stack = self.transform_stack.new_child(
            self.new_transform(  # type: ignore[misc]
                *self._complete_matrix(operands), is_text=True, is_render=True  # type: ignore[arg-type]
            )
        )
        return self.transform_stack

    @property
    def effective_transform(self) -> list[float]:
        """Current effective transform accounting for cm, tm, and trm transforms"""
        layers = self.transform_stack.maps
        # start with the newest transform and fold in the older ones
        combined = [*layers[0].values()]
        for older in layers[1:]:
            combined = mult(combined, older)  # type: ignore[arg-type]  # dict has int keys 0-5
        return combined


================================================
FILE: pypdf/_text_extraction/_layout_mode/_text_state_params.py
================================================
"""A dataclass that captures the CTM and Text State for a tj operation"""

import math
from dataclasses import dataclass, field
from typing import Any, Union

from ..._font import Font
from .. import mult, orient


@dataclass
class TextStateParams:
    """
    Text state parameters and operator values for a single text value in a
    TJ or Tj PDF operation.

    Attributes:
        txt (str): the text to be rendered.
        font (Font): font object
        font_size (int | float): font size
        Tc (float): character spacing. Defaults to 0.0.
        Tw (float): word spacing. Defaults to 0.0.
        Tz (float): horizontal scaling. Defaults to 100.0.
        TL (float): leading, vertical displacement between text lines. Defaults to 0.0.
        Ts (float): text rise. Used for super/subscripts. Defaults to 0.0.
        transform (list[float]): effective transformation matrix.
        tx (float): x coord of rendered text, i.e. self.transform[4]
        ty (float): y coord of rendered text. May differ from self.transform[5] per self.Ts.
        displaced_tx (float): x coord immediately following rendered text
        space_tx (float): tx for a space character
        font_height (float): effective font height accounting for CTM
        flip_vertical (bool): True if y axis has been inverted (i.e. if self.transform[3] < 0.)
        rotated (bool): True if the text orientation is rotated with respect to the page.

    """

    txt: str
    font: Font
    font_size: Union[int, float]
    Tc: float = 0.0
    Tw: float = 0.0
    Tz: float = 100.0
    TL: float = 0.0
    Ts: float = 0.0
    transform: list[float] = field(
        default_factory=lambda: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
    )
    # The remaining fields are derived in __post_init__ and cannot be passed in.
    tx: float = field(default=0.0, init=False)
    ty: float = field(default=0.0, init=False)
    displaced_tx: float = field(default=0.0, init=False)
    space_tx: float = field(default=0.0, init=False)
    font_height: float = field(default=0.0, init=False)
    flip_vertical: bool = field(default=False, init=False)
    rotated: bool = field(default=False, init=False)

    def __post_init__(self) -> None:
        """Normalize a rotated transform and compute the derived fields."""
        if orient(self.transform) in (90, 270):
            quarter_turn_fix = [
                1.0,
                -self.transform[1],
                -self.transform[2],
                1.0,
                0.0,
                0.0,
            ]
            self.transform = mult(quarter_turn_fix, self.transform)
            self.rotated = True
        # The orientation is re-evaluated on purpose: the transform may have
        # just been replaced above.
        # self.transform[0] AND self.transform[3] < 0 indicates true rotation.
        # If only self.transform[3] < 0, the y coords are simply inverted.
        if orient(self.transform) == 180 and self.transform[0] < -1e-6:
            half_turn_fix = [-1.0, 0.0, 0.0, -1.0, 0.0, 0.0]
            self.transform = mult(half_turn_fix, self.transform)
            self.rotated = True

        self.displaced_tx = self.displaced_transform()[4]
        self.tx = self.transform[4]
        self.ty = self.render_transform()[5]

        space_tx = round(self.word_tx(" "), 3)
        if space_tx < 1e-6:
            # if the " " char is assigned 0 width (e.g. for fine tuned spacing
            # with TJ int operators a la crazyones.pdf), calculate space_tx as
            # a td_offset of -1 * font.space_width where font.space_width is
            # the space_width calculated in _font.py.
            space_tx = round(self.word_tx("", -self.font.space_width), 3)
        self.space_tx = space_tx

        vertical_scale = math.sqrt(self.transform[1] ** 2 + self.transform[3] ** 2)
        self.font_height = self.font_size * vertical_scale
        # flip_vertical handles PDFs generated by Microsoft Word's "publish" command.
        self.flip_vertical = self.transform[3] < -1e-6  # inverts y axis

    def font_size_matrix(self) -> list[float]:
        """Font size matrix: font size scaled by Tz horizontally, with Ts as the y offset."""
        horizontal_size = self.font_size * (self.Tz / 100.0)
        return [horizontal_size, 0.0, 0.0, self.font_size, 0.0, self.Ts]

    def displaced_transform(self) -> list[float]:
        """Effective transform matrix after text has been rendered."""
        displacement = self.displacement_matrix()
        return mult(displacement, self.transform)

    def render_transform(self) -> list[float]:
        """Effective transform matrix accounting for font size, Tz, and Ts."""
        size_matrix = self.font_size_matrix()
        return mult(size_matrix, self.transform)

    def displacement_matrix(
        self, word: Union[str, None] = None, td_offset: float = 0.0
    ) -> list[float]:
        """
        Text displacement matrix

        Args:
            word (str, optional): Defaults to None in which case self.txt displacement is
                returned.
            td_offset (float, optional): translation applied by TD operator. Defaults to 0.0.

        """
        if word is None:
            word = self.txt
        horizontal_shift = self.word_tx(word, td_offset)
        return [1.0, 0.0, 0.0, 1.0, horizontal_shift, 0.0]

    def word_tx(self, word: str, td_offset: float = 0.0) -> float:
        """Horizontal text displacement for any word according to this text state"""
        # Accumulate with += (not sum()) so the floating point result stays
        # the plain left-to-right sum of the individual widths.
        glyph_units: float = 0.0
        for glyph in word:
            glyph_units += (
                self.font.space_width if glyph == " " else self.font.text_width(glyph)
            )
        scaled_width = self.font_size * ((glyph_units - td_offset) / 1000.0)
        unscaled_tx = scaled_width + self.Tc + word.count(" ") * self.Tw
        return unscaled_tx * (self.Tz / 100.0)

    @staticmethod
    def to_dict(inst: "TextStateParams") -> dict[str, Any]:
        """Dataclass to dict for json.dumps serialization (the font is left out)"""
        serializable: dict[str, Any] = {}
        for field_name in inst.__dataclass_fields__:
            if field_name == "font":
                continue
            serializable[field_name] = getattr(inst, field_name)
        return serializable


================================================
FILE: pypdf/_text_extraction/_text_extractor.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import math
from typing import Any, Callable, Optional, Union

from .._font import Font, FontDescriptor
from ..generic import DictionaryObject, TextStringObject
from . import OrientationNotFoundError, crlf_space_check, get_display_str, get_text_operands, mult


class TextExtraction:
    """
    A class to handle PDF text extraction operations.

    This class encapsulates all the state and operations needed for extracting
    text from PDF content streams, replacing the nested functions and nonlocal
    variables in the original implementation.

    Content stream operators are routed through ``operation_handlers`` by
    :meth:`process_operation`; operators without a registered handler are
    ignored.
    """

    def __init__(self) -> None:
        self._font_width_maps: dict[str, tuple[dict[Any, float], str, float]] = {}

        # Text extraction state variables
        self.cm_matrix: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.tm_matrix: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        # One entry per ``q`` operator: the state restored by the matching ``Q``.
        self.cm_stack: list[
            tuple[
                list[float],
                Optional[DictionaryObject],
                Font,
                float,
                float,
                float,
                float,
            ]
        ] = []

        # Store the last modified matrices; can be an intermediate position
        self.cm_prev: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.tm_prev: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]

        # Store the position at the beginning of building the text
        self.memo_cm: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self.memo_tm: list[float] = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]

        self.char_scale = 1.0
        self.space_scale = 1.0
        self._space_width: float = 500.0  # will be set correctly at first Tf
        self._actual_str_size: dict[str, float] = {
            "str_widths": 0.0,
            "str_height": 0.0,
        }  # will be set to string length calculation result
        self.TL = 0.0
        self.font_size = 12.0  # fallback until the first Tf operator sets a size

        # Text extraction variables
        self.text: str = ""
        self.output: str = ""
        self.rtl_dir: bool = False  # right-to-left
        self.font_resource: Optional[DictionaryObject] = None
        self.font = Font(
            name="NotInitialized",
            sub_type="Unknown",
            encoding="charmap",
            font_descriptor=FontDescriptor(),
        )
        self.orientations: tuple[int, ...] = (0, 90, 180, 270)
        self.visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]] = None
        self.font_resources: dict[str, DictionaryObject] = {}
        self.fonts: dict[str, Font] = {}

        # Dispatch table: operator bytes -> bound handler taking the operands.
        self.operation_handlers = {
            b"BT": self._handle_bt,
            b"ET": self._handle_et,
            b"q": self._handle_save_graphics_state,
            b"Q": self._handle_restore_graphics_state,
            b"cm": self._handle_cm,
            b"Tz": self._handle_tz,
            b"Tw": self._handle_tw,
            b"TL": self._handle_tl,
            b"Tf": self._handle_tf,
            b"Td": self._handle_td,
            b"Tm": self._handle_tm,
            b"T*": self._handle_t_star,
            b"Tj": self._handle_tj_operation,
        }

    def initialize_extraction(
        self,
        orientations: tuple[int, ...] = (0, 90, 180, 270),
        visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]] = None,
        font_resources: Optional[dict[str, DictionaryObject]] = None,
        fonts: Optional[dict[str, Font]] = None
    ) -> None:
        """
        Initialize the extractor with extraction parameters.

        Args:
            orientations: Text orientations to keep.
            visitor_text: Optional callback invoked with
                ``(text, cm_matrix, tm_matrix, font_resource, font_size)``
                each time accumulated text is flushed.
            font_resources: Font dictionaries keyed by the name used in ``Tf``.
            fonts: ``Font`` objects keyed by the name used in ``Tf``.

        """
        self.orientations = orientations
        self.visitor_text = visitor_text
        self.font_resources = font_resources or {}
        self.fonts = fonts or {}

        # Reset state
        self.text = ""
        self.output = ""
        self.rtl_dir = False

    def compute_str_widths(self, str_widths: float) -> float:
        """Scale a width expressed in thousandths down by a factor of 1000."""
        return str_widths / 1000

    def process_operation(self, operator: bytes, operands: list[Any]) -> None:
        """
        Dispatch one content stream operation to its handler.

        Operators without a registered handler are silently ignored. After
        ``Td``, ``Tm``, ``T*`` and ``Tj`` the shared post-processing step is
        run with the width returned by the handler.
        """
        handler = self.operation_handlers.get(operator)
        if handler is None:
            return
        str_widths = handler(operands)

        # Post-process operations that affect text positioning
        if operator in {b"Td", b"Tm", b"T*", b"Tj"}:
            self._post_process_text_operation(str_widths or 0.0)

    def _post_process_text_operation(self, str_widths: float) -> None:
        """Handle common post-processing for text positioning operations."""
        try:
            self.text, self.output, self.cm_prev, self.tm_prev = crlf_space_check(
                self.text,
                (self.cm_prev, self.tm_prev),
                (self.cm_matrix, self.tm_matrix),
                (self.memo_cm, self.memo_tm),
                self.font_resource,
                self.orientations,
                self.output,
                self.font_size,
                self.visitor_text,
                str_widths,
                self.compute_str_widths(self.font_size * self._space_width),
                self._actual_str_size["str_height"],
            )
            if self.text == "":
                self.memo_cm = self.cm_matrix.copy()
                self.memo_tm = self.tm_matrix.copy()
        except OrientationNotFoundError:
            # Deliberate best effort: leave the state untouched in this case.
            pass

    def _handle_tj(
        self,
        text: str,
        operands: list[Union[str, TextStringObject]],
        cm_matrix: list[float],
        tm_matrix: list[float],
        font_resource: Optional[DictionaryObject],
        font: Font,
        orientations: tuple[int, ...],
        font_size: float,
        rtl_dir: bool,
        visitor_text: Optional[Callable[[Any, Any, Any, Any, Any], None]],
        actual_str_size: dict[str, float],
    ) -> tuple[str, bool, dict[str, float]]:
        """
        Append the text shown by a ``Tj`` operation and track its size.

        Returns the updated text, the updated right-to-left flag and
        ``actual_str_size``, which is also modified in place: the width of the
        shown text times ``font_size`` is added to ``"str_widths"`` and
        ``"str_height"`` is set to ``font_size``.
        """
        text_operands, is_str_operands = get_text_operands(
            operands, cm_matrix, tm_matrix, font, orientations
        )
        if is_str_operands:
            text += text_operands
            font_widths = sum(
                font.space_width if x == " " else font.text_width(x)
                for x in text_operands
            )
        else:
            text, rtl_dir, font_widths = get_display_str(
                text,
                cm_matrix,
                tm_matrix,  # text matrix
                font_resource,
                font,
                text_operands,
                font_size,
                rtl_dir,
                visitor_text,
            )
        actual_str_size["str_widths"] += font_widths * font_size
        actual_str_size["str_height"] = font_size
        return text, rtl_dir, actual_str_size

    def _flush_text(self, *, skip_empty: bool = False) -> None:
        """
        Flush accumulated text to output and call visitor if present.

        Args:
            skip_empty: When True and no text is pending, the visitor is not
                called. The pending text and the memo matrices are reset in
                every case.

        """
        if not (skip_empty and self.text == ""):
            self.output += self.text
            if self.visitor_text is not None:
                self.visitor_text(self.text, self.memo_cm, self.memo_tm, self.font_resource, self.font_size)
        self.text = ""
        self.memo_cm = self.cm_matrix.copy()
        self.memo_tm = self.tm_matrix.copy()

    def _consume_str_widths(self) -> float:
        """Return the scaled width accumulated since the last call and reset it."""
        str_widths = self.compute_str_widths(self._actual_str_size["str_widths"])
        self._actual_str_size["str_widths"] = 0.0
        return str_widths

    # Operation handlers

    def _handle_bt(self, operands: list[Any]) -> None:
        """Handle BT (Begin Text) operation - Table 5.4 page 405."""
        self.tm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        self._flush_text()

    def _handle_et(self, operands: list[Any]) -> None:
        """Handle ET (End Text) operation - Table 5.4 page 405."""
        self._flush_text()

    def _handle_save_graphics_state(self, operands: list[Any]) -> None:
        """Handle q (Save graphics state) operation - Table 4.7 page 219."""
        self.cm_stack.append(
            (
                self.cm_matrix,
                self.font_resource,
                self.font,
                self.font_size,
                self.char_scale,
                self.space_scale,
                self.TL,
            )
        )

    def _handle_restore_graphics_state(self, operands: list[Any]) -> None:
        """
        Handle Q (Restore graphics state) operation - Table 4.7 page 219.

        An unbalanced ``Q`` (nothing saved) resets the current matrix to the
        identity instead of failing.
        """
        try:
            (
                self.cm_matrix,
                self.font_resource,
                self.font,
                self.font_size,
                self.char_scale,
                self.space_scale,
                self.TL,
            ) = self.cm_stack.pop()
        except IndexError:  # Q without a matching q
            self.cm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]

    def _handle_cm(self, operands: list[Any]) -> None:
        """
        Handle cm (Modify current matrix) operation - Table 4.7 page 219.

        Pending text is flushed with the matrices memorized so far, then the
        operands are multiplied into the current matrix. Operands that cannot
        be used reset the current matrix to the identity.
        """
        self._flush_text()
        try:
            self.cm_matrix = mult([float(operand) for operand in operands[:6]], self.cm_matrix)
        except Exception:
            # Best effort on malformed operands: fall back to the identity.
            self.cm_matrix = [1.0, 0.0, 0.0, 1.0, 0.0, 0.0]
        # The flush memorized the previous matrix; memorize the new one.
        self.memo_cm = self.cm_matrix.copy()
        self.memo_tm = self.tm_matrix.copy()

    def _handle_tz(self, operands: list[Any]) -> None:
        """Handle Tz (Set horizontal text scaling) operation - Table 5.2 page 398."""
        self.char_scale = float(operands[0]) / 100 if operands else 1.0

    def _handle_tw(self, operands: list[Any]) -> None:
        """Handle Tw (Set word spacing) operation - Table 5.2 page 398."""
        self.space_scale = 1.0 + float(operands[0] if operands else 0.0)

    def _handle_tl(self, operands: list[Any]) -> None:
        """Handle TL (Set Text Leading) operation - Table 5.2 page 398."""
        scale_x = math.sqrt(self.tm_matrix[0] ** 2 + self.tm_matrix[2] ** 2)
        self.TL = float(operands[0] if operands else 0.0) * self.font_size * scale_x

    def _handle_tf(self, operands: list[Any]) -> None:
        """
        Handle Tf (Set text font and size) operation - Table 5.2 page 398.

        Pending text is flushed first (the visitor is only called when there
        is text). An unknown font name selects a placeholder font; a missing
        or invalid size keeps the previous font size.
        """
        self._flush_text(skip_empty=True)
        try:
            self.font_resource = self.font_resources[operands[0]]
            self.font = self.fonts[operands[0]]
        except KeyError:  # font not found
            self.font_resource = None
            font_descriptor = FontDescriptor()
            self.font = Font(
                "Unknown",
                space_width=250,
                encoding=dict.fromkeys(range(256), "�"),
                font_descriptor=font_descriptor,
                character_map={},
            )

        self._space_width = self.font.space_width / 2  # Actually the width of _half_ a space...
        try:
            self.font_size = float(operands[1])
        except Exception:
            pass  # keep previous size

    def _handle_td(self, operands: list[Any]) -> float:
        """Handle Td (Move text position) operation - Table 5.5 page 406."""
        # A special case is a translating only tm:
        # tm = [1, 0, 0, 1, e, f]
        # i.e. tm[4] += tx, tm[5] += ty.
        tx, ty = float(operands[0]), float(operands[1])
        self.tm_matrix[4] += tx * self.tm_matrix[0] + ty * self.tm_matrix[2]
        self.tm_matrix[5] += tx * self.tm_matrix[1] + ty * self.tm_matrix[3]
        return self._consume_str_widths()

    def _handle_tm(self, operands: list[Any]) -> float:
        """Handle Tm (Set text matrix) operation - Table 5.5 page 406."""
        self.tm_matrix = [float(operand) for operand in operands[:6]]
        return self._consume_str_widths()

    def _handle_t_star(self, operands: list[Any]) -> float:
        """Handle T* (Move to next line) operation - Table 5.5 page 406."""
        self.tm_matrix[4] -= self.TL * self.tm_matrix[2]
        self.tm_matrix[5] -= self.TL * self.tm_matrix[3]
        return self._consume_str_widths()

    def _handle_tj_operation(self, operands: list[Any]) -> float:
        """Handle Tj (Show text) operation - Table 5.5 page 406."""
        self.text, self.rtl_dir, self._actual_str_size = self._handle_tj(
            self.text,
            operands,
            self.cm_matrix,
            self.tm_matrix,
            self.font_resource,
            self.font,
            self.orientations,
            self.font_size,
            self.rtl_dir,
            self.visitor_text,
            self._actual_str_size,
        )
        return 0.0  # str_widths will be handled in post-processing


================================================
FILE: pypdf/_utils.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""Utility functions for PDF library."""
# Authorship metadata for this module.
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

import functools
import logging
import re
import sys
import warnings
from dataclasses import dataclass
from datetime import datetime, timezone
from io import DEFAULT_BUFFER_SIZE
from os import SEEK_CUR
from re import Pattern
from typing import (
    IO,
    Any,
    Optional,
    Union,
    overload,
)

if sys.version_info[:2] >= (3, 10):
    # Python 3.10+: https://www.python.org/dev/peps/pep-0484/
    from typing import TypeAlias
else:
    from typing_extensions import TypeAlias

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

from .errors import (
    STREAM_TRUNCATED_PREMATURELY,
    DeprecationError,
    PdfStreamError,
)

# A 3x3 matrix written as three rows of three floats each.
TransformationMatrixType: TypeAlias = tuple[
    tuple[float, float, float], tuple[float, float, float], tuple[float, float, float]
]
# A matrix reduced to six floats.
# NOTE(review): presumably the a, b, c, d, e, f entries of a PDF matrix - confirm.
CompressedTransformationMatrix: TypeAlias = tuple[
    float, float, float, float, float, float
]

# Any file-like object; used as the stream parameter type by the helpers below.
StreamType = IO[Any]
# Either a string or an already opened stream.
# NOTE(review): the string is presumably a file path - verify against callers.
StrByteType = Union[str, StreamType]


def parse_iso8824_date(text: Optional[str]) -> Optional[datetime]:
    """
    Parse a PDF style date string (``D:YYYYMMDDHHmmSS`` plus optional offset).

    The leading ``D:`` may be missing, trailing components may be left out
    (down to the year only), and the offset may be given as ``Z``/``z`` or as
    ``+HH'mm'`` / ``-HH'mm'``.

    Args:
        text: The date string, or None/empty.

    Returns:
        None for a missing or empty input, otherwise the parsed datetime.
        The result carries a timezone only if the input had an offset.

    Raises:
        ValueError: If the text matches none of the supported layouts.

    """
    if not text:
        return None
    original = text

    normalized = text
    if normalized[0].isdigit():
        normalized = "D:" + normalized
    if normalized.endswith(("Z", "z")):
        normalized += "0000"
    # "Z"/"z" become the sign of a zero offset; apostrophes are dropped.
    normalized = normalized.translate(str.maketrans({"z": "+", "Z": "+", "'": None}))
    sign_at = max(normalized.find("+"), normalized.find("-"))
    if sign_at > 0 and sign_at != len(normalized) - 5:
        # The offset is not four digits long: pad it with minutes.
        normalized += "00"
    is_utc = normalized.endswith("+0000")

    layouts = (
        "D:%Y",
        "D:%Y%m",
        "D:%Y%m%d",
        "D:%Y%m%d%H",
        "D:%Y%m%d%H%M",
        "D:%Y%m%d%H%M%S",
        "D:%Y%m%d%H%M%S%z",
    )
    for layout in layouts:
        try:
            parsed = datetime.strptime(normalized, layout)  # noqa: DTZ007
        except ValueError:
            continue
        if is_utc:
            return parsed.replace(tzinfo=timezone.utc)
        return parsed
    raise ValueError(f"Can not convert date: {original}")


def format_iso8824_date(dt: datetime) -> str:
    """
    Convert a datetime object to PDF date string format.

    Converts datetime to the PDF date format ``D:YYYYMMDDHHmmSSOHH'mm'``
    (note the trailing apostrophe), where ``O`` is the sign of the UTC offset.
    The offset part is only written when the datetime has a UTC offset;
    seconds within the offset are dropped.

    Args:
        dt: A datetime object to convert.

    Returns:
        A date string in PDF format.
    """
    date_str = dt.strftime("D:%Y%m%d%H%M%S")
    # utcoffset() is None for naive datetimes and also for a tzinfo that
    # reports no offset; both are written without an offset suffix.
    offset = dt.utcoffset()
    if offset is None:
        return date_str
    total_seconds = int(offset.total_seconds())
    hours, remainder = divmod(abs(total_seconds), 3600)
    minutes = remainder // 60
    sign = "+" if total_seconds >= 0 else "-"
    return f"{date_str}{sign}{hours:02d}'{minutes:02d}'"


def _get_max_pdf_version_header(header1: str, header2: str) -> str:
    versions = (
        "%PDF-1.3",
        "%PDF-1.4",
        "%PDF-1.5",
        "%PDF-1.6",
        "%PDF-1.7",
        "%PDF-2.0",
    )
    pdf_header_indices = []
    if header1 in versions:
        pdf_header_indices.append(versions.index(header1))
    if header2 in versions:
        pdf_header_indices.append(versions.index(header2))
    if len(pdf_header_indices) == 0:
        raise ValueError(f"Neither {header1!r} nor {header2!r} are proper headers")
    return versions[max(pdf_header_indices)]


# The single-byte values treated as whitespace by the stream helpers below:
# NUL, tab, line feed, form feed, carriage return and space.
WHITESPACES = (b"\x00", b"\t", b"\n", b"\f", b"\r", b" ")
# The same bytes joined into one bytes object, for per-byte membership tests.
WHITESPACES_AS_BYTES = b"".join(WHITESPACES)
# The same bytes as a regular expression character class.
WHITESPACES_AS_REGEXP = b"[" + WHITESPACES_AS_BYTES + b"]"


def read_until_whitespace(stream: StreamType, maxchars: Optional[int] = None) -> bytes:
    """
    Read non-whitespace characters and return them.

    Stops upon encountering whitespace, at the end of the stream, or when
    maxchars is reached. A terminating whitespace byte is consumed but not
    returned.

    Args:
        stream: The data stream from which was read.
        maxchars: The maximum number of bytes returned; by default unlimited.

    Returns:
        The data which was read.

    """
    collected = b""
    while True:
        byte = stream.read(1)
        if not byte or byte.isspace():
            return collected
        collected += byte
        if len(collected) == maxchars:
            return collected


def read_non_whitespace(stream: StreamType) -> bytes:
    """
    Find and read the next non-whitespace character (ignores whitespace).

    Args:
        stream: The data stream from which was read.

    Returns:
        The first byte read that is not in WHITESPACES; an empty read at the
        end of the stream is returned as is.

    """
    while True:
        byte = stream.read(1)
        if byte not in WHITESPACES:
            return byte


def skip_over_whitespace(stream: StreamType) -> bool:
    """
    Similar to read_non_whitespace, but return a boolean if at least one
    whitespace character was read.

    The first non-whitespace byte that ends the run is consumed as well.

    Args:
        stream: The data stream from which was read.

    Returns:
        True if one or more whitespace was skipped, otherwise return False.

    """
    skipped_any = False
    while stream.read(1) in WHITESPACES:
        skipped_any = True
    return skipped_any


def check_if_whitespace_only(value: bytes) -> bool:
    """
    Check if the given value consists of whitespace characters only.

    An empty value counts as whitespace only.

    Args:
        value: The bytes to check.

    Returns:
        True if the value only has whitespace characters, otherwise return False.

    """
    for byte in value:
        if byte not in WHITESPACES_AS_BYTES:
            return False
    return True


def skip_over_comment(stream: StreamType) -> None:
    """
    Skip a ``%`` comment starting at the current stream position.

    If the next byte is not ``%``, the stream position is left unchanged.
    Otherwise everything up to and including the first CR or LF is consumed.

    Args:
        stream: The data stream to read from.

    Raises:
        PdfStreamError: If the stream ends before the comment is terminated.

    """
    tok = stream.read(1)
    # Un-read only what was actually consumed: at the end of the stream
    # nothing was read, so the position must not move backwards.
    stream.seek(-len(tok), SEEK_CUR)
    if tok != b"%":
        return
    while tok not in (b"\n", b"\r"):
        tok = stream.read(1)
        if tok == b"":
            raise PdfStreamError("File ended unexpectedly.")


def read_until_regex(stream: StreamType, regex: Pattern[bytes]) -> bytes:
    """
    Read until the regular expression pattern matched (ignore the match).
    Treats EOF on the underlying stream as the end of the token to be matched.

    On a match the stream is repositioned to the start of the match, so the
    matched bytes are still unread.

    Args:
        stream: The data stream to read from.
        regex: re.Pattern

    Returns:
        The read bytes.

    """
    # Fixed overlap: 16 bytes is sufficient for the short
    # delimiter patterns used in PDF parsing.
    overlap_size = 16
    max_chunk_size = 8192
    parts: list[bytes] = []
    total_len = 0  # number of bytes in ``parts``
    tail = b""
    chunk_size = 16
    while True:
        tok = stream.read(chunk_size)
        if not tok:
            return b"".join(parts)
        # Search overlap of previous tail + new chunk to catch
        # multi-byte regex matches spanning chunk boundaries.
        m = regex.search(tail + tok)
        if m is not None:
            # Offset of the match relative to where reading started.
            actual_start = total_len - len(tail) + m.start()
            # Rewind from the end of ``tok`` back to the start of the match.
            stream.seek(actual_start - total_len - len(tok), SEEK_CUR)
            parts.append(tok)
            return b"".join(parts)[:actual_start]
        parts.append(tok)
        total_len += len(tok)
        tail = tok[-overlap_size:]
        # Grow the chunk size geometrically up to the cap.
        if chunk_size < max_chunk_size:
            chunk_size <<= 1


def read_block_backwards(stream: StreamType, to_read: int) -> bytes:
    """
    Given a stream at position X, read a block of size to_read ending at position X.

    This changes the stream's position to the beginning of where the block was
    read.

    Args:
        stream: The stream to read from, positioned at the end of the block.
        to_read: The number of bytes to read.

    Returns:
        The data which was read.

    Raises:
        PdfStreamError: If fewer than to_read bytes precede the current position.

    """
    if to_read > stream.tell():
        raise PdfStreamError("Could not read malformed PDF file")
    rewind = -to_read
    # Step back to the start of the wanted block and read it.
    stream.seek(rewind, SEEK_CUR)
    block = stream.read(to_read)
    # Reading moved the position forward again; return to the block start.
    stream.seek(rewind, SEEK_CUR)
    return block


def read_previous_line(stream: StreamType) -> bytes:
    """
    Given a byte stream with current position X, return the previous line.

    The result consists of all bytes between the first CR/LF byte found
    before X (or the start of the stream, if no such byte is found) and
    position X.

    After this call, the stream will be positioned one byte after the
    first non-CRLF character found beyond the first CR/LF byte before X,
    or, if no such byte is found, at the beginning of the stream.

    Args:
        stream: The stream to read from; it is read backwards in blocks of
            at most DEFAULT_BUFFER_SIZE bytes.

    Returns:
        The data which was read.

    Raises:
        PdfStreamError: If the stream is already at position 0.

    """
    # Blocks are collected back to front and reversed when joined.
    line_content: list[bytes] = []
    found_crlf = False
    if stream.tell() == 0:
        raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
    while True:
        to_read = min(DEFAULT_BUFFER_SIZE, stream.tell())
        if to_read == 0:
            # Reached the beginning of the stream.
            break
        # Read the block. After this, the stream is positioned at the
        # start of the block that was just read.
        block = read_block_backwards(stream, to_read)
        idx = len(block) - 1
        if not found_crlf:
            # We haven't found our first CR/LF yet.
            # Read off characters until we hit one.
            while idx >= 0 and block[idx] not in b"\r\n":
                idx -= 1
            if idx >= 0:
                found_crlf = True
        if found_crlf:
            # We found our first CR/LF already (on this block or
            # a previous one).
            # Our combined line is the remainder of the block
            # plus any previously read blocks. (If the CR/LF was found in a
            # previous block, this remainder is empty.)
            line_content.append(block[idx + 1 :])
            # Continue to read off any more CRLF characters.
            while idx >= 0 and block[idx] in b"\r\n":
                idx -= 1
        else:
            # Didn't find CR/LF yet - add this block to our
            # previously read blocks and continue.
            line_content.append(block)
        if idx >= 0:
            # We found the next non-CRLF character.
            # The stream sits at the block start, so moving forward by
            # idx + 1 places it just after that character; then break.
            stream.seek(idx + 1, SEEK_CUR)
            break
    # Join all the blocks in the line (which are in reverse order)
    return b"".join(line_content[::-1])


def matrix_multiply(
    a: TransformationMatrixType, b: TransformationMatrixType
) -> TransformationMatrixType:
    """
    Return the matrix product ``a x b`` as a tuple of row tuples.

    Every entry is converted with ``float`` before it is multiplied.
    """
    product_rows = []
    for row in a:
        cells = []
        for column in zip(*b):
            cells.append(sum(float(i) * float(j) for i, j in zip(row, column)))
        product_rows.append(tuple(cells))
    return tuple(product_rows)  # type: ignore[return-value]


def mark_location(stream: StreamType) -> None:
    """
    Create text file showing current location in context.

    Writes up to ``radius`` bytes before the current position, the marker
    ``HERE`` and up to ``radius`` bytes after it to ``pypdf_pdfLocation.txt``
    in the current working directory. The stream position is restored
    afterwards, also when the position is closer than ``radius`` bytes to
    either end of the stream.

    Args:
        stream: A seekable stream.

    """
    # Mainly for debugging
    radius = 5000
    position = stream.tell()
    # Clamp the window so that it never starts before the stream does.
    start = max(position - radius, 0)
    stream.seek(start)
    try:
        with open("pypdf_pdfLocation.txt", "wb") as output_fh:
            output_fh.write(stream.read(position - start))
            output_fh.write(b"HERE")
            output_fh.write(stream.read(radius))
    finally:
        # Restore with an absolute seek: short reads near the end of the
        # stream make a relative seek unreliable.
        stream.seek(position)


@overload
def ord_(b: str) -> int:
    ...


@overload
def ord_(b: bytes) -> bytes:
    ...


@overload
def ord_(b: int) -> int:
    ...


def ord_(b: Union[int, str, bytes]) -> Union[int, bytes]:
    """Return ``ord(b)`` for a string; any other value is returned unchanged."""
    return ord(b) if isinstance(b, str) else b


def deprecate(msg: str, stacklevel: int = 3) -> None:
    """
    Issue a DeprecationWarning.

    Args:
        msg: The warning text.
        stacklevel: Passed on to ``warnings.warn`` to select the stack frame
            the warning is attributed to.

    """
    warnings.warn(message=msg, category=DeprecationWarning, stacklevel=stacklevel)


def deprecation(msg: str) -> None:
    """
    Raise a DeprecationError carrying the given message.

    Args:
        msg: The error text.

    Raises:
        DeprecationError: Always.

    """
    error = DeprecationError(msg)
    raise error


def deprecate_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """
    Issue a warning that a feature will be removed, but has a replacement.

    Args:
        old_name: Name of the deprecated feature.
        new_name: Name of the feature to use instead.
        removed_in: pypdf version in which the feature will be removed.

    """
    message = f"{old_name} is deprecated and will be removed in pypdf {removed_in}. Use {new_name} instead."
    deprecate(message, 4)


def deprecation_with_replacement(old_name: str, new_name: str, removed_in: str) -> None:
    """
    Raise an exception that a feature was already removed, but has a replacement.

    Args:
        old_name: Name of the removed feature.
        new_name: Name of the feature to use instead.
        removed_in: pypdf version in which the feature was removed.

    """
    message = f"{old_name} is deprecated and was removed in pypdf {removed_in}. Use {new_name} instead."
    deprecation(message)


def deprecate_no_replacement(name: str, removed_in: str) -> None:
    """
    Issue a warning that a feature will be removed without replacement.

    Args:
        name: Name of the deprecated feature.
        removed_in: pypdf version in which the feature will be removed.

    """
    message = f"{name} is deprecated and will be removed in pypdf {removed_in}."
    deprecate(message, 4)


def deprecation_no_replacement(name: str, removed_in: str) -> None:
    """
    Raise an exception that a feature was already removed without replacement.

    Args:
        name: Name of the removed feature.
        removed_in: pypdf version in which the feature was removed.

    """
    message = f"{name} is deprecated and was removed in pypdf {removed_in}."
    deprecation(message)


def logger_error(message: str, *, source: str, **values: Any) -> None:
    """
    Use this instead of logger.error directly.

    That allows people to overwrite it more easily.

    See the docs on when to use which:
    https://pypdf.readthedocs.io/en/latest/user/suppress-warnings.html

    Args:
        message: The message to log. It may contain ``%(name)s`` style
            placeholders which are filled from ``values``.
        source: Name of the logger to use.
        values: Values for the placeholders in ``message``.

    """
    logger = logging.getLogger(source)
    if values:
        logger.error(message, values)
    else:
        # logging only unpacks a *non-empty* mapping. An empty dict would be
        # kept as a single positional argument and break the formatting of
        # messages without placeholders ("not all arguments converted").
        logger.error(message)


def logger_warning(msg: str, src: str) -> None:
    """
    Use this instead of logger.warning directly.

    That allows people to overwrite it more easily.

    ## Exception, warnings.warn, logger_warning
    - Raise an exception if the user should write code that deals with an
      error case, e.g. the PDF being completely broken.
    - Use warnings.warn if the user needs to fix their code, e.g.
      DeprecationWarnings.
    - Use logger_warning if the user needs to know that an issue was handled
      by pypdf, e.g. a non-compliant PDF being read in a way that pypdf could
      apply a robustness fix to still read it. This applies mainly to
      strict=False mode.

    Args:
        msg: The message to log.
        src: Name of the logger to use.

    """
    target = logging.getLogger(src)
    target.warning(msg)


def rename_kwargs(
    func_name: str, kwargs: dict[str, Any], aliases: dict[str, str], fail: bool = False
) -> None:
    """
    Helper function to deprecate arguments.

    Args:
        func_name: Name of the function to be deprecated
        kwargs: Keyword arguments passed to the function. Deprecated names
            found in it are replaced in place by their new names.
        aliases: Mapping of deprecated argument names to their replacements.
        fail: If true, using a deprecated name raises a DeprecationError
            instead of issuing a DeprecationWarning.

    Raises:
        DeprecationError: If ``fail`` is true and a deprecated name is used.
        TypeError: If both the deprecated and the new name are passed.

    """
    for old_term, new_term in aliases.items():
        if old_term not in kwargs:
            continue
        notice = f"{old_term} is deprecated as an argument. Use {new_term} instead"
        if fail:
            raise DeprecationError(notice)
        if new_term in kwargs:
            raise TypeError(
                f"{func_name} received both {old_term} and {new_term} as "
                f"an argument. {old_term} is deprecated. "
                f"Use {new_term} instead."
            )
        # Move the value to its new name before warning about the old one.
        kwargs[new_term] = kwargs.pop(old_term)
        warnings.warn(notice, DeprecationWarning, stacklevel=3)


def _human_readable_bytes(bytes: int) -> str:
    if bytes < 10**3:
        return f"{bytes} Byte"
    if bytes < 10**6:
        return f"{bytes / 10**3:.1f} kB"
    if bytes < 10**9:
        return f"{bytes / 10**6:.1f} MB"
    return f"{bytes / 10**9:.1f} GB"


# The following class has been copied from Django:
# https://github.com/django/django/blob/adae619426b6f50046b3daaa744db52989c9d6db/django/utils/functional.py#L51-L65
# It received some modifications to comply with our own coding standards.
#
# Original license:
#
# ---------------------------------------------------------------------------------
# Copyright (c) Django Software Foundation and individual contributors.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#     1. Redistributions of source code must retain the above copyright notice,
#        this list of conditions and the following disclaimer.
#
#     2. Redistributions in binary form must reproduce the above copyright
#        notice, this list of conditions and the following disclaimer in the
#        documentation and/or other materials provided with the distribution.
#
#     3. Neither the name of Django nor the names of its contributors may be used
#        to endorse or promote products derived from this software without
#        specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ---------------------------------------------------------------------------------
class classproperty:  # noqa: N801
    """
    Descriptor turning a method that takes only ``cls`` into a read-only
    property which can be accessed directly on the class.
    """

    def __init__(self, method=None) -> None:  # type: ignore  # noqa: ANN001
        # The wrapped function; it gets called with the owning class.
        self.fget = method

    def __get__(self, instance, cls=None) -> Any:  # type: ignore  # noqa: ANN001
        # The instance is ignored: the value only depends on the class.
        wrapped = self.fget
        return wrapped(cls)

    def getter(self, method) -> Self:  # type: ignore  # noqa: ANN001
        # Replace the wrapped function; returns the descriptor itself so that
        # this can be used as a decorator.
        self.fget = method
        return self


@dataclass
class File:
    from .generic import IndirectObject  # noqa: PLC0415

    name: str = ""
    """
    Filename as identified within the PDF file.
    """
    data: bytes = b""
    """
    Data as bytes.
    """
    indirect_reference: Optional[IndirectObject] = None
    """
    Reference to the object storing the stream.
    """

    def __str__(self) -> str:
        # Show the size in human-readable form instead of the raw data.
        class_name = self.__class__.__name__
        size = _human_readable_bytes(len(self.data))
        return f"{class_name}(name={self.name}, data: {size})"

    def __repr__(self) -> str:
        # Same as __str__, with the hash of the data inserted before the
        # closing parenthesis.
        summary = self.__str__()
        return f"{summary[:-1]}, hash: {hash(self.data)})"


@functools.total_ordering
class Version:
    """
    Version string which can be compared and ordered.

    The string is split at dots; every component is stored as a pair of
    its leading integer and the remaining suffix.
    """

    COMPONENT_PATTERN = re.compile(r"^(\d+)(.*)$")

    def __init__(self, version_str: str) -> None:
        self.version_str = version_str
        self.components = self._parse_version(version_str)

    def _parse_version(self, version_str: str) -> list[tuple[int, str]]:
        """Split the version into ``(integer prefix, suffix)`` pairs."""
        parsed_components = []
        for component in version_str.split("."):
            found = Version.COMPONENT_PATTERN.match(component)
            if found is None:
                # No leading digits: treat the whole component as suffix.
                parsed_components.append((0, component))
            else:
                digits, suffix = found.groups()
                parsed_components.append((int(digits), suffix))
        return parsed_components

    def __eq__(self, other: object) -> bool:
        return isinstance(other, Version) and self.components == other.components

    def __hash__(self) -> int:
        # Convert to tuple as lists cannot be hashed.
        return hash((self.__class__, tuple(self.components)))

    def __lt__(self, other: Any) -> bool:
        if not isinstance(other, Version):
            raise ValueError(f"Version cannot be compared against {type(other)}")

        # The first differing component decides: the integer part is compared
        # first, then the suffix.
        for mine, theirs in zip(self.components, other.components):
            if mine != theirs:
                return mine < theirs

        # All shared components are equal: the shorter version is smaller.
        return len(self.components) < len(other.components)


================================================
FILE: pypdf/_version.py
================================================
# Version string of this pypdf release.
__version__ = "6.9.1"


================================================
FILE: pypdf/_writer.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import decimal
import enum
import hashlib
import re
import struct
import sys
import uuid
from collections.abc import Iterable, Mapping
from io import BytesIO, FileIO, IOBase
from itertools import compress
from pathlib import Path
from re import Pattern
from types import TracebackType
from typing import (
    IO,
    Any,
    Callable,
    Optional,
    Union,
    cast,
)

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

from ._doc_common import DocumentInformation, PdfDocCommon
from ._encryption import EncryptAlgorithm, Encryption
from ._page import PageObject, Transformation
from ._page_labels import nums_clear_range, nums_insert, nums_next
from ._reader import PdfReader
from ._utils import (
    StrByteType,
    StreamType,
    _get_max_pdf_version_header,
    deprecation_no_replacement,
    logger_warning,
)
from .constants import AnnotationDictionaryAttributes as AA
from .constants import CatalogAttributes as CA
from .constants import (
    CatalogDictionary,
    GoToActionArguments,
    ImageType,
    InteractiveFormDictEntries,
    OutlineFontFlag,
    PageLabelStyle,
    PagesAttributes,
    TypFitArguments,
    UserAccessPermissions,
)
from .constants import Core as CO
from .constants import FieldDictionaryAttributes as FA
from .constants import PageAttributes as PG
from .constants import TrailerKeys as TK
from .errors import PdfReadError, PyPdfError
from .generic import (
    PAGE_FIT,
    ArrayObject,
    BooleanObject,
    ByteStringObject,
    ContentStream,
    Destination,
    DictionaryObject,
    EmbeddedFile,
    Fit,
    FloatObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    PdfObject,
    RectangleObject,
    ReferenceLink,
    StreamObject,
    TextStringObject,
    TreeObject,
    ViewerPreferences,
    create_string_object,
    extract_links,
    hex_to_rgb,
    is_null_or_none,
)
from .generic._appearance_stream import TextStreamAppearance
from .pagerange import PageRange, PageRangeSpec
from .types import (
    AnnotationSubtype,
    BorderArrayType,
    LayoutType,
    OutlineItemType,
    OutlineType,
    PagemodeType,
)
from .xmp import XmpInformation

# Result of UserAccessPermissions.all(), computed once at import time.
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions.all()


class ObjectDeletionFlag(enum.IntFlag):
    """
    Bit flags which can be combined with ``|``.

    ``IMAGES`` is the combination of the three image-related flags.
    """

    NONE = 0
    TEXT = 1 << 0
    LINKS = 1 << 1
    ATTACHMENTS = 1 << 2
    OBJECTS_3D = 1 << 3
    ALL_ANNOTATIONS = 1 << 4
    XOBJECT_IMAGES = 1 << 5
    INLINE_IMAGES = 1 << 6
    DRAWING_IMAGES = 1 << 7
    IMAGES = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES


def _rolling_checksum(stream: BytesIO, blocksize: int = 65536) -> str:
    hash = hashlib.md5(usedforsecurity=False)
    for block in iter(lambda: stream.read(blocksize), b""):
        hash.update(block)
    return hash.hexdigest()


class PdfWriter(PdfDocCommon):
    """
    Write a PDF file out, given pages produced by another class or through
    cloning a PDF file during initialization.

    Typically data is added from a :class:`PdfReader<pypdf.PdfReader>`.

    Args:
        clone_from: identical to fileobj (for compatibility)

        incremental: If true, loads the document and sets the PdfWriter in incremental mode.

            When writing incrementally, the original document is written first and new/modified
            content is appended. To be used for signed document/forms to keep signature valid.

        full: If true, loads all the objects (always full if incremental = True).
            This parameter may allow loading large PDFs.

        strict: If true, pypdf will raise an exception if a PDF does not follow the specification.
            If false, pypdf will try to be forgiving and do something reasonable, but it will log
            a warning message. It is a best-effort approach.

    """

    def __init__(
        self,
        fileobj: Union[None, PdfReader, StrByteType, Path] = "",
        clone_from: Union[None, PdfReader, StrByteType, Path] = None,
        incremental: bool = False,
        full: bool = False,
        strict: bool = False,
    ) -> None:
        """
        Initialize the writer, optionally cloning an existing document.

        Args:
            fileobj: Source to clone from (a PdfReader, a path or a stream).
                A path which does not exist or points to an empty file, as
                well as an empty stream, is not cloned.
            clone_from: identical to fileobj (for compatibility)
            incremental: If true, loads the document and sets the PdfWriter
                in incremental mode.
            full: If true, loads all the objects (always full if
                incremental = True).
            strict: If true, pypdf will raise an exception if a PDF does not
                follow the specification.

        Raises:
            PyPdfError: If ``incremental`` or ``full`` is set and ``fileobj``
                is neither a path, a BytesIO nor a PdfReader.

        """
        self.strict = strict
        """
        If true, pypdf will raise an exception if a PDF does not follow the specification.
        If false, pypdf will try to be forgiving and do something reasonable, but it will log
        a warning message. It is a best-effort approach.
        """

        # ``full`` uses the same loading path as ``incremental``; the flag is
        # reset at the end of this method if only ``full`` was requested.
        self.incremental = incremental or full
        """
        Returns if the PdfWriter object has been started in incremental mode.
        """

        self._objects: list[Optional[PdfObject]] = []
        """
        The indirect objects in the PDF.
        For the incremental case, it will be filled with None
        in clone_reader_document_root.
        """

        self._original_hash: list[int] = []
        """
        List of hashes after import; used to identify changes.
        """

        self._idnum_hash: dict[bytes, tuple[IndirectObject, list[IndirectObject]]] = {}
        """
        Maps hash values of indirect objects to the list of IndirectObjects.
        This is used for compression.
        """

        self._id_translated: dict[int, dict[int, int]] = {}
        """List of already translated IDs.
           dict[id(pdf)][(idnum, generation)]
        """

        self._info_obj: Optional[PdfObject]
        """The PDF file's document information dictionary,
        defined by Info in the PDF file's trailer dictionary."""

        self._ID: Union[ArrayObject, None] = None
        """The PDF file identifier,
        defined by the ID in the PDF file's trailer dictionary."""

        self._unresolved_links: list[tuple[ReferenceLink, ReferenceLink]] = []
        "Tracks links in pages added to the writer for resolving later."
        self._merged_in_pages: dict[Optional[IndirectObject], Optional[IndirectObject]] = {}
        "Tracks pages added to the writer and what page they turned into."

        if self.incremental:
            # Normalize the source step by step: path -> BytesIO -> PdfReader.
            if isinstance(fileobj, (str, Path)):
                with open(fileobj, "rb") as f:
                    fileobj = BytesIO(f.read(-1))
            if isinstance(fileobj, BytesIO):
                fileobj = PdfReader(fileobj)
            if not isinstance(fileobj, PdfReader):
                raise PyPdfError("Invalid type for incremental mode")
            self._reader = fileobj  # prev content is in _reader.stream
            self._header = fileobj.pdf_header.encode()
            self._readonly = True  # TODO: to be analysed
        else:
            self._header = b"%PDF-1.3"
            self._info_obj = self._add_object(
                DictionaryObject(
                    {NameObject("/Producer"): create_string_object("pypdf")}
                )
            )

        def _get_clone_from(
            fileobj: Union[None, PdfReader, str, Path, IO[Any], BytesIO],
            clone_from: Union[None, PdfReader, str, Path, IO[Any], BytesIO],
        ) -> Union[None, PdfReader, str, Path, IO[Any], BytesIO]:
            """Select the source to clone from; None if nothing is cloned."""
            if fileobj is None:
                # There is nothing to clone from ``fileobj``: keep an
                # explicitly passed ``clone_from`` instead of discarding it.
                return clone_from
            if isinstance(fileobj, (str, Path, IO, BytesIO)) and (
                fileobj == "" or clone_from is not None
            ):
                return clone_from
            cloning = True
            # Missing or empty files are output targets, not sources.
            if isinstance(fileobj, (str, Path)) and (
                not Path(str(fileobj)).exists()
                or Path(str(fileobj)).stat().st_size == 0
            ):
                cloning = False
            if isinstance(fileobj, (IOBase, BytesIO)):
                # Check for an empty stream without changing its position.
                t = fileobj.tell()
                if fileobj.seek(0, 2) == 0:
                    cloning = False
                fileobj.seek(t, 0)
            if cloning:
                clone_from = fileobj
            return clone_from

        clone_from = _get_clone_from(fileobj, clone_from)
        # To prevent overwriting
        self.temp_fileobj = fileobj
        self.fileobj = ""
        self._with_as_usage = False
        self._cloned = False
        # The root of our page tree node
        pages = DictionaryObject(
            {
                NameObject(PagesAttributes.TYPE): NameObject("/Pages"),
                NameObject(PagesAttributes.COUNT): NumberObject(0),
                NameObject(PagesAttributes.KIDS): ArrayObject(),
            }
        )
        self.flattened_pages = []
        self._encryption: Optional[Encryption] = None
        self._encrypt_entry: Optional[DictionaryObject] = None

        if clone_from is not None:
            if not isinstance(clone_from, PdfReader):
                clone_from = PdfReader(clone_from)
            self.clone_document_from_reader(clone_from)
            self._cloned = True
        else:
            # Start with an empty document: a page tree and a catalog.
            self._pages = self._add_object(pages)
            self._root_object = DictionaryObject(
                {
                    NameObject(PagesAttributes.TYPE): NameObject(CO.CATALOG),
                    NameObject(CO.PAGES): self._pages,
                }
            )
            self._add_object(self._root_object)
        if full and not incremental:
            # ``full`` only affects loading; do not stay in incremental mode.
            self.incremental = False
        if isinstance(self._ID, list):
            # Store text string identifiers as byte strings.
            if isinstance(self._ID[0], TextStringObject):
                self._ID[0] = ByteStringObject(self._ID[0].get_original_bytes())
            if isinstance(self._ID[1], TextStringObject):
                self._ID[1] = ByteStringObject(self._ID[1].get_original_bytes())

    # for commonality
    @property
    def is_encrypted(self) -> bool:
        """
        Read-only boolean property showing whether this PDF file is encrypted.

        For a PdfWriter this always returns False. The property is provided
        for commonality; see :meth:`decrypt()<pypdf.PdfReader.decrypt>` for
        the reader side.
        """
        return False

    @property
    def root_object(self) -> DictionaryObject:
        """
        Provide direct access to PDF Structure.

        Note:
            Recommended only for read access.

        Returns:
            The dictionary stored in ``_root_object``.

        """
        return self._root_object

    @property
    def _info(self) -> Optional[DictionaryObject]:
        """
        Provide access to "/Info". Standardized with PdfReader.

        Returns:
            /Info Dictionary; None if the entry does not exist

        """
        if self._info_obj is None:
            return None
        return cast(DictionaryObject, self._info_obj.get_object())

    @_info.setter
    def _info(self, value: Optional[Union[IndirectObject, DictionaryObject]]) -> None:
        """
        Replace the content of "/Info" or remove the entry.

        Args:
            value: The new content; None removes the information dictionary.

        """
        if value is not None:
            if self._info_obj is None:
                self._info_obj = self._add_object(DictionaryObject())
            # Keep the existing object and only exchange its entries.
            target = cast(DictionaryObject, self._info_obj.get_object())
            target.clear()
            target.update(cast(DictionaryObject, value.get_object()))
            return

        try:
            # Free the slot of the information dictionary, if there is one.
            self._objects[self._info_obj.indirect_reference.idnum - 1] = None  # type: ignore
        except (KeyError, AttributeError):
            pass
        self._info_obj = None

    @property
    def xmp_metadata(self) -> Optional[XmpInformation]:
        """
        XMP (Extensible Metadata Platform) data.

        Returns:
            The value of ``root_object.xmp_metadata``.

        """
        return cast(XmpInformation, self.root_object.xmp_metadata)

    @xmp_metadata.setter
    def xmp_metadata(self, value: Union[XmpInformation, bytes, None]) -> None:
        """
        XMP (Extensible Metadata Platform) data.

        Args:
            value: The new metadata, either as XmpInformation or as raw
                bytes. None removes "/Metadata" from the root object.

        """
        if value is None:
            if "/Metadata" in self.root_object:
                del self.root_object["/Metadata"]
            return

        metadata = self.root_object.get("/Metadata", None)
        if isinstance(metadata, IndirectObject):
            # Reuse the stream which is already referenced.
            metadata_stream = cast(StreamObject, metadata.get_object())
        else:
            # Replace a direct entry (if any) by a newly registered stream.
            if metadata is not None:
                del self.root_object["/Metadata"]
            metadata_stream = StreamObject()
            stream_reference = self._add_object(metadata_stream)
            self.root_object[NameObject("/Metadata")] = stream_reference

        bytes_data = (
            value.stream.get_data() if isinstance(value, XmpInformation) else value
        )
        metadata_stream.set_data(bytes_data)

    @property
    def with_as_usage(self) -> bool:
        """Removed property; accessing it raises through deprecation_no_replacement."""
        # deprecation_no_replacement (imported from ._utils) raises, so the
        # return statement below is not reached.
        deprecation_no_replacement("with_as_usage", "5.0")
        return self._with_as_usage

    @with_as_usage.setter
    def with_as_usage(self, value: bool) -> None:
        """Removed property; assigning to it raises through deprecation_no_replacement."""
        # deprecation_no_replacement (imported from ._utils) raises, so the
        # assignment below is not reached.
        deprecation_no_replacement("with_as_usage", "5.0")
        self._with_as_usage = value

    def __enter__(self) -> Self:
        """
        Store how writer is initialized by 'with'.

        The writer is re-initialized with default arguments; only the
        ``_cloned`` flag and the originally passed ``fileobj`` (kept in
        ``temp_fileobj``) are carried over.

        Returns:
            This writer.

        """
        # Save the state which has to survive the re-initialization below.
        c: bool = self._cloned
        t = self.temp_fileobj
        self.__init__()  # type: ignore
        self._cloned = c
        self._with_as_usage = True
        # Remember the target; __exit__ writes to it.
        self.fileobj = t  # type: ignore
        return self

    def __exit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> None:
        """
        Write data to the fileobj.

        Nothing is written if no fileobj is set or if the writer has been
        created by cloning.
        """
        if not self.fileobj or self._cloned:
            return
        self.write(self.fileobj)

    @property
    def pdf_header(self) -> str:
        """
        Read/Write property of the PDF header that is written.

        This should be something like ``'%PDF-1.5'``. It is recommended to set
        the lowest version that supports all features which are used within the
        PDF file.

        Note: `pdf_header` returns a string but accepts bytes or str for writing

        Returns:
            The header bytes stored in ``_header``, decoded to a string.

        """
        return self._header.decode()

    @pdf_header.setter
    def pdf_header(self, new_header: Union[str, bytes]) -> None:
        """
        Set the PDF header.

        Args:
            new_header: The header; a string is encoded to bytes before it
                is stored.

        """
        self._header = (
            new_header.encode() if isinstance(new_header, str) else new_header
        )

    def _add_object(self, obj: PdfObject) -> IndirectObject:
        """
        Register an object as indirect object of this writer.

        Args:
            obj: The object to add.

        Returns:
            The reference to the object. If the object already carries a
            reference which belongs to this writer, that one is returned and
            nothing is added.

        """
        known_reference = getattr(obj, "indirect_reference", None)
        if known_reference is not None and known_reference.pdf == self:
            return obj.indirect_reference  # type: ignore
        # check for /Contents in Pages (/Contents in annotations are strings)
        if isinstance(obj, DictionaryObject):
            contents = obj.get(PG.CONTENTS, None)
            if isinstance(contents, (ArrayObject, DictionaryObject)):
                obj[NameObject(PG.CONTENTS)] = self._add_object(obj[PG.CONTENTS])
        self._objects.append(obj)
        # Object numbers are 1-based: the new object is the last entry.
        obj.indirect_reference = IndirectObject(len(self._objects), 0, self)
        return obj.indirect_reference

    def get_object(
        self,
        indirect_reference: Union[int, IndirectObject],
    ) -> PdfObject:
        """
        Return the object stored for an object number or a reference.

        Args:
            indirect_reference: The 1-based object number or a reference
                which belongs to this writer.

        Returns:
            The stored object.

        Raises:
            ValueError: If the reference belongs to another PDF.

        """
        if isinstance(indirect_reference, int):
            idnum = indirect_reference
        else:
            if indirect_reference.pdf != self:
                raise ValueError("PDF must be self")
            idnum = indirect_reference.idnum
        obj = self._objects[idnum - 1]
        assert obj is not None, "mypy"
        return obj

    def _replace_object(
        self,
        indirect_reference: Union[int, IndirectObject],
        obj: PdfObject,
    ) -> PdfObject:
        """
        Replace the object stored for an object number or a reference.

        The generation of the replaced object is kept. An object which
        carries a reference of another PDF is cloned into this writer first.

        Args:
            indirect_reference: The 1-based object number or a reference
                which belongs to this writer.
            obj: The new object.

        Returns:
            The object which is stored now.

        Raises:
            ValueError: If the reference belongs to another PDF.

        """
        if isinstance(indirect_reference, IndirectObject):
            if indirect_reference.pdf != self:
                raise ValueError("PDF must be self")
            idnum = indirect_reference.idnum
        else:
            idnum = indirect_reference
        slot = idnum - 1
        generation = self._objects[slot].indirect_reference.generation  # type: ignore
        current_reference = getattr(obj, "indirect_reference", None)
        if current_reference is not None and current_reference.pdf != self:
            obj = obj.clone(self)
        self._objects[slot] = obj
        obj.indirect_reference = IndirectObject(idnum, generation, self)

        assert isinstance(obj, PdfObject), "mypy"
        return obj

    def _add_page(
        self,
        page: PageObject,
        index: int,
        excluded_keys: Iterable[str] = (),
    ) -> PageObject:
        """
        Clone a page into this writer and insert it into the page tree.

        Args:
            page: The page to add.
            index: Position of the page; passed to ``_get_page_in_node`` to
                find the node and the position within its kids.
            excluded_keys: Keys which are not cloned. The parent entry and
                "/StructParents" are always excluded.

        Returns:
            The cloned page which is part of this writer.

        Raises:
            ValueError: If ``page`` is not a PageObject of type page.
            PyPdfError: If more than 1000 ancestors are visited while
                updating the page counts.

        """
        if not isinstance(page, PageObject) or page.get(PagesAttributes.TYPE, None) != CO.PAGE:
            raise ValueError("Invalid page object")
        assert self.flattened_pages is not None, "for mypy"
        page_org = page
        excluded_keys = list(excluded_keys)
        excluded_keys += [PagesAttributes.PARENT, "/StructParents"]
        # Acrobat does not accept two indirect references pointing on the same
        # page; therefore in order to add multiple copies of the same
        # page, we need to create a new dictionary for the page, however the
        # objects below (including content) are not duplicated:
        try:  # delete an already existing page
            del self._id_translated[id(page_org.indirect_reference.pdf)][  # type: ignore
                page_org.indirect_reference.idnum  # type: ignore
            ]
        except Exception:
            pass

        page = cast(
            "PageObject", page_org.clone(self, False, excluded_keys).get_object()
        )
        if page_org.pdf is not None:
            # Raise our header version if the source document needs it.
            other = page_org.pdf.pdf_header
            self.pdf_header = _get_max_pdf_version_header(self.pdf_header, other)

        node, idx = self._get_page_in_node(index)
        page[NameObject(PagesAttributes.PARENT)] = node.indirect_reference

        # A negative idx means: append at the end.
        if idx >= 0:
            cast(ArrayObject, node[PagesAttributes.KIDS]).insert(idx, page.indirect_reference)
            self.flattened_pages.insert(index, page)
        else:
            cast(ArrayObject, node[PagesAttributes.KIDS]).append(page.indirect_reference)
            self.flattened_pages.append(page)
        # Increment the page count of the node and of all its ancestors; the
        # counter guards against cyclic parent references.
        recurse = 0
        while not is_null_or_none(node):
            node = cast(DictionaryObject, node.get_object())
            node[NameObject(PagesAttributes.COUNT)] = NumberObject(cast(int, node[PagesAttributes.COUNT]) + 1)
            node = node.get(PagesAttributes.PARENT, None)  # type: ignore[assignment]  # TODO: Fix.
            recurse += 1
            if recurse > 1000:
                raise PyPdfError("Too many recursive calls!")

        if page_org.pdf is not None:
            # the page may contain links to other pages, and those other
            # pages may or may not already be added.  we store the
            # information we need, so that we can resolve the references
            # later.
            self._unresolved_links.extend(extract_links(page, page_org))
            self._merged_in_pages[page_org.indirect_reference] = page.indirect_reference

        return page

    def set_need_appearances_writer(self, state: bool = True) -> None:
        """
        Sets the "NeedAppearances" flag in the PDF writer.

        The "NeedAppearances" flag indicates whether the appearance dictionary
        for form fields should be automatically generated by the PDF viewer or
        if the embedded appearance should be used.

        An AcroForm dictionary is created if the document does not have one.
        Errors are not raised but logged as a warning.

        Args:
            state: The actual value of the NeedAppearances flag.

        Returns:
            None

        """
        # See §12.7.2 and §7.7.2 for more information:
        # https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
        try:
            # get the AcroForm tree
            if CatalogDictionary.ACRO_FORM not in self._root_object:
                acro_form_reference = self._add_object(DictionaryObject())
                self._root_object[
                    NameObject(CatalogDictionary.ACRO_FORM)
                ] = acro_form_reference

            flag_name = NameObject(InteractiveFormDictEntries.NeedAppearances)
            flag_value = BooleanObject(state)
            acro_form = cast(
                DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM]
            )
            acro_form[flag_name] = flag_value
        except Exception as exc:  # pragma: no cover
            logger_warning(
                f"set_need_appearances_writer({state}) catch : {exc}", __name__
            )

    def create_viewer_preferences(self) -> ViewerPreferences:
        """
        Create a new ViewerPreferences object and store it in the root object.

        Returns:
            The newly created ViewerPreferences.

        """
        preferences = ViewerPreferences()
        reference = self._add_object(preferences)
        self._root_object[NameObject(CatalogDictionary.VIEWER_PREFERENCES)] = reference
        return preferences

    def add_page(
        self,
        page: PageObject,
        excluded_keys: Iterable[str] = (),
    ) -> PageObject:
        """
        Add a page to this PDF file.

        Recommended for advanced usage including the adequate excluded_keys.

        The page is usually acquired from a :class:`PdfReader<pypdf.PdfReader>`
        instance.

        Args:
            page: The page to add to the document. Should be
                an instance of :class:`PageObject<pypdf._page.PageObject>`
            excluded_keys: Keys of the page which are not copied.

        Returns:
            The added PageObject.

        """
        assert self.flattened_pages is not None, "mypy"
        # Appending means: insert behind the current last page.
        end_index = len(self.flattened_pages)
        return self._add_page(page, end_index, excluded_keys)

    def insert_page(
        self,
        page: PageObject,
        index: int = 0,
        excluded_keys: Iterable[str] = (),
    ) -> PageObject:
        """
        Insert a page in this PDF file. The page is usually acquired from a
        :class:`PdfReader<pypdf.PdfReader>` instance.

        Args:
            page: The page to add to the document.
            index: Position at which the page will be inserted. A negative
                value counts from the end; a value at or beyond the number of
                pages appends the page.
            excluded_keys: Keys of the page which are not copied.

        Returns:
            The added PageObject.

        Raises:
            ValueError: If a negative index points before the first page.

        """
        assert self.flattened_pages is not None, "mypy"
        page_count = len(self.flattened_pages)
        if index < 0:
            index += page_count
            if index < 0:
                raise ValueError("Invalid index value")
        if index >= page_count:
            return self.add_page(page, excluded_keys)
        return self._add_page(page, index, excluded_keys)

    def _get_page_number_by_indirect(
        self, indirect_reference: Union[None, int, NullObject, IndirectObject]
    ) -> Optional[int]:
        """
        Resolve a reference to a page and return that page's number.

        Provided so the writer offers the same function as PdfReader.

        Args:
            indirect_reference: An object id (int), an indirect reference,
                or a null/None value.

        Returns:
            The page number, or None when the reference is null/None or does
            not resolve to a page.

        """
        if is_null_or_none(indirect_reference):
            return None
        assert indirect_reference is not None, "mypy"
        # A bare object id is promoted to a reference owned by this writer.
        reference = (
            IndirectObject(indirect_reference, 0, self)
            if isinstance(indirect_reference, int)
            else indirect_reference
        )
        target = reference.get_object()
        return target.page_number if isinstance(target, PageObject) else None

    def add_blank_page(
        self, width: Optional[float] = None, height: Optional[float] = None
    ) -> PageObject:
        """
        Create a blank page, append it to this PDF file and return it.

        If no page size is specified, use the size of the last page.

        Args:
            width: The width of the new page expressed in default user
                space units.
            height: The height of the new page expressed in default
                user space units.

        Returns:
            The newly appended page.

        Raises:
            PageSizeNotDefinedError: if width and height are not defined
                and previous page does not exist.

        """
        return self.add_page(PageObject.create_blank_page(self, width, height))

    def insert_blank_page(
        self,
        width: Optional[Union[float, decimal.Decimal]] = None,
        height: Optional[Union[float, decimal.Decimal]] = None,
        index: int = 0,
    ) -> PageObject:
        """
        Insert a blank page to this PDF file and return it.

        A dimension that is missing (``None``) or not positive is taken from
        the media box of the page currently located at ``index`` (clamped to
        the last page when ``index`` equals the page count). When the document
        has no pages yet, the given dimensions are passed on unchanged.

        Args:
            width: The width of the new page in default user space units.
            height: The height of the new page in default user space units.
            index: Position to add the page.

        Returns:
            The newly inserted page.

        Raises:
            PageSizeNotDefinedError: if width and height are not defined
                and previous page does not exist.
            IndexError: Index is outside of [-self.get_num_pages(), self.get_num_pages()]
        """
        num_pages = self.get_num_pages()
        if abs(index) > num_pages:
            raise IndexError(f"Index should be in range [-{num_pages}, {num_pages}]")

        # Only consult an existing page when there is one. On an empty
        # document the lookup index would become -1 and the page access would
        # fail even though explicit dimensions make the lookup unnecessary.
        if num_pages > 0:
            # Use the chosen index, but do not exceed the available pages
            fixed_index = min(index, num_pages - 1)
            mediabox = self.pages[fixed_index].mediabox
            if width is None or width <= 0:
                width = mediabox.width
            if height is None or height <= 0:
                height = mediabox.height

        # NOTE(review): with no pages and no dimensions, create_blank_page is
        # expected to raise PageSizeNotDefinedError as documented above.
        page = PageObject.create_blank_page(self, width, height)
        self.insert_page(page, index)
        return page

    @property
    def open_destination(
        self,
    ) -> Union[None, Destination, TextStringObject, ByteStringObject]:
        """
        Return the open destination exactly as provided by the parent class.

        The property is redeclared here so that the writer can attach a
        setter to it.
        """
        return super().open_destination

    @open_destination.setter
    def open_destination(self, dest: Union[None, str, Destination, PageObject]) -> None:
        # None removes the catalog's /OpenAction entry (missing entry is fine).
        if dest is None:
            try:
                del self._root_object["/OpenAction"]
            except KeyError:
                pass
            return

        # A string is stored as a text string (named destination).
        if isinstance(dest, str):
            self._root_object[NameObject("/OpenAction")] = TextStringObject(dest)
            return

        # An explicit destination is stored as its array form.
        if isinstance(dest, Destination):
            self._root_object[NameObject("/OpenAction")] = dest.dest_array
            return

        # A page is wrapped into a fit-page destination pointing at it;
        # a page without a reference yields a null target.
        if isinstance(dest, PageObject):
            page_ref = dest.indirect_reference
            opening = Destination(
                "Opening",
                NullObject() if page_ref is None else page_ref,
                PAGE_FIT,
            )
            self._root_object[NameObject("/OpenAction")] = opening.dest_array
        # Any other type is silently ignored.

    def add_js(self, javascript: str) -> None:
        """
        Register a JavaScript action in the catalog's name dictionary.

        Each call appends a new (name, action) pair to the ``/Names`` array
        of the ``/JavaScript`` entry, so several scripts can coexist.

        Args:
            javascript: Your JavaScript.

        Example:
            This will launch the print window when the PDF is opened.

            >>> from pypdf import PdfWriter
            >>> output = PdfWriter()
            >>> output.add_js("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

        """
        catalog = self._root_object
        # Names / JavaScript preferred to be able to add multiple scripts
        if "/Names" not in catalog:
            catalog[NameObject(CA.NAMES)] = DictionaryObject()
        name_dictionary = cast(DictionaryObject, catalog[CA.NAMES])

        if "/JavaScript" not in name_dictionary:
            empty_script_tree = DictionaryObject({NameObject("/Names"): ArrayObject()})
            name_dictionary[NameObject("/JavaScript")] = empty_script_tree
        script_tree = cast(DictionaryObject, name_dictionary["/JavaScript"])
        script_entries = cast(ArrayObject, script_tree["/Names"])

        # The entry needs a name, but its value is irrelevant: use a random one.
        entry_name = create_string_object(str(uuid.uuid4()))
        script_entries.append(entry_name)

        action = DictionaryObject(
            {
                NameObject(PagesAttributes.TYPE): NameObject("/Action"),
                NameObject("/S"): NameObject("/JavaScript"),
                NameObject("/JS"): TextStringObject(f"{javascript}"),
            }
        )
        action_ref = self._add_object(action)
        script_entries.append(action_ref)

    def add_attachment(self, filename: str, data: Union[str, bytes]) -> "EmbeddedFile":
        """
        Embed a file inside the PDF.

        The work is delegated to ``EmbeddedFile._create_new`` with this
        writer as the owning document.

        Reference:
        https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
        Section 7.11.3

        Args:
            filename: The filename to display.
            data: The data in the file, given either as text or as bytes.

        Returns:
            EmbeddedFile instance for the newly created embedded file.

        """
        return EmbeddedFile._create_new(self, filename, data)

    def append_pages_from_reader(
        self,
        reader: PdfReader,
        after_page_append: Optional[Callable[[PageObject], None]] = None,
    ) -> None:
        """
        Append every page of ``reader`` to this writer, in order.

        An optional callback is invoked after each page has been added.

        ``append`` should be preferred.

        Args:
            reader: The PdfReader whose pages are added to this writer.
            after_page_append:
                Callback invoked once per page, right after it has been
                added. Its single argument is the page object returned by
                ``add_page`` for that page.

        """
        for source_page in reader.pages:
            added_page = self.add_page(source_page)
            # Hand the writer-side page (not the reader's) to the callback.
            if callable(after_page_append):
                after_page_append(added_page)

    def _merge_content_stream_to_page(
        self,
        page: PageObject,
        new_content_data: bytes,
    ) -> None:
        """
        Append new content (as bytes) after the page's existing content.

        Three situations are handled:

        * no ``/Contents`` entry: a new stream holding the data is created;
        * ``/Contents`` is an array: a new stream is appended to the array;
        * ``/Contents`` is a single stream: it is replaced by a new stream
          containing the old data, a newline and the new data.

        Any other kind of ``/Contents`` value is left untouched.

        Args:
            page: The page to which the new content data will be added.
            new_content_data: A binary-encoded new content stream, for
                instance the commands to draw an XObject.
        """
        contents_key = NameObject("/Contents")

        if contents_key not in page:
            # Empty page: the new data becomes its only content stream.
            only_stream = StreamObject()
            only_stream.set_data(new_content_data)
            page[NameObject("/Contents")] = self._add_object(only_stream)
            return

        # Resolve the existing page content (normally an indirect object, see
        # "PDF Explained" by John Whitington, chapter 4).
        current = page[contents_key].get_object()

        if isinstance(current, ArrayObject):
            extra_stream = StreamObject()
            extra_stream.set_data(new_content_data)
            current.append(self._add_object(extra_stream))
            page[NameObject("/Contents")] = self._add_object(current)
        elif isinstance(current, StreamObject):
            combined = StreamObject()
            combined.set_data(current.get_data() + b"\n" + new_content_data)
            page[NameObject("/Contents")] = self._add_object(combined)

    def _add_apstream_object(
        self,
        page: PageObject,
        appearance_stream_obj: StreamObject,
        object_name: str,
        x_offset: float,
        y_offset: float,
    ) -> None:
        """
        Draw an appearance stream on the page as a form XObject.

        The stream's fonts are merged into the page's font resources, the
        stream is registered under ``/Fm_<object_name>`` in the page's
        ``/XObject`` resources, and drawing commands translated by the given
        offsets are appended to the page content.

        Args:
            page: The page to which to add the appearance stream.
            appearance_stream_obj: The appearance stream.
            object_name: The name of the appearance stream.
            x_offset: The horizontal offset for the appearance stream.
            y_offset: The vertical offset for the appearance stream.
        """
        page_resources = cast(DictionaryObject, page[PG.RESOURCES])

        # Only font resources are carried over for now; other resource kinds
        # could be merged in the same way.
        if "/Resources" in appearance_stream_obj:
            stream_resources = cast(
                DictionaryObject, appearance_stream_obj["/Resources"]
            )
            stream_fonts = cast(
                DictionaryObject, stream_resources.get("/Font", DictionaryObject())
            )
            if "/Font" not in page_resources:
                page_resources[NameObject("/Font")] = self._add_object(
                    DictionaryObject()
                )
            page_fonts = cast(DictionaryObject, page_resources["/Font"].get_object())
            for name, font in stream_fonts.items():
                if name in page_fonts:
                    # Fonts already known to the page win.
                    continue
                page_fonts[name] = self._add_object(font)

        # Registering the stream with this writer yields a reference that is
        # guaranteed to be managed by *this* writer.
        form_ref = self._add_object(appearance_stream_obj)
        form_name = NameObject(f"/Fm_{object_name}")._sanitize()

        if "/XObject" not in page_resources:
            page_resources[NameObject("/XObject")] = DictionaryObject()
        page_xobjects = cast(DictionaryObject, page_resources["/XObject"])
        if form_name in page_xobjects:
            logger_warning(
                f"XObject {form_name!r} already added to page resources. This might be an issue.",
                __name__
            )
        else:
            page_xobjects[form_name] = form_ref

        placement = Transformation().translate(x_offset, y_offset)
        draw_commands = f"q\n{placement._to_cm()}\n{form_name} Do\nQ".encode()
        self._merge_content_stream_to_page(page, draw_commands)

    # Empty field-flag set; serves as the default for the ``flags`` parameter
    # of update_page_form_field_values.
    FFBITS_NUL = FA.FfBits(0)

    def update_page_form_field_values(
        self,
        page: Union[PageObject, list[PageObject], None],
        fields: Mapping[str, Union[str, list[str], tuple[str, str, float]]],
        flags: FA.FfBits = FFBITS_NUL,
        auto_regenerate: Optional[bool] = True,
        flatten: bool = False,
    ) -> None:
        """
        Update the form field values for a given page from a fields dictionary.

        Copy field texts and values from fields to page.
        If the field links to a parent object, add the information to the parent.

        A widget is matched against an entry of ``fields`` either by its
        qualified field name or by its partial name (/T).

        Args:
            page: `PageObject` - references **PDF writer's page** where the
                annotations and field data will be updated.
                `List[Pageobject]` - provides list of pages to be processed.
                `None` - all pages.
            fields: a Python dictionary of:

                * field names (/T) as keys and text values (/V) as value
                * field names (/T) as keys and list of text values (/V) for multiple choice list
                * field names (/T) as keys and tuple of:
                    * text values (/V)
                    * font id (e.g. /F1, the font id must exist)
                    * font size (0 for autosize)

            flags: A set of flags from :class:`~pypdf.constants.FieldDictionaryAttributes.FfBits`.

            auto_regenerate: Set/unset the need_appearances flag;
                the flag is unchanged if auto_regenerate is None.

            flatten: Whether or not to flatten the annotation. If True, this adds the annotation's
                appearance stream to the page contents. Note that this option does not remove the
                annotation itself.

        Raises:
            PyPdfError: If the document has no /AcroForm dictionary or the
                /AcroForm has no /Fields entry.

        """
        if CatalogDictionary.ACRO_FORM not in self._root_object:
            raise PyPdfError("No /AcroForm dictionary in PDF of PdfWriter Object")
        acro_form = cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])
        if InteractiveFormDictEntries.Fields not in acro_form:
            raise PyPdfError("No /Fields dictionary in PDF of PdfWriter Object")
        if isinstance(auto_regenerate, bool):
            self.set_need_appearances_writer(auto_regenerate)
        # Iterate through pages, update field values
        if page is None:
            page = list(self.pages)
        if isinstance(page, list):
            for p in page:
                if PG.ANNOTS in p:  # just to prevent warnings
                    # auto_regenerate is passed as None: the flag has already
                    # been handled above and must not be touched per page.
                    self.update_page_form_field_values(p, fields, flags, None, flatten=flatten)
            return
        if PG.ANNOTS not in page:
            logger_warning("No fields to update on this page", __name__)
            return

        for annotation in page[PG.ANNOTS]:  # type: ignore
            annotation = cast(DictionaryObject, annotation.get_object())
            if annotation.get("/Subtype", "") != "/Widget":
                continue
            # A widget carrying both /FT and /T is its own field; otherwise
            # the field data lives in the parent.
            if "/FT" in annotation and "/T" in annotation:
                parent_annotation = annotation
            else:
                parent_annotation = annotation.get(
                    PG.PARENT, DictionaryObject()
                ).get_object()

            for field, value in fields.items():
                if not (
                    self._get_qualified_field_name(parent_annotation) == field
                    or parent_annotation.get("/T", None) == field
                ):
                    continue
                # Start from a clean slate for every matched field so that a
                # field which produces no appearance stream (e.g. a signature)
                # is never flattened with the stream of a previous field.
                appearance_stream_obj: Optional[StreamObject] = None
                if (
                    parent_annotation.get("/FT", None) == "/Ch"
                    and "/I" in parent_annotation
                ):
                    # Drop the selected-indices entry of choice fields.
                    del parent_annotation["/I"]
                if flags:
                    annotation[NameObject(FA.Ff)] = NumberObject(flags)
                # Set the field value
                if not (value is None and flatten):  # Only change values if given by user and not flattening.
                    if isinstance(value, list):
                        lst = ArrayObject(TextStringObject(v) for v in value)
                        parent_annotation[NameObject(FA.V)] = lst
                    elif isinstance(value, tuple):
                        annotation[NameObject(FA.V)] = TextStringObject(
                            value[0],
                        )
                    else:
                        parent_annotation[NameObject(FA.V)] = TextStringObject(value)
                # Get or create the field's appearance stream object
                if parent_annotation.get(FA.FT) == "/Btn":
                    # Checkbox button (no /FT found in Radio widgets);
                    # We can find the associated appearance stream object
                    # within the annotation.
                    v = NameObject(value)
                    ap = cast(DictionaryObject, annotation[NameObject(AA.AP)])
                    normal_ap = cast(DictionaryObject, ap["/N"])
                    if v not in normal_ap:
                        # Unknown state for this widget: switch it off.
                        v = NameObject("/Off")
                    appearance_stream_obj = normal_ap.get(v)
                    # Other cases will be updated through the for loop
                    annotation[NameObject(AA.AS)] = v
                    annotation[NameObject(FA.V)] = v
                elif (
                    parent_annotation.get(FA.FT) == "/Tx"
                    or parent_annotation.get(FA.FT) == "/Ch"
                ):
                    # Textbox; we need to generate the appearance stream object
                    if isinstance(value, tuple):
                        appearance_stream_obj = TextStreamAppearance.from_text_annotation(
                            acro_form, parent_annotation, annotation, value[1], value[2]
                        )
                    else:
                        appearance_stream_obj = TextStreamAppearance.from_text_annotation(
                            acro_form, parent_annotation, annotation
                        )
                    # Add the appearance stream object
                    if AA.AP not in annotation:
                        annotation[NameObject(AA.AP)] = DictionaryObject(
                            {NameObject("/N"): self._add_object(appearance_stream_obj)}
                        )
                    elif "/N" not in cast(DictionaryObject, annotation[AA.AP]):
                        cast(DictionaryObject, annotation[NameObject(AA.AP)])[
                            NameObject("/N")
                        ] = self._add_object(appearance_stream_obj)
                    else:  # [/AP][/N] exists
                        # Reuse the object number of the existing normal
                        # appearance so that references to it stay valid.
                        n = annotation[AA.AP]["/N"].indirect_reference.idnum  # type: ignore
                        self._objects[n - 1] = appearance_stream_obj
                        appearance_stream_obj.indirect_reference = IndirectObject(n, 0, self)
                elif (
                    annotation.get(FA.FT) == "/Sig"
                ):  # deprecated  # not implemented yet
                    logger_warning("Signature forms not implemented yet", __name__)
                if flatten and appearance_stream_obj is not None:
                    # The widget rectangle is only needed for flattening, so
                    # it is looked up here rather than for every candidate.
                    rectangle = cast(RectangleObject, annotation[AA.Rect])
                    self._add_apstream_object(page, appearance_stream_obj, field, rectangle[0], rectangle[1])

    def reattach_fields(
        self, page: Optional[PageObject] = None
    ) -> list[DictionaryObject]:
        """
        Find widget annotations that are missing from the form's field list
        and add them to it.

        The /AcroForm dictionary and its /Fields array are created when they
        do not exist yet. Widgets stored directly inside the annotation array
        are first registered as objects of this writer.

        Args:
            page: page to analyze.
                  If none is provided, all pages will be analyzed.

        Returns:
            list of reattached fields.

        """
        reattached: list[DictionaryObject] = []
        if page is None:
            for each_page in self.pages:
                reattached.extend(self.reattach_fields(each_page))
            return reattached

        catalog = self._root_object
        try:
            acro_form = cast(DictionaryObject, catalog[CatalogDictionary.ACRO_FORM])
        except KeyError:
            acro_form = DictionaryObject()
            catalog[NameObject(CatalogDictionary.ACRO_FORM)] = acro_form
        try:
            known_fields = cast(ArrayObject, acro_form[InteractiveFormDictEntries.Fields])
        except KeyError:
            known_fields = ArrayObject()
            acro_form[NameObject(InteractiveFormDictEntries.Fields)] = known_fields

        if "/Annots" not in page:
            return reattached

        page_annotations = cast(ArrayObject, page["/Annots"])
        for position, entry in enumerate(page_annotations):
            stored_as_reference = isinstance(entry, IndirectObject)
            widget = cast(DictionaryObject, entry.get_object())
            # Only widgets that are fields themselves are of interest.
            if widget.get("/Subtype", "") != "/Widget" or "/FT" not in widget:
                continue
            already_listed = (
                "indirect_reference" in widget.__dict__
                and widget.indirect_reference in known_fields
            )
            if already_listed:
                continue
            if not stored_as_reference:
                # Inline widget: register it so it can be referenced.
                page_annotations[position] = self._add_object(widget)
            known_fields.append(widget.indirect_reference)
            reattached.append(widget)
        return reattached

    def clone_reader_document_root(self, reader: PdfReader) -> None:
        """
        Copy the reader document root to the writer and all sub-elements,
        including pages, threads, outlines,... For partial insertion, ``append``
        should be considered.

        Any document information previously attached to this writer is
        dropped. In incremental mode every object of the reader is replicated
        at the same position; otherwise the writer's object list is emptied
        before the root is cloned and the page tree is rewritten as a single
        flat list of pages.

        Args:
            reader: PdfReader from which the document root should be copied.

        Raises:
            PdfReadError: In strict mode, if more objects exist than the
                reader's trailer /Size announces; also if flattening the page
                tree fails with an IndexError.

        """
        self._info_obj = None
        if self.incremental:
            # One slot per object number 1 .. /Size - 1 so that object ids
            # stay identical to those of the source document.
            self._objects = [None] * (cast(int, reader.trailer["/Size"]) - 1)
            for i in range(len(self._objects)):
                o = reader.get_object(i + 1)
                if o is not None:
                    self._objects[i] = o.replicate(self)
        else:
            self._objects.clear()
        self._root_object = reader.root_object.clone(self)
        # raw_get: keep the /Pages entry as stored instead of the resolved object.
        self._pages = self._root_object.raw_get("/Pages")

        # Sanity check against the size declared in the reader's trailer:
        # an error in strict mode, a warning otherwise.
        if len(self._objects) > cast(int, reader.trailer["/Size"]):
            if self.strict:
                raise PdfReadError(
                    f"Object count {len(self._objects)} exceeds defined trailer size {reader.trailer['/Size']}"
                )
            logger_warning(
                f"Object count {len(self._objects)} exceeds defined trailer size {reader.trailer['/Size']}",
                __name__
            )

        # must be done here before rewriting
        # (the hashes are the baseline used to detect modified objects)
        if self.incremental:
            self._original_hash = [
                (obj.hash_bin() if obj is not None else 0) for obj in self._objects
            ]

        try:
            self._flatten()
        except IndexError:
            raise PdfReadError("Got index error while flattening.")

        assert self.flattened_pages is not None
        for p in self.flattened_pages:
            # Store the flattened page under its own object number.
            self._replace_object(cast(IndirectObject, p.indirect_reference).idnum, p)
            if not self.incremental:
                # Non-incremental: every page hangs directly below the root
                # pages node.
                p[NameObject("/Parent")] = self._pages
        if not self.incremental:
            # ... and the root pages node lists all pages as direct kids.
            cast(DictionaryObject, self._pages.get_object())[
                NameObject("/Kids")
            ] = ArrayObject([p.indirect_reference for p in self.flattened_pages])

    def clone_document_from_reader(
        self,
        reader: PdfReader,
        after_page_append: Optional[Callable[[PageObject], None]] = None,
    ) -> None:
        """
        Clone a whole document from a reader: its '/Root', '/Info' and '/ID'.

        Args:
            reader: PDF file reader instance from which the clone
                should be created.
            after_page_append:
                Callback invoked once for every entry of the cloned page
                tree's ``/Kids`` array, after cloning has finished. Its
                single argument is the resolved page object.

        """
        self.clone_reader_document_root(reader)

        # When the reader has no info dictionary, _info_obj stays None as set
        # by clone_reader_document_root().
        source_info = reader._info
        if source_info is not None:
            if self.incremental:
                self._info_obj = cast(
                    IndirectObject, source_info.clone(self).indirect_reference
                )
                assert isinstance(self._info, DictionaryObject), "for mypy"
                # Record the cloned info as unmodified in the hash baseline.
                info_slot = self._info_obj.indirect_reference.idnum - 1
                self._original_hash[info_slot] = self._info.hash_bin()
            else:
                info_copy = DictionaryObject(
                    cast(DictionaryObject, source_info.get_object())
                )
                self._info_obj = self._add_object(info_copy)

        try:
            self._ID = cast(ArrayObject, reader._ID).clone(self)
        except AttributeError:
            # No usable identifier on the reader: keep the writer's own.
            pass

        if not callable(after_page_append):
            return
        page_tree = cast(DictionaryObject, self._pages.get_object())
        for kid in cast(ArrayObject, page_tree["/Kids"]):
            after_page_append(kid.get_object())

    def _compute_document_identifier(self) -> ByteStringObject:
        """
        Derive an identifier from the current document content.

        The PDF structure is written to an in-memory buffer and a rolling
        checksum of that buffer, UTF-8 encoded, becomes the identifier.
        """
        buffer = BytesIO()
        self._write_pdf_structure(buffer)
        buffer.seek(0)  # rewind so the checksum covers the whole buffer
        checksum = _rolling_checksum(buffer)
        return ByteStringObject(checksum.encode("utf8"))

    def generate_file_identifiers(self) -> None:
        """
        Generate the pair of file identifiers for the PDF that will be written.

        Uniqueness is the only goal; reproducibility is not required.
        A first-time written file gets twice the same, freshly computed
        identifier. If an identifier pair already exists, its first element
        is kept and only the second one is recomputed. Readers can thus tell
        an unchanged file (both match) from another version of the same file
        (only the first matches).
        See §14.4 "File Identifiers".
        """
        previous = self._ID
        if previous:
            permanent_id = previous[0]
            changing_id = self._compute_document_identifier()
        else:
            permanent_id = changing_id = self._compute_document_identifier()
        self._ID = ArrayObject((permanent_id, changing_id))

    def encrypt(
        self,
        user_password: str,
        owner_password: Optional[str] = None,
        use_128bit: bool = True,
        permissions_flag: UserAccessPermissions = ALL_DOCUMENT_PERMISSIONS,
        *,
        algorithm: Optional[str] = None,
    ) -> None:
        """
        Encrypt this PDF file with the PDF Standard encryption handler.

        Calling this method again replaces the previous encryption entry
        in place.

        Args:
            user_password: The password which allows for opening
                and reading the PDF file with the restrictions provided.
            owner_password: The password which allows for
                opening the PDF files without any restrictions. By default,
                the owner password is the same as the user password.
            use_128bit: flag as to whether to use 128bit
                encryption. When false, 40bit encryption will be used.
                By default, this flag is on.
            permissions_flag: permissions as described in
                Table 3.20 of the PDF 1.7 specification. A bit value of 1 means
                the permission is granted.
                Hence an integer value of -1 will set all flags.
                Bit position 3 is for printing, 4 is for modifying content,
                5 and 6 control annotations, 9 for form fields,
                10 for extraction of text and graphics.
            algorithm: encrypt algorithm. Values may be one of "RC4-40", "RC4-128",
                "AES-128", "AES-256-R5", "AES-256". If it is valid,
                `use_128bit` will be ignored.

        Raises:
            ValueError: If ``algorithm`` does not name a supported algorithm.

        """
        if owner_password is None:
            owner_password = user_password

        if algorithm is None:
            # Legacy selection through the boolean flag.
            alg = EncryptAlgorithm.RC4_128 if use_128bit else EncryptAlgorithm.RC4_40
        else:
            # "AES-256-R5" -> attribute AES_256_R5
            attribute_name = algorithm.replace("-", "_")
            try:
                alg = getattr(EncryptAlgorithm, attribute_name)
            except AttributeError:
                raise ValueError(f"Algorithm '{algorithm}' NOT supported")

        self.generate_file_identifiers()
        assert self._ID
        self._encryption = Encryption.make(alg, permissions_flag, self._ID[0])
        entry = self._encryption.write_entry(user_password, owner_password)

        if not self._encrypt_entry:
            self._add_object(entry)
        else:
            # in case call `encrypt` again: replace old encrypt_entry
            previous_ref = self._encrypt_entry.indirect_reference
            assert previous_ref is not None
            entry.indirect_reference = previous_ref
            self._objects[entry.indirect_reference.idnum - 1] = entry
        self._encrypt_entry = entry

    def _resolve_links(self) -> None:
        """
        Re-target links recorded earlier so they point at the merged-in
        copies of the pages they originally referenced.

        Links whose original page cannot be found, or whose page has no
        merged-in counterpart, are left as they are.
        """
        for new_link, old_link in self._unresolved_links:
            source_page = old_link.find_referenced_page()
            merged_page = (
                self._merged_in_pages.get(source_page) if source_page else None
            )
            if merged_page is not None:
                new_link.patch_reference(self, merged_page)

    def write_stream(self, stream: StreamType) -> None:
        """
        Write the document to an already opened stream.

        A warning is logged when the stream exposes a ``mode`` without "b".
        In incremental mode the original file content is copied first and an
        increment is appended only if objects were added or modified;
        otherwise the full structure, cross-reference table and trailer are
        written.

        Args:
            stream: The destination stream.
        """
        # Streams without a mode attribute are assumed to be binary.
        if "b" not in getattr(stream, "mode", "b"):
            logger_warning(
                f"File <{stream.name}> to write to is not in binary mode. "
                "It may not be written to correctly.",
                __name__,
            )
        self._resolve_links()

        if not self.incremental:
            object_positions, free_objects = self._write_pdf_structure(stream)
            xref_location = self._write_xref_table(
                stream, object_positions, free_objects
            )
            self._write_trailer(stream, xref_location)
            return

        source = self._reader.stream
        source.seek(0)
        stream.write(source.read(-1))
        if self.list_objects_in_increment():
            self._write_increment(stream)  # writes objs, xref stream and startxref

    def write(self, stream: Union[Path, StrByteType]) -> tuple[bool, IO[Any]]:
        """
        Write the collection of pages added to this object out as a PDF file.

        Args:
            stream: An object to write the file to. The object can support
                the write method and the tell method, similar to a file object, or
                be a file path, just like the fileobj, just named it stream to keep
                existing workflow.

        Returns:
            A tuple (bool, IO): whether the file was opened by this method
            (in which case it has already been closed again), and the stream
            that was written to.

        Raises:
            ValueError: If ``stream`` is an empty string.

        """
        my_file = False

        if stream == "":
            raise ValueError(f"Output({stream=}) is empty.")

        if isinstance(stream, (str, Path)):
            stream = FileIO(stream, "wb")
            my_file = True

        try:
            self.write_stream(stream)
        finally:
            # A file opened here must never leak, even if writing fails.
            # Streams handed in by the caller are left for the caller to close.
            if my_file:
                stream.close()

        if not my_file:
            stream.flush()

        return my_file, stream

    def list_objects_in_increment(self) -> list[IndirectObject]:
        """
        List the objects that an incremental write would emit.

        Meant for analysis or debugging. An object is reported when it lies
        beyond the recorded hash baseline (new) or when its current hash
        differs from the recorded one (modified).
        Deleted objects will not be freed but will become orphans.

        Returns:
            List of new or modified IndirectObjects

        """
        baseline = self._original_hash
        baseline_size = len(baseline)
        changed = []
        for position, candidate in enumerate(self._objects):
            if candidate is None:
                continue
            # Short-circuit keeps the baseline lookup within bounds.
            if position >= baseline_size or candidate.hash_bin() != baseline[position]:
                changed.append(cast(IndirectObject, candidate).indirect_reference)
        return changed

    def _write_increment(self, stream: StreamType) -> None:
        """
        Append an incremental update to ``stream``.

        Every object that is new, or whose ``hash_bin()`` differs from the
        value recorded in ``self._original_hash``, is written out. A
        cross-reference stream object describing only those objects follows,
        and finally the ``startxref`` / ``%%EOF`` footer.

        Args:
            stream: Output stream; ``tell`` and ``write`` are used on it.

        """
        # object number -> byte offset of its "N 0 obj" header
        object_positions = {}
        # one [first object number, count] pair per run of consecutive numbers
        object_blocks = []
        # first object number of the run currently being built (-1: none yet)
        current_start = -1
        # one past the last object number of the run currently being built
        current_stop = -2
        original_hash_count = len(self._original_hash)
        for i, obj in enumerate(self._objects):
            # Same selection rule as list_objects_in_increment():
            # beyond the recorded hashes, or hash changed.
            if obj is not None and (
                i >= original_hash_count
                or obj.hash_bin() != self._original_hash[i]
            ):
                idnum = i + 1
                assert isinstance(obj, PdfObject), "mypy"
                # first write new/modified object
                object_positions[idnum] = stream.tell()
                stream.write(f"{idnum} 0 obj\n".encode())
                """ encryption is not operational
                if self._encryption and obj != self._encrypt_entry:
                    obj = self._encryption.encrypt_object(obj, idnum, 0)
                """
                obj.write_to_stream(stream)
                stream.write(b"\nendobj\n")

                # prepare xref
                # A gap in the numbering closes the current run (if any)
                # and starts a new one at this object.
                if idnum != current_stop:
                    if current_start > 0:
                        object_blocks.append(
                            [current_start, current_stop - current_start]
                        )
                    current_start = idnum
                current_stop = idnum + 1
        # Holds only if at least one object was written above.
        assert current_start > 0, "for pytest only"
        # Close the last open run.
        object_blocks.append([current_start, current_stop - current_start])
        # write incremented xref
        xref_location = stream.tell()
        # The xref stream takes the first object number after the known objects.
        xr_id = len(self._objects) + 1
        stream.write(f"{xr_id} 0 obj".encode())
        init_data = {
            NameObject("/Type"): NameObject("/XRef"),
            NameObject("/Size"): NumberObject(xr_id + 1),
            NameObject("/Root"): self.root_object.indirect_reference,
            NameObject("/Filter"): NameObject("/FlateDecode"),
            # Flattened [start, count, start, count, ...] list of the runs.
            NameObject("/Index"): ArrayObject(
                [NumberObject(_it) for _su in object_blocks for _it in _su]
            ),
            # Field widths in bytes; they match the ">BIB" packing below.
            NameObject("/W"): ArrayObject(
                [NumberObject(1), NumberObject(4), NumberObject(1)]
            ),
            "__streamdata__": b"",
        }
        # /Info is only emitted when the info object is new or was modified.
        if self._info is not None and (
            self._info.indirect_reference.idnum - 1  # type: ignore
            >= len(self._original_hash)
            or cast(IndirectObject, self._info).hash_bin()  # kept for future
            != self._original_hash[
                self._info.indirect_reference.idnum - 1  # type: ignore
            ]
        ):
            init_data[NameObject(TK.INFO)] = self._info.indirect_reference
        # Chain this section to the cross-reference data of the file that was read.
        init_data[NameObject(TK.PREV)] = NumberObject(self._reader._startxref)
        if self._ID:
            init_data[NameObject(TK.ID)] = self._ID
        xr = StreamObject.initialize_from_dictionary(init_data)
        # One entry per written object, in writing order:
        # 1-byte value 1, 4-byte big-endian offset, 1-byte value 0.
        xr.set_data(
            b"".join(
                [struct.pack(b">BIB", 1, _pos, 0) for _pos in object_positions.values()]
            )
        )
        xr.write_to_stream(stream)
        stream.write(f"\nendobj\nstartxref\n{xref_location}\n%%EOF\n".encode())  # eof

    def _write_pdf_structure(self, stream: StreamType) -> tuple[list[int], list[int]]:
        """
        Write the file header and the body (all objects) to ``stream``.

        Args:
            stream: Output stream; ``tell`` and ``write`` are used on it.

        Returns:
            A pair ``(offsets, unused_ids)``: the byte offset of every object
            (``-1`` for empty slots), and the numbers of the empty slots
            followed by a terminating ``0``.

        """
        stream.write(self.pdf_header.encode() + b"\n")
        stream.write(b"%\xE2\xE3\xCF\xD3\n")

        offsets: list[int] = []
        unused_ids: list[int] = []
        for number, item in enumerate(self._objects, start=1):
            if item is None:
                # Empty slot: no bytes written, remembered as a free entry.
                offsets.append(-1)
                unused_ids.append(number)
                continue
            offsets.append(stream.tell())
            stream.write(f"{number} 0 obj\n".encode())
            # The encryption dictionary itself is written unencrypted.
            if self._encryption and item != self._encrypt_entry:
                item = self._encryption.encrypt_object(item, number, 0)
            item.write_to_stream(stream)
            stream.write(b"\nendobj\n")
        unused_ids.append(0)  # add 0 to loop in accordance with specification
        return offsets, unused_ids

    def _write_xref_table(
        self, stream: StreamType, object_positions: list[int], free_objects: list[int]
    ) -> int:
        """
        Write the classic cross-reference table to ``stream``.

        Args:
            stream: Output stream; ``tell`` and ``write`` are used on it.
            object_positions: Byte offset per object; non-positive values
                mark free entries.
            free_objects: Numbers of the free objects, ending with ``0``.

        Returns:
            The byte offset at which the table starts.

        """
        table_start = stream.tell()
        stream.write(b"xref\n")
        entry_total = len(self._objects) + 1
        stream.write(f"0 {entry_total}\n".encode())
        # Entry 0 heads the free list and points to the first free object.
        stream.write(f"{free_objects[0]:0>10} {65535:0>5} f \n".encode())
        next_free = 1
        for position in object_positions:
            if position <= 0:
                # Free entry: links to the next free object number.
                line = f"{free_objects[next_free]:0>10} {1:0>5} f \n"
                next_free += 1
            else:
                line = f"{position:0>10} {0:0>5} n \n"
            stream.write(line.encode())
        return table_start

    def _write_trailer(self, stream: StreamType, xref_location: int) -> None:
        """
        Write the PDF trailer and the end-of-file marker to the stream.

        To quote the PDF specification:
            [The] trailer [gives] the location of the cross-reference table and
            of certain special objects within the body of the file.

        Args:
            stream: Output stream to write to.
            xref_location: Byte offset of the cross-reference table, emitted
                after ``startxref``.

        """
        stream.write(b"trailer\n")
        mandatory = {
            NameObject(TK.SIZE): NumberObject(len(self._objects) + 1),
            NameObject(TK.ROOT): self.root_object.indirect_reference,
        }
        trailer = DictionaryObject(mandatory)
        # Optional entries are only emitted when the writer holds them.
        if self._info is not None:
            trailer[NameObject(TK.INFO)] = self._info.indirect_reference
        if self._ID is not None:
            trailer[NameObject(TK.ID)] = self._ID
        if self._encrypt_entry:
            trailer[NameObject(TK.ENCRYPT)] = self._encrypt_entry.indirect_reference
        trailer.write_to_stream(stream)
        footer = f"\nstartxref\n{xref_location}\n%%EOF\n"
        stream.write(footer.encode())  # eof

    @property
    def metadata(self) -> Optional[DocumentInformation]:
        """
        Retrieve/set the PDF file's document information dictionary, if it exists.

        When assigning, the value is a dict with the entries to be set;
        assigning ``None`` removes the /Info entry from the PDF.

        Note that some PDF files use (XMP) metadata streams instead of document
        information dictionaries, and these metadata streams will not be
        accessed by this function, but by :meth:`~xmp_metadata`.

        Returns:
            The value of the parent class's ``metadata`` property.

        """
        return super().metadata

    @metadata.setter
    def metadata(
        self,
        value: Optional[Union[DocumentInformation, DictionaryObject, dict[Any, Any]]],
    ) -> None:
        # ``None`` drops the info dictionary altogether.
        if value is None:
            self._info = None
            return

        # Otherwise the existing entries are wiped before the new ones are added.
        if self._info is not None:
            self._info.clear()
        self.add_metadata(value)

    def add_metadata(self, infos: dict[str, Any]) -> None:
        """
        Add custom metadata to the output.

        Every value is resolved (if it is a PDF object), converted with
        ``str`` and stored as a string object under its key.

        Args:
            infos: a Python dictionary where each key is a field
                and each value is your new metadata.

        """
        if isinstance(infos, PdfObject):
            infos = cast(DictionaryObject, infos.get_object())

        converted = {}
        for field, raw in list(infos.items()):
            resolved = raw.get_object() if isinstance(raw, PdfObject) else raw
            converted[NameObject(field)] = create_string_object(str(resolved))

        # Create the info dictionary on first use.
        if self._info is None:
            self._info = DictionaryObject()
        self._info.update(converted)

    def compress_identical_objects(
        self,
        remove_identicals: bool = True,
        remove_orphans: bool = True,
    ) -> None:
        """
        Parse the PDF file and merge objects that have the same hash.
        This will make objects common to multiple pages.
        Recommended to be used just before writing output.

        Args:
            remove_identicals: Remove identical objects.
            remove_orphans: Remove unreferenced objects. When ``False``,
                unreferenced objects are kept untouched.

        """

        def replace_in_obj(
            obj: PdfObject, crossref: dict[IndirectObject, IndirectObject]
        ) -> None:
            # Walk dictionaries and arrays only; every indirect reference met
            # marks its target as referenced and is redirected if it was merged.
            if isinstance(obj, DictionaryObject):
                key_val = obj.items()
            elif isinstance(obj, ArrayObject):
                key_val = enumerate(obj)  # type: ignore
            else:
                return
            assert isinstance(obj, (DictionaryObject, ArrayObject))
            for k, v in key_val:
                if isinstance(v, IndirectObject):
                    orphans[v.idnum - 1] = False
                    if v in crossref:
                        obj[k] = crossref[v]
                else:
                    # The filtering on DictionaryObject and ArrayObject only
                    # is performed within replace_in_obj itself.
                    replace_in_obj(v, crossref)

        # _idnum_hash :dict[hash]=(1st_ind_obj,[other_indir_objs,...])
        self._idnum_hash = {}
        orphans = [True] * len(self._objects)
        # look for similar objects
        for idx, obj in enumerate(self._objects):
            if is_null_or_none(obj):
                continue
            assert obj is not None, "mypy"  # mypy: TypeGuard of `is_null_or_none` does not help here.
            assert isinstance(obj.indirect_reference, IndirectObject)
            h = obj.hash_value()
            if remove_identicals and h in self._idnum_hash:
                self._idnum_hash[h][1].append(obj.indirect_reference)
                self._objects[idx] = None
            else:
                self._idnum_hash[h] = (obj.indirect_reference, [])

        # generate the dict converting others to 1st
        cnv = {v[0]: v[1] for v in self._idnum_hash.values() if len(v[1]) > 0}
        cnv_rev: dict[IndirectObject, IndirectObject] = {}
        for k, v in cnv.items():
            cnv_rev.update(zip(v, (k,) * len(v)))

        # replace reference to merged objects
        for obj in self._objects:
            if isinstance(obj, (DictionaryObject, ArrayObject)):
                replace_in_obj(obj, cnv_rev)

        # Honour the documented flag: without it, unreferenced objects stay.
        if not remove_orphans:
            return

        # Objects referenced from the trailer only are never orphans.
        orphans[self.root_object.indirect_reference.idnum - 1] = False  # type: ignore

        if not is_null_or_none(self._info):
            orphans[self._info.indirect_reference.idnum - 1] = False  # type: ignore

        try:
            orphans[self._ID.indirect_reference.idnum - 1] = False  # type: ignore
        except AttributeError:
            pass

        # The encryption dictionary is referenced by the trailer as well
        # (see _write_trailer); removing it would leave a dangling /Encrypt.
        encrypt_ref = getattr(
            getattr(self, "_encrypt_entry", None), "indirect_reference", None
        )
        if encrypt_ref is not None:
            orphans[encrypt_ref.idnum - 1] = False

        for i in compress(range(len(self._objects)), orphans):
            self._objects[i] = None

    def get_reference(self, obj: PdfObject) -> IndirectObject:
        """
        Build the indirect reference of an object stored in this writer.

        Args:
            obj: The object to look up in the writer's object list.

        Returns:
            An indirect reference (generation 0) pointing to ``obj``.

        """
        # Object numbers are 1-based, list positions 0-based.
        position = self._objects.index(obj)
        reference = IndirectObject(position + 1, 0, self)
        assert reference.get_object() == obj
        return reference

    def get_outline_root(self) -> TreeObject:
        """
        Return the root of the outline tree, creating it when missing.

        An existing ``/Outlines`` entry that is not a ``TreeObject`` yet is
        converted and replaces the stored object.

        Returns:
            The outline root as a ``TreeObject``.

        """
        if CO.OUTLINES not in self._root_object:
            # No outline yet: register an empty tree in the catalog.
            fresh_root = TreeObject()
            fresh_root.update({})
            self._root_object[NameObject(CO.OUTLINES)] = self._add_object(fresh_root)
            return fresh_root

        # Entries in the catalog dictionary
        root = cast(TreeObject, self._root_object[CO.OUTLINES])
        if not isinstance(root, TreeObject):
            converted = TreeObject(root)
            self._replace_object(root.indirect_reference.idnum, converted)
            root = converted
        root_ref = IndirectObject(self._objects.index(root) + 1, 0, self)
        assert root_ref.get_object() == root
        return root

    def get_threads_root(self) -> ArrayObject:
        """
        The list of threads.

        See §12.4.3 of the PDF 1.7 or PDF 2.0 specification.

        An empty array is stored in the catalog when no ``/Threads`` entry
        exists yet.

        Returns:
            An array (possibly empty) of Dictionaries with an ``/F`` key,
            and optionally information about the thread in ``/I`` or ``/Metadata`` keys.

        """
        if CO.THREADS not in self._root_object:
            created = ArrayObject()
            self._root_object[NameObject(CO.THREADS)] = created
            return created
        # Entries in the catalog dictionary
        return cast(ArrayObject, self._root_object[CO.THREADS])

    @property
    def threads(self) -> ArrayObject:
        """
        Read-only property for the list of threads.

        See §12.4.3 of the PDF 1.7 or PDF 2.0 specification.

        Each element is a dictionary with an ``/F`` key, and optionally
        information about the thread in ``/I`` or ``/Metadata`` keys.

        Returns:
            The array returned by :meth:`get_threads_root`.
        """
        return self.get_threads_root()

    def add_outline_item_destination(
        self,
        page_destination: Union[IndirectObject, PageObject, TreeObject],
        parent: Union[None, TreeObject, IndirectObject] = None,
        before: Union[None, TreeObject, IndirectObject] = None,
        is_open: bool = True,
    ) -> IndirectObject:
        """
        Insert an outline item into the outline tree.

        Args:
            page_destination: The outline item to insert. A page is wrapped
                into a ``Destination`` titled ``page #<n>`` first.
            parent: Outline item that receives the new child; the outline
                root is used when ``None``.
            before: Optional outline item whose reference is forwarded to
                ``insert_child`` as the ``before`` argument.
            is_open: Stored on the item as ``/%is_open%``; closed items do
                not increment the parent counters.

        Returns:
            The indirect reference of the inserted outline item.

        """
        page_destination = cast(PageObject, page_destination.get_object())
        if isinstance(page_destination, PageObject):
            # Forward the placement arguments too; they were silently dropped
            # before, so the item always ended up open at the outline root.
            return self.add_outline_item_destination(
                Destination(
                    f"page #{page_destination.page_number}",
                    cast(IndirectObject, page_destination.indirect_reference),
                    Fit.fit(),
                ),
                parent,
                before,
                is_open,
            )

        if parent is None:
            parent = self.get_outline_root()

        page_destination[NameObject("/%is_open%")] = BooleanObject(is_open)
        parent = cast(TreeObject, parent.get_object())
        page_destination_ref = self._add_object(page_destination)
        if before is not None:
            before = before.indirect_reference
        parent.insert_child(
            page_destination_ref,
            before,
            self,
            page_destination.inc_parent_counter_outline
            if is_open
            else (lambda x, y: 0),  # noqa: ARG005
        )
        if "/Count" not in page_destination:
            page_destination[NameObject("/Count")] = NumberObject(0)

        return page_destination_ref

    def add_outline_item_dict(
        self,
        outline_item: OutlineItemType,
        parent: Union[None, TreeObject, IndirectObject] = None,
        before: Union[None, TreeObject, IndirectObject] = None,
        is_open: bool = True,
    ) -> IndirectObject:
        """
        Add an outline item described by a plain dictionary.

        The entries are copied into a fresh ``TreeObject`` which is then
        handed to :meth:`add_outline_item_destination` together with the
        placement arguments.

        Returns:
            The indirect reference of the inserted outline item.

        """
        item = TreeObject()
        item.update(outline_item)

        # code currently unreachable
        # if "/A" in outline_item:
        #     action = DictionaryObject()
        #     a_dict = cast(DictionaryObject, outline_item["/A"])
        #     for k, v in list(a_dict.items()):
        #         action[NameObject(str(k))] = v
        #     action_ref = self._add_object(action)
        #     outline_item_object[NameObject("/A")] = action_ref
        return self.add_outline_item_destination(item, parent, before, is_open)

    def add_outline_item(
        self,
        title: str,
        page_number: Union[None, PageObject, IndirectObject, int],
        parent: Union[None, TreeObject, IndirectObject] = None,
        before: Union[None, TreeObject, IndirectObject] = None,
        color: Optional[Union[tuple[float, float, float], str]] = None,
        bold: bool = False,
        italic: bool = False,
        fit: Fit = PAGE_FIT,
        is_open: bool = True,
    ) -> IndirectObject:
        """
        Add an outline item (commonly referred to as a "Bookmark") to the PDF file.

        Args:
            title: Title to use for this outline item.
            page_number: Page number this outline item will point to.
                ``None`` creates an item without a GoTo action.
            parent: A reference to a parent outline item to create nested
                outline items.
            before: Optional outline item forwarded to
                :meth:`add_outline_item_destination` as its ``before`` argument.
            color: Color of the outline item's font as a red, green, blue tuple
                from 0.0 to 1.0 or as a Hex String (#RRGGBB)
            bold: Outline item font is bold
            italic: Outline item font is italic
            fit: The fit of the destination page.
            is_open: Forwarded to :meth:`add_outline_item_destination`.

        Returns:
            The added outline item as an indirect object.

        """
        # Pre-set so that an unsupported ``page_number`` type ends up in the
        # "can not find reference" fallback instead of an UnboundLocalError.
        page_ref: Union[None, NullObject, IndirectObject, NumberObject] = None
        if isinstance(italic, Fit):  # it means that we are on the old params
            if fit is not None and page_number is None:
                page_number = fit
            return self.add_outline_item(
                title, page_number, parent, None, before, color, bold, italic, is_open=is_open
            )
        if page_number is None:
            action_ref = None
        else:
            if isinstance(page_number, IndirectObject):
                page_ref = page_number
            elif isinstance(page_number, PageObject):
                page_ref = page_number.indirect_reference
            elif isinstance(page_number, int):
                try:
                    page_ref = self.pages[page_number].indirect_reference
                except IndexError:
                    # Unknown page index: keep the raw number in the destination.
                    page_ref = NumberObject(page_number)
            if page_ref is None:
                logger_warning(
                    f"can not find reference of page {page_number}",
                    __name__,
                )
                page_ref = NullObject()
            dest = Destination(
                NameObject("/" + title + " outline item"),
                page_ref,
                fit,
            )

            action_ref = self._add_object(
                DictionaryObject(
                    {
                        NameObject(GoToActionArguments.D): dest.dest_array,
                        NameObject(GoToActionArguments.S): NameObject("/GoTo"),
                    }
                )
            )
        outline_item = self._add_object(
            _create_outline_item(action_ref, title, color, italic, bold)
        )

        if parent is None:
            parent = self.get_outline_root()
        return self.add_outline_item_destination(outline_item, parent, before, is_open)

    def add_outline(self) -> None:
        """
        Placeholder; always raises.

        Raises:
            NotImplementedError: On every call.

        """
        message = (
            "This method is not yet implemented. Use :meth:`add_outline_item` instead."
        )
        raise NotImplementedError(message)

    def add_named_destination_array(
        self, title: TextStringObject, destination: Union[IndirectObject, ArrayObject]
    ) -> None:
        """
        Register ``destination`` under ``title`` in the named destinations.

        The list returned by ``get_named_dest_root`` alternates names and
        destinations; the new pair goes in front of the first name that
        compares greater than ``title``, or at the end otherwise.

        Args:
            title: Name of the destination.
            destination: The destination array or a reference to it.

        """
        names = self.get_named_dest_root()
        # Names sit at the even positions, their destinations right after.
        for slot in range(0, len(names), 2):
            if title < names[slot]:
                names.insert(slot, destination)
                names.insert(slot, TextStringObject(title))
                return
        names.extend([TextStringObject(title), destination])

    def add_named_destination_object(
        self,
        page_destination: PdfObject,
    ) -> IndirectObject:
        """
        Register a destination object as a named destination.

        Its ``dest_array`` is added to the writer and registered under the
        object's ``/Title``.

        Args:
            page_destination: Object offering ``dest_array`` and a ``/Title`` entry.

        Returns:
            The indirect reference of the stored destination array.

        """
        array_ref = self._add_object(page_destination.dest_array)  # type: ignore
        name = cast("TextStringObject", page_destination["/Title"])  # type: ignore
        self.add_named_destination_array(name, array_ref)
        return array_ref

    def add_named_destination(
        self,
        title: str,
        page_number: int,
    ) -> IndirectObject:
        """
        Add a named destination pointing to a page.

        A GoTo action dictionary targeting the page with a ``FitH`` view at
        826 is stored in the writer and registered under ``title``.

        Args:
            title: Name of the destination.
            page_number: Index into the kids of the writer's page tree root.

        Returns:
            The indirect reference of the stored dictionary.

        """
        page_ref = self.get_object(self._pages)[PagesAttributes.KIDS][page_number]  # type: ignore
        target = ArrayObject(
            [page_ref, NameObject(TypFitArguments.FIT_H), NumberObject(826)]
        )
        action = DictionaryObject()
        action.update(
            {
                NameObject(GoToActionArguments.D): target,
                NameObject(GoToActionArguments.S): NameObject("/GoTo"),
            }
        )
        action_ref = self._add_object(action)

        name = title if isinstance(title, TextStringObject) else TextStringObject(str(title))
        self.add_named_destination_array(name, action_ref)
        return action_ref

    def remove_links(self) -> None:
        """Remove links and annotations from this output."""
        everything = ObjectDeletionFlag.ALL_ANNOTATIONS
        for current_page in self.pages:
            self.remove_objects_from_page(current_page, everything)

    def remove_annotations(
        self, subtypes: Optional[Union[AnnotationSubtype, Iterable[AnnotationSubtype]]]
    ) -> None:
        """
        Remove annotations by annotation subtype.

        Args:
            subtypes: subtype or list of subtypes to be removed.
                Examples are: "/Link", "/FileAttachment", "/Sound",
                "/Movie", "/Screen", ...
                If you want to remove all annotations, use subtypes=None.

        """
        if isinstance(subtypes, str):
            # A single subtype is documented as valid input. Passed through as a
            # plain string it would be tested with substring semantics by the
            # ``in`` check per annotation; wrap it so only exact names match.
            subtypes = (subtypes,)
        for page in self.pages:
            self._remove_annots_from_page(page, subtypes)

    def _remove_annots_from_page(
        self,
        page: Union[IndirectObject, PageObject, DictionaryObject],
        subtypes: Optional[Iterable[str]],
    ) -> None:
        """
        Delete the annotations of the given subtypes from one page.

        Args:
            page: The page (or a reference to it) to clean.
            subtypes: Subtypes to delete; ``None`` deletes every annotation.

        """
        page = cast(DictionaryObject, page.get_object())
        if PG.ANNOTS not in page:
            return

        position = 0
        while position < len(cast(ArrayObject, page[PG.ANNOTS])):
            entry = cast(ArrayObject, page[PG.ANNOTS])[position]
            annotation = cast(DictionaryObject, entry.get_object())
            keep = (
                subtypes is not None
                and cast(str, annotation["/Subtype"]) not in subtypes
            )
            if keep:
                position += 1
                continue
            if isinstance(entry, IndirectObject):
                self._objects[entry.idnum - 1] = NullObject()  # to reduce PDF size
            # The following entries move up, so the position is not advanced.
            del page[PG.ANNOTS][position]  # type:ignore

    def remove_objects_from_page(
        self,
        page: Union[PageObject, DictionaryObject],
        to_delete: Union[ObjectDeletionFlag, Iterable[ObjectDeletionFlag]],
        text_filters: Optional[dict[str, Any]] = None
    ) -> None:
        """
        Remove objects specified by ``to_delete`` from the given page.

        Annotation-related flags are handled first and end the call; the
        remaining flags operate on the page content stream and the XObjects
        of the page resources.

        Args:
            page: Page object to clean up.
            to_delete: Objects to be deleted; can be a ``ObjectDeletionFlag``
                or a list of ObjectDeletionFlag
            text_filters: Properties of text to be deleted, if applicable. Optional.
                This is a Python dictionary with the following properties:

                * font_ids: List of font resource IDs (such as /F1 or /T1_0) to be deleted.

        """
        if isinstance(to_delete, (list, tuple)):
            for to_d in to_delete:
                # Forward the filters; they used to be dropped on this path.
                self.remove_objects_from_page(page, to_d, text_filters=text_filters)
            return None
        assert isinstance(to_delete, ObjectDeletionFlag)

        if to_delete & ObjectDeletionFlag.LINKS:
            return self._remove_annots_from_page(page, ("/Link",))
        if to_delete & ObjectDeletionFlag.ATTACHMENTS:
            return self._remove_annots_from_page(
                page, ("/FileAttachment", "/Sound", "/Movie", "/Screen")
            )
        if to_delete & ObjectDeletionFlag.OBJECTS_3D:
            return self._remove_annots_from_page(page, ("/3D",))
        if to_delete & ObjectDeletionFlag.ALL_ANNOTATIONS:
            return self._remove_annots_from_page(page, None)

        # Content-stream operators to strip. The lists are accumulated so that
        # combining DRAWING_IMAGES with TEXT no longer discards the drawing set.
        jump_operators: list[bytes] = []
        if to_delete & ObjectDeletionFlag.DRAWING_IMAGES:
            jump_operators += [
                b"w", b"J", b"j", b"M", b"d", b"i",
                b"W", b"W*",
                b"b", b"b*", b"B", b"B*", b"S", b"s", b"f", b"f*", b"F", b"n",
                b"m", b"l", b"c", b"v", b"y", b"h", b"re",
                b"sh"
            ]
        if to_delete & ObjectDeletionFlag.TEXT:
            jump_operators += [b"Tj", b"TJ", b"'", b'"']

        if not isinstance(page, PageObject):
            page = PageObject(self, page.indirect_reference)  # pragma: no cover
        if "/Contents" in page:
            content = cast(ContentStream, page.get_contents())

            images, forms = self._remove_objects_from_page__clean_forms(
                elt=page, stack=[], jump_operators=jump_operators, to_delete=to_delete, text_filters=text_filters,
            )

            self._remove_objects_from_page__clean(
                content=content, images=images, forms=forms,
                jump_operators=jump_operators, to_delete=to_delete,
                text_filters=text_filters
            )
            page.replace_contents(content)
        # Historic return value, kept unchanged for backward compatibility.
        return [], []  # type: ignore[return-value]

    def _remove_objects_from_page__clean(
            self,
            content: ContentStream,
            images: list[str],
            forms: list[str],
            jump_operators: list[bytes],
            to_delete: ObjectDeletionFlag,
            text_filters: Optional[dict[str, Any]] = None,
    ) -> None:
        """
        Strip the targeted operations from a content stream, in place.

        Args:
            content: The content stream whose operations are filtered.
            images: Names of the XObject images whose ``Do`` calls are removed.
            forms: Names of form XObjects (not used by this method).
            jump_operators: Operators that are removed wherever they occur.
            to_delete: Flags selecting what is removed.
            text_filters: Optional filters; with ``font_ids`` set and text
                removal requested, only operations issued while one of these
                fonts is selected are removed.

        """
        wants_text = bool(to_delete & ObjectDeletionFlag.TEXT)
        wants_inline = bool(to_delete & ObjectDeletionFlag.INLINE_IMAGES)
        wants_xobject = bool(to_delete & ObjectDeletionFlag.XOBJECT_IMAGES)
        font_ids_to_delete = (
            text_filters.get("font_ids", []) if text_filters and wants_text else []
        )

        active_font = None
        index = 0
        while index < len(content.operations):
            operands, operator = content.operations[index]
            if operator == b"Tf":
                # Track the font selected for the following text operators.
                active_font = operands[0]
            targeted = (
                (operator == b"INLINE IMAGE" and wants_inline)
                or operator in jump_operators
                or (operator == b"Do" and wants_xobject and operands[0] in images)
            )
            if targeted and (
                not wants_text
                or not text_filters
                or active_font in font_ids_to_delete
            ):
                # Removal shifts the next operation into this slot.
                del content.operations[index]
            else:
                index += 1
        content.get_data()  # this ensures ._data is rebuilt from the .operations

    def _remove_objects_from_page__clean_forms(
            self,
            elt: DictionaryObject,
            stack: list[DictionaryObject],
            jump_operators: list[bytes],
            to_delete: ObjectDeletionFlag,
            text_filters: Optional[dict[str, Any]] = None,
    ) -> tuple[list[str], list[str]]:
        """
        Clean the XObjects found in the resources of ``elt``, recursively.

        Image XObjects are replaced by a null object and dropped from the
        XObject dictionary when ``XOBJECT_IMAGES`` is requested. Form
        XObjects are turned into content streams, cleaned recursively and
        stored back. If ``elt`` itself is a stream, its operations are
        cleaned as well.

        Args:
            elt: The page or form whose ``/Resources`` / ``/XObject`` entries
                are processed.
            stack: Elements already being processed; used to stop on loops.
            jump_operators: Operators to remove, handed to the content cleaner.
            to_delete: Flags selecting what is removed.
            text_filters: Optional text filters, handed to the content cleaner.

        Returns:
            A pair ``(images, forms)`` with the XObject names of the removed
            images and of the forms found directly under ``elt``.

        """
        # elt in recursive call is a new ContentStream object, so we have to check the indirect_reference
        if (elt in stack) or (
                hasattr(elt, "indirect_reference") and any(
                    elt.indirect_reference == getattr(x, "indirect_reference", -1)
                    for x in stack
                )
        ):
            # to prevent infinite looping
            return [], []  # pragma: no cover
        try:
            d = cast(
                dict[Any, Any],
                cast(DictionaryObject, elt["/Resources"])["/XObject"],
            )
        except KeyError:
            # No resources or no XObjects: nothing to walk through.
            d = {}
        images = []
        forms = []
        for k, v in d.items():
            o = v.get_object()
            try:
                # Replacement for the XObject; stays None when it is left untouched.
                content: Any = None
                if (
                        to_delete & ObjectDeletionFlag.XOBJECT_IMAGES
                        and o["/Subtype"] == "/Image"
                ):
                    content = NullObject()  # to delete the image keeping the entry
                    images.append(k)
                if o["/Subtype"] == "/Form":
                    forms.append(k)
                    if isinstance(o, ContentStream):
                        content = o
                    else:
                        # Wrap the form, carrying over every entry except the
                        # ones describing the encoded data.
                        content = ContentStream(o, self)
                        content.update(
                            {
                                k1: v1
                                for k1, v1 in o.items()
                                if k1 not in ["/Length", "/Filter", "/DecodeParms"]
                            }
                        )
                        try:
                            content.indirect_reference = o.indirect_reference
                        except AttributeError:  # pragma: no cover
                            pass
                    stack.append(elt)

                    # clean subforms
                    self._remove_objects_from_page__clean_forms(
                        elt=content, stack=stack, jump_operators=jump_operators, to_delete=to_delete,
                        text_filters=text_filters,
                    )
                if content is not None:
                    if isinstance(v, IndirectObject):
                        # Replace the stored object in place.
                        self._objects[v.idnum - 1] = content
                    else:
                        # should only occur in a PDF not respecting PDF spec
                        # where streams must be indirected.
                        d[k] = self._add_object(content)  # pragma: no cover
            except (TypeError, KeyError):
                # Entries raising here (e.g. no "/Subtype" key) are skipped.
                pass
        for im in images:
            del d[im]  # for clean-up
        if isinstance(elt, StreamObject):  # for /Form
            if not isinstance(elt, ContentStream):  # pragma: no cover
                e = ContentStream(elt, self)
                e.update(elt.items())
                elt = e
            # clean the content
            self._remove_objects_from_page__clean(
                content=elt, images=images, forms=forms, jump_operators=jump_operators,
                to_delete=to_delete, text_filters=text_filters
            )
        return images, forms

    def remove_images(
        self,
        to_delete: ImageType = ImageType.ALL,
    ) -> None:
        """
        Remove images from this output.

        Args:
            to_delete: The type of images to be deleted
                (default = all images types)

        """
        # A plain boolean is treated as "all image types".
        if isinstance(to_delete, bool):
            to_delete = ImageType.ALL

        # Translate the image types into the matching deletion flags.
        selected = ObjectDeletionFlag.NONE
        for kind in ("XOBJECT_IMAGES", "INLINE_IMAGES", "DRAWING_IMAGES"):
            if to_delete & ImageType[kind]:
                selected |= ObjectDeletionFlag[kind]

        for current_page in self.pages:
            self.remove_objects_from_page(current_page, selected)

    def remove_text(self, font_names: Optional[list[str]] = None) -> None:
        """
        Remove text from the PDF.

        Args:
            font_names: List of font names to remove, such as "Helvetica-Bold".
                Optional. If not specified, all text will be removed.
        """
        if not font_names:
            font_names = []

        # Recursively loop through page objects to gather font info.
        # Defined once instead of once per page.
        def get_font_info(
            obj: Any,
            font_info: Optional[dict[str, Any]] = None,
            key: Optional[str] = None,
            ancestors: Optional[set[int]] = None,
        ) -> dict[str, Any]:
            if font_info is None:
                font_info = {}
            if ancestors is None:
                ancestors = set()
            if isinstance(obj, IndirectObject):
                obj = obj.get_object()
            if not isinstance(obj, (dict, list, ArrayObject)):
                return font_info
            # Containers currently being visited on this path: meeting one
            # again means the resource graph loops back, so stop descending
            # instead of recursing forever. Shared (non-cyclic) objects are
            # still visited through every path, as before.
            marker = id(obj)
            if marker in ancestors:
                return font_info
            ancestors.add(marker)
            try:
                if isinstance(obj, dict):
                    if obj.get("/Type") == "/Font":
                        font_name = obj.get("/BaseFont", "")
                        # Normalize font names like "/RRXFFV+Palatino-Bold" to "Palatino-Bold"
                        normalized_font_name = font_name.lstrip("/").split("+")[-1]
                        if normalized_font_name not in font_info:
                            font_info[normalized_font_name] = {
                                "normalized_font_name": normalized_font_name,
                                "resource_ids": [],
                            }
                        if key not in font_info[normalized_font_name]["resource_ids"]:
                            font_info[normalized_font_name]["resource_ids"].append(key)
                    for k in obj:
                        font_info = get_font_info(obj[k], font_info, k, ancestors)
                else:
                    for child_obj in obj:
                        font_info = get_font_info(child_obj, font_info, None, ancestors)
            finally:
                ancestors.discard(marker)
            return font_info

        for page in self.pages:
            resource_ids_to_remove = []

            # Content streams reference fonts and other resources with names like "/F1" or "/T1_0"
            # Font names need to be converted to resource names/IDs for easier removal
            if font_names:
                # Add relevant resource names for removal
                font_info = get_font_info(page.get("/Resources"))
                for font_name in font_names:
                    if font_name in font_info:
                        resource_ids_to_remove.extend(font_info[font_name]["resource_ids"])

            text_filters = {}
            if font_names:
                text_filters["font_ids"] = resource_ids_to_remove
            self.remove_objects_from_page(page, ObjectDeletionFlag.TEXT, text_filters=text_filters)

    def add_uri(
        self,
        page_number: int,
        uri: str,
        rect: RectangleObject,
        border: Optional[ArrayObject] = None,
    ) -> None:
        """
        Add an URI from a rectangular area to the specified page.

        Args:
            page_number: index of the page on which to place the URI action.
            uri: URI of resource to link to.
            rect: :class:`RectangleObject<pypdf.generic.RectangleObject>` or
                array of four integers specifying the clickable rectangular area
                ``[xLL, yLL, xUR, yUR]``, or string in the form
                ``"[ xLL yLL xUR yUR ]"``.
            border: if provided, an array describing border-drawing
                properties. See the PDF spec for details. If this argument
                is omitted, the border array ``[2, 2, 2]`` is written.

        Raises:
            ValueError: ``rect`` is a string that does not contain exactly
                four numbers.

        """
        page_link = self.get_object(self._pages)[PagesAttributes.KIDS][page_number]  # type: ignore
        page_ref = cast(dict[str, Any], self.get_object(page_link))

        border_arr: BorderArrayType
        if border is not None:
            border_arr = [NumberObject(n) for n in border[:3]]
            if len(border) == 4:
                # The optional fourth entry is itself an array (the dash pattern).
                dash_pattern = ArrayObject([NumberObject(n) for n in border[3]])
                border_arr.append(dash_pattern)
        else:
            border_arr = [NumberObject(2), NumberObject(2), NumberObject(2)]

        if isinstance(rect, str):
            # Parse "[ xLL yLL xUR yUR ]" into its four coordinates. Feeding
            # the whole string to NumberObject cannot yield a rectangle.
            coordinates = rect.strip().strip("[]").split()
            if len(coordinates) != 4:
                raise ValueError(f"rect must contain four numbers, got {rect!r}")
            rect = RectangleObject(
                tuple(float(value) for value in coordinates)  # type: ignore[arg-type]
            )
        elif not isinstance(rect, RectangleObject):
            rect = RectangleObject(rect)

        # Action dictionary: open the given URI.
        lnk2 = DictionaryObject()
        lnk2.update(
            {
                NameObject("/S"): NameObject("/URI"),
                NameObject("/URI"): TextStringObject(uri),
            }
        )
        # Link annotation that triggers the action above.
        lnk = DictionaryObject()
        lnk.update(
            {
                NameObject(AA.Type): NameObject("/Annot"),
                NameObject(AA.Subtype): NameObject("/Link"),
                NameObject(AA.P): page_link,
                NameObject(AA.Rect): rect,
                NameObject("/H"): NameObject("/I"),
                NameObject(AA.Border): ArrayObject(border_arr),
                NameObject("/A"): lnk2,
            }
        )
        lnk_ref = self._add_object(lnk)

        # Attach the annotation to the page, creating /Annots if needed.
        if PG.ANNOTS in page_ref:
            page_ref[PG.ANNOTS].append(lnk_ref)
        else:
            page_ref[NameObject(PG.ANNOTS)] = ArrayObject([lnk_ref])

    # Page layout names that ``_set_page_layout`` accepts without logging a
    # warning.
    _valid_layouts = (
        "/NoLayout",
        "/SinglePage",
        "/OneColumn",
        "/TwoColumnLeft",
        "/TwoColumnRight",
        "/TwoPageLeft",
        "/TwoPageRight",
    )

    def _get_page_layout(self) -> Optional[LayoutType]:
        """Return the ``/PageLayout`` entry of the root object, or ``None`` if it is missing."""
        try:
            stored_layout = self._root_object["/PageLayout"]
        except KeyError:
            return None
        return cast(LayoutType, stored_layout)

    def _set_page_layout(self, layout: Union[NameObject, LayoutType]) -> None:
        """
        Set the page layout.

        A value that is not already a ``NameObject`` is checked against
        ``_valid_layouts``; an unknown value only triggers a warning and is
        still written to the root object.

        Args:
            layout: The page layout to be used.

        .. list-table:: Valid ``layout`` arguments
           :widths: 50 200

           * - /NoLayout
             - Layout explicitly not specified
           * - /SinglePage
             - Show one page at a time
           * - /OneColumn
             - Show one column at a time
           * - /TwoColumnLeft
             - Show pages in two columns, odd-numbered pages on the left
           * - /TwoColumnRight
             - Show pages in two columns, odd-numbered pages on the right
           * - /TwoPageLeft
             - Show two pages at a time, odd-numbered pages on the left
           * - /TwoPageRight
             - Show two pages at a time, odd-numbered pages on the right

        """
        if not isinstance(layout, NameObject):
            if layout not in self._valid_layouts:
                # Join with ", " so the message lists the names; the previous
                # expression formatted a two-element tuple instead.
                logger_warning(
                    f"Layout should be one of: {', '.join(self._valid_layouts)}",
                    __name__,
                )
            layout = NameObject(layout)
        self._root_object.update({NameObject("/PageLayout"): layout})

    def set_page_layout(self, layout: LayoutType) -> None:
        """
        Set the page layout.

        Method form of assigning to :attr:`page_layout`; both delegate to
        ``_set_page_layout``.

        Args:
            layout: The page layout to be used

        .. list-table:: Valid ``layout`` arguments
           :widths: 50 200

           * - /NoLayout
             - Layout explicitly not specified
           * - /SinglePage
             - Show one page at a time
           * - /OneColumn
             - Show one column at a time
           * - /TwoColumnLeft
             - Show pages in two columns, odd-numbered pages on the left
           * - /TwoColumnRight
             - Show pages in two columns, odd-numbered pages on the right
           * - /TwoPageLeft
             - Show two pages at a time, odd-numbered pages on the left
           * - /TwoPageRight
             - Show two pages at a time, odd-numbered pages on the right

        """
        self._set_page_layout(layout)

    @property
    def page_layout(self) -> Optional[LayoutType]:
        """
        Page layout property.

        Reading returns the stored ``/PageLayout`` value or ``None`` when the
        entry is absent; assigning stores the given layout.

        .. list-table:: Valid ``layout`` values
           :widths: 50 200

           * - /NoLayout
             - Layout explicitly not specified
           * - /SinglePage
             - Show one page at a time
           * - /OneColumn
             - Show one column at a time
           * - /TwoColumnLeft
             - Show pages in two columns, odd-numbered pages on the left
           * - /TwoColumnRight
             - Show pages in two columns, odd-numbered pages on the right
           * - /TwoPageLeft
             - Show two pages at a time, odd-numbered pages on the left
           * - /TwoPageRight
             - Show two pages at a time, odd-numbered pages on the right
        """
        current_layout = self._get_page_layout()
        return current_layout

    @page_layout.setter
    def page_layout(self, layout: LayoutType) -> None:
        # Validation and storage are shared with set_page_layout().
        self._set_page_layout(layout)

    # Page mode names that the ``page_mode`` setter accepts without logging a
    # warning.
    _valid_modes = (
        "/UseNone",
        "/UseOutlines",
        "/UseThumbs",
        "/FullScreen",
        "/UseOC",
        "/UseAttachments",
    )

    def _get_page_mode(self) -> Optional[PagemodeType]:
        """Return the ``/PageMode`` entry of the root object, or ``None`` if it is missing."""
        try:
            stored_mode = self._root_object["/PageMode"]
        except KeyError:
            return None
        return cast(PagemodeType, stored_mode)

    @property
    def page_mode(self) -> Optional[PagemodeType]:
        """
        Page mode property.

        Reading returns the stored ``/PageMode`` value or ``None`` when the
        entry is absent. Assigning a value outside the table below logs a
        warning but still stores it.

        .. list-table:: Valid ``mode`` values
           :widths: 50 200

           * - /UseNone
             - Do not show outline or thumbnails panels
           * - /UseOutlines
             - Show outline (aka bookmarks) panel
           * - /UseThumbs
             - Show page thumbnails panel
           * - /FullScreen
             - Fullscreen view
           * - /UseOC
             - Show Optional Content Group (OCG) panel
           * - /UseAttachments
             - Show attachments panel
        """
        return self._get_page_mode()

    @page_mode.setter
    def page_mode(self, mode: PagemodeType) -> None:
        already_name = isinstance(mode, NameObject)
        # Only plain values are validated; a NameObject is trusted as given.
        if not already_name and mode not in self._valid_modes:
            logger_warning(
                f"Mode should be one of: {', '.join(self._valid_modes)}", __name__
            )
        mode_name = cast(NameObject, mode) if already_name else NameObject(mode)
        self._root_object.update({NameObject("/PageMode"): mode_name})

    def add_annotation(
        self,
        page_number: Union[int, PageObject],
        annotation: dict[str, Any],
    ) -> DictionaryObject:
        """
        Add a single annotation to the page.
        The added annotation must be a new annotation.
        It cannot be recycled.

        Args:
            page_number: PageObject or page index.
            annotation: Annotation to be added (created with annotation).

        Returns:
            The inserted object.
            This can be used for popup creation, for example.

        Raises:
            TypeError: ``page_number`` is neither an ``int`` nor a
                ``PageObject``.

        """
        # Resolve the target page from either an index or a page object.
        if isinstance(page_number, int):
            page = self.pages[page_number]
        elif isinstance(page_number, PageObject):
            page = page_number
        else:
            raise TypeError("page: invalid type")

        to_add = cast(DictionaryObject, _pdf_objectify(annotation))
        to_add[NameObject("/P")] = page.indirect_reference

        # Make sure the page has an /Annots array to append to.
        if page.annotations is None:
            page[NameObject("/Annots")] = ArrayObject()
        assert page.annotations is not None

        # Internal link annotations need the correct object type for the
        # destination
        is_internal_link = to_add.get("/Subtype") == "/Link" and "/Dest" in to_add
        if is_internal_link:
            tmp = cast(dict[Any, Any], to_add[NameObject("/Dest")])
            fit = Fit(
                fit_type=tmp["fit"], fit_args=dict(tmp)["fit_args"]
            )  # I have no clue why this dict-hack is necessary
            dest = Destination(
                NameObject("/LinkName"),
                tmp["target_page_index"],
                fit,
            )
            to_add[NameObject("/Dest")] = dest.dest_array

        annotation_ref = self._add_object(to_add)
        page.annotations.append(annotation_ref)

        # A popup is registered on its parent annotation as well.
        is_popup_with_parent = (
            to_add.get("/Subtype") == "/Popup" and NameObject("/Parent") in to_add
        )
        if is_popup_with_parent:
            parent_annotation = cast(DictionaryObject, to_add["/Parent"].get_object())
            parent_annotation[NameObject("/Popup")] = to_add.indirect_reference

        return to_add

    def clean_page(self, page: Union[PageObject, IndirectObject]) -> PageObject:
        """
        Perform some clean up in the page.
        Currently: convert NameObject named destination to TextStringObject
        (required for names/dests list)

        For each annotation, a ``/Dest`` that is a NameObject is converted;
        otherwise, if the annotation has an ``/A`` action, its ``/D`` entry is
        converted when it is a NameObject.

        Args:
            page: The page, or an indirect reference to it.

        Returns:
            The cleaned PageObject

        """
        page = cast("PageObject", page.get_object())
        for annot_ref in page.get("/Annots", []):
            annot = annot_ref.get_object()
            direct_dest = annot.get("/Dest", None)
            action = annot.get("/A", None)
            if isinstance(direct_dest, NameObject):
                annot[NameObject("/Dest")] = TextStringObject(direct_dest)
                continue
            if action is None:
                continue
            action = action.get_object()
            action_dest = action.get("/D", None)
            if isinstance(action_dest, NameObject):
                action[NameObject("/D")] = TextStringObject(action_dest)
        return page

    def _create_stream(
        self, fileobj: Union[Path, StrByteType, PdfReader]
    ) -> tuple[IOBase, Optional[Encryption]]:
        # Copy the content behind ``fileobj`` into an in-memory BytesIO:
        #  * str / Path: treated as a file path that is opened and read;
        #  * PdfReader: its underlying stream is read from the start and then
        #    repositioned where it was; the reader's encryption object (if
        #    any) is returned alongside;
        #  * anything exposing .seek and .read: rewound and read completely.
        # Any other input raises NotImplementedError.
        if isinstance(fileobj, (str, Path)):
            with FileIO(fileobj, "rb") as source:
                return BytesIO(source.read()), None

        if isinstance(fileobj, PdfReader):
            encryption_obj = fileobj._encryption or None
            saved_position = fileobj.stream.tell()
            fileobj.stream.seek(0)
            copied = BytesIO(fileobj.stream.read())
            # reset the stream to its original location
            fileobj.stream.seek(saved_position)
            return copied, encryption_obj

        if hasattr(fileobj, "seek") and hasattr(fileobj, "read"):
            fileobj.seek(0)
            return BytesIO(fileobj.read()), None

        raise NotImplementedError(
            "Merging requires an object that PdfReader can parse. "
            "Typically, that is a Path or a string representing a Path, "
            "a file object, or an object implementing .seek and .read. "
            "Passing a PdfReader directly works as well."
        )

    def append(
        self,
        fileobj: Union[StrByteType, PdfReader, Path],
        outline_item: Union[
            str, None, PageRange, tuple[int, int], tuple[int, int, int], list[int]
        ] = None,
        pages: Union[
            None,
            PageRange,
            tuple[int, int],
            tuple[int, int, int],
            list[int],
            list[PageObject],
        ] = None,
        import_outline: bool = True,
        excluded_fields: Optional[Union[list[str], tuple[str, ...]]] = None,
    ) -> None:
        """
        Identical to the :meth:`merge()<merge>` method, but assumes you want to
        concatenate all pages onto the end of the file instead of specifying a
        position.

        If ``outline_item`` is a tuple, list or ``PageRange``, it is taken as
        the page selection and the following positional arguments are shifted
        accordingly; no outline item is created in that case.

        Args:
            fileobj: A File Object or an object that supports the standard
                read and seek methods similar to a File Object. Could also be a
                string representing a path to a PDF file.
            outline_item: Optionally, you may specify a string to build an
                outline (aka 'bookmark') to identify the beginning of the
                included file.
            pages: Can be a :class:`PageRange<pypdf.pagerange.PageRange>`
                or a ``(start, stop[, step])`` tuple
                or a list of pages to be processed
                to merge only the specified range of pages from the source
                document into the output document.
            import_outline: You may prevent the source document's
                outline (collection of outline items, previously referred to as
                'bookmarks') from being imported by specifying this as ``False``.
            excluded_fields: Provide the list of fields/keys to be ignored
                if ``/Annots`` is part of the list, the annotation will be ignored
                if ``/B`` is part of the list, the articles will be ignored

        """
        if excluded_fields is None:
            excluded_fields = ()

        title: Optional[str]
        if isinstance(outline_item, (tuple, list, PageRange)):
            # The page selection was passed in the outline_item slot: shift
            # the remaining arguments one position to the left.
            if isinstance(pages, bool):
                if not isinstance(import_outline, bool):
                    excluded_fields = import_outline
                import_outline = pages
            pages = outline_item
            title = None
        else:
            title = outline_item

        self.merge(
            None,
            fileobj,
            title,
            pages,
            import_outline,
            excluded_fields,
        )

    def merge(
        self,
        position: Optional[int],
        fileobj: Union[Path, StrByteType, PdfReader],
        outline_item: Optional[str] = None,
        pages: Optional[Union[PageRangeSpec, list[PageObject]]] = None,
        import_outline: bool = True,
        excluded_fields: Optional[Union[list[str], tuple[str, ...]]] = (),
    ) -> None:
        """
        Merge the pages from the given file into the output file at the
        specified page number.

        After the pages are copied, the source's named destinations, outline,
        annotations, ``/AcroForm`` fields and articles are carried over as far
        as they relate to the copied pages.

        Args:
            position: The *page number* to insert this file. File will
                be inserted after the given number. With ``None`` the pages
                are appended at the end.
            fileobj: A File Object or an object that supports the standard
                read and seek methods similar to a File Object. Could also be a
                string representing a path to a PDF file.
            outline_item: Optionally, you may specify a string to build an outline
                (aka 'bookmark') to identify the
                beginning of the included file.
            pages: can be a :class:`PageRange<pypdf.pagerange.PageRange>`
                or a ``(start, stop[, step])`` tuple
                or a list of pages to be processed
                to merge only the specified range of pages from the source
                document into the output document.
            import_outline: You may prevent the source document's
                outline (collection of outline items, previously referred to as
                'bookmarks') from being imported by specifying this as ``False``.
            excluded_fields: provide the list of fields/keys to be ignored
                if ``/Annots`` is part of the list, the annotation will be ignored
                if ``/B`` is part of the list, the articles will be ignored

        Raises:
            TypeError: The pages attribute is not configured properly

        """
        if isinstance(fileobj, PdfDocCommon):
            reader = fileobj
        else:
            # The encryption object returned alongside the stream is not used here.
            stream, _encryption_obj = self._create_stream(fileobj)
            # Create a new PdfReader instance using the stream
            # (either file or BytesIO or StringIO) created above
            reader = PdfReader(stream, strict=False)  # type: ignore[arg-type]

        if excluded_fields is None:
            excluded_fields = ()
        # Find the range of pages to merge.
        if pages is None:
            pages = list(range(len(reader.pages)))
        elif isinstance(pages, PageRange):
            pages = list(range(*pages.indices(len(reader.pages))))
        elif isinstance(pages, list):
            pass  # keep unchanged
        elif isinstance(pages, tuple) and len(pages) <= 3:
            pages = list(range(*pages))
        elif not isinstance(pages, tuple):
            raise TypeError(
                '"pages" must be a tuple of (start, stop[, step]) or a list'
            )
        # NOTE(review): a tuple with more than three items matches none of the
        # branches above and is iterated item by item below — confirm intended.

        # Maps the idnum of each source page to its copy in this writer.
        srcpages = {}
        for page in pages:
            if isinstance(page, PageObject):
                pg = page
            else:
                pg = reader.pages[page]
            assert pg.indirect_reference is not None
            if position is None:
                # numbers in the exclude list identifies that the exclusion is
                # only applicable to 1st level of cloning
                srcpages[pg.indirect_reference.idnum] = self.add_page(
                    pg, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
                )
            else:
                srcpages[pg.indirect_reference.idnum] = self.insert_page(
                    pg, position, [*list(excluded_fields), 1, "/B", 1, "/Annots"]  # type: ignore
                )
                # Keep the following pages in their source order.
                position += 1
            # Remember the source page; it is read again below for
            # annotations and articles.
            srcpages[pg.indirect_reference.idnum].original_page = pg

        reader._named_destinations = (
            reader.named_destinations
        )  # need for the outline processing below

        arr: Any

        for dest in reader._named_destinations.values():
            self._merge__process_named_dests(dest=dest, reader=reader, srcpages=srcpages)

        # Parent under which the imported outline is inserted: either a new
        # item titled ``outline_item`` pointing at the first copied page, or
        # the outline root.
        outline_item_typ: TreeObject
        if outline_item is not None:
            outline_item_typ = cast(
                "TreeObject",
                self.add_outline_item(
                    TextStringObject(outline_item),
                    next(iter(srcpages.values())).indirect_reference,
                    fit=PAGE_FIT,
                ).get_object(),
            )
        else:
            outline_item_typ = self.get_outline_root()

        _ro = reader.root_object
        if import_outline and CO.OUTLINES in _ro:
            outline = self._get_filtered_outline(
                _ro.get(CO.OUTLINES, None), srcpages, reader
            )
            self._insert_filtered_outline(
                outline, outline_item_typ, None
            )  # TODO: use before parameter

        # Annotations were excluded from the first-level clone above; they are
        # re-created here, filtered through ``_insert_filtered_annotations``.
        if "/Annots" not in excluded_fields:
            for pag in srcpages.values():
                lst = self._insert_filtered_annotations(
                    pag.original_page.get("/Annots", []), pag, srcpages, reader
                )
                if len(lst) > 0:
                    pag[NameObject("/Annots")] = lst
                self.clean_page(pag)

        if "/AcroForm" in _ro and not is_null_or_none(_ro["/AcroForm"]):
            if "/AcroForm" not in self._root_object:
                # First form merged: clone the source /AcroForm without its
                # /Fields and start from an empty field list.
                self._root_object[NameObject("/AcroForm")] = self._add_object(
                    cast(
                        DictionaryObject,
                        reader.root_object["/AcroForm"],
                    ).clone(self, False, ("/Fields",))
                )
                arr = ArrayObject()
            else:
                # Extend the field list this writer already has.
                arr = cast(
                    ArrayObject,
                    cast(DictionaryObject, self._root_object["/AcroForm"])["/Fields"],
                )
            # Source idnum -> idnum in this writer, for objects cloned from ``reader``.
            trslat = self._id_translated[id(reader)]
            try:
                for f in reader.root_object["/AcroForm"]["/Fields"]:  # type: ignore
                    try:
                        ind = IndirectObject(trslat[f.idnum], 0, self)
                        if ind not in arr:
                            arr.append(ind)
                    except KeyError:
                        # for trslat[] which mean the field has not be copied
                        # through the page
                        pass
            except KeyError:  # for /Acroform or /Fields are not existing
                arr = self._add_object(ArrayObject())
            cast(DictionaryObject, self._root_object["/AcroForm"])[
                NameObject("/Fields")
            ] = arr

        # Articles were excluded from the first-level clone above; an empty
        # filter string is passed so that no thread is rejected by title.
        if "/B" not in excluded_fields:
            self.add_filtered_articles("", srcpages, reader)

    def _merge__process_named_dests(self, dest: Any, reader: PdfDocCommon, srcpages: dict[int, PageObject]) -> None:
        """
        Copy one named destination of ``reader`` into this writer if applicable.

        Nothing is added when the title is already present in this writer's
        ``/Names`` -> ``/Dests`` -> ``/Names`` entry, when the destination has
        no page, or when its page is not among ``srcpages``.

        Args:
            dest: The named destination taken from the reader.
            reader: The document the destination belongs to.
            srcpages: Maps source page idnums to the pages copied into this writer.

        """
        arr: Any = dest.dest_array

        if "/Names" in self._root_object:
            title = dest["/Title"]
            names_entry = cast(DictionaryObject, self._root_object["/Names"])
            dests_entry = cast(
                DictionaryObject,
                names_entry.get("/Dests", DictionaryObject()),
            )
            known_names = cast(list[Any], dests_entry.get("/Names", DictionaryObject()))
            if title in known_names:
                # already exists: should not duplicate it
                return

        target = dest["/Page"]
        if target is None or isinstance(target, NullObject):
            return

        if isinstance(target, int):
            # the page reference is a page number normally not a PDF Reference
            # page numbers as int are normally accepted only in external goto
            try:
                p = reader.pages[target]
            except IndexError:
                return
            assert p.indirect_reference is not None
            try:
                arr[NumberObject(0)] = NumberObject(
                    srcpages[p.indirect_reference.idnum].page_number
                )
                self.add_named_destination_array(dest["/Title"], arr)
            except KeyError:
                pass
            return

        source_idnum = target.indirect_reference.idnum
        if source_idnum in srcpages:
            # Point the destination at the copy of the page in this writer.
            arr[NumberObject(0)] = srcpages[source_idnum].indirect_reference
            self.add_named_destination_array(dest["/Title"], arr)

    def _add_articles_thread(
        self,
        thread: DictionaryObject,  # thread entry from the reader's array of threads
        pages: dict[int, PageObject],
        reader: PdfReader,
    ) -> IndirectObject:
        """
        Clone the thread with only the applicable articles.

        The source articles are walked through their ``/N`` links, starting at
        the thread's ``/F`` entry, until the walk returns to the first one.
        An article is recreated only when ``_get_cloned_page`` finds a copy of
        its ``/P`` page; the new articles are linked to each other with ``/N``
        and ``/V`` and appended to the ``/B`` array of their page.

        Args:
            thread: The thread dictionary taken from the reader.
            pages: Maps source page idnums to the pages copied into this writer.
            reader: The document the thread belongs to.

        Returns:
            The added thread as an indirect reference

        """
        nthread = thread.clone(
            self, force_duplicate=True, ignore_fields=("/F",)
        )  # use of clone to keep link between reader and writer
        self.threads.append(nthread.indirect_reference)
        first_article = cast("DictionaryObject", thread["/F"])
        current_article: Optional[DictionaryObject] = first_article
        # Most recently created article in this writer (None until the first one).
        new_article: Optional[DictionaryObject] = None
        while current_article is not None:
            pag = self._get_cloned_page(
                cast("PageObject", current_article["/P"]), pages, reader
            )
            if pag is not None:
                if new_article is None:
                    # First kept article: it becomes the thread's /F entry.
                    new_article = cast(
                        "DictionaryObject",
                        self._add_object(DictionaryObject()).get_object(),
                    )
                    new_first = new_article
                    nthread[NameObject("/F")] = new_article.indirect_reference
                else:
                    # Later article: /V points back to the previous one, and
                    # the previous one's /N points forward to it.
                    new_article2 = cast(
                        "DictionaryObject",
                        self._add_object(
                            DictionaryObject(
                                {NameObject("/V"): new_article.indirect_reference}
                            )
                        ).get_object(),
                    )
                    new_article[NameObject("/N")] = new_article2.indirect_reference
                    new_article = new_article2
                new_article[NameObject("/P")] = pag
                new_article[NameObject("/T")] = nthread.indirect_reference
                new_article[NameObject("/R")] = current_article["/R"]
                # Register the new article on its page, creating /B if needed.
                pag_obj = cast("PageObject", pag.get_object())
                if "/B" not in pag_obj:
                    pag_obj[NameObject("/B")] = ArrayObject()
                cast("ArrayObject", pag_obj["/B"]).append(
                    new_article.indirect_reference
                )
            current_article = cast("DictionaryObject", current_article["/N"])
            if current_article == first_article:
                # Back at the start: close the chain into a ring and stop.
                # NOTE(review): if no article matched a copied page,
                # new_article is still None and new_first is unbound here —
                # confirm callers guarantee at least one match.
                new_article[NameObject("/N")] = new_first.indirect_reference  # type: ignore
                new_first[NameObject("/V")] = new_article.indirect_reference  # type: ignore
                current_article = None
        assert nthread.indirect_reference is not None
        return nthread.indirect_reference

    def add_filtered_articles(
        self,
        fltr: Union[
            Pattern[Any], str
        ],  # regular expression (or its source string) applied to thread titles
        pages: dict[int, PageObject],
        reader: PdfReader,
    ) -> None:
        """
        Add articles matching the defined criteria.

        The ``/B`` entries of each page's ``original_page`` are scanned. A
        thread is added when its idnum is not yet recorded in
        ``_id_translated`` for ``reader`` and ``fltr`` matches (``search``)
        the ``/Title`` of the thread's ``/I`` entry.

        Args:
            fltr: Compiled pattern, or a string that is compiled into one.
                Any other value is replaced by the empty pattern.
            pages: Maps source page idnums to the pages copied into this writer.
            reader: The document the pages were copied from.

        """
        if not isinstance(fltr, Pattern):
            fltr = re.compile(fltr if isinstance(fltr, str) else "")

        for cloned_page in pages.values():
            source_page = cloned_page.original_page
            for article_ref in source_page.get("/B", ()):
                article = article_ref.get_object()
                if is_null_or_none(article):
                    continue
                thread = article.get("/T")
                if thread is None:
                    continue
                thread = thread.get_object()
                if thread.indirect_reference.idnum in self._id_translated[id(reader)]:
                    # This thread has already been copied.
                    continue
                thread_title = (thread.get("/I", {})).get("/Title", "")
                if fltr.search(thread_title):
                    self._add_articles_thread(thread, pages, reader)

    def _get_cloned_page(
        self,
        page: Union[None, IndirectObject, PageObject, NullObject],
        pages: dict[int, PageObject],
        reader: PdfReader,
    ) -> Optional[IndirectObject]:
        """
        Look up the copy of a source page in this writer.

        Args:
            page: The source page, given as a page dictionary or as an
                indirect reference. ``None``, ``NullObject`` and any other
                value yield ``None``.
            pages: Maps source page idnums to the pages copied into this writer.
            reader: Not used by this method.

        Returns:
            The indirect reference of the copied page, or ``None`` when the
            page cannot be identified or has not been copied.

        """
        if isinstance(page, NullObject):
            return None
        if isinstance(page, DictionaryObject) and page.get("/Type", "") == "/Page":
            source_ref = page.indirect_reference
        elif isinstance(page, IndirectObject):
            source_ref = page
        else:
            # Unsupported input (including None): previously this relied on an
            # unbound local being swallowed by a blanket ``except``.
            return None
        if source_ref is None:
            # A page dictionary that was never registered has no idnum.
            return None
        cloned = pages.get(source_ref.idnum)
        if cloned is None:
            return None
        return cloned.indirect_reference

    def _insert_filtered_annotations(
        self,
        annots: Union[IndirectObject, list[DictionaryObject], None],
        page: PageObject,
        pages: dict[int, PageObject],
        reader: PdfReader,
    ) -> list[Destination]:
        """
        Clone the annotations that remain meaningful in this writer.

        Annotations without a destination are always cloned. Annotations that
        point to a named destination are kept only if that name exists in this
        writer; annotations that point to a page are kept only if that page
        has a copy in ``pages``, and are retargeted to the copy.

        Args:
            annots: The source annotation array, or a reference to it.
            page: Not used by this method.
            pages: Maps source page idnums to the pages copied into this writer.
            reader: The document the annotations come from.

        Returns:
            An array of references to the cloned annotations.

        """
        kept = ArrayObject()
        if isinstance(annots, IndirectObject):
            annots = cast("list[Any]", annots.get_object())
        if annots is None:
            return kept
        if not isinstance(annots, list):
            logger_warning(f"Expected list of annotations, got {annots} of type {annots.__class__.__name__}.", __name__)
            return kept

        for annot_ref in annots:
            annot = cast("DictionaryObject", annot_ref.get_object())
            # Links whose target lives in a /GoTo action rather than in /Dest.
            is_goto_action_link = (
                annot["/Subtype"] == "/Link"
                and "/A" in annot
                and cast("DictionaryObject", annot["/A"])["/S"] == "/GoTo"
                and "/Dest" not in annot
            )

            if is_goto_action_link:
                target = cast("DictionaryObject", annot["/A"]).get("/D", NullObject())
                if is_null_or_none(target):
                    continue
                if isinstance(target, str):
                    # it is a named dest
                    if str(target) in self.get_named_dest_root():
                        kept.append(annot.clone(self).indirect_reference)
                    continue
                target = cast("ArrayObject", target)
                new_page = self._get_cloned_page(target[0], pages, reader)
                if new_page is not None:
                    clone = annot.clone(self, ignore_fields=("/D",))
                    cast("DictionaryObject", clone["/A"])[
                        NameObject("/D")
                    ] = ArrayObject([new_page, *target[1:]])
                    kept.append(self._add_object(clone))
                continue

            if "/Dest" not in annot:
                # No destination involved: copy as is.
                kept.append(self._add_object(annot.clone(self)))
                continue

            target = annot["/Dest"]
            if isinstance(target, str):
                # it is a named dest
                if str(target) in self.get_named_dest_root():
                    kept.append(annot.clone(self).indirect_reference)
                continue
            target = cast("ArrayObject", target)
            new_page = self._get_cloned_page(target[0], pages, reader)
            if new_page is not None:
                clone = annot.clone(self, ignore_fields=("/Dest",))
                clone[NameObject("/Dest")] = ArrayObject([new_page, *target[1:]])
                kept.append(self._add_object(clone))
        return kept

    def _get_filtered_outline(
        self,
        node: Any,
        pages: dict[int, PageObject],
        reader: PdfReader,
    ) -> list[Destination]:
        """
        Extract outline item entries that are part of the specified page set.

        An item is kept when its page has a copy in ``pages`` or when at least
        one of its descendants is kept; the kept descendants are stored on the
        item as ``_filtered_children``.

        Args:
            node: Outline root or outline item to start from; ``None`` and
                null objects are treated as an empty dictionary.
            pages: Maps source page idnums to the pages copied into this writer.
            reader: The document the outline belongs to.

        Returns:
            A list of destination objects.

        """
        if node is None:
            node = NullObject()
        node = node.get_object()
        if is_null_or_none(node):
            node = DictionaryObject()

        if node.get("/Type", "") == "/Outlines" or "/Title" not in node:
            # Container without a title: continue with its first child.
            first_child = node.get("/First", None)
            if first_child is None:
                return []
            return self._get_filtered_outline(first_child.get_object(), pages, reader)

        kept_items = []
        current = node
        # Walk the siblings through their /Next links.
        while current is not None:
            current = current.get_object()
            item = cast("Destination", reader._build_outline_item(current))
            cloned_page = self._get_cloned_page(
                cast("PageObject", item["/Page"]), pages, reader
            )
            item[NameObject("/Page")] = (
                NullObject() if cloned_page is None else cloned_page
            )
            if "/First" in current:
                children = self._get_filtered_outline(current["/First"], pages, reader)
            else:
                children = []
            item._filtered_children = children
            if not isinstance(item["/Page"], NullObject) or bool(item._filtered_children):
                kept_items.append(item)
            current = current.get("/Next", None)
        return kept_items

    def _clone_outline(self, dest: Destination) -> TreeObject:
        """
        Build a new outline entry in this writer from a destination.

        The entry is registered through ``_add_object`` and receives the
        title of ``dest``. If the page of ``dest`` is not a ``NullObject``,
        either a clone of the source node's ``/A`` action or the
        destination array is attached. When a source node exists, its
        ``/F`` and ``/C`` values are copied, defaulting to ``0`` and
        ``[0.0, 0.0, 0.0]``.

        Args:
            dest: Destination describing the outline item to reproduce.

        Returns:
            The newly created outline entry.

        """
        entry = TreeObject()
        self._add_object(entry)
        entry[NameObject("/Title")] = TextStringObject(dest["/Title"])

        if not isinstance(dest["/Page"], NullObject):
            if dest.node is None or "/A" not in dest.node:
                entry[NameObject("/Dest")] = dest.dest_array
            else:
                entry[NameObject("/A")] = dest.node["/A"].clone(self)
        # TODO: /SE
        if dest.node is None:
            return entry

        black = [FloatObject(0.0), FloatObject(0.0), FloatObject(0.0)]
        entry[NameObject("/F")] = NumberObject(dest.node.get("/F", 0))
        entry[NameObject("/C")] = ArrayObject(dest.node.get("/C", black))
        return entry

    def _insert_filtered_outline(
        self,
        outlines: list[Destination],
        parent: Union[TreeObject, IndirectObject],
        before: Union[None, TreeObject, IndirectObject] = None,
    ) -> None:
        """
        Insert filtered outline items, including their kept children.

        Items that are outline containers (``/Type`` is ``/Outlines`` or
        no ``/Title`` present) are not inserted themselves; their children
        are attached directly to ``parent``. All other items are cloned
        with ``_clone_outline`` and inserted as a child of ``parent``.

        Args:
            outlines: Items to insert; each one carries its kept
                descendants in ``_filtered_children``.
            parent: Outline node receiving the items.
            before: Sibling before which top-level items are inserted;
                children are always inserted with ``None``.

        """
        for item in outlines:
            # TODO: can be improved to keep A and SE entries (ignored for the moment)
            # with np=self.add_outline_item_destination(dest,parent,before)
            is_container = (
                item.get("/Type", "") == "/Outlines" or "/Title" not in item
            )
            if is_container:
                attach_to = parent
            else:
                attach_to = self._clone_outline(item)
                parent_node = cast(TreeObject, parent.get_object())
                parent_node.insert_child(attach_to, before, self)
            self._insert_filtered_outline(item._filtered_children, attach_to, None)

    def close(self) -> None:
        """
        Do nothing; implemented for API harmonization.

        The method exists only so that callers can invoke ``close()``
        uniformly; it always returns ``None``.
        """

    def find_outline_item(
        self,
        outline_item: dict[str, Any],
        root: Optional[OutlineType] = None,
    ) -> Optional[list[int]]:
        """
        Locate an outline item and return its index path.

        A node matches when its ``indirect_reference`` or its ``/Title``
        compares equal to ``outline_item``. Siblings are visited through
        ``/Next`` and children through ``/First``.

        Args:
            outline_item: Value to compare against each node.
            root: Node to start from; the outline root of this writer is
                used when omitted.

        Returns:
            The list of sibling indexes leading to the match (the index of
            a node without ``/Title`` is not included in the path), or
            ``None`` if nothing matches.

        """
        current = self.get_outline_root() if root is None else cast("TreeObject", root)

        position = 0
        while current is not None:
            is_match = (
                current.indirect_reference == outline_item
                or current.get("/Title", None) == outline_item
            )
            if is_match:
                return [position]
            if "/First" in current:
                nested = self.find_outline_item(
                    outline_item, cast(OutlineType, current["/First"])
                )
                if nested:
                    prefix = [position] if "/Title" in current else []
                    return prefix + nested
            if "/Next" not in current:
                # Last sibling reached without a match.
                return None
            position += 1
            current = cast(TreeObject, current["/Next"])
        raise PyPdfError("This line is theoretically unreachable.")  # pragma: no cover

    def reset_translation(
        self, reader: Union[None, PdfReader, IndirectObject] = None
    ) -> None:
        """
        Reset the translation table between reader and the writer object.

        Late cloning will create new independent objects.

        Args:
            reader: PdfReader or IndirectObject referencing a PdfReader object.
                if set to None or omitted, all tables will be reset.

        Raises:
            TypeError: If ``reader`` is neither ``None``, a ``PdfReader``
                nor an ``IndirectObject``.

        """
        if reader is None:
            self._id_translated = {}
            return

        # Tables are keyed by the identity of the reader object.
        if isinstance(reader, PdfReader):
            table_key = id(reader)
        elif isinstance(reader, IndirectObject):
            table_key = id(reader.pdf)
        else:
            raise TypeError(f"invalid parameter {reader}")

        # A reader without a table is not an error: there is nothing to reset.
        self._id_translated.pop(table_key, None)

    def set_page_label(
        self,
        page_index_from: int,
        page_index_to: int,
        style: Optional[PageLabelStyle] = None,
        prefix: Optional[str] = None,
        start: Optional[int] = 0,
    ) -> None:
        """
        Set a page label to a range of pages.

        Page indexes must be given starting from 0.
        Labels must have a style, a prefix or both.
        If a range is not assigned any page label, a decimal label starting from 1 is applied.

        The arguments are validated here; the label itself is written by
        ``_set_page_label``.

        Args:
            page_index_from: page index of the beginning of the range starting from 0
            page_index_to: page index of the end of the range starting from 0;
                must not be smaller than ``page_index_from`` and must be
                smaller than the number of pages
            style: The numbering style to be used for the numeric portion of each page label:

                       * ``/D`` Decimal Arabic numerals
                       * ``/R`` Uppercase Roman numerals
                       * ``/r`` Lowercase Roman numerals
                       * ``/A`` Uppercase letters (A to Z for the first 26 pages,
                         AA to ZZ for the next 26, and so on)
                       * ``/a`` Lowercase letters (a to z for the first 26 pages,
                         aa to zz for the next 26, and so on)

            prefix: The label prefix for page labels in this range.
            start:  The value of the numeric portion for the first page label
                    in the range.
                    Subsequent pages are numbered sequentially from this value.
                    Values other than ``None`` and ``0`` must be greater
                    than or equal to 1.
                    Default value: 0.

        Raises:
            ValueError: If neither ``style`` nor ``prefix`` is given, if the
                page range is invalid, or if ``start`` is out of range.

        """
        if not (style is not None or prefix is not None):
            raise ValueError("At least one of style and prefix must be given")
        if 0 > page_index_from:
            raise ValueError("page_index_from must be greater or equal than 0")
        if page_index_from > page_index_to:
            raise ValueError(
                "page_index_to must be greater or equal than page_index_from"
            )
        if len(self.pages) <= page_index_to:
            raise ValueError("page_index_to exceeds number of pages")
        if start is not None:
            # 0 is accepted as the "not specified" default.
            if start != 0 and start < 1:
                raise ValueError("If given, start must be greater or equal than one")

        self._set_page_label(page_index_from, page_index_to, style, prefix, start)

    def _set_page_label(
        self,
        page_index_from: int,
        page_index_to: int,
        style: Optional[PageLabelStyle] = None,
        prefix: Optional[str] = None,
        start: Optional[int] = 0,
    ) -> None:
        """
        Set a page label to a range of pages.

        Page indexes must be given starting from 0.
        Labels must have a style, a prefix or both.
        If a range is not assigned any page label a decimal label starting from 1 is applied.

        No argument validation is done here.

        Args:
            page_index_from: page index of the beginning of the range starting from 0
            page_index_to: page index of the end of the range starting from 0
            style:  The numbering style to be used for the numeric portion of each page label:
                        /D Decimal Arabic numerals
                        /R Uppercase Roman numerals
                        /r Lowercase Roman numerals
                        /A Uppercase letters (A to Z for the first 26 pages,
                           AA to ZZ for the next 26, and so on)
                        /a Lowercase letters (a to z for the first 26 pages,
                           aa to zz for the next 26, and so on)
            prefix: The label prefix for page labels in this range.
            start:  The value of the numeric portion for the first page label
                    in the range.
                    Subsequent pages are numbered sequentially from this value.
                    When ``0`` (the default) or ``None``, no ``/St`` entry
                    is written.

        """
        default_page_label = DictionaryObject()
        default_page_label[NameObject("/S")] = NameObject("/D")

        new_page_label = DictionaryObject()
        if style is not None:
            new_page_label[NameObject("/S")] = NameObject(style)
        if prefix is not None:
            new_page_label[NameObject("/P")] = TextStringObject(prefix)
        # ``start`` is Optional: None must be treated like 0 ("not given")
        # instead of being handed to NumberObject.
        if start is not None and start != 0:
            new_page_label[NameObject("/St")] = NumberObject(start)

        if NameObject(CatalogDictionary.PAGE_LABELS) not in self._root_object:
            # First label of the document: start with a decimal label on page 0.
            nums = ArrayObject()
            nums_insert(NumberObject(0), default_page_label, nums)
            page_labels = TreeObject()
            page_labels[NameObject("/Nums")] = nums
            self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels

        page_labels = cast(
            TreeObject, self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)]
        )
        nums = cast(ArrayObject, page_labels[NameObject("/Nums")])

        nums_insert(NumberObject(page_index_from), new_page_label, nums)
        nums_clear_range(NumberObject(page_index_from), page_index_to, nums)
        next_label_pos, *_ = nums_next(NumberObject(page_index_from), nums)
        # Pages after the range fall back to the default label unless a
        # label already starts right behind the range.
        if next_label_pos != page_index_to + 1 and page_index_to + 1 < len(self.pages):
            nums_insert(NumberObject(page_index_to + 1), default_page_label, nums)

        page_labels[NameObject("/Nums")] = nums
        self._root_object[NameObject(CatalogDictionary.PAGE_LABELS)] = page_labels

    def _repr_mimebundle_(
        self,
        include: Union[None, Iterable[str]] = None,
        exclude: Union[None, Iterable[str]] = None,
    ) -> dict[str, Any]:
        """
        Integration into Jupyter Notebooks.

        This method returns a dictionary that maps a mime-type to its
        representation.

        .. seealso::

           https://ipython.readthedocs.io/en/stable/config/integrating.html
        """
        pdf_data = BytesIO()
        self.write(pdf_data)
        data = {
            "application/pdf": pdf_data,
        }

        if include is not None:
            # Filter representations based on include list
            data = {k: v for k, v in data.items() if k in include}

        if exclude is not None:
            # Remove representations based on exclude list
            data = {k: v for k, v in data.items() if k not in exclude}

        return data


def _pdf_objectify(obj: Union[dict[str, Any], str, float, list[Any]]) -> PdfObject:
    """
    Convert plain Python data into the matching PDF object, recursively.

    * ``PdfObject`` instances are returned unchanged.
    * Dictionaries become a ``DictionaryObject`` whose keys are wrapped in
      ``NameObject`` and whose values are converted recursively.
    * Strings starting with ``/`` become a ``NameObject``, all other
      strings a ``TextStringObject``.
    * ``int`` and ``float`` values become a ``FloatObject``.
    * Lists become an ``ArrayObject`` of converted elements.

    Raises:
        NotImplementedError: If ``obj`` is of any other type.

    """
    if isinstance(obj, PdfObject):
        return obj
    if isinstance(obj, dict):
        converted = DictionaryObject()
        for name, item in obj.items():
            converted[NameObject(name)] = _pdf_objectify(item)
        return converted
    if isinstance(obj, str):
        return NameObject(obj) if obj.startswith("/") else TextStringObject(obj)
    if isinstance(obj, (int, float)):
        return FloatObject(obj)
    if isinstance(obj, list):
        return ArrayObject(map(_pdf_objectify, obj))
    raise NotImplementedError(
        f"{type(obj)=} could not be cast to a PdfObject"
    )


def _create_outline_item(
    action_ref: Union[None, IndirectObject],
    title: str,
    color: Union[tuple[float, float, float], str, None],
    italic: bool,
    bold: bool,
) -> TreeObject:
    """
    Create an outline item dictionary.

    Args:
        action_ref: Stored as ``/A`` when not ``None``.
        title: Stored as ``/Title``.
        color: Stored as ``/C`` when truthy; strings are converted with
            ``hex_to_rgb`` first.
        italic: Adds ``OutlineFontFlag.italic`` to ``/F``.
        bold: Adds ``OutlineFontFlag.bold`` to ``/F``.

    Returns:
        The new outline item. ``/F`` is only present if ``italic`` or
        ``bold`` is set.

    """
    outline_item = TreeObject()
    if action_ref is not None:
        outline_item[NameObject("/A")] = action_ref
    outline_item.update({NameObject("/Title"): create_string_object(title)})

    if color:
        rgb = hex_to_rgb(color) if isinstance(color, str) else color
        color_array = ArrayObject([FloatObject(channel) for channel in rgb])
        outline_item.update({NameObject("/C"): color_array})

    format_flag = 0
    if italic:
        format_flag += OutlineFontFlag.italic
    if bold:
        format_flag += OutlineFontFlag.bold
    if italic or bold:
        outline_item.update({NameObject("/F"): NumberObject(format_flag)})
    return outline_item


================================================
FILE: pypdf/annotations/__init__.py
================================================
"""
PDF specifies several annotation types which pypdf makes available here.

The names of the annotations and their attributes do not reflect the names in
the specification in all cases. For example, the PDF standard defines a
'Square' annotation that does not actually need to be square. For this reason,
pypdf calls it 'Rectangle'.

At their core, all annotation types are DictionaryObjects. That means if pypdf
does not implement a feature, users can easily extend the given functionality.
"""


from ._base import NO_FLAGS, AnnotationDictionary
from ._markup_annotations import (
    Ellipse,
    FreeText,
    Highlight,
    Line,
    MarkupAnnotation,
    Polygon,
    PolyLine,
    Rectangle,
    Text,
)
from ._non_markup_annotations import Link, Popup

# Names re-exported from the private submodules imported above; they form
# the public interface of this package.
__all__ = [
    "NO_FLAGS",
    "AnnotationDictionary",
    "Ellipse",
    "FreeText",
    "Highlight",
    "Line",
    "Link",
    "MarkupAnnotation",
    "PolyLine",
    "Polygon",
    "Popup",
    "Rectangle",
    "Text",
]


================================================
FILE: pypdf/annotations/_base.py
================================================
from abc import ABC

from ..constants import AnnotationFlag
from ..generic import NameObject, NumberObject
from ..generic._data_structures import DictionaryObject


class AnnotationDictionary(DictionaryObject, ABC):
    """
    Base class of the annotation types.

    The initializer stores ``/Type`` = ``/Annot``. The annotation flags
    are kept under the ``/F`` key and are available through the ``flags``
    property.
    """

    def __init__(self) -> None:
        super().__init__()

        # /Rect should not be added here as Polygon and PolyLine can automatically set it
        type_key = NameObject("/Type")
        self[type_key] = NameObject("/Annot")
        # The flags were NOT added to the constructor on purpose:
        # We expect that most users don't want to change the default.
        # If they do, they can use the property. The default is 0.

    @property
    def flags(self) -> AnnotationFlag:
        """Return the value stored under ``/F``, or ``AnnotationFlag(0)`` if unset."""
        flags_key = NameObject("/F")
        fallback = AnnotationFlag(0)
        return self.get(flags_key, fallback)

    @flags.setter
    def flags(self, value: AnnotationFlag) -> None:
        """Store ``value`` under ``/F`` as a ``NumberObject``."""
        flags_key = NameObject("/F")
        self[flags_key] = NumberObject(value)


# Flag value with no bit set.
NO_FLAGS = AnnotationFlag(0)


================================================
FILE: pypdf/annotations/_markup_annotations.py
================================================
import sys
import uuid
from abc import ABC
from typing import Any, Literal, Optional, Union

from ..constants import AnnotationFlag
from ..generic import ArrayObject, DictionaryObject
from ..generic._base import (
    BooleanObject,
    FloatObject,
    IndirectObject,
    NameObject,
    NumberObject,
    TextStringObject,
)
from ..generic._rectangle import RectangleObject
from ..generic._utils import hex_to_rgb
from ._base import NO_FLAGS, AnnotationDictionary

if sys.version_info[:2] >= (3, 10):
    from typing import TypeAlias
else:
    # PEP 613 introduced typing.TypeAlias with Python 3.10
    # For older Python versions, the backport typing_extensions is necessary:
    from typing_extensions import TypeAlias


# A single point, given as an (x, y) pair of coordinates.
Vertex: TypeAlias = tuple[float, float]


def _get_bounding_rectangle(vertices: list[Vertex]) -> RectangleObject:
    """
    Return the smallest axis-aligned rectangle containing all vertices.

    Args:
        vertices: Points as ``(x, y)`` pairs; at least one is required
            because the first vertex seeds the result.

    Returns:
        A rectangle built from ``(x_min, y_min, x_max, y_max)``.

    """
    x_lo = x_hi = vertices[0][0]
    y_lo = y_hi = vertices[0][1]
    for px, py in vertices:
        if px < x_lo:
            x_lo = px
        if py < y_lo:
            y_lo = py
        if px > x_hi:
            x_hi = px
        if py > y_hi:
            y_hi = py
    return RectangleObject((x_lo, y_lo, x_hi, y_hi))


class MarkupAnnotation(AnnotationDictionary, ABC):
    """
    Base class for all markup annotations.

    Args:
        title_bar: Text to be displayed in the title bar of the annotation;
            by convention this is the name of the author
        in_reply_to: The annotation that this annotation is "in reply to"
            (PDF 1.5). Can be either an annotation (previously added using
            :meth:`~pypdf.PdfWriter.add_annotation`) or a reference to the
            target annotation.
        reply_type: The relationship between this annotation and the one
            specified by ``in_reply_to``. Either ``"R"`` (a reply, default)
            or ``"Group"`` (grouped with the parent annotation). Raises
            ``ValueError`` if a non-default value is provided without
            ``in_reply_to``.
        annotation_name: A text string uniquely identifying this annotation
            among all annotations on its page. Automatically generated when
            ``in_reply_to`` is set and no name is provided. Raises
            ``ValueError`` if provided without ``in_reply_to``.

    Raises:
        ValueError: If ``annotation_name`` or a non-default ``reply_type``
            is given without ``in_reply_to``, or if ``in_reply_to`` is a
            dictionary that has no indirect reference.

    """

    def __init__(
        self,
        *,
        title_bar: Optional[str] = None,
        in_reply_to: Optional[Union[DictionaryObject, IndirectObject]] = None,
        reply_type: Literal["R", "Group"] = "R",
        annotation_name: Optional[str] = None,
    ) -> None:
        # Run the base initializer so that every markup annotation gets the
        # /Type /Annot entry, not only the subclasses that add it themselves.
        super().__init__()

        if annotation_name is not None and in_reply_to is None:
            raise ValueError(
                "annotation_name is only supported when in_reply_to is set"
            )
        if reply_type != "R" and in_reply_to is None:
            raise ValueError(
                "reply_type is only meaningful when in_reply_to is set"
            )

        if title_bar is not None:
            self[NameObject("/T")] = TextStringObject(title_bar)
        if in_reply_to is None:
            return

        if isinstance(in_reply_to, IndirectObject):
            ref: IndirectObject = in_reply_to
        else:
            # A dictionary is only usable once it has an indirect reference.
            indirect_ref = getattr(in_reply_to, "indirect_reference", None)
            if not isinstance(indirect_ref, IndirectObject):
                raise ValueError(
                    "in_reply_to must be a registered annotation "
                    "(added via writer.add_annotation() first)"
                )
            ref = indirect_ref
        self[NameObject("/IRT")] = ref
        self[NameObject("/RT")] = NameObject(f"/{reply_type}")
        if annotation_name is None:
            # Replies always carry a name; generate one when none was given.
            annotation_name = str(uuid.uuid4())
        self[NameObject("/NM")] = TextStringObject(annotation_name)


class Text(MarkupAnnotation):
    """
    A text annotation.

    Args:
        rect: array of four integers ``[xLL, yLL, xUR, yUR]``
            specifying the clickable rectangular area
        text: The text that is added to the document
        open: Stored as the ``/Open`` entry of the annotation.
        flags: Annotation flags. A non-zero value is stored under ``/F``,
            the key also used by the ``flags`` property. The value is
            additionally written under the historical ``/Flags`` key to
            stay compatible with existing consumers.
        **kwargs: Forwarded to :class:`MarkupAnnotation`.

    """

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        text: str,
        open: bool = False,
        flags: int = NO_FLAGS,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self[NameObject("/Subtype")] = NameObject("/Text")
        self[NameObject("/Rect")] = RectangleObject(rect)
        self[NameObject("/Contents")] = TextStringObject(text)
        self[NameObject("/Open")] = BooleanObject(open)
        # Kept for backward compatibility; "/Flags" is not the key read by
        # the ``flags`` property.
        self[NameObject("/Flags")] = NumberObject(flags)
        if flags:
            # Make the flags visible through ``self.flags``.
            self[NameObject("/F")] = NumberObject(flags)


class FreeText(MarkupAnnotation):
    """
    A FreeText annotation.

    Args:
        text: Stored as ``/Contents``.
        rect: Stored as ``/Rect``.
        font: Font name used in the ``/DS`` style string.
        bold: Selects ``bold`` instead of ``normal`` in the style string.
        italic: Selects ``italic`` instead of ``normal`` in the style string.
        font_size: Font size used in the style string.
        font_color: Hex color (without ``#``) used in the style string.
        border_color: Hex color converted with ``hex_to_rgb`` and written
            into ``/DA``; ``None`` adds a border style of width 0.
        background_color: Hex color stored as ``/C``; ``None`` omits it.
        **kwargs: Forwarded to :class:`MarkupAnnotation`.

    """

    def __init__(
        self,
        *,
        text: str,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        font: str = "Helvetica",
        bold: bool = False,
        italic: bool = False,
        font_size: str = "14pt",
        font_color: str = "000000",
        border_color: Optional[str] = "000000",
        background_color: Optional[str] = "ffffff",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self[NameObject("/Subtype")] = NameObject("/FreeText")
        self[NameObject("/Rect")] = RectangleObject(rect)

        # Table 225 of the 1.7 reference ("CSS2 style attributes used in rich text strings")
        slant = "italic " if italic else "normal "
        weight = "bold " if bold else "normal "
        font_str = (
            f"font: {slant}{weight}{font_size} {font}"
            f";text-align:left;color:#{font_color}"
        )

        default_appearance_string = ""
        if border_color:
            # Each color component is followed by a space, then "rg".
            components = "".join(f"{st} " for st in hex_to_rgb(border_color))
            default_appearance_string = f"{components}rg"

        self.update(
            {
                NameObject("/Contents"): TextStringObject(text),
                # font size color
                NameObject("/DS"): TextStringObject(font_str),
                NameObject("/DA"): TextStringObject(default_appearance_string),
            }
        )
        if border_color is None:
            # Border Style: width of 0 means no border
            no_border = DictionaryObject({NameObject("/W"): NumberObject(0)})
            self[NameObject("/BS")] = no_border
        if background_color is not None:
            background = [FloatObject(n) for n in hex_to_rgb(background_color)]
            self[NameObject("/C")] = ArrayObject(background)


class Line(MarkupAnnotation):
    """
    A line annotation.

    Args:
        p1: First end point; stored together with ``p2`` as ``/L``.
        p2: Second end point.
        rect: Stored as ``/Rect``.
        text: Stored as ``/Contents``.
        **kwargs: Forwarded to :class:`MarkupAnnotation`.

    The line endings (``/LE``) are both ``/None`` and the interior color
    (``/IC``) is ``[0.5, 0.5, 0.5]``.
    """

    def __init__(
        self,
        p1: Vertex,
        p2: Vertex,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        text: str = "",
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.update(
            {
                NameObject("/Subtype"): NameObject("/Line"),
                NameObject("/Rect"): RectangleObject(rect),
                # x and y of p1, followed by x and y of p2
                NameObject("/L"): ArrayObject(
                    [FloatObject(point[axis]) for point in (p1, p2) for axis in (0, 1)]
                ),
                NameObject("/LE"): ArrayObject(
                    [NameObject("/None") for _ in range(2)]
                ),
                NameObject("/IC"): ArrayObject(
                    [FloatObject(0.5) for _ in range(3)]
                ),
                NameObject("/Contents"): TextStringObject(text),
            }
        )


class PolyLine(MarkupAnnotation):
    """
    A polyline annotation.

    Args:
        vertices: Points of the polyline; stored flattened as
            ``/Vertices``. ``/Rect`` is set to their bounding rectangle.
        **kwargs: Forwarded to :class:`MarkupAnnotation`.

    Raises:
        ValueError: If ``vertices`` is empty.

    """

    def __init__(
        self,
        vertices: list[Vertex],
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if len(vertices) < 1:
            raise ValueError("A polyline needs at least 1 vertex with two coordinates")
        # NOTE(review): coordinates are wrapped in NumberObject although
        # Vertex is typed as floats - confirm fractional values survive.
        coord_list = [
            NumberObject(value) for x, y in vertices for value in (x, y)
        ]
        bounding_box = {
            NameObject("/Subtype"): NameObject("/PolyLine"),
            NameObject("/Vertices"): ArrayObject(coord_list),
            NameObject("/Rect"): RectangleObject(_get_bounding_rectangle(vertices)),
        }
        self.update(bounding_box)


class Rectangle(MarkupAnnotation):
    """
    A rectangle annotation, written with the ``/Square`` subtype.

    Args:
        rect: Stored as ``/Rect``.
        interior_color: Hex color stored as ``/IC``; omitted when falsy.
        **kwargs: Forwarded to :class:`MarkupAnnotation`.

    """

    def __init__(
        self,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        *,
        interior_color: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        entries = {
            NameObject("/Type"): NameObject("/Annot"),
            NameObject("/Subtype"): NameObject("/Square"),
            NameObject("/Rect"): RectangleObject(rect),
        }
        self.update(entries)

        if not interior_color:
            return
        fill = [FloatObject(n) for n in hex_to_rgb(interior_color)]
        self[NameObject("/IC")] = ArrayObject(fill)


class Highlight(MarkupAnnotation):
    """
    A highlight annotation.

    Args:
        rect: Stored as ``/Rect``.
        quad_points: Stored unchanged as ``/QuadPoints``.
        highlight_color: Hex color stored as ``/C``.
        printing: If true, the flags are set to ``AnnotationFlag.PRINT``.
        **kwargs: Forwarded to :class:`MarkupAnnotation`.

    """

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        quad_points: ArrayObject,
        highlight_color: str = "ff0000",
        printing: bool = False,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        entries = {
            NameObject("/Subtype"): NameObject("/Highlight"),
            NameObject("/Rect"): RectangleObject(rect),
            NameObject("/QuadPoints"): quad_points,
            NameObject("/C"): ArrayObject(
                [FloatObject(n) for n in hex_to_rgb(highlight_color)]
            ),
        }
        self.update(entries)
        if not printing:
            return
        self.flags = AnnotationFlag.PRINT


class Ellipse(MarkupAnnotation):
    """
    An ellipse annotation, written with the ``/Circle`` subtype.

    Args:
        rect: Stored as ``/Rect``.
        interior_color: Hex color stored as ``/IC``; omitted when falsy.
        **kwargs: Forwarded to :class:`MarkupAnnotation`.

    """

    def __init__(
        self,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        *,
        interior_color: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)

        entries = {
            NameObject("/Type"): NameObject("/Annot"),
            NameObject("/Subtype"): NameObject("/Circle"),
            NameObject("/Rect"): RectangleObject(rect),
        }
        self.update(entries)

        if not interior_color:
            return
        fill = [FloatObject(n) for n in hex_to_rgb(interior_color)]
        self[NameObject("/IC")] = ArrayObject(fill)


class Polygon(MarkupAnnotation):
    """
    A polygon annotation.

    Args:
        vertices: Points of the polygon; stored flattened as
            ``/Vertices``. ``/Rect`` is set to their bounding rectangle.
        **kwargs: Forwarded to :class:`MarkupAnnotation`.

    Raises:
        ValueError: If ``vertices`` is empty.

    The intent (``/IT``) is always ``/PolygonCloud``.
    """

    def __init__(
        self,
        vertices: list[tuple[float, float]],
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if len(vertices) < 1:
            raise ValueError("A polygon needs at least 1 vertex with two coordinates")

        coord_list = [
            NumberObject(value) for x, y in vertices for value in (x, y)
        ]
        entries = {
            NameObject("/Type"): NameObject("/Annot"),
            NameObject("/Subtype"): NameObject("/Polygon"),
            NameObject("/Vertices"): ArrayObject(coord_list),
            NameObject("/IT"): NameObject("/PolygonCloud"),
            NameObject("/Rect"): RectangleObject(_get_bounding_rectangle(vertices)),
        }
        self.update(entries)


================================================
FILE: pypdf/annotations/_non_markup_annotations.py
================================================
from typing import TYPE_CHECKING, Any, Optional, Union

from ..generic._base import (
    BooleanObject,
    NameObject,
    NumberObject,
    TextStringObject,
)
from ..generic._data_structures import ArrayObject, DictionaryObject
from ..generic._fit import DEFAULT_FIT, Fit
from ..generic._rectangle import RectangleObject
from ._base import AnnotationDictionary


class Link(AnnotationDictionary):
    """
    A link annotation pointing either to a URL or to a page.

    Args:
        rect: Stored as ``/Rect``.
        border: Up to three numbers, optionally followed by a dash
            pattern as fourth element; stored as ``/Border``. Defaults to
            three zeros.
        url: Target of an external link, stored in a ``/URI`` action.
        target_page_index: Target page of an internal link.
        fit: Fit type and arguments used for an internal link.
        **kwargs: Forwarded to :class:`AnnotationDictionary`.

    Raises:
        ValueError: If none or both of ``url`` and ``target_page_index``
            are given.

    """

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        border: Optional[ArrayObject] = None,
        url: Optional[str] = None,
        target_page_index: Optional[int] = None,
        fit: Fit = DEFAULT_FIT,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        if TYPE_CHECKING:
            from ..types import BorderArrayType  # noqa: PLC0415

        has_url = url is not None
        has_target = target_page_index is not None
        if not (has_url or has_target):
            raise ValueError(
                "Either 'url' or 'target_page_index' have to be provided. Both were None."
            )
        if has_url and has_target:
            raise ValueError(
                "Either 'url' or 'target_page_index' have to be provided. "
                f"{url=}, {target_page_index=}"
            )

        border_arr: BorderArrayType
        if border is None:
            border_arr = [NumberObject(0)] * 3
        else:
            border_arr = [NumberObject(n) for n in border[:3]]
            if len(border) == 4:
                # The fourth element is the dash pattern.
                border_arr.append(ArrayObject([NumberObject(n) for n in border[3]]))

        self.update(
            {
                NameObject("/Type"): NameObject("/Annot"),
                NameObject("/Subtype"): NameObject("/Link"),
                NameObject("/Rect"): RectangleObject(rect),
                NameObject("/Border"): ArrayObject(border_arr),
            }
        )
        # Exactly one of the two targets is set at this point.
        if has_url:
            uri_action = DictionaryObject(
                {
                    NameObject("/S"): NameObject("/URI"),
                    NameObject("/Type"): NameObject("/Action"),
                    NameObject("/URI"): TextStringObject(url),
                }
            )
            self[NameObject("/A")] = uri_action
        else:
            # This needs to be updated later!
            dest_deferred = DictionaryObject(
                {
                    "target_page_index": NumberObject(target_page_index),
                    "fit": NameObject(fit.fit_type),
                    "fit_args": fit.fit_args,
                }
            )
            self[NameObject("/Dest")] = dest_deferred


class Popup(AnnotationDictionary):
    """
    A popup annotation.

    Args:
        rect: Stored as ``/Rect``.
        parent: Annotation the popup belongs to; when truthy, its
            ``indirect_reference`` is stored as ``/Parent``. A warning is
            logged if that attribute cannot be read.
        open: Stored as ``/Open``.
        **kwargs: Forwarded to :class:`AnnotationDictionary`.

    """

    def __init__(
        self,
        *,
        rect: Union[RectangleObject, tuple[float, float, float, float]],
        parent: Optional[DictionaryObject] = None,
        open: bool = False,
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        entries = {
            NameObject("/Subtype"): NameObject("/Popup"),
            NameObject("/Rect"): RectangleObject(rect),
            NameObject("/Open"): BooleanObject(open),
        }
        self.update(entries)
        if not parent:
            return
        # This needs to be an indirect object
        try:
            self[NameObject("/Parent")] = parent.indirect_reference
        except AttributeError:
            from .._utils import logger_warning  # noqa: PLC0415

            logger_warning(
                "Unregistered Parent object : No Parent field set",
                __name__,
            )


================================================
FILE: pypdf/constants.py
================================================
"""Various constants, enums, and flags to aid readability."""

from enum import Enum, IntFlag, auto, unique


class StrEnum(str, Enum):  # Once we are on Python 3.11+: enum.StrEnum
    """Enum whose members are strings and print as their plain value."""

    def __str__(self) -> str:
        return f"{self.value}"


class Core:
    """Keywords that don't quite belong anywhere else."""

    # Each value is the literal name, including its leading slash.
    OUTLINES = "/Outlines"
    THREADS = "/Threads"
    PAGE = "/Page"
    PAGES = "/Pages"
    CATALOG = "/Catalog"


class TrailerKeys:
    """Key names, presumably those of the trailer dictionary (going by the class name)."""

    # Each value is the literal name, including its leading slash.
    SIZE = "/Size"
    PREV = "/Prev"
    ROOT = "/Root"
    ENCRYPT = "/Encrypt"
    INFO = "/Info"
    ID = "/ID"


class CatalogAttributes:
    """Key names, presumably entries of the document catalog (going by the class name)."""

    NAMES = "/Names"
    DESTS = "/Dests"


class EncryptionDictAttributes:
    """
    Additional encryption dictionary entries for the standard security handler.

    Table 3.19, Page 122.
    Table 21 of the 2.0 manual.

    Each attribute holds the key name; the trailing comment gives the
    value type and whether the entry is required.
    """

    R = "/R"  # number, required; revision of the standard security handler
    O = "/O"  # 32-byte string, required  # noqa: E741
    U = "/U"  # 32-byte string, required
    P = "/P"  # integer flag, required; permitted operations
    ENCRYPT_METADATA = "/EncryptMetadata"  # boolean flag, optional


class UserAccessPermissions(IntFlag):
    """
    Table 3.20 User access permissions.
    Table 22 of the 2.0 manual.

    Members named ``R<number>`` are reserved bits; all other members are
    named permissions. The reserved bits ``R1`` and ``R2`` are inactive,
    every other reserved bit is active.
    """

    R1 = 1
    R2 = 2
    PRINT = 4
    MODIFY = 8
    EXTRACT = 16
    ADD_OR_MODIFY = 32
    R7 = 64
    R8 = 128
    FILL_FORM_FIELDS = 256
    EXTRACT_TEXT_AND_GRAPHICS = 512
    ASSEMBLE_DOC = 1024
    PRINT_TO_REPRESENTATION = 2048
    R13 = 2**12
    R14 = 2**13
    R15 = 2**14
    R16 = 2**15
    R17 = 2**16
    R18 = 2**17
    R19 = 2**18
    R20 = 2**19
    R21 = 2**20
    R22 = 2**21
    R23 = 2**22
    R24 = 2**23
    R25 = 2**24
    R26 = 2**25
    R27 = 2**26
    R28 = 2**27
    R29 = 2**28
    R30 = 2**29
    R31 = 2**30
    R32 = 2**31

    @classmethod
    def _is_reserved(cls, name: str) -> bool:
        """Tell whether ``name`` has the form of a reserved entry (``R`` plus digits)."""
        prefix, number = name[:1], name[1:]
        return prefix == "R" and number.isdigit()

    @classmethod
    def _is_active(cls, name: str) -> bool:
        """Tell whether the reserved entry ``name`` defaults to 1 = active."""
        inactive = ("R1", "R2")
        return name not in inactive

    def to_dict(self) -> dict[str, bool]:
        """
        Convert this flag value to a mapping of lower-case permission names.

        Reserved entries are left out; each named permission maps to
        whether its bit is set.
        """
        return {
            name.lower(): (self & flag) == flag
            for name, flag in UserAccessPermissions.__members__.items()
            if not UserAccessPermissions._is_reserved(name)
        }

    @classmethod
    def from_dict(cls, value: dict[str, bool]) -> "UserAccessPermissions":
        """
        Convert a mapping of lower-case permission names to the flag value.

        Missing permissions count as not granted. Reserved bits always get
        their required value.

        Raises:
            ValueError: If ``value`` contains keys that are not permission
                names.

        """
        remaining = value.copy()
        permissions = cls(0)
        for name, flag in cls.__members__.items():
            if cls._is_reserved(name):
                # Reserved names have a required value. Use it.
                enabled = cls._is_active(name)
            else:
                enabled = remaining.pop(name.lower(), False)
            if enabled:
                permissions |= flag
        if remaining:
            raise ValueError(f"Unknown dictionary keys: {remaining!r}")
        return permissions

    @classmethod
    def all(cls) -> "UserAccessPermissions":
        """Return the value with all 32 bits set except ``R1`` and ``R2``."""
        every_bit = 2**32 - 1
        return cls(every_bit - cls.R1 - cls.R2)


class Resources:
    """
    Table 3.30 Entries in a resource dictionary.
    Table 34 in the 2.0 reference.
    """

    # Each constant holds the PDF name (leading slash included) of one entry;
    # the trailing comment gives the value type and whether it is optional.
    EXT_G_STATE = "/ExtGState"  # dictionary, optional
    COLOR_SPACE = "/ColorSpace"  # dictionary, optional
    PATTERN = "/Pattern"  # dictionary, optional
    SHADING = "/Shading"  # dictionary, optional
    XOBJECT = "/XObject"  # dictionary, optional
    FONT = "/Font"  # dictionary, optional
    PROC_SET = "/ProcSet"  # array, optional
    PROPERTIES = "/Properties"  # dictionary, optional


class PagesAttributes:
    """
    §7.7.3.2 of the 1.7 and 2.0 reference.

    Key names of a page tree node (``/Type /Pages``).
    """

    TYPE = "/Type"  # name, required; must be /Pages
    PARENT = "/Parent"  # dictionary, required; indirect reference to pages object
    KIDS = "/Kids"  # array, required; List of indirect references
    COUNT = "/Count"
    # integer, required; the number of leaf nodes (page objects)
    # that are descendants of this node within the page tree


class PageAttributes:
    """
    §7.7.3.3 of the 1.7 and 2.0 reference.

    Key names of a page object (``/Type /Page``).
    """

    # Each constant holds the PDF name (leading slash included) of one entry;
    # the trailing comment gives the value type and whether it is required.
    TYPE = "/Type"  # name, required; must be /Page
    PARENT = "/Parent"  # dictionary, required; a pages object
    LAST_MODIFIED = (
        "/LastModified"  # date, optional; date and time of last modification
    )
    RESOURCES = "/Resources"  # dictionary, required if there are any
    MEDIABOX = "/MediaBox"  # rectangle, required; rectangle specifying page size
    CROPBOX = "/CropBox"  # rectangle, optional
    BLEEDBOX = "/BleedBox"  # rectangle, optional
    TRIMBOX = "/TrimBox"  # rectangle, optional
    ARTBOX = "/ArtBox"  # rectangle, optional
    BOX_COLOR_INFO = "/BoxColorInfo"  # dictionary, optional
    CONTENTS = "/Contents"  # stream or array, optional
    ROTATE = "/Rotate"  # integer, optional; page rotation in degrees
    GROUP = "/Group"  # dictionary, optional; page group
    THUMB = "/Thumb"  # stream, optional; indirect reference to image of the page
    B = "/B"  # array, optional
    DUR = "/Dur"  # number, optional
    TRANS = "/Trans"  # dictionary, optional
    ANNOTS = "/Annots"  # array, optional; an array of annotations
    AA = "/AA"  # dictionary, optional
    METADATA = "/Metadata"  # stream, optional
    PIECE_INFO = "/PieceInfo"  # dictionary, optional
    STRUCT_PARENTS = "/StructParents"  # integer, optional
    ID = "/ID"  # byte string, optional
    PZ = "/PZ"  # number, optional
    SEPARATION_INFO = "/SeparationInfo"  # dictionary, optional
    TABS = "/Tabs"  # name, optional
    TEMPLATE_INSTANTIATED = "/TemplateInstantiated"  # name, optional
    PRES_STEPS = "/PresSteps"  # dictionary, optional
    USER_UNIT = "/UserUnit"  # number, optional
    VP = "/VP"  # dictionary, optional
    AF = "/AF"  # array of dictionaries, optional
    OUTPUT_INTENTS = "/OutputIntents"  # array, optional
    D_PART = "/DPart"  # dictionary, required, if this page is within the range of a DPart, not permitted otherwise


class FileSpecificationDictionaryEntries:
    """Table 3.41 Entries in a file specification dictionary."""

    # Note: unlike most constant classes in this module, several attribute
    # names here mirror the PDF key spelling (``Type``, ``Mac``, ``Cl``)
    # instead of using upper snake case.
    Type = "/Type"
    FS = "/FS"  # The name of the file system to be used to interpret this file specification
    F = "/F"  # A file specification string of the form described in §3.10.1
    UF = "/UF"  # A Unicode string of the file as described in §3.10.1
    DOS = "/DOS"
    Mac = "/Mac"
    Unix = "/Unix"
    ID = "/ID"
    V = "/V"
    EF = "/EF"  # dictionary, containing a subset of the keys F, UF, DOS, Mac, and Unix
    RF = "/RF"  # dictionary, containing arrays of /EmbeddedFile
    DESC = "/Desc"  # description of the file
    Cl = "/Cl"


class StreamAttributes:
    """
    Table 4.2.
    Table 5 in the 2.0 reference.

    Key names of the entries of a stream dictionary covered here.
    """

    LENGTH = "/Length"  # integer, required
    FILTER = "/Filter"  # name or array of names, optional
    DECODE_PARMS = "/DecodeParms"  # variable, optional -- 'decodeParams is wrong


@unique
class FilterTypes(StrEnum):
    """
    §7.4 of the 1.7 and 2.0 references.

    Full filter names as string enum members; ``@unique`` rejects two
    members sharing the same value.
    """

    ASCII_HEX_DECODE = "/ASCIIHexDecode"  # abbreviation: AHx
    ASCII_85_DECODE = "/ASCII85Decode"  # abbreviation: A85
    LZW_DECODE = "/LZWDecode"  # abbreviation: LZW
    FLATE_DECODE = "/FlateDecode"  # abbreviation: Fl
    RUN_LENGTH_DECODE = "/RunLengthDecode"  # abbreviation: RL
    CCITT_FAX_DECODE = "/CCITTFaxDecode"  # abbreviation: CCF
    DCT_DECODE = "/DCTDecode"  # abbreviation: DCT
    JPX_DECODE = "/JPXDecode"
    JBIG2_DECODE = "/JBIG2Decode"


class FilterTypeAbbreviations:
    """
    §8.9.7 of the 1.7 and 2.0 references.

    Abbreviated filter names; the matching full names are the members of
    ``FilterTypes`` that carry an "abbreviation" comment.
    """

    AHx = "/AHx"
    A85 = "/A85"
    LZW = "/LZW"
    FL = "/Fl"
    RL = "/RL"
    CCF = "/CCF"
    DCT = "/DCT"


class LzwFilterParameters:
    """
    Table 4.4.
    Table 8 in the 2.0 reference.

    Key names of the decode parameters; the trailing comment gives the
    value type.
    """

    PREDICTOR = "/Predictor"  # integer
    COLORS = "/Colors"  # integer
    BITS_PER_COMPONENT = "/BitsPerComponent"  # integer
    COLUMNS = "/Columns"  # integer
    EARLY_CHANGE = "/EarlyChange"  # integer


class CcittFaxDecodeParameters:
    """
    Table 4.5.
    Table 11 in the 2.0 reference.

    Key names of the decode parameters; the trailing comment gives the
    value type.
    """

    K = "/K"  # integer
    END_OF_LINE = "/EndOfLine"  # boolean
    ENCODED_BYTE_ALIGN = "/EncodedByteAlign"  # boolean
    COLUMNS = "/Columns"  # integer
    ROWS = "/Rows"  # integer
    END_OF_BLOCK = "/EndOfBlock"  # boolean
    BLACK_IS_1 = "/BlackIs1"  # boolean
    DAMAGED_ROWS_BEFORE_ERROR = "/DamagedRowsBeforeError"  # integer


class ImageAttributes:
    """
    §11.6.5 of the 1.7 and 2.0 references.

    Key names of an image dictionary; the trailing comment gives the value
    type and whether the entry is required.
    """

    TYPE = "/Type"  # name, required; must be /XObject
    SUBTYPE = "/Subtype"  # name, required; must be /Image
    NAME = "/Name"  # name, required
    WIDTH = "/Width"  # integer, required
    HEIGHT = "/Height"  # integer, required
    BITS_PER_COMPONENT = "/BitsPerComponent"  # integer, required
    COLOR_SPACE = "/ColorSpace"  # name, required
    DECODE = "/Decode"  # array, optional
    INTENT = "/Intent"  # string, optional
    INTERPOLATE = "/Interpolate"  # boolean, optional
    IMAGE_MASK = "/ImageMask"  # boolean, optional
    MASK = "/Mask"  # 1-bit image mask stream
    S_MASK = "/SMask"  # dictionary or name, optional


class ColorSpaces:
    """PDF names of the three device color spaces listed here."""

    DEVICE_RGB = "/DeviceRGB"
    DEVICE_CMYK = "/DeviceCMYK"
    DEVICE_GRAY = "/DeviceGray"


class TypArguments:
    """
    Table 8.2 of the PDF 1.7 reference.

    PDF names for the four sides: left, right, bottom and top.
    """

    LEFT = "/Left"
    RIGHT = "/Right"
    BOTTOM = "/Bottom"
    TOP = "/Top"


class TypFitArguments:
    """
    Table 8.2 of the PDF 1.7 reference.

    PDF names of the fit types (``/XYZ``, ``/Fit`` and its variants).
    """

    XYZ = "/XYZ"
    FIT = "/Fit"
    FIT_H = "/FitH"
    FIT_V = "/FitV"
    FIT_R = "/FitR"
    FIT_B = "/FitB"
    FIT_BH = "/FitBH"
    FIT_BV = "/FitBV"


class GoToActionArguments:
    """Key names of the entries of a go-to action dictionary."""

    S = "/S"  # name, required: type of action
    D = "/D"  # name, byte string, or array, required: destination to jump to
    SD = "/SD"  # array, optional: structure destination to jump to


class AnnotationDictionaryAttributes:
    """Table 8.15 Entries common to all annotation dictionaries."""

    # Attribute names mirror the PDF key spelling; each value is that key
    # with its leading slash.
    Type = "/Type"
    Subtype = "/Subtype"
    Rect = "/Rect"
    Contents = "/Contents"
    P = "/P"
    NM = "/NM"
    M = "/M"
    F = "/F"
    AP = "/AP"
    AS = "/AS"
    DA = "/DA"
    Border = "/Border"
    C = "/C"
    StructParent = "/StructParent"
    OC = "/OC"


class InteractiveFormDictEntries:
    """Key names of the entries of an interactive form dictionary."""

    # Attribute names mirror the PDF key spelling; each value is that key
    # with its leading slash.
    Fields = "/Fields"
    NeedAppearances = "/NeedAppearances"
    SigFlags = "/SigFlags"
    CO = "/CO"
    DR = "/DR"
    DA = "/DA"
    Q = "/Q"
    XFA = "/XFA"


class FieldDictionaryAttributes:
    """
    Entries common to all field dictionaries (Table 8.69 PDF 1.7 reference)
    (*very partially documented here*).

    FfBits provides the constants used for `/Ff` from Table 8.70/8.75/8.77/8.79
    """

    FT = "/FT"  # name, required for terminal fields
    Parent = "/Parent"  # dictionary, required for children
    Kids = "/Kids"  # array, sometimes required
    T = "/T"  # text string, optional
    TU = "/TU"  # text string, optional
    TM = "/TM"  # text string, optional
    Ff = "/Ff"  # integer, optional
    V = "/V"  # text string or array, optional
    DV = "/DV"  # text string, optional
    AA = "/AA"  # dictionary, optional
    Opt = "/Opt"  # array, optional

    class FfBits(IntFlag):
        """
        Ease building /Ff flags
        Some entries may be specific to:

        * Text (Tx) (Table 8.75 PDF 1.7 reference)
        * Buttons (Btn) (Table 8.77 PDF 1.7 reference)
        * Choice (Ch) (Table 8.79 PDF 1.7 reference)

        The same bit can have a different meaning depending on the field
        type, so two names may share one value (see ``RichText``).
        """

        ReadOnly = 1 << 0
        """common to Tx/Btn/Ch in Table 8.70"""
        Required = 1 << 1
        """common to Tx/Btn/Ch in Table 8.70"""
        NoExport = 1 << 2
        """common to Tx/Btn/Ch in Table 8.70"""

        Multiline = 1 << 12
        """Tx"""
        Password = 1 << 13
        """Tx"""

        NoToggleToOff = 1 << 14
        """Btn"""
        Radio = 1 << 15
        """Btn"""
        Pushbutton = 1 << 16
        """Btn"""

        Combo = 1 << 17
        """Ch"""
        Edit = 1 << 18
        """Ch"""
        Sort = 1 << 19
        """Ch"""

        FileSelect = 1 << 20
        """Tx"""

        MultiSelect = 1 << 21
        """Ch"""

        DoNotSpellCheck = 1 << 22
        """Tx/Ch"""
        DoNotScroll = 1 << 23
        """Tx"""
        Comb = 1 << 24
        """Tx"""

        RadiosInUnison = 1 << 25
        """Btn"""

        # Same value as RadiosInUnison: the enum machinery therefore makes
        # RichText an alias, i.e. ``FfBits.RichText is FfBits.RadiosInUnison``.
        RichText = 1 << 25
        """Tx"""

        CommitOnSelChange = 1 << 26
        """Ch"""

    @classmethod
    def attributes(cls) -> tuple[str, ...]:
        """
        Get a tuple of the attributes present in a Field Dictionary.

        This method returns the key names ``/TM``, ``/T``, ``/FT``,
        ``/Parent``, ``/TU``, ``/Ff``, ``/V``, ``/DV``, ``/Kids`` and ``/AA``
        in exactly this order. These attributes correspond to the entries
        that are common to all field dictionaries as specified in the
        PDF 1.7 reference. ``/Opt`` is defined on this class but is not part
        of the returned tuple.

        Returns:
            A tuple containing the attribute constants listed above.

        """
        return (
            cls.TM,
            cls.T,
            cls.FT,
            cls.Parent,
            cls.TU,
            cls.Ff,
            cls.V,
            cls.DV,
            cls.Kids,
            cls.AA,
        )

    @classmethod
    def attributes_dict(cls) -> dict[str, str]:
        """
        Get a dictionary of attribute keys and their human-readable names.

        This method returns a dictionary where the keys are attribute
        constants defined in the FieldDictionaryAttributes class and the values
        are their corresponding human-readable names. These attributes
        correspond to the entries that are common to all field dictionaries as
        specified in the PDF 1.7 reference. Only ``/FT``, ``/Parent``, ``/T``,
        ``/TU``, ``/TM``, ``/Ff``, ``/V`` and ``/DV`` have an entry;
        ``/Kids``, ``/AA`` and ``/Opt`` do not.

        Returns:
            A dictionary containing attribute keys and their names.

        """
        return {
            cls.FT: "Field Type",
            cls.Parent: "Parent",
            cls.T: "Field Name",
            cls.TU: "Alternate Field Name",
            cls.TM: "Mapping Name",
            cls.Ff: "Field Flags",
            cls.V: "Value",
            cls.DV: "Default Value",
        }


class CheckboxRadioButtonAttributes:
    """Table 8.76 Field flags common to all field types."""

    Opt = "/Opt"  # Options, Optional

    @classmethod
    def attributes(cls) -> tuple[str, ...]:
        """
        List the attribute constants defined by this class.

        Returns:
            A one-element tuple holding ``Opt``.

        """
        only_key = cls.Opt
        return (only_key,)

    @classmethod
    def attributes_dict(cls) -> dict[str, str]:
        """
        Map the attribute constants of this class to human-readable names.

        Returns:
            A dictionary with the single entry ``Opt`` -> ``"Options"``.

        """
        readable_names: dict[str, str] = {}
        readable_names[cls.Opt] = "Options"
        return readable_names


class FieldFlag(IntFlag):
    """Table 8.70 Field flags common to all field types."""

    # Written as shifts so the zero-based bit index is visible.
    READ_ONLY = 1 << 0
    REQUIRED = 1 << 1
    NO_EXPORT = 1 << 2


class DocumentInformationAttributes:
    """
    Table 10.2 Entries in the document information dictionary.

    Each constant is the key name of one entry; the trailing comment gives
    the value type and whether the entry is optional.
    """

    TITLE = "/Title"  # text string, optional
    AUTHOR = "/Author"  # text string, optional
    SUBJECT = "/Subject"  # text string, optional
    KEYWORDS = "/Keywords"  # text string, optional
    CREATOR = "/Creator"  # text string, optional
    PRODUCER = "/Producer"  # text string, optional
    CREATION_DATE = "/CreationDate"  # date, optional
    MOD_DATE = "/ModDate"  # date, optional
    TRAPPED = "/Trapped"  # name, optional


class PageLayouts:
    """
    Page 84, PDF 1.4 reference.
    Page 115, PDF 2.0 reference.

    PDF names of the page layout values.
    """

    SINGLE_PAGE = "/SinglePage"
    ONE_COLUMN = "/OneColumn"
    TWO_COLUMN_LEFT = "/TwoColumnLeft"
    TWO_COLUMN_RIGHT = "/TwoColumnRight"
    TWO_PAGE_LEFT = "/TwoPageLeft"  # (PDF 1.5)
    TWO_PAGE_RIGHT = "/TwoPageRight"  # (PDF 1.5)


class GraphicsStateParameters:
    """Table 58 – Entries in a Graphics State Parameter Dictionary"""

    # PDF names are case-sensitive: ``OP``/``op`` and ``CA``/``ca`` are
    # distinct keys, hence the lowercase attribute names below.
    TYPE = "/Type"  # name, optional
    LW = "/LW"  # number, optional
    LC = "/LC"  # integer, optional
    LJ = "/LJ"  # integer, optional
    ML = "/ML"  # number, optional
    D = "/D"  # array, optional
    RI = "/RI"  # name, optional
    OP = "/OP"
    op = "/op"
    OPM = "/OPM"
    FONT = "/Font"  # array, optional
    BG = "/BG"
    BG2 = "/BG2"
    UCR = "/UCR"
    UCR2 = "/UCR2"
    TR = "/TR"
    TR2 = "/TR2"
    HT = "/HT"
    FL = "/FL"
    SM = "/SM"
    SA = "/SA"
    BM = "/BM"
    S_MASK = "/SMask"  # dictionary or name, optional
    CA = "/CA"
    ca = "/ca"
    AIS = "/AIS"
    TK = "/TK"


class CatalogDictionary:
    """
    §7.7.2 of the 1.7 and 2.0 references.

    Key names of the document catalog; the trailing comment gives the value
    type and whether the entry is required.
    """

    TYPE = "/Type"  # name, required; must be /Catalog
    VERSION = "/Version"  # name
    EXTENSIONS = "/Extensions"  # dictionary, optional; ISO 32000-1
    PAGES = "/Pages"  # dictionary, required
    PAGE_LABELS = "/PageLabels"  # number tree, optional
    NAMES = "/Names"  # dictionary, optional
    DESTS = "/Dests"  # dictionary, optional
    VIEWER_PREFERENCES = "/ViewerPreferences"  # dictionary, optional
    PAGE_LAYOUT = "/PageLayout"  # name, optional
    PAGE_MODE = "/PageMode"  # name, optional
    OUTLINES = "/Outlines"  # dictionary, optional
    THREADS = "/Threads"  # array, optional
    OPEN_ACTION = "/OpenAction"  # array or dictionary or name, optional
    AA = "/AA"  # dictionary, optional
    URI = "/URI"  # dictionary, optional
    ACRO_FORM = "/AcroForm"  # dictionary, optional
    METADATA = "/Metadata"  # stream, optional
    STRUCT_TREE_ROOT = "/StructTreeRoot"  # dictionary, optional
    MARK_INFO = "/MarkInfo"  # dictionary, optional
    LANG = "/Lang"  # text string, optional
    SPIDER_INFO = "/SpiderInfo"  # dictionary, optional
    OUTPUT_INTENTS = "/OutputIntents"  # array, optional
    PIECE_INFO = "/PieceInfo"  # dictionary, optional
    OC_PROPERTIES = "/OCProperties"  # dictionary, optional
    PERMS = "/Perms"  # dictionary, optional
    LEGAL = "/Legal"  # dictionary, optional
    REQUIREMENTS = "/Requirements"  # array, optional
    COLLECTION = "/Collection"  # dictionary, optional
    NEEDS_RENDERING = "/NeedsRendering"  # boolean, optional
    DSS = "/DSS"  # dictionary, optional
    AF = "/AF"  # array of dictionaries, optional
    D_PART_ROOT = "/DPartRoot"  # dictionary, optional


class OutlineFontFlag(IntFlag):
    """Bit flags selecting the font style (italic and/or bold) of an outline entry."""

    italic = 1 << 0
    bold = 1 << 1


class PageLabelStyle:
    """
    Table 8.10 in the 1.7 reference.
    Table 161 in the 2.0 reference.

    PDF names of the page label numbering styles. The names are
    case-sensitive: ``/R`` vs. ``/r`` and ``/A`` vs. ``/a`` differ.
    """

    DECIMAL = "/D"  # Decimal Arabic numerals
    UPPERCASE_ROMAN = "/R"  # Uppercase Roman numerals
    LOWERCASE_ROMAN = "/r"  # Lowercase Roman numerals
    UPPERCASE_LETTER = "/A"  # Uppercase letters
    LOWERCASE_LETTER = "/a"  # Lowercase letters


class AnnotationFlag(IntFlag):
    """See §12.5.3 "Annotation Flags"."""

    # Written as shifts so the zero-based bit index is visible.
    INVISIBLE = 1 << 0
    HIDDEN = 1 << 1
    PRINT = 1 << 2
    NO_ZOOM = 1 << 3
    NO_ROTATE = 1 << 4
    NO_VIEW = 1 << 5
    READ_ONLY = 1 << 6
    LOCKED = 1 << 7
    TOGGLE_NO_VIEW = 1 << 8
    LOCKED_CONTENTS = 1 << 9


# Tuple of constant-holder classes, listed alphabetically. Not every class of
# this module is included (PageLabelStyle, for example, is not).
PDF_KEYS = (
    AnnotationDictionaryAttributes,
    CatalogAttributes,
    CatalogDictionary,
    CcittFaxDecodeParameters,
    CheckboxRadioButtonAttributes,
    ColorSpaces,
    Core,
    DocumentInformationAttributes,
    EncryptionDictAttributes,
    FieldDictionaryAttributes,
    FileSpecificationDictionaryEntries,
    FilterTypeAbbreviations,
    FilterTypes,
    GoToActionArguments,
    GraphicsStateParameters,
    ImageAttributes,
    InteractiveFormDictEntries,
    LzwFilterParameters,
    PageAttributes,
    PageLayouts,
    PagesAttributes,
    Resources,
    StreamAttributes,
    TrailerKeys,
    TypArguments,
    TypFitArguments,
)


class ImageType(IntFlag):
    """Bit flags naming kinds of images; ``ALL`` combines the three kinds."""

    NONE = 0
    XOBJECT_IMAGES = 1 << 0
    INLINE_IMAGES = 1 << 1
    DRAWING_IMAGES = 1 << 2
    ALL = XOBJECT_IMAGES | INLINE_IMAGES | DRAWING_IMAGES
    IMAGES = ALL  # for consistency with ObjectDeletionFlag


# Maps inline-image value names to their full form. Abbreviations
# (e.g. "/G", "/Fl") resolve to the full name; full names map to themselves,
# so a lookup works for either spelling.
_INLINE_IMAGE_VALUE_MAPPING = {
    "/G": "/DeviceGray",
    "/RGB": "/DeviceRGB",
    "/CMYK": "/DeviceCMYK",
    "/I": "/Indexed",
    "/AHx": "/ASCIIHexDecode",
    "/A85": "/ASCII85Decode",
    "/LZW": "/LZWDecode",
    "/Fl": "/FlateDecode",
    "/RL": "/RunLengthDecode",
    "/CCF": "/CCITTFaxDecode",
    "/DCT": "/DCTDecode",
    "/DeviceGray": "/DeviceGray",
    "/DeviceRGB": "/DeviceRGB",
    "/DeviceCMYK": "/DeviceCMYK",
    "/Indexed": "/Indexed",
    "/ASCIIHexDecode": "/ASCIIHexDecode",
    "/ASCII85Decode": "/ASCII85Decode",
    "/LZWDecode": "/LZWDecode",
    "/FlateDecode": "/FlateDecode",
    "/RunLengthDecode": "/RunLengthDecode",
    "/CCITTFaxDecode": "/CCITTFaxDecode",
    "/DCTDecode": "/DCTDecode",
    "/RelativeColorimetric": "/RelativeColorimetric",
}

# Maps inline-image dictionary keys to their full form. Abbreviated keys
# (e.g. "/BPC", "/W") resolve to the full key; full keys map to themselves.
# Note that "/I" means "/Interpolate" here, whereas the value mapping above
# uses "/I" for "/Indexed".
_INLINE_IMAGE_KEY_MAPPING = {
    "/BPC": "/BitsPerComponent",
    "/CS": "/ColorSpace",
    "/D": "/Decode",
    "/DP": "/DecodeParms",
    "/F": "/Filter",
    "/H": "/Height",
    "/W": "/Width",
    "/I": "/Interpolate",
    "/Intent": "/Intent",
    "/IM": "/ImageMask",
    "/BitsPerComponent": "/BitsPerComponent",
    "/ColorSpace": "/ColorSpace",
    "/Decode": "/Decode",
    "/DecodeParms": "/DecodeParms",
    "/Filter": "/Filter",
    "/Height": "/Height",
    "/Width": "/Width",
    "/Interpolate": "/Interpolate",
    "/ImageMask": "/ImageMask",
}


class AFRelationship:
    """
    Associated file relationship types, defining the relationship between
    the PDF component and the associated file.

    Defined in table 43 of the PDF 2.0 reference.

    Each constant is the PDF name (leading slash included) of one
    relationship type.
    """

    SOURCE = "/Source"  # Original content source
    DATA = "/Data"  # Base data for visual presentation
    ALTERNATIVE = "/Alternative"  # Alternative content representation
    SUPPLEMENT = "/Supplement"  # Supplemental representation of original source/data
    ENCRYPTED_PAYLOAD = "/EncryptedPayload"  # Encrypted payload document
    FORM_DATA = "/FormData"  # Data associated with AcroForm of this PDF
    SCHEMA = "/Schema"  # Schema definition for associated object
    UNSPECIFIED = "/Unspecified"  # Not known or cannot be described with values


class BorderStyles:
    """
    A class defining border styles used in PDF documents.

    Defined in table 168 of the PDF 2.0 reference.

    Each constant is the single-letter PDF name of one border style.
    """

    BEVELED = "/B"
    DASHED = "/D"
    INSET = "/I"
    SOLID = "/S"
    UNDERLINED = "/U"


class FontFlags(IntFlag):
    """
    A class defining font flags in PDF document font descriptor resources.

    Defined in table 121 of the PDF 2.0 reference.

    Values are written as ``1 << n`` with ``n`` the zero-based bit index.
    Indices 4 and 7 to 15 have no member here.
    """

    FIXED_PITCH = 1 << 0
    SERIF = 1 << 1
    SYMBOLIC = 1 << 2
    SCRIPT = 1 << 3
    NONSYMBOLIC = 1 << 5
    ITALIC = 1 << 6
    ALL_CAP = 1 << 16
    SMALL_CAP = 1 << 17
    FORCE_BOLD = 1 << 18


================================================
FILE: pypdf/errors.py
================================================
"""
All errors/exceptions pypdf raises and all of the warnings it uses.

Please note that broken PDF files might cause other Exceptions.
"""


class DeprecationError(Exception):
    """
    Raised when a deprecated feature is used.

    Derives directly from ``Exception``, not from ``PyPdfError``, so an
    ``except PyPdfError`` clause does not catch it.
    """


class DependencyError(Exception):
    """
    Raised when a required dependency (a library or module that pypdf depends on)
    is not available or cannot be imported.

    Derives directly from ``Exception``, not from ``PyPdfError``, so an
    ``except PyPdfError`` clause does not catch it.
    """


class PyPdfError(Exception):
    """
    Base class for all exceptions raised by pypdf.

    Exceptions in this module: ``DeprecationError`` and ``DependencyError``
    subclass ``Exception`` directly instead of this class.
    """


class PdfReadError(PyPdfError):
    """
    Raised when there is an issue reading a PDF file.

    Serves as the base class of the more specific read errors defined in
    this module, such as ``PdfStreamError``, ``FileNotDecryptedError`` and
    ``EmptyFileError``.
    """


class PageSizeNotDefinedError(PyPdfError):
    """
    Raised when the page size of a PDF document is not defined.

    Subclass of ``PyPdfError`` (not of ``PdfReadError``).
    """


class PdfReadWarning(UserWarning):
    """
    Issued when there is a potential issue reading a PDF file, but it can still be read.

    This is a warning category (subclass of ``UserWarning``), not an
    exception derived from ``PyPdfError``.
    """


class PdfStreamError(PdfReadError):
    """
    Raised when there is an issue reading the stream of data in a PDF file.

    Subclass of ``PdfReadError``, so handlers for the latter catch it too.
    """


class ParseError(PyPdfError):
    """
    Raised when there is an issue parsing (analyzing and understanding the
    structure and meaning of) a PDF file.

    Subclass of ``PyPdfError`` (not of ``PdfReadError``).
    """


class FileNotDecryptedError(PdfReadError):
    """
    Raised when a PDF file that has been encrypted
    (meaning it requires a password to be accessed) has not been successfully
    decrypted.

    Base class of ``WrongPasswordError``.
    """


class WrongPasswordError(FileNotDecryptedError):
    """
    Raised when the wrong password is used to try to decrypt an encrypted PDF file.

    Subclass of ``FileNotDecryptedError``, so handlers for the latter catch
    it too.
    """


class EmptyFileError(PdfReadError):
    """
    Raised when a PDF file is empty or has no content.

    Subclass of ``PdfReadError``, so handlers for the latter catch it too.
    """


class EmptyImageDataError(PyPdfError):
    """
    Raised when trying to process an image that has no data.

    Subclass of ``PyPdfError`` (not of ``PdfReadError``).
    """


# Shared message text; presumably used when reading hits the end of a stream
# too early — confirm against the callers.
STREAM_TRUNCATED_PREMATURELY = "Stream has ended unexpectedly"


class LimitReachedError(PyPdfError):
    """
    Raised when a limit is reached.

    Subclass of ``PyPdfError`` (not of ``PdfReadError``).
    """


class XmpDocumentError(PyPdfError, RuntimeError):
    """
    Raised when the XMP XML document context is invalid or missing.

    Inherits from both ``PyPdfError`` and ``RuntimeError``, so it is caught
    by handlers for either of them.
    """


================================================
FILE: pypdf/filters.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


"""
Implementation of stream filters; §7.4 Filters of the PDF 2.0 specification.

§8.9.7 Inline images of the PDF 2.0 specification has abbreviations that can be
used for the names of filters in an inline image object.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

import binascii
import math
import os
import shutil
import struct
import subprocess
import zlib
from base64 import a85decode
from dataclasses import dataclass
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Optional, Union, cast

from ._codecs._codecs import LzwCodec as _LzwCodec
from ._utils import (
    WHITESPACES_AS_BYTES,
    deprecate,
    deprecation_with_replacement,
    logger_warning,
)
from .constants import CcittFaxDecodeParameters as CCITT
from .constants import FilterTypeAbbreviations as FTA
from .constants import FilterTypes as FT
from .constants import ImageAttributes as IA
from .constants import LzwFilterParameters as LZW
from .constants import StreamAttributes as SA
from .errors import DependencyError, LimitReachedError, PdfReadError, PdfStreamError
from .generic import (
    ArrayObject,
    DictionaryObject,
    IndirectObject,
    NullObject,
    NumberObject,
    StreamObject,
    is_null_or_none,
)

# Size limits, in bytes. The two below are presumably consumed by stream
# handling elsewhere in the package — confirm against their users.
MAX_DECLARED_STREAM_LENGTH = 75_000_000
MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH = 75_000_000

# Per-filter output limits. ZLIB_MAX_OUTPUT_LENGTH caps the decompressed size
# in this module's zlib helpers; ZLIB_MAX_RECOVERY_INPUT_LENGTH bounds how far
# into the input the byte-wise zlib recovery keeps tolerating errors.
# The JBIG2/LZW/run-length limits are presumably used by the matching filters.
JBIG2_MAX_OUTPUT_LENGTH = 75_000_000
LZW_MAX_OUTPUT_LENGTH = 75_000_000
RUN_LENGTH_MAX_OUTPUT_LENGTH = 75_000_000
ZLIB_MAX_OUTPUT_LENGTH = 75_000_000
ZLIB_MAX_RECOVERY_INPUT_LENGTH = 5_000_000

# Reuse cached 1-byte values in the fallback loop to avoid per-byte allocations.
# Index ``i`` holds the one-byte ``bytes`` object with value ``i``.
_SINGLE_BYTES = tuple(bytes((i,)) for i in range(256))


def _decompress_with_limit(data: bytes) -> bytes:
    """
    Inflate ``data`` in one go, producing at most ``ZLIB_MAX_OUTPUT_LENGTH`` bytes.

    Raises:
        LimitReachedError: Input was left unprocessed because the output
            limit was hit.

    """
    inflater = zlib.decompressobj()
    inflated = inflater.decompress(data, max_length=ZLIB_MAX_OUTPUT_LENGTH)
    leftover = inflater.unconsumed_tail
    if not leftover:
        return inflated
    # zlib parks the input it could not process (due to max_length) in
    # ``unconsumed_tail``; anything there means the output was cut short.
    raise LimitReachedError(
        f"Limit reached while decompressing. {len(leftover)} bytes remaining."
    )


def decompress(data: bytes) -> bytes:
    """
    Decompress the given data using zlib.

    Attempts to decompress the input data using zlib. If this fails due to
    a zlib error, two recovery strategies are tried in order:

    1. Retry with up to 8 trailing bytes removed.
    2. Feed the input byte by byte to a decompression object which
       auto-detects zlib/gzip headers, skipping bytes that trigger errors.

    Please note that the output length is limited to avoid memory
    issues. If you need to process larger content streams, consider
    adapting ``pypdf.filters.ZLIB_MAX_OUTPUT_LENGTH``. In case you
    are only dealing with trusted inputs and/or want to disable these
    limits, set the value to `0`.

    Args:
        data: The input data to be decompressed.

    Returns:
        The decompressed data.

    Raises:
        LimitReachedError: The output limit has been exceeded, or the
            byte-wise recovery still hit zlib errors beyond
            ``ZLIB_MAX_RECOVERY_INPUT_LENGTH`` input bytes.

    """
    try:
        return _decompress_with_limit(data)
    except zlib.error:
        # First quick approach: There are known issues with faulty added bytes to the
        # tail of the encoded stream from early Adobe Distiller or Pitstop versions
        # with CR char as the default line separator (assumed by reverse engineering)
        # that breaks the decoding process in the end.
        #
        # Try first to cut off some of the tail byte by byte, but limited to not
        # iterate through too many loops and kill the performance for large streams,
        # to then allow the final fallback to run. Added this intermediate attempt,
        # because starting from the head of the stream byte by byte kills completely
        # the performance for large streams (e.g., 6 MB) with the tail-byte-issue
        # and takes ages. This solution is really fast:
        max_tail_cut_off_bytes: int = 8
        for i in range(1, min(max_tail_cut_off_bytes + 1, len(data))):
            try:
                return _decompress_with_limit(data[:-i])
            except zlib.error:
                pass

        # If still failing, go through the input byte by byte. Adding 32 to
        # wbits makes zlib auto-detect a zlib or gzip header.
        decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32)
        # Collect the pieces and join once; repeated ``bytes +=`` is quadratic.
        output_chunks: list[bytes] = []
        # A limit of 0 is documented as "no limit", and zlib treats
        # ``max_length=0`` the same way. Without this distinction the counter
        # would start at 0 and the limit check would fire on the first byte.
        limit_enabled = ZLIB_MAX_OUTPUT_LENGTH > 0
        remaining_limit = ZLIB_MAX_OUTPUT_LENGTH if limit_enabled else 0
        data_length = len(data)
        known_errors = set()
        for index, byte_value in enumerate(data):
            chunk = _SINGLE_BYTES[byte_value]
            try:
                decompressed = decompressor.decompress(chunk, max_length=remaining_limit)
            except zlib.error as error:
                if index > ZLIB_MAX_RECOVERY_INPUT_LENGTH:
                    raise LimitReachedError(
                        f"Recovery limit reached while decompressing. {data_length - index} bytes remaining."
                    )
                # Report each distinct zlib message only once.
                error_str = str(error)
                if error_str not in known_errors:
                    logger_warning(error_str, __name__)
                    known_errors.add(error_str)
                continue
            if decompressed:
                output_chunks.append(decompressed)
            if limit_enabled:
                remaining_limit -= len(decompressed)
                if remaining_limit <= 0:
                    raise LimitReachedError(
                        f"Limit reached while decompressing. {data_length - index} bytes remaining."
                    )
        return b"".join(output_chunks)


class FlateDecode:
    """Flate (zlib) stream filter including TIFF and PNG predictor post-processing."""

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode data which is flate-encoded.

        Args:
          data: Flate-encoded data.
          decode_parms: Additional decoding parameters. Anything which is
            not a ``DictionaryObject`` is treated like empty parameters.

        Returns:
          The flate-decoded data with the predictor (if any) undone.

        Raises:
          PdfReadError: Unsupported parameters have been found.

        """
        str_data = decompress(data)

        if isinstance(decode_parms, DictionaryObject):
            parameters = decode_parms
        else:
            parameters = DictionaryObject()

        predictor = parameters.get("/Predictor", 1)

        # predictor 1 == no predictor
        if predictor != 1:
            columns, colors, bits_per_component = FlateDecode._get_parameters(parameters)

            # PNG predictor can vary by row and so is the lead byte on each row
            rowlength = (
                math.ceil(columns * colors * bits_per_component / 8) + 1
            )  # number of bytes

            # TIFF prediction:
            if predictor == 2:
                rowlength -= 1  # remove the predictor byte
                # NOTE(review): this works on whole bytes; for rows with less
                # than one byte per column ``bpp`` becomes 0 — confirm whether
                # sub-byte components need dedicated handling here.
                bpp = rowlength // columns
                str_data = bytearray(str_data)
                for i in range(len(str_data)):
                    if i % rowlength >= bpp:
                        str_data[i] = (str_data[i] + str_data[i - bpp]) % 256
                str_data = bytes(str_data)
            # PNG prediction:
            elif 10 <= predictor <= 15:
                str_data = FlateDecode._decode_png_prediction(
                    str_data, columns, rowlength
                )
            else:
                raise PdfReadError(f"Unsupported flatedecode predictor {predictor!r}")
        return str_data

    @staticmethod
    def _get_parameters(parameters: DictionaryObject) -> tuple[int, int, int]:
        """
        Read the predictor-related parameters from the decode parameters.

        Returns:
          ``(columns, colors, bits_per_component)``; missing entries default
          to 1, 1 and 8.

        Raises:
          PdfReadError: A value is not an integer or is smaller than 1.

        """
        # For details, see table 8 of ISO 32000-2:2020.
        def get(key: str, default: int) -> int:
            _value = parameters.get(key, NumberObject(default)).get_object()
            if not isinstance(_value, int) or _value < 1:
                raise PdfReadError(f"Expected positive number for {key}, got {_value}!")
            return _value

        columns = get(key=LZW.COLUMNS, default=1)
        colors = get(key=LZW.COLORS, default=1)
        bits_per_component = get(key=LZW.BITS_PER_COMPONENT, default=8)
        return columns, colors, bits_per_component

    @staticmethod
    def _decode_png_prediction(data: bytes, columns: int, rowlength: int) -> bytes:
        """
        Undo PNG row filtering.

        Args:
          data: Rows of ``rowlength`` bytes, each starting with a filter
            type byte. A short last row is padded with zero bytes.
          columns: Number of samples per row.
          rowlength: Bytes per row including the leading filter byte.

        Returns:
          The unfiltered rows without their filter bytes.

        Raises:
          PdfReadError: A row uses a filter type other than 0 to 4.

        """
        # PNG prediction can vary from row to row
        if (remainder := len(data) % rowlength) != 0:
            logger_warning("Image data is not rectangular. Adding padding.", __name__)
            data += b"\x00" * (rowlength - remainder)
            assert len(data) % rowlength == 0
        output = []
        prev_rowdata = (0,) * rowlength
        # Recomputed locally to not change params. PNG defines bpp as the
        # number of bytes per complete pixel, rounded up to 1. Without the
        # clamp, rows with several pixels per byte (e.g. 1-bit images) get
        # bpp == 0 and each byte would be predicted from itself.
        bpp = max(1, (rowlength - 1) // columns)
        for row in range(0, len(data), rowlength):
            rowdata: list[int] = list(data[row : row + rowlength])
            filter_byte = rowdata[0]

            if filter_byte == 0:
                # PNG None Predictor
                pass
            elif filter_byte == 1:
                # PNG Sub Predictor
                for i in range(bpp + 1, rowlength):
                    rowdata[i] = (rowdata[i] + rowdata[i - bpp]) % 256
            elif filter_byte == 2:
                # PNG Up Predictor
                for i in range(1, rowlength):
                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
            elif filter_byte == 3:
                # PNG Average Predictor
                # The first pixel has no left neighbour, only the row above.
                for i in range(1, bpp + 1):
                    floor = prev_rowdata[i] // 2
                    rowdata[i] = (rowdata[i] + floor) % 256
                for i in range(bpp + 1, rowlength):
                    left = rowdata[i - bpp]
                    floor = (left + prev_rowdata[i]) // 2
                    rowdata[i] = (rowdata[i] + floor) % 256
            elif filter_byte == 4:
                # PNG Paeth Predictor
                # The first pixel has no left neighbour, only the row above.
                for i in range(1, bpp + 1):
                    rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
                for i in range(bpp + 1, rowlength):
                    left = rowdata[i - bpp]
                    up = prev_rowdata[i]
                    up_left = prev_rowdata[i - bpp]

                    p = left + up - up_left
                    dist_left = abs(p - left)
                    dist_up = abs(p - up)
                    dist_up_left = abs(p - up_left)

                    # Ties are resolved in the order left, up, upper left.
                    if dist_left <= dist_up and dist_left <= dist_up_left:
                        paeth = left
                    elif dist_up <= dist_up_left:
                        paeth = up
                    else:
                        paeth = up_left

                    rowdata[i] = (rowdata[i] + paeth) % 256
            else:
                raise PdfReadError(
                    f"Unsupported PNG filter {filter_byte!r}"
                )  # pragma: no cover
            prev_rowdata = tuple(rowdata)
            output.extend(rowdata[1:])
        return bytes(output)

    @staticmethod
    def encode(data: bytes, level: int = -1) -> bytes:
        """
        Compress the input data using zlib.

        Args:
            data: The data to be compressed.
            level: See https://docs.python.org/3/library/zlib.html#zlib.compress

        Returns:
            The compressed data.

        """
        return zlib.compress(data, level)


class ASCIIHexDecode:
    """
    The ASCIIHexDecode filter decodes data that has been encoded in ASCII
    hexadecimal form, turning every pair of hexadecimal digits back into
    one byte of binary data.
    """

    @staticmethod
    def decode(
        data: Union[str, bytes],
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode an ASCII-Hex encoded data stream.

        Args:
          data: hexadecimal digits as ``str`` or ``bytes``, optionally
            interleaved with whitespace and terminated by the EOD marker
            ``>``. Anything after the first ``>`` is ignored.
          decode_parms: this filter does not use parameters.

        Returns:
          The decoded bytes. If the number of hexadecimal digits is odd,
          the data is decoded as if a ``0`` followed the last digit.

        Raises:
          binascii.Error: If the data before the EOD marker contains
            characters which are neither hexadecimal digits nor whitespace.

        """
        if isinstance(data, str):
            data = data.encode()

        # Stop at EOD
        eod = data.find(b">")
        if eod == -1:
            logger_warning(
                "missing EOD in ASCIIHexDecode, check if output is OK",
                __name__,
            )
            hex_data = data
        else:
            hex_data = data[:eod]

        # Remove whitespace. Besides the ASCII whitespace characters, NUL is
        # a PDF whitespace character as well and has to be ignored, too.
        hex_data = hex_data.translate(None, b"\x00\t\n\x0b\x0c\r ")

        # Pad if odd length
        if len(hex_data) % 2 == 1:
            hex_data += b"0"

        return binascii.unhexlify(hex_data)


class RunLengthDecode:
    """
    The RunLengthDecode filter decodes data that has been encoded in a
    simple byte-oriented format based on run length.
    The encoded data is a sequence of runs, where each run consists of
    a length byte followed by 1 to 128 bytes of data. If the length byte is
    in the range 0 to 127,
    the following length + 1 (1 to 128) bytes are copied literally during
    decompression.
    If length is in the range 129 to 255, the following single byte is to be
    copied 257 − length (2 to 128) times during decompression. A length value
    of 128 denotes EOD.
    """

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode a run length encoded data stream.

        Malformed input is handled leniently: a missing EOD marker, data
        following the EOD marker and a repeat run without its data byte
        only emit a warning, and the output decoded so far is returned.

        Args:
          data: a bytes sequence of length/data
          decode_parms: this filter does not use parameters.

        Returns:
          A bytes decompressed sequence.

        Raises:
          LimitReachedError: If the decoded output grows beyond
            ``RUN_LENGTH_MAX_OUTPUT_LENGTH``.

        """
        lst = []
        index = 0
        data_length = len(data)
        total_length = 0
        while True:
            if index >= data_length:
                logger_warning(
                    "missing EOD in RunLengthDecode, check if output is OK", __name__
                )
                break  # Reached end of string without an EOD
            length = data[index]
            index += 1
            if length == 128:
                if index < data_length:
                    # We should first check, if we have an inner stream from a multi-encoded
                    # stream with a faulty trailing newline that we can decode properly.
                    # We will just ignore the last byte and raise a warning ...
                    if (index == data_length - 1) and (data[index : index + 1] == b"\n"):
                        logger_warning(
                            "Found trailing newline in stream data, check if output is OK", __name__
                        )
                        break
                    # Raising an exception here breaks all image extraction for this file, which might
                    # not be desirable. For this reason, indicate that the output is most likely wrong,
                    # as processing stopped after the first EOD marker. See issue #3517.
                    logger_warning(
                        "Early EOD in RunLengthDecode, check if output is OK", __name__
                    )
                break
            if length < 128:
                length += 1
                lst.append(data[index : (index + length)])
                index += length
            else:  # >128
                if index >= data_length:
                    # The length byte was the last byte of the data, thus there is
                    # nothing to repeat. Indexing the data would fail here, so treat
                    # this like the other recoverable stream defects.
                    logger_warning(
                        "Truncated run in RunLengthDecode, check if output is OK", __name__
                    )
                    break
                length = 257 - length
                lst.append(bytes((data[index],)) * length)
                index += 1
            total_length += length
            if total_length > RUN_LENGTH_MAX_OUTPUT_LENGTH:
                raise LimitReachedError("Limit reached while decompressing.")
        return b"".join(lst)


class LZWDecode:
    """LZWDecode filter; the decoding itself is delegated to ``_LzwCodec``."""

    class Decoder:
        """Keeps the encoded data and decodes it on request."""

        # Code values exposed as class attributes; the decoding path below
        # does not reference them.
        STOP = 257
        CLEARDICT = 256

        def __init__(self, data: bytes) -> None:
            self.data = data

        def decode(self) -> bytes:
            """Decode the stored data, limiting the output to ``LZW_MAX_OUTPUT_LENGTH``."""
            codec = _LzwCodec(max_output_length=LZW_MAX_OUTPUT_LENGTH)
            return codec.decode(self.data)

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode an LZW encoded data stream.

        Args:
          data: the LZW encoded data.
          decode_parms: accepted for interface compatibility with the other
            filters, but not used by this filter.

        Returns:
          decoded data.

        """
        decoder = LZWDecode.Decoder(data)
        return decoder.decode()


class ASCII85Decode:
    """Decodes string ASCII85-encoded data into a byte format."""

    @staticmethod
    def decode(
        data: Union[str, bytes],
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Decode an Ascii85 encoded data stream.

        Surrounding whitespace is stripped, as is whitespace directly in
        front of a trailing ``>``. If decoding in Adobe mode fails only
        because the end marker is missing, a warning is logged and the data
        is decoded again without expecting the Adobe framing.

        Args:
          data: ``bytes`` or ``str`` text to decode.
          decode_parms: this filter does not use parameters.

        Returns:
          decoded data.

        """
        payload = data.encode() if isinstance(data, str) else data
        payload = payload.strip(WHITESPACES_AS_BYTES)
        if len(payload) > 2 and payload.endswith(b">"):
            # Drop whitespace which separates the final ">" from the rest.
            head, terminator = payload[:-1], payload[-1:]
            payload = head.rstrip(WHITESPACES_AS_BYTES) + terminator
        try:
            return a85decode(payload, adobe=True, ignorechars=WHITESPACES_AS_BYTES)
        except ValueError as error:
            end_marker_missing = (
                error.args[0] == "Ascii85 encoded byte sequences must end with b'~>'"
            )
            if not end_marker_missing:
                raise
            logger_warning("Ignoring missing Ascii85 end marker.", __name__)
            return a85decode(payload, adobe=False, ignorechars=WHITESPACES_AS_BYTES)


class DCTDecode:
    """Pass-through handler for the DCTDecode (JPEG) filter."""

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Return DCT (discrete cosine transform) encoded data unchanged.

        The DCTDecode filter is based on the JPEG standard (ISO/IEC 10918).
        This method performs no decompression: the bytes passed in are
        returned as they are.

        Args:
          data: the DCT encoded bytes.
          decode_parms: this filter does not use parameters.

        Returns:
          ``data``, unmodified.

        """
        return data


class JPXDecode:
    """Pass-through handler for the JPXDecode (JPEG 2000) filter."""

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Return data encoded with the wavelet-based JPEG 2000 standard unchanged.

        This method performs no decompression: the bytes passed in are
        returned as they are.

        Args:
          data: the JPEG 2000 encoded bytes.
          decode_parms: this filter does not use parameters.

        Returns:
          ``data``, unmodified.

        """
        return data


@dataclass
class CCITTParameters:
    """§7.4.6, optional parameters for the CCITTFaxDecode filter."""

    K: int = 0
    columns: int = 1728
    rows: int = 0
    EndOfLine: Union[bool, None] = False
    EncodedByteAlign: Union[bool, None] = False
    EndOfBlock: Union[bool, None] = True
    BlackIs1: bool = False
    DamagedRowsBeforeError: Union[int, None] = 0

    @property
    def group(self) -> int:
        """The CCITT group selected by ``K``: 4 for negative values, 3 otherwise."""
        # K < 0: Pure two-dimensional encoding (Group 4)
        # K == 0: Pure one-dimensional encoding (Group 3, 1-D)
        # K > 0: Mixed one- and two-dimensional encoding (Group 3, 2-D)
        return 4 if self.K < 0 else 3


def __create_old_class_instance(
    K: int = 0,
    columns: int = 0,
    rows: int = 0
) -> CCITTParameters:
    """
    Emit the deprecation notice for the misspelled name and build the replacement.

    Note that ``columns`` defaults to 0 here, unlike the default of
    ``CCITTParameters`` itself.
    """
    deprecation_with_replacement("CCITParameters", "CCITTParameters", "6.0.0")
    replacement = CCITTParameters(K=K, columns=columns, rows=rows)
    return replacement


# Keep the old (misspelled) class name usable
CCITParameters = __create_old_class_instance


class CCITTFaxDecode:
    """
    §7.4.6, CCITTFaxDecode filter (ISO 32000).

    Either Group 3 or Group 4 CCITT facsimile (fax) encoding.
    CCITT encoding is bit-oriented, not byte-oriented.

    The encoded data is not decompressed here: ``decode`` prepends a TIFF
    header describing it, so that the result can be read as a TIFF file.
    """

    @staticmethod
    def _get_parameters(
        parameters: Union[None, ArrayObject, DictionaryObject, IndirectObject],
        rows: Union[int, IndirectObject],
    ) -> CCITTParameters:
        """
        Collect the CCITT parameters from the decode parameters.

        Args:
          parameters: a decode parameter dictionary, an array of such
            dictionaries, or nothing at all.
          rows: the number of rows; converted with ``int()``.

        Returns:
          The defaults of ``CCITTParameters``, overridden by the ``K``,
          ``Columns`` and ``BlackIs1`` entries which are present.

        """
        result = CCITTParameters(rows=int(rows))
        if not parameters:
            return result

        resolved = cast(
            Union[ArrayObject, DictionaryObject], parameters.get_object()
        )
        # An array is evaluated entry by entry (later entries overwrite
        # earlier ones); a single dictionary is treated like a one-entry array.
        candidates: list[Any] = (
            list(resolved) if isinstance(resolved, ArrayObject) else [resolved]
        )
        for candidate in candidates:
            if CCITT.K in candidate:
                result.K = candidate[CCITT.K].get_object()
            if CCITT.COLUMNS in candidate:
                result.columns = candidate[CCITT.COLUMNS].get_object()
            if CCITT.BLACK_IS_1 in candidate:
                result.BlackIs1 = candidate[CCITT.BLACK_IS_1].get_object().value
        return result

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        height: int = 0,
        **kwargs: Any,
    ) -> bytes:
        """
        Wrap CCITT encoded data into a little-endian TIFF container.

        Args:
          data: the CCITT encoded image data.
          decode_parms: the decode parameters, see ``_get_parameters``.
          height: the number of rows of the image.

        Returns:
          A TIFF header with one image file directory, followed by the
          unchanged ``data``.

        """
        params = CCITTFaxDecode._get_parameters(decode_parms, height)

        header_format = "<2shlh" + "hhll" * 8 + "h"
        header_size = struct.calcsize(header_format)
        # One row per IFD entry: (tag, field type, count, value).
        # Field type 3 is SHORT, field type 4 is LONG.
        ifd_entries = (
            (256, 4, 1, params.columns),  # ImageWidth
            (257, 4, 1, params.rows),  # ImageLength
            (258, 3, 1, 1),  # BitsPerSample
            (259, 3, 1, params.group),  # Compression: the CCITT group
            # Tag 262 is PhotometricInterpretation in TIFF 6.0; it carries BlackIs1.
            (262, 3, 1, int(params.BlackIs1)),
            (273, 4, 1, header_size),  # StripOffsets: data starts after the header
            (278, 4, 1, params.rows),  # RowsPerStrip
            (279, 4, 1, len(data)),  # StripByteCounts: size of the image data
        )
        header = struct.pack(
            header_format,
            b"II",  # Byte order indication: Little endian
            42,  # Version number (always 42)
            8,  # Offset to the first image file directory (IFD)
            len(ifd_entries),  # Number of tags in IFD
            *(field for entry in ifd_entries for field in entry),
            0,  # last IFD
        )

        return header + data


JBIG2DEC_BINARY = shutil.which("jbig2dec")


class JBIG2Decode:
    """JBIG2Decode filter which delegates the decoding to the external ``jbig2dec`` binary."""

    @staticmethod
    def decode(
        data: bytes,
        decode_parms: Optional[DictionaryObject] = None,
        **kwargs: Any,
    ) -> bytes:
        """
        Convert JBIG2 data to PNG by running ``jbig2dec``.

        The data (and the ``/JBIG2Globals`` stream, if the decode parameters
        provide one) is written to a temporary directory and handed to the
        binary, which writes the PNG to its standard output.

        Args:
          data: the embedded JBIG2 data.
          decode_parms: the decode parameters; only ``/JBIG2Globals`` is used.

        Returns:
          The standard output of ``jbig2dec``.

        Raises:
          DependencyError: If the binary is missing or does not know the
            ``--embedded`` or ``-M`` options.
          LimitReachedError: If the binary reports that it failed to
            allocate the image data buffer.
          PdfStreamError: If the binary exits with a non-zero exit code.

        """
        if JBIG2DEC_BINARY is None:
            raise DependencyError("jbig2dec binary is not available.")

        with TemporaryDirectory() as tempdir:
            workdir = Path(tempdir)
            input_files: list[Path] = []

            # The globals have to precede the image on the command line.
            if decode_parms and "/JBIG2Globals" in decode_parms:
                globals_entry = decode_parms["/JBIG2Globals"]
                if not is_null_or_none(globals_entry) and not is_null_or_none(
                    globals_object := globals_entry.get_object()
                ):
                    assert globals_object is not None, "mypy"
                    if isinstance(globals_object, StreamObject):
                        globals_file = workdir.joinpath("globals.jbig2")
                        globals_file.write_bytes(globals_object.get_data())
                        input_files.append(globals_file)

            image_file = workdir.joinpath("image.jbig2")
            image_file.write_bytes(data)
            input_files.append(image_file)

            # The error detection below relies on untranslated messages.
            environment = os.environ.copy()
            environment["LC_ALL"] = "C"
            command = [
                JBIG2DEC_BINARY,
                "--embedded",
                "--format", "png",
                "--output", "-",
                "-M", str(JBIG2_MAX_OUTPUT_LENGTH),
                *input_files,
            ]
            result = subprocess.run(  # noqa: S603
                command,
                capture_output=True,
                env=environment,
            )
            stderr = result.stderr
            unknown_option_markers = (
                b"unrecognized option '--embedded'",
                b"unrecognized option '-M'",
            )
            if any(marker in stderr for marker in unknown_option_markers):
                raise DependencyError("jbig2dec>=0.19 is required.")
            if b"FATAL ERROR failed to allocate image data buffer" in stderr:
                raise LimitReachedError(
                    f"Memory limit reached while reading JBIG2 data:\n{stderr.decode('utf-8')}"
                )
            if stderr:
                # Forward everything else the binary complained about.
                for message in stderr.decode("utf-8").splitlines():
                    logger_warning(message, __name__)
            if result.returncode != 0:
                raise PdfStreamError(f"Unable to decode JBIG2 data. Exit code: {result.returncode}")
        return result.stdout

    @staticmethod
    def _is_binary_compatible() -> bool:
        """
        Check whether a ``jbig2dec`` binary of at least version 0.19 is available.

        The version is taken from the ``--version`` output: everything after
        the first space.
        """
        if not JBIG2DEC_BINARY:  # pragma: no cover
            return False
        completed = subprocess.run(  # noqa: S603
            [JBIG2DEC_BINARY, "--version"],
            capture_output=True,
            text=True,
        )
        reported_version = completed.stdout.split(" ", maxsplit=1)[1]

        from ._utils import Version  # noqa: PLC0415
        minimum_version = Version("0.19")
        return Version(reported_version) >= minimum_version


def _deprecate_inline_image_filters(filter_name: str, old_name: str, new_name: str) -> None:
    """
    Emit a deprecation notice if ``filter_name`` is the abbreviated ``old_name``.

    Args:
        filter_name: The filter name found in the stream.
        old_name: The deprecated filter name.
        new_name: The filter name to recommend instead.

    """
    if filter_name == old_name:
        deprecate(
            f"The filter name {old_name} is deprecated and will be removed in pypdf 7.0.0. Use {new_name} instead.",
            4,  # presumably the stack level pointing at the caller's caller - confirm
        )


def decode_stream_data(stream: StreamObject) -> bytes:
    """
    Decode the stream data based on the specified filters.

    This function decodes the stream data using the filters provided in the
    stream. The filters are applied in the order in which they are listed.
    A stream without a ``/Filter`` entry (or with a null one) is returned
    as it is. Filters without a matching ``/DecodeParms`` entry are run
    with empty parameters.

    Args:
        stream: The input stream object containing the data and filters.

    Returns:
        The decoded stream data.

    Raises:
        NotImplementedError: If an unsupported filter type is encountered.

    """
    filters = stream.get(SA.FILTER)
    if isinstance(filters, IndirectObject):
        filters = filters.get_object()
    if filters is None or isinstance(filters, NullObject):
        # No filter at all: there is nothing to decode. Wrapping the missing
        # value into a tuple would make it look like an unsupported filter.
        filters = ()
    elif not isinstance(filters, ArrayObject):
        # We have a single filter instance
        filters = (filters,)
    decode_parms = stream.get(SA.DECODE_PARMS, ())
    if not isinstance(decode_parms, (list, tuple)):
        decode_parms = (decode_parms,)
    # Every filter needs a parameter entry: ``zip`` stops at the shorter
    # sequence, which would silently skip the remaining filters otherwise.
    missing_parms = len(filters) - len(decode_parms)
    if missing_parms > 0:
        decode_parms = tuple(decode_parms) + ({},) * missing_parms
    data: bytes = stream._data
    # If there is no data to decode, we should not try to decode it.
    if not data:
        return data
    for filter_name, params in zip(filters, decode_parms):
        if isinstance(params, NullObject):
            params = {}
        if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx):
            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.AHx, new_name=FT.ASCII_HEX_DECODE)
            data = ASCIIHexDecode.decode(data)
        elif filter_name in (FT.ASCII_85_DECODE, FTA.A85):
            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.A85, new_name=FT.ASCII_85_DECODE)
            data = ASCII85Decode.decode(data)
        elif filter_name in (FT.LZW_DECODE, FTA.LZW):
            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.LZW, new_name=FT.LZW_DECODE)
            data = LZWDecode.decode(data, params)
        elif filter_name in (FT.FLATE_DECODE, FTA.FL):
            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.FL, new_name=FT.FLATE_DECODE)
            data = FlateDecode.decode(data, params)
        elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL):
            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.RL, new_name=FT.RUN_LENGTH_DECODE)
            data = RunLengthDecode.decode(data)
        elif filter_name in (FT.CCITT_FAX_DECODE, FTA.CCF):
            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.CCF, new_name=FT.CCITT_FAX_DECODE)
            # Fall back to the default of ``CCITTFaxDecode.decode``; an empty
            # tuple cannot be converted to the integer row count.
            height = stream.get(IA.HEIGHT, 0)
            data = CCITTFaxDecode.decode(data, params, height)
        elif filter_name in (FT.DCT_DECODE, FTA.DCT):
            _deprecate_inline_image_filters(filter_name=filter_name, old_name=FTA.DCT, new_name=FT.DCT_DECODE)
            data = DCTDecode.decode(data)
        elif filter_name == FT.JPX_DECODE:
            data = JPXDecode.decode(data)
        elif filter_name == FT.JBIG2_DECODE:
            data = JBIG2Decode.decode(data, params)
        elif filter_name == "/Crypt":
            # The identity crypt filter leaves the data untouched.
            if "/Name" in params or "/Type" in params:
                raise NotImplementedError(
                    "/Crypt filter with /Name or /Type not supported yet"
                )
        else:
            raise NotImplementedError(f"Unsupported filter {filter_name}")
    return data


================================================
FILE: pypdf/generic/__init__.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

"""Implementation of generic PDF objects (dictionary, number, string, ...)."""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

from ..constants import OutlineFontFlag
from ._base import (
    BooleanObject,
    ByteStringObject,
    FloatObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    PdfObject,
    TextStringObject,
    encode_pdfdocencoding,
    is_null_or_none,
)
from ._data_structures import (
    ArrayObject,
    ContentStream,
    DecodedStreamObject,
    Destination,
    DictionaryObject,
    EncodedStreamObject,
    Field,
    StreamObject,
    TreeObject,
    read_object,
)
from ._files import EmbeddedFile
from ._fit import Fit
from ._link import DirectReferenceLink, NamedReferenceLink, ReferenceLink, extract_links
from ._outline import OutlineItem
from ._rectangle import RectangleObject
from ._utils import (
    create_string_object,
    decode_pdfdocencoding,
    hex_to_rgb,
    read_hex_string_from_stream,
    read_string_from_stream,
)
from ._viewerpref import ViewerPreferences

# Module-level constant holding the result of Fit.fit().
PAGE_FIT = Fit.fit()


# Public names of this package: PAGE_FIT from above plus the names
# re-exported from the imports at the top of this module.
__all__ = [
    "PAGE_FIT",
    "ArrayObject",
    "BooleanObject",
    "ByteStringObject",
    "ContentStream",
    "DecodedStreamObject",
    "Destination",
    "DictionaryObject",
    "DirectReferenceLink",
    "EmbeddedFile",
    "EncodedStreamObject",
    "Field",
    "Fit",
    "FloatObject",
    "IndirectObject",
    "NameObject",
    "NamedReferenceLink",
    "NullObject",
    "NumberObject",
    "OutlineFontFlag",
    "OutlineItem",
    "PdfObject",
    "RectangleObject",
    "ReferenceLink",
    "StreamObject",
    "TextStringObject",
    "TreeObject",
    "ViewerPreferences",
    # Utility functions
    "create_string_object",
    "decode_pdfdocencoding",
    "encode_pdfdocencoding",
    "extract_links",
    "hex_to_rgb",
    "is_null_or_none",
    "read_hex_string_from_stream",
    # Data structures core functions
    "read_object",
    "read_string_from_stream",
]


================================================
FILE: pypdf/generic/_appearance_stream.py
================================================
import re
from dataclasses import dataclass
from enum import IntEnum
from typing import Any, Optional, Union, cast

from .._codecs import fill_from_encoding
from .._codecs.core_font_metrics import CORE_FONT_METRICS
from .._font import Font
from .._utils import logger_warning
from ..constants import AnnotationDictionaryAttributes, BorderStyles, FieldDictionaryAttributes
from ..generic import (
    DecodedStreamObject,
    DictionaryObject,
    NameObject,
    NumberObject,
    RectangleObject,
)
from ..generic._base import ByteStringObject, TextStringObject, is_null_or_none

# NOTE(review): presumably the font size (in points) used for multiline text
# when no size is given; the constant is used outside the code shown here - confirm.
DEFAULT_FONT_SIZE_IN_MULTILINE = 12


@dataclass
class BaseStreamConfig:
    """A container representing the basic layout of an appearance stream."""
    # The rectangle which BaseStreamAppearance stores as the /BBox entry.
    rectangle: Union[RectangleObject, tuple[float, float, float, float]] = (0.0, 0.0, 0.0, 0.0)
    border_width: int = 1  # The width of the border in points
    # The border style; a solid border by default.
    border_style: str = BorderStyles.SOLID


class BaseStreamAppearance(DecodedStreamObject):
    """A class representing the very base of an appearance stream, that is, a rectangle and a border."""

    def __init__(self, layout: Optional[BaseStreamConfig] = None) -> None:
        """
        Set up the stream as a form XObject for the given layout.

        The stream gets its ``/Type``, ``/Subtype`` and ``/BBox`` entries,
        the latter being taken from the rectangle of the layout.

        Args:
            layout: The basic layout parameters. A default ``BaseStreamConfig``
                is used if omitted.
        """
        super().__init__()
        if not layout:
            layout = BaseStreamConfig()
        self._layout = layout
        self[NameObject("/Type")] = NameObject("/XObject")
        self[NameObject("/Subtype")] = NameObject("/Form")
        bounding_box = RectangleObject(self._layout.rectangle)
        self[NameObject("/BBox")] = bounding_box


class TextAlignment(IntEnum):
    """Defines the alignment options for text within a form field's appearance stream."""

    # NOTE(review): the numbering looks like the quadding (/Q) values of PDF
    # form fields - confirm against the code which reads the field.
    LEFT = 0
    CENTER = 1
    RIGHT = 2


class TextStreamAppearance(BaseStreamAppearance):
    """
    A class representing the appearance stream for a text-based form field.

    This class generates the content stream (the `ap_stream_data`) that dictates
    how text is rendered within a form field's bounding box. It handles properties
    like font, font size, color, multiline text, and text selection highlighting.
    """

    def _scale_text(
        self,
        font: Font,
        font_size: float,
        leading_factor: float,
        field_width: float,
        field_height: float,
        text: str,
        min_font_size: float,
        font_size_step: float = 0.2
    ) -> tuple[list[tuple[float, str]], float]:
        """
        Takes a piece of text and scales it to field_width or field_height, given font_name
        and font_size. Wraps text where necessary.

        Args:
            font: The font to be used.
            font_size: The font size in points.
            leading_factor: The line distance.
            field_width: The width of the field in which to fit the text.
            field_height: The height of the field in which to fit the text.
            text: The text to fit with the field.
            min_font_size: The minimum font size at which to scale the text.
            font_size_step: The amount by which to decrement font size per step while scaling.

        Returns:
            The text in the form of list of tuples, each tuple containing the length of a line
            and its contents, and the font_size for these lines and lengths.
        """
        orig_text = text
        paragraphs = text.replace("\n", "\r").split("\r")
        wrapped_lines = []
        current_line_words: list[str] = []
        current_line_width: float = 0
        space_width = font.space_width * font_size / 1000
        for paragraph in paragraphs:
            if not paragraph.strip():
                wrapped_lines.append((0.0, ""))
                continue
            words = paragraph.split(" ")
            for i, word in enumerate(words):
                word_width = font.text_width(word) * font_size / 1000
                test_width = current_line_width + word_width + (space_width if i else 0)
                if test_width > field_width and current_line_words:
                    wrapped_lines.append((current_line_width, " ".join(current_line_words)))
                    current_line_words = [word]
                    current_line_width = word_width
                elif not current_line_words and word_width > field_width:
                    wrapped_lines.append((word_width, word))
                    current_line_words = []
                    current_line_width = 0
                else:
                    if current_line_words:
                        current_line_width += space_width
                    current_line_words.append(word)
                    current_line_width += word_width
            if current_line_words:
                wrapped_lines.append((current_line_width, " ".join(current_line_words)))
                current_line_words = []
                current_line_width = 0
        # Estimate total height.
        estimated_total_height = font_size + (len(wrapped_lines) - 1) * leading_factor * font_size
        if estimated_total_height > field_height:
            # Text overflows height; Retry with smaller font size.
            new_font_size = font_size - font_size_step
            if new_font_size >= min_font_size:
                return self._scale_text(
                    font,
                    new_font_size,
                    leading_factor,
                    field_width,
                    field_height,
                    orig_text,
                    min_font_size,
                    font_size_step
                )
        return wrapped_lines, round(font_size, 1)

    def _generate_appearance_stream_data(
        self,
        text: str,
        selection: Union[list[str], None],
        font: Font,
        font_glyph_byte_map: Optional[dict[str, bytes]] = None,
        font_name: str = "/Helv",
        font_size: float = 0.0,
        font_color: str = "0 g",
        is_multiline: bool = False,
        alignment: TextAlignment = TextAlignment.LEFT,
        is_comb: bool = False,
        max_length: Optional[int] = None
    ) -> bytes:
        """
        Generates the raw bytes of the PDF appearance stream for a text field.

        This private method assembles the PDF content stream operators to draw
        the provided text within the rectangle of the layout. It handles text positioning,
        font application, color, and special formatting like selected text.

        The text is first broken up into "lines" (pairs of width and content), then
        every line is positioned with a relative ``Td`` operator and shown with ``Tj``.
        For comb fields, every character is treated as a line of its own.

        Args:
            text: The text to be rendered in the form field.
            selection: An optional list of strings that should be highlighted as selected.
                A line is highlighted if its full content is contained in this list.
            font: The font to use.
            font_glyph_byte_map: An optional dictionary mapping characters to their
                byte representation for glyph encoding. Characters missing from this
                map are encoded as UTF-16-BE.
            font_name: The name of the font resource to use (e.g., "/Helv").
            font_size: The font size. If 0, it is automatically calculated
                based on whether the field is multiline or not.
            font_color: The color to apply to the font, represented as a PDF
                graphics state string (e.g., "0 g" for black).
            is_multiline: A boolean indicating if the text field is multiline.
            alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
            is_comb: Boolean that designates fixed-length fields, where every character
                fills one "cell", such as in a postcode.
            max_length: Used if is_comb is set. The maximum number of characters for a fixed-
                length field.

        Returns:
            A byte string containing the PDF content stream data. The text is wrapped
            in a ``/Tx BMC`` ... ``EMC`` marked-content section and clipped to the
            rectangle that remains after subtracting the margins.

        """
        rectangle = self._layout.rectangle
        font_glyph_byte_map = font_glyph_byte_map or {}
        if isinstance(rectangle, tuple):
            rectangle = RectangleObject(rectangle)
        # The line distance as a multiple of the font size, derived from the height of the
        # font bounding box (which is given in 1/1000 of the font size).
        leading_factor = (font.font_descriptor.bbox[3] - font.font_descriptor.bbox[1]) / 1000.0

        # Set margins based on border width and style, but never less than 1 point
        factor = 2 if self._layout.border_style in {"/B", "/I"} else 1
        margin = max(self._layout.border_width * factor, 1)
        # The text area keeps one margin at top and bottom and two margins left and right.
        field_height = rectangle.height - 2 * margin
        field_width = rectangle.width - 4 * margin

        # If font_size is 0, apply the logic for multiline or large-as-possible font
        # NOTE(review): because of the if/elif structure, the comb branch below is not
        # reached when font_size is 0, so a comb field with automatic font size is laid
        # out as one single line - confirm that this is intended.
        if font_size == 0:
            min_font_size = 4.0       # The minimum font size
            # Don't wrap text when dealing with a /Ch field, in order to prevent problems
            # with matching "selection" with "line" later on.
            if selection:
                is_multiline = False
            if is_multiline:
                # Start at the default size; _scale_text wraps the text and shrinks the
                # font until the text fits the field height or min_font_size is reached.
                font_size = DEFAULT_FONT_SIZE_IN_MULTILINE
                lines, font_size = self._scale_text(
                    font,
                    font_size,
                    leading_factor,
                    field_width,
                    field_height,
                    text,
                    min_font_size
                )
            else:
                # Single line: use the largest size that fits both the height and the
                # width of the field, but not less than min_font_size.
                max_vertical_size = field_height / leading_factor
                text_width_unscaled = font.text_width(text) / 1000
                # "or 1" avoids a division by zero for text without any width.
                max_horizontal_size = field_width / (text_width_unscaled or 1)
                font_size = round(max(min(max_vertical_size, max_horizontal_size), min_font_size), 1)
                lines = [(text_width_unscaled * font_size, text)]
        elif is_comb:
            if max_length and len(text) > max_length:
                logger_warning (
                    f"Length of text {text} exceeds maximum length ({max_length}) of field, input truncated.",
                    __name__
                )
            # We act as if each character is one line, because we draw it separately later on
            # Characters beyond max_length (if given) are dropped.
            lines = [(
                font.text_width(char) * font_size / 1000,
                char
            ) for index, char in enumerate(text) if index < (max_length or len(text))]
        else:
            # Explicit font size: only split at line breaks, no wrapping or scaling.
            lines = [(
                font.text_width(line) * font_size / 1000,
                line
            ) for line in text.replace("\n", "\r").split("\r")]

        # Set the vertical offset
        if is_multiline:
            # Position the first line relative to the top of the rectangle.
            y_offset = rectangle.height + margin - font.font_descriptor.bbox[3] * font_size / 1000.0
        else:
            # Center the text vertically, based on the ascent of the font.
            y_offset = margin + ((field_height - font.font_descriptor.ascent * font_size / 1000) / 2)
        default_appearance = f"{font_name} {font_size} Tf {font_color}"

        # Open the marked-content section, set the clipping rectangle and begin the text object.
        ap_stream = (
            f"q\n/Tx BMC \nq\n{2 * margin} {margin} {field_width} {field_height} "
            f"re\nW\nBT\n{default_appearance}\n"
        ).encode()
        current_x_pos: float = 0  # Initial virtual position within the text object.

        for line_number, (line_width, line) in enumerate(lines):
            if selection and line in selection:
                # Might be improved, but cannot find how to get fill working => replaced with lined box
                ap_stream += (
                    f"1 {y_offset - (line_number * font_size * leading_factor) - 1} "
                    f"{rectangle.width - 2} {font_size + 2} re\n"
                    f"0.5 0.5 0.5 rg s\n{default_appearance}\n"
                ).encode()

            # Calculate the desired absolute starting X for the current line
            desired_abs_x_start: float = 0
            if is_comb and max_length:
                # Calculate the width of a cell for one character
                cell_width = rectangle.width / max_length
                # Space from the left edge of the cell to the character's baseline start
                # line_width here is the *actual* character width in points for the single character 'line'
                centering_offset_in_cell = (cell_width - line_width) / 2
                # Absolute start X = (Cell Index, i.e., line_number * Cell Width) + Centering Offset
                desired_abs_x_start = (line_number * cell_width) + centering_offset_in_cell
            elif alignment == TextAlignment.RIGHT:
                desired_abs_x_start = rectangle.width - margin * 2 - line_width
            elif alignment == TextAlignment.CENTER:
                desired_abs_x_start = (rectangle.width - line_width) / 2
            else:  # Left aligned; default
                desired_abs_x_start = margin * 2
            # Calculate x_rel_offset: how much to move from the current_x_pos
            # to reach the desired_abs_x_start.
            x_rel_offset = desired_abs_x_start - current_x_pos

            # Y-offset:
            y_rel_offset: float = 0
            if line_number == 0:
                y_rel_offset = y_offset  # Initial vertical position
            elif is_comb:
                y_rel_offset = 0.0  # DO NOT move vertically for subsequent characters
            else:
                y_rel_offset = - font_size * leading_factor  # Move down by line height

            # Td is a relative translation (Tx and Ty).
            # It updates the current text position.
            ap_stream += f"{x_rel_offset} {y_rel_offset} Td\n".encode()
            # Update current_x_pos based on the Td operation for the next iteration.
            # This is the X position where the *current line* will start.
            current_x_pos = desired_abs_x_start

            # Translate every character into the bytes of its glyph; characters that are
            # unknown to the font are encoded as UTF-16-BE.
            encoded_line: list[bytes] = [
                font_glyph_byte_map.get(c, c.encode("utf-16-be")) for c in line
            ]
            # As soon as one glyph needs more than one byte, write the whole line as a
            # hexadecimal string; otherwise, write it as a literal string.
            if any(len(c) >= 2 for c in encoded_line):
                ap_stream += b"<" + (b"".join(encoded_line)).hex().encode() + b"> Tj\n"
            else:
                ap_stream += b"(" + b"".join(encoded_line) + b") Tj\n"
        # End the text object and close the graphics states and the marked-content section.
        ap_stream += b"ET\nQ\nEMC\nQ\n"
        return ap_stream

    def __init__(
        self,
        layout: Optional[BaseStreamConfig] = None,
        text: str = "",
        selection: Optional[list[str]] = None,
        font_resource: Optional[DictionaryObject] = None,
        font_name: str = "/Helv",
        font_size: float = 0.0,
        font_color: str = "0 g",
        is_multiline: bool = False,
        alignment: TextAlignment = TextAlignment.LEFT,
        is_comb: bool = False,
        max_length: Optional[int] = None
    ) -> None:
        """
        Initializes a TextStreamAppearance object.

        This constructor creates a new PDF stream object configured as an XObject
        of subtype Form. It uses the `_generate_appearance_stream_data` method to
        generate the content for the stream and registers the font that was used
        in the `/Resources` of the stream.

        If no font resource is given, a warning is logged and a Helvetica font with
        WinAnsiEncoding is used under the name "/Helv". A warning is logged as well
        if the text contains characters that the font encoding does not support.

        Args:
            layout: The basic layout parameters.
            text: The text to be rendered in the form field.
            selection: An optional list of strings that should be highlighted as selected.
            font_resource: An optional variable that represents a PDF font dictionary.
            font_name: The name of the font resource, e.g., "/Helv".
            font_size: The font size. If 0, it's auto-calculated.
            font_color: The font color string.
            is_multiline: A boolean indicating if the text field is multiline.
            alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
            is_comb: Boolean that designates fixed-length fields, where every character
                fills one "cell", such as in a postcode.
            max_length: Used if is_comb is set. The maximum number of characters for a fixed-
                length field.

        """
        super().__init__(layout)

        # Without a font resource, fall back to a Helvetica font that we construct ourselves.
        if not font_resource:
            logger_warning(f"Font dictionary for {font_name} not found; defaulting to Helvetica.", __name__)
            font_name = "/Helv"
            helvetica_metrics = CORE_FONT_METRICS["Helvetica"]
            font = Font(
                name="Helvetica",
                character_map={},
                encoding=dict(zip(range(256), fill_from_encoding("cp1252"))),  # WinAnsiEncoding
                sub_type="Type1",
                font_descriptor=helvetica_metrics.font_descriptor,
                character_widths=helvetica_metrics.character_widths
            )
            font_resource = font.as_font_resource()
        else:
            font = Font.from_font_resource(font_resource)

        encoding = font.encoding

        # Check whether the font resource is able to encode the text value.
        # We should add a final check against the character_map (CMap) of the font,
        # but we don't appear to have PDF forms with such fonts, so we skip this for
        # now.
        try:
            if isinstance(encoding, str):
                text.encode(encoding, "surrogatepass")
                encodable = True
            else:
                supported_chars = set(encoding.values())
                encodable = all(char in supported_chars for char in text)
        except UnicodeEncodeError:
            encodable = False

        if not encodable:
            logger_warning(
                f"Text string '{text}' contains characters not supported by font encoding. "
                "This may result in text corruption. "
                "Consider calling writer.update_page_form_field_values with auto_regenerate=True.",
                __name__
            )

        # Build the mapping from characters to the bytes that select their glyphs.
        font_glyph_byte_map: dict[str, bytes]
        if isinstance(encoding, str):
            font_glyph_byte_map = {
                char: code.encode(encoding) for code, char in font.character_map.items()
            }
        else:
            reverse_encoding = {char: bytes((code,)) for code, char in encoding.items()}
            font_glyph_byte_map = dict(reverse_encoding)
            # Entries of the character map take precedence over the plain encoding.
            for code, char in font.character_map.items():
                font_glyph_byte_map[char] = reverse_encoding.get(code, code)

        stream_bytes = self._generate_appearance_stream_data(
            text,
            selection,
            font,
            font_glyph_byte_map,
            font_name=font_name,
            font_size=font_size,
            font_color=font_color,
            is_multiline=is_multiline,
            alignment=alignment,
            is_comb=is_comb,
            max_length=max_length
        )
        self.set_data(ByteStringObject(stream_bytes))
        self[NameObject("/Length")] = NumberObject(len(stream_bytes))

        # Update Resources with font information; refer to the font indirectly where possible.
        font_entry = getattr(font_resource, "indirect_reference", font_resource)
        font_dictionary = DictionaryObject({NameObject(font_name): font_entry})
        self[NameObject("/Resources")] = DictionaryObject({NameObject("/Font"): font_dictionary})

    @staticmethod
    def _find_annotation_font_resource(
        font_name: str,
        annotation: DictionaryObject,
        acro_form: DictionaryObject
    ) -> tuple[str, DictionaryObject]:
        """
        Find or construct the font resource dictionary for a font name.

        The font is looked up in the (inherited) ``/DR`` resources of the annotation,
        falling back to the ``/DR`` resources of the AcroForm. If it cannot be found
        there, a font resource is constructed from the core font metrics, defaulting
        to Helvetica (with a warning) for font names that are not a core font.

        Args:
            font_name: The name of the font resource, e.g., "/Helv".
            annotation: The widget annotation dictionary object.
            acro_form: The root AcroForm dictionary from the PDF catalog.

        Returns:
            The font name that is actually used and its font resource dictionary.
        """
        # Try to find a resource dictionary for the font by examining the annotation and, if that fails,
        # the AcroForm resources dictionary
        acro_form_default = acro_form.get("/DR", DictionaryObject())
        resources: Any = cast(DictionaryObject, annotation.get_inherited("/DR", acro_form_default))
        font_resource = resources.get("/Font", DictionaryObject()).get(font_name, None)
        if not is_null_or_none(font_resource):
            return font_name, font_resource

        # Normally, we should have found a font resource by now. However, when a user has provided a specific
        # font name, we may not have found the associated font resource among the AcroForm resources. Also, in
        # case of the 14 Adobe Core fonts, we may be expected to construct a font resource ourselves.
        core_font_name = font_name.removeprefix("/")
        if core_font_name not in CORE_FONT_METRICS:
            # Default to Helvetica if we haven't found a font resource and cannot construct one ourselves.
            logger_warning(f"Font dictionary for {font_name} not found; defaulting to Helvetica.", __name__)
            font_name = "/Helvetica"
            core_font_name = "Helvetica"
        metrics = CORE_FONT_METRICS[core_font_name]
        constructed_font = Font(
            name=core_font_name,
            character_map={},
            encoding=dict(zip(range(256), fill_from_encoding("cp1252"))),  # WinAnsiEncoding
            sub_type="Type1",
            font_descriptor=metrics.font_descriptor,
            character_widths=metrics.character_widths
        )
        return font_name, constructed_font.as_font_resource()

    @classmethod
    def from_text_annotation(
        cls,
        acro_form: DictionaryObject,  # _root_object[CatalogDictionary.ACRO_FORM])
        field: DictionaryObject,
        annotation: DictionaryObject,
        user_font_name: str = "",
        user_font_size: float = -1,
    ) -> "TextStreamAppearance":
        """
        Creates a TextStreamAppearance object from a text field annotation.

        This class method is a factory for creating a `TextStreamAppearance`
        instance by extracting all necessary information (bounding box, font,
        text content, etc.) from the PDF field and annotation dictionaries.
        It respects inheritance for properties like default appearance (`/DA`)
        and maximum length (`/MaxLen`), and takes the border style (`/BS`) from
        the widget annotation, falling back to the field dictionary.

        Args:
            acro_form: The root AcroForm dictionary from the PDF catalog.
            field: The field dictionary object.
            annotation: The widget annotation dictionary object associated with the field.
            user_font_name: An optional user-provided font name to override the
                default. Defaults to an empty string.
            user_font_size: An optional user-provided font size to override the
                default. A value of -1 indicates no override.

        Returns:
            A new `TextStreamAppearance` instance configured for the given field.

        Raises:
            ValueError: If the default appearance string does not contain a ``Tf`` operator.

        """
        # Calculate rectangle dimensions
        _rectangle = cast(RectangleObject, annotation[AnnotationDictionaryAttributes.Rect])
        rectangle = RectangleObject((0, 0, abs(_rectangle[2] - _rectangle[0]), abs(_rectangle[3] - _rectangle[1])))

        # Get default appearance dictionary from annotation
        default_appearance = annotation.get_inherited(
            AnnotationDictionaryAttributes.DA,
            acro_form.get(AnnotationDictionaryAttributes.DA, None),
        )
        if not default_appearance:
            # Create a default appearance if none was found in the annotation
            default_appearance = TextStringObject("/Helv 0 Tf 0 g")
        else:
            default_appearance = default_appearance.get_object()

        # Retrieve field text and selected values
        field_flags = field.get(FieldDictionaryAttributes.Ff, 0)
        if (
                field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and
                field_flags & FieldDictionaryAttributes.FfBits.Combo == 0
        ):
            text = "\n".join(annotation.get_inherited(FieldDictionaryAttributes.Opt, []))
            selection = field.get("/V", [])
            if not isinstance(selection, list):
                selection = [selection]
        else:  # /Tx
            text = field.get("/V", "")
            selection = []

        # Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
        text = text.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")

        # Derive font name, size and color from the default appearance. Also set
        # user-provided font name and font size in the default appearance, if given.
        # For a font name, this presumes that we can find an associated font resource
        # dictionary. Uses the variable font_properties as an intermediate.
        # As per the PDF spec:
        # "At a minimum, the string [that is, default_appearance] shall include a Tf (text
        # font) operator along with its two operands, font and size" (Section 12.7.4.3
        # "Variable text" of the PDF 2.0 specification).
        font_properties = [prop for prop in re.split(r"\s", default_appearance) if prop]
        font_name = font_properties.pop(font_properties.index("Tf") - 2)
        font_size = float(font_properties.pop(font_properties.index("Tf") - 1))
        font_properties.remove("Tf")
        # Whatever remains after removing the font operator is used as the color.
        font_color = " ".join(font_properties)
        # Determine the font name to use, prioritizing the user's input
        if user_font_name:
            font_name = user_font_name
        # Determine the font size to use, prioritizing the user's input
        if user_font_size > 0:
            font_size = user_font_size

        font_name, font_resource = cls._find_annotation_font_resource(font_name, annotation, acro_form)

        # Retrieve formatting information
        is_comb = False
        max_length = None
        if field_flags & FieldDictionaryAttributes.FfBits.Comb:
            is_comb = True
            # /MaxLen is an inheritable entry of the field. If the widget annotation is a
            # separate kid of the field, the entry is not present on the annotation itself,
            # so we have to walk up the parents to find it.
            max_length = annotation.get_inherited("/MaxLen", None)
        is_multiline = False
        if field_flags & FieldDictionaryAttributes.FfBits.Multiline:
            is_multiline = True
        alignment = field.get("/Q", TextAlignment.LEFT)
        border_width = 1
        border_style = BorderStyles.SOLID
        # The border style is an entry of the widget annotation. Fall back to the field
        # dictionary for the case where it has been stored there.
        border_source = annotation if "/BS" in annotation else field
        if "/BS" in border_source:
            border_dictionary = cast(DictionaryObject, border_source["/BS"])
            border_width = border_dictionary.get("/W", border_width)
            border_style = border_dictionary.get("/S", border_style)

        # Create the TextStreamAppearance instance
        layout = BaseStreamConfig(rectangle=rectangle, border_width=border_width, border_style=border_style)
        new_appearance_stream = cls(
            layout,
            text,
            selection,
            font_resource,
            font_name=font_name,
            font_size=font_size,
            font_color=font_color,
            is_multiline=is_multiline,
            alignment=alignment,
            is_comb=is_comb,
            max_length=max_length
        )

        # Take over the remaining entries of an existing normal appearance stream, except
        # for those that describe the stream itself.
        if AnnotationDictionaryAttributes.AP in annotation:
            for key, value in (
                cast(DictionaryObject, annotation[AnnotationDictionaryAttributes.AP]).get("/N", {}).items()
            ):
                if key in {"/BBox", "/Length", "/Subtype", "/Type", "/Filter"}:
                    continue
                # Don't overwrite font resources added by TextStreamAppearance.__init__
                if key == "/Resources":
                    if "/Font" not in value:
                        value.get_object()[NameObject("/Font")] = DictionaryObject()
                    value["/Font"].get_object()[NameObject(font_name)] = getattr(
                        font_resource, "indirect_reference", font_resource
                    )
                else:
                    new_appearance_stream[key] = value

        return new_appearance_stream


================================================
FILE: pypdf/generic/_base.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import binascii
import codecs
import hashlib
import re
import sys
from collections.abc import Sequence
from math import log10
from struct import iter_unpack
from typing import Any, Callable, ClassVar, Optional, Union, cast

if sys.version_info[:2] >= (3, 10):
    from typing import TypeGuard
else:
    from typing_extensions import TypeGuard  # PEP 647

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

from .._codecs import _pdfdoc_encoding_rev
from .._protocols import PdfObjectProtocol, PdfWriterProtocol
from .._utils import (
    StreamType,
    classproperty,
    deprecation_no_replacement,
    deprecation_with_replacement,
    logger_warning,
    read_non_whitespace,
    read_until_regex,
)
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfReadError, PdfStreamError

__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"


class PdfObject(PdfObjectProtocol):
    """
    Base class of the PDF objects.

    It provides the hashing helpers and the bookkeeping that is shared when cloning
    an object into a writer. ``hash_bin``, ``clone`` and ``write_to_stream`` have to
    be implemented by the subclasses.
    """

    # function for calculating a hash value
    hash_func: Callable[..., "hashlib._Hash"] = hashlib.sha1
    # Only declared here, not assigned: an instance may not have this attribute at all,
    # which is why _reference_clone handles the AttributeError.
    indirect_reference: Optional["IndirectObject"]

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        Raises:
            NotImplementedError: Always; subclasses have to provide the implementation.

        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement .hash_bin() so far"
        )

    def hash_value_data(self) -> bytes:
        """Return the data to be hashed: the encoded string representation of the object."""
        return f"{self}".encode()

    def hash_value(self) -> bytes:
        """
        Return the class name and the hex digest of ``hash_value_data()``.

        Returns:
            The encoded string ``"<class name>:<hex digest>"``, where the digest
            is calculated with ``hash_func``.

        """
        return (
            f"{self.__class__.__name__}:"
            f"{self.hash_func(self.hash_value_data()).hexdigest()}"
        ).encode()

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter)
        without ensuring links. This is used in clone_document_from_root with incremental = True.

        This base implementation simply delegates to ``clone``.

        Args:
          pdf_dest: Target to clone to.

        Returns:
          The cloned PdfObject

        """
        return self.clone(pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "PdfObject":
        """
        Clone object into pdf_dest (PdfWriterProtocol which is an interface for PdfWriter).

        By default, this method will call ``_reference_clone`` (see ``_reference``).


        Args:
          pdf_dest: Target to clone to.
          force_duplicate: By default, if the object has already been cloned and referenced,
            the copy will be returned; when ``True``, a new copy will be created.
            (Default value = ``False``)
          ignore_fields: List/tuple of field names (for dictionaries) that will be ignored
            during cloning (applies to children duplication as well). If fields are to be
            considered for a limited number of levels, you have to add it as integer, for
            example ``[1,"/B","/TOTO"]`` means that ``"/B"`` will be ignored at the first
            level only but ``"/TOTO"`` on all levels.

        Returns:
          The cloned PdfObject

        Raises:
          NotImplementedError: Always; subclasses have to provide the implementation.

        """
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement .clone so far"
        )

    def _reference_clone(
        self, clone: Any, pdf_dest: PdfWriterProtocol, force_duplicate: bool = False
    ) -> PdfObjectProtocol:
        """
        Reference the object within the _objects of pdf_dest only if
        indirect_reference attribute exists (which means the objects was
        already identified in xref/xobjstm) if object has been already
        referenced do nothing.

        Args:
          clone: The freshly created copy of this object.
          pdf_dest: The writer in which the copy shall be registered.
          force_duplicate: When ``True``, register the copy as a new object even if
            this object has already been cloned into pdf_dest before.

        Returns:
          The clone, or the object that has been registered in pdf_dest for this
          object by an earlier call.

        """
        # Nothing to do if the clone is already an indirect object of the destination.
        # Any error (for example, a clone without an indirect reference) just means
        # that this shortcut does not apply.
        try:
            if not force_duplicate and clone.indirect_reference.pdf == pdf_dest:
                return clone
        except Exception:
            pass
        # An object without the indirect_reference attribute is returned without
        # being registered in the destination.
        try:
            ind = self.indirect_reference
        except AttributeError:
            return clone
        # Determine the object number in the destination: in incremental mode, an object
        # originating from the reader of the destination keeps its number (if within
        # range); otherwise, the next free number is used.
        if (
            pdf_dest.incremental
            and ind is not None
            and ind.pdf == pdf_dest._reader
            and ind.idnum <= len(pdf_dest._objects)
        ):
            i = ind.idnum
        else:
            i = len(pdf_dest._objects) + 1
        if ind is not None:
            # _id_translated maps, per source document (keyed by its id()), the object
            # numbers of the source to the object numbers in the destination.
            if id(ind.pdf) not in pdf_dest._id_translated:
                pdf_dest._id_translated[id(ind.pdf)] = {}
                # Keep a reference to the source document as well - judging by the key
                # name, presumably to keep it alive so that its id() is not reused.
                pdf_dest._id_translated[id(ind.pdf)]["PreventGC"] = ind.pdf  # type: ignore[index]
            # The object has been cloned before: return the registered copy.
            if (
                not force_duplicate
                and ind.idnum in pdf_dest._id_translated[id(ind.pdf)]
            ):
                obj = pdf_dest.get_object(
                    pdf_dest._id_translated[id(ind.pdf)][ind.idnum]
                )
                assert obj is not None
                return obj
            pdf_dest._id_translated[id(ind.pdf)][ind.idnum] = i
        # Store the clone at its slot; a slot beyond the end of the list means that the
        # clone has to be appended, which also determines its final object number.
        try:
            pdf_dest._objects[i - 1] = clone
        except IndexError:
            pdf_dest._objects.append(clone)
            i = len(pdf_dest._objects)
        clone.indirect_reference = IndirectObject(i, 0, pdf_dest)
        return clone

    def get_object(self) -> Optional["PdfObject"]:
        """Resolve indirect references."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write the object to the given stream.

        Args:
          stream: The stream to write to.
          encryption_key: Not used in this base implementation.

        Raises:
          NotImplementedError: Always; subclasses have to provide the implementation.

        """
        raise NotImplementedError


class NullObject(PdfObject):
    """Object written to a stream as the keyword ``null``."""

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NullObject":
        """Clone object into pdf_dest."""
        duplicate = self._reference_clone(NullObject(), pdf_dest, force_duplicate)
        return cast("NullObject", duplicate)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash built from the class only, as there is no value to consider.

        """
        key = (self.__class__,)
        return hash(key)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the ``null`` keyword; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"null")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "NullObject":
        """
        Consume four bytes from ``stream`` and expect them to spell ``null``.

        Raises:
            PdfReadError: If the four bytes read are anything else.

        """
        if stream.read(4) == b"null":
            return NullObject()
        raise PdfReadError("Could not read Null object")

    def __repr__(self) -> str:
        return "NullObject"

    def __eq__(self, other: object) -> bool:
        # All null objects are interchangeable.
        return isinstance(other, NullObject)

    def __hash__(self) -> int:
        # Kept consistent with __eq__: every instance hashes alike.
        return self.hash_bin()


class BooleanObject(PdfObject):
    """Object written to a stream as ``true`` or ``false``."""

    def __init__(self, value: Any) -> None:
        self.value = value

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "BooleanObject":
        """Clone object into pdf_dest."""
        duplicate = self._reference_clone(
            BooleanObject(self.value), pdf_dest, force_duplicate
        )
        return cast("BooleanObject", duplicate)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        key = (self.__class__, self.value)
        return hash(key)

    def __eq__(self, o: object, /) -> bool:
        # Comparable with other BooleanObject instances and with plain bool.
        if isinstance(o, BooleanObject):
            other_value = o.value
        elif isinstance(o, bool):
            other_value = o
        else:
            return False
        return self.value == other_value

    def __hash__(self) -> int:
        return self.hash_bin()

    def __repr__(self) -> str:
        if self.value:
            return "True"
        return "False"

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``true`` or ``false``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"true" if self.value else b"false")

    @staticmethod
    def read_from_stream(stream: StreamType) -> "BooleanObject":
        """
        Read a boolean keyword from ``stream``.

        Four bytes are read first; when they are ``fals`` one more byte is
        consumed (its value is not inspected).

        Raises:
            PdfReadError: If the first four bytes are neither ``true`` nor
                ``fals``.

        """
        head = stream.read(4)
        if head == b"fals":
            stream.read(1)  # skip the fifth byte of the keyword
            return BooleanObject(False)
        if head != b"true":
            raise PdfReadError("Could not read Boolean object")
        return BooleanObject(True)


class IndirectObject(PdfObject):
    """
    Reference to an object owned by ``pdf``, identified by ``idnum`` and
    ``generation``.

    Attribute, item, membership and iteration access that the reference
    itself cannot satisfy is forwarded to the referenced object.
    """

    def __init__(self, idnum: int, generation: int, pdf: Any) -> None:  # PdfReader
        self.idnum = idnum
        self.generation = generation
        self.pdf = pdf

    def __hash__(self) -> int:
        # Mirrors __eq__: same numbers and the very same owner object.
        return hash((self.idnum, self.generation, id(self.pdf)))

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.idnum, self.generation, id(self.pdf)))

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "PdfObject":
        """Return a reference with the same numbers, but owned by pdf_dest."""
        return IndirectObject(self.idnum, self.generation, pdf_dest)

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "IndirectObject":
        """
        Clone object into pdf_dest.

        Args:
            pdf_dest: Writer receiving the clone.
            force_duplicate: When False, a reference already owned by
                pdf_dest is returned unchanged.
            ignore_fields: Forwarded to the clone() of the referenced object.

        Returns:
            A reference valid in pdf_dest.

        """
        if self.pdf == pdf_dest and not force_duplicate:
            # Already duplicated and no extra duplication required
            return self
        if id(self.pdf) not in pdf_dest._id_translated:
            pdf_dest._id_translated[id(self.pdf)] = {}
            # The source document is stored next to its translation table;
            # presumably so that its id() stays valid (see the key name).
            pdf_dest._id_translated[id(self.pdf)]["PreventGC"] = self.pdf  # type: ignore[index]

        if self.idnum in pdf_dest._id_translated[id(self.pdf)]:
            # This object number has already been translated for pdf_dest.
            dup = pdf_dest.get_object(pdf_dest._id_translated[id(self.pdf)][self.idnum])
            if force_duplicate:
                assert dup is not None
                assert dup.indirect_reference is not None
                idref = dup.indirect_reference
                return IndirectObject(idref.idnum, idref.generation, idref.pdf)
        else:
            obj = self.get_object()
            # case observed : a pointed object can not be found
            if obj is None:
                # Use a NullObject carrying this reference as a stand-in.
                obj = NullObject()
                assert isinstance(self, (IndirectObject,))
                obj.indirect_reference = self
            dup = pdf_dest._add_object(
                obj.clone(pdf_dest, force_duplicate, ignore_fields)
            )
        assert dup is not None, "mypy"
        assert dup.indirect_reference is not None, "mypy"
        return dup.indirect_reference

    @property
    def indirect_reference(self) -> "IndirectObject":  # type: ignore[override]
        """An indirect object is its own reference."""
        return self

    def get_object(self) -> Optional["PdfObject"]:
        """Ask the owning ``pdf`` for the object this reference points to."""
        return self.pdf.get_object(self)

    def __deepcopy__(self, memo: Any) -> "IndirectObject":
        # The owner is shared on purpose; only the reference is duplicated.
        return IndirectObject(self.idnum, self.generation, self.pdf)

    def _get_object_with_check(self) -> Optional["PdfObject"]:
        """
        Resolve the reference and reject a result that is itself a reference.

        Raises:
            PdfStreamError: If the resolved object is an IndirectObject.

        """
        o = self.get_object()
        # the check is done here to not slow down get_object()
        if isinstance(o, IndirectObject):
            raise PdfStreamError(
                f"{self.__repr__()} references an IndirectObject {o.__repr__()}"
            )
        return o

    def __getattr__(self, name: str) -> Any:
        """
        Look up ``name`` on the pointed object.

        Only called when regular attribute lookup failed.

        Raises:
            AttributeError: If neither the reference nor the pointed object
                provides ``name``.

        """
        # Resolving the target needs idnum/generation/pdf. If one of them is
        # what is missing (instance built without __init__, e.g. by
        # copy.copy() or pickle), forwarding would re-enter this method
        # endlessly and end in RecursionError instead of AttributeError.
        if name in ("idnum", "generation", "pdf"):
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            )
        # Attribute not found in object: look in pointed object
        try:
            return getattr(self._get_object_with_check(), name)
        except AttributeError as exc:
            raise AttributeError(
                f"No attribute {name} found in IndirectObject or pointed object"
            ) from exc

    def __getitem__(self, key: Any) -> Any:
        # items should be extracted from pointed Object
        return self._get_object_with_check()[key]  # type: ignore

    def __contains__(self, key: Any) -> bool:
        return key in self._get_object_with_check()  # type: ignore

    def __iter__(self) -> Any:
        return self._get_object_with_check().__iter__()  # type: ignore

    def __float__(self) -> float:
        # in this case we are looking for the pointed data
        return self.get_object().__float__()  # type: ignore

    def __int__(self) -> int:
        # in this case we are looking for the pointed data
        return self.get_object().__int__()  # type: ignore

    def __str__(self) -> str:
        # in this case we are looking for the pointed data
        return self.get_object().__str__()

    def __repr__(self) -> str:
        return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})"

    def __eq__(self, other: object) -> bool:
        # Equal only when numbers match and the owner is the same object.
        return (
            other is not None
            and isinstance(other, IndirectObject)
            and self.idnum == other.idnum
            and self.generation == other.generation
            and self.pdf is other.pdf
        )

    def __ne__(self, other: object) -> bool:
        return not self.__eq__(other)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write ``<idnum> <generation> R``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(f"{self.idnum} {self.generation} R".encode())

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "IndirectObject":  # PdfReader
        """
        Parse ``<idnum> <generation> R`` from ``stream``.

        Args:
            stream: Source positioned on the first byte of the object number.
            pdf: Owner assigned to the created reference.

        Raises:
            PdfStreamError: If the stream ends inside one of the two numbers.
            PdfReadError: If the token after the numbers is not ``R``.

        """
        idnum = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                break
            idnum += tok
        generation = b""
        while True:
            tok = stream.read(1)
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
            if tok.isspace():
                # Whitespace before the first digit is skipped; afterwards it
                # terminates the number.
                if not generation:
                    continue
                break
            generation += tok
        r = read_non_whitespace(stream)
        if r != b"R":
            raise PdfReadError(
                f"Error reading indirect object reference at byte {hex(stream.tell())}"
            )
        return IndirectObject(int(idnum), int(generation), pdf)


# Digit budget used by FloatObject.myrepr(): the number of decimals written is
# this value minus the integer part of log10(abs(value)), and never fewer than
# one. Kept at module level so that users can adjust it.
FLOAT_WRITE_PRECISION = 8


class FloatObject(float, PdfObject):
    """Real number object; a ``float`` that can be written to a stream."""

    def __new__(
        cls, value: Any = "0.0", context: Optional[Any] = None
    ) -> Self:
        """
        Build the object from anything ``float()`` accepts.

        Args:
            value: Value to convert.
            context: Not used.

        Returns:
            The new object; 0.0 (with a logged warning) when ``value`` cannot
            be converted.

        """
        try:
            value = float(value)
            return float.__new__(cls, value)
        except Exception as e:
            # If this isn't a valid decimal (happens in malformed PDFs)
            # fallback to 0
            logger_warning(
                f"{e} : FloatObject ({value}) invalid; use 0.0 instead", __name__
            )
            return float.__new__(cls, 0.0)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "FloatObject":
        """Clone object into pdf_dest."""
        return cast(
            "FloatObject",
            self._reference_clone(FloatObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        # as_numeric must be *called*: hashing the bound method would not
        # reflect the numeric value at all.
        return hash((self.__class__, self.as_numeric()))

    def myrepr(self) -> str:
        """
        Format the value in fixed-point notation.

        The number of decimals is derived from FLOAT_WRITE_PRECISION and the
        magnitude of the value (at least one); trailing zeros and a trailing
        dot are stripped. Zero is rendered as ``0.0``.
        """
        if self == 0:
            return "0.0"
        nb = FLOAT_WRITE_PRECISION - int(log10(abs(self)))
        return f"{self:.{max(1, nb)}f}".rstrip("0").rstrip(".")

    def __repr__(self) -> str:
        return self.myrepr()  # repr(float(self))

    def as_numeric(self) -> float:
        """Return the value as a plain ``float``."""
        return float(self)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write myrepr() encoded as UTF-8; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.myrepr().encode("utf8"))


class NumberObject(int, PdfObject):
    """Integer object; an ``int`` that can be written to a stream."""

    # Matches the first byte that cannot belong to a number token.
    # NOTE(review): inside the character class ``+-.`` is a range, so ``,``
    # is accepted as part of a token as well - confirm this is intended.
    NumberPattern = re.compile(b"[^+-.0-9]")

    def __new__(cls, value: Any) -> Self:
        """Convert ``value`` with int(); fall back to 0 (with a warning) on ValueError."""
        try:
            number = int(value)
        except ValueError:
            logger_warning(f"NumberObject({value}) invalid; use 0 instead", __name__)
            number = 0
        return int.__new__(cls, number)

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NumberObject":
        """Clone object into pdf_dest."""
        duplicate = self._reference_clone(NumberObject(self), pdf_dest, force_duplicate)
        return cast("NumberObject", duplicate)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        key = (self.__class__, self.as_numeric())
        return hash(key)

    def as_numeric(self) -> int:
        """Return the value as a plain ``int``, rebuilt from its repr()."""
        digits = repr(self).encode("utf8")
        return int(digits)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write repr() encoded as UTF-8; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        text = repr(self)
        stream.write(text.encode("utf8"))

    @staticmethod
    def read_from_stream(stream: StreamType) -> Union["NumberObject", "FloatObject"]:
        """
        Read a number token from ``stream``.

        Returns:
            A FloatObject when the token contains a dot, a NumberObject
            otherwise.

        """
        token = read_until_regex(stream, NumberObject.NumberPattern)
        return FloatObject(token) if b"." in token else NumberObject(token)


class ByteStringObject(bytes, PdfObject):
    """
    Represents a string object where the text encoding could not be determined.

    This occurs quite often, as the PDF spec doesn't provide an alternate way to
    represent strings -- for example, the encryption data stored in files (like
    /O) is clearly not text, but is still stored in a "String" object.
    """

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ByteStringObject":
        """Clone object into pdf_dest."""
        fresh = ByteStringObject(bytes(self))
        duplicate = self._reference_clone(fresh, pdf_dest, force_duplicate)
        return cast("ByteStringObject", duplicate)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        key = (self.__class__, bytes(self))
        return hash(key)

    @property
    def original_bytes(self) -> bytes:
        """For compatibility with TextStringObject.original_bytes."""
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the bytes hex-encoded between ``<`` and ``>``; ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        for chunk in (b"<", binascii.hexlify(self), b">"):
            stream.write(chunk)

    def __str__(self) -> str:
        # Try UTF-16 first, then every encoding of NameObject.CHARSETS; the
        # first one that decodes without error wins.
        for encoding in ("utf-16", *NameObject.CHARSETS):
            try:
                return self.decode(encoding)
            except UnicodeDecodeError:
                continue
        raise PdfReadError("Cannot decode ByteStringObject.")


class TextStringObject(str, PdfObject):  # noqa: SLOT000
    """
    A string object that has been decoded into a real unicode string.

    If read from a PDF document, this string appeared to match the
    PDFDocEncoding, or contained a UTF-16BE BOM mark to cause UTF-16 decoding
    to occur.
    """

    # These three are annotations only (no class-level default): __new__ has
    # to assign them on every instance it returns.
    autodetect_pdfdocencoding: bool
    autodetect_utf16: bool
    utf16_bom: bytes
    _original_bytes: Optional[bytes] = None

    def __new__(cls, value: Any) -> Self:
        """
        Build the string and record how it should be encoded when written.

        Args:
            value: ``bytes`` are kept as ``_original_bytes`` and decoded
                (UTF-16 when they start with a UTF-16 BOM, byte-for-byte
                otherwise); anything else is handed to ``str``.

        Returns:
            The new object with ``autodetect_utf16``,
            ``autodetect_pdfdocencoding`` and ``utf16_bom`` set.

        """
        original_bytes = None
        if isinstance(value, bytes):
            original_bytes = value
            value = value.decode("charmap")
        text_string_object = str.__new__(cls, value)
        text_string_object._original_bytes = original_bytes
        text_string_object.autodetect_utf16 = False
        text_string_object.autodetect_pdfdocencoding = False
        text_string_object.utf16_bom = b""
        if original_bytes is not None and original_bytes[:2] in {codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE}:
            # The value of `original_bytes` is only set for inputs being `bytes`.
            # If this is UTF-16 data according to the BOM (first two characters),
            # perform special handling. All other cases should not need any special conversion
            # due to already being a string.
            try:
                text_string_object = str.__new__(cls, original_bytes.decode("utf-16"))
            except UnicodeDecodeError as exception:
                logger_warning(
                    f"{exception!s}\ninitial string:{exception.object!r}",
                    __name__,
                )
                # Keep the part that decoded cleanly.
                text_string_object = str.__new__(cls, exception.object[: exception.start].decode("utf-16"))
            # A brand-new instance was created above, so *all* flags have to
            # be assigned again; leaving one out makes reading it (as clone()
            # does) fail with AttributeError.
            text_string_object._original_bytes = original_bytes
            text_string_object.autodetect_utf16 = True
            text_string_object.autodetect_pdfdocencoding = False
            text_string_object.utf16_bom = original_bytes[:2]
        else:
            try:
                encode_pdfdocencoding(text_string_object)
                text_string_object.autodetect_pdfdocencoding = True
            except UnicodeEncodeError:
                text_string_object.autodetect_utf16 = True
                text_string_object.utf16_bom = codecs.BOM_UTF16_BE
        return text_string_object

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "TextStringObject":
        """Clone object into pdf_dest, carrying over the encoding metadata."""
        obj = TextStringObject(self)
        obj._original_bytes = self._original_bytes
        obj.autodetect_pdfdocencoding = self.autodetect_pdfdocencoding
        obj.autodetect_utf16 = self.autodetect_utf16
        obj.utf16_bom = self.utf16_bom
        return cast(
            "TextStringObject", self._reference_clone(obj, pdf_dest, force_duplicate)
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self.original_bytes))

    @property
    def original_bytes(self) -> bytes:
        """
        It is occasionally possible that a text string object gets created where
        a byte string object was expected due to the autodetection mechanism --
        if that occurs, this "original_bytes" property can be used to
        back-calculate what the original encoded bytes were.
        """
        if self._original_bytes is not None:
            return self._original_bytes
        return self.get_original_bytes()

    def get_original_bytes(self) -> bytes:
        """
        Re-encode the text according to the autodetection flags.

        Returns:
            UTF-16 bytes (prefixed with the recorded BOM, if any) when
            ``autodetect_utf16`` is set, PDFDocEncoding bytes when
            ``autodetect_pdfdocencoding`` is set.

        Raises:
            Exception: If neither flag is set.

        """
        # We're a text string object, but the library is trying to get our raw
        # bytes. This can happen if we auto-detected this string as text, but
        # we were wrong. It's pretty common. Return the original bytes that
        # would have been used to create this object, based upon the autodetect
        # method.
        if self.autodetect_utf16:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                return codecs.BOM_UTF16_LE + self.encode("utf-16le")
            if self.utf16_bom == codecs.BOM_UTF16_BE:
                return codecs.BOM_UTF16_BE + self.encode("utf-16be")
            return self.encode("utf-16be")
        if self.autodetect_pdfdocencoding:
            return encode_pdfdocencoding(self)
        raise Exception("no information about original bytes")  # pragma: no cover

    def get_encoded_bytes(self) -> bytes:
        """
        Return the bytes to write for this string.

        ``_original_bytes`` is used verbatim when available. Otherwise the
        text is encoded with PDFDocEncoding, falling back to UTF-16 (with the
        recorded BOM, if any) when ``autodetect_utf16`` is set or the text
        cannot be represented in PDFDocEncoding.
        """
        # Try to write the string out as a PDFDocEncoding encoded string. It's
        # nicer to look at in the PDF file. Sadly, we take a performance hit
        # here for trying...
        try:
            if self._original_bytes is not None:
                return self._original_bytes
            if self.autodetect_utf16:
                # Jump straight to the UTF-16 branch below.
                raise UnicodeEncodeError("", "forced", -1, -1, "")
            bytearr = encode_pdfdocencoding(self)
        except UnicodeEncodeError:
            if self.utf16_bom == codecs.BOM_UTF16_LE:
                bytearr = codecs.BOM_UTF16_LE + self.encode("utf-16le")
            elif self.utf16_bom == codecs.BOM_UTF16_BE:
                bytearr = codecs.BOM_UTF16_BE + self.encode("utf-16be")
            else:
                bytearr = self.encode("utf-16be")
        return bytearr

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write the string as a literal between ``(`` and ``)``.

        Every byte that is neither alphanumeric nor a space is written as a
        three-digit octal escape. ``encryption_key`` is deprecated.
        """
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        bytearr = self.get_encoded_bytes()
        stream.write(b"(")
        for c_ in iter_unpack("c", bytearr):
            c = cast(bytes, c_[0])
            if not c.isalnum() and c != b" ":
                # This:
                #   stream.write(rf"\{c:0>3o}".encode())
                # gives
                #   https://github.com/davidhalter/parso/issues/207
                stream.write(b"\\%03o" % ord(c))
            else:
                stream.write(c)
        stream.write(b")")


class NameObject(str, PdfObject):  # noqa: SLOT000
    """Name object; a ``str`` that is expected to start with ``/``."""

    # Bytes that end a name token when reading from a stream.
    delimiter_pattern = re.compile(rb"\s+|[\(\)<>\[\]{}/%]")
    prefix = b"/"
    # Characters that have to be written as ``#XX``: the listed delimiters,
    # ``#`` itself and every code point from 0 to 32.
    renumber_table: ClassVar[dict[str, bytes]] = {
        **{chr(i): f"#{i:02X}".encode() for i in b"#()<>[]{}/%"},
        **{chr(i): f"#{i:02X}".encode() for i in range(33)},
    }
    # ``#`` followed by exactly two hexadecimal digits.
    _hex_escape_pattern = re.compile(rb"#([0-9A-Fa-f]{2})")

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "NameObject":
        """Clone object into pdf_dest."""
        return cast(
            "NameObject",
            self._reference_clone(NameObject(self), pdf_dest, force_duplicate),
        )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, self))

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the escaped name (see renumber()); ``encryption_key`` is deprecated."""
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(self.renumber())

    def renumber(self) -> bytes:
        """
        Encode the name for writing.

        The first character is emitted as is (a deprecation notice is issued
        when it is not ``/``). Of the remaining characters, those above ``~``
        become one ``#XX`` escape per UTF-8 byte, those listed in
        ``renumber_table`` become their escape and all others are written
        UTF-8 encoded.
        """
        out = self[0].encode("utf-8")
        if out != b"/":
            deprecation_no_replacement(
                f"Incorrect first char in NameObject, should start with '/': ({self})",
                "5.0.0",
            )
        parts = [out]
        for c in self[1:]:
            if c > "~":
                parts.extend(f"#{x:02X}".encode() for x in c.encode("utf-8"))
            else:
                try:
                    parts.append(self.renumber_table[c])
                except KeyError:
                    parts.append(c.encode("utf-8"))
        return b"".join(parts)

    def _sanitize(self) -> "NameObject":
        """
        Sanitize the NameObject's name to be a valid PDF name part
        (alphanumeric, underscore, hyphen). The _sanitize method replaces
        spaces and any non-alphanumeric/non-underscore/non-hyphen with
        underscores.

        Returns:
            NameObject with sanitized name.
        """
        name = str(self).removeprefix("/")
        name = re.sub(r"\ ", "_", name)
        name = re.sub(r"[^a-zA-Z0-9_-]", "_", name)
        return NameObject("/" + name)

    @classproperty
    def surfix(cls) -> bytes:  # noqa: N805
        """Deprecated alias of ``prefix``."""
        deprecation_with_replacement("surfix", "prefix", "5.0.0")
        return b"/"

    @staticmethod
    def unnumber(sin: bytes) -> bytes:
        """
        Decode ``#XX`` escapes.

        Only ``#`` followed by exactly two hexadecimal digits is decoded;
        anything else is copied unchanged. The digits are matched explicitly
        because ``int(..., 16)`` on its own would also accept a sign,
        surrounding whitespace or a single trailing digit.

        Args:
            sin: Raw name bytes.

        Returns:
            The bytes with every valid escape replaced by the byte it denotes.

        """
        return bytes(
            NameObject._hex_escape_pattern.sub(
                lambda match: bytes((int(match.group(1), 16),)), sin
            )
        )

    # Encodings tried, in order, when decoding a name read from a stream.
    CHARSETS = ("utf-8", "gbk", "latin1")

    @staticmethod
    def read_from_stream(stream: StreamType, pdf: Any) -> "NameObject":  # PdfReader
        """
        Read a name from ``stream``.

        Args:
            stream: Source positioned on the leading ``/``.
            pdf: Reader; its ``strict`` flag decides how undecodable names
                are handled.

        Raises:
            PdfReadError: If the first byte is not ``/``, or if the name
                cannot be decoded and ``pdf.strict`` is set.

        """
        name = stream.read(1)
        if name != NameObject.prefix:
            raise PdfReadError("Name read error")
        name += read_until_regex(stream, NameObject.delimiter_pattern)
        try:
            # Name objects should represent irregular characters
            # with a '#' followed by the symbol's hex number
            name = NameObject.unnumber(name)
            for enc in NameObject.CHARSETS:
                try:
                    ret = name.decode(enc)
                    return NameObject(ret)
                except Exception:
                    pass
            raise UnicodeDecodeError("", name, 0, 0, "Code Not Found")
        except (UnicodeEncodeError, UnicodeDecodeError) as e:
            if not pdf.strict:
                logger_warning(
                    f"Illegal character in NameObject ({name!r}), "
                    "you may need to adjust NameObject.CHARSETS",
                    __name__,
                )
                return NameObject(name.decode("charmap"))
            raise PdfReadError(
                f"Illegal character in NameObject ({name!r}). "
                "You may need to adjust NameObject.CHARSETS.",
            ) from e


def encode_pdfdocencoding(unicode_string: str) -> bytes:
    """
    Encode ``unicode_string`` by translating each character through the
    reverse PDFDocEncoding table.

    Raises:
        UnicodeEncodeError: If a character is missing from the table.

    """
    encoded = bytearray()
    try:
        for char in unicode_string:
            encoded.append(_pdfdoc_encoding_rev[char])
    except KeyError:
        raise UnicodeEncodeError(
            "pdfdocencoding",
            unicode_string,
            -1,
            -1,
            "does not exist in translation table",
        )
    return bytes(encoded)


def is_null_or_none(x: Any) -> TypeGuard[Union[None, NullObject, IndirectObject]]:
    """
    Tell whether ``x`` stands for "no value".

    Returns:
        True if x is None, or a PdfObject that resolves to None or to a
        NullObject.

    """
    if x is None:
        return True
    if not isinstance(x, PdfObject):
        return False
    if x.get_object() is None:
        return True
    return isinstance(x.get_object(), NullObject)


================================================
FILE: pypdf/generic/_data_structures.py
================================================
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

import logging
import re
import sys
from collections.abc import Iterable, Sequence
from io import BytesIO
from math import ceil
from typing import (
    Any,
    Callable,
    Optional,
    Union,
    cast,
)

from .._protocols import PdfReaderProtocol, PdfWriterProtocol, XmpInformationProtocol
from .._utils import (
    WHITESPACES,
    StreamType,
    deprecation_no_replacement,
    logger_warning,
    read_non_whitespace,
    read_until_regex,
    read_until_whitespace,
    skip_over_comment,
)
from ..constants import (
    CheckboxRadioButtonAttributes,
    FieldDictionaryAttributes,
    OutlineFontFlag,
)
from ..constants import FilterTypes as FT
from ..constants import StreamAttributes as SA
from ..constants import TypArguments as TA
from ..constants import TypFitArguments as TF
from ..errors import STREAM_TRUNCATED_PREMATURELY, LimitReachedError, PdfReadError, PdfStreamError
from ._base import (
    BooleanObject,
    ByteStringObject,
    FloatObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    PdfObject,
    TextStringObject,
    is_null_or_none,
)
from ._fit import Fit
from ._image_inline import (
    extract_inline__ascii85_decode,
    extract_inline__ascii_hex_decode,
    extract_inline__dct_decode,
    extract_inline__run_length_decode,
    extract_inline_default,
)
from ._utils import read_hex_string_from_stream, read_string_from_stream

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Bytes pattern: an optional sign, two whitespace-separated runs of digits
# (captured as groups 1 and 2), whitespace, the letter ``R`` and then one byte
# that is not an ASCII letter.
IndirectPattern = re.compile(rb"[+-]?(\d+)\s+(\d+)\s+R[^a-zA-Z]")


class ArrayObject(list[Any], PdfObject):
    """Array object; a ``list`` that can be cloned and written to a stream."""

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "ArrayObject":
        """
        Copy the array into pdf_dest.

        Elements offering a ``replicate`` method are replicated, all others
        are appended as they are.
        """
        arr = cast(
            "ArrayObject",
            self._reference_clone(ArrayObject(), pdf_dest, False),
        )
        for data in self:
            if hasattr(data, "replicate"):
                arr.append(data.replicate(pdf_dest))
            else:
                arr.append(data)
        return arr

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ArrayObject":
        """
        Clone object into pdf_dest.

        Args:
            pdf_dest: Writer receiving the clone.
            force_duplicate: When False, an array already owned by pdf_dest
                is returned unchanged.
            ignore_fields: Forwarded to the clone() of the elements.

        Returns:
            The cloned array; stream elements are stored as references.

        """
        try:
            if self.indirect_reference.pdf == pdf_dest and not force_duplicate:  # type: ignore
                return self
        except Exception:
            # No usable indirect_reference: carry on with a regular clone.
            pass
        arr = cast(
            "ArrayObject",
            self._reference_clone(ArrayObject(), pdf_dest, force_duplicate=True),
        )
        for data in self:
            if isinstance(data, StreamObject):
                # Streams are registered in pdf_dest and stored by reference.
                dup = data._reference_clone(
                    data.clone(pdf_dest, force_duplicate, ignore_fields),
                    pdf_dest,
                    force_duplicate,
                )
                arr.append(dup.indirect_reference)
            elif isinstance(data, IndirectObject) and isinstance(resolved := data.get_object(), StreamObject):
                # Same for references that resolve to a stream.
                dup = data._reference_clone(
                    resolved.clone(pdf_dest, force_duplicate=True, ignore_fields=ignore_fields),
                    pdf_dest,
                    force_duplicate,
                )
                arr.append(dup.indirect_reference)
            elif hasattr(data, "clone"):
                arr.append(data.clone(pdf_dest, force_duplicate, ignore_fields))
            else:
                arr.append(data)
        return arr

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash considering type and value.

        """
        return hash((self.__class__, tuple(x.hash_bin() for x in self)))

    def items(self) -> Iterable[Any]:
        """Emulate DictionaryObject.items for a list (index, object)."""
        return enumerate(self)

    def _to_lst(self, lst: Any) -> list[Any]:
        """
        Normalize the operand of ``+``, ``+=`` and ``-=`` to something iterable.

        Lists, tuples and sets are returned untouched. A PdfObject or any
        other value is wrapped in a one-element list; ``str`` becomes a
        NameObject (when starting with "/") or a TextStringObject, ``bytes``
        becomes a ByteStringObject.
        """
        # Convert to list, internal
        if isinstance(lst, (list, tuple, set)):
            pass
        elif isinstance(lst, PdfObject):
            lst = [lst]
        elif isinstance(lst, str):
            # startswith() rather than lst[0]: an empty string must not raise
            # IndexError, it simply becomes an empty TextStringObject.
            if lst.startswith("/"):
                lst = [NameObject(lst)]
            else:
                lst = [TextStringObject(lst)]
        elif isinstance(lst, bytes):
            lst = [ByteStringObject(lst)]
        else:  # for numbers,...
            lst = [lst]
        return lst

    def __add__(self, lst: Any) -> "ArrayObject":
        """
        Allow extension by adding list or add one element only

        Args:
            lst: any list, tuples are extended the list.
            other types(numbers,...) will be appended.
            if str is passed it will be converted into TextStringObject
            or NameObject (if starting with "/")
            if bytes is passed it will be converted into ByteStringObject

        Returns:
            ArrayObject with all elements

        """
        temp = ArrayObject(self)
        temp.extend(self._to_lst(lst))
        return temp

    def __iadd__(self, lst: Any) -> Self:
        """
         Allow extension by adding list or add one element only

        Args:
            lst: any list, tuples are extended the list.
            other types(numbers,...) will be appended.
            if str is passed it will be converted into TextStringObject
            or NameObject (if starting with "/")
            if bytes is passed it will be converted into ByteStringObject

        """
        self.extend(self._to_lst(lst))
        return self

    def __isub__(self, lst: Any) -> Self:
        """Allow to remove items"""
        # For each operand element the first equal entry is removed; elements
        # that are not present are ignored.
        for x in self._to_lst(lst):
            try:
                index = self.index(x)
                del self[index]
            except ValueError:
                pass
        return self

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write ``[``, every element preceded by a space, then `` ]``.

        ``encryption_key`` is deprecated.
        """
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"[")
        for data in self:
            stream.write(b" ")
            data.write_to_stream(stream)
        stream.write(b" ]")

    @staticmethod
    def read_from_stream(
        stream: StreamType,
        pdf: Optional[PdfReaderProtocol],
        forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
    ) -> "ArrayObject":
        """
        Read an array from ``stream``.

        Args:
            stream: Source positioned on the opening ``[``.
            pdf: Forwarded to read_object() for every element.
            forced_encoding: Forwarded to read_object() for every element.

        Returns:
            The elements read up to the closing ``]`` or the end of the
            stream, whichever comes first.

        Raises:
            PdfReadError: If the first byte is not ``[``.

        """
        arr = ArrayObject()
        tmp = stream.read(1)
        if tmp != b"[":
            raise PdfReadError("Could not read array")
        while True:
            # skip leading whitespace
            tok = stream.read(1)
            while tok.isspace():
                tok = stream.read(1)
            if tok == b"":
                # End of stream: return what has been read so far.
                break
            if tok == b"%":
                stream.seek(-1, 1)
                skip_over_comment(stream)
                continue
            stream.seek(-1, 1)
            # check for array ending
            peek_ahead = stream.read(1)
            if peek_ahead == b"]":
                break
            stream.seek(-1, 1)
            # read and append object
            arr.append(read_object(stream, pdf, forced_encoding))
        return arr


class DictionaryObject(dict[Any, Any], PdfObject):
    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "DictionaryObject":
        """
        Build a replica of this dictionary for ``pdf_dest``.

        The replica is obtained from ``_reference_clone`` and then filled
        entry by entry: keys are replicated, values are replicated when they
        offer a ``replicate`` method and reused as-is otherwise.
        """
        replica = cast(
            "DictionaryObject",
            self._reference_clone(self.__class__(), pdf_dest, False),
        )
        for key, value in self.items():
            # The value is produced before the key, as in a plain
            # ``d[key_expr] = value_expr`` assignment.
            if hasattr(value, "replicate"):
                new_value = value.replicate(pdf_dest)
            else:
                new_value = value
            replica[key.replicate(pdf_dest)] = new_value
        return replica

    def clone(
        self,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "DictionaryObject":
        """
        Clone object into pdf_dest.

        Args:
            pdf_dest: Writer that will own the clone.
            force_duplicate: When false and this object already belongs to
                ``pdf_dest``, the object itself is returned.
            ignore_fields: Keys to leave out; ``None`` is treated as empty.

        Returns:
            This object or its clone in ``pdf_dest``.

        """
        try:
            already_in_dest = self.indirect_reference.pdf == pdf_dest  # type: ignore
            if already_in_dest and not force_duplicate:
                return self
        except Exception:
            # No usable indirect reference: fall through and clone.
            pass

        if ignore_fields is None:
            ignore_fields = []
        duplicate = cast(
            "DictionaryObject",
            self._reference_clone(self.__class__(), pdf_dest, force_duplicate),
        )
        # Only populate a clone that is still empty.
        if not duplicate.keys():
            seen: set[tuple[int, int]] = set()  # (idnum, generation)
            duplicate._clone(self, pdf_dest, force_duplicate, ignore_fields, seen)
        return duplicate

    def _clone(
        self,
        src: "DictionaryObject",
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool,
        ignore_fields: Optional[Sequence[Union[str, int]]],
        visited: set[tuple[int, int]],  # (idnum, generation)
    ) -> None:
        """
        Update the object from src.

        Args:
            src: Dictionary whose entries are copied into ``self``.
            pdf_dest: Writer passed on to every nested ``clone`` /
                ``_reference_clone`` call.
            force_duplicate: Passed on to every nested clone call.
            ignore_fields: Keys that are not copied. An ``int`` entry acts as
                a counter for the entry following it: it is decremented on
                each call and, once it is ``<= 0``, the counter and the entry
                after it are dropped from the list. Must not be ``None``.
            visited: ``(idnum, generation)`` pairs of clones already created
                while walking linked chains; used to stop on cycles.

        """
        # First we remove the ignore_fields
        # that are for a limited number of levels
        assert ignore_fields is not None
        ignore_fields = list(ignore_fields)
        x = 0
        while x < len(ignore_fields):
            if isinstance(ignore_fields[x], int):
                if cast(int, ignore_fields[x]) <= 0:
                    # Expired counter: drop it together with the entry that follows it.
                    del ignore_fields[x]
                    del ignore_fields[x]
                    continue
                ignore_fields[x] -= 1  # type:ignore
            x += 1
        # Check whether src is part of a linked chain (/Next, /Prev, /N, /V
        # pointing to a dictionary of a compatible /Type). Such chains are
        # walked with a loop instead of being cloned recursively.
        if any(
            field not in ignore_fields
            and field in src
            and isinstance(src.raw_get(field), IndirectObject)
            and isinstance(src[field], DictionaryObject)
            and (
                src.get("/Type", None) is None
                or cast(DictionaryObject, src[field]).get("/Type", None) is None
                or src.get("/Type", None)
                == cast(DictionaryObject, src[field]).get("/Type", None)
            )
            for field in ["/Next", "/Prev", "/N", "/V"]
        ):
            ignore_fields = list(ignore_fields)
            for lst in (("/Next", "/Prev"), ("/N", "/V")):
                for k in lst:
                    # (source, clone) pairs collected along the chain for key k.
                    objs = []
                    if (
                        k in src
                        and k not in self
                        and isinstance(src.raw_get(k), IndirectObject)
                        and isinstance(src[k], DictionaryObject)
                        # If need to go further the idea is to check
                        # that the types are the same
                        and (
                            src.get("/Type", None) is None
                            or cast(DictionaryObject, src[k]).get("/Type", None) is None
                            or src.get("/Type", None)
                            == cast(DictionaryObject, src[k]).get("/Type", None)
                        )
                    ):
                        cur_obj: Optional[DictionaryObject] = cast(
                            "DictionaryObject", src[k]
                        )
                        prev_obj: Optional[DictionaryObject] = self
                        while cur_obj is not None:
                            clon = cast(
                                "DictionaryObject",
                                cur_obj._reference_clone(
                                    cur_obj.__class__(), pdf_dest, force_duplicate
                                ),
                            )
                            # Check to see if we've previously processed our item
                            if clon.indirect_reference is not None:
                                idnum = clon.indirect_reference.idnum
                                generation = clon.indirect_reference.generation
                                if (idnum, generation) in visited:
                                    cur_obj = None
                                    break
                                visited.add((idnum, generation))
                            objs.append((cur_obj, clon))
                            assert prev_obj is not None
                            # Link the previous clone to this one through key k.
                            prev_obj[NameObject(k)] = clon.indirect_reference
                            prev_obj = clon
                            try:
                                # Stop when the chain comes back to src,
                                # otherwise follow key k to the next element.
                                if cur_obj == src:
                                    cur_obj = None
                                else:
                                    cur_obj = cast("DictionaryObject", cur_obj[k])
                            except Exception:
                                # Key k is missing or unusable: end of the chain.
                                cur_obj = None
                        # Fill the clones only after the whole chain has been linked.
                        for s, c in objs:
                            c._clone(
                                s, pdf_dest, force_duplicate, ignore_fields, visited
                            )

        # Copy the remaining entries of src.
        for k, v in src.items():
            if k not in ignore_fields:
                if isinstance(v, StreamObject):
                    # Streams are cloned and stored through their indirect reference.
                    if not hasattr(v, "indirect_reference"):
                        v.indirect_reference = None
                    vv = v.clone(pdf_dest, force_duplicate, ignore_fields)
                    assert vv.indirect_reference is not None
                    self[k.clone(pdf_dest)] = vv.indirect_reference
                elif k not in self:
                    # Keys already set (e.g. by the chain handling above) are kept.
                    self[NameObject(k)] = (
                        v.clone(pdf_dest, force_duplicate, ignore_fields)
                        if hasattr(v, "clone")
                        else v
                    )

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash built from the class and from every ``(key, value.hash_bin())``
            pair, in the dictionary's iteration order.

        """
        entries = tuple((key, value.hash_bin()) for key, value in self.items())
        return hash((self.__class__, entries))

    def raw_get(self, key: Any) -> Any:
        """Return the value stored under ``key`` without calling ``get_object()`` on it."""
        stored = dict.__getitem__(self, key)
        return stored

    def get_inherited(self, key: str, default: Any = None) -> Any:
        """
        Look up ``key`` here and, failing that, up the ``/Parent`` chain.

        Args:
            key: string identifying the field to return

            default: value returned when neither this dictionary nor any
                ancestor provides the key

        Returns:
            Current key or inherited one, otherwise default value.

        """
        if key in self:
            return self[key]
        if "/Parent" not in self:
            return default
        # Not found here: delegate the lookup to the parent dictionary.
        parent = cast("DictionaryObject", self["/Parent"].get_object())
        return parent.get_inherited(key, default)

    def __setitem__(self, key: Any, value: Any) -> Any:
        """
        Store ``value`` under ``key``.

        Raises:
            ValueError: ``key`` or ``value`` is not a ``PdfObject``
                (the key is checked first).

        """
        key_is_pdf_object = isinstance(key, PdfObject)
        if not key_is_pdf_object:
            raise ValueError("Key must be a PdfObject")
        value_is_pdf_object = isinstance(value, PdfObject)
        if not value_is_pdf_object:
            raise ValueError("Value must be a PdfObject")
        return dict.__setitem__(self, key, value)

    def setdefault(self, key: Any, value: Optional[Any] = None) -> Any:
        """
        ``dict.setdefault`` with type checks on both arguments.

        The checks run even when ``key`` is already present, so the default
        ``value=None`` is always rejected.

        Raises:
            ValueError: ``key`` or ``value`` is not a ``PdfObject``
                (the key is checked first).

        """
        key_is_pdf_object = isinstance(key, PdfObject)
        if not key_is_pdf_object:
            raise ValueError("Key must be a PdfObject")
        value_is_pdf_object = isinstance(value, PdfObject)
        if not value_is_pdf_object:
            raise ValueError("Value must be a PdfObject")
        return dict.setdefault(self, key, value)

    def __getitem__(self, key: Any) -> PdfObject:
        """Return the value for ``key`` after resolving it with ``get_object()``."""
        stored = dict.__getitem__(self, key)
        return stored.get_object()

    @property
    def xmp_metadata(self) -> Optional[XmpInformationProtocol]:
        """
        XMP (Extensible Metadata Platform) data attached to this object
        through its ``/Metadata`` entry, if any.

        See Table 347 — Additional entries in a metadata stream dictionary.

        Returns:
          A :class:`~pypdf.xmp.XmpInformation` instance wrapping the resolved
          ``/Metadata`` object, or None when the entry is missing or null.

        """
        from ..xmp import XmpInformation  # noqa: PLC0415

        raw_metadata = self.get("/Metadata", None)
        if not is_null_or_none(raw_metadata):
            assert raw_metadata is not None, "mypy"
            return XmpInformation(raw_metadata.get_object())
        return None

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Serialize the dictionary into ``stream`` as ``<< ... >>``, one entry
        per line.

        Keys longer than two characters whose second and last characters are
        ``%`` (such as ``/%is_open%``) are not written.

        Args:
            stream: Destination the bytes are written to.
            encryption_key: Deprecated; passing anything but ``None`` is
                reported through ``deprecation_no_replacement``.

        """
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"<<\n")
        for key, value in self.items():
            internal_only = len(key) > 2 and key[1] == "%" and key[-1] == "%"
            if not internal_only:
                key.write_to_stream(stream, encryption_key)
                stream.write(b" ")
                value.write_to_stream(stream)
                stream.write(b"\n")
        stream.write(b">>")

    @classmethod
    def _get_next_object_position(
        cls,
        position_before: int,
        position_end: int,
        generations: list[int],
        pdf: PdfReaderProtocol,
    ) -> int:
        """
        Find the closest xref offset after ``position_before``.

        Args:
            position_before: Exclusive lower bound for candidate offsets.
            position_end: Inclusive upper bound, also the fallback result.
            generations: Keys of ``pdf.xref`` to inspect.
            pdf: Reader whose ``xref`` table is searched.

        Returns:
            The smallest offset in ``(position_before, position_end]`` found
            in the inspected tables, or ``position_end`` if there is none.

        """
        nearest = position_end
        for generation in generations:
            offsets = pdf.xref[generation].values()
            in_range = [
                offset for offset in offsets if position_before < offset <= position_end
            ]
            if in_range:
                nearest = min(nearest, min(in_range))
        return nearest

    @classmethod
    def _read_unsized_from_stream(
        cls, stream: StreamType, pdf: PdfReaderProtocol
    ) -> bytes:
        """
        Read stream data whose length cannot be trusted.

        Everything from the current position up to just before the next
        object listed in ``pdf.xref`` (or up to the end of the data if there
        is none) is read and searched for the ``endstream`` marker.

        Args:
            stream: Positioned at the first byte of the stream data. On
                return it is positioned right after ``endstream``.
            pdf: Reader providing the ``xref`` table.

        Returns:
            The bytes before ``endstream``, without the single byte that
            directly precedes the marker.

        Raises:
            PdfReadError: No ``endstream`` marker was found in the range read.

        """
        marker = b"endstream"
        start_position = stream.tell()
        next_object_position = cls._get_next_object_position(
            position_before=start_position,
            position_end=2 ** 32,
            generations=list(pdf.xref),
            pdf=pdf,
        ) - 1
        # Read until the next object position.
        read_value = stream.read(next_object_position - start_position)
        endstream_position = read_value.find(marker)
        if endstream_position < 0:
            raise PdfReadError(
                f"Unable to find 'endstream' marker for obj starting at {start_position}."
            )
        stream.seek(start_position + endstream_position + len(marker))
        # Drop the byte separating the data from the marker. The bound is
        # clamped at 0: with the marker at offset 0 the plain expression would
        # become ``[:-1]`` and return nearly the whole buffer instead of
        # empty data.
        return read_value[: max(endstream_position - 1, 0)]

    @staticmethod
    def read_from_stream(
        stream: StreamType,
        pdf: Optional[PdfReaderProtocol],
        forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
    ) -> "DictionaryObject":
        """
        Parse a dictionary, and the stream data following it if any.

        Args:
            stream: Positioned on the opening ``<<``.
            pdf: Reader used to resolve objects and to decide, through
                ``pdf.strict``, whether problems raise or are only logged.
            forced_encoding: Passed on to ``read_object`` when reading values.

        Returns:
            A ``DictionaryObject``, or the result of
            ``StreamObject.initialize_from_dictionary`` when the dictionary is
            followed by the ``stream`` keyword. When a key/value pair cannot
            be read and the reader is not strict, the entries read so far are
            returned as a plain ``DictionaryObject``.

        Raises:
            PdfReadError: The data does not start with ``<<``; in strict mode
                also for unreadable entries and duplicate keys; and when no
                ``endstream`` marker can be located.
            PdfStreamError: The data ends inside the dictionary, the
                ``stream`` keyword is not followed by a newline, or (strict
                mode) the stream length is missing.
            LimitReachedError: The declared stream length exceeds
                ``MAX_DECLARED_STREAM_LENGTH``.

        """
        tmp = stream.read(2)
        if tmp != b"<<":
            raise PdfReadError(
                f"Dictionary read error at byte {hex(stream.tell())}: "
                "stream must begin with '<<'"
            )
        data: dict[Any, Any] = {}
        while True:
            tok = read_non_whitespace(stream)
            if tok == b"\x00":
                # NUL bytes between entries are skipped.
                continue
            if tok == b"%":
                stream.seek(-1, 1)
                skip_over_comment(stream)
                continue
            if not tok:
                raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)

            if tok == b">":
                # Consume the second '>' of the closing '>>'.
                stream.read(1)
                break
            stream.seek(-1, 1)
            try:
                try:
                    key = read_object(stream, pdf)
                    if isinstance(key, NullObject):
                        break
                    if not isinstance(key, NameObject):
                        raise PdfReadError(
                            f"Expecting a NameObject for key but found {key!r}"
                        )
                except PdfReadError as exc:
                    if pdf is not None and pdf.strict:
                        raise
                    # Non-strict: log the bad key and try the next token.
                    logger_warning(exc.__repr__(), __name__)
                    continue
                tok = read_non_whitespace(stream)
                stream.seek(-1, 1)
                value = read_object(stream, pdf, forced_encoding)
            except Exception as exc:
                if pdf is not None and pdf.strict:
                    raise PdfReadError(exc.__repr__())
                logger_warning(exc.__repr__(), __name__)
                retval = DictionaryObject()
                retval.update(data)
                return retval  # return partial data

            # NOTE(review): this test is on truthiness, so an existing falsy
            # value is replaced without the duplicate-key warning — confirm
            # this is intended.
            if not data.get(key):
                data[key] = value
            else:
                # multiple definitions of key not permitted
                msg = (
                    f"Multiple definitions in dictionary at byte "
                    f"{hex(stream.tell())} for key {key}"
                )
                if pdf is not None and pdf.strict:
                    raise PdfReadError(msg)
                logger_warning(msg, __name__)

        # Remember where the dictionary ends, to rewind if no stream follows.
        pos = stream.tell()
        s = read_non_whitespace(stream)
        if s == b"s" and stream.read(5) == b"tream":
            eol = stream.read(1)
            # Occasional PDF file output has spaces after 'stream' keyword but before EOL.
            # patch provided by Danial Sandler
            while eol == b" ":
                eol = stream.read(1)
            if eol not in (b"\n", b"\r"):
                raise PdfStreamError("Stream data must be followed by a newline")
            # Accept both CR LF and a lone CR: un-read the byte after CR if it is not LF.
            if eol == b"\r" and stream.read(1) != b"\n":
                stream.seek(-1, 1)
            # this is a stream object, not a dictionary
            if SA.LENGTH not in data:
                if pdf is not None and pdf.strict:
                    raise PdfStreamError("Stream length not defined")
                logger_warning(
                    f"Stream length not defined @pos={stream.tell()}", __name__
                )
                # -1 selects the "search for endstream" path below.
                data[NameObject(SA.LENGTH)] = NumberObject(-1)
            length = data[SA.LENGTH]
            if isinstance(length, IndirectObject):
                # Resolving the length may move the stream position: save and restore it.
                t = stream.tell()
                assert pdf is not None, "mypy"
                length = pdf.get_object(length)
                stream.seek(t, 0)
            if length is None:  # if the PDF is damaged
                length = -1
            # Start of the stream data, used by the recovery path below.
            pstart = stream.tell()
            if length >= 0:
                from ..filters import MAX_DECLARED_STREAM_LENGTH  # noqa: PLC0415
                if length > MAX_DECLARED_STREAM_LENGTH:
                    raise LimitReachedError(f"Declared stream length of {length} exceeds maximum allowed length.")

                data["__streamdata__"] = stream.read(length)
            else:
                data["__streamdata__"] = read_until_regex(
                    stream, re.compile(b"endstream")
                )
            e = read_non_whitespace(stream)
            ndstream = stream.read(8)
            if (e + ndstream) != b"endstream":
                # the odd PDF file has a length that is too long, so
                # we need to read backwards to find the "endstream" ending.
                # ReportLab (unknown version) generates files with this bug,
                # and Python users into PDF files tend to be our audience.
                # we need to do this to correct the streamdata and chop off
                # an extra character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == b"endstream":
                    # we found it by looking back one character further.
                    data["__streamdata__"] = data["__streamdata__"][:-1]
                elif pdf is not None and not pdf.strict:
                    # Non-strict recovery: ignore the declared length and
                    # search for 'endstream' from the start of the data.
                    stream.seek(pstart, 0)
                    data["__streamdata__"] = DictionaryObject._read_unsized_from_stream(stream, pdf)
                    pos = stream.tell()
                else:
                    stream.seek(pos, 0)
                    raise PdfReadError(
                        "Unable to find 'endstream' marker after stream at byte "
                        f"{hex(stream.tell())} (nd='{ndstream!r}', end='{end!r}')."
                    )
        else:
            # Plain dictionary: undo the look-ahead.
            stream.seek(pos, 0)
        if "__streamdata__" in data:
            return StreamObject.initialize_from_dictionary(data)
        retval = DictionaryObject()
        retval.update(data)
        return retval


class TreeObject(DictionaryObject):
    def __init__(self, dct: Optional[DictionaryObject] = None) -> None:
        """Create a tree node, copying the entries of ``dct`` when it is given and non-empty."""
        DictionaryObject.__init__(self)
        if not dct:
            return
        self.update(dct)

    def has_children(self) -> bool:
        """Tell whether this node has a ``/First`` entry."""
        first_present = "/First" in self
        return first_present

    def __iter__(self) -> Any:
        """Iterate over the child nodes (see ``children``), not over the dictionary keys."""
        child_iterator = self.children()
        return child_iterator

    def children(self) -> Iterable[Any]:
        """
        Yield the child nodes, starting at ``/First`` and following ``/Next``.

        Iteration ends after the node equal to ``/Last``, when a node has no
        (or a null) ``/Next``, or — with a warning — when a node is reached
        a second time.
        """
        if not self.has_children():
            return

        first_ref = self[NameObject("/First")]
        last = self[NameObject("/Last")]
        node = first_ref.get_object()
        seen_ids: set[int] = set()
        while id(node) not in seen_ids:
            seen_ids.add(id(node))

            yield node

            if node == last:
                return
            next_ref = node.get(NameObject("/Next"))  # type: ignore
            if is_null_or_none(next_ref):
                return
            node = next_ref.get_object()
        # The loop only ends here when a node repeats.
        logger_warning(f"Detected cycle in outline structure for {node}", __name__)

    def add_child(self, child: Any, pdf: PdfWriterProtocol) -> None:
        """Add ``child`` via ``insert_child`` with no ``before`` sibling."""
        self.insert_child(child, before=None, pdf=pdf)

    def inc_parent_counter_default(
        self, parent: Union[None, IndirectObject, "TreeObject"], n: int
    ) -> None:
        """
        Add ``n`` to ``/Count`` of ``parent`` and of its ancestors.

        The walk stops at the first node that is null/None or that has no
        ``/Count`` entry. Counts never go below 0.
        """
        if is_null_or_none(parent):
            return
        assert parent is not None, "mypy"
        node = cast("TreeObject", parent.get_object())
        if "/Count" not in node:
            return
        updated = max(0, cast(int, node[NameObject("/Count")]) + n)
        node[NameObject("/Count")] = NumberObject(updated)
        self.inc_parent_counter_default(node.get("/Parent", None), n)

    def inc_parent_counter_outline(
        self, parent: Union[None, IndirectObject, "TreeObject"], n: int
    ) -> None:
        """
        Add ``n`` to the magnitude of ``/Count`` of ``parent``.

        The stored count is positive when the node's ``/%is_open%`` entry
        equals True (the default) and negative otherwise. Ancestors are only
        updated while nodes are open.
        """
        if is_null_or_none(parent):
            return
        assert parent is not None, "mypy"
        node = cast("TreeObject", parent.get_object())
        #  BooleanObject requires comparison with == not is
        is_open = node.get("/%is_open%", True) == True  # noqa: E712
        count = cast(int, node.get("/Count", 0))
        if count < 0:
            count = abs(count)
        sign = 1 if is_open else -1
        node[NameObject("/Count")] = NumberObject((count + n) * sign)
        if is_open:
            self.inc_parent_counter_outline(node.get("/Parent", None), n)

    def insert_child(
        self,
        child: Any,
        before: Any,
        pdf: PdfWriterProtocol,
        inc_parent_counter: Optional[Callable[..., Any]] = None,
    ) -> IndirectObject:
        """
        Link ``child`` into the list of children of this node.

        Args:
            child: Node to insert; its ``indirect_reference`` is what gets
                stored in the tree.
            before: Reference of the sibling in front of which the child is
                inserted, or ``None`` to append.
            pdf: Not used by this method.
            inc_parent_counter: Called as ``inc_parent_counter(self, count)``
                to update the counters; defaults to
                ``inc_parent_counter_default``.

        Returns:
            The indirect reference of the inserted child.

        """
        if inc_parent_counter is None:
            inc_parent_counter = self.inc_parent_counter_default
        child_obj = child.get_object()
        child = child.indirect_reference  # get_reference(child_obj)

        prev: Optional[DictionaryObject]
        if "/First" not in self:  # no child yet
            self[NameObject("/First")] = child
            self[NameObject("/Count")] = NumberObject(0)
            self[NameObject("/Last")] = child
            child_obj[NameObject("/Parent")] = self.indirect_reference
            inc_parent_counter(self, child_obj.get("/Count", 1))
            if "/Next" in child_obj:
                del child_obj["/Next"]
            if "/Prev" in child_obj:
                del child_obj["/Prev"]
            return child
        prev = cast("DictionaryObject", self["/Last"])

        # NOTE(review): the search starts at /Last, so a `before` sibling is
        # only found if it is /Last itself or reachable from it via /Next;
        # otherwise the child is appended at the end.
        while prev.indirect_reference != before:
            if "/Next" in prev:
                prev = cast("TreeObject", prev["/Next"])
            else:  # append at the end
                prev[NameObject("/Next")] = cast("TreeObject", child)
                child_obj[NameObject("/Prev")] = prev.indirect_reference
                child_obj[NameObject("/Parent")] = self.indirect_reference
                if "/Next" in child_obj:
                    del child_obj["/Next"]
                self[NameObject("/Last")] = child
                inc_parent_counter(self, child_obj.get("/Count", 1))
                return child

        # `prev` is now the node in front of which the child is inserted.
        try:
            older_sibling = prev["/Prev"]
        except Exception:
            # Missing or unresolvable /Prev: `prev` is treated as the head.
            older_sibling = None
        if isinstance(older_sibling, DictionaryObject):
            # Insert in the middle: hook the child behind the older sibling.
            older_sibling[NameObject("/Next")] = child
            child_obj[NameObject("/Prev")] = older_sibling
        else:
            # Insert in first position: the child becomes the new head, so it
            # must not keep a /Prev from an earlier tree and /First has to
            # point at it (otherwise it is unreachable from the parent).
            if "/Prev" in child_obj:
                del child_obj["/Prev"]
            self[NameObject("/First")] = child
        child_obj[NameObject("/Next")] = prev
        prev[NameObject("/Prev")] = child
        child_obj[NameObject("/Parent")] = self.indirect_reference
        inc_parent_counter(self, child_obj.get("/Count", 1))
        return child

    def _remove_node_from_tree(
        self, prev: Any, prev_ref: Any, cur: Any, last: Any
    ) -> None:
        """
        Adjust the pointers of the linked list and tree node count.

        Args:
            prev: Node preceding ``cur``, or ``None`` if ``cur`` is the first.
            prev_ref: Reference to ``prev``; stored in the pointers that
                replace ``cur``.
            cur: Node being removed.
            last: Last node of the tree; ``cur`` must equal it when ``cur``
                has a predecessor but no ``/Next``.

        """
        next_ref = cur.get(NameObject("/Next"), None)
        has_next = bool(next_ref)

        if prev is None and not has_next:
            # Removing only tree node
            self[NameObject("/Count")] = NumberObject(0)
            del self[NameObject("/First")]
            if NameObject("/Last") in self:
                del self[NameObject("/Last")]
            return

        if prev is None:
            # Removing first tree node
            successor = next_ref.get_object()
            del successor[NameObject("/Prev")]
            self[NameObject("/First")] = next_ref
        elif has_next:
            # Removing middle tree node
            successor = next_ref.get_object()
            successor[NameObject("/Prev")] = prev_ref
            prev[NameObject("/Next")] = next_ref
        else:
            # Removing last tree node
            assert cur == last
            del prev[NameObject("/Next")]
            self[NameObject("/Last")] = prev_ref
        # One node less in every case handled above.
        self[NameObject("/Count")] = NumberObject(
            self[NameObject("/Count")] - 1  # type: ignore
        )

    def remove_child(self, child: Any) -> None:
        """
        Unlink ``child`` from this tree and clear its tree pointers.

        Raises:
            ValueError: ``child`` has no ``/Parent``, its ``/Parent`` is not
                this node, or it is not found among the children.

        """
        child_obj = child.get_object()
        child = child_obj.indirect_reference

        parent_key = NameObject("/Parent")
        if parent_key not in child_obj:
            raise ValueError("Removed child does not appear to be a tree item")
        if child_obj[parent_key] != self:
            raise ValueError("Removed child is not a member of this tree")

        node_ref: Optional[Any] = self[NameObject("/First")]
        node: Optional[dict[str, Any]] = node_ref.get_object()  # type: ignore
        last = self[NameObject("/Last")].get_object()
        previous_ref = None
        previous = None
        while node is not None:
            if node == child_obj:
                self._remove_node_from_tree(previous, previous_ref, node, last)
                break

            # Advance, remembering the node just visited.
            previous_ref, previous = node_ref, node
            if NameObject("/Next") in node:
                node_ref = node[NameObject("/Next")]
                node = node_ref.get_object()
            else:
                node_ref = None
                node = None
        else:
            # The list was exhausted without meeting the child.
            raise ValueError("Removal couldn't find item in tree")

        _reset_node_tree_relationship(child_obj)

    def remove_from_tree(self) -> None:
        """
        Remove the object from the tree it is in.

        Raises:
            ValueError: This node has no ``/Parent`` entry.

        """
        if NameObject("/Parent") not in self:
            raise ValueError("Removed child does not appear to be a tree item")
        parent = cast("TreeObject", self["/Parent"])
        parent.remove_child(self)

    def empty_tree(self) -> None:
        """
        Detach every child from this node and drop ``/Count``, ``/First``
        and ``/Last``.
        """
        # Take a snapshot of the children first: resetting a child deletes
        # its /Next entry, which the lazy ``children()`` generator needs to
        # advance. Iterating it directly would stop after the first child and
        # leave the others with their /Parent, /Prev and /Next pointers.
        for child in tuple(self.children()):
            _reset_node_tree_relationship(child.get_object())

        for name in ("/Count", "/First", "/Last"):
            key = NameObject(name)
            if key in self:
                del self[key]


def _reset_node_tree_relationship(child_obj: Any) -> None:
    """
    Call this after a node has been removed from a tree.

    Deletes the node's ``/Parent`` entry (which must exist) and, when
    present, its ``/Next`` and ``/Prev`` entries.

    Args:
        child_obj: The node that was removed.

    """
    del child_obj[NameObject("/Parent")]
    for sibling_link in ("/Next", "/Prev"):
        link_key = NameObject(sibling_link)
        if link_key in child_obj:
            del child_obj[link_key]


class StreamObject(DictionaryObject):
    def __init__(self) -> None:
        """Create a stream with empty data and no cached decoded counterpart."""
        # Cached decoded version of this stream, if one has been built.
        self.decoded_self: Optional[DecodedStreamObject] = None
        # Stream payload as stored in this object.
        self._data: bytes = b""

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "StreamObject":
        """
        Build a replica of this stream for ``pdf_dest``.

        The replica shares the data bytes, receives a replica of the cached
        ``decoded_self`` (if any) and replicated dictionary entries. The
        source object is left untouched.
        """
        d__ = cast(
            "StreamObject",
            self._reference_clone(self.__class__(), pdf_dest, False),
        )
        d__._data = self._data
        try:
            decoded_self = self.decoded_self
            # The cache belongs on the replica; assigning it to ``self`` would
            # overwrite the source's cache with an object tied to ``pdf_dest``
            # and leave the replica without one.
            if decoded_self is None:
                d__.decoded_self = None
            else:
                d__.decoded_self = cast(
                    "DecodedStreamObject", decoded_self.replicate(pdf_dest)
                )
        except Exception:
            # Best effort: a missing or non-replicable cache is not fatal,
            # it can be rebuilt from the data.
            pass
        for k, v in self.items():
            d__[k.replicate(pdf_dest)] = (
                v.replicate(pdf_dest) if hasattr(v, "replicate") else v
            )
        return d__

    def _clone(
        self,
        src: DictionaryObject,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool,
        ignore_fields: Optional[Sequence[Union[str, int]]],
        visited: set[tuple[int, int]],
    ) -> None:
        """
        Update the object from src.

        Copies the data bytes, clones the cached ``decoded_self`` of ``src``
        (best effort) and then lets ``DictionaryObject._clone`` copy the
        dictionary entries.

        Args:
            src: Stream to copy from.
            pdf_dest: Writer passed on to the nested clone calls.
            force_duplicate: Passed on to the nested clone calls.
            ignore_fields: Passed on to the nested clone calls.
            visited: Passed on to ``DictionaryObject._clone``.

        """
        source = cast("StreamObject", src)
        self._data = source._data
        try:
            cached = source.decoded_self
            self.decoded_self = (
                None
                if cached is None
                else cast(
                    "DecodedStreamObject",
                    cached.clone(pdf_dest, force_duplicate, ignore_fields),
                )
            )
        except Exception:
            # Best effort: keep whatever decoded_self this object already has.
            pass
        super()._clone(src, pdf_dest, force_duplicate, ignore_fields, visited)

    def hash_bin(self) -> int:
        """
        Used to detect modified object.

        Returns:
            Hash combining the dictionary hash with the stored data bytes.

        """
        # Use _data to prevent errors on non-decoded streams.
        dictionary_hash = super().hash_bin()
        return hash((dictionary_hash, self._data))

    def get_data(self) -> bytes:
        """Return the stream data held in ``_data``."""
        payload = self._data
        return payload

    def set_data(self, data: bytes) -> None:
        """Replace the stream data held in ``_data``; no validation is done here."""
        payload = data
        self._data = payload

    def hash_value_data(self) -> bytes:
        """Return the parent's hash input followed by the result of ``get_data()``."""
        hash_input = super().hash_value_data()
        hash_input += self.get_data()
        return hash_input

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Serialize the stream: dictionary (with ``/Length``), ``stream``
        keyword, data and ``endstream``.

        ``/Length`` is only present in the dictionary while it is being
        written; it is removed again afterwards.

        Args:
            stream: Destination the bytes are written to.
            encryption_key: Deprecated; passing anything but ``None`` is
                reported through ``deprecation_no_replacement``.

        """
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        self[NameObject(SA.LENGTH)] = NumberObject(len(self._data))
        try:
            DictionaryObject.write_to_stream(self, stream)
        finally:
            # Remove the temporary entry even when writing fails, so a failed
            # write does not leave a /Length behind in the dictionary.
            del self[SA.LENGTH]
        stream.write(b"\nstream\n")
        stream.write(self._data)
        stream.write(b"\nendstream")

    @staticmethod
    def initialize_from_dictionary(
        data: dict[str, Any]
    ) -> Union["EncodedStreamObject", "DecodedStreamObject"]:
        """
        Build a stream object from parsed dictionary entries.

        Args:
            data: Entries including the payload under ``"__streamdata__"``.
                The payload key and the length key are removed from this
                mapping in place.

        Returns:
            An ``EncodedStreamObject`` when a filter entry is present, a
            ``DecodedStreamObject`` otherwise.

        """
        stream_obj: Union[EncodedStreamObject, DecodedStreamObject] = (
            EncodedStreamObject() if SA.FILTER in data else DecodedStreamObject()
        )
        stream_obj._data = data["__streamdata__"]
        del data["__streamdata__"]
        # The length is recomputed when the stream is written.
        if SA.LENGTH in data:
            del data[SA.LENGTH]
        stream_obj.update(data)
        return stream_obj

    def flate_encode(self, level: int = -1) -> "EncodedStreamObject":
        """
        Return a copy of this stream with its stored data Flate-compressed.

        FlateDecode is put in front of any existing filter, and a null entry
        in front of the existing decode parameters.

        Args:
            level: Passed to ``FlateDecode.encode``.

        Returns:
            A new ``EncodedStreamObject``; this object is not modified.

        """
        from ..filters import FlateDecode  # noqa: PLC0415

        if SA.FILTER not in self:
            filters = NameObject(FT.FLATE_DECODE)
            params = None
        else:
            existing_filter = self[SA.FILTER]
            if isinstance(existing_filter, ArrayObject):
                filters = ArrayObject([NameObject(FT.FLATE_DECODE), *existing_filter])
                existing_params = self.get(SA.DECODE_PARMS, ArrayObject())
                try:
                    params = ArrayObject([NullObject(), *existing_params])
                except TypeError:
                    # case of error where the * operator is not working (not an array)
                    params = ArrayObject([NullObject(), existing_params])
            else:
                filters = ArrayObject([NameObject(FT.FLATE_DECODE), existing_filter])
                params = ArrayObject(
                    [NullObject(), self.get(SA.DECODE_PARMS, NullObject())]
                )

        encoded = EncodedStreamObject()
        encoded.update(self)
        encoded[NameObject(SA.FILTER)] = filters
        if params is not None:
            encoded[NameObject(SA.DECODE_PARMS)] = params
        encoded._data = FlateDecode.encode(self._data, level)
        return encoded

    def decode_as_image(self, pillow_parameters: Union[dict[str, Any], None] = None) -> Any:
        """
        Try to decode the stream object as an image

        A warning is logged when ``/Subtype`` is not ``/Image``; decoding is
        attempted regardless.

        Args:
            pillow_parameters: parameters provided to Pillow Image.save() method,
                cf. <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>

        Returns:
            a PIL image if proper decoding has been found
        Raises:
            Exception: Errors during decoding will be reported.
                It is recommended to catch exceptions to prevent
                stops in your program.

        """
        from ._image_xobject import _xobj_to_image  # noqa: PLC0415

        subtype = self.get("/Subtype", "")
        if subtype != "/Image":
            try:
                warning = f"{self.indirect_reference} does not seem to be an Image"  # pragma: no cover
            except AttributeError:
                warning = f"{self.__repr__()} object does not seem to be an Image"  # pragma: no cover
            logger_warning(warning, __name__)
        extension, _, img = _xobj_to_image(self, pillow_parameters)
        return None if extension is None else img  # pragma: no cover


class DecodedStreamObject(StreamObject):
    """
    Stream object that adds nothing to ``StreamObject``.

    ``StreamObject.initialize_from_dictionary`` selects this class when the
    dictionary has no filter entry.
    """


class EncodedStreamObject(StreamObject):
    """
    Stream object whose stored data is still filter-encoded.

    The decoded form is built on first access and cached in ``decoded_self``.
    """

    def __init__(self) -> None:
        # Filled in by the first call to get_data().
        self.decoded_self: Optional[DecodedStreamObject] = None

    # This overrides the parent method
    def get_data(self) -> bytes:
        """Return the decoded stream data, decoding and caching it on first use."""
        from ..filters import decode_stream_data  # noqa: PLC0415

        cached = self.decoded_self
        if cached is None:
            # Build the decoded twin: decoded payload plus every dictionary
            # entry except those describing the encoding itself.
            cached = DecodedStreamObject()
            cached.set_data(decode_stream_data(self))
            for key, value in self.items():
                if key in (SA.LENGTH, SA.FILTER, SA.DECODE_PARMS):
                    continue
                cached[key] = value
            self.decoded_self = cached
        return cached.get_data()

    # This overrides the parent method:
    def set_data(self, data: bytes) -> None:
        """
        Replace the stream content with ``data`` (given in decoded form).

        Only streams whose filter is FlateDecode are supported; the data is
        stored decoded in ``decoded_self`` and Flate-encoded in this object.

        Raises:
            PdfReadError: The stream uses a filter other than FlateDecode.
            TypeError: ``data`` is not a ``bytes`` instance.

        """
        from ..filters import FlateDecode  # noqa: PLC0415

        current_filter = self.get(SA.FILTER, "")
        if current_filter not in (FT.FLATE_DECODE, [FT.FLATE_DECODE]):
            raise PdfReadError(
                "Streams encoded with a filter different from FlateDecode are not supported"
            )
        if not isinstance(data, bytes):
            raise TypeError("Data must be bytes")
        if self.decoded_self is None:
            self.get_data()  # to create self.decoded_self
        assert self.decoded_self is not None, "mypy"
        self.decoded_self.set_data(data)
        super().set_data(FlateDecode.encode(data))


# Upper bound on the number of elements accepted when a content stream is given
# as an array of streams; ContentStream.__init__ raises LimitReachedError beyond it.
CONTENT_STREAM_ARRAY_MAX_LENGTH = 10_000


class ContentStream(DecodedStreamObject):
    """
    In order to be fast, this data structure can contain either:

    * raw data in ._data
    * parsed stream operations in ._operations.

    At any time, ContentStream object can either have both of those fields defined,
    or one field defined and the other left empty (``b""`` respectively ``[]``).

    These fields are "rebuilt" lazily, when accessed:

    * when .get_data() is called, if ._data is empty, it is rebuilt from ._operations.
    * when .operations is called, if ._operations is empty, it is rebuilt from ._data.

    Conversely, these fields can be invalidated:

    * when .set_data() is called, ._operations is reset to an empty list.
    * when .operations is set, ._data is reset to ``b""``.
    """

    def __init__(
        self,
        stream: Any,
        pdf: Any,
        forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
    ) -> None:
        """
        Build a content stream from a stream, an array of streams, or nothing.

        Args:
            stream: ``None`` for an empty content stream, otherwise an object
                whose ``get_object()`` yields either a single stream or an
                ArrayObject of streams to be concatenated.
            pdf: Stored as ``self.pdf``; handed to ``read_object`` when reading
                inline image settings.
            forced_encoding: Stored as ``self.forced_encoding``; handed to
                ``read_object`` when parsing operands.

        Raises:
            LimitReachedError: The array has more than
                ``CONTENT_STREAM_ARRAY_MAX_LENGTH`` elements or the concatenated
                data exceeds ``MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH`` bytes.

        """
        self.pdf = pdf
        self._operations: list[tuple[Any, bytes]] = []

        # stream may be a StreamObject or an ArrayObject containing
        # StreamObjects to be concatenated together.
        if stream is None:
            super().set_data(b"")
        else:
            stream = stream.get_object()
            if isinstance(stream, ArrayObject):
                from pypdf.filters import MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH  # noqa: PLC0415

                if (stream_length := len(stream)) > CONTENT_STREAM_ARRAY_MAX_LENGTH:
                    raise LimitReachedError(
                        f"Array-based stream has {stream_length} > {CONTENT_STREAM_ARRAY_MAX_LENGTH} elements."
                    )
                data = bytearray()
                length = 0
                for s in stream:
                    s_resolved = s.get_object()
                    if isinstance(s_resolved, NullObject):
                        continue
                    if not isinstance(s_resolved, StreamObject):
                        # No need to emit an exception here for now - the PDF structure
                        # seems to already be broken beforehand in these cases.
                        logger_warning(
                            f"Expected StreamObject, got {type(s_resolved).__name__} instead. Data might be wrong.",
                            __name__
                        )
                    else:
                        new_data = s_resolved.get_data()
                        length += len(new_data)
                        if length > MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH:
                            raise LimitReachedError(
                                f"Array-based stream has at least {length} > "
                                f"{MAX_ARRAY_BASED_STREAM_OUTPUT_LENGTH} output bytes."
                            )
                        data += new_data
                    # Keep the parts separated by a newline. ``endswith`` is used
                    # because indexing a bytearray yields an int, which never
                    # compares equal to ``b"\n"``.
                    if not data.endswith(b"\n"):
                        # There should be no direct need to check for a change of one byte.
                        length += 1
                        data += b"\n"
                super().set_data(bytes(data))
            else:
                stream_data = stream.get_data()
                assert stream_data is not None
                super().set_data(stream_data)
        self.forced_encoding = forced_encoding

    def replicate(
        self,
        pdf_dest: PdfWriterProtocol,
    ) -> "ContentStream":
        """
        Create a replica of this content stream for ``pdf_dest``.

        The raw data, the parsed operations, the forced encoding and all
        dictionary entries (replicated where the value supports it) are
        carried over, so a stream that currently only holds parsed operations
        does not end up empty.

        Args:
            pdf_dest: The writer the replica is created for.

        Returns:
            The replicated ContentStream.

        """
        d__ = cast(
            "ContentStream",
            self._reference_clone(self.__class__(None, None), pdf_dest, False),
        )
        d__._data = self._data
        # NOTE(review): ``decoded_self`` is not assigned anywhere in this class;
        # this lookup is best-effort and any failure is deliberately ignored.
        try:
            decoded_self = self.decoded_self
            if decoded_self is None:
                self.decoded_self = None
            else:
                self.decoded_self = cast(
                    "DecodedStreamObject", decoded_self.replicate(pdf_dest)
                )
        except Exception:
            pass
        for k, v in self.items():
            d__[k.replicate(pdf_dest)] = (
                v.replicate(pdf_dest) if hasattr(v, "replicate") else v
            )
        # Carry over the ContentStream specific state as well.
        d__.pdf = pdf_dest
        d__._operations = list(self._operations)
        d__.forced_encoding = self.forced_encoding
        return d__

    def clone(
        self,
        pdf_dest: Any,
        force_duplicate: bool = False,
        ignore_fields: Optional[Sequence[Union[str, int]]] = (),
    ) -> "ContentStream":
        """
        Clone object into pdf_dest.

        Args:
            pdf_dest: Target the clone is created for.
            force_duplicate: When false and this object already belongs to
                ``pdf_dest``, the object itself is returned.
            ignore_fields: Forwarded to ``_clone``; ``None`` is treated as an
                empty list.

        Returns:
            The cloned ContentStream

        """
        try:
            if self.indirect_reference.pdf == pdf_dest and not force_duplicate:  # type: ignore
                return self
        except Exception:
            pass

        visited: set[tuple[int, int]] = set()
        d__ = cast(
            "ContentStream",
            self._reference_clone(
                self.__class__(None, None), pdf_dest, force_duplicate
            ),
        )
        if ignore_fields is None:
            ignore_fields = []
        d__._clone(self, pdf_dest, force_duplicate, ignore_fields, visited)
        return d__

    def _clone(
        self,
        src: DictionaryObject,
        pdf_dest: PdfWriterProtocol,
        force_duplicate: bool,
        ignore_fields: Optional[Sequence[Union[str, int]]],
        visited: set[tuple[int, int]],
    ) -> None:
        """
        Update the object from src.

        Only the raw data, the parsed operations and the forced encoding are
        copied; ``force_duplicate``, ``ignore_fields`` and ``visited`` are
        accepted for signature compatibility and not used here.

        Args:
            src: The ContentStream to copy from.
            pdf_dest: Stored as ``self.pdf``.
            force_duplicate: Unused.
            ignore_fields: Unused.
            visited: Unused.

        """
        src_cs = cast("ContentStream", src)
        super().set_data(src_cs._data)
        self.pdf = pdf_dest
        self._operations = list(src_cs._operations)
        self.forced_encoding = src_cs.forced_encoding
        # no need to call DictionaryObject or anything
        # like super(DictionaryObject,self)._clone(src, pdf_dest, force_duplicate, ignore_fields, visited)

    def _parse_content_stream(self, stream: StreamType) -> None:
        """
        Parse ``stream`` from its beginning and append the operations found
        to ``self._operations`` as ``(operands, operator)`` tuples.

        Inline images are stored as ``(image_dict, b"INLINE IMAGE")``.
        """
        # 7.8.2 Content Streams
        stream.seek(0, 0)
        operands: list[Union[int, str, PdfObject]] = []
        while True:
            peek = read_non_whitespace(stream)
            if peek in (b"", 0):
                break
            stream.seek(-1, 1)
            if peek.isalpha() or peek in (b"'", b'"'):
                operator = read_until_regex(stream, NameObject.delimiter_pattern)
                if operator == b"BI":
                    # begin inline image - a completely different parsing
                    # mechanism is required, of course... thanks buddy...
                    assert operands == []
                    ii = self._read_inline_image(stream)
                    self._operations.append((ii, b"INLINE IMAGE"))
                else:
                    self._operations.append((operands, operator))
                    operands = []
            elif peek == b"%":
                # If we encounter a comment in the content stream, we have to
                # handle it here. Typically, read_object will handle
                # encountering a comment -- but read_object assumes that
                # following the comment must be the object we're trying to
                # read. In this case, it could be an operator instead.
                while peek not in (b"\r", b"\n", b""):
                    peek = stream.read(1)
            else:
                operands.append(read_object(stream, None, self.forced_encoding))

    def _read_inline_image(self, stream: StreamType) -> dict[str, Any]:
        """
        Read an inline image whose ``BI`` operator has just been consumed.

        Args:
            stream: Stream positioned right after ``BI``.

        Returns:
            A dictionary with the image dictionary under ``"settings"`` and
            the raw image bytes under ``"data"``.

        Raises:
            PdfStreamError: No ``EI`` marker could be located, even with the
                fallback extraction.

        """
        # begin reading just after the "BI" - begin image
        # first read the dictionary of settings.
        settings = DictionaryObject()
        while True:
            tok = read_non_whitespace(stream)
            stream.seek(-1, 1)
            if tok == b"I":
                # "ID" - begin of image data
                break
            key = read_object(stream, self.pdf)
            tok = read_non_whitespace(stream)
            stream.seek(-1, 1)
            value = read_object(stream, self.pdf)
            settings[key] = value
        # left at beginning of ID
        tmp = stream.read(3)
        assert tmp[:2] == b"ID"
        filtr = settings.get("/F", settings.get("/Filter", "not set"))
        # Remember where the image data starts for the fallback below.
        savpos = stream.tell()
        if isinstance(filtr, list):
            filtr = filtr[0]  # used for encoding
        if "AHx" in filtr or "ASCIIHexDecode" in filtr:
            data = extract_inline__ascii_hex_decode(stream)
        elif "A85" in filtr or "ASCII85Decode" in filtr:
            data = extract_inline__ascii85_decode(stream)
        elif "RL" in filtr or "RunLengthDecode" in filtr:
            data = extract_inline__run_length_decode(stream)
        elif "DCT" in filtr or "DCTDecode" in filtr:
            data = extract_inline__dct_decode(stream)
        elif filtr == "not set":
            # No filter: derive the data length from the colour space,
            # bits per component and the image dimensions where possible.
            cs = settings.get("/CS", "")
            if isinstance(cs, list):
                cs = cs[0]
            if "RGB" in cs:
                lcs = 3
            elif "CMYK" in cs:
                lcs = 4
            else:
                bits = settings.get(
                    "/BPC",
                    8 if cs in {"/I", "/G", "/Indexed", "/DeviceGray"} else -1,
                )
                if bits > 0:
                    lcs = bits / 8.0
                else:
                    data = extract_inline_default(stream)
                    lcs = -1
            if lcs > 0:
                data = stream.read(
                    ceil(cast(int, settings["/W"]) * lcs) * cast(int, settings["/H"])
                )
            # Move to the `EI` if possible.
            ei = read_non_whitespace(stream)
            stream.seek(-1, 1)
        else:
            data = extract_inline_default(stream)

        ei = stream.read(3)
        stream.seek(-1, 1)
        if ei[:2] != b"EI" or ei[2:3] not in WHITESPACES:
            # Deal with wrong/missing `EI` tags. Example: Wrong dimensions specified above.
            stream.seek(savpos, 0)
            data = extract_inline_default(stream)
            ei = stream.read(3)
            stream.seek(-1, 1)
            if ei[:2] != b"EI" or ei[2:3] not in WHITESPACES:  # pragma: no cover
                # Check the same condition again. This should never fail as
                # edge cases are covered by `extract_inline_default` above,
                # but check this to make sure that we are behind the `EI` afterwards.
                raise PdfStreamError(
                    f"Could not extract inline image, even using fallback. Expected 'EI', got {ei!r}"
                )
        return {"settings": settings, "data": data}

    # This overrides the parent method
    def get_data(self) -> bytes:
        """Return the raw data, rebuilding it from the operations if it is empty."""
        if not self._data:
            new_data = BytesIO()
            for operands, operator in self._operations:
                if operator == b"INLINE IMAGE":
                    new_data.write(b"BI")
                    dict_text = BytesIO()
                    operands["settings"].write_to_stream(dict_text)
                    # Drop the first and last two bytes of the serialized
                    # dictionary (its delimiters).
                    new_data.write(dict_text.getvalue()[2:-2])
                    new_data.write(b"ID ")
                    new_data.write(operands["data"])
                    new_data.write(b"EI")
                else:
                    for op in operands:
                        op.write_to_stream(new_data)
                        new_data.write(b" ")
                    new_data.write(operator)
                new_data.write(b"\n")
            self._data = new_data.getvalue()
        return self._data

    # This overrides the parent method
    def set_data(self, data: bytes) -> None:
        """Set the raw data and discard the parsed operations."""
        super().set_data(data)
        self._operations = []

    @property
    def operations(self) -> list[tuple[Any, bytes]]:
        """
        The parsed operations as ``(operands, operator)`` tuples.

        When no operations are available but raw data is, the data is parsed
        and then cleared.
        """
        if not self._operations and self._data:
            self._parse_content_stream(BytesIO(self._data))
            self._data = b""
        return self._operations

    @operations.setter
    def operations(self, operations: list[tuple[Any, bytes]]) -> None:
        self._operations = operations
        self._data = b""

    def isolate_graphics_state(self) -> None:
        """Wrap the content in a ``q`` ... ``Q`` pair, using whichever representation is populated."""
        if self._operations:
            self._operations.insert(0, ([], b"q"))
            self._operations.append(([], b"Q"))
        elif self._data:
            self._data = b"q\n" + self._data + b"\nQ\n"

    # This overrides the parent method
    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """Write the object to ``stream``, rebuilding the raw data from the operations first if needed."""
        if not self._data and self._operations:
            self.get_data()  # this ensures ._data is rebuilt
        super().write_to_stream(stream, encryption_key)


def read_object(
    stream: StreamType,
    pdf: Optional[PdfReaderProtocol],
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union[PdfObject, int, str, ContentStream]:
    """
    Read one object from ``stream``, dispatching on its first byte.

    Args:
        stream: Stream positioned at the first byte of the object.
        pdf: Reader handed to the individual object parsers; must not be
            ``None`` when an indirect reference is encountered.
        forced_encoding: Handed to the string, array and dictionary parsers.

    Returns:
        The parsed object.

    Raises:
        PdfReadError: The first byte does not start any known object. The
            message carries the position and an excerpt of the surrounding
            data.

    """
    tok = stream.read(1)
    stream.seek(-1, 1)  # reset to start
    if tok == b"/":
        return NameObject.read_from_stream(stream, pdf)
    if tok == b"<":
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1)  # reset to start
        if peek == b"<<":
            return DictionaryObject.read_from_stream(stream, pdf, forced_encoding)
        return read_hex_string_from_stream(stream, forced_encoding)
    if tok == b"[":
        return ArrayObject.read_from_stream(stream, pdf, forced_encoding)
    if tok in (b"t", b"f"):
        return BooleanObject.read_from_stream(stream)
    if tok == b"(":
        return read_string_from_stream(stream, forced_encoding)
    if tok == b"e" and stream.read(6) == b"endobj":
        return NullObject()
    if tok == b"n":
        return NullObject.read_from_stream(stream)
    if tok == b"%":
        # comment
        skip_over_comment(stream)
        # Skip whitespace after the comment, then step back onto the first
        # significant byte and retry.
        read_non_whitespace(stream)
        stream.seek(-1, 1)
        return read_object(stream, pdf, forced_encoding)
    # NOTE: an empty read (end of stream) also passes this membership test,
    # because b"" is contained in every bytes object.
    if tok in b"0123456789+-.":
        # number object OR indirect reference
        peek = stream.read(20)
        stream.seek(-len(peek), 1)  # reset to start
        if IndirectPattern.match(peek) is not None:
            assert pdf is not None, "mypy"
            return IndirectObject.read_from_stream(stream, pdf)
        return NumberObject.read_from_stream(stream)
    pos = stream.tell()
    # Collect some context for the error message. Seek absolutely and clamp at
    # the start of the stream: a relative seek of -20 from a position below 20
    # is rejected by file-backed streams.
    stream.seek(max(pos - 20, 0), 0)
    stream_extract = stream.read(80)
    stream.seek(pos)
    read_until_whitespace(stream)
    raise PdfReadError(
        f"Invalid Elementary Object starting with {tok!r} @{pos}: {stream_extract!r}"
    )


class Field(TreeObject):
    """
    Dictionary-backed representation of a single field dictionary.

    Instances are obtained through
    :meth:`get_fields()<pypdf.PdfReader.get_fields>`
    """

    def __init__(self, data: DictionaryObject) -> None:
        """
        Copy the known field attributes from ``data``.

        Only the keys listed by ``FieldDictionaryAttributes`` and
        ``CheckboxRadioButtonAttributes`` are taken over. A ``/V`` entry that
        is an encoded stream is replaced by its decoded text.
        """
        DictionaryObject.__init__(self)
        known_attributes = (
            FieldDictionaryAttributes.attributes()
            + CheckboxRadioButtonAttributes.attributes()
        )
        self.indirect_reference = data.indirect_reference
        for attribute in known_attributes:
            try:
                self[NameObject(attribute)] = data[attribute]
            except KeyError:
                pass

        if not isinstance(self.get("/V"), EncodedStreamObject):
            return
        # The value is stored as a stream: expose its content as text instead.
        raw = cast(EncodedStreamObject, self[NameObject("/V")]).get_data()
        if isinstance(raw, bytes):
            text = raw.decode()
        elif raw is None:
            text = ""
        else:
            raise Exception("Should never happen")
        self[NameObject("/V")] = TextStringObject(text)

    # TABLE 8.69 Entries common to all field dictionaries
    @property
    def field_type(self) -> Optional[NameObject]:
        """Type of this field (read-only)."""
        return self.get(FieldDictionaryAttributes.FT)

    @property
    def parent(self) -> Optional[DictionaryObject]:
        """Parent of this field (read-only)."""
        return self.get(FieldDictionaryAttributes.Parent)

    @property
    def kids(self) -> Optional["ArrayObject"]:
        """Kids of this field (read-only)."""
        return self.get(FieldDictionaryAttributes.Kids)

    @property
    def name(self) -> Optional[str]:
        """Name of this field (read-only)."""
        return self.get(FieldDictionaryAttributes.T)

    @property
    def alternate_name(self) -> Optional[str]:
        """Alternate name of this field (read-only)."""
        return self.get(FieldDictionaryAttributes.TU)

    @property
    def mapping_name(self) -> Optional[str]:
        """
        Mapping name of this field (read-only).

        pypdf uses this name as a key in the dictionary returned by
        :meth:`get_fields()<pypdf.PdfReader.get_fields>`
        """
        return self.get(FieldDictionaryAttributes.TM)

    @property
    def flags(self) -> Optional[int]:
        """
        Field flags (read-only), specifying various characteristics of the
        field (see Table 8.70 of the PDF 1.7 reference).
        """
        return self.get(FieldDictionaryAttributes.Ff)

    @property
    def value(self) -> Optional[Any]:
        """
        Value of this field (read-only).

        The format depends on the field type.
        """
        return self.get(FieldDictionaryAttributes.V)

    @property
    def default_value(self) -> Optional[Any]:
        """Default value of this field (read-only)."""
        return self.get(FieldDictionaryAttributes.DV)

    @property
    def additional_actions(self) -> Optional[DictionaryObject]:
        """
        Additional actions dictionary (read-only).

        It defines how the field behaves in response to trigger events.
        See Section 8.5.2 of the PDF 1.7 reference.
        """
        return self.get(FieldDictionaryAttributes.AA)


class Destination(TreeObject):
    """
    A class representing a destination within a PDF file.

    See section 12.3.2 of the PDF 2.0 reference.

    Args:
        title: Title of this destination.
        page: Reference to the page of this destination. Should
            be an instance of :class:`IndirectObject<pypdf.generic.IndirectObject>`.
        fit: How the destination is displayed.

    Raises:
        PdfReadError: If destination type is invalid.

    """

    node: Optional[
        DictionaryObject
    ] = None  # node provide access to the original Object

    def __init__(
        self,
        title: Union[str, bytes],
        page: Union[NumberObject, IndirectObject, NullObject, DictionaryObject],
        fit: Fit,
    ) -> None:
        self._filtered_children: list[Any] = []  # used in PdfWriter

        typ = fit.fit_type
        # Work on a copy: missing /XYZ arguments are padded below and the
        # caller's Fit object (possibly a shared one) must stay untouched.
        args = list(fit.fit_args)

        DictionaryObject.__init__(self)
        self[NameObject("/Title")] = TextStringObject(title)
        self[NameObject("/Page")] = page
        self[NameObject("/Type")] = typ

        # from table 8.2 of the PDF 1.7 reference.
        if typ == "/XYZ":
            if len(args) < 1:  # left is missing : should never occur
                args.append(NumberObject(0.0))
            if len(args) < 2:  # top is missing
                args.append(NumberObject(0.0))
            if len(args) < 3:  # zoom is missing
                args.append(NumberObject(0.0))
            (
                self[NameObject(TA.LEFT)],
                self[NameObject(TA.TOP)],
                self[NameObject("/Zoom")],
            ) = args
        elif len(args) == 0:
            # Nothing to store; note that the type is not validated in this case.
            pass
        elif typ == TF.FIT_R:
            (
                self[NameObject(TA.LEFT)],
                self[NameObject(TA.BOTTOM)],
                self[NameObject(TA.RIGHT)],
                self[NameObject(TA.TOP)],
            ) = args
        elif typ in [TF.FIT_H, TF.FIT_BH]:
            try:  # Prefer to be more robust not only to null parameters
                (self[NameObject(TA.TOP)],) = args
            except Exception:
                (self[NameObject(TA.TOP)],) = (NullObject(),)
        elif typ in [TF.FIT_V, TF.FIT_BV]:
            try:  # Prefer to be more robust not only to null parameters
                (self[NameObject(TA.LEFT)],) = args
            except Exception:
                (self[NameObject(TA.LEFT)],) = (NullObject(),)
        elif typ in [TF.FIT, TF.FIT_B]:
            pass
        else:
            raise PdfReadError(f"Unknown Destination Type: {typ!r}")

    @property
    def dest_array(self) -> "ArrayObject":
        """
        The destination as an array: the raw page entry, the type and then
        those of left, bottom, right, top and zoom that are present.
        """
        return ArrayObject(
            [self.raw_get("/Page"), self["/Type"]]
            + [
                self[x]
                for x in ["/Left", "/Bottom", "/Right", "/Top", "/Zoom"]
                if x in self
            ]
        )

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write the destination as a dictionary with a ``/D`` entry holding
        :attr:`dest_array` and an ``/S`` entry of ``/GoTo``.

        Args:
            stream: The stream to write to.
            encryption_key: Deprecated; passing a value only triggers the
                deprecation handling.

        """
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"<<\n")
        key = NameObject("/D")
        key.write_to_stream(stream)
        stream.write(b" ")
        value = self.dest_array
        value.write_to_stream(stream)

        key = NameObject("/S")
        key.write_to_stream(stream)
        stream.write(b" ")
        value_s = NameObject("/GoTo")
        value_s.write_to_stream(stream)

        stream.write(b"\n")
        stream.write(b">>")

    @property
    def title(self) -> Optional[str]:
        """Read-only property accessing the destination title."""
        return self.get("/Title")

    @property
    def page(self) -> Optional[IndirectObject]:
        """Read-only property accessing the IndirectObject of the destination page."""
        return self.get("/Page")

    @property
    def typ(self) -> Optional[str]:
        """Read-only property accessing the destination type."""
        return self.get("/Type")

    @property
    def zoom(self) -> Optional[int]:
        """Read-only property accessing the zoom factor."""
        return self.get("/Zoom", None)

    @property
    def left(self) -> Optional[FloatObject]:
        """Read-only property accessing the left horizontal coordinate."""
        return self.get("/Left", None)

    @property
    def right(self) -> Optional[FloatObject]:
        """Read-only property accessing the right horizontal coordinate."""
        return self.get("/Right", None)

    @property
    def top(self) -> Optional[FloatObject]:
        """Read-only property accessing the top vertical coordinate."""
        return self.get("/Top", None)

    @property
    def bottom(self) -> Optional[FloatObject]:
        """Read-only property accessing the bottom vertical coordinate."""
        return self.get("/Bottom", None)

    @property
    def color(self) -> Optional["ArrayObject"]:
        """
        Read-only property accessing the color in (R, G, B) with values 0.0-1.0.

        Defaults to ``[0, 0, 0]`` when no ``/C`` entry is present.
        """
        return self.get(
            "/C", ArrayObject([FloatObject(0), FloatObject(0), FloatObject(0)])
        )

    @property
    def font_format(self) -> Optional[OutlineFontFlag]:
        """
        Read-only property accessing the font type.

        1=italic, 2=bold, 3=both

        Defaults to ``0`` when no ``/F`` entry is present.
        """
        return self.get("/F", 0)

    @property
    def outline_count(self) -> Optional[int]:
        """
        Read-only property accessing the outline count.

        positive = expanded
        negative = collapsed
        absolute value = number of visible descendants at all levels
        """
        return self.get("/Count", None)


================================================
FILE: pypdf/generic/_files.py
================================================
from __future__ import annotations

import bisect
from functools import cached_property
from typing import TYPE_CHECKING, cast

from pypdf._utils import format_iso8824_date, parse_iso8824_date
from pypdf.constants import CatalogAttributes as CA
from pypdf.constants import FileSpecificationDictionaryEntries
from pypdf.constants import PageAttributes as PG
from pypdf.errors import PdfReadError, PyPdfError
from pypdf.generic import (
    ArrayObject,
    ByteStringObject,
    DecodedStreamObject,
    DictionaryObject,
    NameObject,
    NullObject,
    NumberObject,
    StreamObject,
    TextStringObject,
    is_null_or_none,
)

if TYPE_CHECKING:
    import datetime
    from collections.abc import Generator

    from pypdf._writer import PdfWriter


class EmbeddedFile:
    """
    Container holding the information on an embedded file.

    Attributes are evaluated lazily if possible.

    Further information on embedded files can be found in section 7.11 of the PDF 2.0 specification.
    """
    def __init__(self, name: str, pdf_object: DictionaryObject, parent: ArrayObject | None = None) -> None:
        """
        Args:
            name: The (primary) name as provided in the name tree.
            pdf_object: The corresponding PDF object to allow retrieving further data.
            parent: The parent list.
        """
        self._name = name
        self.pdf_object = pdf_object
        self._parent = parent

    @property
    def name(self) -> str:
        """The (primary) name of the embedded file as provided in the name tree."""
        return self._name

    @classmethod
    def _create_new(cls, writer: PdfWriter, name: str, content: str | bytes) -> EmbeddedFile:
        """
        Build a new embedded file and register it with the given PdfWriter.

        The file stream and the file specification dictionary are added to the writer
        as objects, and the (name, file specification reference) pair is inserted into
        the names array of the embedded files.

        Args:
            writer: The PdfWriter instance to add the embedded file to.
            name: The filename to display.
            content: The data in the file. Text is encoded as latin-1.

        Returns:
            EmbeddedFile instance for the newly created embedded file.
        """
        from pypdf.generic import create_string_object  # noqa: PLC0415

        # Text input is stored as its latin-1 byte representation.
        payload = content.encode("latin-1") if isinstance(content, str) else content

        # The stream carrying the actual file data.
        stream = DecodedStreamObject()
        stream.set_data(payload)
        stream.update({NameObject(PG.TYPE): NameObject("/EmbeddedFile")})

        # The /EF dictionary referencing the registered stream.
        stream_reference = writer._add_object(stream)
        embedded_files = DictionaryObject()
        embedded_files.update({NameObject("/F"): stream_reference})

        # The file specification is registered first and filled afterwards.
        file_specification = DictionaryObject()
        file_specification_reference = writer._add_object(file_specification)
        display_name = cast(TextStringObject, create_string_object(name))
        file_specification.update(
            {
                NameObject(PG.TYPE): NameObject("/Filespec"),
                NameObject(FileSpecificationDictionaryEntries.F): display_name,
                NameObject(FileSpecificationDictionaryEntries.EF): embedded_files,
            }
        )

        # Insert the reference first and the name second at the very same index:
        # the second insertion pushes the reference behind the name, yielding the
        # required [..., name, reference, ...] order without index arithmetic.
        target_array = cls._get_names_array(writer)
        position = cls._get_insertion_index(target_array, display_name)
        target_array.insert(position, file_specification_reference)
        target_array.insert(position, display_name)

        return cls(name=name, pdf_object=file_specification, parent=target_array)

    @classmethod
    def _get_names_array(cls, writer: PdfWriter) -> ArrayObject:
        """
        Return the flat names array for embedded files, creating or flattening it if required.

        Missing ``/Names`` (catalog) and ``/EmbeddedFiles`` entries are created on the
        fly. An existing ``/Kids``-based tree is replaced by a new ``/Names``-based one
        holding the entries of the direct kids.

        Args:
            writer: The PdfWriter whose catalog should be inspected/updated.

        Returns:
            The array of alternating names and file specifications.

        Raises:
            PdfReadError: The existing embedded files entry has neither ``/Names`` nor ``/Kids``.
        """
        if CA.NAMES not in writer.root_object:
            # The catalog does not have a /Names entry yet.
            writer.root_object[NameObject(CA.NAMES)] = writer._add_object(DictionaryObject())
        names_dict = cast(DictionaryObject, writer.root_object[CA.NAMES])

        def install_flat_names() -> ArrayObject:
            # Register a fresh /Names-based dictionary as the /EmbeddedFiles entry.
            flat = ArrayObject()
            names_dict[NameObject("/EmbeddedFiles")] = writer._add_object(
                DictionaryObject({NameObject(CA.NAMES): flat})
            )
            return flat

        if "/EmbeddedFiles" not in names_dict:
            # No embedded files entry so far: start with an empty one.
            return install_flat_names()

        tree = cast(DictionaryObject, names_dict["/EmbeddedFiles"])
        if "/Names" in tree:
            # Simple case: the list is flat already.
            return cast(ArrayObject, tree[NameObject(CA.NAMES)])
        if "/Kids" not in tree:
            # Invalid case: this is no name tree.
            raise PdfReadError("Got neither Names nor Kids in embedded files tree.")

        # Complex case: convert the /Kids-based name tree to a /Names-based one.
        # /Names-based ones are much easier to handle, as the insertion logic
        # has to consider one case only.
        kids = cast(ArrayObject, tree["/Kids"].get_object())
        flattened = install_flat_names()
        for kid in kids:
            # The file entries themselves are not changed, thus references to them
            # should not be affected.
            # There might be further (nested) kids here, which are not handled.
            # Wait for an example before evaluating an implementation.
            for entry in kid.get_object().get("/Names", []):
                flattened.append(entry)
        return flattened

    @classmethod
    def _get_insertion_index(cls, names_array: ArrayObject, name: str) -> int:
        keys = [names_array[i].encode("utf-8") for i in range(0, len(names_array), 2)]
        name_bytes = name.encode("utf-8")

        start = bisect.bisect_left(keys, name_bytes)
        end = bisect.bisect_right(keys, name_bytes)

        if start != end:
            return end * 2
        if start == 0:
            return 0
        if start == (key_count := len(keys)):
            return key_count * 2
        return end * 2

    @property
    def alternative_name(self) -> str | None:
        """
        Retrieve the alternative name (file specification).

        The first of the ``UF`` and ``F`` entries which is present and not null wins;
        ``None`` is returned if there is no such entry.
        """
        # PDF 2.0 reference, table 43:
        #   > A PDF reader shall use the value of the UF key, when present, instead of the F key.
        for entry in (FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F):
            if entry not in self.pdf_object:
                continue
            resolved = self.pdf_object[entry].get_object()
            if is_null_or_none(resolved):
                continue
            return cast(str, resolved)
        return None

    @alternative_name.setter
    def alternative_name(self, value: TextStringObject | None) -> None:
        """
        Set the alternative name (file specification).

        A value is written to both the ``UF`` and the ``F`` entry. Passing ``None``
        replaces the entries which are currently present by null objects.
        """
        for entry in (FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F):
            if value is not None:
                self.pdf_object[NameObject(entry)] = value
            elif entry in self.pdf_object:
                self.pdf_object[NameObject(entry)] = NullObject()

    @property
    def description(self) -> str | None:
        """Retrieve the description, or ``None`` if it is missing or null."""
        description = self.pdf_object.get(FileSpecificationDictionaryEntries.DESC)
        return None if is_null_or_none(description) else description

    @description.setter
    def description(self, value: TextStringObject | None) -> None:
        """Set the description. ``None`` stores a null object instead."""
        replacement = NullObject() if value is None else value
        self.pdf_object[NameObject(FileSpecificationDictionaryEntries.DESC)] = replacement

    @property
    def associated_file_relationship(self) -> str:
        """Retrieve the relationship of the referring document to this embedded file."""
        return self.pdf_object.get("/AFRelationship", "/Unspecified")

    @associated_file_relationship.setter
    def associated_file_relationship(self, value: NameObject) -> None:
        """Store the relationship of the referring document to this embedded file as ``/AFRelationship``."""
        key = NameObject("/AFRelationship")
        self.pdf_object[key] = value

    @property
    def _embedded_file(self) -> StreamObject:
        """
        Retrieve the actual embedded file stream.

        The stream is looked up inside the ``/EF`` dictionary, preferring the ``UF``
        entry over the ``F`` entry.

        Raises:
            PdfReadError: There is no ``/EF`` entry or it has neither of the two keys.
        """
        if "/EF" not in self.pdf_object:
            raise PdfReadError(f"/EF entry not found: {self.pdf_object}")
        ef = cast(DictionaryObject, self.pdf_object["/EF"])
        candidates = (FileSpecificationDictionaryEntries.UF, FileSpecificationDictionaryEntries.F)
        matching_key = next((key for key in candidates if key in ef), None)
        if matching_key is None:
            raise PdfReadError(f"No /(U)F key found in file dictionary: {ef}")
        return cast(StreamObject, ef[matching_key].get_object())

    @property
    def _params(self) -> DictionaryObject:
        """
        Retrieve the file-specific parameters.

        If the stream has no ``/Params`` entry, a new empty dictionary is returned,
        which is not attached to the stream.
        """
        stream = self._embedded_file
        params = stream.get("/Params", DictionaryObject())
        return params.get_object()

    @cached_property
    def _ensure_params(self) -> DictionaryObject:
        """
        Return the ``/Params`` dictionary of the file stream, creating it if it is missing.

        The result is cached on the instance after the first access.
        """
        stream = self._embedded_file
        params_missing = "/Params" not in stream
        if params_missing:
            stream[NameObject("/Params")] = DictionaryObject()
        params = stream["/Params"]
        return cast(DictionaryObject, params)

    @property
    def subtype(self) -> str | None:
        """
        Retrieve the subtype. This is a MIME media type, prefixed by a slash.

        ``None`` is returned if the entry is missing or null.
        """
        subtype = self._embedded_file.get("/Subtype")
        return None if is_null_or_none(subtype) else subtype

    @subtype.setter
    def subtype(self, value: NameObject | None) -> None:
        """
        Set the subtype. This should be a MIME media type, prefixed by a slash.

        ``None`` stores a null object instead.
        """
        stream = self._embedded_file
        stream[NameObject("/Subtype")] = NullObject() if value is None else value

    @property
    def content(self) -> bytes:
        """Retrieve the actual file content, as provided by the embedded file stream."""
        stream = self._embedded_file
        return stream.get_data()

    @content.setter
    def content(self, value: str | bytes) -> None:
        """Set the file content. Text is encoded as latin-1 before being stored."""
        data = value.encode("latin-1") if isinstance(value, str) else value
        self._embedded_file.set_data(data)

    @property
    def size(self) -> int | None:
        """
        Retrieve the size of the uncompressed file in bytes.

        The value is read from the ``/Size`` parameter; ``None`` is returned if it is
        missing or null.
        """
        size = self._params.get("/Size")
        return None if is_null_or_none(size) else size

    @size.setter
    def size(self, value: NumberObject | None) -> None:
        """
        Set the size of the uncompressed file in bytes.

        The ``/Params`` dictionary is created if required. ``None`` stores a null object.
        """
        params = self._ensure_params
        params[NameObject("/Size")] = NullObject() if value is None else value

    @property
    def creation_date(self) -> datetime.datetime | None:
        """Retrieve the file creation datetime, parsed from the ``/CreationDate`` parameter."""
        raw_date = self._params.get("/CreationDate")
        return parse_iso8824_date(raw_date)

    @creation_date.setter
    def creation_date(self, value: datetime.datetime | None) -> None:
        """
        Set the file creation datetime.

        The ``/Params`` dictionary is created if required. ``None`` stores a null object.
        """
        params = self._ensure_params
        if value is None:
            params[NameObject("/CreationDate")] = NullObject()
            return
        formatted = format_iso8824_date(value)
        params[NameObject("/CreationDate")] = TextStringObject(formatted)

    @property
    def modification_date(self) -> datetime.datetime | None:
        """Retrieve the datetime of the last file modification, parsed from the ``/ModDate`` parameter."""
        raw_date = self._params.get("/ModDate")
        return parse_iso8824_date(raw_date)

    @modification_date.setter
    def modification_date(self, value: datetime.datetime | None) -> None:
        """
        Set the datetime of the last file modification.

        The ``/Params`` dictionary is created if required. ``None`` stores a null object.
        """
        params = self._ensure_params
        if value is None:
            params[NameObject("/ModDate")] = NullObject()
            return
        formatted = format_iso8824_date(value)
        params[NameObject("/ModDate")] = TextStringObject(formatted)

    @property
    def checksum(self) -> bytes | None:
        """
        Retrieve the MD5 checksum of the (uncompressed) file.

        The value is read from the ``/CheckSum`` parameter; ``None`` is returned if it
        is missing or null.
        """
        checksum = self._params.get("/CheckSum")
        return None if is_null_or_none(checksum) else checksum

    @checksum.setter
    def checksum(self, value: ByteStringObject | None) -> None:
        """
        Set the MD5 checksum of the (uncompressed) file.

        The ``/Params`` dictionary is created if required. ``None`` stores a null object.
        """
        params = self._ensure_params
        params[NameObject("/CheckSum")] = NullObject() if value is None else value

    def delete(self) -> None:
        """
        Delete the file from the document.

        The file specification (or its indirect reference) is searched inside the
        parent list and removed together with the entry directly in front of it,
        which is the name. Afterwards, this instance points to an empty dictionary.

        Raises:
            PyPdfError: There is no (non-empty) parent list or the file is not part of it.
        """
        parent = self._parent
        if not parent:
            raise PyPdfError("Parent required to delete file from document.")

        # The parent might hold the object itself or just a reference to it.
        target = self.pdf_object
        if target not in parent:
            target = getattr(self.pdf_object, "indirect_reference", None)
            if target is None or target not in parent:
                raise PyPdfError("File not found in parent object.")

        position = parent.index(target)
        parent.pop(position)  # Reference.
        parent.pop(position - 1)  # Name.
        self.pdf_object = DictionaryObject()  # Invalidate.

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__} name={self.name!r}>"

    @classmethod
    def _load_from_names(cls, names: ArrayObject) -> Generator[EmbeddedFile]:
        """
        Convert the given name tree into class instances.

        Args:
            names: The name tree to load the data from.

        Returns:
            Iterable of class instances for the files found.
        """
        # This is a name tree of the format [name_1, reference_1, name_2, reference_2, ...]
        for i, name in enumerate(names):
            if not isinstance(name, str):
                # Skip plain strings and retrieve them as `direct_name` by index.
                file_dictionary = name.get_object()
                direct_name = names[i - 1].get_object()
                yield EmbeddedFile(name=direct_name, pdf_object=file_dictionary, parent=names)

    @classmethod
    def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]:
        """
        Load the embedded files for the given document catalog.

        This method and its signature are considered internal API and thus not exposed publicly for now.

        Files from the direct ``/Kids`` are yielded first, followed by the files of a
        top-level ``/Names`` array. Nothing is yielded if the catalog has no
        ``/Names`` -> ``/EmbeddedFiles`` entry.

        Args:
            catalog: The document catalog to load from.

        Returns:
            Iterable of class instances for the files found.
        """
        try:
            names_dictionary = cast(DictionaryObject, catalog["/Names"])
            container = cast(DictionaryObject, names_dictionary["/EmbeddedFiles"])
        except KeyError:
            return

        if "/Kids" in container:
            kids = cast(ArrayObject, container["/Kids"].get_object())
            for kid_reference in kids:
                # There might be further (nested) kids here, which are not handled.
                # Wait for an example before evaluating an implementation.
                kid = kid_reference.get_object()
                if "/Names" not in kid:
                    continue
                yield from cls._load_from_names(cast(ArrayObject, kid["/Names"]))
        if "/Names" in container:
            yield from cls._load_from_names(cast(ArrayObject, container["/Names"]))


================================================
FILE: pypdf/generic/_fit.py
================================================
from typing import Any, Optional, Union

from ._base import is_null_or_none


class Fit:
    def __init__(
        self, fit_type: str, fit_args: tuple[Union[None, float, Any], ...] = ()
    ) -> None:
        """
        Store the fit type as a name object and convert the fit arguments.

        Args:
            fit_type: The name of the fit type, for example ``/XYZ``.
            fit_args: The arguments of the fit type. Null-like values are stored
                as null objects, all other values are converted to float objects.
        """
        from ._base import FloatObject, NameObject, NullObject, NumberObject  # noqa: PLC0415

        self.fit_type = NameObject(fit_type)

        converted = []
        for argument in fit_args:
            if is_null_or_none(argument):
                converted.append(NullObject())
            else:
                converted.append(FloatObject(argument))
        self.fit_args: list[Union[NullObject, FloatObject, NumberObject]] = converted

    @classmethod
    def xyz(
        cls,
        left: Optional[float] = None,
        top: Optional[float] = None,
        zoom: Optional[float] = None,
    ) -> "Fit":
        """
        Create an ``/XYZ`` fit.

        Display the page designated by page, with the coordinates (left, top)
        positioned at the upper-left corner of the window and the contents
        of the page magnified by the factor zoom.

        A null value for any of the parameters left, top, or zoom specifies
        that the current value of that parameter is to be retained unchanged.

        A zoom value of 0 has the same meaning as a null value.

        Args:
            left: Horizontal coordinate to position at the left edge of the window.
            top: Vertical coordinate to position at the top edge of the window.
            zoom: Magnification factor for the contents of the page.

        Returns:
            The created fit object.

        """
        arguments = (left, top, zoom)
        return Fit(fit_type="/XYZ", fit_args=arguments)

    @classmethod
    def fit(cls) -> "Fit":
        """
        Create a ``/Fit`` fit, which does not take any arguments.

        Display the page designated by page, with its contents magnified just
        enough to fit the entire page within the window both horizontally and
        vertically.

        If the required horizontal and vertical magnification factors are
        different, use the smaller of the two, centering the page within the
        window in the other dimension.

        Returns:
            The created fit object.
        """
        fit_type = "/Fit"
        return Fit(fit_type=fit_type)

    @classmethod
    def fit_horizontally(cls, top: Optional[float] = None) -> "Fit":
        """
        Create a ``/FitH`` fit.

        Display the page designated by page, with the vertical coordinate top
        positioned at the top edge of the window and the contents of the page
        magnified just enough to fit the entire width of the page within the
        window.

        A null value for ``top`` specifies that the current value of that
        parameter is to be retained unchanged.

        Args:
            top: Vertical coordinate to position at the top edge of the window.

        Returns:
            The created fit object.

        """
        arguments = (top,)
        return Fit(fit_type="/FitH", fit_args=arguments)

    @classmethod
    def fit_vertically(cls, left: Optional[float] = None) -> "Fit":
        """
        Create a ``/FitV`` fit.

        Display the page designated by page, with the horizontal coordinate
        left positioned at the left edge of the window and the contents of the
        page magnified just enough to fit the entire height of the page within
        the window.

        Args:
            left: Horizontal coordinate to position at the left edge of the window.
                ``None`` is stored as a null object.

        Returns:
            The created fit object.

        """
        arguments = (left,)
        return Fit(fit_type="/FitV", fit_args=arguments)

    @classmethod
    def fit_rectangle(
        cls,
        left: Optional[float] = None,
        bottom: Optional[float] = None,
        right: Optional[float] = None,
        top: Optional[float] = None,
    ) -> "Fit":
        """
        Create a ``/FitR`` fit.

        Display the page designated by page, with its contents magnified
        just enough to fit the rectangle specified by the coordinates
        left, bottom, right, and top entirely within the window
        both horizontally and vertically.

        If the required horizontal and vertical magnification factors are
        different, use the smaller of the two, centering the rectangle within
        the window in the other dimension.

        A null value for any of the parameters may result in unpredictable
        behavior.

        Args:
            left: Left coordinate of the rectangle.
            bottom: Bottom coordinate of the rectangle.
            right: Right coordinate of the rectangle.
            top: Top coordinate of the rectangle.

        Returns:
            The created fit object.

        """
        rectangle = (left, bottom, right, top)
        return Fit(fit_type="/FitR", fit_args=rectangle)

    @classmethod
    def fit_box(cls) -> "Fit":
        """
        Create a ``/FitB`` fit, which does not take any arguments.

        Display the page designated by page, with its contents magnified just
        enough to fit its bounding box entirely within the window both
        horizontally and vertically.

        If the required horizontal and vertical magnification factors are
        different, use the smaller of the two, centering the bounding box
        within the window in the other dimension.

        Returns:
            The created fit object.
        """
        fit_type = "/FitB"
        return Fit(fit_type=fit_type)

    @classmethod
    def fit_box_horizontally(cls, top: Optional[float] = None) -> "Fit":
        """
        Create a ``/FitBH`` fit.

        Display the page designated by page, with the vertical coordinate top
        positioned at the top edge of the window and the contents of the page
        magnified just enough to fit the entire width of its bounding box
        within the window.

        A null value for top specifies that the current value of that parameter
        is to be retained unchanged.

        Args:
            top: Vertical coordinate to position at the top edge of the window.

        Returns:
            The created fit object.

        """
        arguments = (top,)
        return Fit(fit_type="/FitBH", fit_args=arguments)

    @classmethod
    def fit_box_vertically(cls, left: Optional[float] = None) -> "Fit":
        """
        Create a ``/FitBV`` fit.

        Display the page designated by page, with the horizontal coordinate
        left positioned at the left edge of the window and the contents of the
        page magnified just enough to fit the entire height of its bounding box
        within the window.

        A null value for left specifies that the current value of that
        parameter is to be retained unchanged.

        Args:
            left: Horizontal coordinate to position at the left edge of the window.

        Returns:
            The created fit object.

        """
        arguments = (left,)
        return Fit(fit_type="/FitBV", fit_args=arguments)

    def __str__(self) -> str:
        if not self.fit_args:
            return f"Fit({self.fit_type})"
        return f"Fit({self.fit_type}, {self.fit_args})"


# Shared module-level instance of the argument-less "/Fit" type.
DEFAULT_FIT = Fit.fit()


================================================
FILE: pypdf/generic/_image_inline.py
================================================
# Copyright (c) 2024, pypdf contributors
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

import logging
from io import BytesIO
from typing import IO

from .._utils import (
    WHITESPACES,
    WHITESPACES_AS_BYTES,
    StreamType,
    logger_warning,
    read_non_whitespace,
)
from ..errors import PdfReadError

logger = logging.getLogger(__name__)

# An inline image should be used only for small images (4096 bytes or less),
# but allow twice this for cases where this has been exceeded.
# This is the number of bytes requested per `stream.read()` call while scanning
# inline image data for its end.
BUFFER_SIZE = 8192


def _check_end_image_marker(stream: StreamType) -> bool:
    """
    Check whether the next token of the stream is the ``EI`` (end image) operator.

    The first byte of the token is obtained through ``read_non_whitespace``, followed
    by up to two further bytes. Afterwards, the stream is moved back to the first byte
    of the token, so that the caller finds the stream positioned onto the ``EI``.

    Args:
        stream: The stream to inspect.

    Returns:
        ``True`` if the token is ``EI`` followed by either a whitespace character or
        the end of the stream, ``False`` otherwise.
    """
    ei_tok = read_non_whitespace(stream)
    ei_tok += stream.read(2)
    # Rewind by the number of bytes actually read. Close to the end of the stream,
    # fewer than three bytes might be available; a fixed rewind by three would then
    # end up in front of the token or even attempt to seek before the stream start.
    stream.seek(-len(ei_tok), 1)
    return ei_tok[:2] == b"EI" and (ei_tok[2:3] == b"" or ei_tok[2:3] in WHITESPACES)


def extract_inline__ascii_hex_decode(stream: StreamType) -> bytes:
    """
    Extract HexEncoded stream from inline image.
    The stream will be moved onto the EI.

    The data is read in chunks until the ``>`` delimiter is found. If an ``EI``
    shows up inside a chunk without a preceding ``>``, it is used as the end instead.

    Args:
        stream: The stream positioned at the start of the inline image data.

    Returns:
        The extracted data. The ``>`` delimiter is included if it has been found.

    Raises:
        PdfReadError: The stream ended before the end of the image has been found,
            or the data is not followed by the ``EI`` marker.
    """
    data_out: bytes = b""
    # Read data until delimiter > and EI as backup.
    while True:
        data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_tok = data_buffered.find(b">")
        if pos_tok >= 0:  # found >
            # Keep everything up to and including the delimiter and move the stream
            # directly behind it.
            data_out += data_buffered[: pos_tok + 1]
            stream.seek(-len(data_buffered) + pos_tok + 1, 1)
            break
        pos_ei = data_buffered.find(b"EI")
        if pos_ei >= 0:  # found EI
            # Move the stream onto the byte directly in front of the EI and walk
            # backwards as long as whitespace is found, shortening the data to keep
            # by one byte for each step.
            stream.seek(-len(data_buffered) + pos_ei - 1, 1)
            c = stream.read(1)
            while c in WHITESPACES:
                stream.seek(-2, 1)
                c = stream.read(1)
                pos_ei -= 1
            data_out += data_buffered[:pos_ei]
            break
        if len(data_buffered) == 2:
            # Presumably only the two bytes carried over from the previous round are
            # left, thus there is no further data to search.
            data_out += data_buffered
            raise PdfReadError("Unexpected end of stream")
        # Neither > nor EI found
        # The last two bytes are not consumed yet and read again in the next round,
        # so that an EI spanning two chunks is not missed.
        data_out += data_buffered[:-2]
        stream.seek(-2, 1)

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out


def extract_inline__ascii85_decode(stream: StreamType) -> bytes:
    """
    Extract A85 stream from inline image.
    The stream will be moved onto the EI.

    The data is read in chunks until the ``~>`` delimiter is found.

    Args:
        stream: The stream positioned at the start of the inline image data.

    Returns:
        The extracted data including the ``~>`` delimiter.

    Raises:
        PdfReadError: The stream ended before the delimiter has been found,
            or the data is not followed by the ``EI`` marker.
    """
    data_out: bytes = b""
    # Read data until delimiter ~>
    while True:
        data_buffered = read_non_whitespace(stream) + stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_tok = data_buffered.find(b"~>")
        if pos_tok >= 0:  # found!
            # Keep everything up to and including the delimiter and move the stream
            # directly behind it.
            data_out += data_buffered[: pos_tok + 2]
            stream.seek(-len(data_buffered) + pos_tok + 2, 1)
            break
        if len(data_buffered) == 2:  # end of buffer
            data_out += data_buffered
            raise PdfReadError("Unexpected end of stream")
        data_out += data_buffered[
            :-2
        ]  # hold back the last two bytes and read them again, in case the chunk ends in the middle of ~>
        stream.seek(-2, 1)

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out


def extract_inline__run_length_decode(stream: StreamType) -> bytes:
    """
    Extract RL (RunLengthDecode) stream from inline image.
    The stream will be moved onto the EI.

    The data is read in chunks until the first chunk containing the EOD byte (128).
    If this byte is not followed by ``EI`` (directly or after one further byte), the
    first ``EI`` inside the same chunk is used as the end instead.

    Args:
        stream: The stream positioned at the start of the inline image data.

    Returns:
        The extracted data.

    Raises:
        PdfReadError: The stream ended before an EOD byte has been found,
            or the data is not followed by the ``EI`` marker.
    """
    data_out: bytes = b""
    # Read data until delimiter 128
    while True:
        data_buffered = stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_tok = data_buffered.find(b"\x80")
        if pos_tok >= 0:  # found
            # Ideally, we could just use plain run-length decoding here, where 80_16 = 128_10
            # marks the EOD. But there apparently are cases like in issue #3517, where we have
            # an inline image with up to 51 EOD markers. In these cases, be resilient here and
            # use the default `EI` marker detection instead. Please note that this fallback
            # still omits special `EI` handling within the stream, but for now assume that having
            # both of these cases occur at the same time is very unlikely (and the image stream
            # is broken anyway).
            # For now, do not skip over more than one whitespace character.
            after_token = data_buffered[pos_tok + 1 : pos_tok + 4]
            if after_token.startswith(b"EI") or after_token.endswith(b"EI"):
                # Regular case: keep the data including the EOD byte and move the
                # stream directly behind it.
                data_out += data_buffered[: pos_tok + 1]
                stream.seek(-len(data_buffered) + pos_tok + 1, 1)
            else:
                logger_warning("Early EOD in RunLengthDecode of inline image, using fallback.", __name__)
                ei_marker = data_buffered.find(b"EI")
                # NOTE(review): If the chunk does not contain an `EI` (or starts with it),
                # nothing is kept from this chunk and the stream is not moved back, which
                # leaves the decision to the end marker check below.
                if ei_marker > 0:
                    # Keep everything in front of the EI and move the stream onto the
                    # byte directly in front of it.
                    data_out += data_buffered[: ei_marker]
                    stream.seek(-len(data_buffered) + ei_marker - 1, 1)
            break
        data_out += data_buffered

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return data_out


def extract_inline__dct_decode(stream: StreamType) -> bytes:
    """
    Extract DCT (JPEG) stream from inline image.
    The stream will be moved onto the EI.

    The data is consumed marker by marker up to the end marker ``FF D9``.
    Bytes in front of the first ``FF`` are dropped.

    Raises:
        PdfReadError: The stream ended before the end marker has been found,
            or the data is not followed by the ``EI`` marker.
    """
    def read(length: int) -> bytes:
        # If 0 bytes are returned, and *size* was not 0, this indicates end of file.
        # If the object is in non-blocking mode and no bytes are available, `None` is returned.
        chunk = stream.read(length)
        if chunk is None or len(chunk) != length:
            raise PdfReadError("Unexpected end of stream")
        return chunk

    # Markers which are followed by a two-byte segment length.
    segment_markers = (
        b"\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc9\xca\xcb\xcc\xcd\xce\xcf"
        b"\xda\xdb\xdc\xdd\xde\xdf"
        b"\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xfe"
    )

    collected = bytearray()
    # Read Blocks of data (ID/Size/data) up to ID=FF/D9
    # https://www.digicamsoft.com/itu/itu-t81-36.html
    started = False
    while True:
        byte = read(1)
        is_marker_prefix = byte == b"\xff"
        if started or is_marker_prefix:
            collected += byte
        if not is_marker_prefix:
            continue
        started = True
        marker = read(1)
        collected += marker
        if marker == b"\xd9":  # end
            break
        if marker == b"\xff":
            stream.seek(-1, 1)  # pragma: no cover
        elif marker in segment_markers:
            # The length covers the two length bytes themselves.
            header = read(2)
            collected += header
            collected += read(int.from_bytes(header, "big") - 2)
        # Everything else (including the stuffing byte 00) has no segment to skip.

    if not _check_end_image_marker(stream):
        raise PdfReadError("EI stream not found")
    return bytes(collected)


def extract_inline_default(stream: StreamType) -> bytes:
    """
    Legacy method, used by default

    The data is read in chunks and searched for the letter ``E``. Each hit is checked
    for being the ``EI`` operator ending the image; otherwise the search continues
    directly behind this ``E``.

    Args:
        stream: The stream positioned at the start of the inline image data.

    Returns:
        The image data in front of the accepted ``EI``.

    Raises:
        PdfReadError: The stream ended before an acceptable ``EI`` has been found.
    """
    stream_out = BytesIO()
    # Read the inline image, while checking for EI (End Image) operator.
    while True:
        data_buffered = stream.read(BUFFER_SIZE)
        if not data_buffered:
            raise PdfReadError("Unexpected end of stream")
        pos_ei = data_buffered.find(
            b"E"
        )  # We can not look straight for "EI" because it may not have been loaded in the buffer

        if pos_ei == -1:
            stream_out.write(data_buffered)
        else:
            # Write out everything including E (the one from EI to be removed)
            stream_out.write(data_buffered[0 : pos_ei + 1])
            # Output position of the E, used to cut it off again if this is the end.
            sav_pos_ei = stream_out.tell() - 1
            # Seek back in the stream to read the E next
            stream.seek(pos_ei + 1 - len(data_buffered), 1)
            # Position directly behind the E: the search resumes here on each rejection.
            saved_pos = stream.tell()
            # Check for End Image
            tok2 = stream.read(1)  # I of "EI"
            if tok2 != b"I":
                stream.seek(saved_pos, 0)
                continue
            tok3 = stream.read(1)  # possible space after "EI"
            if tok3 not in WHITESPACES:
                stream.seek(saved_pos, 0)
                continue
            # Skip all whitespace behind the EI to get the first byte of the next token.
            while tok3 in WHITESPACES:
                tok3 = stream.read(1)
            # Without whitespace in front of the E, only accept the EI if the next
            # token starts with Q or E.
            if data_buffered[pos_ei - 1 : pos_ei] not in WHITESPACES and tok3 not in {
                b"Q",
                b"E",
            }:  # for Q or EMC
                stream.seek(saved_pos, 0)
                continue
            if is_followed_by_binary_data(stream):
                # Inline image contains `EI ` sequence usually marking the end of it, but
                # is followed by binary data which does not make sense for the actual end.
                stream.seek(saved_pos, 0)
                continue
            # Data contains [\s]EI[\s](Q|EMC): 4 chars are sufficient
            # remove E(I) wrongly inserted earlier
            # The stream is moved back onto the E of the accepted EI.
            stream.seek(saved_pos - 1, 0)
            stream_out.truncate(sav_pos_ei)
            break

    return stream_out.getvalue()


def is_followed_by_binary_data(stream: IO[bytes], length: int = 10) -> bool:
    """
    Check if the next bytes of the stream look like binary image data or regular page content.

    This is just some heuristics due to the PDF specification being too imprecise about
    inline images containing the `EI` marker which would end an image. Starting with PDF 2.0,
    we finally get a mandatory length field, but with (proper) PDF 2.0 support being very limited
    everywhere, we should not expect to be able to remove such hacks in the near future - especially
    considering legacy documents as well.

    The actual implementation draws some inspiration from
    https://github.com/itext/itext-java/blob/9.1.0/kernel/src/main/java/com/itextpdf/kernel/pdf/canvas/parser/util/InlineImageParsingUtils.java

    Args:
        stream: Stream to peek into. Its position is restored before returning.
        length: Maximum number of bytes to inspect.

    Returns:
        True if the upcoming bytes look like binary data, False if they look like
        regular content (or if there is nothing left to read).
    """
    position = stream.tell()
    data = stream.read(length)
    stream.seek(position)
    if not data:
        return False
    operator_start = None
    operator_end = None

    for index, byte in enumerate(data):
        if byte < 32 and byte not in WHITESPACES_AS_BYTES:
            # This covers all characters not being displayable directly, although omitting whitespace
            # to allow for operator detection.
            return True
        is_whitespace = byte in WHITESPACES_AS_BYTES
        if operator_start is None and not is_whitespace:
            # Interpret all other non-whitespace characters as the start of an operation.
            operator_start = index
        if operator_start is not None and is_whitespace:
            # A whitespace stops an operation.
            # Assume that having an inline image with tons of whitespace is rather unlikely.
            operator_end = index
            break

    if operator_start is None:
        # Inline images should not have tons of whitespaces, which would lead to no operator start.
        return False
    if operator_end is None:
        # We probably are inside an operation. Use the number of bytes actually read:
        # near the end of the stream fewer than `length` bytes are available, and using
        # `length` would overestimate the operator length (for example for a final `EMC`).
        operator_end = len(data)
    operator_length = operator_end - operator_start
    operator = data[operator_start:operator_end]
    if operator.startswith(b"/") and operator_length > 1:
        # Name object.
        return False
    if operator.replace(b".", b"").isdigit():
        # Graphics operator, for example a move. A number (integer or float).
        return False
    if operator_length > 3:  # noqa: SIM103
        # Usually, the operators inside a content stream should not have more than three characters,
        # especially after an inline image.
        return True
    return False


================================================
FILE: pypdf/generic/_image_xobject.py
================================================
"""Functions to convert an image XObject to an image"""

import sys
from io import BytesIO
from typing import Any, Literal, Optional, Union, cast

from .._utils import check_if_whitespace_only, logger_warning
from ..constants import ColorSpaces, StreamAttributes
from ..constants import FilterTypes as FT
from ..constants import ImageAttributes as IA
from ..errors import EmptyImageDataError, PdfReadError
from ..generic import (
    ArrayObject,
    DecodedStreamObject,
    EncodedStreamObject,
    NullObject,
    TextStringObject,
    is_null_or_none,
)

if sys.version_info[:2] >= (3, 10):
    from typing import TypeAlias
else:
    from typing_extensions import TypeAlias


try:
    from PIL import Image, UnidentifiedImageError
except ImportError:
    raise ImportError(
        "pillow is required to do image extraction. "
        "It can be installed via 'pip install pypdf[image]'"
    )

# Image modes handled in this module: Pillow mode names, the internal "2bits"/"4bits"
# markers for packed 2- and 4-bit samples, and "" if no mode could be determined.
mode_str_type: TypeAlias = Literal[
    "", "1", "RGB", "2bits", "4bits", "P", "L", "RGBA", "CMYK"
]

# Maximum recursion depth of `_get_image_mode` when resolving nested color spaces.
MAX_IMAGE_MODE_NESTING_DEPTH: int = 10


def _get_image_mode(
    color_space: Union[str, list[Any], Any],
    color_components: int,
    prev_mode: mode_str_type,
    depth: int = 0,
) -> tuple[mode_str_type, bool]:
    """
    Determine the image mode belonging to a color space.

    Args:
        color_space: Color space name or color space array. A null value yields no mode.
        color_components: Number of color components. Used as an index into the known
            modes if the color space name itself is not known.
        prev_mode: Mode to fall back to if nothing else yields a mode.
        depth: Current nesting level while resolving nested color spaces.

    Returns:
        Image mode, not taking into account mask (transparency).
        ColorInversion is required (like for some DeviceCMYK).

    Raises:
        PdfReadError: If the color spaces are nested too deeply or the color space
            is neither a string nor a list.

    """
    if depth > MAX_IMAGE_MODE_NESTING_DEPTH:
        raise PdfReadError(
            "Color spaces nested too deeply. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH."
        )
    if is_null_or_none(color_space):
        return "", False

    color_space_str: str = ""
    if isinstance(color_space, str):
        color_space_str = color_space
    elif not isinstance(color_space, list):
        raise PdfReadError(
            "Cannot interpret color space", color_space
        )  # pragma: no cover
    elif not color_space:
        return "", False
    else:
        family = color_space[0]
        if family.startswith("/Cal"):  # /CalRGB or /CalGray
            color_space_str = "/Device" + family[4:]
        elif family == "/ICCBased":
            icc_profile = color_space[1].get_object()
            color_components = cast(int, icc_profile["/N"])
            color_space_str = icc_profile.get("/Alternate", "")
        elif family == "/Indexed":
            # The mode is derived from the base color space; full color bases become a palette.
            base_space = color_space[1].get_object()
            mode, invert_color = _get_image_mode(
                base_space, color_components, prev_mode, depth + 1
            )
            if mode in ("RGB", "CMYK"):
                mode = "P"
            return mode, invert_color
        elif family == "/Separation":
            # The mode comes from the alternate color space; inversion is always requested.
            alternate_space = color_space[2].get_object()
            mode, _ = _get_image_mode(
                alternate_space, color_components, prev_mode, depth + 1
            )
            return mode, True
        elif family == "/DeviceN":
            colorants = color_space[1]
            color_components = len(colorants)
            alternate_space = color_space[2].get_object()
            if alternate_space == "/DeviceCMYK" and color_components == 1:
                if colorants[0] != "/Black":
                    logger_warning(
                        f"Color {colorants[0]} converted to Gray. Please share PDF with pypdf dev team",
                        __name__,
                    )
                return "L", True
            return _get_image_mode(
                alternate_space, color_components, prev_mode, depth + 1
            )

    mode_map: dict[str, mode_str_type] = {
        "1bit": "1",  # must be zeroth position: color_components may index the values
        "/DeviceGray": "L",  # must be first position: color_components may index the values
        "palette": "P",  # must be second position: color_components may index the values
        "/DeviceRGB": "RGB",  # must be third position: color_components may index the values
        "/DeviceCMYK": "CMYK",  # must be fourth position: color_components may index the values
        "2bit": "2bits",
        "4bit": "4bits",
    }

    # Prefer the lookup by name; only index by component count if the name is unknown.
    named_mode = mode_map.get(color_space_str)
    if named_mode:
        mode = named_mode
    else:
        mode = tuple(mode_map.values())[color_components] or prev_mode

    return mode, mode == "CMYK"


def bits2byte(data: bytes, size: tuple[int, int], bits: int) -> bytes:
    """
    Unpack samples of `bits` bits each into one byte per sample.

    Samples are read most significant bits first. Each row starts at a byte
    boundary, thus unused bits at the end of a row are skipped.

    Args:
        data: Packed sample data.
        size: Width and height of the image in samples.
        bits: Number of bits per packed sample.

    Returns:
        The unpacked samples, one byte per sample, row by row.
    """
    width = size[0]
    height = size[1]
    top_shift = 8 - bits
    sample_mask = (1 << bits) - 1
    unpacked = bytearray(width * height)

    source_index = 0
    shift = top_shift
    target_index = 0
    for _row in range(height):
        if shift != top_shift:
            # The previous row ended inside a byte: continue with the next full byte.
            source_index += 1
            shift = top_shift
        for _column in range(width):
            unpacked[target_index] = (data[source_index] >> shift) & sample_mask
            target_index += 1
            shift -= bits
            if shift < 0:
                source_index += 1
                shift = top_shift
    return bytes(unpacked)


def _extended_image_from_bytes(
    mode: str, size: tuple[int, int], data: bytes
) -> Image.Image:
    """
    Create an image from raw bytes, retrying with repeated bytes if Pillow rejects the data.

    If the first attempt fails and the data length is a multiple of the pixel count,
    every byte is repeated so that the data might match the requested mode.

    Args:
        mode: Pillow image mode.
        size: Width and height of the image.
        data: Raw image data.

    Returns:
        The image created from the (possibly expanded) data.

    Raises:
        EmptyImageDataError: If Pillow rejects the data and the data is empty.
        ValueError: If Pillow rejects the data and it cannot be expanded.
    """
    try:
        return Image.frombytes(mode, size, data)
    except ValueError as exc:
        pixel_count = size[0] * size[1]
        byte_count = len(data)
        if byte_count == 0:
            raise EmptyImageDataError(
                "Data is 0 bytes, cannot process an image from empty data."
            ) from exc
        if byte_count % pixel_count != 0:
            raise exc
        # Number of times each byte has to be repeated (uses one byte per mode character).
        repeat = int(pixel_count * len(mode) / byte_count)
        stretched = b"".join(bytes((value,) * repeat) for value in data)
        return Image.frombytes(mode, size, stretched)


def __handle_flate__indexed(color_space: ArrayObject) -> tuple[Any, Any, Any, Any]:
    """
    Split an indexed color space array into its resolved parts.

    Args:
        color_space: The color space array.

    Returns:
        Color space name, base color space, highest index and lookup data.

    Raises:
        PdfReadError: If the array has an unsupported layout.
    """
    count = len(color_space)
    if count == 4:
        name, base, hival, lookup = [entry.get_object() for entry in color_space]
        return name, base, hival, lookup

    # Deal with strange AutoDesk files where `base` and `hival` look like this:
    #   /DeviceRGB\x00255
    packed = color_space[1]
    if not isinstance(packed, str):
        packed = packed.get_object()
    if count == 3 and "\x00" in packed:
        name = color_space[0].get_object()
        lookup = color_space[2].get_object()
        base, raw_hival = packed.split("\x00")
        return name, base, int(raw_hival), lookup
    raise PdfReadError(f"Expected color space with 4 values, got {count}: {color_space}")


def _handle_flate(
    size: tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
    obj_as_text: str,
) -> tuple[Image.Image, str, str, bool]:
    """
    Process image encoded in flateEncode.

    Args:
        size: Width and height of the image.
        data: Raw image data.
        mode: Image mode; "2bits" and "4bits" are unpacked to a palette image.
        color_space: Color space name or color space array of the image.
        colors: Number of color components, used for ICC based color spaces.
        obj_as_text: Textual representation of the image object for warnings.

    Returns:
        img, image_format, extension, color inversion (always False here)
    """
    extension = ".png"  # mime_type: "image/png"
    image_format = "PNG"
    # Only assigned for indexed color spaces.
    lookup: Any
    base: Any
    hival: Any
    if isinstance(color_space, ArrayObject) and color_space[0] == "/Indexed":
        color_space, base, hival, lookup = __handle_flate__indexed(color_space)
    # Packed 2/4 bit samples are expanded to one byte per pixel and handled as palette images.
    if mode == "2bits":
        mode = "P"
        data = bits2byte(data, size, 2)
    elif mode == "4bits":
        mode = "P"
        data = bits2byte(data, size, 4)
    img = _extended_image_from_bytes(mode, size, data)
    if color_space == "/Indexed":
        # Normalize the lookup table to bytes, whatever object type it is stored as.
        if isinstance(lookup, (EncodedStreamObject, DecodedStreamObject)):
            lookup = lookup.get_data()
        if isinstance(lookup, TextStringObject):
            lookup = lookup.original_bytes
        if isinstance(lookup, str):
            lookup = lookup.encode()
        try:
            # Per base mode: bytes per lookup entry, mode to convert the image to,
            # and raw mode of the palette.
            nb, conv, mode = {  # type: ignore
                "1": (0, "", ""),
                "L": (1, "P", "L"),
                "P": (0, "", ""),
                "RGB": (3, "P", "RGB"),
                "CMYK": (4, "P", "CMYK"),
            }[_get_image_mode(base, 0, "")[0]]
        except KeyError:  # pragma: no cover
            logger_warning(
                f"Base {base} not coded please share the pdf file with pypdf dev team",
                __name__,
            )
            lookup = None
        else:
            if img.mode == "1":
                # Two values ("high" and "low").
                expected_count = 2 * nb
                actual_count = len(lookup)
                if actual_count != expected_count:
                    if actual_count < expected_count:
                        logger_warning(
                            f"Not enough lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__
                        )
                        # Pad missing lookup values with zeros.
                        lookup += bytes([0] * (expected_count - actual_count))
                    elif not check_if_whitespace_only(lookup[expected_count:]):
                        logger_warning(
                            f"Too many lookup values: Expected {expected_count}, got {actual_count}.",
                            __name__
                        )
                    lookup = lookup[:expected_count]
                # Replace every pixel by the first or second lookup entry depending on its value.
                colors_arr = [lookup[:nb], lookup[nb:]]
                arr = b"".join(
                    b"".join(
                        colors_arr[1 if img.getpixel((x, y)) > 127 else 0]  # type: ignore[operator,unused-ignore]  # TODO: Remove unused-ignore on Python 3.10
                        for x in range(img.size[0])
                    )
                    for y in range(img.size[1])
                )
                img = Image.frombytes(mode, img.size, arr)
            else:
                img = img.convert(conv)
                # The lookup table has to provide `nb` bytes for each index from 0 to `hival`.
                if len(lookup) != (hival + 1) * nb:
                    logger_warning(f"Invalid Lookup Table in {obj_as_text}", __name__)
                    lookup = None
                elif mode == "L":
                    # gray lookup does not work: it is converted to a similar RGB lookup
                    lookup = b"".join([bytes([b, b, b]) for b in lookup])
                    mode = "RGB"
                # TODO: https://github.com/py-pdf/pypdf/pull/2039
                # this is a work around until PIL is able to process CMYK images
                elif mode == "CMYK":
                    # Convert each CMYK lookup entry to RGB manually.
                    _rgb = []
                    for _c, _m, _y, _k in (
                        lookup[n : n + 4] for n in range(0, 4 * (len(lookup) // 4), 4)
                    ):
                        _r = int(255 * (1 - _c / 255) * (1 - _k / 255))
                        _g = int(255 * (1 - _m / 255) * (1 - _k / 255))
                        _b = int(255 * (1 - _y / 255) * (1 - _k / 255))
                        _rgb.append(bytes((_r, _g, _b)))
                    lookup = b"".join(_rgb)
                    mode = "RGB"
                if lookup is not None:
                    img.putpalette(lookup, rawmode=mode)
            # Indexed images are always returned as grayscale or RGB.
            img = img.convert("L" if base == ColorSpaces.DEVICE_GRAY else "RGB")
    elif not is_null_or_none(color_space) and color_space[0] == "/ICCBased":
        # Exclude pure black-and-white images.
        # TODO: The remaining code still does not look correct. Shouldn't the proper way be
        #       to use the original image and apply the ICC transformation on it?
        #       For now, this just loads the original image with a different color space.
        if mode != "1":
            # Table 65 - Additional Entries Specific to an ICC Profile Stream Dictionary
            mode2 = _get_image_mode(color_space, colors, mode)[0]
            if mode != mode2:
                # NOTE(review): this reloads with `mode`, not `mode2` - confirm whether this is intended.
                img = Image.frombytes(mode, size, data)  # reloaded as mode may have changed
    if mode == "CMYK":
        # CMYK images are returned as TIFF instead of PNG.
        extension = ".tif"
        image_format = "TIFF"
    return img, image_format, extension, False


def _handle_jpx(
    size: tuple[int, int],
    data: bytes,
    mode: mode_str_type,
    color_space: str,
    colors: int,
) -> tuple[Image.Image, str, str, bool]:
    """
    Process image encoded as JPX/JPEG2000.

    Args:
        size: Width and height of the image (not used, the decoded image defines its size).
        data: The JPEG2000 data.
        mode: Mode to fall back to when determining the target mode.
        color_space: Color space of the image.
        colors: Number of color components.

    Returns:
        img, image_format, extension, inversion
    """
    decoded: Image.Image = Image.open(BytesIO(data), formats=("JPEG2000",))
    decoded_mode = decoded.mode

    mode, invert_color = _get_image_mode(color_space, colors, mode)
    if mode == "":
        # No mode derived from the color space: trust the decoded image.
        mode = cast(mode_str_type, decoded_mode)
        invert_color = mode == "CMYK"
    if decoded_mode == "RGBA" and mode == "RGB":
        mode = "RGBA"

    # we need to convert to the good mode
    mode_pair = {decoded_mode, mode}  # compared as (unordered) sets
    if decoded_mode == mode or mode_pair == {"L", "P"}:
        # L and P are indexed modes which should not be changed.
        img = decoded
    elif mode_pair == {"RGBA", "CMYK"}:
        # RGBA / CMYK are 4bytes encoding where
        # the encoding should be corrected
        img = Image.frombytes(mode, decoded.size, decoded.tobytes())
    else:  # pragma: no cover
        img = decoded.convert(mode)

    # CMYK conversion
    # https://stackoverflow.com/questions/38855022/
    if img.mode == "CMYK" and color_space == "/ICCBased":
        img = img.convert("RGB")
    return img, "JPEG2000", ".jp2", invert_color  # mime_type: "image/x-jp2"


def _apply_decode(
    img: Image.Image,
    x_object_obj: dict[str, Any],
    lfilters: FT,
    color_space: Union[str, list[Any], Any],
    invert_color: bool,
) -> Image.Image:
    """
    Apply the decode array of an image, given explicitly or implied by the image type.

    Args:
        img: Image to process.
        x_object_obj: Image dictionary which might provide an explicit decode array.
        lfilters: Filter of the image.
        color_space: Color space of the image.
        invert_color: Whether color inversion has been requested.

    Returns:
        The image with the decode array applied; the unchanged image if there is
        no decode array or it is the identity mapping.
    """

    def inverted_for_all_bands() -> list[float]:
        return [1.0, 0.0] * len(img.getbands())

    # CMYK image and other color spaces without decode
    # requires reverting scale (cf p243,2§ last sentence)
    if IA.DECODE in x_object_obj:
        decode = x_object_obj[IA.DECODE]
    elif img.mode == "CMYK" and lfilters == FT.JPX_DECODE:
        band_range = [0.0, 1.0] if invert_color else [1.0, 0.0]
        decode = band_range * len(img.getbands())
    elif (img.mode == "CMYK" and lfilters == FT.DCT_DECODE) or (invert_color and img.mode == "L"):
        decode = inverted_for_all_bands()
    else:
        decode = None

    if isinstance(color_space, ArrayObject):
        family = color_space[0].get_object()
        if family == "/Indexed":
            decode = None  # decode is meaningless if Indexed
        elif family == "/Separation":
            decode = inverted_for_all_bands()

    # Nothing to do without decode array or for the identity mapping [0, 1, 0, 1, ...].
    if decode is None or all(decode[i] == i % 2 for i in range(len(decode))):
        return img

    # One lookup table of 256 entries for each (minimum, maximum) pair.
    lut: list[int] = []
    for offset in range(0, len(decode), 2):
        dmin, dmax = decode[offset], decode[offset + 1]
        for j in range(256):
            lut.append(round(255.0 * (j / 255.0 * (dmax - dmin) + dmin)))
    return img.point(lut)


def _get_mode_and_invert_color(
    x_object_obj: dict[str, Any], colors: int, color_space: Union[str, list[Any], Any]
) -> tuple[mode_str_type, bool]:
    """
    Determine the image mode and whether color inversion is required.

    Images with less than 8 bits per component get their mode from the bit depth
    only. Otherwise, the mode is derived from the color space.

    Args:
        x_object_obj: Image dictionary; only `/BitsPerComponent` is read (default: 8).
        colors: Number of color components.
        color_space: Color space of the image.

    Returns:
        Image mode and whether color inversion is required, as reported by
        `_get_image_mode`.
    """
    bits_per_component = x_object_obj.get("/BitsPerComponent", 8)
    if bits_per_component < 8:
        return _get_image_mode(f"{bits_per_component}bit", 0, "")

    # A single component which is not gray is passed on as two components.
    single_non_gray_component = (
        colors == 1
        and not is_null_or_none(color_space)
        and "Gray" not in color_space
    )
    return _get_image_mode(
        color_space,
        2 if single_non_gray_component else colors,
        "",
    )


def _xobj_to_image(
        x_object: dict[str, Any],
        pillow_parameters: Union[dict[str, Any], None] = None
) -> tuple[Optional[str], bytes, Any]:
    """
    Users need to have the pillow package installed.

    It's unclear if pypdf will keep this function here, hence it's private.
    It might get removed at any point.

    Args:
        x_object: The image object to convert.
        pillow_parameters: parameters provided to Pillow Image.save() method,
            cf. <https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.save>
            The passed dictionary is not modified.

    Returns:
        Tuple[file extension, bytes, PIL.Image.Image]

    Raises:
        PdfReadError: If no filter specific handling applies and no image mode
            could be determined.

    """
    def _apply_alpha(
        img: Image.Image,
        x_object: dict[str, Any],
        obj_as_text: str,
        image_format: str,
        extension: str,
    ) -> tuple[Image.Image, str, str]:
        # Returns the image, the extension and the image format (in this order).
        alpha = None
        if IA.S_MASK in x_object:  # add alpha channel
            alpha = _xobj_to_image(x_object[IA.S_MASK])[2]
            if img.size != alpha.size:
                logger_warning(
                    f"image and mask size not matching: {obj_as_text}", __name__
                )
            else:
                # TODO: implement mask
                if alpha.mode != "L":
                    alpha = alpha.convert("L")
                if img.mode == "P":
                    img = img.convert("RGB")
                elif img.mode == "1":
                    img = img.convert("L")
                img.putalpha(alpha)
            # The output format has to be changed as soon as a soft mask is present.
            if "JPEG" in image_format:
                image_format = "JPEG2000"
                extension = ".jp2"
            else:
                image_format = "PNG"
                extension = ".png"
        return img, extension, image_format

    # For error reporting
    obj_as_text = repr(x_object)

    # Get size and data
    size = (cast(int, x_object[IA.WIDTH]), cast(int, x_object[IA.HEIGHT]))
    data = x_object.get_data()  # type: ignore
    if isinstance(data, str):  # pragma: no cover
        data = data.encode()
    # Drop a single trailing line feed which does not belong to the image data.
    if len(data) % (size[0] * size[1]) == 1 and data[-1] == 0x0A:  # ie. '\n'
        data = data[:-1]

    # Get color properties
    colors = x_object.get("/Colors", 1)
    color_space: Any = x_object.get("/ColorSpace", NullObject()).get_object()
    if isinstance(color_space, list) and len(color_space) == 1:
        color_space = color_space[0].get_object()

    mode, invert_color = _get_mode_and_invert_color(x_object, colors, color_space)

    # Get filters: only the last filter decides about the handling.
    filters = x_object.get(StreamAttributes.FILTER, NullObject()).get_object()
    lfilters = filters[-1] if isinstance(filters, list) else filters

    extension = None
    if lfilters in (FT.FLATE_DECODE, FT.RUN_LENGTH_DECODE):
        img, image_format, extension, _ = _handle_flate(
            size,
            data,
            mode,
            color_space,
            colors,
            obj_as_text,
        )
    elif lfilters in (FT.LZW_DECODE, FT.ASCII_85_DECODE):
        # I'm not sure if the following logic is correct.
        # There might not be any relationship between the filters and the
        # extension
        if lfilters == FT.LZW_DECODE:
            image_format = "TIFF"
            extension = ".tiff"  # mime_type = "image/tiff"
        else:
            image_format = "PNG"
            extension = ".png"  # mime_type = "image/png"
        try:
            img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
        except UnidentifiedImageError:
            img = _extended_image_from_bytes(mode, size, data)
    elif lfilters == FT.DCT_DECODE:
        img, image_format, extension = Image.open(BytesIO(data)), "JPEG", ".jpg"
        # invert_color kept unchanged
    elif lfilters == FT.JPX_DECODE:
        img, image_format, extension, invert_color = _handle_jpx(
            size, data, mode, color_space, colors
        )
    elif lfilters == FT.CCITT_FAX_DECODE:
        img, image_format, extension, invert_color = (
            Image.open(BytesIO(data), formats=("TIFF",)),
            "TIFF",
            ".tiff",
            False,
        )
    elif lfilters == FT.JBIG2_DECODE:
        img, image_format, extension, invert_color = (
            Image.open(BytesIO(data), formats=("PNG", "PPM")),
            "PNG",
            ".png",
            False,
        )
    elif mode == "CMYK":
        img, image_format, extension, invert_color = (
            _extended_image_from_bytes(mode, size, data),
            "TIFF",
            ".tif",
            False,
        )
    elif mode == "":
        raise PdfReadError(f"ColorSpace field not found in {x_object}")
    else:
        img, image_format, extension, invert_color = (
            _extended_image_from_bytes(mode, size, data),
            "PNG",
            ".png",
            False,
        )

    img = _apply_decode(img, x_object, lfilters, color_space, invert_color)
    img, extension, image_format = _apply_alpha(
        img, x_object, obj_as_text, image_format, extension
    )

    # Work on a copy to avoid modifying the dictionary passed by the caller.
    save_parameters: dict[str, Any] = (
        {} if pillow_parameters is None else dict(pillow_parameters)
    )
    # Preserve JPEG image quality - see issue #3515.
    if image_format == "JPEG":
        # This prevents: Cannot use 'keep' when original image is not a JPEG:
        # "JPEG" is the value of PIL.JpegImagePlugin.JpegImageFile.format
        img.format = "JPEG"
        save_parameters.setdefault("quality", "keep")

    # Save image to bytes
    img_byte_arr = BytesIO()
    try:
        img.save(img_byte_arr, format=image_format, **save_parameters)
    except OSError:  # pragma: no cover  # covered with pillow 10.3
        # in case of we convert to RGBA and then to PNG
        img1 = img.convert("RGBA")
        image_format = "PNG"
        extension = ".png"
        img_byte_arr = BytesIO()
        img1.save(img_byte_arr, format=image_format)
    data = img_byte_arr.getvalue()

    try:  # temporary try/except until other fixes of images
        img = Image.open(BytesIO(data))
    except Exception as exception:
        logger_warning(f"Failed loading image: {exception}", __name__)
        img = None  # type: ignore[assignment,unused-ignore]  # TODO: Remove unused-ignore on Python 3.10
    return extension, data, img


================================================
FILE: pypdf/generic/_link.py
================================================
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


# This module contains code used by _writer.py to track links in pages
# being added to the writer until the links can be resolved.

from typing import TYPE_CHECKING, Optional, Union, cast

from .._utils import logger_warning
from . import ArrayObject, DictionaryObject, IndirectObject, PdfObject, TextStringObject, is_null_or_none

if TYPE_CHECKING:
    from .._page import PageObject
    from .._reader import PdfReader
    from .._writer import PdfWriter


class NamedReferenceLink:
    """Named reference link being preserved until we can resolve it correctly."""

    def __init__(self, reference: TextStringObject, source_pdf: "PdfReader") -> None:
        """
        Args:
            reference: TextStringObject with named reference
            source_pdf: PdfReader whose named destinations are used for the lookup
        """
        self._reference = reference
        self._source_pdf = source_pdf

    def find_referenced_page(self) -> Union[IndirectObject, None]:
        """Return the page of the named destination in the source PDF, None if there is none."""
        name = str(self._reference)
        destination = self._source_pdf.named_destinations.get(name)
        if not destination:
            return None
        return destination.page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """
        Args:
            target_pdf: PdfWriter which the new link went into
            new_page: Page which the named destination should point to
        """
        name = str(self._reference)
        if name in target_pdf.named_destinations:
            # Already available in the new PDF: nothing to add.
            return
        # point named destination in new PDF to the new page
        target_pdf.add_named_destination(name, new_page.page_number)


class DirectReferenceLink:
    """Direct reference link being preserved until we can resolve it correctly."""

    def __init__(self, reference: ArrayObject) -> None:
        """
        Args:
            reference: an ArrayObject whose first element is the Page indirect object
        """
        self._reference = reference

    def find_referenced_page(self) -> IndirectObject:
        """Return the first element of the reference array."""
        target_page = self._reference[0]
        return target_page

    def patch_reference(self, target_pdf: "PdfWriter", new_page: IndirectObject) -> None:
        """
        Replace the first element of the reference array with the new page.

        Args:
            target_pdf: PdfWriter which the new link went into (not used here)
            new_page: Page which the link should point to
        """
        self._reference[0] = new_page


# Any of the link types above: a link by destination name or by direct page reference.
ReferenceLink = Union[NamedReferenceLink, DirectReferenceLink]


def extract_links(new_page: "PageObject", old_page: "PageObject") -> list[tuple[ReferenceLink, ReferenceLink]]:
    """
    Extract the links of two pages, assuming that both pages are the same.

    Annotations are paired by their position. Pairs where one side is no
    supported link are dropped.

    Args:
        new_page: The new page.
        old_page: The page which the new page originates from.

    Returns:
        One list of (new link, old link) tuples; empty if the annotations of a
        page are no array.
    """
    new_annotations = new_page.get("/Annots", ArrayObject()).get_object()
    old_annotations = old_page.get("/Annots", ArrayObject()).get_object()
    if is_null_or_none(new_annotations):
        new_annotations = ArrayObject()
    if is_null_or_none(old_annotations):
        old_annotations = ArrayObject()

    both_are_arrays = isinstance(new_annotations, ArrayObject) and isinstance(old_annotations, ArrayObject)
    if not both_are_arrays:
        logger_warning(
            f"Expected annotation arrays: {old_annotations} {new_annotations}. Ignoring annotations.",
            __name__
        )
        return []
    # TODO: Investigate in https://github.com/py-pdf/pypdf/issues/3667
    # if len(new_annotations) != len(old_annotations):
    #     logger_warning(f"Annotation sizes differ: {old_annotations} vs. {new_annotations}", __name__)

    new_links = [_build_link(annotation, new_page) for annotation in new_annotations]
    old_links = [_build_link(annotation, old_page) for annotation in old_annotations]

    # Keep only the pairs where both sides are links.
    return [pair for pair in zip(new_links, old_links) if all(pair)]


def _build_link(indirect_object: IndirectObject, page: "PageObject") -> Optional[ReferenceLink]:
    """
    Create the reference link for a link annotation.

    Args:
        indirect_object: The annotation to inspect.
        page: Page whose `pdf` attribute is used as the source PDF.

    Returns:
        The link for `/GoTo` actions with a destination or for a `/Dest` entry.
        None for everything else.
    """
    source_pdf = cast("PdfReader", page.pdf)
    annotation = cast(DictionaryObject, indirect_object.get_object())
    is_link_annotation = (
        isinstance(annotation, DictionaryObject)
        and annotation.get("/Subtype") == "/Link"
    )
    if not is_link_annotation:
        return None

    if "/A" in annotation:
        # An action takes precedence over a `/Dest` entry.
        action = cast(DictionaryObject, annotation["/A"])
        if action.get("/S") == "/GoTo" and "/D" in action:
            return _create_link(action["/D"], source_pdf)
        return None

    if "/Dest" in annotation:
        return _create_link(annotation["/Dest"], source_pdf)

    return None  # Nothing to do here


def _create_link(reference: PdfObject, source_pdf: "PdfReader") -> Optional[ReferenceLink]:
    """
    Wrap a destination into the matching link type.

    Args:
        reference: Destination of a link.
        source_pdf: PdfReader used for resolving named references.

    Returns:
        A named link for text strings, a direct link for arrays, None otherwise.
    """
    link: Optional[ReferenceLink] = None
    if isinstance(reference, TextStringObject):
        link = NamedReferenceLink(reference, source_pdf)
    elif isinstance(reference, ArrayObject):
        link = DirectReferenceLink(reference)
    return link


================================================
FILE: pypdf/generic/_outline.py
================================================
from typing import Union

from .._utils import StreamType, deprecation_no_replacement
from ._base import NameObject
from ._data_structures import Destination


class OutlineItem(Destination):
    """Destination with a custom serialization as PDF dictionary."""

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write this item as a dictionary to the stream.

        Only the entries `/Title`, `/Parent`, `/First`, `/Last`, `/Next` and `/Prev`
        are written (if present), followed by `/Dest` with the destination array.

        Args:
            stream: Stream to write to.
            encryption_key: Deprecated; passing a value triggers the deprecation handling.
        """
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        stream.write(b"<<\n")
        for name in ("/Title", "/Parent", "/First", "/Last", "/Next", "/Prev"):
            if name not in self:
                continue
            entry_key = NameObject(name)
            entry_key.write_to_stream(stream)
            stream.write(b" ")
            self.raw_get(entry_key).write_to_stream(stream)
            stream.write(b"\n")
        # The destination is always written.
        NameObject("/Dest").write_to_stream(stream)
        stream.write(b" ")
        self.dest_array.write_to_stream(stream)
        stream.write(b"\n")
        stream.write(b">>")


================================================
FILE: pypdf/generic/_rectangle.py
================================================
from typing import Any, Union

from ._base import FloatObject, NumberObject
from ._data_structures import ArrayObject


class RectangleObject(ArrayObject):
    """
    This class is used to represent *page boxes* in pypdf.

    The four stored values are, by index: left (0), bottom (1), right (2)
    and top (3).

    These boxes include:

    * :attr:`artbox <pypdf._page.PageObject.artbox>`
    * :attr:`bleedbox <pypdf._page.PageObject.bleedbox>`
    * :attr:`cropbox <pypdf._page.PageObject.cropbox>`
    * :attr:`mediabox <pypdf._page.PageObject.mediabox>`
    * :attr:`trimbox <pypdf._page.PageObject.trimbox>`
    """

    def __init__(
        self, arr: Union["RectangleObject", tuple[float, float, float, float]]
    ) -> None:
        """
        Build a rectangle from four coordinates.

        Args:
            arr: The four values (left, bottom, right, top). Values that are
                not already ``FloatObject``/``NumberObject`` are converted to
                ``FloatObject``.

        """
        # must have four points
        # NOTE(review): this check is an ``assert`` and therefore disappears
        # when Python runs with -O.
        assert len(arr) == 4
        # automatically convert arr[x] into NumberObject(arr[x]) if necessary
        ArrayObject.__init__(self, [self._ensure_is_number(x) for x in arr])

    def _ensure_is_number(self, value: Any) -> Union[FloatObject, NumberObject]:
        """Return ``value`` unchanged if it is a PDF number, else wrap it in ``FloatObject``."""
        if not isinstance(value, (FloatObject, NumberObject)):
            value = FloatObject(value)
        return value

    def scale(self, sx: float, sy: float) -> "RectangleObject":
        """
        Return a new rectangle with scaled coordinates; ``self`` is not modified.

        Args:
            sx: Factor applied to ``left`` and ``right``.
            sy: Factor applied to ``bottom`` and ``top``.

        Returns:
            The scaled rectangle.

        """
        return RectangleObject(
            (
                float(self.left) * sx,
                float(self.bottom) * sy,
                float(self.right) * sx,
                float(self.top) * sy,
            )
        )

    def __repr__(self) -> str:
        return f"RectangleObject({list(self)!r})"

    @property
    def left(self) -> FloatObject:
        """The value stored at index 0."""
        return self[0]

    @left.setter
    def left(self, f: float) -> None:
        self[0] = FloatObject(f)

    @property
    def bottom(self) -> FloatObject:
        """The value stored at index 1."""
        return self[1]

    @bottom.setter
    def bottom(self, f: float) -> None:
        self[1] = FloatObject(f)

    @property
    def right(self) -> FloatObject:
        """The value stored at index 2."""
        return self[2]

    @right.setter
    def right(self, f: float) -> None:
        self[2] = FloatObject(f)

    @property
    def top(self) -> FloatObject:
        """The value stored at index 3."""
        return self[3]

    @top.setter
    def top(self, f: float) -> None:
        self[3] = FloatObject(f)

    @property
    def lower_left(self) -> tuple[float, float]:
        """
        Property to read and modify the lower left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.bottom

    @lower_left.setter
    def lower_left(self, value: tuple[float, float]) -> None:
        # Both components are converted before either slot is assigned.
        self[0], self[1] = (self._ensure_is_number(x) for x in value)

    @property
    def lower_right(self) -> tuple[float, float]:
        """
        Property to read and modify the lower right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.bottom

    @lower_right.setter
    def lower_right(self, value: tuple[float, float]) -> None:
        self[2], self[1] = (self._ensure_is_number(x) for x in value)

    @property
    def upper_left(self) -> tuple[float, float]:
        """
        Property to read and modify the upper left coordinate of this box
        in (x,y) form.
        """
        return self.left, self.top

    @upper_left.setter
    def upper_left(self, value: tuple[float, float]) -> None:
        self[0], self[3] = (self._ensure_is_number(x) for x in value)

    @property
    def upper_right(self) -> tuple[float, float]:
        """
        Property to read and modify the upper right coordinate of this box
        in (x,y) form.
        """
        return self.right, self.top

    @upper_right.setter
    def upper_right(self, value: tuple[float, float]) -> None:
        self[2], self[3] = (self._ensure_is_number(x) for x in value)

    @property
    def width(self) -> float:
        """``right`` minus ``left``."""
        return self.right - self.left

    @property
    def height(self) -> float:
        """``top`` minus ``bottom``."""
        return self.top - self.bottom


================================================
FILE: pypdf/generic/_utils.py
================================================
import codecs
from typing import Union

from .._codecs import _pdfdoc_encoding
from .._utils import StreamType, logger_warning, read_non_whitespace
from ..errors import STREAM_TRUNCATED_PREMATURELY, PdfStreamError
from ._base import ByteStringObject, TextStringObject


def hex_to_rgb(value: str) -> tuple[float, float, float]:
    """
    Convert a hexadecimal color such as ``"#ff8000"`` to RGB fractions.

    Args:
        value: Six hexadecimal digits, optionally preceded by ``#``.

    Returns:
        The red, green and blue components, each in the range 0.0 to 1.0.

    """
    digits = value.lstrip("#")
    red, green, blue = (int(digits[pos : pos + 2], 16) / 255.0 for pos in (0, 2, 4))
    return red, green, blue


def read_hex_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    """
    Read a hexadecimal string from ``stream`` and turn it into a string object.

    The first byte (expected to be the opening ``<``) is skipped. Tokens
    returned by ``read_non_whitespace`` are then paired up into byte values
    until ``>`` is found. A trailing unpaired digit is padded with ``0``.

    Args:
        stream: The stream positioned at the opening delimiter.
        forced_encoding: Passed through to :func:`create_string_object`.

    Returns:
        The object built by :func:`create_string_object` from the decoded bytes.

    Raises:
        PdfStreamError: If the stream ends before ``>`` is found.

    """
    stream.read(1)  # skip the opening delimiter
    decoded: list[int] = []
    pending = b""
    while True:
        digit = read_non_whitespace(stream)
        if digit == b">":
            break
        if not digit:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        pending += digit
        if len(pending) < 2:
            continue
        decoded.append(int(pending, base=16))
        pending = b""

    if len(pending) == 1:
        # Odd number of digits: the missing low nibble counts as zero.
        pending += b"0"
    if pending:
        decoded.append(int(pending, base=16))
    return create_string_object(bytes(decoded), forced_encoding)


# Lookup table for backslash escapes inside literal strings: the key is the
# byte that follows the backslash, the value is the byte value added to the
# decoded string by read_string_from_stream.
__ESCAPE_DICT__ = {
    b"n": ord(b"\n"),
    b"r": ord(b"\r"),
    b"t": ord(b"\t"),
    b"b": ord(b"\b"),
    b"f": ord(b"\f"),
    b"(": ord(b"("),
    b")": ord(b")"),
    b"/": ord(b"/"),
    b"\\": ord(b"\\"),
    b" ": ord(b" "),
    b"%": ord(b"%"),
    b"<": ord(b"<"),
    b">": ord(b">"),
    b"[": ord(b"["),
    b"]": ord(b"]"),
    b"#": ord(b"#"),
    b"_": ord(b"_"),
    b"&": ord(b"&"),
    b"$": ord(b"$"),
}
# Byte value of the backslash character; emitted literally when an escape
# sequence is not recognized or an octal escape exceeds 255.
__BACKSLASH_CODE__ = 92


def read_string_from_stream(
    stream: StreamType,
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union["TextStringObject", "ByteStringObject"]:
    """
    Read a parenthesized literal string from ``stream``.

    The first byte (expected to be the opening parenthesis) is consumed, then
    bytes are collected until the matching closing parenthesis. Nested,
    unescaped parentheses are kept in the result. Backslash escapes are
    resolved through ``__ESCAPE_DICT__``, as octal codes, or as escaped line
    breaks; an unknown escape is kept literally and a warning is logged.

    Args:
        stream: The stream positioned at the opening parenthesis.
        forced_encoding: Passed through to :func:`create_string_object`.

    Returns:
        The object built by :func:`create_string_object` from the decoded bytes.

    Raises:
        PdfStreamError: If the stream ends before the string is closed.

    """
    tok = stream.read(1)  # opening parenthesis; its value is not used
    parens = 1  # current nesting depth of unescaped parentheses
    txt = []  # decoded byte values
    while True:
        tok = stream.read(1)
        if not tok:
            raise PdfStreamError(STREAM_TRUNCATED_PREMATURELY)
        if tok == b"(":
            parens += 1
        elif tok == b")":
            parens -= 1
            if parens == 0:
                break
        elif tok == b"\\":
            tok = stream.read(1)
            try:
                # Simple one-character escape.
                txt.append(__ESCAPE_DICT__[tok])
                continue
            except KeyError:
                if b"0" <= tok <= b"7":
                    # "The number ddd may consist of one, two, or three
                    # octal digits; high-order overflow shall be ignored.
                    # Three octal digits shall be used, with leading zeros
                    # as needed, if the next character of the string is also
                    # a digit." (PDF reference 7.3.4.2, p 16)
                    # Remember where the first digit starts so the digits can
                    # be re-read as plain characters if the value is too big.
                    sav = stream.tell() - 1
                    for _ in range(2):
                        ntok = stream.read(1)
                        if b"0" <= ntok <= b"7":
                            tok += ntok
                        else:
                            stream.seek(-1, 1)  # ntok has to be analyzed
                            break
                    i = int(tok, base=8)
                    if i > 255:
                        # Not a valid byte: keep the backslash and rewind so
                        # the digits are read again as ordinary characters.
                        txt.append(__BACKSLASH_CODE__)
                        stream.seek(sav)
                    else:
                        txt.append(i)
                    continue
                if tok in b"\n\r":
                    # This case is hit when a backslash followed by a line
                    # break occurs. If it's a multi-char EOL, consume the
                    # second character:
                    # NOTE(review): any second "\n" or "\r" is consumed here,
                    # not only the "\r\n" pair.
                    tok = stream.read(1)
                    if tok not in b"\n\r":
                        stream.seek(-1, 1)
                    # Then don't add anything to the actual string, since this
                    # line break was escaped:
                    continue
                msg = f"Unexpected escaped string: {tok.decode('utf-8', 'ignore')}"
                logger_warning(msg, __name__)
                # Unknown escape: keep the backslash, then fall through so the
                # escaped character itself is appended below.
                txt.append(__BACKSLASH_CODE__)
        txt.append(ord(tok))
    return create_string_object(bytes(txt), forced_encoding)


def create_string_object(
    string: Union[str, bytes],
    forced_encoding: Union[None, str, list[str], dict[int, str]] = None,
) -> Union[TextStringObject, ByteStringObject]:
    """
    Create a ByteStringObject or a TextStringObject from a string to represent the string.

    Args:
        string: The data being used
        forced_encoding: Typically None, or an encoding string. A list or
            dict is used as a per-byte lookup table, and the special value
            ``"bytes"`` requests a ByteStringObject.

    Returns:
        A TextStringObject when the data is (or can be decoded to) text,
        otherwise a ByteStringObject.

    Raises:
        TypeError: If string is not of type str or bytes.

    """
    if isinstance(string, str):
        return TextStringObject(string)
    if not isinstance(string, bytes):
        raise TypeError("create_string_object should have str or unicode arg")

    if isinstance(forced_encoding, (list, dict)):
        # Per-byte translation table; bytes without an entry fall back to
        # the "charmap" codec.
        decoded = ""
        for code in string:
            try:
                decoded += forced_encoding[code]
            except Exception:
                decoded += bytes((code,)).decode("charmap")
        text = TextStringObject(decoded)
        text._original_bytes = string
        return text

    if isinstance(forced_encoding, str):
        if forced_encoding == "bytes":
            return ByteStringObject(string)
        text = TextStringObject(string.decode(forced_encoding))
        text._original_bytes = string
        return text

    # No encoding given: guess UTF-16 from a BOM or a zero byte in the first
    # two positions, otherwise try PDFDocEncoding.
    try:
        if string.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
            codec, bom = "utf-16", string[:2]
        elif string.startswith(b"\x00"):
            codec, bom = "utf-16be", codecs.BOM_UTF16_BE
        elif string[1:2] == b"\x00":
            codec, bom = "utf-16le", codecs.BOM_UTF16_LE
        else:
            codec, bom = None, None

        if codec is not None:
            text = TextStringObject(string.decode(codec))
            text._original_bytes = string
            text.autodetect_utf16 = True
            text.utf16_bom = bom
            return text

        # This is probably a big performance hit here, but we need
        # to convert string objects into the text/unicode-aware
        # version if possible... and the only way to check if that's
        # possible is to try.
        # Some strings are strings, some are just byte arrays.
        text = TextStringObject(decode_pdfdocencoding(string))
        text._original_bytes = string
        text.autodetect_pdfdocencoding = True
        return text
    except UnicodeDecodeError:
        return ByteStringObject(string)


def decode_pdfdocencoding(byte_array: bytes) -> str:
    """
    Decode bytes using the ``_pdfdoc_encoding`` translation table.

    Args:
        byte_array: The raw bytes to decode.

    Returns:
        The decoded text.

    Raises:
        UnicodeDecodeError: If a byte maps to ``"\\u0000"`` in the table,
            which is treated as "no character defined". The exception
            carries the input and the offset of the offending byte.

    """
    chars = []
    for index, code in enumerate(byte_array):
        char = _pdfdoc_encoding[code]
        if char == "\u0000":
            # Report the real input and position; ``bytearray(code)`` would
            # instead build ``code`` zero bytes.
            raise UnicodeDecodeError(
                "pdfdocencoding",
                bytes(byte_array),
                index,
                index + 1,
                "does not exist in translation table",
            )
        chars.append(char)
    return "".join(chars)


================================================
FILE: pypdf/generic/_viewerpref.py
================================================
# Copyright (c) 2023, Pubpub-ZZ
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

from typing import (
    Any,
    Optional,
)

from ._base import BooleanObject, NameObject, NumberObject, is_null_or_none
from ._data_structures import ArrayObject, DictionaryObject

# Shared "false" object used as the default for the boolean viewer
# preferences defined in ViewerPreferences.__new__.
f_obj = BooleanObject(False)


class ViewerPreferences(DictionaryObject):
    """
    Dictionary of viewer preferences with typed property access.

    The preference properties (``hide_toolbar``, ``direction``,
    ``num_copies``, ...) are attached to the class in :meth:`__new__`; each
    one reads and writes a single dictionary key.
    """

    def __init__(self, obj: Optional[DictionaryObject] = None) -> None:
        """
        Initialize the preferences, optionally copying an existing dictionary.

        Args:
            obj: Dictionary whose items are copied. When it has an
                ``indirect_reference`` attribute, that is copied as well.

        """
        super().__init__(self)
        if not is_null_or_none(obj):
            self.update(obj.items())  # type: ignore
        try:
            self.indirect_reference = obj.indirect_reference  # type: ignore
        except AttributeError:
            # obj is None or has no indirect reference: nothing to copy.
            pass

    def _get_bool(self, key: str, default: Optional[BooleanObject]) -> Optional[BooleanObject]:
        """Return the value stored under ``key``, or ``default`` if absent."""
        return self.get(key, default)

    def _set_bool(self, key: str, v: bool) -> None:
        """Store a boolean under ``key``; only the object ``True`` is stored as true."""
        self[NameObject(key)] = BooleanObject(v is True)

    def _get_name(self, key: str, default: Optional[NameObject]) -> Optional[NameObject]:
        """Return the value stored under ``key``, or ``default`` if absent."""
        return self.get(key, default)

    def _set_name(self, key: str, lst: list[str], v: NameObject) -> None:
        """
        Store a name under ``key`` after validating it.

        Args:
            key: The dictionary key to set.
            lst: Acceptable values; an empty list accepts every name.
            v: The name to store; it must start with ``/``.

        Raises:
            ValueError: If ``v`` does not start with ``/`` or is not in a
                non-empty ``lst``.

        """
        if v[0] != "/":
            raise ValueError(f"{v} does not start with '/'")
        if lst != [] and v not in lst:
            raise ValueError(f"{v} is an unacceptable value")
        self[NameObject(key)] = NameObject(v)

    def _get_arr(self, key: str, default: Optional[list[Any]]) -> Optional[ArrayObject]:
        """Return the value under ``key``, or ``default`` wrapped in an ArrayObject (``None`` stays ``None``)."""
        return self.get(key, None if default is None else ArrayObject(default))

    def _set_arr(self, key: str, v: Optional[ArrayObject]) -> None:
        """
        Store an array under ``key``; ``None`` removes the key if present.

        Raises:
            ValueError: If ``v`` is neither ``None`` nor an ArrayObject.

        """
        if v is None:
            try:
                del self[NameObject(key)]
            except KeyError:
                pass
            return
        if not isinstance(v, ArrayObject):
            raise ValueError("ArrayObject is expected")
        self[NameObject(key)] = v

    def _get_int(self, key: str, default: Optional[NumberObject]) -> Optional[NumberObject]:
        """Return the value stored under ``key``, or ``default`` if absent."""
        return self.get(key, default)

    def _set_int(self, key: str, v: int) -> None:
        """Store ``v`` under ``key`` as a NumberObject."""
        self[NameObject(key)] = NumberObject(v)

    @property
    def PRINT_SCALING(self) -> NameObject:
        """The name object ``/PrintScaling``."""
        return NameObject("/PrintScaling")

    def __new__(cls: Any, value: Any = None) -> "ViewerPreferences":  # noqa: PYI034
        """
        Create the instance and attach the preference properties to ``cls``.

        The properties are assigned to the class on every call, i.e. each
        time an instance is created. ``value`` is not used here.
        """
        # Each factory below builds a property whose getter and setter
        # delegate to the matching _get_*/_set_* helper for one key.
        def _add_prop_bool(key: str, default: Optional[BooleanObject]) -> property:
            return property(
                lambda self: self._get_bool(key, default),
                lambda self, v: self._set_bool(key, v),
                None,
                f"""
            Returns/Modify the status of {key}, Returns {default} if not defined
            """,
            )

        def _add_prop_name(
            key: str, lst: list[str], default: Optional[NameObject]
        ) -> property:
            return property(
                lambda self: self._get_name(key, default),
                lambda self, v: self._set_name(key, lst, v),
                None,
                f"""
            Returns/Modify the status of {key}, Returns {default} if not defined.
            Acceptable values: {lst}
            """,
            )

        def _add_prop_arr(key: str, default: Optional[ArrayObject]) -> property:
            return property(
                lambda self: self._get_arr(key, default),
                lambda self, v: self._set_arr(key, v),
                None,
                f"""
            Returns/Modify the status of {key}, Returns {default} if not defined
            """,
            )

        def _add_prop_int(key: str, default: Optional[int]) -> property:
            return property(
                lambda self: self._get_int(key, default),
                lambda self, v: self._set_int(key, v),
                None,
                f"""
            Returns/Modify the status of {key}, Returns {default} if not defined
            """,
            )

        # Boolean preferences defaulting to false.
        cls.hide_toolbar = _add_prop_bool("/HideToolbar", f_obj)
        cls.hide_menubar = _add_prop_bool("/HideMenubar", f_obj)
        cls.hide_windowui = _add_prop_bool("/HideWindowUI", f_obj)
        cls.fit_window = _add_prop_bool("/FitWindow", f_obj)
        cls.center_window = _add_prop_bool("/CenterWindow", f_obj)
        cls.display_doctitle = _add_prop_bool("/DisplayDocTitle", f_obj)

        # Name preferences; an empty list of acceptable values means the
        # value is only checked for the leading "/".
        cls.non_fullscreen_pagemode = _add_prop_name(
            "/NonFullScreenPageMode",
            ["/UseNone", "/UseOutlines", "/UseThumbs", "/UseOC"],
            NameObject("/UseNone"),
        )
        cls.direction = _add_prop_name(
            "/Direction", ["/L2R", "/R2L"], NameObject("/L2R")
        )
        cls.view_area = _add_prop_name("/ViewArea", [], None)
        cls.view_clip = _add_prop_name("/ViewClip", [], None)
        cls.print_area = _add_prop_name("/PrintArea", [], None)
        cls.print_clip = _add_prop_name("/PrintClip", [], None)
        cls.print_scaling = _add_prop_name("/PrintScaling", [], None)
        cls.duplex = _add_prop_name(
            "/Duplex", ["/Simplex", "/DuplexFlipShortEdge", "/DuplexFlipLongEdge"], None
        )
        # Preferences without a default: the getter returns None when unset.
        cls.pick_tray_by_pdfsize = _add_prop_bool("/PickTrayByPDFSize", None)
        cls.print_pagerange = _add_prop_arr("/PrintPageRange", None)
        cls.num_copies = _add_prop_int("/NumCopies", None)

        cls.enforce = _add_prop_arr("/Enforce", ArrayObject())

        return DictionaryObject.__new__(cls)


================================================
FILE: pypdf/pagerange.py
================================================
"""
Representation and utils for ranges of PDF file pages.

Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
All rights reserved. This software is available under a BSD license;
see https://github.com/py-pdf/pypdf/blob/main/LICENSE
"""

import re
from typing import Any, Union

from .errors import ParseError

_INT_RE = r"(0|-?[1-9]\d*)"  # A decimal int, don't allow "-0".
# Matches either a lone integer (group 2) or a slice-like expression whose
# optional start, stop and step are captured in groups 4, 6 and 8.
PAGE_RANGE_RE = f"^({_INT_RE}|({_INT_RE}?(:{_INT_RE}?(:{_INT_RE}?)?)))$"
# groups:         12     34     5 6     7 8


class PageRange:
    """
    A slice-like representation of a range of page indices.

    For example, page numbers, only starting at zero.

    The syntax is like what you would put between brackets [ ].
    The slice is one of the few Python types that can't be subclassed,
    but this class converts to and from slices, and allows similar use.

      -  PageRange(str) parses a string representing a page range.
      -  PageRange(slice) directly "imports" a slice.
      -  to_slice() gives the equivalent slice.
      -  str() and repr() allow printing.
      -  indices(n) is like slice.indices(n).
    """

    def __init__(self, arg: Union[slice, "PageRange", str]) -> None:
        """
        Initialize with either a slice -- giving the equivalent page range,
        or a PageRange object -- making a copy,
        or a string like
            "int", "[int]:[int]" or "[int]:[int]:[int]",
            where the brackets indicate optional ints.
        Remember, page indices start with zero.
        Page range expression examples:

            :     all pages.                   -1    last page.
            22    just the 23rd page.          :-1   all but the last page.
            0:3   the first three pages.       -2    second-to-last page.
            :3    the first three pages.       -2:   last two pages.
            5:    from the sixth page onward.  -3:-1 third & second to last.
        The third, "stride" or "step" number is also recognized.
            ::2       0 2 4 ... to the end.    3:0:-1    3 2 1 but not 0.
            1:10:2    1 3 5 7 9                2::-1     2 1 0.
            ::-1      all pages in reverse order.
        Note the difference between this notation and arguments to slice():
            slice(3) means the first three pages;
            PageRange("3") means the range of only the fourth page.
            However PageRange(slice(3)) means the first three pages.
        """
        if isinstance(arg, slice):
            self._slice = arg
            return

        if isinstance(arg, PageRange):
            self._slice = arg.to_slice()
            return

        parsed = re.match(PAGE_RANGE_RE, arg) if isinstance(arg, str) else None
        if not parsed:
            raise ParseError(arg)

        single_page = parsed.group(2)
        if single_page:
            # Special case: just an int means a range of one page. For -1
            # the stop is left open, because slice(-1, 0) would be empty.
            first = int(single_page)
            last = None if first == -1 else first + 1
            self._slice = slice(first, last)
            return

        bounds = [int(text) if text else None for text in parsed.group(4, 6, 8)]
        self._slice = slice(*bounds)

    @staticmethod
    def valid(input: Any) -> bool:
        """
        True if input is a valid initializer for a PageRange.

        Args:
            input: A possible PageRange string or a PageRange object.

        Returns:
            True, if the ``input`` is a valid PageRange.

        """
        if isinstance(input, (slice, PageRange)):
            return True
        return isinstance(input, str) and bool(re.match(PAGE_RANGE_RE, input))

    def to_slice(self) -> slice:
        """Return the slice equivalent of this page range."""
        return self._slice

    def __str__(self) -> str:
        """A string like "1:2:3"."""
        current = self._slice
        parts: Union[tuple[int, int], tuple[int, int, int]]
        if current.step is not None:
            parts = current.start, current.stop, current.step
        elif current.start is not None and current.stop == current.start + 1:
            # A one-page range is printed as the bare page index.
            return str(current.start)
        else:
            parts = current.start, current.stop
        return ":".join("" if part is None else str(part) for part in parts)

    def __repr__(self) -> str:
        """A string like "PageRange('1:2:3')"."""
        return f"PageRange({str(self)!r})"

    def indices(self, n: int) -> tuple[int, int, int]:
        """
        Assuming a sequence of length n, calculate the start and stop indices,
        and the stride length of the PageRange.

        See help(slice.indices).

        Args:
            n:  the length of the list of pages to choose from.

        Returns:
            Arguments for range().

        """
        return self._slice.indices(n)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, PageRange) and self._slice == other._slice

    def __hash__(self) -> int:
        current = self._slice
        return hash((self.__class__, (current.start, current.stop, current.step)))

    def __add__(self, other: "PageRange") -> "PageRange":
        """
        Merge two stride-less ranges that touch or overlap.

        Raises:
            TypeError: If ``other`` is not a PageRange.
            ValueError: If either range has a step, or if there is a gap
                between the two ranges.

        """
        if not isinstance(other, PageRange):
            raise TypeError(f"Can't add PageRange and {type(other)}")
        if self._slice.step is not None or other._slice.step is not None:
            raise ValueError("Can't add PageRange with stride")

        lower = (self._slice.start, self._slice.stop)
        upper = (other._slice.start, other._slice.stop)
        if lower[0] > upper[0]:
            lower, upper = upper, lower

        # Now ``lower`` is the range with the smallest start.
        lower_start, lower_stop = lower
        upper_start, upper_stop = upper
        if upper_start > lower_stop:
            # There is a gap between the two ranges.
            raise ValueError("Can't add PageRanges with gap")
        return PageRange(slice(lower_start, max(lower_stop, upper_stop)))


# The range of all pages; parse_filename_page_ranges uses it for files that
# are not followed by an explicit page range.
PAGE_RANGE_ALL = PageRange(":")  # The range of all pages.


def parse_filename_page_ranges(
    args: list[Union[str, PageRange, None]]
) -> list[tuple[str, PageRange]]:
    """
    Given a list of filenames and page ranges, return a list of (filename, page_range) pairs.

    Every page range is paired with the most recent filename before it, so a
    filename followed by several ranges yields several pairs.

    Args:
        args: A list where the first element is a filename. The other elements are
            filenames, page-range expressions, slice objects, or PageRange objects.
            A filename not followed by a page range indicates all pages of the file.

    Returns:
        A list of (filename, page_range) pairs.

    Raises:
        ValueError: If a page range appears before any filename.

    """
    result: list[tuple[str, PageRange]] = []
    current_file: Union[str, None] = None
    range_seen = False
    # The trailing None acts as a sentinel that flushes the last filename.
    for item in [*args, None]:
        if not PageRange.valid(item):
            # New filename or end of list - use the complete previous file?
            if current_file and not range_seen:
                result.append((current_file, PAGE_RANGE_ALL))

            assert not isinstance(item, PageRange), item
            current_file = item
            range_seen = False
            continue

        if not current_file:
            raise ValueError(
                "The first argument must be a filename, not a page range."
            )

        assert item is not None
        result.append((current_file, PageRange(item)))
        range_seen = True
    return result


# Union of the forms a page-range specification may take.
# NOTE(review): the tuple and list forms are not interpreted in this module;
# presumably callers elsewhere convert them - confirm against those callers.
PageRangeSpec = Union[str, PageRange, tuple[int, int], tuple[int, int, int], list[int]]


================================================
FILE: pypdf/papersizes.py
================================================
"""Helper to get paper sizes."""

from typing import NamedTuple


class Dimensions(NamedTuple):
    """A (width, height) pair, used for the sizes defined on PaperSize."""

    width: int
    height: int


class PaperSize:
    """(width, height) of the paper in portrait mode in pixels at 72 ppi."""

    # Notes of how to calculate it:
    # 1. Get the size of the paper in millimeters
    # 2. Convert it to inches (25.4 millimeters is equal to 1 inch)
    # 3. Convert it to pixels at 72dpi (1 inch is equal to 72 pixels)

    # All Din-A paper sizes follow this pattern:
    # A(n - 1) = 2 x A(n), i.e. each size is two sheets of the next smaller one
    # So the width of the next bigger one is the height of the smaller one
    # (for example A3 is 842 wide and A4 is 842 high)
    # The ratio is always approximately 1:2**0.5
    # Additionally, A0 is defined to have an area of 1 m**2
    # https://en.wikipedia.org/wiki/ISO_216
    # Be aware of rounding issues!
    A0 = Dimensions(2384, 3370)  # 841mm x 1189mm
    A1 = Dimensions(1684, 2384)
    A2 = Dimensions(1191, 1684)
    A3 = Dimensions(842, 1191)
    A4 = Dimensions(
        595, 842
    )  # Printer paper, documents - this is by far the most common
    A5 = Dimensions(420, 595)  # Paperback books
    A6 = Dimensions(298, 420)  # Postcards
    A7 = Dimensions(210, 298)
    A8 = Dimensions(147, 210)

    # Envelopes
    C4 = Dimensions(649, 918)


# The DIN A sizes from A0 to A8, ordered from largest to smallest; the tuple
# index equals the number in the size name.
_din_a = (
    PaperSize.A0,
    PaperSize.A1,
    PaperSize.A2,
    PaperSize.A3,
    PaperSize.A4,
    PaperSize.A5,
    PaperSize.A6,
    PaperSize.A7,
    PaperSize.A8,
)


================================================
FILE: pypdf/py.typed
================================================


================================================
FILE: pypdf/types.py
================================================
"""Helpers for working with PDF types."""

import sys
from typing import Literal, Union

if sys.version_info[:2] >= (3, 10):
    # Python 3.10+: TypeAlias is part of typing (PEP 613), see https://peps.python.org/pep-0613/
    from typing import TypeAlias
else:
    from typing_extensions import TypeAlias

from .generic._base import NameObject, NullObject, NumberObject
from .generic._data_structures import ArrayObject, Destination
from .generic._outline import OutlineItem

# A border array: a list whose items are names, numbers or nested arrays.
BorderArrayType: TypeAlias = list[Union[NameObject, NumberObject, ArrayObject]]

# Anything usable as an outline item: an OutlineItem or a plain Destination.
OutlineItemType: TypeAlias = Union[OutlineItem, Destination]

# The names accepted as a fit type.
FitType: TypeAlias = Literal[
    "/XYZ", "/Fit", "/FitH", "/FitV", "/FitR", "/FitB", "/FitBH", "/FitBV"
]
# These go with the FitType, they specify values for the fit
ZoomArgType: TypeAlias = Union[NumberObject, NullObject, float]
ZoomArgsType: TypeAlias = list[ZoomArgType]

# Recursive types like the following are not yet supported by Sphinx:
#    OutlineType = List[Union[Destination, "OutlineType"]]
# Hence use this for the moment:
# (this spelling only describes nesting up to three levels deep)
OutlineType = list[Union[Destination, list[Union[Destination, list[Destination]]]]]

# The names accepted as a page layout.
LayoutType: TypeAlias = Literal[
    "/NoLayout",
    "/SinglePage",
    "/OneColumn",
    "/TwoColumnLeft",
    "/TwoColumnRight",
    "/TwoPageLeft",
    "/TwoPageRight",
]

# The names accepted as a page mode.
PagemodeType: TypeAlias = Literal[
    "/UseNone",
    "/UseOutlines",
    "/UseThumbs",
    "/FullScreen",
    "/UseOC",
    "/UseAttachments",
]

# The names accepted as an annotation subtype.
AnnotationSubtype: TypeAlias = Literal[
    "/Text",
    "/Link",
    "/FreeText",
    "/Line",
    "/Square",
    "/Circle",
    "/Polygon",
    "/PolyLine",
    "/Highlight",
    "/Underline",
    "/Squiggly",
    "/StrikeOut",
    "/Caret",
    "/Stamp",
    "/Ink",
    "/Popup",
    "/FileAttachment",
    "/Sound",
    "/Movie",
    "/Screen",
    "/Widget",
    "/PrinterMark",
    "/TrapNet",
    "/Watermark",
    "/3D",
    "/Redact",
    "/Projection",
    "/RichMedia",
]


================================================
FILE: pypdf/xmp.py
================================================
"""
Anything related to Extensible Metadata Platform (XMP) metadata.

https://en.wikipedia.org/wiki/Extensible_Metadata_Platform
"""

import datetime
import decimal
import re
from collections.abc import Iterator
from typing import (
    Any,
    Callable,
    Optional,
    TypeVar,
    Union,
)
from xml.dom.minidom import Document, parseString
from xml.dom.minidom import Element as XmlElement
from xml.parsers.expat import ExpatError

from ._protocols import XmpInformationProtocol
from ._utils import StreamType, deprecate_with_replacement, deprecation_no_replacement
from .errors import PdfReadError, XmpDocumentError
from .generic import ContentStream, PdfObject

# XML namespace URIs used for XMP metadata. All but the RDF namespace also
# get a prefix in _NAMESPACE_PREFIX_MAP below.
RDF_NAMESPACE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
DC_NAMESPACE = "http://purl.org/dc/elements/1.1/"
XMP_NAMESPACE = "http://ns.adobe.com/xap/1.0/"
PDF_NAMESPACE = "http://ns.adobe.com/pdf/1.3/"
XMPMM_NAMESPACE = "http://ns.adobe.com/xap/1.0/mm/"

# What is the PDFX namespace, you might ask?
# It's documented here: https://github.com/adobe/xmp-docs/raw/master/XMPSpecifications/XMPSpecificationPart3.pdf
# This namespace is used to place "custom metadata"
# properties, which are arbitrary metadata properties with no semantic or
# documented meaning.
#
# Elements in the namespace are key/value-style storage,
# where the element name is the key and the content is the value. The keys
# are transformed into valid XML identifiers by substituting an invalid
# identifier character with \u2182 followed by the unicode hex ID of the
# original character. A key like "my car" is therefore "my\u21820020car".
#
# \u2182 is the unicode character \u{ROMAN NUMERAL TEN THOUSAND}
#
# The pdfx namespace should be avoided.
# A custom data schema and sensible XML elements could be used instead, as is
# suggested by Adobe's own documentation on XMP under "Extensibility of
# Schemas".
PDFX_NAMESPACE = "http://ns.adobe.com/pdfx/1.3/"

# PDF/A
PDFAID_NAMESPACE = "http://www.aiim.org/pdfa/ns/id/"

# Internal mapping of namespace URI → prefix
# (the same prefixes are declared in the _MINIMAL_XMP template)
_NAMESPACE_PREFIX_MAP = {
    DC_NAMESPACE: "dc",
    XMP_NAMESPACE: "xmp",
    PDF_NAMESPACE: "pdf",
    XMPMM_NAMESPACE: "xmpMM",
    PDFAID_NAMESPACE: "pdfaid",
    PDFX_NAMESPACE: "pdfx",
}

# Pattern for the date/time values parsed by _converter_date. Month, day and
# the time part are each optional; when a time is present the timezone
# designator ("Z" or "+hh:mm"/"-hh:mm") is required and seconds are optional.
# The fractional-seconds separator is a literal dot ("[.]"); a bare "." would
# accept any character there.
iso8601 = re.compile(
    """
        (?P<year>[0-9]{4})
        (-
            (?P<month>[0-9]{2})
            (-
                (?P<day>[0-9]+)
                (T
                    (?P<hour>[0-9]{2}):
                    (?P<minute>[0-9]{2})
                    (:(?P<second>[0-9]{2}([.][0-9]+)?))?
                    (?P<tzd>Z|[-+][0-9]{2}:[0-9]{2})
                )?
            )?
        )?
        """,
    re.VERBOSE,
)


# Type variable tying the return type of _identity to its argument type.
K = TypeVar("K")

# Minimal XMP template: an xpacket wrapping a single, empty rdf:Description
# that declares the dc, xmp, pdf, xmpMM, pdfaid and pdfx namespace prefixes.
_MINIMAL_XMP = f"""<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="pypdf">
    <rdf:RDF xmlns:rdf="{RDF_NAMESPACE}">
        <rdf:Description rdf:about=""
            xmlns:dc="{DC_NAMESPACE}"
            xmlns:xmp="{XMP_NAMESPACE}"
            xmlns:pdf="{PDF_NAMESPACE}"
            xmlns:xmpMM="{XMPMM_NAMESPACE}"
            xmlns:pdfaid="{PDFAID_NAMESPACE}"
            xmlns:pdfx="{PDFX_NAMESPACE}">
        </rdf:Description>
    </rdf:RDF>
</x:xmpmeta>
<?xpacket end="r"?>"""


def _identity(value: K) -> K:
    """Return ``value`` unchanged; used as the default (no-op) converter."""
    return value


def _converter_date(value: str) -> datetime.datetime:
    """
    Convert an XMP date string into a naive datetime expressed in UTC.

    The string is matched against the module-level ``iso8601`` pattern.
    A missing month or day defaults to 1, missing time fields default to 0
    and a missing time zone designator is treated as ``Z`` (UTC). Fractional
    seconds are kept with microsecond resolution (further digits are
    truncated).

    Args:
        value: The date string, for example ``2020-01-02T03:04:05+01:00``.

    Returns:
        A datetime without ``tzinfo`` whose fields are the UTC equivalent of
        the given local time.

    Raises:
        ValueError: If ``value`` does not start with a date recognised by
            ``iso8601`` (or the parsed fields are out of range for
            ``datetime.datetime``).

    """
    matches = iso8601.match(value)
    if matches is None:
        raise ValueError(f"Invalid date format: {value}")
    year = int(matches.group("year"))
    month = int(matches.group("month") or "1")
    day = int(matches.group("day") or "1")
    hour = int(matches.group("hour") or "0")
    minute = int(matches.group("minute") or "0")

    # Split the (possibly fractional) seconds into whole seconds and
    # microseconds; Decimal avoids binary floating point rounding.
    second = decimal.Decimal(matches.group("second") or "0")
    whole_seconds = second.to_integral(decimal.ROUND_FLOOR)
    microseconds = int((second - whole_seconds) * 1_000_000)

    dt = datetime.datetime(year, month, day, hour, minute, int(whole_seconds), microseconds)

    tzd = matches.group("tzd") or "Z"
    if tzd != "Z":
        # Take the sign from the designator itself. Deriving it from the
        # integer hour breaks for "+00:MM", because int("+00") is 0 and the
        # direction of the offset is lost.
        sign = -1 if tzd[0] == "-" else 1
        tzd_hours, tzd_minutes = (int(x) for x in tzd[1:].split(":"))
        offset = sign * datetime.timedelta(hours=tzd_hours, minutes=tzd_minutes)
        # local time = UTC + offset, hence UTC = local time - offset.
        dt = dt - offset
    return dt


def _format_datetime_utc(value: datetime.datetime) -> str:
    """
    Render a datetime as a UTC timestamp with a trailing ``Z``.

    Timezone-aware values are converted to UTC before formatting; naive
    values are taken to be UTC already and are formatted unchanged.
    """
    is_aware = value.tzinfo is not None and value.utcoffset() is not None
    utc_value = value.astimezone(datetime.timezone.utc) if is_aware else value
    # Drop tzinfo so that only the literal "Z" marks the zone in the output.
    naive_utc = utc_value.replace(tzinfo=None)
    return naive_utc.strftime("%Y-%m-%dT%H:%M:%S.%fZ")


def _generic_get(
        element: XmlElement, self: "XmpInformation", list_type: str, converter: Callable[[Any], Any] = _identity
) -> Optional[list[str]]:
    """
    Collect the ``rdf:li`` items of all RDF containers of one kind below ``element``.

    Args:
        element: The property element to search in.
        self: The XmpInformation instance whose ``_get_text`` extracts the item text.
        list_type: Local name of the RDF container to look for (the callers
            in this module pass ``"Bag"`` or ``"Seq"``).
        converter: Callable applied to the text of every item.

    Returns:
        The converted items of all matching containers in document order, or
        ``None`` if ``element`` contains no container of the requested kind.

    """
    containers = element.getElementsByTagNameNS(RDF_NAMESPACE, list_type)
    if not containers:
        return None
    return [
        converter(self._get_text(item))
        for container in containers
        for item in container.getElementsByTagNameNS(RDF_NAMESPACE, "li")
    ]


class XmpInformation(XmpInformationProtocol, PdfObject):
    """
    An object that represents Extensible Metadata Platform (XMP) metadata.
    Usually accessed by :py:attr:`xmp_metadata()<pypdf.PdfReader.xmp_metadata>`.

    The XMP packet of the stream is parsed into a DOM once. Getters read from
    every ``rdf:Description`` element whose ``rdf:about`` is empty and
    memoise their results in ``cache``. Setters modify the DOM, drop the
    affected cache entry and serialise the DOM back into ``stream``.

    Raises:
      PdfReadError: if XML is invalid

    """

    def __init__(self, stream: ContentStream) -> None:
        """
        Parse the XMP packet held by ``stream``.

        Args:
            stream: The stream object containing the XMP packet.

        Raises:
            PdfReadError: If the stream data cannot be retrieved or parsed as XML.
            IndexError: If the parsed document contains no ``rdf:RDF`` element.

        """
        self.stream = stream
        try:
            data = self.stream.get_data()
            doc_root: Document = parseString(data)  # noqa: S318
        except (AttributeError, ExpatError) as e:
            # Chain the original error so that the parser details stay visible.
            raise PdfReadError(f"XML in XmpInformation was invalid: {e}") from e
        self.rdf_root: XmlElement = doc_root.getElementsByTagNameNS(
            RDF_NAMESPACE, "RDF"
        )[0]
        # Maps namespace URI -> {property name -> value already read}.
        self.cache: dict[Any, Any] = {}

    @classmethod
    def create(cls) -> "XmpInformation":
        """
        Create a new XmpInformation object with minimal structure.

        Returns:
            A new XmpInformation instance with empty metadata fields.
        """
        stream = ContentStream(None, None)
        stream.set_data(_MINIMAL_XMP.encode("utf-8"))
        return cls(stream)

    def write_to_stream(
        self, stream: StreamType, encryption_key: Union[None, str, bytes] = None
    ) -> None:
        """
        Write the underlying XMP stream object to ``stream``.

        Deprecated in favour of ``PdfWriter.xmp_metadata``; the
        ``encryption_key`` parameter is deprecated without replacement.
        """
        deprecate_with_replacement(
            "XmpInformation.write_to_stream",
            "PdfWriter.xmp_metadata",
            "6.0.0"
        )
        if encryption_key is not None:  # deprecated
            deprecation_no_replacement(
                "the encryption_key parameter of write_to_stream", "5.0.0"
            )
        self.stream.write_to_stream(stream)

    def get_element(self, about_uri: str, namespace: str, name: str) -> Iterator[Any]:
        """
        Yield the nodes holding the property ``name`` of ``namespace``.

        For every ``rdf:Description`` whose ``rdf:about`` equals ``about_uri``,
        the attribute form of the property (if present) is yielded first,
        followed by all descendant elements with that name.
        """
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == about_uri:
                attr = desc.getAttributeNodeNS(namespace, name)
                if attr is not None:
                    yield attr
                yield from desc.getElementsByTagNameNS(namespace, name)

    def get_nodes_in_namespace(self, about_uri: str, namespace: str) -> Iterator[Any]:
        """
        Yield all attributes and direct children in ``namespace``.

        Only ``rdf:Description`` elements whose ``rdf:about`` equals
        ``about_uri`` are considered; attributes are yielded before children.
        """
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == about_uri:
                for i in range(desc.attributes.length):
                    attr = desc.attributes.item(i)
                    if attr and attr.namespaceURI == namespace:
                        yield attr
                for child in desc.childNodes:
                    if child.namespaceURI == namespace:
                        yield child

    def _get_text(self, element: XmlElement) -> str:
        """Return the concatenated data of the direct text children of ``element``."""
        return "".join(
            child.data
            for child in element.childNodes
            if child.nodeType == child.TEXT_NODE
        )

    def _get_single_value(
        self,
        namespace: str,
        name: str,
        converter: Callable[[str], Any] = _identity,
    ) -> Optional[Any]:
        """
        Read a simple property, given either as attribute or as element.

        Only the first node found is used. The result (including ``None`` for
        a missing property) is cached until the property is set again.

        Returns:
            The converted value or ``None`` if the property is not present.

        """
        ns_cache = self.cache.setdefault(namespace, {})
        # Membership test instead of truthiness, so that ``None`` and other
        # falsy results are served from the cache as well.
        if name in ns_cache:
            return ns_cache[name]
        value = None
        for element in self.get_element("", namespace, name):
            if element.nodeType == element.ATTRIBUTE_NODE:
                value = element.nodeValue
            else:
                value = self._get_text(element)
            break
        if value is not None:
            value = converter(value)
        ns_cache[name] = value
        return value

    def _getter_bag(self, namespace: str, name: str) -> Optional[list[str]]:
        """
        Read an unordered array (``rdf:Bag``) property.

        Elements without a bag contribute their plain text as a single entry;
        the attribute form contributes the attribute value.

        Returns:
            The list of values; empty if the property is not present.

        """
        ns_cache = self.cache.setdefault(namespace, {})
        if name in ns_cache:
            return ns_cache[name]
        retval: list[str] = []
        for element in self.get_element("", namespace, name):
            if element.nodeType == element.ATTRIBUTE_NODE:
                # Attribute nodes cannot be searched for child containers.
                retval.append(element.nodeValue)
            elif (bags := _generic_get(element, self, list_type="Bag")) is not None:
                retval.extend(bags)
            else:
                retval.append(self._get_text(element))
        ns_cache[name] = retval
        return retval

    def _get_seq_values(
        self,
        namespace: str,
        name: str,
        converter: Callable[[Any], Any] = _identity,
    ) -> Optional[list[Any]]:
        """
        Read an ordered array (``rdf:Seq``) property.

        ``converter`` is applied to every entry, regardless of whether it
        comes from a sequence, from the bag fallback, from plain element text
        or from the attribute form.

        Returns:
            The list of converted values; empty if the property is not present.

        """
        ns_cache = self.cache.setdefault(namespace, {})
        if name in ns_cache:
            return ns_cache[name]
        retval: list[Any] = []
        for element in self.get_element("", namespace, name):
            if element.nodeType == element.ATTRIBUTE_NODE:
                retval.append(converter(element.nodeValue))
                continue
            values = _generic_get(element, self, list_type="Seq", converter=converter)
            if values is None:
                # See issue at https://github.com/py-pdf/pypdf/issues/3324
                # Some applications violate the XMP metadata standard regarding `dc:creator` which should
                # be an "ordered array" and thus a sequence, but use an unordered array (bag) instead.
                # This seems to stem from the fact that the original Dublin Core specification does indeed
                # use bags or direct values, while PDFs are expected to follow the XMP standard and ignore
                # the plain Dublin Core variant. For this reason, add a fallback here to deal with such
                # issues accordingly.
                values = _generic_get(element, self, list_type="Bag", converter=converter)
            if values is None:
                values = [converter(self._get_text(element))]
            retval.extend(values)
        ns_cache[name] = retval
        return retval

    def _get_langalt_values(self, namespace: str, name: str) -> Optional[dict[Any, Any]]:
        """
        Read a language alternative (``rdf:Alt``) property.

        Returns:
            A dictionary mapping the ``xml:lang`` value of each alternative to
            its text. Plain text (or an attribute value) without alternatives
            is stored under ``"x-default"``. Empty if the property is not present.

        """
        ns_cache = self.cache.setdefault(namespace, {})
        if name in ns_cache:
            return ns_cache[name]
        retval: dict[Any, Any] = {}
        for element in self.get_element("", namespace, name):
            if element.nodeType == element.ATTRIBUTE_NODE:
                retval["x-default"] = element.nodeValue
                continue
            alts = element.getElementsByTagNameNS(RDF_NAMESPACE, "Alt")
            if alts:
                for alt in alts:
                    for item in alt.getElementsByTagNameNS(RDF_NAMESPACE, "li"):
                        retval[item.getAttribute("xml:lang")] = self._get_text(item)
            else:
                retval["x-default"] = self._get_text(element)
        ns_cache[name] = retval
        return retval

    @property
    def dc_contributor(self) -> Optional[list[str]]:
        """Contributors to the resource (other than the authors)."""
        return self._getter_bag(DC_NAMESPACE, "contributor")

    @dc_contributor.setter
    def dc_contributor(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "contributor", values)

    @property
    def dc_coverage(self) -> Optional[str]:
        """Text describing the extent or scope of the resource."""
        return self._get_single_value(DC_NAMESPACE, "coverage")

    @dc_coverage.setter
    def dc_coverage(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "coverage", value)

    @property
    def dc_creator(self) -> Optional[list[str]]:
        """A sorted array of names of the authors of the resource, listed in order of precedence."""
        return self._get_seq_values(DC_NAMESPACE, "creator")

    @dc_creator.setter
    def dc_creator(self, values: Optional[list[str]]) -> None:
        self._set_seq_values(DC_NAMESPACE, "creator", values)

    @property
    def dc_date(self) -> Optional[list[datetime.datetime]]:
        """A sorted array of dates of significance to the resource. The dates and times are in UTC."""
        return self._get_seq_values(DC_NAMESPACE, "date", _converter_date)

    @dc_date.setter
    def dc_date(self, values: Optional[list[Union[str, datetime.datetime]]]) -> None:
        if values is None:
            self._set_seq_values(DC_NAMESPACE, "date", None)
            return
        # datetime objects are normalised to UTC strings; anything else is
        # stored as its string representation.
        date_strings = [
            _format_datetime_utc(value) if isinstance(value, datetime.datetime) else str(value)
            for value in values
        ]
        self._set_seq_values(DC_NAMESPACE, "date", date_strings)

    @property
    def dc_description(self) -> Optional[dict[str, str]]:
        """A language-keyed dictionary of textual descriptions of the content of the resource."""
        return self._get_langalt_values(DC_NAMESPACE, "description")

    @dc_description.setter
    def dc_description(self, values: Optional[dict[str, str]]) -> None:
        self._set_langalt_values(DC_NAMESPACE, "description", values)

    @property
    def dc_format(self) -> Optional[str]:
        """The mime-type of the resource."""
        return self._get_single_value(DC_NAMESPACE, "format")

    @dc_format.setter
    def dc_format(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "format", value)

    @property
    def dc_identifier(self) -> Optional[str]:
        """Unique identifier of the resource."""
        return self._get_single_value(DC_NAMESPACE, "identifier")

    @dc_identifier.setter
    def dc_identifier(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "identifier", value)

    @property
    def dc_language(self) -> Optional[list[str]]:
        """An unordered array specifying the languages used in the resource."""
        return self._getter_bag(DC_NAMESPACE, "language")

    @dc_language.setter
    def dc_language(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "language", values)

    @property
    def dc_publisher(self) -> Optional[list[str]]:
        """An unordered array of publisher names."""
        return self._getter_bag(DC_NAMESPACE, "publisher")

    @dc_publisher.setter
    def dc_publisher(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "publisher", values)

    @property
    def dc_relation(self) -> Optional[list[str]]:
        """An unordered array of text descriptions of relationships to other documents."""
        return self._getter_bag(DC_NAMESPACE, "relation")

    @dc_relation.setter
    def dc_relation(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "relation", values)

    @property
    def dc_rights(self) -> Optional[dict[str, str]]:
        """A language-keyed dictionary of textual descriptions of the rights the user has to this resource."""
        return self._get_langalt_values(DC_NAMESPACE, "rights")

    @dc_rights.setter
    def dc_rights(self, values: Optional[dict[str, str]]) -> None:
        self._set_langalt_values(DC_NAMESPACE, "rights", values)

    @property
    def dc_source(self) -> Optional[str]:
        """Unique identifier of the work from which this resource was derived."""
        return self._get_single_value(DC_NAMESPACE, "source")

    @dc_source.setter
    def dc_source(self, value: Optional[str]) -> None:
        self._set_single_value(DC_NAMESPACE, "source", value)

    @property
    def dc_subject(self) -> Optional[list[str]]:
        """An unordered array of descriptive phrases or keywords that specify the topic of the content."""
        return self._getter_bag(DC_NAMESPACE, "subject")

    @dc_subject.setter
    def dc_subject(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "subject", values)

    @property
    def dc_title(self) -> Optional[dict[str, str]]:
        """A language-keyed dictionary of the title of the resource."""
        return self._get_langalt_values(DC_NAMESPACE, "title")

    @dc_title.setter
    def dc_title(self, values: Optional[dict[str, str]]) -> None:
        self._set_langalt_values(DC_NAMESPACE, "title", values)

    @property
    def dc_type(self) -> Optional[list[str]]:
        """An unordered array of textual descriptions of the document type."""
        return self._getter_bag(DC_NAMESPACE, "type")

    @dc_type.setter
    def dc_type(self, values: Optional[list[str]]) -> None:
        self._set_bag_values(DC_NAMESPACE, "type", values)

    @property
    def pdf_keywords(self) -> Optional[str]:
        """An unformatted text string representing document keywords."""
        return self._get_single_value(PDF_NAMESPACE, "Keywords")

    @pdf_keywords.setter
    def pdf_keywords(self, value: Optional[str]) -> None:
        self._set_single_value(PDF_NAMESPACE, "Keywords", value)

    @property
    def pdf_pdfversion(self) -> Optional[str]:
        """The PDF file version, for example 1.0 or 1.3."""
        return self._get_single_value(PDF_NAMESPACE, "PDFVersion")

    @pdf_pdfversion.setter
    def pdf_pdfversion(self, value: Optional[str]) -> None:
        self._set_single_value(PDF_NAMESPACE, "PDFVersion", value)

    @property
    def pdf_producer(self) -> Optional[str]:
        """The name of the tool that saved the document as a PDF."""
        return self._get_single_value(PDF_NAMESPACE, "Producer")

    @pdf_producer.setter
    def pdf_producer(self, value: Optional[str]) -> None:
        self._set_single_value(PDF_NAMESPACE, "Producer", value)

    @property
    def xmp_create_date(self) -> Optional[datetime.datetime]:
        """The date and time the resource was originally created. Returned as a UTC datetime object."""
        return self._get_single_value(XMP_NAMESPACE, "CreateDate", _converter_date)

    @xmp_create_date.setter
    def xmp_create_date(self, value: Optional[datetime.datetime]) -> None:
        self._set_date_value("CreateDate", value)

    @property
    def xmp_modify_date(self) -> Optional[datetime.datetime]:
        """The date and time the resource was last modified. Returned as a UTC datetime object."""
        return self._get_single_value(XMP_NAMESPACE, "ModifyDate", _converter_date)

    @xmp_modify_date.setter
    def xmp_modify_date(self, value: Optional[datetime.datetime]) -> None:
        self._set_date_value("ModifyDate", value)

    @property
    def xmp_metadata_date(self) -> Optional[datetime.datetime]:
        """The date and time that any metadata for this resource was last changed. Returned as a UTC datetime object."""
        return self._get_single_value(XMP_NAMESPACE, "MetadataDate", _converter_date)

    @xmp_metadata_date.setter
    def xmp_metadata_date(self, value: Optional[datetime.datetime]) -> None:
        self._set_date_value("MetadataDate", value)

    @property
    def xmp_creator_tool(self) -> Optional[str]:
        """The name of the first known tool used to create the resource."""
        return self._get_single_value(XMP_NAMESPACE, "CreatorTool")

    @xmp_creator_tool.setter
    def xmp_creator_tool(self, value: Optional[str]) -> None:
        self._set_single_value(XMP_NAMESPACE, "CreatorTool", value)

    @property
    def xmpmm_document_id(self) -> Optional[str]:
        """The common identifier for all versions and renditions of this resource."""
        return self._get_single_value(XMPMM_NAMESPACE, "DocumentID")

    @xmpmm_document_id.setter
    def xmpmm_document_id(self, value: Optional[str]) -> None:
        self._set_single_value(XMPMM_NAMESPACE, "DocumentID", value)

    @property
    def xmpmm_instance_id(self) -> Optional[str]:
        """An identifier for a specific incarnation of a document, updated each time a file is saved."""
        return self._get_single_value(XMPMM_NAMESPACE, "InstanceID")

    @xmpmm_instance_id.setter
    def xmpmm_instance_id(self, value: Optional[str]) -> None:
        self._set_single_value(XMPMM_NAMESPACE, "InstanceID", value)

    @property
    def pdfaid_part(self) -> Optional[str]:
        """The part of the PDF/A standard that the document conforms to (e.g., 1, 2, 3)."""
        return self._get_single_value(PDFAID_NAMESPACE, "part")

    @pdfaid_part.setter
    def pdfaid_part(self, value: Optional[str]) -> None:
        self._set_single_value(PDFAID_NAMESPACE, "part", value)

    @property
    def pdfaid_conformance(self) -> Optional[str]:
        """The conformance level within the PDF/A standard (e.g., 'A', 'B', 'U')."""
        return self._get_single_value(PDFAID_NAMESPACE, "conformance")

    @pdfaid_conformance.setter
    def pdfaid_conformance(self, value: Optional[str]) -> None:
        self._set_single_value(PDFAID_NAMESPACE, "conformance", value)

    @staticmethod
    def _decode_pdfx_key(key: str) -> str:
        """
        Undo the pdfx key escaping.

        Every U+2182 marker followed by four hexadecimal digits is replaced
        by the character with that code point.
        """
        idx = key.find("\u2182")
        while idx != -1:
            key = (
                key[:idx]
                + chr(int(key[idx + 1 : idx + 5], base=16))
                + key[idx + 5 :]
            )
            # Continue after the decoded character, so that a decoded U+2182
            # is not mistaken for another escape marker.
            idx = key.find("\u2182", idx + 1)
        return key

    @property
    def custom_properties(self) -> dict[Any, Any]:
        """
        Retrieve custom metadata properties defined in the undocumented pdfx
        metadata schema.

        The dictionary is built on first access and reused afterwards.

        Returns:
            A dictionary of key/value items for custom metadata properties.

        """
        if not hasattr(self, "_custom_properties"):
            self._custom_properties = {}
            for node in self.get_nodes_in_namespace("", PDFX_NAMESPACE):
                # see documentation about PDFX_NAMESPACE earlier in file
                key = self._decode_pdfx_key(node.localName)
                if node.nodeType == node.ATTRIBUTE_NODE:
                    value = node.nodeValue
                else:
                    value = self._get_text(node)
                self._custom_properties[key] = value
        return self._custom_properties

    def _require_document(self) -> Document:
        """
        Return the DOM document owning ``rdf_root``.

        Raises:
            XmpDocumentError: If ``rdf_root`` has no owner document.

        """
        doc = self.rdf_root.ownerDocument
        if doc is None:
            raise XmpDocumentError("XMP Document is None")
        return doc

    @staticmethod
    def _prefix_is_bound(node: Any, prefix: str, namespace: str) -> bool:
        """
        Tell whether ``prefix`` is declared as ``namespace`` for ``node``.

        The ``xmlns:<prefix>`` attribute is searched on ``node`` and its
        ancestor elements; the nearest declaration decides.
        """
        attr_name = f"xmlns:{prefix}"
        while node is not None and node.nodeType == node.ELEMENT_NODE:
            if node.hasAttribute(attr_name):
                return node.getAttribute(attr_name) == namespace
            node = node.parentNode
        return False

    def _create_element(self, scope: XmlElement, namespace: str, prefix: str, local_name: str) -> XmlElement:
        """
        Create the element ``prefix:local_name`` to be inserted below ``scope``.

        The DOM serialiser writes qualified names verbatim and does not add
        namespace declarations. The prefix is therefore declared on the new
        element itself, unless ``scope`` already binds it to ``namespace``;
        otherwise the written packet would not be namespace-well-formed.
        """
        elem = self._require_document().createElementNS(namespace, f"{prefix}:{local_name}")
        if not self._prefix_is_bound(scope, prefix, namespace):
            elem.setAttribute(f"xmlns:{prefix}", namespace)
        return elem

    def _get_or_create_description(self, about_uri: str = "") -> XmlElement:
        """Get or create an rdf:Description element with the given about URI."""
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") == about_uri:
                return desc

        desc = self._create_element(self.rdf_root, RDF_NAMESPACE, "rdf", "Description")
        desc.setAttributeNS(RDF_NAMESPACE, "rdf:about", about_uri)
        self.rdf_root.appendChild(desc)
        return desc

    def _clear_cache_entry(self, namespace: str, name: str) -> None:
        """Remove a cached value for a given namespace/name if present."""
        ns_cache = self.cache.get(namespace)
        if ns_cache and name in ns_cache:
            del ns_cache[name]

    def _remove_property(self, namespace: str, name: str, about_uri: str = "") -> None:
        """
        Remove every occurrence of a property.

        All ``rdf:Description`` elements matching ``about_uri`` are cleaned,
        because the getters read from all of them as well. Both the element
        form and the attribute form are removed.
        """
        for desc in self.rdf_root.getElementsByTagNameNS(RDF_NAMESPACE, "Description"):
            if desc.getAttributeNS(RDF_NAMESPACE, "about") != about_uri:
                continue
            for elem in list(desc.getElementsByTagNameNS(namespace, name)):
                # The lookup returns descendants at any depth, so detach each
                # node from its actual parent instead of from ``desc``.
                parent = elem.parentNode
                if parent is not None:
                    parent.removeChild(elem)
            attr = desc.getAttributeNodeNS(namespace, name)
            if attr is not None:
                desc.removeAttributeNode(attr)

    def _set_date_value(self, name: str, value: Optional[datetime.datetime]) -> None:
        """Set or remove a date property of the XMP namespace, stored as UTC string."""
        date_str = _format_datetime_utc(value) if value else None
        self._set_single_value(XMP_NAMESPACE, name, date_str)

    def _set_single_value(self, namespace: str, name: str, value: Optional[str]) -> None:
        """Set or remove a single metadata value."""
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()
        self._remove_property(namespace, name)

        if value is not None:
            prefix = self._get_namespace_prefix(namespace)
            elem = self._create_element(desc, namespace, prefix, name)
            elem.appendChild(self._require_document().createTextNode(str(value)))
            desc.appendChild(elem)

        self._update_stream()

    def _set_container_values(
        self, namespace: str, name: str, container_tag: str, entries: list[tuple[Optional[str], Any]]
    ) -> None:
        """
        Replace a property by an RDF container holding ``entries``.

        Args:
            namespace: Namespace URI of the property.
            name: Local name of the property.
            container_tag: Local name of the RDF container (``Bag``, ``Seq`` or ``Alt``).
            entries: ``(language, value)`` pairs, one per ``rdf:li`` item. The
                ``xml:lang`` attribute is only written when the language is
                not ``None``. With no entries, the property is just removed.

        """
        self._clear_cache_entry(namespace, name)
        desc = self._get_or_create_description()
        self._remove_property(namespace, name)

        if entries:
            doc = self._require_document()
            prefix = self._get_namespace_prefix(namespace)
            elem = self._create_element(desc, namespace, prefix, name)
            container = self._create_element(desc, RDF_NAMESPACE, "rdf", container_tag)

            for lang, value in entries:
                li = doc.createElementNS(RDF_NAMESPACE, "rdf:li")
                if lang is not None:
                    li.setAttribute("xml:lang", lang)
                li.appendChild(doc.createTextNode(str(value)))
                container.appendChild(li)

            elem.appendChild(container)
            desc.appendChild(elem)

        self._update_stream()

    def _set_bag_values(self, namespace: str, name: str, values: Optional[list[str]]) -> None:
        """Set or remove bag values (unordered array)."""
        self._set_container_values(namespace, name, "Bag", [(None, value) for value in values or []])

    def _set_seq_values(self, namespace: str, name: str, values: Optional[list[str]]) -> None:
        """Set or remove sequence values (ordered array)."""
        self._set_container_values(namespace, name, "Seq", [(None, value) for value in values or []])

    def _set_langalt_values(self, namespace: str, name: str, values: Optional[dict[str, str]]) -> None:
        """Set or remove language alternative values."""
        self._set_container_values(namespace, name, "Alt", list((values or {}).items()))

    def _get_namespace_prefix(self, namespace: str) -> str:
        """Get the appropriate namespace prefix for a given namespace URI."""
        return _NAMESPACE_PREFIX_MAP.get(namespace, "unknown")

    def _update_stream(self) -> None:
        """Update the stream with the current XML content."""
        xml_data = self._require_document().toxml(encoding="utf-8")
        self.stream.set_data(xml_data)


================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["flit_core >=3.11,<4"]
build-backend = "flit_core.buildapi"

[project]
name = "pypdf"
authors = [{ name = "Mathieu Fenniak", email = "biziqe@mathieu.fenniak.net" }]
maintainers = [{ name = "stefan6419846" }, { name = "Martin Thoma", email = "info@martin-thoma.de" }]
description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files"
readme = "README.md"
dynamic = ["version"]
license = "BSD-3-Clause"
license-files = ["LICENSE"]
requires-python = ">=3.9"
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Operating System :: OS Independent",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
]

dependencies = [
    "typing_extensions >= 4.0; python_version < '3.11'",
]

[project.urls]
Changelog = "https://pypdf.readthedocs.io/en/latest/meta/CHANGELOG.html"
Documentation = "https://pypdf.readthedocs.io/en/latest/"
Source = "https://github.com/py-pdf/pypdf"
"Bug Reports" = "https://github.com/py-pdf/pypdf/issues"

[project.optional-dependencies]
crypto = ["cryptography"]
cryptodome = ["PyCryptodome"]
image = ["Pillow>=8.0.0"]
full = [
    "cryptography",
    "Pillow>=8.0.0"
]
dev = [
    "flit",
    "pip-tools",
    "pre-commit",
    "pytest-cov",
    "pytest-socket",
    "pytest-timeout",
    "pytest-xdist",
    "wheel"
]
docs = [
    "myst_parser",
    "sphinx",
    "sphinx_rtd_theme"
]

[tool.check-wheel-contents]
package = "./pypdf"

[tool.flit.sdist]
exclude = [
    ".git-blame-ignore-revs",
    ".github/*",
    ".gitignore",
    ".gitmodules",
    ".pre-commit-config.yaml",
    "docs/*",
    "make_release.py",
    "Makefile",
    "requirements/*",
    "sample-files/.github/*",
    "sample-files/.gitignore",
    "sample-files/.pre-commit-config.yaml",
    "tests/pdf_cache/*",
]
include = ["resources/", "tests/", "CHANGELOG.md"]

[tool.pytest.ini_options]
addopts = "--disable-socket"
filterwarnings = ["error"]
markers = [
    "slow: Tests which require more than a second",
    "samples: Tests which use files from https://github.com/py-pdf/sample-files",
    "enable_socket: Tests which need to download files"
]
testpaths = ["tests"]
norecursedirs = ["tests/pdf_cache"]

[tool.isort]
line_length = 79
indent = '    '
multi_line_output = 3
include_trailing_comma = true
known_third_party = ["pytest"]

[tool.coverage.run]
source = ["pypdf"]
branch = true
patch = [
    "subprocess",
]
parallel = true

[tool.coverage.report]
# Regexes for lines to exclude from consideration
exclude_lines = [
    # Have to re-enable the standard pragma
    "pragma: no cover",
    "@overload",
    "deprecated",

    # Don't complain about type-checking code not being hit by unit tests
    "if TYPE_CHECKING",

    # Don't complain about missing debug-only code:
    "def __repr__",
    "def __str__",
    "if self\\.debug",

    # Don't complain if tests don't hit defensive assertion code:
    "raise AssertionError",
    "raise NotImplementedError",

    # Don't complain if non-runnable code isn't run:
    "if __name__ == .__main__.:",
]

[tool.ruff]
line-length = 120
exclude = [
    "sample-files/",
]

[tool.ruff.lint]
select = ["ALL"]
ignore = [
    "A001",    # Variable is shadowing a Python builtin
    "A002",    # Function argument is shadowing a Python builtin
    "ANN401",  # Dynamically typed expressions (typing.Any) are disallowed
    "ARG001",  # Unused function argument
    "ARG002",  # Unused method argument
    "ARG004",  # Unused static method argument
    "B904",    # Within an `except` clause, raise exceptions with `raise ... from err`
    "B905",    # `zip()` without an explicit `strict=` parameter
    "BLE001",  # Do not catch blind exception: `Exception`
    "COM812",  # Yes, they make the diff smaller
    "D101",    # Missing docstring in public class
    "D102",    # Missing docstring in public method
    "D105",    # Missing docstring in magic method
    "D106",    # Missing docstring in public nested class
    "D107",    # Missing docstring in `__init__`
    "D205",    # One blank line required between summary line and description
    "D212",    # I want multiline-docstrings to start at the second line
    "D401",    # First line of docstring should be in imperative mood - false positives
    "D415",    # First line should end with a period
    "D417",    # Missing argument descriptions in the docstring
    "DTZ001",  # The use of `datetime.datetime()` without `tzinfo` is necessary
    "EM101",   # Exception must not use a string literal, assign to variable first
    "EM102",   # Exception must not use an f-string literal, assign to variable first
    "ERA001",  # Found commented-out code
    "FA100",   # Missing `from __future__ import annotations`, but uses `typing.Dict`
    "FA102",   # Missing `from __future__ import annotations`, but uses PEP 604 union
    "FBT001",  # Boolean positional arg in function definition
    "FBT002",  # Boolean default value in function definition
    "FBT003",  # Boolean positional value in function call
    "FIX002",  # TODOs should typically not be in the code, but sometimes are ok
    "G004",    # f-string in logging statement
    "N806",    # non-lowercase-variable-in-function
    "N814",    # Camelcase `PageAttributes` imported as constant `PG`
    "N817",    # CamelCase `PagesAttributes` imported as acronym `PA`
    "PERF203", # `try`-`except` within a loop incurs performance overhead
    "PGH003",  # Use specific rule codes when ignoring type issues
    "PLW1510", # `subprocess.run` without explicit `check` argument
    "PLW2901", # `with` statement variable `img` overwritten by assignment target
    "PT011",   # `pytest.raises(ValueError)` is too broad, set the `match`
    "PT012",   # `pytest.raises()` block should contain a single simple statement
    "PT014",   # Ruff bug: Duplicate of test case at index 1 in `@pytest_mark.parametrize`
    "PTH123",  # `open()` should be replaced by `Path.open()`
    "PYI042",  # Type alias `mode_str_type` should be CamelCase
    "RUF001",  # Detect confusable Unicode-to-Unicode units. Introduces bugs
    "RUF002",  # Detect confusable Unicode-to-Unicode units. Introduces bugs
    "S101",    # Use of `assert` detected
    "S110",    # `try`-`except`-`pass` detected, consider logging the exception
    "SIM105",  # contextlib.suppress
    "SIM108",  # Don't enforce ternary operators
    "SLF001",  # Private member accessed
    "TC006",   # To discuss: Add quotes to type expression in `typing.cast()`
    "TD002",   # Authors of TODOs can be found via git
    "TD003",   # For the moment, fix it later: Missing issue link on the line following this TODO
    "TID252",  # We want relative imports
    "TRY002",  # Create your own exception
    "TRY003",  # Avoid specifying long messages outside the exception class
    "TRY004",  # Prefer `TypeError` exception for invalid type
    "TRY201",  # Use `raise` without specifying exception name
    "TRY300",  # Consider moving this statement to an `else` block
    "TRY301",  # Abstract `raise` to an inner function
    "UP006",   # Non-PEP 585 annotation. As long as we are not on Python 3.11+
    "UP007",   # Non-PEP 604 annotation. As long as we are not on Python 3.11+
]

[tool.ruff.lint.mccabe]
max-complexity = 30  # Recommended: 10

[tool.ruff.lint.per-file-ignores]
"_cryptography.py" = ["S304", "S305"]  # Use of insecure cipher / modes, aka RC4 and AES-ECB
"_encryption.py" = ["S324"]
"_writer.py" = ["S324"]
"pypdf/_codecs/symbol.py" = ["A005"]  # Module shadows a Python standard-library module
"types.py" = ["A005"]  # Module shadows a Python standard-library module
"pypdf/_text_extraction/__init__.py" = ["PLW0603"]  # Using the global statement to update is discouraged
"docs/conf.py" = ["INP001", "PTH100"]
"json_consistency.py" = ["T201"]
"make_release.py" = ["S603", "S607", "T201"]
"pypdf/*" = ["N802", "N803"]  # We first need to deprecate old stuff
"tests/*" = ["ANN001", "ANN201", "B017", "B018", "D103", "D104", "S105", "S106"]
"tests/test_workflows.py" =  ["T201"]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.ruff.lint.pylint]
allow-magic-value-types = ["bytes", "float", "int", "str"]
max-args = 12  # Recommended: 5
max-branches = 36  # Recommended: 12
max-returns = 11  # Recommended: 6
max-statements = 176  # Recommended: 50

[tool.docformatter]
pre-summary-newline = true
wrap-summaries = 0
wrap-descriptions = 0

[tool.mypy]
show_error_codes = true
ignore_missing_imports = true
check_untyped_defs = true
disallow_any_generics = true
disallow_untyped_defs = true
disallow_incomplete_defs = true
warn_redundant_casts = true
warn_unused_ignores = true
warn_unused_configs = true
exclude = ['venv', '.venv', 'tests', 'make_release.py']


================================================
FILE: requirements/ci-3.11.txt
================================================
#
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
#    pip-compile --output-file=requirements/ci-3.11.txt requirements/ci.in
#
cffi==2.0.0
    # via cryptography
coverage[toml]==7.13.0
    # via
    #   -r requirements/ci.in
    #   pytest-cov
cryptography==46.0.5
    # via -r requirements/ci.in
defusedxml==0.7.1
    # via fpdf2
exceptiongroup==1.2.2
    # via pytest
execnet==2.1.1
    # via pytest-xdist
fonttools==4.61.0
    # via fpdf2
fpdf2==2.8.1
    # via -r requirements/ci.in
iniconfig==2.0.0
    # via pytest
mypy==1.17.0
    # via -r requirements/ci.in
mypy-extensions==1.0.0
    # via mypy
packaging==24.1
    # via pytest
pillow==12.1.1
    # via
    #   -r requirements/ci.in
    #   fpdf2
pluggy==1.5.0
    # via pytest
py-cpuinfo==9.0.0
    # via pytest-benchmark
pycparser==2.22
    # via cffi
pytest==8.3.3
    # via
    #   -r requirements/ci.in
    #   pytest-benchmark
    #   pytest-cov
    #   pytest-socket
    #   pytest-timeout
    #   pytest-xdist
pytest-benchmark==4.0.0
    # via -r requirements/ci.in
pytest-cov==5.0.0
    # via -r requirements/ci.in
pytest-socket==0.7.0
    # via -r requirements/ci.in
pytest-timeout==2.3.1
    # via -r requirements/ci.in
pytest-xdist==3.6.1
    # via -r requirements/ci.in
pyyaml==6.0.2
    # via -r requirements/ci.in
ruff==0.15.0
    # via -r requirements/ci.in
tomli==2.0.2
    # via
    #   coverage
    #   mypy
    #   pytest
typeguard==4.3.0
    # via -r requirements/ci.in
typing-extensions==4.12.2
    # via
    #   mypy
    #   typeguard


================================================
FILE: requirements/ci.in
================================================
coverage
fpdf2
mypy
pillow
cryptography
pytest
pytest-benchmark
pytest-socket
pytest-timeout
pytest-xdist
pytest-cov
# ruff  # only take this for 3.11
typeguard
pyyaml


================================================
FILE: requirements/ci.txt
================================================
#
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
#    pip-compile requirements/ci.in
#
cffi==2.0.0
    # via cryptography
coverage[toml]==7.10.7
    # via
    #   -r requirements/ci.in
    #   pytest-cov
cryptography==46.0.5
    # via -r requirements/ci.in
exceptiongroup==1.2.2
    # via pytest
execnet==2.1.1
    # via pytest-xdist
importlib-metadata==8.5.0
    # via typeguard
iniconfig==2.0.0
    # via pytest
mypy==1.13.0
    # via -r requirements/ci.in
mypy-extensions==1.0.0
    # via mypy
packaging==24.1
    # via pytest
pillow==10.4.0
    # via
    #   -r requirements/ci.in
    #   fpdf2
pluggy==1.5.0
    # via pytest
py-cpuinfo==9.0.0
    # via pytest-benchmark
pycparser==2.22
    # via cffi
pytest==8.3.3
    # via
    #   -r requirements/ci.in
    #   pytest-benchmark
    #   pytest-cov
    #   pytest-socket
    #   pytest-timeout
    #   pytest-xdist
pytest-benchmark==4.0.0
    # via -r requirements/ci.in
pytest-cov==5.0.0
    # via -r requirements/ci.in
pytest-socket==0.7.0
    # via -r requirements/ci.in
pytest-timeout==2.3.1
    # via -r requirements/ci.in
pytest-xdist==3.6.1
    # via -r requirements/ci.in
pyyaml==6.0.2
    # via -r requirements/ci.in
tomli==2.0.2
    # via
    #   coverage
    #   mypy
    #   pytest
typeguard==4.3.0
    # via -r requirements/ci.in
typing-extensions==4.13.2
    # via
    #   mypy
    #   typeguard
zipp==3.20.2
    # via importlib-metadata


================================================
FILE: requirements/dev.in
================================================
pillow
pip-tools
pre-commit
pytest-cov
flit
wheel


================================================
FILE: requirements/dev.txt
================================================
#
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
#    pip-compile requirements/dev.in
#
build==1.2.2.post1
    # via pip-tools
certifi==2024.8.30
    # via requests
cfgv==3.4.0
    # via pre-commit
charset-normalizer==3.4.0
    # via requests
click==8.1.7
    # via pip-tools
coverage[toml]==7.6.1
    # via pytest-cov
distlib==0.3.9
    # via virtualenv
docutils==0.20.1
    # via flit
exceptiongroup==1.2.2
    # via pytest
filelock==3.20.3
    # via virtualenv
flit==3.11.0
    # via -r dev.in
flit-core==3.11.0
    # via flit
identify==2.6.1
    # via pre-commit
idna==3.10
    # via requests
importlib-metadata==8.5.0
    # via build
iniconfig==2.0.0
    # via pytest
nodeenv==1.9.1
    # via pre-commit
packaging==24.1
    # via
    #   build
    #   pytest
    #   wheel
pillow==12.1.1
    # via -r dev.in
pip-tools==7.4.1
    # via -r dev.in
platformdirs==4.3.6
    # via virtualenv
pluggy==1.5.0
    # via pytest
pre-commit==3.5.0
    # via -r dev.in
pyproject-hooks==1.2.0
    # via
    #   build
    #   pip-tools
pytest==8.3.3
    # via pytest-cov
pytest-cov==5.0.0
    # via -r dev.in
pyyaml==6.0.2
    # via pre-commit
requests==2.32.4
    # via flit
tomli==2.0.2
    # via
    #   build
    #   coverage
    #   pip-tools
    #   pytest
tomli-w==1.0.0
    # via flit
typing-extensions==4.15.0
    # via virtualenv
urllib3==2.6.3
    # via requests
virtualenv==20.36.1
    # via pre-commit
wheel==0.46.2
    # via
    #   -r dev.in
    #   pip-tools
zipp==3.20.2
    # via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# pip
# setuptools


================================================
FILE: requirements/docs.in
================================================
sphinx
sphinx_rtd_theme
myst_parser


================================================
FILE: requirements/docs.txt
================================================
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
#    pip-compile requirements/docs.in
#
alabaster==1.0.0
    # via sphinx
babel==2.16.0
    # via sphinx
certifi==2024.8.30
    # via requests
charset-normalizer==3.4.0
    # via requests
docutils==0.21.2
    # via
    #   myst-parser
    #   sphinx
    #   sphinx-rtd-theme
idna==3.10
    # via requests
imagesize==1.4.1
    # via sphinx
jinja2==3.1.6
    # via
    #   myst-parser
    #   sphinx
markdown-it-py==3.0.0
    # via
    #   mdit-py-plugins
    #   myst-parser
markupsafe==3.0.1
    # via jinja2
mdit-py-plugins==0.4.2
    # via myst-parser
mdurl==0.1.2
    # via markdown-it-py
myst-parser==4.0.0
    # via -r requirements/docs.in
packaging==24.1
    # via sphinx
pygments==2.18.0
    # via sphinx
pyyaml==6.0.2
    # via myst-parser
requests==2.32.4
    # via sphinx
snowballstemmer==2.2.0
    # via sphinx
sphinx==8.1.3
    # via
    #   -r requirements/docs.in
    #   myst-parser
    #   sphinx-rtd-theme
    #   sphinxcontrib-jquery
sphinx-rtd-theme==3.0.1
    # via -r requirements/docs.in
sphinxcontrib-applehelp==2.0.0
    # via sphinx
sphinxcontrib-devhelp==2.0.0
    # via sphinx
sphinxcontrib-htmlhelp==2.1.0
    # via sphinx
sphinxcontrib-jquery==4.1
    # via sphinx-rtd-theme
sphinxcontrib-jsmath==1.0.1
    # via sphinx
sphinxcontrib-qthelp==2.0.0
    # via sphinx
sphinxcontrib-serializinghtml==2.0.0
    # via sphinx
tomli==2.0.2
    # via sphinx
urllib3==2.6.3
    # via requests


================================================
FILE: resources/010-pdflatex-forms.txt
================================================
Name

Check

Submit


                                 1

================================================
FILE: resources/AEO.1172.layout.rot180.txt
================================================
9  1of    Page                                                                                                                                                                   2022 AEO Management Co. All Rights Reserved. Proprietary and Confidential AEO Business Information. Subject to Legal Action if Disclosed Without Authorization from AEO.Date Printed: 17/Nov/2022
                                                                                                                                         PRODUCT SUMMARY
                                                                                                                                                                                                            Fit / Other:
                                                                                                                                                                                1172 KNIT SHORTIE           Style Desc:
                                                                                                                                                                                SUMMER-B 2023               Season:
                                                                                                                                                                                50 / 170                    Division / Dept:
                                                                                                                                                                                AMERICAN EAGLE OUTFITTERSCompany:
                             SUMMER-B 2023                                                                                               1172 KNIT SHORTIE                                                                                      STYLE: 1172
                                                                                                                                    STATUS: FNL


================================================
FILE: resources/AEO.1172.layout.txt
================================================
                                                                                                            STATUS: FNL
STYLE: 1172                                                                                            1172 KNIT SHORTIE                                                                                          SUMMER-B 2023
                                    Company:                    AMERICAN EAGLE OUTFITTERS
                                    Division / Dept:            50 / 170
                                    Season:                     SUMMER-B 2023
                                    Style Desc:                 1172 KNIT SHORTIE
                                    Fit / Other:
                                                                                                       PRODUCT SUMMARY
Date Printed: 17/Nov/2022                                     2022 AEO Management Co. All Rights Reserved. Proprietary and Confidential AEO Business Information. Subject to Legal Action if Disclosed Without Authorization from AEO.Page 1of 9


================================================
FILE: resources/Claim Maker Alerts Guide_pg2.layout.txt
================================================
 Updated System Responses for Common Scenarios


  Scenario                                 Before Change             After                           Why?

  An On Hold / Missing                     New doc info was          Leave state as On               Batches can be released early
  Documents case receives its              logged but no             Hold and update state           and coders can code all they can
  first documentation set after            further automated         reason to Ready To              and then leave the batch in In
  coding operations have                   action was taken.         Code.                           Progress. When docs come in,
  already begun for the batch                                                                        the case is picked up by the
  (batch state = In Progress).                                                                       normal On Hold process due to
                                                                                                     the assignment of the Ready to
                                                                                                     Code state reason.

  An “incomplete” case (not                All documents             All manually attached           Ensures that ALL info that has
  Code Completed or Ignored)               were “overwritten”        PDFs are preserved              arrived for the case remains
  in an “in flight” batch (state =         with data from the        in place and all                visible to users. Specifically
  Reconciled, Assigned, or In              new documents.            “extracted”                     addresses split labor / C-section
  Progress) receives new                                             documents are                   cases, allowing a coder to refer
  documents.                                                         aggregated under a              back to the “Superseded”
                                                                     SUPERSEDED ON                   documents to make sure a newly
                                                                     [DATE] text doc with            extracted “C-section only”
                                                                     type Complete                   document wasn’t really a Labor
                                                                     Record.                         to C-section case.

  New documents are received               New doc info was          Existing documents              Prompts the coder to review the
  for a Code Completed or                  logged but no             are “superseded”                new documentation set while
  Ignored case in an “in flight”           further automated         (see previous) and              retaining all previously applied
  batch.                                   action was taken.         the case is set back            codes.  If no significant change is
                                                                     to On Hold / Ready to           noted, the case can simply be set
                                                                     Code.                           back to Code Completed.

  Documentation for an                     New case info             The case is added to            Ensures proper review of any
  “uncoded” (aka not Code                  was logged but            a new batch with the            additional documentation
  Completed) case or a new                 no further                same date of service.           received for a previously
  patient is received for a                automated action          Set state to Ignored            completed batch as well as
  Complete or Charges Entered              was taken.                on the original case (if        documentation for brand new
  batch.                                                             it exists) and add              cases after a batch has already
                                                                     notes to both the               been Completed. Notes on the
                                                                     original and new                original and duplicate case
                                                                     cases indicating the            ensure that users are aware of
                                                                     link between the two.           actions taken by the system.

  Documentation for a Code                 New doc info was          Existing case                   The status of the new document
  Completed case in a                      logged but no             documents are left in           is clearly indicated as arriving
  Complete or Charges Entered              further automated         place and the new               AFTER the associated case was
  batch is received.                       action was taken.         documentation is                coded avoiding potential
                                                                     added as a PDF                  confusion regarding which
                                                                     attachment with type            documentation was utilized at the
                                                                     “complete record” and           time of coding while also
                                                                     title POSTED LATE -             providing access to the new info
                                                                     [DATE].                         and allowing the end user to
                                                                                                     determine the correct course of
                                                                                                     action.

================================================
FILE: resources/Epic.Page.layout.txt
================================================
All Postprocedure Notes
   Last edited 10/11/23 0919 by Danny Chaung, DO
   Date of Service 10/11/23 0918
   Status: Signed
Anesthesia Post Evaluation

Procedure Summary

   Date: 10/11/23                                                Room / Location: EHMC ENDOSCOPY
   Anesthesia Start: 0852                                        Anesthesia Stop: 0918
   Procedure: COLONOSCOPY                                        Diagnosis: Cancer screening
   Scheduled Providers: Walter A Klein, MD; Danny Chaung,        Responsible Provider: Danny Chaung, DO
   DO
   Anesthesia Type: general                                      ASA Status: 2


Patient location during evaluation: PACU
Post op Vital Signs: stable

Level of consciousness: awake and alert
Pain management: adequate analgesia
Airway patency: patent
Anesthetic complications: no
Respiratory status: unassisted
Hydration status: continuing
Post-op Complications: No


Assessment: Nausea and Vomiting: absent


MIPS Measure #404 - Smoking Abstinence
Is the patient a current smoker? No (XX404)


================================================
FILE: resources/afm_to_dataclass.py
================================================
# ruff: noqa: T201, INP001, D100
# Use this file to generate Font dataclasses for the 14 Adobe Core fonts.
import re
import textwrap
import urllib.request
from io import BytesIO
from typing import cast
from zipfile import ZipFile

from pypdf._codecs.adobe_glyphs import adobe_glyphs
from pypdf.constants import FontFlags

# Scheme-less location of the zip archive containing the Core 14 AFM files.
# Alternative location on web.archive.org, currently disabled:
# FONT_LOC = "web.archive.org/web/20110531171921if_/http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/Core14_AFMs.zip"
FONT_LOC = "download.macromedia.com/pub/developer/opentype/tech-notes/Core14_AFMs.zip"
PROTOCOL = "https://"
# Full URL of the archive; Parser.get_disclaimer() embeds it in the generated header comment.
FONT_URL = PROTOCOL + FONT_LOC

class Parser:
    """Download the Core 14 AFM archive and render its content as Python source text.

    Call :meth:`get_fonts` first; afterwards :meth:`get_disclaimer` returns the
    license header and :meth:`get_font_data` returns the ``CORE_FONT_METRICS``
    table, both as strings.
    """

    def __init__(self) -> None:
        # Decoded text of the non-AFM archive member; get_disclaimer() parses it as HTML.
        self.license_information = ""
        # Maps archive member name -> decoded content of that AFM file.
        self.files: dict[str, str] = {}

    def get_fonts(self) -> None:
        """
        Download the font archive and load its members into this instance.

        Members whose name ends in ``.afm`` (case-insensitive) are stored in
        ``self.files``. Any other file is stored as ``self.license_information``;
        if there are several, the last one wins. Directory entries are skipped
        because they carry no data and would otherwise replace the license text
        with an empty string.
        """
        # NOTE(review): the scheme is spelled out literally here instead of using FONT_URL,
        # presumably so that linters can see that only HTTPS is opened - confirm before changing.
        with urllib.request.urlopen(
            f"https://{FONT_LOC}"
        ) as connection, ZipFile(BytesIO(connection.read())) as font_zip:
            for member in font_zip.infolist():
                if member.is_dir():
                    continue
                content = font_zip.read(member).decode("utf-8")
                if member.filename.lower().endswith(".afm"):
                    self.files[member.filename] = content
                else:
                    self.license_information = content

    def get_disclaimer(self, width: int = 95) -> str:
        """
        Build the comment header that precedes the generated font table.

        The title is taken from the ``<title>`` element of the license page and
        the body from the text between ``<td width="300">`` and the following
        ``<font color``.

        Args:
            width: Length of the dashed separator lines and wrap width of the license text.

        Returns:
            The header as ``#``-prefixed comment lines, ending with a blank line.

        Raises:
            IndexError: If the license page lacks one of the expected markers,
                for example because :meth:`get_fonts` has not been called.

        """
        pre = (
            "# This file is based upon the 14 core AFM files provided by Adobe/Macromedia at\n# " +
            FONT_URL +
            "\n# The original copyright follows:\n#\n# " +
            "-" * width +
            "\n"
        )
        title = "# " + self.license_information.split("<title>")[1].split("</title>")[0]
        text = self.license_information.split('<td width="300">')[1].split("<font color")[0]
        post = "\n# " + "-" * width + "\n\n"
        return pre + title + "\n#\n# " + "\n# ".join(textwrap.wrap(text=text, width=width)) + post

    def _handle_font(self, file_name: str, font_data: str) -> list[str]:  # noqa: C901
        """
        Convert the text of one AFM file into source lines for one table entry.

        Args:
            file_name: Name of the AFM file; only used in the "Generated from" comment.
            font_data: Complete text content of the AFM file.

        Returns:
            The source lines (without trailing newlines) of one
            ``"<FontName>": CoreFontMetrics(...)`` entry, preceded by comment
            lines naming the file and its copyright notices.

        Raises:
            NotImplementedError: If the file contains a ``CH`` (hexadecimal
                character code) metrics line.
            KeyError: If a glyph name is not present in ``adobe_glyphs``.
            ValueError: If a numeric field cannot be parsed.

        """
        # AFM specification: https://adobe-type-tools.github.io/font-tech-notes/pdfs/5004.AFM_Spec.pdf
        copyrights: list[str] = []
        name: str = ""
        family: str = ""
        weight: str = ""
        ascent: float = 0.0
        descent: float = 0.0
        cap_height: float = 0.0
        x_height: float = 0.0
        italic_angle: float = 0.0
        flags: int = 0
        bbox: tuple[float, float, float, float] = (0, 0, 0, 0)
        character_widths: dict[str, int] = {}

        for line in font_data.splitlines(keepends=False):
            # Every line of interest has the shape "<key> <value>"; skip blank lines, lines
            # without a value, and lines starting with a space.
            key, separator, value = line.partition(" ")
            if not key or not separator:
                continue
            value = value.strip()

            if key == "FontName":
                name = value
                if "Times" in value:
                    flags |= FontFlags.SERIF
            elif key == "Weight":
                weight = value
            elif key == "FamilyName":
                family = value
            # typing.cast() performs no conversion at runtime, so the numeric fields are
            # parsed explicitly to really hold floats, like the bounding box does.
            elif key == "Ascender":
                ascent = float(value)
            elif key == "Descender":
                descent = float(value)
            elif key == "CapHeight":
                cap_height = float(value)
            elif key == "XHeight":
                x_height = float(value)
            elif key == "ItalicAngle":
                italic_angle = float(value)
                if italic_angle != 0:
                    flags |= FontFlags.ITALIC
            elif key == "IsFixedPitch":
                if value.lower() == "true":
                    flags |= FontFlags.FIXED_PITCH
            elif key == "FontBBox":
                # Split on any run of whitespace so that repeated spaces do not yield empty numbers.
                bbox = cast(tuple[float, float, float, float], tuple(map(float, value.split()[:4])))
            elif key == "EncodingScheme":
                if value == "FontSpecific":
                    flags |= FontFlags.SYMBOLIC
                else:
                    flags |= FontFlags.NONSYMBOLIC
            # Add copyright information. This is available in two fields: "Comment" and "Notice".
            # However, all information available in "Comment" is also available in "Notice", and
            # the information under "Notice" is more complete. Ignore "Comment" and only copy
            # information from "Notice", to avoid adding the same information twice.
            elif key == "Notice":
                if value.startswith("Copyright"):
                    # Take care of missing space after period.
                    copyrights.append(re.sub(r"\.([A-Z])", r".  \1", value))
            elif key == "C":
                # C integer ; WX number ; N name; We're ignoring C.
                character_width_x = -1
                character_name = "dummy"
                for pair in line.split(";"):
                    stripped_pair = pair.strip()
                    if not stripped_pair:
                        continue
                    key_of_pair, value_of_pair = stripped_pair.split(" ", maxsplit=1)
                    if key_of_pair == "WX":
                        character_width_x = int(value_of_pair)
                    if key_of_pair == "N":
                        character_name = value_of_pair
                glyph = adobe_glyphs[f"/{character_name}"]
                # Store the key in escaped form, as it is written verbatim into a string literal later.
                character_widths[glyph.encode("unicode_escape").decode("utf-8")] = character_width_x
            elif key == "CH":
                raise NotImplementedError(name, line)

            # Add default width: the width of the space glyph for fixed-pitch fonts, twice
            # that width otherwise. This stays inside the loop on purpose: "default" is
            # inserted as soon as the space glyph is known and therefore keeps its position
            # right behind it in the emitted dictionary.
            space_width = character_widths.get(" ")
            if space_width is not None:
                is_fixed_pitch = (flags & FontFlags.FIXED_PITCH) == FontFlags.FIXED_PITCH
                character_widths["default"] = space_width if is_fixed_pitch else 2 * space_width

        result = [
            f"    # Generated from {file_name}"
        ]
        for copyright_entry in sorted(set(copyrights)):
            result.extend(
                f"    # {wrapped_line}" for wrapped_line in textwrap.wrap(text=copyright_entry, width=95)
            )
        result.extend([
            f'    "{name}": CoreFontMetrics(',
            "        font_descriptor=FontDescriptor(",
            f'            name="{name}",',
            f'            family="{family}",',
            f'            weight="{weight}",',
            f"            ascent={ascent},",
            f"            descent={descent},",
            f"            cap_height={cap_height},",
            f"            x_height={x_height},",
            f"            italic_angle={italic_angle},",
            f"            flags={flags},",
            f"            bbox=({', '.join(map(str, bbox))}),",
            "        ),",
            "        character_widths={",
        ])
        for character, width in character_widths.items():
            # The double quotation mark itself has to be wrapped in single quotes.
            quote = "'" if character == '"' else '"'
            result.append(f"            {quote}{character}{quote}: {width},")
        result.append("        },")
        result.append("    ),")
        return result

    def get_font_data(self) -> str:
        """
        Render all loaded AFM files as the source text of the metrics module.

        Returns:
            The import line, the opening of ``CORE_FONT_METRICS``, one entry per
            file in ``self.files`` (in insertion order) and the closing brace,
            joined by newlines.

        """
        data = [
            "from pypdf._font import CoreFontMetrics, FontDescriptor\n\n"
            "CORE_FONT_METRICS: dict[str, CoreFontMetrics] = {",
        ]
        for name, font_data in self.files.items():
            data.extend(self._handle_font(name, font_data))
        data.append("}\n")
        return "\n".join(data)


# Script body: fetch the AFM archive, then write the generated module to stdout.
parser = Parser()
parser.get_fonts()

# The license header is rendered and printed first, then the CORE_FONT_METRICS table.
for render in (parser.get_disclaimer, parser.get_font_data):
    print(render())


================================================
FILE: resources/crazyones.txt
================================================
The Crazy Ones
October 14, 1998
Heres to the crazy ones. The misﬁts. The rebels. The troublemakers.
The round pegs in the square holes.
The ones who see things diﬀerently. Theyre not fond of rules. And
they have no respect for the status quo. You can quote them,
disagree with them, glorify or vilify them.
About the only thing you cant do is ignore them. Because they change
things. They invent. They imagine. They heal. They explore. They
create. They inspire. They push the human race forward.
Maybe they have to be crazy.
How else can you stare at an empty canvas and see a work of art? Or
sit in silence and hear a song thats never been written? Or gaze at
a red planet and see a laboratory on wheels?
We make tools for these kinds of people.
While some see them as the crazy ones, we see genius. Because the
people who are crazy enough to think they can change the world,
are the ones who do.

================================================
FILE: resources/crazyones_layout_vertical_space.txt
================================================
The Crazy Ones
October 14, 1998

   Heres to the crazy ones. The misﬁts. The rebels. The troublemakers.
       The round pegs in the square holes.
   The ones who see things diﬀerently. Theyre not fond of rules. And
       they have no respect for the status quo. You can quote them,
       disagree with them, glorify or vilify them.
   About the only thing you cant do is ignore them. Because they change
       things. They invent. They imagine. They heal. They explore. They
       create. They inspire. They push the human race forward.
   Maybe they have to be crazy.
   How else can you stare at an empty canvas and see a work of art? Or
       sit in silence and hear a song thats never been written? Or gaze at
       a red planet and see a laboratory on wheels?
   We make tools for these kinds of people.
   While some see them as the crazy ones, we see genius. Because the
       people who are crazy enough to think they can change the world,
       are the ones who do.

================================================
FILE: resources/crazyones_layout_vertical_space_font_height_weight.txt
================================================
The Crazy Ones
October 14, 1998

   Heres to the crazy ones. The misﬁts. The rebels. The troublemakers.
       The round pegs in the square holes.

   The ones who see things diﬀerently. Theyre not fond of rules. And
       they have no respect for the status quo. You can quote them,
       disagree with them, glorify or vilify them.

   About the only thing you cant do is ignore them. Because they change
       things. They invent. They imagine. They heal. They explore. They
       create. They inspire. They push the human race forward.

   Maybe they have to be crazy.

   How else can you stare at an empty canvas and see a work of art? Or
       sit in silence and hear a song thats never been written? Or gaze at
       a red planet and see a laboratory on wheels?

   We make tools for these kinds of people.

   While some see them as the crazy ones, we see genius. Because the
       people who are crazy enough to think they can change the world,
       are the ones who do.

================================================
FILE: resources/jpeg.txt
================================================
ffd8ffe000104a46494600010100000100010000ffdb0043000302020302020303030304030304050805050404050a070706080c0a0c0c0b0a0b0b0d0e12100d0e110e0b0b1016101113141515150c0f171816141812141514ffdb00430103040405040509050509140d0b0d1414141414141414141414141414141414141414141414141414141414141414141414141414141414141414141414141414ffc20011080258032003012200021101031101ffc4001d000100010501010100000000000000000000050203040607010809ffc4001c0101000203010101000000000000000000000304010205060708ffda000c03010002100310000001f950000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000cab1ae2a6365f550e977ba07be8a1d12eefb9672fc7e978d1e79ad3d121b8926a8918ef236020c800000000000000000007b7f5d71d9f9fa4702cfc2df7a46db800000000000000000000000000000000002fef8b12d3131efaae14eef3d87af1fceb81db78f48c5f7cc6f4f064db8ff63cdea3cf60cf96f3b1e1cc3416d719c99355a64b03e736e81c990000000000000000af6e86bead3bb7e673793aec84fe651e6c24d64e4d7abe5491a3cdd3795fd33edab9f27bb472bf4bebe2c5de88000000000000000000000000000000d8afe91fb35d7d528decfc2f2675acce296a0cecdab62474b897cad5f2a0cecd7b5fccecc720b17a1ce458bf6a7c58f2f79be21a1b708be349a8f92f11f2ebc1ccdc0000000000013135bff2f8d19277ee70fcddbab325659e0aacbc1860cac98ec9df79cdbb4bedd2cdcc63ba7f27a1cdaade1c9435f89689f5670aef7a6d207a0f50000000000000000000000000000b956e1de8ac491f54a2f3df6c63cf2f55066cd19d174b68c8cccc3e5ef6fcabde4c9e5da6425c5ebd876ba5a4f65c24cfa18ae48741eb9c097e54c7ebbc9bb91476b3b9e0f377d295d1f21e806000000000cbc6b6fa4484cf9ff314dff2aa5cef7da33f5d3a06e393b1fa1f57c3746e95cc39dc9b1662e2ea50df379e419b1c5f44f32d52c4d676891d467b95c4ca8c9eb55aa70bd27e92e11ec3de420ebf740000000000000000000000017acedbd68f2336fd3f57a365ef9775ab271aaad9b96ed4b53da3b23bd6d1e6a6f9161bade87576b193294f4b4c08fcf839b164317368d4e6ae63ec3ea5c87b07ceadf2ef95fe91f9c7d9d78db52b67d8d7d320b7bd23e6376d8f2938000000a8bfd63036cf3fe5e8aaba79fcab757a8e25cb5e1d5a7f86e374fb3b073eb51d1c35506db65e4d32ba690fe64531c3eeeda6f528a1bb87b46a7479d662a564659be5db3d67937aef7416ae80000000000000
00000000197bf68d39eeaaec78b8befad82ea8f7a1a54f3130af7ae57ef979feb9e53ccad71a4af371f63edc76b5494d7f47b47be67018cf976d3674ae8df3ecc7563d9e323b2ed624ac5cc78d15acecd85536d29553f21e80600000369d5bb4f3f97396b3e57cf794d66ade3579ed46e3fb83a47974c561997116ed4f67d79eef2012fb2e97b0e359684dde220adadf5ce45b061f4e729da22a7b3cf33706df278539f307d15cfaff004f960f59ee00000000000000000000000d96fcdc9fd739faee4ccd57b11989b659ab9d371b68898f346545c6e1278f8793d2d26a5a0aef1a489c7f69800cbcf7c05c2995c05bc4fdcc7bde8e0c9b567c8f2b77a8dda945ecdacfc8ef870650000243b8f27ebde6fc94bf60e49dbb48f1f8c76ff9ff00b7e8b49c3c2b9c5f3d3dbdec1d5fa7d8f99745fa1386691c47be5da5428f7a0c9cf63956565e0435fb359d8b65af57e6cbf2bad69a741cdd6a66ad1aad64d31c58fe5ff34d3e7bc1ea9cafd9fd002dde000000000000000000000eb92dad6c7f53a3730ba4e6d2db915dced9bb11e931bd679ed6da063f7394e4efc7bcd9a07bb1d6a6c51da8f0c6078cb268da77c45f6687fa83cfcdc6f81fdf7f2af3f7e3927ab4a7d1e9e7fb1be753491f7065e5c60e8bd3b9c7ce6de38f0b680000dc7abf1eeabe6bc849ecda4e250e6743e5f2917358d6e468ae7b1f4574ef94773ebf7a4b834fe9f5eafbba693d3e283bfe06efabf67d0fcf5a2eeba8f1f81f42efbccb69e7f2b48e4bdaf9e56abafed5a7ec38d6664e224638b6dd5fb046f4bafc5be7cfa5392edb73a1e9bd80000000000000000000006dfb972be9ff0047a7b46ebc9e7e9ed3f85660ad6bd6357c291f3f345f41e41b3e16b9cf66e6f2e35988db627d2c3abaaa707819bb6e95b0eee97f55fc37d578527d43f1eed5f3ed5da1b2f173bd3439b8bb1c7fa386d4b62e576e3b5a0740e79f3cb7647cd6e0000199daf8475fe3f0767b176be1f9ca31a4bcd34d5312720ed5c88a6aa2d5db7e57e6fbf931135631df6238ff009358cacc8798af53a4ecbcff0067a5cfb5096e361af6bdc5b3b6db75d8592ad5376dcb8f665bbd91a4ec7621aff38afd8f6ff460ce40000000000000000000742e7b2bd98fa45ca6bfa8d1b8f2b9176f635c851f76ac1e1c9b3c3625ee748c191a2deba2e2cc43cf81e63345cb5e54ce5dbb55d8c7b4d77a7c62ca64dbea4797663323a5a4de458bdd4d3179ded3ab7c7fa02ef94b169b149d7ec694deae457b4175dcd87a5c576ede71e3a73d5dabbc2f97557ac55af9fb11bb86f77fa5f3c4776be4b73a11a20ac07a78c5d908992db7d92a838c2e46dacd924f26eec954a5e6558ab48f33222bcaf5253c84c2d34e5560f6bf430ce40000000000000000
00003a34f72ceabf51a2afcbdde8a8c8b556b990819c8fe249177ed5ca5b5dae9bbae60751e85a34d8c3f3df375bf2e4c57cc35eef51f1b92cc6557eb20f632431fa1a43e565e5d0de8c096caf3fe879a4eee177e3bef62b67c2eb7cff00458bb6cf697d1f19571f9484a5e9727222a5ea7a185b925996391834ec5090f46d7985469ccecbd67e2ceffdaf95e5f00ed7c1f1e4e1c50e61ee56566e4c24975bb12319bc95f95cde91e3cedebd4b9cabca99bd4556b18a54d51c546a7b8f25bdd1851e8bd580000000000000000000000e87cf32ba91f61928597faa5258db65b97bf39a65ec74f487f36782e349897eddee7ef569bb8c7498e7abf66d63cef7c57e97833dba137cd73c1d9f9135cdab5efac57c5af3673cbfb48495c97cd7e83e53579c2edc75fbdb1cb435ca7e879dbbe6be4a77de0315ff0021acebcc4b6f7cba567e5fdb7b7fc67876b85f4a70fd57cabdebd5e05fa5e9756c4db75c9f97b5476b9b1c7f2188f73b0f3e4ecdec3b534fb061c564e76a6e65ed1a471b3db566c9368f66522a973bdf482b79e55eb1e548dce62f98e4e37a5f5e166e0000000000000000000000006d5d2f85746f7b57a76e3cd6627c4fc3c9c0e71bfeaf6e6bcccfa56f3aace5fd2175aea9aa7435e3d81bce8bda8a7b77d1f23d243f54e95c9257e31f43899eccfa13c97a7d02466b9d6fc8c0d2e8b153bf5dea7c8af57d4795e6ef0f57d3755d5731c9ea7731f31625bcfc99b930cd8b2b38d6762ae312cde0c759c36b908dcd8ef5d55e55ef69b6372d52d79cdba3a89ea5f02d3f0f758a9a3d7e46ff00a5fe91cf67b48faa6b1ac614f62e6394f9e1a4635dde592e578d63d07a80bbd1000000000000000000000000015523a6ed9c23a1fd169efd463dff005f06549e0e4f1e4c4c8b345bd76343e5f9e961393761e3d7b5a247569fe2763a0e6e853bf1ff00b5cf7b4d50def69c5c1ce92b5c1646d1cc75ae25d1ed79feeb6fe74e71d1f11d8b8b624d55f418f973187ad9ca8bd729da1ccc579893df016ae799d7629185ea91dbd1abebda8ef5b4dc6cab5174759df75d95adf2f93879bc3a5f2f87f2559db0a46e578d71ac67624f656f22d16ad61739b36e6f493bfea0269c00000000000000000000000000000365e89c59e8a1fa2b2be7fd9fd145d8a3f4fcce9c7b3d5af61f435bfa0664351daeef105b0fc8fe9f5554fbc1f4397133146f535a92be8ba397abf45e77a4d6f13dc2b3c4c8cbc49292a48cfc646e24c981a81e1917738b4eb19f3f2b89fb251715ddfbea8e1bf4fdef2da945eff19678bf24e0ccc379afb7798b9789a72e7fdb1721fcd97acd56e1ad72e47e4e9a576a9d427b3b9e8da645767bd7ec1d6ee8672000000000000000000000000000000000000001d1
e6a226783f57f445d0b8a19d5ae6c91bbd58bf306f494f13cb5724a7952713218df363bda7597df2437d92af326e3a76b23ba70afb467e5e5e3eef8bd1f1dc27e6dfb2fe34a3ea37efb7bf3a3adc57bec0e7dcbb9ab6b1e3ce57bf5357873ebbee99daf8b744f79d31e7b728a824b3dfb059b619c8000000000000000000000000000000000000000007469ed4f6ce17d51ed1ec5d0abd7b8dd6ee666d06a517f46f15ea783d42abd62bf66fe462646b2e4deb3d271276a9ddbfdebfcef9ff00cd9f617ce9075b8d7d63f2748d2f4dfa11e7ce90f7bcac9fce3936a87ab4ec16743d3da7cf7ca7e94053550c6bfccfb0f20ebfcee91d1f1a00000000000000000000000000000000000000000000006c9d0792f5ae57bfa55297a4c8b59746f5f1ef5a6963a041e548f7be4dc37065b0287adb39f1f271ddbbd63956e7a4ff006e55f3b74aeaf83dd7e27dcf8a55ee9e2a77803c0f7ca32de987934bd2d6f3dd65f28aace71739375be7577cceae3b5f3200000000000000000000000000000000000000000000002aebdc7fa7d3f4930ae9e47d0aed3458b3c5c8f7371abf5eecf6b5aa5bf3f459abd92945ca60674777dd835ec7c49b8e1ebd676af72edbaf5b1ef8f7593c7a3c78614d546712f3da5ef115e558f7f4b2b17ad696ebd2f75d627e4f3a1ddf9500000000000000000000000000000000000000000000000dd34b938ee75db176df07eb1e5bbb4e63df698f96ec7cdf43d7b6cd7b97efa13df2ab5e7f12b918bd2cdec7cbaa3b3837b2aac2cfb79a4d6aaad8cd36efda6b47977dcb1ed65e1ef1d33faf48e77d96ff004d9e9b9dc568db354e7fadb509390d678fcc476fe5e000000000000000000000000000000000000000000000001d733b51dbf83f587af62bd21b66893b7fc953ab6c507075b56b945e9f8fb5697baeb1acd45db9454ec514fb7718b74e5dac6d8deafed1def24ac2dc65a96e973f178d6276be3d3d68deefc2feb49a86fd6f6fb577ca726f987ebff8f391f42c883dab49afd9d4c77fe4800000000000000000000000000000000000000000000000137d4b89760e67b7929bb1b6c76f9dfb231f57bfe535dbc49aa5db155ef253b67132239efe1db57ede3dec762be559a33d88eb9d035d9a847d8c445d1fa47a24bcbf77e65a3718fa578beb6be67fa0b8ee6f0fea5f7579f1f43dff0025b9f35a25f97eeed721ddf9cf4bc5d23a9e0c000000000000000000000000000000000000000000000001bbe91950f43b96d7a7ec1cdf6d7a0368d5731d319916a8fa981c09b8ae8f8ebd971d9da661733072a2bb53c476333ea3e35f5774bc8e9d19d1636f79df8d20fac6a1c2fab7d51d0fe27eb577cbf79f983dd360eb6af81b2c573bd8dec1ae8ce9277e
9d0a4ad1513ef9e83e3a1bc000000000000000000000000000000000000000000000000006f7d1780f62e47d0f6c8fb7e53f455d5896a3b98713b05ab5c3d7a9dce2e7e569f564c3631354dfcd8ba5b6fda7f01f71b9c2fa2e334fe47bf3f5b8c381f5cc4a323037af2be5aab4b34e267626d144466369fd1f1b37add0eaf8209290000000000000000000000000000000000000000000000000003aaf2ae97cc9b74f362c3f3ff4a8faae48dcf431945fb6cd7911d5cdcdd5346de345b7c09b93d5645bccca51374bd555579e56e955e53eb2c0ceb5b53a6fe7741e6f97d73649fb7caf23f206b3be687f45e105cd400000000000000000000000000000000000000000000000000006ff00a06ebcfdbaa6d5cd76cf136a623e62333dec5c3b15f5bd7d56aab96ee691a96d5aff0043ca6064534e61ca9bd733e29363af176fa18d7765de6e712088dd79d6c973ce336760f97891b98b97c197e63e71d3b98fd5a8874b400000000000000000000000000000000000000000000000000001b66a7b055cf4f979792f0f661ed4be1c3985b37a1ec6332d62513ef9317969e5c2bb7d98ee487bb54f06b8dda2b7c456cfcf25b8d24b4eeb937067abe9b2b816f58f90c0a3cecdc079a6f9a1fd3a9074350000000000000000000000000000000000000000000000000000176d30fadf64e7fd37e796f578dcfb442d9dd6bf4f0738d77adf3ea9b412f51476b7e55e65b0740e47b05dd7a36bf15812e35fc79fbfc8923b6a8c92a5b6c7b668bbe4b8c7d5f73d360dbe65d6367d63e81502ce000000000000000000000000000000000000000000000000000000379fa7fe27fa5bcc4db1dccbf7c9cf231f6a2af6b8b0d97667c62db97918d019fb6667a9874bc7df203832c4d797e7224c6b977d628a2e51b2a9dd6aee5b2c4e1c5ecf9cb18fa4530c8000000000000000000000000000000000000000000000000000001b2eb48df64d3c7bb17cd6e62634a62d2da2e2e6706de2c4879976f590cad7d3624e328b55f376e63e74b8f32b3f2fd0c50d19b2eb3c692d635fa39fbdbe43b9f12f470da1e9e1000000000000000000000000000000000000000000000000000000000c9fa8be54d839127d558f1f21f3cb545fa657a9a6164e5627a3861e3b3b0fcbcf6bdb88f34d7e7994bdc845cd72e36eda62ceb507cafafa5dc03d4c01b0000000000000000000000000000000000000000000000000000000000093eb9c41cfdbec3c8f91f7af2d3fd0185ccf2a8edb9dbd4b0776f7e685adcd8ec1e70d81bbafd1daaf04a2fe9d479f47bada05cd400000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003ffc40032100001040201030303030402020300000002000103040511120610131420212231600730501523324016332434357090ffda0008010100010502ff00ee4f4d2ec68ce69b1765d7f47b5a2c6d9067ad2b310b8fe36358c9474db941000bad2015f66374e8c06469b1d1934b4658bfde66db8d790d0519e45fd12e6ca859017171fe52be3ca5186a08a870f24a9f0251b156f1268d3b6979348a5da77edada36d2936a58f922171ff6a184a7283162a3a62286a6d47534a18f8b0ab38e83230dae8aad2bdcc55aa127f1f0c2539d6a410b047ccb0984f3bc1838abc798920846d4cc47e564669dddd37716532e3b5342a40443c5ff00d611737ab86d20a4c2c359990c08604d132105ad287685d70172ca74641236471b3e2ec7f1956af99e3008d0b285f81e2f30359aef546c6f654a7792c7c8d9f9697685f7df926747f2b49c7934d029634eda7ff528634efbd4a215a318d78d30f6674ce85d548bcae18afa26aef1391697954f0437a2cf74bc98d7fe2aae3db8ebb33a725e42446ee8cf48cd73d21990d84361348ce874b69fb9372696153c1f1fe9e3b024682261661ed142f2a6c6bea5afe35b4c4849519bc7362a509a3cbd06656be8367f98d11f16ea1c37a59bf8708ca42ab486bfbb86d18e9a5745df92127dc65a5ea1467cbb0839bb63a6d4911469db68e356e1f117fa10c2760e9e12085985337611e4588c5f35363580325598548da223e282cfc84cb1398784ace5c658af4ec5246ec85d9949f28e0e4f9fc03e3ff8686379a4af5c6b87766dae09856f4d348a474ebeeb83a18dc9c61e2ce5da1241f52c0613ce87a6c7c7d4183f4e89b8bbb6d4b173121702fdead564b5250a034c75db7da9ff00df8311e378599b304ccf6256679a75e57dc563e06d38a6ca9e8af72515bda80f9260da31d2b910d9832f8d2c65afe0e288a63ab59abc7dd932725c9712913616795ade2ce04103bbc5513d443598558f8ee0fa7c67d763a56a3387859756d6660bc3ab09d964a0d3feec15cecc98fa234e161f681f03c565fc637338c4194c97379ec6dddf7da174edf1b75cdd4523f2a24eb969a4916f6b27870ca559622825fe0b1f51e216044ddd9d725b546815b930fd26dc2ce32bd38ba86787c916b62c8e46152d9f890f93f7c7cde39ba43241c1e6061eafca03a99fcb23448e3d29e3630901
e23fdb66727c2637d242c3ef1b0f129b266ed3d9737fbf7ae09e1db144e2fc5461fdcc5d2f235aa0e0130b838a89759e18f97f035a3f2cc2ec9df489fda72715d39918e1793ac638a0cc7561d9534a531d689dd48fe209a6722dfb40b8be1b365511f596e2bb962b842e994aa459187f73a7e9958b7c112d3a76d76224c48e6d296c239797b6b3a89b93495b6d2c5c5c7e1fa76d8a92b858af94a9e32dfcc66a4e13c36ab9d4b1fc0566d20b1a4d3ed7919725bed24ba5211128865140331227667841423c46f4ff00b109a17db28dd6d484894c1ce376717fdac355f494c476a0c73ca8b17c06d47e3452239114c8e42744efee8a4e2aad8faa21e63720d22fa5f1b6bc478ccc7f6b2b65a646df2c5a4d22eaea62507f015e2d42d5ddd0d7d26817a6f8281d91ed937d4a38c53943134d6f9a050a924e31cc7ccfde2fa78a45b4cfa5e44e7bececafc7c65fd9a15ded5b8db4d483c92e2e8338deacc23986e0f2cfa22976a8d37b45174ffd190c67894a1e33efaed58b5362a2f20652ab82b1f495693e6b592063b447d9d93329231963bb54a95aff007ebd5fa1abe9b5a4c81db460cea5aec8ab2900c53b3b3a075112b127d3ef18c8d156905a22d3c7f2daf612bf1f38ff0067a74395f1658f2e1362ac0f1ca58060ce5a67233d9c40e65d378a1d4948423cdc0dac80ea4400f2163f02f20d9c1f8c6e55f13c1ff6f4f45c87318f630ca527028cb815693b332715c7b75653f257ff007e87cd5d6dbd01c8a5ac712675a72442fa315c76a6aa8e3e2e2e80d4a5bf7042e4a0c73cc784e95f20643a4b50e671ef4a48255cbe392dae4beea683c913b69ff63a69ff00f20097978153cbf05772de41bf2bc84c1f3037197a72c070b328bc5d4168595d979cbb5d3b4bd44d8cc58857c9556e39a8599e01dcfd35f036b44198aac4ad41c0ea48a22d8b3aa80d213633616e9bc0f2c632c76ea9d2b1fef626467a5117d58e6026bb8f131b757c278da63235dc43305baaf13d3c6f9dece09e30b94dc1fc2fc8e3e0cefed887915781b581a8324d88a22104d558a3eb3aa20f19f17693e1cd79503ed4428b5c676d4ffb1d3e5aba048d73767e5b69c130e9cdbe3159a7aea4ea6dc593caf9dddf6eba4a5112a320955cbc82019db0ce548794f8697c4076f935d3636bb5f6b878e4ad276a5378e5c648134797c7b6ad45e33eadada2ff007b0528b0b16953b9c541739b5f8d9d52b5e278ed34c393acceb1d2b42432c73c797c7327afc4a4876a68b5edadf787fc7a7a768ec6225692ad995a283ae320252289f6bc7b670f984137c313ab3ff7fec54b0f56787e5b8edbc49a353448e3d29117df93fb31b71e9cf47a9f88653a81e456ac3ce741b4f4e7d3797e89e7f994f929436a1f840fb658ac9b
c4f6724d2c5902e679aa9eaf1dfef5395a0b480f8bd7b7a53dbe4c32e8eb5ad2b365881e6e12d4b8a736962ba1a974ad027f87ef19f1286c28af7864c4759fa71ccf5bbcb0e46f15d99561db8c5f49836e36ec4a6ffbbf67116dadd7675c931226db4c2a545f75aedaecd213273724cabbf154a4db9d86109ad33395c647676867f98664d26d312f567a33e6e42c633c7e19bfdec4daf515937c2dbbf61371453bbb48fb786c383faffa253f2169580d8ccda2f60cae2bcceeb6fdf5b507d29ec699a4e483ecad1f8e2fdae9fb651cccfd996d483b5252335354305f6f78a89405c14f73e2c5b7777989d47cc9431ba16d262764d2a6993cc9a6f99cda49bfdec75cf4761b44dec61e4a4896b49bb98edadc7af737708b920abf120f14ea141f6e5a595b1f4768e2399e1c0da9545d36ee9fa7e38c63c542ca0c3d4e058fa8bd152276c2541766d7b28d47b0f1e17fb795c5f06bb0f8cfddcfe63953d95358daff00278abed435f49875ecf94e6ead484357f80c358f354f647f738d88260d3b26eda572246dc4bd8cc981d4706d450696b4d28ed3c4a1816b8b4b2715204d7a68ba6a4750e0aa44abd6398a8742deb4c1d055e11b9d394ab85f8c02587e40abf312ac60f17367dadadaf2332e9d08e662aec30e6819872dfe7ec68ddd7851c7a4c4ecb9ba6672786b28a1e3d9930a76efc57504de2adfc050b4f56cf6e1d9be1049f137cad264ddac86c2c8e8bbe3a83dc931fd23ca2c974d7a7450784b69dd12664df09a0925418d615e95c1d88d90c8ceba4f3b8ec5c32f5ee3c1af75e94aaee5ad5e23f91ab61a27f3724efb50d1b1617f40bfab54ec545c96d7f57b18fb9d3bd7f16522ce5e676c81f293bc21b431368b42a676ec11f25057411f1eecb9277f665720f7ac7f0384ba56636fbc6db628dd7174df089714ec993276e4d7e0d3f7e8dc5b12ad41a38f398f12ab9987c53ebb34446a3a0e82b847dca42120998970174f13271b0c724a51304e4616724bd713ae98c54f9ebf8be89c763d18c75c32d9da75472f997c89228f9058ac50133b8bd5cc1da8ed06fbbba8a5d2f51f073a2772411727af53682ae98878fec66f20f521fe0ab593a92d6986c451baa90b48a7c6b70b159e378e0f2a2c5bb0cb1bc6fc769a1364ceaf47cc661e26cdb50c3f3d16e2cc3fe39e95a3a79a91a4b1c5451ec84599bd840c4b11d256f30f8fe80a9585f0b46a0e627a9542dcc3666b9776cefb5b543256719347d7d97269f3d72e212e488f487992fa99492f319a178d7d9e097d546703b398bb27edb7401b410ede9639cd4189766929346369b8fbee6421a236272b337f0784c935636552c7050db631bb0312accd1491bc724791a2ceaad7d491d0138ef63b839c1b6c856e2f047b401c5607
2af4e4abd5a2d0e77a88ed8c9ca53c6f4e5dc9bd3fd3b3012e90a900e62856a43cd3cc85fe1e4658cfd403a3566fd449cdaef555fb8af5eda966724ecb8ae3da382499e0c71448ad41033e41c9eb9b908ba76dae02436eb3d692bcbe097834832c08eba285042863d2807eac6388b3da161bb736a53e6fedbb763a315dba77a6fe170b9569a302d2827d22b3c82493ebaf61d9148d24723f03ad6dd98f538598389656ba093c72472726555a475f24b1875e3bb0f55e1e18ee75d5216c87574f694f24b39f89c9344ccde35e164154dd5813ac335c39138ed3b218c8d35395d478b98d34352b23be4ec44e4eff0029bef4fe636f8ef34632059aaf0ac658e61c392920470ae1aed0a8a728d15e91d14845eec9e5c6934f39d997f8612717c4e65adb09e934ce9df6a3274337d3216de23504da52fd6b2306e3b70f19427f1a1b5f152519615af66dbb08b99627a0649c6b74de3a80daf1c23d45d4d540423daf1ed45519d340310cb948c14d6649fda4a83fd04c87b3a76626707a36a16e4c61f1282f1af128e3d211440b5d9fb492842393cf736fe259dc5f159869d850fdc5971461a4c84f49a459093fb57cb7228a3d450b4d0bc5912db1b3f692761452917688f83c79bf45603f562f30ddfd4ccb586c866efe4de2075146eea1aaad64e3aca6b125a266d7b7ec9fe5b1a4b19d3a5607fa05761ca609eb022653c3ea21c4cfceb11271da78d30ae299912d7ce93ab97e2a4190cacb7dff008cc6e6a4a45572b56cb09a134edb626d3b3a7361590b7f13b737a95fcb60231064c854d5e40379a507a7e4b44740c95981e296210679a667919ddd032841440c037324f32e2def343f6e8cc77f51cb853d315453d3621bd07a6b9da0fec5deeff099fb12d277601c8f51844734f2583fe3eb64ed54683ab660287acab701ea2a33306629984d97aeadde12457878e2b5246dde3f953ff918b18e2ee044f213461349e69663d3321513287e15bb6f3bfb6288e7928744ec2c745d770cae366c65805fa5602791681140a581662469b29dac37d31c8d207625cbe45d58b11d68ef752c211dec94f903fe6713ff00c7b771271445c9d6463e32bdc9658d9485c8d90a894f37d3eee81c1b141e911d45d63896b1870fbf46e71b039d8e4194175767a3c4d2fbf776db4f7ace36e37545943d58dcbfe531bc45d486e32e6ee4af24f24cff00cde2beac7fbaf8728173d0264280b4b7da9509f212c7d13270c8f4cd9a21dba36b8ffc73d3a3aebaa8460c10fdd74ef56dfc555b3fa85909a39ec496a5f675043f3f80e14f950f718f36b18bb7523fba76d3b265cbb43114f361b011e2e9bd352d45d518cfe9b945fa61990b7874edb5fa9d9f8f88b69d634fe3dd998bc957f01e9f27f0a74deca9ff00b4
d7ddc3378e1a16e41f84c87e7b742e3bd4e742b7c15752d75fa920216963f2363156e8feadb3459afd54b36a2924298fb513e161fdd683c9010f12fc03046e3713a6431f26d7688b84ab3157d4d24edc5d90b6900f33e9eb638cca36b4a506e3d63976cc66fde25c0d9f90fb75b6c845e2b5f805493c567b328dfe256f9ed426f2c2edb6983c734cc9beea07d4cceba77af7d046fd758568fabff50e5c98fec12a07e5acefddfecddb3b1f19ff00006771780fcf076074526dfb4133c06397aaf1ca7e4949b6cdf7ed0db63053d8189b7ccff61d6325e1297c38bfb197508fe05d3d2b4b41db4fd9bef0539acb1038129c78cfda56d1f6dfcf98d3bed0b6bd9af6ba02e06cfe7899f5dc937cace8ee0fc07a66df09e66fabb6962ec157af9610ba0af8ea5ed61be03fc53ad2105a5a5a5a5a45f0b4b4b48bb50b5e34c43221f9644c85d66bff005ff01c6cfe9ef99727ef8e3dc4acc5e29b223f43762a8670c3f23a5c1715a5af69326ee6b6a8c2766ce33a523aa078789c72f8af4088d10e9653fc7f02a537a8a9de94de2956487ebbadb85937df5f0c1e3b0c2b4b4b4b8ad7666da08d144b8ebb13fc71dbafd38c434ccd511d559fa0d2e3064215ea496489f5f8174f58e517b2aded35db0d31596fed7d9f4a07f241783c77b827f85b4cdd8bb46a36f89155a92df9eaf46c42177a361902ed49684c47b5fa5e23274ff859140ba9e41a387fbaaf578b66a6e47f81622d7a6b8a28de53b5859eac5de61fec17dc3e5a94bc0b3317f685988246ecceb9277ec05a432e94b22e8fc1b56c6fa352545d7d8d61ab5ebf98fa2f350606c09318c920c41d7dd49fd5ac54afb56e4f0c17a4e72fe078eb3eaaad5061986c18b5d87c737690bff1a66d482fda79de4a94e6dd632edb5b54e84f90903a365e390c0d9c78ed37d4f8cacde8bd3a92bafd460d626031ac3ea6395aa750dfa11647a8b257da19ce526d8acc5cf822727fc0fa6a76e71fd062fb6b31b1878c5e49dda2197e20b208546eb4a12f14bbef56b1dcb18ac1478eaaf4d58a2c639ba1fd3722cdc9743e6432b85521080754661b2d7ed3b7976abc9138d91613a11686e596af1deb0e67f8256b075278a509e3a92ed8db60e3f320ba9bfc4fe51c7a76519ed5bfeddaeffa7f59ac7500d6f82aca5aebace93499b1a2cca85a9b0b660fd47b3e3cbf545ccbb13a96b01a9aaf06ac02724d1b0ca538d4832175d7dff06e99bac2e25c5fd5bb8691eb50c0d2c76ab3c49d6bb5d7d9573e43dba0ee352ea4d2d2ba71d583256defde53288b4fd89b69ddd94903a79783dbc8edc8dcdff0612702c4dc7bd476bcaca4179998258822825b0bd36a46a719358a5e25918b8c719f028a092c26a91409ecbae9afd47f0407d798a68f3fd533e6df
b4adb1fb267e4da5a56240886f65b829ed14df85748f12ab3c442410b0ad271765c886311110066528f10ce46c35954bc7182fba82142da6f618f1785d0bf90ea74cd9b29b014a8b752d46a39cfc2ba5dfc708719466a8f1a1fbb331338f07d693277775d40fff008e9be157b3c15701918078adadf6d27760668e6b6ab619f950863acde6465c975bc0307517e158191fd2d5b1b509b1b4b45897078de664ecb5db2e5e590a3d2fb273676199c24feaf2090e6a25fd661516424b44183c9ca38cc4d2aee528945e1faa3f843f3dbafe330ea1fc2b004ef1452717ab610cff001624da39c99c66675cc59d859d5eac5099b2765a4c2eea3a33ccabf4fc922a982a30aa9622ac366c3cac53bc458c95e677abf12b78e48c932fd438bc79efc2ba77eab035ddd431383348ec8cb6893ba775b66464c48a9c2efe8a04d5e1140fc109484dc645e530786cede23621b41b2c37c1beb8dff828cd01afd40b052e7bf0ac09f0cac5590d5455f4a4894df09cdd727f755afcd4543e0293ab94f8b485e292bddd27979bd13e07eab6d607c89e3e09a4d2eb6363ea1fc2a290a1930cc56e9bc1c54eda721da38369a93129b1ccc881c0bd98e974abccdc7d530abd719dac97291b6a090956251caea08f9c7660d0bfc3f536bfaefe17fa6b91f5b8fb65a7b0fb789b920aece9ab833db8435763fab82e2b4b48798bc16e415eadc9a4d9af4c9ab21afa51b69464a94ade29cd9c64ff003ea532933bf85f45e53fa4f50dc2faa46e4a11d381b32b122b16113791de05e976869a0a6ce9e832f49c5dabaf0b2e0c98596bb0baaf678095ae4a59c628ec98c963f0ce94ccbe631bc57f8a23533393490edd878a16da8419470b218019e5005337cfb1fb7d9f9ae6b2d686ae1ff0dc0664b077a29c2c444e9f69d48c8fe1c1d467c5e29d93caca495116d6d33261401b470a20d390a74c4bab725ea2cfe1dd0d971765a462a404516d4702f0271e2b6b7de2f9402ca360653b8a98be7922f959eca7f4c82490a693f0ead664a93e1733066aa224c1c97a567415348a0d35814fdb4b484b8bc761914cca59368fe57d96673418c82f5b2bd6bf10c366a7c2d8c56561cb552fb87dc3eceda52c8fa98f7fb1a52ca104791eb18e3566cc9727fc4b1d919f176b0fd6b14e0c98dd93caea437745edda2904066eaac744577ad5c8e5c85a9c3f16ad92b749e1ebcb8317fcfea78e2eb5c74aff00f2dc6139f5951647d67480a5eb89dded7575d9cace6af5b444e65ffed8ff00ffc4003d110001030204030604040404070000000001000203041105122131101341061420225051324061b1233071a1334291f0151624345262728081c1e1ffda0008010301013f01f5e240dd3a6017783ecb9ce5cf284c3aa041dbf3c9
037466886ee0839aed8fccec9d2f46a0c2ed4a7b40e21aac9aeb26baff00992cec84799495ae7fc3a27484f54fd538b987337754d8c4f0e927982a6af82aff008675f6ebf2ce706ee9cf2fe19ca1aa2c56e17e0d726bb37e55557867922dd3dee71b94ccce1e545ae0754ed06aa9e364c2c54f46637a7b397a858662666fc19f7e87dfe51efc889277e16e0df0109b1dd38654c7e53f904868b95575ce90e58f409a0754eb1365491343157583bcab217ee9a0c2fb853cc6409f7ea83b5d161958e999cb94f9be49eeca11b9d780574d8fdd1d3c0533652ebc2275c5bc75b5265772dbb04e6d95d0dd32b0b5b9423779b9e0e1c1f173765c8e53bccb98e87cccdd5254b6aa1120fecfc8c86e51e2d1645eaf7f0dc857e0d394f8b10a8e445a6e546e321b052d3c8d172a360ea8b3314d65b89e0d7f29d7553cb959991d45961137226319d9df2278857e03c247823376f871792f2867b2c35adcd72aa4b72d964b2a7a7045caa988336e0d81c45d3865d13911982b906c9c2c55faaa3a8ef3087f5f90eab22b5f4562d56ba22de160ba70447187dbc389479a72999a1d90a973ceab3282701aa793985462ee4db06aa8233294d828e5f7528ea131de61752d2874799ab0a98c53728f5f909058dd31fee89b146ce09853878586c9ce478c1bf8714668d90279b85184ddd05757b2ef0eb5913752eba042072e5bc0d51690a2aa2c6e529b296c824081bea3f3dedcc38dd0d15efe3278c42c1005c6c1458357cdab623ff9d3ee99d9dac3f1587f7f44dec9d465bba468feaaa7b23513c45b148d72acc0313a13f8d09fbfd96ad3aaa681d31ba92131f832a1105a354925f6595b6d53e31d1362cc6c80b69f21236c7f24b4aba2500a8b01acadd436cdf72a93b39490ff00b8bbbf61fb290330f80ba8e1b9f60a6abc6ea0d83720586d15546d32554a5d7e89c79b1e5b26e1c2239f36509b5b4d7e5b6504feab10c330fc45b967841faf5feaaa7051417301bb3f75552b5da0e207073c353e42e4d013b85045cc96fd07c8bdb9820b22d50f034ea8eca3a692a1d9626dcac3fb272cfe6a97651f4dd51e0b4545fc366bee5594d27756991eeb354fda5a36e8c6977ecb0dc49d5cd27965a3dcaacc40b7f0e32856cd948075526175b54733e5cca8b09a6a1b4d2eae51b9b2ebd154d364d59b2c7f09eecfef707c2771ec86a805b2be6d02ee65c2e53e20c57574d6b9e6cd54b00a78f2f5f92959fcc131fd0a75815a10989e2cb75b1583f679f58c135568de81565f0b688a8a0bfd931b8f55ff00c83fa7ff00552b6be38f94e7171f729b054b0e6bdcaa8c3fbe0b556c9b4386d2eac6027faaa8aa7c9a0d022dbae5a60c86e139eecdaa9dee89cdcba28a5cec0557411bb342762ab699f4552e81dd
3ec84964e7dd326c86e9f5ae76c9cf2ee11c6e95d958a9691b4fa9d5df292476d5bc05c2bd917dc20b0ca2ef73f9b60a0aa6e401da26398fd5a6e9f335ba26cfaecb16acabd194247d4a761f573bb35449fbdd51534548db052381fd10713b713aaac9a18e95b2c8561f8dd349680dc1faaaf61b87aed7511e4c75adfd0a0f2b3942ee4c1ee9c47454f48f9cdfa2861640dcacf967461cb94e08b0ab1401581537269f39ddca56e7610a9ae0a0f6b5c03ceea7823eaaa4b5a796c1c77e1b276274cd765ba63c3ce66ecb1595cf9f21d9bc221cc81bccf60b16a06d661f2530f6d10664d1c9fbad428a29a6f802a6c3037cd36bf44001a0f9ca11fe9e3ffa47db8582ab66788aa5abfe590a2ecc7314384d550c1a48e514cc98668cdd62b2164161d7861249611f555585bab9d9a23e6541d9e90481f55b0e9c718a08a3ac782dfaa387537fc3fb94ca4819b35016dbe7b0d7f328e33f4fb68afadb84cec91b9e7a2a6ab131ca7429a784afe5c6e7fb2738bce676eb0a796d4651d555d3f798b22186d49765b2a7a76d3c618140ec9283c4aed245695927bfa0e012e6a731fb1fbab6a992de42d29ed0f6969ea9ec7d24d94eed4d2988b43da5a7aa968a689f96d7586d1987f164dfc1b2130e5730a6b83c5c271b15da1873d267f6f41c0e731cc59efc26bc7502dd507b5c6c0ac5a8a39b2c8775b2893cd82ba60b0f0151b8be9dccf6540fbddaaa9d923cc156bbbcd049efe83472f26763d3764f682a49a4a2ae73c6f753913d3676febc213aa73332115b756564ed9593f40a9e611bb31d9371ca6a798e4692107c75f4dcc84dc14d6bb952b0fa150cfde2999270c670d74c79f08b9eaa8637b685ac937b2b26795e15acad745a80b94e8c06aa9ac8697479d5475b154fc0abdc5b01038766b3f25fed758b3b914ce73bafa1767aa3307539fd47fed62788cb433b58068a195b3b048cd8af759ace21145e1cc050364f94345dc5475d4f23b2b5daa74848b29a432c85ee5401c6a58d66e50c3217d398a5dcaff2c4c1fac832aa1a58a9630c8c681769ab448e10b7d0a82abb9d4b26e837fd17686939f0b6a63d6df65d9c9dae89d13fa153e80953b2cecc146ee8544ed32f0c62625e22e9c282964a8a7648f3b858860733273c8d41584609242ee6c9bfd93e27353b56aafaf65041941d54f33a790bdde87805789a3ee526e36fd3dbfbe8a1a4a5a73e46d9171276d154d337e31a0429f3fc250cd0c96720d73f61a2c530e7cc44b0ea7aaa1c0aaaaa41cc6e56f5253182368637609fe57dd66f652683313655f8d4507962d4aa9ab96a9d779f4485ee8de1ed3a8586e2acaaf24da3fee9ceb3ac9cd0f6dca10c6d398055a7f1953565bf0e554f0dfccb3376babaaa9a285b9a57594fda3631
b9606dcfd554d7d4d59bcae4edfd15bba0553e2f34360ff305498bd2cc329758fd5492306b752de4797159509e4885b3582ff14e56a0dd4d8e55cba35d65248f90e679ba69e0ff0088fa28dfc0d95ecf84aef539fe646690eee45c8395f80d1029df11f461a8e04a1c48ba0d564139351dfd1a33d15916ab5b85d375f0ede8fb269b8f0655b2bacc5037e0f3d3d218ecbc1c802871caac8bfdbd2838b50902ccd5982ce17302e622e27fec43ffc400391100010302040403050801040300000000010002030411051221311013224132505106142040d12330334261718191a1157280b1c1e1f0ffda0008010201013f01f3e869e5a976589b7549ecc54d47e23837fca6fb214c3c729ff0a4f65e85ba073bfb1f447d93a722e2423fa553ecc4ecb981e1dfe0fd14d04b4eec92b6c7efc90374658c6ee41c0edf32c63a5706305c9587fb3ad60e6571fe3eaaab13828dbcb845bf6586574b2baee1a22f73b64d1adca9aac30d9365e66cab6844a3a85d57503a94e61e1fbc9256c43a93ea9cef0e89cfba72712d399aa0c4e68b47f5050564351e03afa7cb5150cd5f272e11f4587e1d0e1cce91777aa7e791d65fe990976672a89194ccb3028310be8e4da86386e9ec648991868d16555944d91a4762abe89d4725bb1dbeeaa2b037a63dd39c49b94dcce1a221c0a768140c6ca2c54b4a58e4e6e4d42a0c40c9f652efebf2985e14fc45d73a306e7e8a9e9e2a5672e2161c1d2b1a8485cefd155c99cdb8465fd936578eea9a4ced5598e329a4c81525536ae3cc16298736b212d1bf644169b1f8c9005caa8ab2f3666c9a11d4d95346d0d55760ed165cdba6831bae14d21784fbf740aa0a932b7249bfc9619446ba70c3e1eea1e5c2d10c6341c250e3b2e5068ccf5598c323e889412ba717726b02b01c2090b6ed588b1e273758139d132ce59c15ed051f267e7b3677fdfc7553991d91bb22db2ba0995240b047a8dcf0770747ccd972b967559dd17531534e2a230f1f2383c6628330fcca08edd4e59c14f7860b955b399c646aa7c2b3393e99b4e2df0036374ca582a7a9c13e95ad1d0afd96214a2ae99d19fe3f7445b43f0d6cdca8f4ee984bce8a485e05d31a3ba2db941b6e27835dcb3753e491b74750b0d97932643b3be46189b1b1acf456d1465d9ac54f258d9ca386376a9b66e81543b349f0d34f95d64d3982e58ba2c58dd37bbd63bd1dafc3893ef206aa10dba9ed956550422d72a78f2f010b88ba22c8a3a857b1b222caea9a6e7c41ff0020db985920f40998b81272dea5a98e26734a33c758cbb10a910bad754f52254ffc43c5adbac567745d2d587d53f3d8aa5a8cc2c5660bb2f6963be497f8f86b9979936f16c84ee71d566514a0354afce983a90b00a6f1290e8992290770
9875d5494e0b333561d272e5e59eff0021815409a9794efcab12c3339e644a0a5927a6c8f54cc9a927c9d96270b9afe63561d339c8756bc5af016274c67ea6aa0a2746733d4370fd130e6d131a0357b58472a303d7e1af6ecf09c6e1461350e17b2e73ad644a935d1729cb238044151d4168ca50932bc3c21aebf7f8555fba5482e3d274283504f8987a88550c6c9a151c2d8bc2986dc0aba6ea106176ca9e980d4a6b407279b3345ed0d489aa4463f2ff00e56c8d444dee8d5c7d97bfb3b052d532461639aacaf650446437524593e0ca84616813df7595bdd3a31d9363b9b2dbe4305acf7aa6ca776e9f45b276c88bbac9e32bb834f070cba954f55149272d182c06450c391b72a790df455ded152d0332976677a0553884b3c864f542f2bad2393594ccfd54d230e8c6af09bae6e6d2cb96fdc8593d55316b058aa8901d07103839d64e7dd342770a48f3c97f4f91c36b4d0ce1ff0097ba97ae30f66c86231b4f2dc98d8c0e629df1c87a166ca8390d557c0f14f995397098109b89b29e10e99d655dedb4510cb4accc7d4ecab71baeaf3f68fd3d069c1a33e81368e43b9534222eea38afa95cb6dd09e3668059493be5e91b22084c7df74f6907304100b657be817bb122e53a3caafc034b8d8282110b2ddfe4b00c505bdca73fedfa7d162f8638fda46a879b35398de87369aa32bb6588b646012316172b2a4653ba686c32752e5c7531e5ecb18ada6c3e731526ae1b9f45cd7d63b3cef47dd63fd53f944dc2ccc2992f2fc08cb33f729ad01665991d559345c6a88b14dea6ea8ddaeb20fb22fba6cb90a7d5128b89e0c63a4395aa9e9843af7f94c231a1503ddaacf5763ebffbff00bfdf76c6d66c14f1c32c9d41490472c79145869a6973314e40d5cb10c664a2a7223d09d027b09374411ba0d256550363de55cf8dbe00a47ba43aa03e18dae2fb052d33c75288f6551ea1071598add307aa2428299d37eca389b10b37e5a8b1ca9a36e43d4dfd5331da3947582d3fdfff007f4a9f11a57e8240b9d09d43c7f6aa24a6683cc78fed63552d9ea4b587a5a81b14f56d344d714c17d4fc5c97a22ca00036fc0e8e3647ab74d6db74edd6ca38e497c0141401bd52ea80b6df39278cf18cd9c9ecf456b716b1ced91696eea0177709f74c9b943552d58b599c628e39580b82f7183d1369a16ecd5b7cf4c2d21e2d17364f8f2f168b9b202da29c74a8df90dd73989efce6e9c2e3e0a377491e4354db3afc0b74ba06c6e810f6a28ad8dd364690a6933683e1cbad96dc291d67dbc86a5b76df83756ab150485ba70721c0fc2747dd4a3ba60b9518c928f219066691c0141a248c04de97db8395ec8bd6657415d3754e6dc2f767bdba956313ece5f981f2295b91e470a79837a5ca5239970ae8ea
38df804d89d26c84259ba60bbf8565b3050753ec3c8aadb6b3d430895a4a734b4d8f022e102ad63c036fb2313da2e4209a328b29080d24a33bb3e60bdf5b6db552bcbcdcaa38ac331f22959cc616aa47e57642ab1a736609a9a74b27b7ba70efc295ba66e123db1b880a2aa696f52a8a90e19420e057750c46575d35b945bc8eae2ca79813a47bf7e0c7f659adba2330d165b6ea0903742a5aa630686e513737286a380f4515397eae4c6066de49887e028ea03ba4a01036598a8fc28b7d13dfd958f00a7ad8e3db55356ba46900aa3767a68ddfa0f25ae17854d1f70a0ab923e976a10998e4dd7643416e0ec8357296aa067eaa4af75ed1b6c993195a1a555c39023a2c38de8e2fdbc96a85e17232665a26b2e9ad2dd8a0f78ee8bddeaa2a6e6ea54b439429204d3cb7053fdb37452c56365878cb4910fd0792eea78cc12b98531a1e5414cccaaa2311ecafc21a9e5296b83859194294872a376a41556066b854df80cfd87936314fa09c7f2b9a46ca1ae7b549506551bb33aca3a7616aac6f2ce8b31574d17562c7688b4cae0d40585bc9a463656963b62aae98d2ca632815182ed908decd6cbdf1e3445e653d4991b49b28e8e22d553072cf4a37eeb0ba624f39dfc79462345ef71f4f882735d1b8b5dbaa291ac3d4a49e073148466d15d0714caa7b519cbd5261cf91d9e61609a034651e5353430d58eb1afaa9704a867e19bff85ee558ddd850a3a93f90a661d52ffca9983cc7c44051e0d18fc475d43490c1e06ffc10ffc4003f1000010203040607060406030101000000010002031121101231510420223041611323324252607140508191b1f0143362a105247282c1e14353d17090ffda0008010100063f02ff00ec92e89f3fe954847e345d8fdc29f47f0bc112611a29986471aaa823d7cb784bd5554c31a0e72d7939a1c39a177608585f1fa7dbe8c284a19ae744d943bd3c9c1173b478a00a92585544b8d7de9789ba3eabab629c94e5bbae39aafb549a1031093c82d8601aa61c660764ee2df45d43dd0bd6b2c7e7c0704d6468575e4175d0e0e32ce9ef0bad409179f9a92145b520881bcafb449a0b8e410744abbc3c10a6e43a42f0a4d07e864c123187dabdf338fdd17431c00ec448ce633f76ccd1a382935a1b6042aa57d1afb8cc8dc60e2800d17a4017018ee6606a7451e1b62b3272e9747bf1f469578b99ebcb9fbaef4518f77571553ee36c4d228cff00af8a9012032d49ea8b0908ea44d220868807b83bbee89344cabd39bbd8a404d4e4b685b3181f61b90da5eee4a1b9c0ba28919cf8ea0010a2c11d5133afd3c107a0388f07b99ac1c5506d115de61ae28b04ea295841441a11bfbb0db3e7c02201267c380d56a6d851d4a2c759d0a209b1c8b2a611ab1e78fb92
eb7152e271dc5029c9555770d0536c253ad1107a1df5c86dbce418daf1279eb029b558a35d496a0d588d0d636391b311cdc13e1bc49ec3748e7ee3bceed3b865b8025441ce6a3390443772d299557af0922d6944da5a7028b4e23780013278057dfdb898b72dc62b1dc84160ab69fe22c20b241b11b412e7cfefe1ee168226de3b917949b20886ba6a6ea9dd0aa95faa333aa220f8ef3a4ba0b19c5c38fb23415346d7c28826c78bae1c94482fed30cbdc25db9a192da79526e39d9352dfb8291a1ddb1bb533521c703c6cc161ad5dd8574a28dacd28025ed3709184befebee16f313dcd549b41ec97b3dd438633aa0820648d11d4eca3452d66a08db43aae63c4dae12214480ec5871cfdc0d12972d5c352bbcd913532ddc4f8b7753c86765536a8d51b00cd368b046d0d1895392c1609bea9b444847710f486b7f2e8e74f87b8209fd02ca2a8b6a2da6ec043654eea9cb5dcdcf7513d2c0562b146c61e69b5b0d6dbc7d10a2344534734d52475b0b1d0de26d709109f06276db97b7c39114a192084d12db02ecdb31455dd0f54289c3927d370ff005dc9c88dc4af497691acd4ec68e20ac51450435c584816418e063b2e33f97f9f6f88c96d4e73cfeffcdb22a7648a2a4a5208c957735cd0927927827069aee1febb96c413a70071dce3a80f02a57911791b029a3ac01b63325370179b49d47b7c379a006b6e3ad8eec38200bd4ba445c4ee1fea7741dc703bdc554eee62d2d70041a1054487e1716d7dbeee0e87b3f0dcca7ec8f3c409eedd07161ad06075a9bcc6cc563b888e680017120012fdbdbe67b0ea39022a0f1f6c10fc58db286c73ce4d134661b0e5e328f49165fd2109bdcf77c82ac2fdca6ce0368813a342a7e8087f2f0a9fa420e84d74278c1ed7198d69c91a7b2c620c8861a8f4f7086f187b38fb1e1b9d8639e277448511e92335b95d135561886739bcab9061de393420e89284d2bac885c51e1cca774756d996b8ba4195851f6410bbcf3970f70b1d7a4c9ed7a7b0f243657614b568b0a299a95b2aa2c947675b3ed2d9bce5d4419732bac7d3258c94899f3b7ab82f7fa053fc3b87aaeb61965a1fa3452c2d4347d28745a40c327232f6535ea9a7607b89cc88fbd11b9e56d372751b308001132aaa5b40b69506a5560a8bb3b2ab545d76415d01488a2106038018b89e01073d9f888be288b65ad6019511bd19ae3936aa4d1761d844e45570cd4c508421463378c0e7ec9d1b2f08913070e1ee31121991faa6c46998395b416cd48aa2ecd93b426208cf75386db90bc6ec10fc43dd18aeaf47604ebc193c93881761ab8cc2de97468ae84fcdaa474d7aebb4988ffee539cd52aa73b2eb997829c8ddb2bda1b8c161b99c4757c0314e8aeed3b2f7218515c7a377649346
db236554b50d110752486d221a68a725d54237732a71e306f20aae738ad832765666ab64380ed15af0c129b5757a3359ea519c5e8db93519ba67352141abb0d2e538d103464a4d9b951a00449b4b5c26d2a58b78141ca638eb0437379e6bdd6e68bdd41c1b3a0f737451e275a302eef7fbd7a6a4d116ccba8aaa1bb4913820d64a50e2060ca4baa0f888884d10c22e79bc79aa95458d9b309c7d029c486f1fdaa43645b46cd761545c1995b47a677252860436f2532666d3a92709a9e2d5d19c46a61ab53ac58cda8d97018628c4886f3ce27dce083223884d851691fd3b5bcc6c691ad8d81ad1327804d89a6c4e89bff5b715b1a334bbc4fa947b2c013e14170d2221a53016f655e790c6f35282dbc7c456dbbe1bbba704d3dd437979eeba1747a29207189eea041911c426c38ce9469e27bdbac6c68152a6d98404467cada54ac6c993209916056230cc12b6b4380e39cca943e8607f4b668fe27498914653a6a5d85d644cf80537ba7b8210891a6d69c1aa5d10462429968c45a471e0a47b4da1dd92f70bf2a338952fcb85e007ebeedbb10ba2c1c2ecf04d2c8ad04d2eb8c8cf5e5601c1506a4d910adafdd600346255225df822c2f2e5b7323923d18bace13d4bcea008b21ecc3faee83089c366d39616104514685e175b2eec4faee49719018928b3460224a85cec3e1fbabf11e5eee7ef022146201ceabac810dedc9bb28749062b5dc43644227a4b9fd6394d5e1a443f8992a46867fb94ef897aacd3a27396a85228e8f1361d3a1cd173a80273f352cf56e8a431fbeb358c6973dd40020ed29fb5e062eacbd8ef59ae8a28f477036699e2e8c7d753497370be6dbc316d5070e3ad7e2bc31b9945ba34e23c8ed60029c576cf81bd9f7d42f8fd75c3b34d86f792d161d4b83e3aeed39edda71bacf4b62bfbf076c1b20c67fe4bf6227a26bd8439aea822c7436ba7a544126b72e7a9244b5e4b3c0e336a33870b96284f4690e327ff00a46701c227013a7cd194101dc0ce8bf36e0c9945d64473f8ed19fbf217c7ebaf3cac3afd1c065f3c792eb34801d935b345edeba18c6588b74097fd76e9cf761d19b58c85103e18ee44aabb0d90e09f1013462457988f38b9daad7f909a3c248d72dcd08917477b219c1c4535d909957bdc1a13213057bceccd982706894389b6db3f04e775fa39c336da3f85c175e776a291c395ae6eb9e5e428ada5d0e9f3d785ea8c38ac6c584ea39a57553e81f564f87253d5d1defc1b370f96a6863bd75d63349d16218515981080d334126278a13b1f8230b4081f8507fe4799bbe09cf7b8b9ee3324f1b7d75de11197904b67473709ebb1d91b1de266d0b25680b458a7b0d749de982a616138051624333830fab67a6e1a503ac538675f20c2764e1ab3b403
da6a927b723253b5b63745d35ae8905b46c56f69aaf7e289fd221ba69da1e840e8fa39ed38f69dff009ba9711ae0f90410644714c8b295e1396a4adbc15fe90097778a7bfc467ab2ef5999533babbc0a96b34f90ae7186658fc7549870cbe59293848e46c70d7ed6f0141e3c90f807bf56fafdfd355b728ba6bb76334565deb01ced050f6091c14c6afcfc8501ff00aa584f565958e09a6d27d85b0a18bce720628e92271c94ba31f257c7e59fd949a27633c8709f3bc4b6a79f1d4e46c69d40a2339fb046d31c267b0db74804776769bde1f21c4824f64cc57efeceadd7fcd00dc0236b4a61f18ddf4505b79c5031897bb9608f424c37f09e09d0a2b64e0a4153b42219dba4c475366ed979c9d2e34f21b66fb90dd47656068c4ae93b6de5a8e36dd3814d883ba5039ee9919cdeb636d1f4b61e900541ba5068f8afc3c5373478bdec8a0e69983c422e7b835a3894347d1eba2c3ef788abc512a59791213dd8915f54082889cc1e051bbd936bbd751f0dc27440654d5b90597b33c02da8c01e415f22fc3f136c00a812c2e0fa5b0e1f17c45202aa4ea2bba369ae0cf09aafe63497446e5c14a53b25c1aa66a7c89120388aed342054d578293705218a60cd4ec958f66a43830c4def3209b0983d4e7616b9b307828b07ba2a3d2c84dbd3d2200b8f16173c86b4624a3d1fe443a37ff519597620973526ba615e5fa8a2d9faf9159159da6e69b121baf31d81574a2a8aa9ba9253f8ea36627721976a197804ec11b458861bfeab6f4584e7e6090ae3dc2141f032d98aa93e815d18213c72539ed9f23bb45762e379bfe5514acaa2ec4298ab751a7929656e8ce7761fd59394ed7c6886eb1a264a8d1cf7cd3705c1735437ca9b8ccf91c1064460426c4719c41b2e90b688c8d85aee1661319a6bb9a9ad814cd758fe91d905260b83926e8ffc498e89768d8ccc7e2af35f1221f0862b92e87470690c1c7d771b46a8b45397159372f25446feb52fdf50dd59b8e3682077ace88bb63777218311f9354e3bc68ecc855c85c877dfe27d56950439cfa875e74a64913e1ebe4b26b22ee2a454c545925246d863f55b27f654da6635a64c975101f1078e526aebe27f6314a1b036dd22eb8baf86b8ccce54f5fbf4f25fa388b24a6da1522a7a81bc1b6b69eaa7066cf8a01ec556382ecb95dd1f467c477241d18b3456658b95e883a7899c4aabad000e48ea3cb9ce21d0da5b7b80e5f19f92e2b692067ab2e164a6aae47bcd3c757661b8aeb223618f9a9bc18c7f52bb098d863f48dc34df73afc16ba4e3d9a914f9792e2b7f4e1b9c5779764fcd7e505b2c68f86a0dc163990c746c01ae69a918d6b9cfc970364baf1bb413df60b04662cc75e3c8ce4d683b2070fdfef2f25b2230c9ed37815062b800e7
344e584f972d4c161648ee0ea8d4d32e96117bfe312187d7c9913467340768c40986c8169c3e38a3bb98d5c358ad2cb8106f4aad970f2668f11d1190a0bfab885e642e9e72ce48ea62b1b70582c3752b22458864c602e71e4a2b9a5ee697120c4ed1f5e7e4d9457dfd220c9aea1c3819ceb391f66d3223dbd236e5dbb9ce9fe7c9c6386078730c333e1cd322c337a1c468734e63758eed9a3437de870bb52f17dfd4f93e2683167d293d235ee7622405dfdbee5ec9b37ba678d93c07dfde29cf799b9c664f93d91a0bcc388c330e0ba585b2f147c338b4fb151c0c6346b653af3f98f9a7c77e2ef289890aad7483d9989cff00d7c532342f08bc3c2729fdfefbf2f88f6c360c5ce320a5a1b1b1bf5be63f64e8d19d7a23b1329794dba468eebaf1c38386454283a6cd91e774c5ee1e672dd973886b45493c139bd317913ec34fc933f0d09cc635e092e226f1c47245913498d118716b9e48f2bb4c0d222439700ea6787c4fcd4a2c2644781473693a77bfd4937f978d7e931490ceab6ba585fd6cff00cfbaafce70c6a587eeaa4de91c66409890f5f4fb92126c57b4b419b4607228f47a34368952f126aa70ee4096176b4e73a7ec8f49a4c4208ba5ad37411e8117389738d493c7ff00db2fffc4002d10010002020103030304030101010100000100112131411051617181912060a13050b1f040c1d1e1f17090ffda0008010100013f21ff00f64cac0652d3547fb73eb2c312b8ddf9969077e07e65426e9a47e2b7b8ac103829a2adaf722f4ef615e3edca4a19a76858a7405f4bccbd0b98a7304285c2e3a9666fd6670f272fc7f9c8005ae009be7c6ab9a8f0b96ab1fca2c07ac6b5d5e738eddafda0c1ea080efa8dd33a282b0967e3f74d89051d9bbf88bd08bcedf980e5ccb4635a550229d2ad8b05d0bb58972ce50857f94cb3775d1283a61d5ea46fb0ebdffeb13685001536cc1c42201ef2e347c4bc0ae32ae6969f202e956abba0339483684d1873e1edfb801dce6dd10416fb68f48c1dd2d89ccbb6012aaefc45b5316e59cc4207a06662889532abc4cb42351c71fe39d69d05b3d74155c3528ca068ed3889e285cc14108b530699611456245720d597ec7c10512f36ce1c14bdb4e3b998137710b901f0ef3fb6aed41687308eccc1b96c044a7c8fac7a91c658f95cb369df4351aa97084a8152e4c53b9cd530a4661dff008ae2382b173e3fbcc039c00e3cb071880db00e3ab64c50298cae0c758c74d8318acf05d35567672e4cca328b24c02dc0f57e5e7f6a75d2e96aaf58c054a8e9a4e190ab47d65497c1a9e6957305e7ae9408afa8c36c45d1926bfc34402176bb7aeab86121e206012981516c1322232984a7a19e69848c5c243f12c6f3158139c969dc2e1b3868ed7fcea8
fda2e5fb4442c94ef8319f5fa2fa175c2e9593125d4111d0a6d9835d75d928878623544333c32b6eff006ff8235638137b777a3c358bef3174d8c4ad5862f6a9e954670971084a6970dacc11a3de5cf789763bc66e692160ee499126a02f42adbc8b79f6edfb36d216fb430000b8adbf42c1874112dd2d8a53a1d1542a6ea711d34c49032b896ef84af2835a8f4615cd6a2aad92a42d69861f207f5c3312d3fc87d9800ccbb8ceb9e79570768418eaae6d4762dc63d418b7e65371894888a84adbd226f2c04a64a1982f1318417ad8d9870e430e3bce13cae9ff004d7e79fd90d9daf83cca13399f9ff9f41b6003ad30d9992012df294f1301886f105b488287d0f4cd0dc6aa1b2e024ab464b947357d60a01f93f58bb10ba05fefaca605dd669bfc4aa2743a573c333a8a0f674b2fb31de7a5830935732994e88ac2c5895513a621a85c346eaf60ebde5ddb6d5d0693f637e541b19feb1d0a3ad1186d0b8b28613df335a42595bed3279979059dc7d379df4c7137c54b1bdc432e19e4365e6a769094c153d98cfd476cea02d58f6cd42857899e7bff006ea26a32a6a5c49a3c2303f945598b6cf5babcf4dd3310bca3d598f9318f745608c598244e26909c4fc0672638fd887900e5588ea8225afd2423751afe61a44a989c3da5b85388e10a2ee98b79fa6a62c2e5441e06ae2f26f172e9acd26d2941e3febfbe3f506ef3b89e064cf9cd7c43945505e22c6a51d1013cd196e5dfd0188073295160a87926a295b538e23e3fbcc22999d09b1568a482cd3dd5d3e4be1dfec2c03bd46da00964f54af79604e2455a2259e01d0f59b51ca06e149758fd37b4c3198a66c364bb3de5607b0fe9e51405c2c0a1e2ee52a881b469ed389429d2a26b0635689f5599702e67a6435d2b32e1a138e605fca7cf4bfd00574ca6d73eff00d63f61f2a0dae29186a3aebad20f72cf339152d2b3da3c86f3721d52c1fd268b12f431e913a4fb547f3fa5cf6d925d06ff00acaa76864ed1e5d4917605ceea618292abb1d93ed15298e3e8b767a379914b504c25a9954188014cabeb97bcfbd563863bb743c1b1f729fd81866103943d573c553162343a51ba2666b0c11a527307ac199a56935a2fd7b97d12ec48d4388a16230ea3c5e4bfd2c7397b3f1cff001f89921f6907885456a4211b8ed53baf54ec49e21baa621b0c06aefd3b448436937de667b633d52a70e33e81030931264f110389604332c223e65aaeb1d184556106dac5673e717ace3fcf16c16ab457040d1360f47aa8680b3781eb2b88392025c2748a194feb5cc5dcb8b6c6a130cea25b0a6998a6715652a3e70133c688883b31fa3562e198152823a8a42e4eff00c4405b993098d021317307226bbc3b14b2e81de5b422456eb7033627989afc
252c6351b3686308cc84d63d0aa94a665bb238428b96a86bd5625302f05a6d63c8fc7f9e36ca780fc1fdefb9be9869728a22e128ba864972c00f6e233c3536579ed0c0384760ccd422077fac4cc33030c29822d02ad3182b3c740be12e7aa59982e664a8e77dfe8a12f46bcff6ff00af4acd3d007b99632d651080ac3898238f78c0f62e3b2e7a7fee412b95f499c480a3191c10f2673115426b52d74b2afa64da800c6a70452ce1a3da5ade300bc7a3fcfa0218a362bf82a1d5c6a6666c8c20f31a9bd4b41770d44be25a1909550f89660cf479fd5eaa82228b7062559b282f2d79e8d84d98c69334782629f96e6ff46d61ca853b4642b963765730cb9359be3b6d6f98f5283fe885871aef090b5f98a4ea52be65706652c75cca86ae3bcad834dc48352ef64144dca23442ed947975effe7bace5076d3d12e23d639f966f130333632ea664d32e09bcb4e205c69899b9a86d3e9230dcba149ccc7857762f3f062024be798f4f291499a953467f4ddff48cb7569763f01aad74cac16134a6fa77f557410701953743eb05b3158416cdf63c6c8ee485cba4487b4a91292514ff00711161e06a0b13b4572ed8a0386b85fe5ff3c8980441cd30fe3f12a36398c3a45562d937a85e07b3ea4fb17a1259e696bb63051a67312ff48220bb23e3f4df023e63605fa7f719979d154318fac01431f5ab88aa77f5ed02d453b8c49e53d197b2aa72630e995f8982512e05ca2a38182f166bd2ff00cf60ec467077f6ff00b1a580b03848102109425046ce90812fe5e7ab1fa01154d8a8c70457b9b74da2286ae59706bf3fc757a465d855ed0376aaee7c5c1a8aac64cd7fdb9521f7549edffb111be9bcff00d25c1d612d5d5efbeff8ed2c80b5e038acf78a1caaca2e2bde2606bfc98b3f1d38e3ade2b10f240b30963f1f52c01044ac8b6cc57bd198b001885aca862398a34cb91e73b1291b7ec20e50d6326b87c7fe4a8421d2b17d0084ca5e2cb43a31254e4ac12c651edd20522744bccbdd4add74876cd567314b9c3fe66b99eec357f9a94282a2cfb51454b5c557bc5ccec8a8ee12f4d47dd207ce898d51d6d87b459286c423d0c399733136859a8431c74c287abfa13dcbc5f995a97710440a82024abab76089a562b4ce1d6c3e7f61b30635cd3a643756ca81052aa2da68425a1387404bb7e83a83c0bb562591e3da29743d25db2921496c30d63050b66bebdcc53e423691859770b89176e1b98c57c421215cab7e10d97ea314d4f11da6e2c7c802d4bf148d20778c369c9315387d6070365fc12ef46bbe8e442452e2675472c67ac4d38863e8604bfa1d28eb38ab8c67579abcfec4a71b6616d7f3ff00a74860ed2e2464a8583d14cc23b82a779ac750b6889e74e181193dad4bca017d15312ac7
65578e906a36cf1a986153729186c5454603965f81e89842796369b31027842ba8bfc481ee3e6f825541b8009e750d6626abb81db02e01170d9c46e66b5df09ba8b12500452c225952aba7e9a79a5fcc463ea8f4c44d517aae5f40e86e96e1079cf7fe37dbf63a283b1d0ecc175cbb579d873e2572b6c9908324311dcaec6505a82742d9c9254e70f445f44588eb799903310a1a483483988eab7328038b8080afa77e4b16ba93af647a0664d1161e41332a8e3885c1784ea70c1e08aade835a6ac94f77d633f2584aff002995f0b4b16a7a4bcc4640a778b61270b0524693998df2a20c4e36598b52ee5e9a88a881fe90a69321dc14840952a540e99b8c6a95f9aed0dd04b44d7e57f642a4a0c859e381bdc751d99a97a370d53995082584cb05c3a53dd9a3912843412235094112cb71c33131a2aa5325733647bc0f7d71510cb0e63ebeb2c596f05a2610d197626f402571006a7e3e6333b337329961c35300adb4dacb5e8e3e13d32aa563d120879eb298fa3a854f9f4fe2525f0f6085233a48e6d767be451b5cfa4a1d42e2d6a276cc499654e8d8426375702d91979cfd4b0f71ee3ad7fec6bdb7791f1f1fb30a20f5894e33cff59ccb72d998003da57623a665cda9918345c77e05d5640041e65c38cc0818c9001d4d11722b88aee0c0a29e358b043d2a14abe7989453950edbec4ab0a8b60e4567e6e163b2be54b888d92db2b9b4bd04d1a8ed90ed62f90f8d278e55bcb52ef2c14980e90ed2efa22008fd32f3da5c2767d3a18752a6766d01bd41cc33170188fd4536df573b8e43a71300938038ae3f6746ceb1291898708986377e1c45e52957115c2442917c928252ab953d272fcc140fb25d86622238c9d32815d1cca787cc11d31603a80b563e56cd77ef7887eadc3fcd1ad5c960083121a67eb32cdb2aaa29793e6505ce551117dbc916ca9d983a5f514dcbd4e523b3a6445036b89615bb87c4b46c7a4b17a7d13b2835288d1958ea7201173e057f032c3821c5e1b012ce33e3f6a76ceb129196d768051d9ad76f89a417d19725ab98b1bab4d02665127638945e778b6e2e3909731306c70ec740f71b313a8f6cb943f05314003cdafccb111ce9f0313b4e985baf785a47f45e65d73e0e0f695be95a758145657cafacff00ef29c8113c74e7276479843dfc8986e5332660095c7328d219bb88bbb95e8637379515a8f9eefdb4e501932d725f60d6a211207a654efda0c27981d302a8165991048d5ced032cc6a3cf4a241644d90cd5d4a56de12c08078cdef6576de5983bcd54b3ee12b66c597cbea2643395855bef733e09552fe9788ee31102a7f0408306a58111491d8e63a054566857d1a984e81c182e6f0a21ad1404626d82df1c1c9ff119aa72b5e0ec7ee1488cb01a94
6ef8a3d88d5a861557eadc4f8ba57ba9fc4b93af143fd2cf53c966821d17be1cca6fb208b5176f283c0bc04433567fb534e8626021f82280b9b6276ea1b6a56b1d2e57317dff000eae88416e0399e3083f97d481939258cad6e4c1eac6471a5a2722592d3e22dc116b47d2082024df89be6d5d5abb9a441302e5c18e15da586e2e27e5df34777c4d6252b6739c994c62abccc680aab81e6bbef3fbd7f6bca1c7573112c7a50fc66b80a3fefa2c1c6baab246fad0d728bcd0dbf3fc431d4c5a954ebe0db3e3a1fc11d2e7da0716b2b13a152a21b1ca8aa55b5e7a932d32be4d057c1c7b43174d680a6f9ce799547b72b50f48bada30951e7873c4c90dc8d05f6f4f33005c41862b7bfccb927792e541cf80f83f7cba71a182b940a3eabee59c351bae421d0e54b8d9bdbd2f6f6ae07758fb50b821f5b23ec642341e9d6d67097dddfe652b53c531300b75970749d4c041aaa1e9da5b063369f98d3eed16bf4330de37f61163b84f5bff007d16a0dfd1852e953baeb15ef1295118628e70e86a501eead1002b5bab3de98b50d182af351dbb9f3d1212d2272ae13d35f1d036820d68e63b3d7ccb4e9776ff00532ff4a8e3ec27ab0033b267f83f3f5f5cd27f0a8f091164f24ee5e92eee108a0b1d3b0eef7453b098753c3015ce7d2ce8c27b2f6767b9e2174c64f5ee6be606b2d353fe07e659590f6aeb5a7184c1afa582c9626a333b54fd8358822d52ebb1cbbfcc3a378ccdc68e7a78e060d90ab97ffb32a5cf42b4f31b314bef864fe65c1c9644897068402d5e22f9f7c8dbeedcbfa6fa7815943f27d0b52ee34dde19dafb044354c8a6b179cf44b20a954e27775b71c0c06591c3386efcbd6612754b9757bf469a9f601d939256edaffe0260cb6ec7db1a7d37f4b3a635543acaa11cd1a7ec172c8b06c653c89d9c75a278bf5dbeee7788530657e1531455b5f565b9311e1e834dc26da18e8a69ec92e9917f47597ae39bd92c2ba31299d92a3567e7ec20f88d46d4e0f8da7b4be3a2d116f0e95b42461e10e9ebd5f5b7f39977d1590d4c15688b2dca97d6a5a53f4dc1d8c2da2b32e4bbe98a4748982aeb9fb081db477629bc79398aba1d405cb6072631a358e07fde9e3f75f46622bf4ba0a6099cafd1ce988cba5eb5777608ac462cf4b894b00ec18cbdbec2624ba08b60e1c1cd332dd2a5665bf3512ca753b03764b3b0669d0cc717517b309cb8813eb1961c448916610bbd7404baa664e9ed2f8afc45c2fbab306d5f731d5ea623cfd87a0e582bb3f2bfa28ade07a7aeca96be3a754c6bd54f07aaf4ea58cba5af4b92de2668d4f4aa0da7132e7894431b1e3bc30d4c3a9a7e21e2a69597b57739b3269fb0f72193e0ee8f0fd0201f0ea3379c9de5d2ae87bb814ced0d0faebfe4284319
3d20e961b6043a5867f0004e747c551888db2e1ae7e7f9854d12af4a710a2126a3c19a1e5639510ff0507094b07b7d87e67b36c357efcca9b0f2a0c41caf1eaec964d4b9cc583921bbd7f3cab79fcf007a03049cf4d8be2cb1286e5b31102c78e04c0d4c1a8090b857114e6dca562502ee3bf0f0e7096302225ab412cbd9c83def4976bc43d1e289fd0f2fd883706ab71630bf274e66381530270aca1d12e59e7a449568fcc276344fa4efd4af8bd5bfb6783d4626fdbb691952b7c5ebd03842c2d7e3d76a470d4f04a62e86bc4c87f258d2efa21c3ad2d25684f7d7c2106044355443b79e9dd88decdbf621ab3cc395e6b19c573c7acc01aa653f74ba98aa486d9145dd55cbe871153395ed086f251b75a8a194d12cd795de63d41f06a57328fc27bad4454457906d671a7a15d1b55011995f17befd408f5c7459390731c53d8212426d1dfb7d8ae197594b1ee43604b099f326a557c40b5e98179f996cd28268b5349bedc17f7d40d9675d5d0b1ce0ff0070f84c1a9e18c1c060acb396fbad0ec91a0346b0be917fa318fabccc34189c0d31f50819a0c96e3742b887ae4d2763bc552adaf2fd8d87cb586dac1f60fcc4b153149994ab95188be31494bf88bb08dc7c26a335bc2645be85a8056e5b4702fbd40110c0b6f22774043c1c743a980e1e97006372c04bf594b01ed0ed5c215a0be704bb8f27d8e819d68a461287384c9ff94fbc28746aa2a0e353c45d0bae6e18abbaa536abcc5cfd0a004f64abf94d8de5d4cdfc490e522d435497654ec3fdcc198b143f9956e4961e0f57b24bda38b2dd16ca3b399ccdf9bfe4c1fd9502e2af25ce68d7698070c05dd83e2119254b15866f9ca5ea9b3cdca2e2cdc00c138f13702d44d530dd6c8a254b19812202662bb877bd59ff00239f7cce652a566109e81b5c7d96f5074a94f931eded306dcdf91c22f68aae34d36e65d6ee094b64a7bbb7e3a2db712963dfda0f277882653a37d15c43bb013ce143dddfb4696eef80f983fd1e4124c7e6602615f48c6cd786aaf060bf657f238f61ff0072a16c02b231f7d1214ccbea6e3c91156731330f6815ef2e6255a03615a1cc3de8bca03163798d3e2331a70afa41ee4d50b8e1d17000797f6101979a344b8d33d454950097bd2a8aac9c517195c72fd95a28a339b4cff07e630e66233d050672e46e0a97183b41943d9895b58fa234302cd72774c4587b00ca56f9058f880fb01444799b2621816fccce6a2762609913b8ab01a0ec36aeebdfecb0da19bed7bf8e7fb98c4c32611e9c976c186ee0db312c070ed1735f28172fac3167bb987b7788ede814c42336ae8d0c13187bc393c41426ae6283880b0bb76981b022ae069e4afb2ea5451178b37e2b6bdae3a6a2d6a170801a85b54b9514e7ea7d
cdb88fcd71c531c208947f128040592a52154b95077185cb93390d12b576799973c687d974b0ba5748d91151de333e56d258948a370f8c34250d746f249980af48cb71f49d078e9bbad743ada008977302c8b86319a677331532e70c4db90a5f2ed77e6f5afb32a9040ac2e9bb17c76db70fc6e88107e49ff5103855d710db07bc5917d0aed2ac52085ee5f5c0b0b3a831675d2e725447c6353f2e1a858f4400d5bc0679de2ebecccf68faf92cac171aca16c6efc74469899619aa47d65d03c12ce2bcc991db47810a5102e276d11da574d46d338b98fee85d02dd78885de7b25c2ff2fb35eef17734a64ad61719bc72cd11bb89b046cb9f58d830219b26dd4d52006a8866074a9529edd0b2e5810bf333433d858c691abca9bafb3aa0522ba34d0b2e906acbc965d982e02a5859b974349dac1fa0415a705e9ad4ca2317b4aba829a9644b49620779562250de699e1a3e3ecf1b45908a2baf3805781d401181b8953904278989888e997ef185b84aed1e5a45350a3538706c102c048082cef9dd6f03c5d525c73bbe5cbf67f630113fbda16d256bff00687879f5b07a742146599cac03dba298741bc50e662726e70d965330dee7713002ec56392f0699854abd70017acd1d8f43ed1657261c11f40d0da9a144672a18a3ef11695f14e85ece855137082eaf5f5b544020e36cb861c500e3b0b335b3238aabc782615a8a30783ed3d0e4abd40727f772e9d5081d8c9fc2b9c1a08f4911d136fa2fa09f8a9d00dab28e176029c1d37c3ac6e0778c1f0b4e5dc789b6b818f65fb5f191f4ac0503a25a538cbbcf2892d58c337947c3da5ac8bf570c2d9acd60bf129d4eeafc0b793dabd9e682442f03c3468f51f9a712e86c5696a93bb5f02e1ecb3aa429b4bb25358c8a67c4d0da874b6f158ae3def894d45370ab719168ce5daad942f7c204a0771ff8a9dabbbfff006cbfffda000c03010002000300000010f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cffc0ae93edbcf3cf3cf3cf3cf3cf3cf37e3df3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cee10d0fd2b6b23bf3cf3cf3cf3cf3cf34030438e79f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3b66af9e1d8d847d9f3cf3cf3cf3cf3c8153d2af8280ff00cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3d611c95047440e1e2f6f3cf3cf3ce464b0683f41b8765cf3cf3cf3cf3cf3cf3cf3cf3cf3dbb68e945009cf48456696f3cf3cf386ec92789777447cb2057cf3cf3cf3cf3cf3cf3cf3cf165b3fd0852c15eac9688cbd3cf3cf3b76c40bf3ff00bdc4238d37cf3cf3cf3cf3cf3cf3cf3cf0791a4a1ead9fedbd20316e7bcf3cf369c010fb7a19f71ea0d4dff3cf3cf3cf3cf3
cf3cf3cf03f437334887ff00ada206f8a47cf3cf356e516ccedcaf8362fa1b0f3cf3cf3cf3cf3cf3cf3cf25c61f38cb6fdfc930d49d8d3cf3cf1bf829ba33fca4eb434a924dbcf3cf3cf3cf3cf3cf3cf28f50ad82577ee400fa52d373fdfbcba203a8eff00fed22bd98fa0a7bcf3cf3cf3cf3cf3cf3cf3c761dbab6bc7ee356fd06e56c176ba83d02eaff18eb10d7c23bff3cf3cf3cf3cf3cf3cf3cf3cbde2b8fbe3e0b3300f0ffa8e42fb1ce62e142d5dbb2dfdb127df3cf3cf3cf3cf3cf3cf3cf3cf3ed25581d3b14f73414b801d7cfb45f9dbcd2698a76dd7a77cf3cf3cf3cf3cf3cf3cf3cf3cf2e8ce99cafaaec0daa187b454ef9ee09cf8f64ca3d837fef3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf1df9fb7f55d1c87b17cafcef7c2dcd74be441a9b5f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3c5de36a8422976b38f0667900c557ce7bcf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf88e8b1e8a9b250121de9c415b4f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3c632567f85716b00bfbbe81cb0b1f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3c717a7b2ae83407debaae5aaa729f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf212fbc0f6e7840823b3b9adff008f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3dbdff004f0ed08149435ca605ccf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf2ddf671de301701811e2fe555df3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3bc63482c07f8d57e681438b77f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3ca07cc0542510c9dd2d36df3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3ce80fd6b8d82b67192257ef3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf4ed7a5af2a2fb67e883ef3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf794ea5ed07d17dec1cb5f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf90cab769713d68b57735f3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf095d43215041934ec09cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3d6f3f8eeddbd0aea47cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf26dc3092db8bd67fcf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3f10f43fd957bcf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3
cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cf3cffc4002b1101000201020503040301010100000000010011213141105161719181a1b14050d1f020c1e130f180ffda0008010301013f10fbf0b6a68e5c76441f509d123b45406d7fdf2e822b42f526993d9fa9505b1df963164328801c2ccc693650f175ff00a5c9e5d0e73058bdfcc66db30a3306ba0d1226fa96b79dfc5d8e98a6ecaaee98f0dfd17e985b8633a428250510af3391164148a759710610bff95ba5f36c7e7e3bc52f6b30c25560ccdb5a2be5311e9344c31613f31a3d7973efafd198ade3b6b809940415c10944a98a6e2aa8d64d7f9b87a096a29dcbb7bfe23330c621b6a68e803829693cc289ae904ec0d1e675ea794ceb6fd164f7962dc104b38223980688b2f80bcc448162554cc753f96919aff73fe6de653b65f4262178d00eaa6f4c1a92bd4da81170f432465cb750db71fbb7d0e07947b129817354cac83978eb1c520dd7856bfc929e44edbf10a0005c6884331e06c8eb51c364621aca4dcb93e89a79d3c7d0a557821506e4511b61a3f812e2254bc4195dd3f8a6c03ddfd2320649b385442e1e0e2ab4c1b84b50bab899225841865c7d0f73e2f5aebf40ea22eac835162998af8234e1a4d2164c3a954a66f16bfc4eae61f112db95c71ba40c334482370408752c250d41b22434c29361a7f334f259d5afa0efa94620ed23b30929844d2135897344c2704d11c45ccb59fe2807a58bf1fdcd1a5ce758ae9341081ab2329acb1ae4bcd4680462259ea3223e201341ff00be93acb8c1691aa2b544fe0cbae1ad90d66779c3076bb1ac00007f4d0ca3f3d5f827bc4fbb8efb1fef48c3647168d9a561cdf503d6a28a61b853cda384299ca086f10b83611b635660e020f687b7151cf88001b7d066f6781aceb06ce0f033085c39a58cbb11b1fca2bc1ab0aabbebee2fdf6ef072f2105eabab5eb17a3a057bb98dc018d875b778956c25303ba9cd22868b5015f788d53c943b0a7de28f173d1df99d6640e0661cba8644238b62342329ebbaf7dbdfe3e8705bcc1a6592c8e550219957c4f184b4bace91293a732f2d0f78788a3a8fbc45543260dd6a65d9ccc3cb9f69449344b1e86081ea776133f185d9e78d652b17aad7a3550cb8392f67a06fe61d7179202737c7fc8b419bb8773a3ecf78883881835c105c4198608c032b5e5046e657aff9feeff4597c90a19926ac8870c67646c625b744096c86a9cde47bc2e869a861deb2bde234
2fa248cc80ce0f42f35ef3dc61a7994aa55bcae3dc87801beaf7b0807a610364397132837e2202ac8c4e95b7789a889160d9e9d1dbf137dc61e6b23e236b8a6ef8788a8c93c0615afee7a41358d5d8e87e757a69f49d0d15758830c12b982674410a5e77af23d6553b85748052ec6e214cb01034256cb2f216742f1eb97b4cb85f34bf1ef1f22aeabbff004476eea11a347120a95234c1cde81fbd6377560694bcac5af5863e9a429dd4e8e9e1bf31e663441921b9518d41e4573fe39fc4a41ddddeff004cb5e8c741985da51a91ba108cf7fd36fcc5035813b4ace9817cf95f588aaaaf57e2e0e11595dfb5c1b844351c1416e93281ea18fdec41e96b46047c001df2fef4828d910a6d6cef45fbc14355ea327bcbc9526194c097948c83a73dbce9131adc9a1df9fc77808141f5953edfd2e01361994fdcc9e9191bd4617a6df88afa8e62845e80f2caf82e51c8fdd7943cfae9ed57c149a18789410034744efb311a06ba377df6af9811aa806e969eb9dab7b8b6ff0047ac5ecfd73f370050a3eb992d81fa76e38c85816bb1158f23affb2c20b8a8801745f047e96b2b0768047d0b21b5a9d47acbc2a73b2bf3ed3382995e6cef9579952e2c4afcd09e3ff7ec36d7f965f23159c7e72c4d1282798c70a7efa243c26f08e6685e11ece23c0c6c858fe3d62ce0580e5cefaff05459b4085c5402f6329061102d63e70fd86c3e0dd753fc5836591eda43f98f0ca6a5e48ae10c29b9b5c0a011d8cd1b8350e15c344066b97a450bb663bea95284e83e3ec28235483d9c3eccb86e3a2975163c15f51dbd49ab2101f3c33088710b28aca4c70858b9aa658daddda5cabc594781d7d6a72981cc4d939ca1358af7afb115ce529ee61f73804da00d5e49cfac30aacc3a9ad7b42325b30ab300a085cc4402b02321b196028db36758acea83c287e4577acff510d64ffe7d899769e0703b5d3dd61ba596f5ceced12cb0b21cdcbfa9927382cc6d033b92d46f50376191bea2fb5f089accac2e2d2aba567da229873d1da9e8c6afac1775db4f78e73e67758bde0d7d3fdaaecfd8ae4d4f261ef8c9d427712d66d6e74998bd0ecede650528a8d0c5a83296dc06ee05bd57f1fdc146c95ee01eae3fb8e87555b97b72ed2cad6df9679c0e96c204711be839e97fdbb13702f63ec6e06c56da3b9752fd7b32ad24b7eb2e194085de4b9f10b9deb1f94ccf778e0ff7d26396287c273eb0214dc0a6ba0e57db9c2f68003a1823652c58997504b77fd7dbe7a4718dcbf75f5fb2504b209b4aea761dbb3af4872ebe0a9a656c42b87223513068f2e9da055b640b012ce84a7baf37fad59dc58c0f1abed2c2539183c10533afd9a61b25d1a39ea1dff33b5d587be904a04e9113d2b1718e71baba3c4aa008e47fe10c
683a50fb46eb5d5b9643ecc1d0953316a6a19d9489297e66ae7963de596c21c9326780efecc2a18b51ee3b95c52908c208b135becdbae016124a22ee28a9519a92f57d9c5564cb70658ca328d115510654cf0db7da2c6748225902e900c18952a2185210542de7ed3a4456b8966f3ab11de3b117b13507ff00843fffc4002b1101000201020405050101010100000000010011213141105161718191a1b1d1204050c1f0e13080f1ffda0008010201013f10fcf5f3be869ddd0f188616ce6bae9463be7a6b1cc93a03dc76e9f107b77f87dbfec5b836e2cf91b6de37b41460347238c1af36a91da8b9fe9d1ee7fdf5554c001e24d4c3f737d44006eb065cb34ed8dc564e4639a92a141a000f22367119b58886b65b24ecd17941fef38832dbc4efe3a7cffd2f8b3ca7253d623b5b65d33032526e441f10d7cfdf5e99bb26bb861feed7f6db00655c0eed3aec52be0ca816e332f6cb58d87305a690d5b604a152bd9302872d94e8a2532805853e32c5ce65faee7f72ff00958a5f36c7cfb47c96b31065259ba89352749cc6c09b91117a39ba3d7dfbebf677dac56e5d5d6cbc977a0de74824e3cb9baaf37ff9a450d65a45c9883e408bca5bca0240e595cae079d21072b93f0e8f9ed1c1d2611d47eb449412fd57a980e58c0104b5c0a31c063454148a5a21d4b3a3ccf9f7d79fd90b4872cd8f1dd706aef543156a2a0e18f9565c1184ef812a5980288c301d48845cc1e3431c395c5cfe46ff00333defeb75a1f5654b65b4260dbc1f5a4e906f306b2bd4de9316094999a9dba9c9dcfb1a694ab7b183e7c586eccc15432c8c59a732557029d6134e0e4210644288aa85286b3542abe81a7c766224293e9c1eac3e65222d482cc68d0f8c591d6a28ec9467594aa5cfc4269e7a797d8f2ba0f23c3da2ca0cbd8728e624c359d1113e850cb0aa38839a03784911c1b1f1d7d6ff00b07d09b007bc75d8dd23462a00cce3233a6546e1242b0e25cb2586cd65c1ae8f73e75f1fb06b1769cf6251a82b20cb2d99a2666e598ef897652b4415b84101758e2e1333339f06a7efe9767a11b5c52946c92b8cbd8848b8616384a14a30c260353440d054d2f79f3a77afb0a185baf0722fa9d6b9dc6dc2313c9a4b386d0f6e4dd32441e6e389673278024082a2d6649021ea9f23fdfa5c0dd17dbf734a96398b351541992ce10ce589c106da340c1313c1b5408c40068ffdce80f24d9f069ed7370c8ca8504bbc8822188f2896aa6b15114b71b92620462ecc34b0ac51d0d34f7a3ed5edb4505b3189f0cfb41ee7fbac6ca57ca2622fde286165f0f138f596601b8d398380410b866628b9a0500fb002dbd76dde457718a6a9938a90883c3658e710c2d887ed1c83589bcc15718a0bdd3cdda069052bbb97afc4
67a0736096b0b862b781a88b1a91a948769563485326700b85c04c6705cb11b70aeae32f8f5fb142adb00dceda29a9058ad022688e47c66e365b4e22155c42a581a91d711619aaadc2f443563cf091e4d5f48e571741e92dbb65b0ede933807ac402abc881871a8a62171af30c624d668354da24430634209c19492a98625c2c76b086f65eff00e7d930d0373e2a9fe6a72229bf09f2211e57660918e01454d1146b9433c756c2e4734f220d5bae6fb6c45d479ff91d380e444b5544b57ae60503ca67dcb05b104c54a6003109b5434236008513546133103a26b3c0c85ac0ed9e7f8f9fb3146c8401b13068c2be47b2197aa007292d7311f3e2180db12af26737c3de35bddf9cc00a9908d4bb835b3a102d7a352c5b255de256bc48cce5c9a4e902950d1bc622f598a039c08d21121aae7f8e72affcbdfed88545a16b3b3cba367297ced8a7999f46bbeb3d7e9af7a944507a3e62a0f17a2ebb6af86b2c05804d177654328914a469111442b970d386b062ea20a759596af07df59aa6e80284ae04b7446a9539ede72812dcb6ff00600a147de3f35f7e16e92826f98500478654c628d41b5db8100f48741c44bd577e20d574f28ab7ef7e62361e39f7b800a0c7df557afbe65638508de21b1b224ef28f998001a406eedc0491771154b93892ee45fc0d4f39ed2f12822200da58cd1208a0a03696aba83c9fd0671158e22a98164ef23f0265cbc28f7b440b497e749ac105b2a2b7e8210b9a515054b3a4ff00023d123ac4348dcd2a16e76e0317031c88a9689b9722b46a8d6245031a4a92294bf0889176368a7a371a4cd8ab16a1666d1e63d671cf81c814e1773aa1ab63f057837c3fa8d47311eb1394a5a94618d8225c6542d9a70414c311b4d220801a24bb070bd72c6fc1573c1d3bcbec37ef0437099a12eb22184cc734a8237ab8e054da3096949b11f7e00622d231ef0007e0f23e1d7bf3fede0595c0035cc4c3cb1da409a9a99e5bc15c644fd203f711b519a0955accda170ed020d43f097b539b2663a7de585c4588a94b0e32acc5784b32928de5cced11436f225752f9625b0daf9d45fe16e5e49f1fb8c70082b51a7ac1ae084234cd401de0e96ae9f3a44508e6e5f8f48a1db33b0aa5d3a7d0afee7afe154e725f2cfea692216e55b994a2688fce2f57e6f0205950cd23b188794650403fd8bdff00b963f0a80a749b453e9a9e950c5b1c7247e00cd650a594d31cdc32c850eb2172dfe83f0cc87a63cd87d6bc8e049d2c12d6382845b2120e59bc532dc2c21459b431ae5a80036fc3093614ccb0d68f32649821713bc03a4bb28754b5d9c1cad151b07ab9fe20eee8b6be63fae4f760974308f06d8019886c5f6816c60f571ba72b0050734eaf73639de7a6f058d014763f13e148
309aefcb3a3888d99f37938f59596874cfb4c67a49ef16c57ba12adf5c7d31eb328bd807cfea0544699d5c757ff087ffc4002d1001000202010303020603010101000000010011213141516171108191a1c1205060b1d1f03040e1f17090ffda0008010100013f10ff00ec8291400da0dd295a69f8657e4af434abe1d1e735a689ce0b15cac28adb379ad3cd08a0150d0415974a6dd3786eb1757a006550704a30c9619e8d36ec8650163346eaebd1ad8f1c85855be889e47f4d8b6b1c5297da05e5b6687e332e3d946178dd5f329750695e446ab372d0986bbc120cd0e46aae9e63262201195ecdaef37c9c0118cb9a0adcef6305f264cffbc899c016aba0224d50d958bc85bd1df4a7a3040810d420dd8adf3bd9825b5f09213658614a5aa285bb3e9f86916a5a000e6196d221204cf0a11e447f34097b5a72a083a32c6f363551bec365c1cadac18d4360c5a4abbd7de1b501437a658a355bb86324ec53ab256a18cd562be22182295eb32add1a80ecbae8e7559dea8c76e2e52cae8991ff006b4c0abd42bb5feb863994b88072adb7d2c480555008d5006de682f6de6e165d2fa46c5ddf30500717a97a9fba1072b83b5a76aece1a08963847502e476c71c303b9118715b935620096a2bacafcbea7c255b406d41ae9e52138b915a61c1d556f7bd6a12f6aa2a9b0e1abc7fec37755e42cc11ac60ac36c6dad0d6e5c405cc9ab4714be200cb99772e812ccd62862fe95e26f20760160f4f111a32dba9febd8c02c931780ed1c99a8050bc919b4dddf4c43021001a7f4891574ed2f4e9e60c300957d20841d4b203476c130148f1312c722162b48360574e7b04a5e0ed0c4a959002c82b02178cda5a0c82d403a83f2dbe764209c3578291d8de4c6494b620aa106addbcee13ed13ed23c4ee713048ff0b547f7b4aeac9e55c4425b778c0403e5dee662ee5985468f694388fde651a962cb84810e0d8915d5b70a5d4d293a7faadbe45e285a0614a2f2553ac12d094d04f22abbe618a0c1d20f07c44f16f306a20ca9b820970179aa9aae5ac22f2c1d710134ea5a9bc4b35bb46d2b7baa8405e121a5d2da112b180800192987f2b3e396748f5a237db8e73a3ce371d18c4128b78699f020620f234bc06e25b3e88ac2be52e9f7c04249de604dacc612d443d1c09999155914aec87688a44a4d8ffa6972185b842e385c02f46a0fde0e00140070054b36938b28ae1ab4dccb9af78a0a4a8874e18180c379dc0bca41199764553032a6ff00ae38833948257f586d32c34a371ec00e1ee058ba2025620b3f282cdfc3745d5bd0c99712963828cc0a51545b7270725b131e96956ea25828956aa602b1ce920b46a7250b05c23f031421a8a8b55f43ea0d60c1e62a283587f881d16e78b9404cc3b4
ca080b0ad983cbbfd9ff0046993757a8b0b5d05a5ad07315882aa1d381440500bd5750ceae7500dc011700063cc25514ca7cc7e50eaa94352a560c6639ce6059e7d61aa96222f0cb80ec14d2070893aab8828a32ae62f040a25ad74d13282e72f30b00b781d0913c10035cadf92ab5549e86d765d02d76850d13423297acaf4dfe0d04a2cc21c912bccba972cb9c422e72c62065129c41d6a1582bcc3387b92d5bbccd6584ada01d565b0ed4aa657ac46aa2c4e42882b5b20f783bd2d303b843aace912c226755701ff382f42c728b7c4527569a1712bc02981906b5b616802b84ed85f425547082acbcb778c41305ae0b6e6930099e095924d42026a177028ad3df11489dae5aef55b622177bbe61a0f625a06c7ac22a093952c97e88852a03d0d08a15541144086380632c531636d71401f926cd315d3ca783fbb9c1ad82dd0106868dd5f56527aeda5fb100d46bcc7872f217172e6ac7103b2c72c559034f2412cfa600631d0353c6063d693a32961700757c4bedadba948afa409ee555695cd40385384a7a476dadc05c5c0a0ae13abd7b0747fccdab834ceb4342d16a0b2d0b609294e81068ad5f03ace504a494dc0b0c119e68bee73089acc2387c4460b966f70aadb2f313aecbd27d12e186b18494b0a6a50fbcb8302f104dc698a44ab9e603333d39ecc1f5d52926b36092e86d3a718dbf5aac0a3489635f91d4d9f5c45c5df39262ab3a9766a098efd11c5a8fca5f2519398301616063a40cca30d590a092a50cf3d3de1294652b5aa37131701610acde2fd56a9963228ba6076a7ec53eff0011ad54bb067b79c9f32d2ad94ec38859d4b88305762d5c84016744be4730cd06a6d63c89edfe43ad070834006d5e22e5d2d900b48badb9553835714fb6b96298443e86a12cb251da0d1d2395a672da648b7de3d8adf524b7b444a36461b412a4a66b38c213efd6438990851947689b0473894d533008315090140cac3202154cb5171fc88a0843f7ca7e39edb05a567a4501b97df81c198f6998dd456bd8dc3e3ead60feea5ed050b3516b42ecd9ef169371b5a0d67b439a81dcdd27dff02dc750d661f3114af4ff00b50219c0baf1adea3c4e56d93acbb6e562f0955c16e6247a66fa2c7b5aff009178b62c80129d51454c14b816871cb77e3311504777f88a644f312972f304cac5f4fbcaec3ea8d295759659cfe0ad9170f3105c65e269549d270125ca798b2deb9944362faabff6305aaf14ef11d2487cb51675feed56044b14b1bef0472840a5c54142828588f3f9093c2992b559fef89556482ad9d44039a542ed1ef1b85a2094b32cc9e90eb52f4b8244eede1a0f6d71de346e24fb4605cacb68305e63b465ef16d566fd57d352842e093799752f6d08de035b8ee9467be00e1c
5e2fbb100c6c693dbfc772081da69001083395cb2a0e9e023fdf2c6388ed509ca4adb4a8282621db9a8388e61990382a22003549a8b70dfe0c1b83ace09bf595a0b13701e04561d2079d0f785ceb02dcd408c2670dc0f791686c23b42d7de5ea2380752745301c668de9f9052aac05aa5e7a637afdf6e1230e64f799839bda4a26a6e3ea588e8e6de651162675f80acc0135f1dd8ac2b6c4429469ae232ca8317f1a5972dacccd711bb514a5c6b6632dc044413c446044b514062aeab46aef0f15fe2a10012a7292223828052a0ee3a3b05b7cf3145a635c30985a1c730a0051d2130c52c51ac447f7408a96b225f8fef58f9e35659a8669416420d16c9f505402d7820f74d4d38e669ef29a56d4c662618e157c46a58ce09445d4204a774ec9721bac5cb5d5b5e599ac3d0ca9131448588962e4ccb1c69a05a14c5ab28bc5d393f20b10b32a14555e2f5b8a1655694f022aec22cb7b39224b480a812a67f720c09d499a55d55b2865020654b506bde58af2fe05f56d29fa312923de78806e4348ec611663b19ee7a567d2803cc6e556816956589fdd74bff120d0b7055076d1be2526d6a20731bbac1c4396a5585d7796f0078f681b6b1c5c1642df797294b80dbb3b55d5bb88a45260eadbfdbeb098468545ca8d3c4ae9917a03d6d452e7d886b4c9465ed72fc2b3a222e415c563996d6ba6cbc730b3ec8b4432761a0a8fa316cafa3fbda5856af238859f0944d33888c6057688e5e5575894c5cb7202e05c8478ad9cacfe401140910b5b0183c12a20b812ef5d4a1bc6e9a1c3fdfde2eb3e97841511740a97179e91f048f58c8ace91261ae92a65266729645bf55a205b46e55b92f8a85fc5cab997cc2858e2bac0bc86e0bfa12a6d02715d1dd55e3de1205cc459588aaa57619d21ed395372c230b9aba786b9ac3013a451dcff000a1ed97cc03295d6dbbe987b5a3ac5eb1ab0728912056f074f31141e8739888130d60c5cc1aa5aef8961e93977062f1c16e2139617cc0a288375d3d04334046b39bf8fa4b52517c200d703c43d073e212eb1e174e7db705f8b36537e7fbc4772588207371f6dde7714b191801ca4a55046a5ccc6c3db349348b55fde84cae671640b292c5c90004d6b4801e8a1cd39c8367fbe1a9aca616d10a1cdb8bcdddac342c5dc146d625e93161b8806534ac1d21b5c0d99fa44159b11000f78953046944381170166b297dee03532d731c3ea55b6f746444a79bfc0b72acab08c3831d2293882de3ccd4595e22ba3b2cb0c7684060d13bab51073447a47de16fa3b452570c451bc90f365840b6155632aa50671972e76fcff0086de45b2a8d8450e00c9c1757605d1314ebb45dc09862f2e69946531cb3355a9484e77d67c4ca66b2d38e2f1d3bc3552e189c9e974
db3158cab7d2e8166d5d2cf3da14366cf66ee58e4d3cc1b7ce63147911c5bfd660a146ae3a480e1b350259712e0d97b38840ac6986323676897149b3ccbdf647f1c438ec153fbe235528bc8f58b962bc102ed050f00b55cb9ff79e20a05d861a31c85ab0614b36388d9b9ef00aed9ccd701a41fceeb0cde08108b5775729216348ff007ccb5860c9157a16ef0afeb18601d2587118e155eabd1961eb0cc60cc1c8b75a806868e36d9bfda1e62556e7247786004b5e37d32d4c8c77cb72f80965ca9394c26ae05425eef40e0840c468b0b2e72ff8689c78cef8a3871c78a6909b7f12a8459a39836d5ca008d7796342e5db530f4e1283d88ad997a11eb0d4e5cba9fb7fe4bfd0387684c158a2d89990b5db9b962edb66374b08b04d622361f32a04b23011e95a866bb48841a46c6235154d38738ef09ed19ea5c288c9d6398d80eee0c0ee81b99cf7a7fde3063e58c534d94b605a691a485d1f173b4d3b9dc44376782b3d59bda577964b364155afde8c3c13bcb9b4d177894f46cd45710d3919c131955316bd6adbde2031040bbff28b50015c0f1fdf12f9586db07ce7bfd0966066ddab5777aafde6f1c1ae60eea5d5465a48291a5c390335d6aa3b4c675366dfe27382eb506b222ecc0335c4fd982b3a85d3309435421abf40971f7424800c247685ff0071da1e86bc294040a11de9b70742022b822dbb8cb30881814f46337ce59856e219f647b45588c329b2ab30b0b9e158be5e1922943844c54e56f845b20038e03a2eff00dfbcd9235819b96c25b9b62c25ee296291252a411d29c47696aaa28b43d9545c9dd6af72d64439eb0b152c9ad4bd7ac5bf4544cd2841b950cc5af3a765e3550dd131b7a147d3d2a1431d2c554c08c3b30ef5bb799a909861166451d963fc79d8b9b09ab361070d50e168c4f0c6dc9190dbed2ea5cb9a5dc642f05c70622eaaa38041b1fc742d131c76861c250b0aef0b1b2f2c5cd3b06e2bfd812551ca15b6a13b978b6d030b2a0a45200832c0006494100020e02ad5bbff007d46ba1cd2cd436e5c3841bb878582946444d8c1f4841150e91d1a8b427e0550ba94c648695f5d660cdce1e89546e166a5175e52100d5116d666671cc2ba3092f4172e1564b6ea0f914fbbd6c6a08c582e838b4cf7855d438d628d070136d0889637355a454146554b0a2a8b01b2e8564cba41bc18298c30275864b5db04f312d2b312a464e16ec6c4a360046115a05b600a14b86cba7609a703845b64050a5c365d3b0420427641e0682588ab1620abb3bc2c18305885b5b477eac71b2a5d26a0dec2f35158601b1f7fc37441186adc42c6c97e087620b4b5cc54543f163a30700c9cc1dc48cc4a86e4993b5a8eea040482269377dbf21262901b5050550da56b2aa301020822
a8604cd665b21a835322bd228a7998a371783bc7a0b3d212c64427635a98643a348ec52e1f6352be09a61f1305fb2302774439a865890a740397b40eec590038b25392ae2f75347eaee55de9e3addbaacd7859152aa95861a46edbb3108f2aca429ae5555e55973c5d4afbe0fac6436676fe0947d0ca9ac79c6b89675142455c5f683161d0cc28cec601b945c60ba8679260b6877980567292a5200368f8bc47b0976f696b95b80c297a3cfe00b40db3b2471c9f319a987b4128d3880d32605bde5d160229045004b1970487623b9b2a6b88b1221475acd39014b5ad323f90988916acc4ec021bcf108b9a8c2e5d532d106b4ec97c225c0acc63e7d2a70b4de223df5a7d16218e70c532b7bcce9af2c3fbfc44d510ef4a8334063138d15cf596d48693d822b68d022ded9083c813808359439097658e570bdaf24e397167eed476afd083f9ae616cb692df862fc26fa2600a607f6daa70c41b2f06e28db3387750f96a16662f81336af4b2475dabd589b2b035700f3774399575d595e65efb43424a6bbc75936b6de3d6a59b05ae3c410b44bfc5bc47704c875702d2ee879301f14f4f4260dc20bec160618226e1181480da9a74d2ebf2207545d001aca11b5f2370149464cc71576711c08eb92a3dccbaef2c02fa256cc1da28f20a8b5289d388b563b31e8a416b8a8b620a6b9577fdef014d94b0dc3594f7318861b4706c38978b23c3879ac444afe88414af56002828e84ce0c45020f74a122ef1706cea476d8cb0e20517036512890c51cfb7f58dc1b9b605b09ec8fa8ebbb80e5e08bc0b3841f848feb71a43da5281307f8588b16e11e35fe258a8e7920dddd06159835ff504c912a44e6235b42c90e7bce735d972d73066097986e96c6254552d604bd0882f50a16a85ac4f1312a512f1724b38832c45b297d10a975440082961194628cdb236fc8b44f790cd9723fc2520c231974641a0584025a2eaea5b330b20dd5ac76b697cca440e21a02ef1538185d916215a166e59c74089614ce4d454020d23029978a7110d482bc4040b886341cf98354550f6fbf3356313c246163793d3e9297d2e893a50518823c783d56bd05e6eb293ed889f7793f986b58bb9fda6608da153d5651c02893ed514015aefc134163904296b05ce487df16a2a747ac34e98540fb32d67bb1be8205f4dda663a0fd4e221a174c08b406fb730db4522a7cf98ed5585fa5f58819560e180ae2747a9d628185dc2db4d74991b8cf74cd0bbde25145621167a45d1b0b2d559b22ac7a8f69588d44e610bf4d3e994272a1aa5127072d18abba1273a710d062c06357468a003f23b33e8a89496c2321a42f1698d0cad48449856618a14bc732a8583a7994f435056a86c413b47c75ba8607655051151042c710e07552c
704cc1f67961d79982b08934ff007f98eed04344b8c00d4170f189697d5d8cc0be509f2f31369c8adaf39942156bdd7fe7c798a569a4b2fb426be5974d5c410b34bc4b5aa9642fe5b38bd39fe9c0d913221f6889414696bcc01b3aaf910f029df2f99b6f305d2f68aadbde2ceaa5a9dd463e5c4dad5d0e4f2fda5c2dd4d2f762354ed6d1f01471548841a7da2f4e4e6a00ec23fa9de7084c7c0f72688ad1ebca227120a8c5fb206dc60ce4982a370f3950464bcebb45240d0bda2f15055a99f89421874fde5dee8b7728d7e1369b5d69d814345d0a28b2f64765138ea0a0e940a816e68d7e4c1920f09e816702d56d29cad1c788b982a2716ecdca860cec99d38620b0d5372b8a8be23315748b2838c9d3bcaeba86a10aa4c0cce34b45e3ac1d48f4635888f08c29bf34f98e0bf39453a15f8b3920f96a0482bccaaabc6b7df339540732b84af22a399bd8ca105d637b84fb9da879c5e7ec90e87e407cd473b17ab1f5cdb5cc7e11caf7b21b81e5088acf2060e5a22ef69abfb7d595fbb417f2fe0891336a597873178cc1ee48b4b1a07afa31c3ab35dc9997adcfb18f858bbf31d89459efd434a35c45aba1da6ea87ed144caee555d83ad4642a2f37a88146f60ccbe97eb57141b4b494c0738b144e19d49e0ca0300341c7e4e1da4e026c44d23ccd95a515090a280563474dd0c42733d90476d75839657796e2cbd2ddb994c0ea2b2c3a42dc660e4386602565996c462fd587116f9539879f452b5830b5306ea58cd9de05c0f86121f3e4b401959aac28298ab716ce297598f1af48fef017256d04eec5e041466af43e0b970465bf131933d618f24672e2042ed6caaa58a91f06dfa4ec21e27d8f47d5854abdf495305a5e3d0de40364a54218798bd8f8822e158f9837c530055ef17fce259b36f12e438954bd216c7bcc25d92b1832e3be96ec94a0ca88a2da1968daa85ca1cb06d1155d5fca4e841c00d889a479815503b695a50118a357a2ad092482f501ab871be0622aab139ca63d65cc051758632a4e5d4ba81963329cf937d25cf9a6b4f923eeb8a55ef503128f4655d23114bf6089371e011576c7c72ddb71c81200234d7308101492af54196dc22c09e5a7d23b2eba88f0554f68b4ba17cc001d31294df2a0edd86d69dd36ec7ccc97ee81f406081573cb16a2dfa9d8ea0307925a27487b863145d7a3c4c114adb7f55cd9ba6727a9dbd16e04b9594ef1d50512e71a7e20b4d2768e98c719263539811440c88548d4720861865ccfaf6fdaa28151458a19928c03d5b1528bd825945145dafe595eee04da2d0b480b99710d0dc4bc18b5ab42ec2e0595cb698a150a398583537188e1c8174434ae354cc850cb501f15f240478067d05241a6cb2254bc4bf5f717fc46c7fb8f7e60155
5c0efa07598bebf43e4b8bc8abd6b2fac16f37ad1f231f316382f2af76e2d95f797896f0830c2ce809c1d3460bbf43b7cccd700341447f0b0ceb287b443b6f5883f5186400074856fb61eb6906c664611fc5d9f449b29c90ef5cc439b3a714fe62db96e09e112961945788cc2c135138c19ae23d7f1408b557001cb2dabf65ef5559555d17614336aa6dea5ad1ab168282f07e618500d0c150694305603415a3121db14a852af15c99c65c15df2b1aa4dc53a6d335685f1b6d5bb282eeab0740bb62e2d5478dacd63dccf88027103373f32cd509a0d0b2c2f39131d23e454b030b9ae98802f6381500ca19c970e6a9e48ecc0b8ad2943300238ca0a5298b2e98340160ceb81e8ea061b21058a5c1d0e0f894a9d6edfd7de6d0e497317ec15ab41093a9f23abec7a2d45bf532c91ed5c137d2f8c3d14b5f18f30b4a39438b194b183ee23ee389884497285de4bfb40b6047140c0a85675631bb06b7cebd781958de37f48421284991bc458a196000d2183f0962b4d0b5343740d0b5ac12cc965f78052e45b607c272ca11104a35bcb22b9abaa0fce45fa5c6bd2c4f1d2267b7d2b863279263406cdf4b735de250b95e3834f045a8f2422140b434a1df43d17f0e464b13503e9763c409fc702df6ca9325838466b2853f11e476889d02d577cd7aa07e61b72de88b11f458b064565038c6bbc7208b536ac5a8b704cb04621209e062eae5869bcaef34a40e5f56350c8bd2a956651bc52a57c825c651016b8b2fa912d6f15266815d03d19ce2f38ad9a00d4574ba6735c460e51482cd565c972a9a4a42b348f64590da734efa0707e78db81d0850260378cbb5cb96623d02fd42a014652fdb99a99693588fd08b314ca4b7d908cc9b32afa062f0061cd5ac07d5e2e007bdd794227b10532a9faa39d9dc62d45b95bd7ff22fd46300a9823407d88503666d3599bbc436c99f451a2e8ac797359d47a6a0a5bca5a8f899ff007823ede08b37e8e89416f0a38079f9af9fd0461c9bb2b6edb0a71d29559bb080330859e815e9612b603962b2ba4bb1d05f786ab2336847d66895a30654eb9f4cbe496a983e520f461b93dafd8382111124225226e502cc0516c16780bee7a02d12e11a0e44d9efd5e8050b8c188a9132c9ee1c62258d7a5e0ed67a2fe079948a22735321ee9fa089ab820d058bb0aa9a2f93580b9b54c14815e810db0201a7a9923fa5e48f641daba8ac795cbd5c88f59403b9e23a953285efe806299138d3ecb7ed0412044a9fb25801400c0eabf71fafa5904984e43d21856186254046e5a7f722e88e7baf429f597aa39043455b5576beb633430155c3f856580599b458ca1fbf56ac6bf40900949d88468c1c3142af14e9373d112d1e088c609e89d673c5e6002223913995aa05b9c1
87b972a88e4788a871af1167bccbbb676d6239a1adc02abc09f6897c008588e981b17193196001b57820e4215284b49dd5e123177eb71f4282d56c1d78d6bafe0336ce482ad504ccb94b317855bfa8beff00a04ceae46da0164012c6d0a72d419acdc7b2ee5410c06b68112ca9950c1bb4e1846d341e465c23876d844c28d618be72302ba80c14bbc4a29c395250c715014d875c06f10021bd2925ef6d63792cfdf35cf3aa4e4157afe05afc2c821e418ea618e65ab393afa1b515769a1329842ae2dc98f039ff009fa04d031d40362269215778545264c8597a6a92930902a04d38b8d679efe815090d02afc0e9104097d5ce1cb38e92deb3b12c6beb05fe4c7989338721e8841a46c610725addcb1cddc14153032f12ab5f4b97f8162dfa40870d65c4ba852a05ef4c849858d32836e58b936a707eff003dbf410b4238148a4d85075be78194e1f4474dc6a72b82ac6d0b8cb770047c4a9d2e68f781025d2b18fef0204d398b2f508708ea57a3f32c489e56e641b65afa59e276221c46fd75236b40666baa2b28c71307a64a053c99212719aa2f065f7abafd0543be0520d414a1cad8d0ce20127240a86d4106aad8f8c2e8db059828013971a67fec0a94260cbe6040a9a1ed6f796dec8ea229514c1716cf86125388767a9c5098ae308710535e8ca6c111e2000a1eb0c2f0dfa7b1caf18086c5cb83960fd05bb9d01c7646c505e6b0ea547026e695058426dfb286c168a49467da595499a2e641eb021ec558e6d648826ec91d6037134a10ed8094128e92e16e89921a898be8892f75a95350f48ff7a7a043ba3216ce84282df407e619059cd97463d79a5bb4c0168ef514afe83b87a8104d1470d31d3102a05ca940aa40882363c9280942af370fad564c4480cfab251c069fda58e50c01d58fa242788108441ac413e91298b54ac3946b62640470d4b2ef01da88af7817531770d6d3a0d30a3bdd7c403f6c1163000651d5958f1ff6282a57231a036e23cc5ccaab27e83c9118af758e002d4c5f0e42fd02a0a6e26680aa2fe65b58dccd994cd9301840079f0248649444ad883e86308604b2004017378617b581b614836988b729af295e08b8b97413d8e63a89c89777371df2d3d0701e8ca33442f1b628b162b20b63ef9feec14c46081949f7609aaf72e678aae8db068dbb1413752405a194f7acf17fa0f9352ff50f4553b05c82c50c1fecc2f44aa24e63e640b8105340c05884e8110ac408624a5d10c1b17274ff00a089188df72e32d4ba900800d4eba58ce7f1d626084177312244653e2c67de534fe20da6b963592b27d65d928784219bf088c105d9d30f970201d448aaac30075580a8eb4ba70e69588a184d0c5fa228778a28d1553742fb557d7f420fc0031682300a25709835
2b1e9c3153cf1048db5d7385d9e20545a51699964e9fc23a4e412563ad09541330a015de5a391fb42712f578d9f465d732d6158a63acaa5c0ebc12d0097783caa43dfaf5aceac6c359ef2c1861a8688f8b80a54e9da10a034fd9281db005946d5f67eb03da2ab661db7bbb6e13fbd075c67306ae281f48c7fec769376423ec9a9403654397017e6a2394da73fa108100a714a23a048d8c812c3bb805978a847360b8bae195c72e33965179aace2275278c056e0f898674ee08d9876969498788c768551dc7f88feb4fc63755dbd8dc14b64f90d423f862f431b6078942907e73e1750a0ddb573357cfb056fba43e9e8ca0a482dab02015b3062fc9c76877d573e629dacbba5806a2b76f332ef668c3004aba1de5e4d96979b69d269f3fa1409558008887445314e708e6710085a7ee236239111c90299d673059d2e19a8d75a881b5e18ab686538854391c444739ba7848005a3a5e6291afda53fb30892c4b1f566a777b060be73aff009083ed847f0c3a66962f38d34d7bd4a37c0413d28ddf39106904689b7b57cbb880c379fdc5700bacb0af30057bb258779023f942bde65a714dcb642e0ed7f895d88cd9f13e87de98e5956a5abd7f4364f6d4b4e62f194638b3a203a8ca605c4164662320a731e9152b98462c6c4d9138318e8f680ab1a487eecbf5a7fecccd78fdb88b0366a533f1e8cd235408040080e419c64eee02fe65a902b1a703e03d33399ffa0936c74d887060e1394460a6e900d9c996e5423e2ae7c5a7edfb62346ae56b3747433afd0e13fcd026c44c88f312dc8a1bb0d6ad6d58b555a329af328c5da70660c691db2f4ab2235946c712b059d2814aee8998d971c1e658881b3a5977f48a164d0ea4a9bccb7083ccb850e78ec1ca2d349d332b24800c4a3969d0f923951b843d1681f31237380516c516d3ad1eb99370340d99f4c02710eb62429161caf69b00cd3a055f42239e357125aad62dda72bcfed83cfe8ac97e5eb3b5814aac765be0a519af854055dd596007c45b61cdc5ce6735018728ea2885182ad060296432042d771843a1634c59e2fa4c65bbcdf58f436c454908014fa5d44b98c969880ac712d6f1a8fa568977deb07bc1578986879d7082597aed9ce4a21db73981ad06800028afd176cdbe8201c669c2ca5bd6487061d2ec983d7c86a1a2f6466222749c266bc4218e07f11b92f9831a1c3c42e61f4e8bf98350d600aaa6059ca83bfe4472ff0096ebf88270110e6e2dd6267ba09ed1f9fed6a185929402b7c0f76e207ab48f28ecc0507350be5db04979af9952cd2ef247b641b0d25a58082c0297e8b807b03292c790a195828ed6aa40e5c665645433132fa2625a08d5c125b37322179088214653221d2afdd2dbfb406e3d18205331b55374298d7d
62f8c8094dbde36020ae171a59f002dfb4af7a029fcca8ee08cbbd18f784cb08b8ba5687de654af7a04766b730632843d8313006de08c2464e253ab9399bc4a97323a74b66963dd15fa2f4316a41a8b176154359e4d60a7aa506e0f301168958b87916c6433f0c1c19cb8c6576d302a04c173f32e7ae219af332b326a78400306a8cc5c56734f9331c257f038c4548e74b7e8c4c0be942ff001b9920d971896139be66d08da5588e552a0332183ae6e209276f585d4facf972cfe83f45100a8cd7512e8d0e4e725618198c3d61452d7585d0d76883239ccd1103aa84201f24c2af740d3b148c8fd22a2c734009476d3c3fb4be279bd1120dc2cae1be18ce6ab9c712fd579eb50c1205d830ac73e61a14b4965ea8ca510aaa91b78a9dbf1e6559ba8c45ef116b0e83fa64442f0ad8ff0045a8c402094ade8a814838588a057e25e1af88abb774980d2a144234f1069289370d8b7f873797dc15ff00b1372c6707d6674c0e798d53b5d891da281de076848695705846e026acddce2c23822d461a8d6941b19419a85d43de01af5c1318bc6c3fa2b1d6285312911a430950fc8186b40d2bb16a8002c6345858a86e16b97dc93ad4c8d06ed0fef32ff52802bde887296d29ad0fdcf45c7a3a8cd9175a94d928318f885bb1c585cce20bef8bb80f38738c66016064423b304c660b066e01dd973041ec978a852dee341aa7ace52007a2d0c0a1de4b44d3f45ae63d9d84b72652cbaa2802346001a95af5e22a0e734cc8cf152cad0c34bb1a2cc26269aa19afeb38c611d4592d85afc7c4ba02ef38e618f794baefe2240b9cd27d25d9bae906e48adb46d98df64104f7479a2f748ee0acc15d65e9488e91dd3d5b867c214dbdcb0505f1400a3f458ad1bf105d82d4268581722dcece46f9ebcf98954eb70c58a6e60a1dd96f4a75491576decdfda66b67de1a620bc127897bdcba9903be9550584be4986d3c406502763ec4779405638831c7e2351a2aa0148966c8a4c3cff788c905869e267890ab52505683805c6213f9eaca818508a36dfe8d64b8b477139c160a951abb0f9950a33da355141f30167bd4716d31d11f50c4da24a2018d2da9fda99853dad95870618150e96408da78c3352c6f1a9a4e08bb0710d469eb016768e5416ef8000a16016acd7e8e37072ed600bd6e802c44b4a304049404b11a4139251a0f05cbca2ae2eb5f5864c2dea5b08d7c4bc1bf7950d6392528a791d46962bc5cb2eaccb96d7b2c242e0852ec90ea6af7e22db17f58caec878e0c2ae202d69fb30aa8938ba0cec3010b40b761fa3a822b56140b278166a016ae20a5cb16a5a27ed15259784ed2b285933548fcf129a53fbc6bcaaf336b76c658978c6b843164acf48257bb3009027351c36bce3304a609b80d0241
381f64a2821a9579aae81794a319575fa3c3f115cad387088a284451114887e29ab0d0eb09440038040688456b0c2c85b0d545f79a143bdccf50e3304c35d8620b99b5e9d425c82d6fac3420355cc0acf745496dee0b1771b1c468071538c4add81400485c61db450e900502a02dd0507e90cb0b962282ed2134a305b2c3a27a06f40b5619b5626314e9300dd1181639802d0f11d673554cb358b589e8a8bdbd514ab423dd28157955416805a87948da068a50b2e7208376450a7b727a03100501838fd27ad025562650b68c611044021535a3d564d16b4348bb22473825667116867de6cda965ae7d6e34223ac040d2055a180015583863aab605112ed0ee5165d0f4a88996bd8d5c403cd14983c2ea25a06907c87e97c650cfbb6268c4a6d32b24120ca9ad52968dabadd9269b6b0aee49a156b0039a8b9d46a101de5a12b9ba0a1467a8da857658e0b8b00e92b5338ae7230b9a52cb5229b0c91ea4410660b53b2e28c505356e5ca602296058ce6266e1a927a10a70037064b005d80b16151530e023e0692ab6a3955555ff00f6cbffd9

================================================
FILE: resources/multicolumn-lorem-ipsum.txt
================================================
                        Two-Column Document with Lorem Ipsum

                                                         Your Name
                                                     January 3, 2024

Abstract                                                          pellentesque ante.  Phasellus adipiscing semper elit.
                                                                  Proin fermentum massa ac quam.  Sed diam turpis,
This is a sample document with two columns ﬁlled                  molestie vitae, placerat a, molestie nec, leo.  Maece-
with Lorem Ipsum text.                                            nas lacinia. Nam ipsum ligula, eleifend at, accumsan
  Lorem ipsum dolor sit amet,  consectetuer adip-                 nec, suscipit a, ipsum.  Morbi blandit ligula feugiat
iscing elit.   Ut purus elit, vestibulum ut, placerat             magna.  Nunc eleifend consequat lorem.  Sed lacinia
ac, adipiscing vitae, felis. Curabitur dictum gravida             nulla vitae enim.   Pellentesque tincidunt purus vel
mauris.     Nam  arcu  libero,  nonummy  eget,  con-              magna.  Integer non enim.  Praesent euismod nunc
sectetuer id, vulputate a, magna.   Donec vehicula                eu purus.  Donec bibendum quam in tellus.  Nullam
augue eu neque.   Pellentesque habitant morbi tris-               cursus pulvinar lectus.  Donec et mi.  Nam vulpu-
tique senectus et netus et malesuada fames ac turpis              tate metus eu enim. Vestibulum pellentesque felis eu
egestas.   Mauris ut leo.   Cras viverra metus rhon-              massa.
cus sem.   Nulla et lectus vestibulum urna fringilla                 Quisque ullamcorper placerat ipsum.  Cras nibh.
ultrices.  Phasellus eu tellus sit amet tortor gravida            Morbi vel justo vitae lacus tincidunt ultrices. Lorem
placerat. Integer sapien est, iaculis in, pretium quis,           ipsum dolor sit amet, consectetuer adipiscing elit. In
viverra ac, nunc.   Praesent eget sem vel leo ultri-              hachabitasseplateadictumst. Integertempusconva-
ces bibendum.  Aenean faucibus.  Morbi dolor nulla,               llis augue. Etiam facilisis. Nunc elementum fermen-
malesuada eu, pulvinar at, mollis ac, nulla.   Cur-               tum wisi.  Aenean placerat.  Ut imperdiet, enim sed
abitur auctor semper nulla.  Donec varius orci eget               gravida sollicitudin, felis odio placerat quam, ac pul-
risus.  Duis nibh mi, congue eu, accumsan eleifend,               vinar elit purus eget enim. Nunc vitae tortor. Proin
sagittis quis, diam. Duis eget orci sit amet orci dig-            tempus nibh sit amet nisl. Vivamus quis tortor vitae
nissim rutrum.                                                    risus porta vehicula.
  Nam dui ligula, fringilla a, euismod sodales, sollic-
itudin vel, wisi. Morbi auctor lorem non justo. Nam                  Fusce mauris.  Vestibulum luctus nibh at lectus.
lacus libero, pretium at, lobortis vitae, ultricies et,           Sed bibendum, nulla a faucibus semper, leo velit ul-
tellus.   Donec aliquet, tortor sed accumsan biben-               tricies tellus, ac venenatis arcu wisi vel nisl. Vestibu-
dum,  erat ligula aliquet magna,  vitae ornare odio               lum diam. Aliquam pellentesque, augue quis sagittis
metus a mi.  Morbi ac orci et nisl hendrerit mollis.              posuere, turpis lacus congue quam, in hendrerit risus
Suspendisse ut massa.  Cras nec ante.  Pellentesque               eros eget felis.  Maecenas eget erat in sapien mattis
a nulla. Cum sociis natoque penatibus et magnis dis               porttitor.  Vestibulum porttitor.  Nulla facilisi.  Sed
parturient montes, nascetur ridiculus mus. Aliquam                a turpis eu lacus commodo facilisis.  Morbi fringilla,
tincidunturna. Nullaullamcorpervestibulumturpis.                  wisi in dignissim interdum, justo lectus sagittis dui,
Pellentesque cursus luctus mauris.                                et vehicula libero dui cursus dui.   Mauris tempor
  Nulla malesuada porttitor diam. Donec felis erat,               ligula sed lacus.  Duis cursus enim ut augue.  Cras
congue non, volutpat at, tincidunt tristique, libero.             ac magna.  Cras nulla.  Nulla egestas.  Curabitur a
Vivamus viverra fermentum felis.  Donec nonummy                   leo.   Quisque egestas wisi eget nunc.   Nam feugiat


                                                                1

================================================
FILE: resources/toy.layout.txt
================================================
AWAY again1
   AWAY again2


    Something[cited]

              Single quote operator
              Double quote operator
              Last Txt

================================================
FILE: tests/__init__.py
================================================
import concurrent.futures
import os
import ssl
import sys
import urllib.request
from pathlib import Path
from typing import Optional
from urllib.error import HTTPError

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self

import yaml

# Directory containing this test package.
TESTS_ROOT = Path(__file__).parent.resolve()
# Parent directory of the test package.
PROJECT_ROOT = TESTS_ROOT.parent
# "resources" directory below the project root.
RESOURCE_ROOT = PROJECT_ROOT / "resources"
# "sample-files" directory below the project root.
SAMPLE_ROOT = Path(PROJECT_ROOT) / "sample-files"


def _get_data_from_url(url: str) -> bytes:
    """
    Download the given URL and return the response body.

    The request is attempted up to three times. Only ``HTTPError`` triggers
    a retry; if the last attempt fails as well, that ``HTTPError`` is
    re-raised so the caller sees the real status code.

    Args:
        url: location of the file to download

    Returns:
        The raw response body

    Raises:
        HTTPError: if every attempt failed with an HTTP error

    """
    # Replaces the default HTTPS context factory for the whole process with
    # the unverified one, i.e. certificates are not checked.
    ssl._create_default_https_context = ssl._create_unverified_context
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            with urllib.request.urlopen(  # noqa: S310
                    url
            ) as response:
                return response.read()
        except HTTPError:
            # Swallow the error while retries are left, surface the last one.
            if attempt == max_attempts:
                raise
    # Not reachable: the loop either returns or re-raises on its last pass.
    raise ValueError(f"Unknown error handling {url}")


# TODO: Make keyword-only and drop name being optional.
def get_data_from_url(url: Optional[str] = None, name: Optional[str] = None) -> bytes:
    """
    Download a File from a URL and return its contents.

    This function makes sure the PDF is not downloaded too often.
    This function is a last resort for PDF files where we are uncertain if
    we may add it for testing purposes to https://github.com/py-pdf/sample-files

    Downloads are stored in a ``pdf_cache`` directory under the file name
    ``name``. ``file://`` URLs are read directly from disk and never cached.
    If ``url`` is ``None``, the file must already be present in the cache.

    Args:
        url: location of the PDF file
        name: unique name across all files

    Returns:
        Read File as bytes

    Raises:
        ValueError: if no name is given

    """
    if name is None:
        raise ValueError("A name must always be specified")

    if os.getenv("GITHUB_JOB", None) is not None:
        # When GITHUB_JOB is set, the cache is resolved relative to the
        # current working directory.
        cache_dir = Path("tests", "pdf_cache").resolve()
    else:
        cache_dir = Path(__file__).parent / "pdf_cache"
    # `download_test_pdfs` calls this function from many threads at once, so
    # a separate "exists" check followed by `mkdir()` could race.
    cache_dir.mkdir(parents=True, exist_ok=True)
    cache_path = cache_dir / name

    if url is not None:
        if url.startswith("file://"):
            # Strip the scheme and normalize Windows path separators.
            path = Path(url[7:].replace("\\", "/"))
            return path.read_bytes()
        if not cache_path.exists():
            cache_path.write_bytes(_get_data_from_url(url))
    return cache_path.read_bytes()


def _strip_position(line: str) -> str:
    """
    Drop the logger location prefix from a captured log line.

    For example
        WARNING  pypdf._reader:_utils.py:364 Xref table not zero-indexed.

    is reduced to
        Xref table not zero-indexed.

    A line without any ".py:" marker yields an empty string.

    Args:
        line: the captured log line

    Returns:
        The message part of the line

    """
    # Cut everything up to and including the first ".py:" marker. Any later
    # ".py:" occurrences lose their colon.
    _, _, remainder = line.partition(".py:")
    remainder = remainder.replace(".py:", ".py")
    # What is left starts with the line number; drop that first token.
    return remainder.partition(" ")[2]


def normalize_warnings(caplog_text: str) -> list[str]:
    """
    Split captured log text into lines and strip the position from each one.

    Args:
        caplog_text: the captured log output

    Returns:
        One entry per line of the stripped input, each passed through
        `_strip_position`

    """
    captured_lines = caplog_text.strip().split("\n")
    return list(map(_strip_position, captured_lines))


def is_sublist(child_list, parent_list):
    """
    Check if child_list is a sublist of parent_list, with respect to
    * elements order
    * elements repetition

    The elements of child_list do not have to be adjacent in parent_list.
    Elements are compared using `==`

    The check walks parent_list once instead of recursing on list slices,
    so long inputs neither hit the recursion limit nor copy the lists.

    Args:
        child_list: the elements to look for, in order
        parent_list: the list to search in

    Returns:
        True if all elements of child_list occur in parent_list in the
        same order, False otherwise. An empty child_list always matches.

    """
    # The iterator is shared between all `any()` calls: each child element
    # continues searching where the previous match stopped.
    remaining = iter(parent_list)
    return all(
        any(parent_item == child_item for parent_item in remaining)
        for child_item in child_list
    )


def read_yaml_to_list_of_dicts(yaml_file: Path) -> list[dict[str, str]]:
    """
    Parse a YAML file using the safe loader.

    Args:
        yaml_file: path of the YAML file

    Returns:
        The parsed document. The annotation assumes a list of string
        mappings; the content is not validated here.

    """
    # Be explicit about the encoding instead of relying on the platform default.
    with open(yaml_file, encoding="utf-8") as yaml_input:
        return yaml.safe_load(yaml_input)


def download_test_pdfs():
    """
    Fetch every file listed in ``example_files.yaml`` into the local cache.

    Run this before the tests are executed to ensure you have everything locally.
    This is especially important to avoid pytest timeouts.

    The downloads run in a thread pool. The futures are only waited for and
    their results are never read, so a failed download does not raise here.
    """
    manifest_path = Path(__file__).parent / "example_files.yaml"
    entries = read_yaml_to_list_of_dicts(manifest_path)

    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as pool:
        pending = []
        for entry in entries:
            pending.append(
                pool.submit(get_data_from_url, entry["url"], name=entry["local_filename"])
            )
        concurrent.futures.wait(pending)


class PILContext:
    """
    Context manager temporarily enabling ``ImageFile.LOAD_TRUNCATED_IMAGES``.

    The previous value of the PIL/Pillow setting is remembered on entry and
    written back on exit.
    """

    def __init__(self) -> None:
        # Value of the setting before entering; overwritten by `__enter__`.
        self._saved_load_truncated_images = False

    def __enter__(self) -> Self:
        from PIL import ImageFile  # noqa: PLC0415

        # Remember the current setting, then allow loading incomplete images.
        self._saved_load_truncated_images = ImageFile.LOAD_TRUNCATED_IMAGES
        ImageFile.LOAD_TRUNCATED_IMAGES = True
        return self

    def __exit__(self, type_, value, traceback) -> Optional[bool]:
        from PIL import ImageFile  # noqa: PLC0415

        ImageFile.LOAD_TRUNCATED_IMAGES = self._saved_load_truncated_images
        # `None` lets an exception from the block propagate; without an
        # exception, `True` is returned.
        return None if type_ else True


================================================
FILE: tests/bench.py
================================================
"""
Benchmark the speed of pypdf.

The results are on https://py-pdf.github.io/pypdf/dev/bench/
Please keep in mind that the variance is high.
"""
from io import BytesIO
from tempfile import NamedTemporaryFile

import pytest

import pypdf
from pypdf import PageObject, PdfReader, PdfWriter, Transformation
from pypdf.generic import Destination, read_string_from_stream

from . import RESOURCE_ROOT, SAMPLE_ROOT, get_data_from_url


def page_ops(pdf_path, password):
    """
    Run a fixed sequence of page operations on the first page of a resource PDF.

    Args:
        pdf_path: file name relative to RESOURCE_ROOT
        password: password used to decrypt the reader; skipped if falsy

    """
    reader = PdfReader(RESOURCE_ROOT / pdf_path)
    writer = PdfWriter()

    if password:
        reader.decrypt(password)

    page = writer.add_page(reader.pages[0])

    # Each transformation is applied and the page is then merged with itself.
    transformations = (
        Transformation().rotate(90).scale(1.2),
        Transformation().scale(1).translate(tx=1, ty=1),
        Transformation().rotate(90).scale(1).translate(tx=1, ty=1),
    )
    for transformation in transformations:
        page.add_transformation(transformation)
        page.merge_page(page)

    # A transformation may also be passed as a plain 6-tuple.
    page.add_transformation((1, 0, 0, 0, 0, 0))
    page.scale(2, 2)
    page.scale_by(0.5)
    page.scale_to(100, 100)

    written_page = writer.pages[0]
    written_page.compress_content_streams()
    written_page.extract_text()


def test_page_operations(benchmark):
    """
    Benchmark `page_ops` on the password-protected LibreOffice sample.

    Covers rotation, scaling, translation, content stream compression and
    text extraction.
    """
    benchmark(page_ops, "libreoffice-writer-password.pdf", "openpassword")


def merge():
    """
    Append several resource PDFs to one writer, write it and check the outline.

    The sources are passed to `PdfWriter.append` in different forms: as
    path, as `PdfReader` (plain and decrypted) and as open file handle.
    """
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    outline = RESOURCE_ROOT / "pdflatex-outline.pdf"
    pdf_forms = RESOURCE_ROOT / "pdflatex-forms.pdf"
    pdf_pw = RESOURCE_ROOT / "libreoffice-writer-password.pdf"

    writer = PdfWriter()

    # Path objects:
    writer.append(pdf_path)
    writer.append(outline)
    # The page range is built from the empty slice `slice(0, 0)`.
    writer.append(pdf_path, pages=pypdf.pagerange.PageRange(slice(0, 0)))
    writer.append(pdf_forms)

    # Merging an encrypted file
    reader = PdfReader(pdf_pw)
    reader.decrypt("openpassword")
    writer.append(reader)

    # PdfReader object:
    writer.append(PdfReader(pdf_path), outline_item="True")

    # File handle
    with open(pdf_path, "rb") as fh:
        writer.append(fh)

    outline_item = writer.add_outline_item("An outline item", 0)
    writer.add_outline_item("deeper", 0, parent=outline_item)
    writer.add_metadata({"/Author": "Martin Thoma"})
    writer.add_named_destination("title", 0)
    writer.set_page_layout("/SinglePage")
    writer.page_mode = "/UseThumbs"

    # The temporary file is only used for its name; the writer opens that
    # path on its own.
    with NamedTemporaryFile(suffix=".pdf") as target_file:
        write_path = target_file.name
        writer.write(write_path)
        writer.close()

        # Check if outline is correct
        reader = PdfReader(write_path)
        # Only top-level `Destination` entries are compared; other entries
        # of `reader.outline` are filtered out by the isinstance check.
        assert [
            el.title for el in reader.outline if isinstance(el, Destination)
        ] == [
            "Foo",
            "Bar",
            "Baz",
            "Foo",
            "Bar",
            "Baz",
            "Foo",
            "Bar",
            "Baz",
            "True",
            "An outline item",
        ]


def test_merge(benchmark):
    """
    Benchmark merging several PDF files.

    Runs `merge`: appending files to a writer, adding outline items and
    metadata, writing the result and reading it back.
    """
    benchmark(merge)


def text_extraction(pdf_path):
    """
    Extract the text of all pages of the given PDF file.

    Args:
        pdf_path: path of the PDF file to read

    Returns:
        The extracted text of all pages, concatenated without separator

    """
    with open(pdf_path, mode="rb") as pdf_stream:
        document = PdfReader(pdf_stream)
        return "".join(page.extract_text() for page in document.pages)


def test_text_extraction(benchmark):
    """Benchmark `text_extraction` on the GeoTopo sample file."""
    file_path = SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf"
    benchmark(text_extraction, file_path)


def read_string_from_stream_performance():
    """Read a parenthesized string with a payload of 256 KiB of ``x`` bytes."""
    payload = b"x" * (1024 * 256)
    stream = BytesIO(b"(" + payload + b")")
    parsed = read_string_from_stream(stream)
    assert parsed


def test_read_string_from_stream_performance(benchmark):
    """
    Benchmark reading a long string object.

    This test simulates reading an embedded base64 image of 256kb.
    It should be faster than a second, even on ancient machines.

    Runs < 100ms on a 2019 notebook. Takes 10 seconds prior to #1350.
    """
    benchmark(read_string_from_stream_performance)


def image_new_property(data):
    """
    Exercise the `images` property of the first page of the given PDF.

    Checks the available keys as well as access by index, by name, by
    tuple key and by slice, plus the error cases.

    Args:
        data: source passed to `PdfReader`

    """
    reader = PdfReader(data)
    # Plain names are images in the page's own /XObject resources (see the
    # "/I0" check below). List entries are presumably paths to images
    # nested inside other XObjects — TODO confirm.
    assert reader.pages[0].images.keys() == [
        "/I0",
        "/I1",
        "/I2",
        "/I3",
        "/I4",
        "/I5",
        "/I6",
        "/I7",
        "/I8",
        "/I9",
        ["/TPL1", "/Image5"],
        ["/TPL2", "/Image53"],
        ["/TPL2", "/Image37"],
        ["/TPL2", "/Image49"],
        ["/TPL2", "/Image51"],
        ["/TPL2", "/Image39"],
        ["/TPL2", "/Image57"],
        ["/TPL2", "/Image55"],
        ["/TPL2", "/Image43"],
        ["/TPL2", "/Image30"],
        ["/TPL2", "/Image22"],
        ["/TPL2", "/Image41"],
        ["/TPL2", "/Image47"],
        ["/TPL2", "/Image45"],
        ["/TPL3", "/Image65"],
        ["/TPL3", "/Image30"],
        ["/TPL3", "/Image61"],
        ["/TPL4", "/Image30"],
        ["/TPL5", "/Image30"],
        ["/TPL6", "/Image30"],
        ["/TPL7", "/Image30"],
        ["/TPL8", "/Image30"],
        ["/TPL9", "/Image30"],
        ["/TPL10", "/Image30"],
        ["/TPL11", "/Image30"],
        ["/TPL12", "/Image30"],
    ]
    assert len(reader.pages[0].images.items()) == 36
    # Access by integer index, including a negative one.
    assert reader.pages[0].images[0].name == "I0.png"
    assert len(reader.pages[0].images[-1].data) > 10000
    # Access by tuple key.
    assert reader.pages[0].images["/TPL1", "/Image5"].image.format == "JPEG"
    assert (
        reader.pages[0].images["/I0"].indirect_reference.get_object()
        == reader.pages[0]["/Resources"]["/XObject"]["/I0"]
    )
    # Access by slice.
    list(reader.pages[0].images[0:2])
    # Unsupported key type and out-of-range index.
    with pytest.raises(TypeError):
        reader.pages[0].images[b"0"]
    with pytest.raises(IndexError):
        reader.pages[0].images[9999]
    # just for test coverage:
    with pytest.raises(KeyError):
        reader.pages[0]._get_image(["test"], reader.pages[0])
    assert list(PageObject(None, None).images) == []


@pytest.mark.enable_socket
def test_image_new_property_performance(benchmark):
    """Benchmark `image_new_property` on a downloaded (and cached) PDF file."""
    url = "https://github.com/py-pdf/pypdf/files/11219022/pdf_font_garbled.pdf"
    name = "pdf_font_garbled.pdf"
    data = BytesIO(get_data_from_url(url, name=name))

    benchmark(image_new_property, data)


def image_extraction(data):
    """
    Iterate over all images of the first page of the given PDF.

    Args:
        data: source passed to `PdfReader`

    """
    reader = PdfReader(data)
    # The resulting list is discarded; only the iteration itself matters.
    list(reader.pages[0].images)


@pytest.mark.enable_socket
def test_large_compressed_image_performance(benchmark):
    """Benchmark `image_extraction` on a downloaded (and cached) PDF file."""
    url = "https://github.com/py-pdf/pypdf/files/15306199/file_with_large_compressed_image.pdf"
    data = BytesIO(get_data_from_url(url, name="file_with_large_compressed_image.pdf"))
    benchmark(image_extraction, data)


================================================
FILE: tests/conftest.py
================================================
"""Fixtures that are available automatically for all tests."""

import uuid

import pytest


@pytest.fixture(scope="session")
def pdf_file_path(tmp_path_factory):
    """
    Provide a randomly named ``.pdf`` path for the test session.

    The path lies inside a directory obtained from
    ``tmp_path_factory.mktemp``; the file itself is not created here.
    """
    return tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.pdf"


@pytest.fixture(scope="session")
def txt_file_path(tmp_path_factory):
    """
    Provide a randomly named ``.txt`` path for the test session.

    The path lies inside a directory obtained from
    ``tmp_path_factory.mktemp``; the file itself is not created here.
    """
    return tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.txt"


================================================
FILE: tests/example_files.yaml
================================================
- local_filename: 2201.00214.pdf
  url: https://arxiv.org/pdf/2201.00214.pdf
- local_filename: ASurveyofImageClassificationBasedTechniques.pdf
  url: https://raw.githubusercontent.com/xyegithub/myBlog/12127c712ac2008782616c743224b187a4069477/posts/c94b2364/paper_pdfs/ImageClassification/2007%2CASurveyofImageClassificationBasedTechniques.pdf
- local_filename: Giacalone.pdf
  url: https://github.com/yxj-HGNwmb5kdp8ewr/yxj-HGNwmb5kdp8ewr.github.io/raw/master/files/Giacalone%20Llobell%20Jaeger%20(2022)%20Food%20Qual%20Prefer.pdf
- local_filename: iss1718.pdf
  url: https://github.com/py-pdf/pypdf/files/10983477/Ballinasloe_WS.pdf
- local_filename: iss2077.pdf
  url: https://github.com/py-pdf/pypdf/files/12309492/example_134.pdf
- local_filename: pdf_font_garbled.pdf
  url: https://github.com/py-pdf/pypdf/files/11219022/pdf_font_garbled.pdf
- local_filename: The%20lean%20times%20in%20the%20Peruvian%20economy.pdf
  url: https://github.com/alexanderquispe/1REI05/raw/main/reports/report_1/The%20lean%20times%20in%20the%20Peruvian%20economy.pdf
- local_filename: tika-908104.pdf
  url: https://github.com/user-attachments/files/18382273/tika-908104.pdf
- local_filename: tika-923406.pdf
  url: https://github.com/user-attachments/files/18382274/tika-923406.pdf
- local_filename: tika-955562.pdf
  url: https://github.com/user-attachments/files/18382288/tika-955562.pdf
- local_filename: tika-959173.pdf
  url: https://github.com/user-attachments/files/18382295/tika-959173.pdf
- local_filename: waarom-meisjes-het-beter-doen-op-HAVO-en-VWO-ROA.pdf
  url: https://github.com/py-pdf/pypdf/files/10773829/waarom-meisjes-het-beter-doen-op-HAVO-en-VWO-ROA.pdf
- local_filename: tika-957144.pdf
  url: https://github.com/user-attachments/files/18382302/tika-957144.pdf
- local_filename: ascii charset.pdf
  url: https://github.com/py-pdf/pypdf/files/9472500/main.pdf
- local_filename: cmap1370.pdf
  url: https://github.com/py-pdf/pypdf/files/9667138/cmap1370.pdf
- local_filename: 02voc.pdf
  url: https://github.com/py-pdf/pypdf/files/9712729/02voc.pdf
- local_filename: iss1533.pdf
  url: https://github.com/py-pdf/pypdf/files/10376149/iss1533.pdf
- local_filename: tstUCS2.pdf
  url: https://github.com/py-pdf/pypdf/files/11190189/pdf_font_garbled.pdf
- local_filename: tst-GBK_EUC.pdf
  url: https://github.com/py-pdf/pypdf/files/11315397/3.pdf
- local_filename: math_latex.pdf
  url: https://github.com/py-pdf/pypdf/files/12163370/math-in-text-created-via-latex.pdf
- local_filename: unixxx_glyphs.pdf
  url: https://arxiv.org/pdf/2201.00021.pdf
- local_filename: TextAttack_paper.pdf
  url: https://arxiv.org/pdf/2005.05909.pdf
- local_filename: iss2173.pdf
  url: https://github.com/py-pdf/pypdf/files/12552700/tt.pdf
- local_filename: iss2290.pdf
  url: https://github.com/py-pdf/pypdf/files/13452885/example.pdf
- local_filename: NewJersey.pdf
  url: https://github.com/py-pdf/pypdf/files/12090692/New.Jersey.Coinbase.staking.securities.charges.2023-0606_Coinbase-Penalty-and-C-D.pdf
- local_filename: tika-952445.pdf
  url: https://github.com/user-attachments/files/18382348/tika-952445.pdf
- local_filename: tika-921632.pdf
  url: https://github.com/user-attachments/files/18382354/tika-921632.pdf
- local_filename: tika-976970.pdf
  url: https://github.com/user-attachments/files/18382397/tika-976970.pdf
- local_filename: tika-914102.pdf
  url: https://github.com/user-attachments/files/18381687/tika-914102.pdf
- local_filename: iss1737.pdf
  url: https://github.com/py-pdf/pypdf/files/11068604/tt1.pdf
- local_filename: issue-1801.pdf
  url: https://github.com/py-pdf/pypdf/files/11250359/test_img.pdf
- local_filename: tika-924546.pdf
  url: https://github.com/user-attachments/files/18381697/tika-924546.pdf
- local_filename: issue-1801.png
  url: https://user-images.githubusercontent.com/1658117/232842886-9d1b0726-3a5b-430d-8464-595d919c266c.png
- local_filename: grimm10
  url: https://github.com/py-pdf/pypdf/files/11336817/grimm10.pdf
- local_filename: labeled-edges-center-image.png
  url: https://user-images.githubusercontent.com/4083478/236685544-a1940b06-fb42-4bb1-b589-1e4ad429d68e.png
- local_filename: watermark1.png
  url: https://user-images.githubusercontent.com/4083478/236793172-09340aef-3440-4c8a-af85-a91cdad27d46.png
- local_filename: tika-977609.pdf
  url: https://github.com/user-attachments/files/18381754/tika-977609.pdf
- local_filename: tifimage.png
  url: https://user-images.githubusercontent.com/4083478/236793166-288b4b59-dee3-49fd-a04e-410aab06199a.png
- local_filename: tika-972174.pdf
  url: https://github.com/user-attachments/files/18381744/tika-972174.pdf
- local_filename: tika-972174_p0-im0.png
  url: https://user-images.githubusercontent.com/4083478/238288207-b77dd38c-34b4-4f4f-810a-bf9db7ca0414.png
- local_filename: Vitocal.pdf
  url: https://github.com/py-pdf/pypdf/files/11962229/DB-5368770_Vitocal_200-G.pdf
- local_filename: VitocalImage.png
  url: https://user-images.githubusercontent.com/4083478/251283945-38c5b92c-cf94-473c-bb57-a51b74fc39be.jpg
- local_filename: cmyk_deflate.pdf
  url: https://github.com/py-pdf/pypdf/files/12078533/cmyk2.pdf
- local_filename: cmyk_deflate.tif
  url: https://github.com/py-pdf/pypdf/files/12078556/cmyk.tif.txt
- local_filename: o1whh9b3.pdf
  url: https://github.com/py-pdf/pypdf/files/11578953/USC.EMBA.-.Pre-Season.and.Theme.I.pdf
- local_filename: selbst.72916.pdf
  url: https://github.com/py-pdf/pypdf/files/14395695/selbst.72916.pdf
- local_filename: iss1912.pdf
  url: https://github.com/py-pdf/pypdf/files/11845099/GeoTopo-komprimiert.pdf
- local_filename: calRGB.pdf
  url: https://github.com/py-pdf/pypdf/files/12061061/tt.pdf
- local_filename: 2023USDC.pdf
  url: https://github.com/py-pdf/pypdf/files/12090523/2023.USDC_Circle.Examination.Report.May.2023.pdf
- local_filename: iss1982_im1.png
  url: https://github.com/py-pdf/pypdf/files/12144094/im1.png.txt
- local_filename: iss1982_im2.png
  url: https://github.com/py-pdf/pypdf/files/12144093/im2.png.txt
- local_filename: usa.png
  url: https://github.com/py-pdf/pypdf/assets/4083478/56c93021-33cd-4387-ae13-5cbe7e673f42
- local_filename: paid.pdf
  url: https://github.com/py-pdf/pypdf/files/12050253/tt.pdf
- local_filename: Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf
  url: https://www.joinville.sc.gov.br/wp-content/uploads/2023/11/Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf
- local_filename: iss2138.pdf
  url: https://github.com/py-pdf/pypdf/files/12483807/AEO.1172.pdf
- local_filename: iss3268.pdf
  url: https://github.com/user-attachments/files/20060394/broken.pdf
- local_filename: direct-link.pdf
  url: https://github.com/user-attachments/files/20348304/tst.pdf
- local_filename: named-reference.pdf
  url: https://github.com/user-attachments/files/20455804/MinimalJob.pdf
- local_filename: large_lzw_example_encoded.dat
  url: https://github.com/user-attachments/files/20923310/large_lzw_example_encoded.dat.txt
- local_filename: issue-3419.pdf
  url: https://github.com/user-attachments/files/21578875/layout-parser-paper-with-empty-pages.pdf
- local_filename: issue-3429.pdf
  url: https://github.com/user-attachments/files/21711469/bomb.pdf
- local_filename: issue-3508.pdf
  url: https://github.com/user-attachments/files/23211824/repair-manual-thermo-230-300-350-2012-en.pdf
- local_filename: issue-3553.pdf
  url: https://github.com/user-attachments/files/23996861/ATOLCertificate.pdf
- local_filename: issue-3633.pdf
  url: https://github.com/user-attachments/files/25212719/minimal_signature.pdf


================================================
FILE: tests/generic/__init__.py
================================================


================================================
FILE: tests/generic/test_base.py
================================================
"""Test the pypdf.generic._base module."""
from io import BytesIO

import pytest

from pypdf import PdfReader, PdfWriter
from pypdf.generic import read_hex_string_from_stream
from tests import get_data_from_url


@pytest.mark.parametrize(
    ("source", "expected"),
    [
        (b"<00FE00FF>", "\xfe\xff"),
        (b"<00FE00FF00D6>", "\xfe\xff\xd6"),
    ]
)
def test_text_string_object__looks_like_bom(source: bytes, expected: str) -> None:
    """
    Read hex strings which decode to text starting with U+00FE U+00FF.

    Judging by the test name, these characters look like a byte order mark;
    they are expected to be part of the returned text.
    """
    stream = BytesIO(source)
    result = read_hex_string_from_stream(stream)
    assert result == expected


@pytest.mark.enable_socket
def test_text_string_object__wrongly_detected_bom():
    """
    Merge each page of the sample file into a blank page and extract the text.

    The sample file is cached as ``issue3587.pdf``. The extracted text of
    every merged page has to match the expected text below.
    """
    url = "https://github.com/user-attachments/files/24401507/minimal.pdf"
    name = "issue3587.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    reader_page = reader.pages[0]

    writer = PdfWriter()
    for page in reader.pages:
        # The blank page always uses the size of the first source page.
        writer_page = writer.add_blank_page(reader_page.mediabox.width, reader_page.mediabox.height)
        writer_page.merge_page(page)

        assert writer_page.extract_text() == (
            "无译形带 r的参 z慧队手行 c要枪互工先调 uC一在你 k该方导最 xT况 M味政没出 v大同团\n"
            "想急压游这体构主 m基重张预另做内已织程术并 U种规被中应 s过小立就公测和 F更为 BS\n"
            "把强型 w利 qfJ现能您关文）己个言 VW是 Z亲社 y。说准密令 K络通自力 i诸旦明量放及 I\n"
            "成战康养 d都蜂多开 pE次提朋动比台有培愿 A确 l充计标去人如么 b灵 N它 g弃语看 X；j\n"
            "轮 HG采共由地友入（器 Y果感建切理情从集德翻 a单第识任 Q模 eh目经相哪受起时着 DR\n"
            "用好 o备划付信、度解效作协读 O讨高具击始者意群治扩到 P才兰网认 t马倒来本整 L们 n\n"
            "系可论，步各之但\n"
            "12"
        )


================================================
FILE: tests/generic/test_data_structures.py
================================================
"""Test the pypdf.generic._data_structures module."""
import os
import subprocess
import sys
from io import BytesIO
from pathlib import Path
from typing import Callable

import pytest

from pypdf import PdfReader, PdfWriter
from pypdf.errors import LimitReachedError
from pypdf.generic import (
    ArrayObject,
    ContentStream,
    DictionaryObject,
    NameObject,
    NullObject,
    RectangleObject,
    StreamObject,
    TreeObject,
)
from tests import RESOURCE_ROOT, get_data_from_url

# `resource` is optional: tests depending on it are skipped when the import
# fails (see the `skipif` markers checking `resource is None`).
try:
    import resource
except ImportError:
    resource = None


def test_dictionary_object__get_next_object_position():
    """
    Check the lookup of the first object position after a given position.

    If the xref data has no larger position, `position_end` is returned.
    """
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")

    # reader.xref = {0: {7: 15, 9: 10245, 12: 939, 14: 2999, 16: 4982, 18: 9949, 22: 11160}}
    assert DictionaryObject._get_next_object_position(
        position_before=12345, position_end=999999, generations=list(reader.xref), pdf=reader
    ) == 999999  # No value after 12345 in dictionary
    assert DictionaryObject._get_next_object_position(
        position_before=11111, position_end=999999, generations=list(reader.xref), pdf=reader
    ) == 11160  # First value after 11111 in dictionary.
    assert DictionaryObject._get_next_object_position(
        position_before=42, position_end=999999, generations=list(reader.xref), pdf=reader
    ) == 939  # First value after 42 in dictionary.

    # New generation.
    reader.xref[1] = {7: 42, 24: 15000}
    # 15 is the smallest position after 10 across both generations.
    assert DictionaryObject._get_next_object_position(
        position_before=10, position_end=999999, generations=list(reader.xref), pdf=reader
    ) == 15


def test_tree_object__cyclic_reference(caplog):
    """
    Iterate the children of a tree whose /Next links form a cycle.

    Each child has to be returned exactly once and the cycle has to be
    reported in the log.
    """
    writer = PdfWriter()
    # Build the cycle child2 -> child1 -> child3 -> child2 through /Next.
    child1 = writer._add_object(DictionaryObject())
    child2 = writer._add_object(DictionaryObject({NameObject("/Next"): child1}))
    child3 = writer._add_object(DictionaryObject({NameObject("/Next"): child2}))
    child1.get_object()[NameObject("/Next")] = child3
    tree = TreeObject()
    tree[NameObject("/First")] = child2
    # /Last is a separate object which is not part of the /Next chain.
    tree[NameObject("/Last")] = writer._add_object(DictionaryObject())

    assert list(tree.children()) == [child2.get_object(), child1.get_object(), child3.get_object()]
    assert "Detected cycle in outline structure for " in caplog.text


@pytest.mark.enable_socket
def test_array_object__clone_same_object_multiple_times(caplog):
    """
    Add all pages of the sample file to a writer.

    Every added page has to keep the expected media box and nothing may be
    logged. The sample file is cached as ``issue2991.pdf``.
    """
    url = "https://github.com/user-attachments/files/25412858/Draft_OSMF_financial_statement_2013.pdf"
    name = "issue2991.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url=url, name=name)))

    writer = PdfWriter()
    for page in reader.pages:
        page2 = writer.add_page(page)
        assert page2.mediabox == RectangleObject((0, 0, 595, 841))
    assert caplog.messages == []


def test_array_object__clone_same_stream_multiple_times():
    """
    Clone two arrays which reference the same stream objects.

    After replacing one of the shared originals with a null object, the
    original arrays no longer yield its data, while both clones still
    yield the data of all three shared streams.
    """
    writer = PdfWriter()

    # Unique streams.
    stream1 = StreamObject()
    stream1.set_data(b"Hello World!")
    stream2 = StreamObject()
    stream2.set_data(b"Lorem ipsum!")

    # Shared streams.
    shared_streams = [StreamObject() for _ in range(3)]
    for index, shared_stream in enumerate(shared_streams):
        shared_stream.set_data(f"Shared stream {index}".encode())

    # Add to writer.
    writer._add_object(stream1)
    writer._add_object(stream2)
    shared_references = [writer._add_object(shared_stream) for shared_stream in shared_streams]

    # Arrays.
    array1 = ArrayObject([stream1.indirect_reference, *shared_references])
    array2 = ArrayObject([stream2.indirect_reference, *shared_references])

    # Cloned.
    cloned1 = array1.clone(pdf_dest=writer)
    cloned2 = array2.clone(pdf_dest=writer)

    # Nullify one shared object.
    writer._replace_object(shared_references[1].indirect_reference, NullObject())

    # The first entry is always different. The remaining shared entries should be dedicated copies.
    assert cloned1[1:] != cloned2[1:]

    # The original arrays see the nullified entry, thus "Shared stream 1" is gone.
    assert ContentStream(stream=array1, pdf=None).get_data() == b"Hello World!\nShared stream 0\nShared stream 2\n"
    assert ContentStream(stream=array2, pdf=None).get_data() == b"Lorem ipsum!\nShared stream 0\nShared stream 2\n"
    # The clones still yield all three shared streams.
    assert (
        ContentStream(stream=cloned1, pdf=None).get_data() ==
        b"Hello World!\nShared stream 0\nShared stream 1\nShared stream 2\n"
    )
    assert (
        ContentStream(stream=cloned2, pdf=None).get_data() ==
        b"Lorem ipsum!\nShared stream 0\nShared stream 1\nShared stream 2\n"
    )


@pytest.mark.enable_socket
def test_dictionary_object__read_from_stream__limit():
    """
    Extract text from a page whose stream declares a length of 2147483647.

    With the default limits, this has to fail with a `LimitReachedError`.
    """
    name = "read_from_stream__length_2gb.pdf"
    url = "https://github.com/user-attachments/files/25842437/read_from_stream__length_2gb.pdf"

    reader = PdfReader(BytesIO(get_data_from_url(url=url, name=name)))
    page = reader.pages[0]

    with pytest.raises(
            expected_exception=LimitReachedError,
            match=r"^Declared stream length of 2147483647 exceeds maximum allowed length\.$"
    ):
        page.extract_text()


def _prepare_test_dictionary_object__read_from_stream__no_limit(
        path: Path
) -> tuple[str, dict[str, str], Callable[[], None]]:
    """
    Prepare the shared setup of the "no limit" subprocess tests.

    Args:
        path: directory where the sample PDF file is stored

    Returns:
        A tuple of the POSIX-style path of the stored PDF file, the
        environment for the subprocess and a callable which restricts the
        address space of the calling process to 1,000,000 KiB.

    """
    file_name = "read_from_stream__length_2gb.pdf"
    pdf_path = path / file_name
    pdf_path.write_bytes(
        get_data_from_url(
            url="https://github.com/user-attachments/files/25842437/read_from_stream__length_2gb.pdf",
            name=file_name,
        )
    )

    env = os.environ.copy()
    env["COVERAGE_PROCESS_START"] = "pyproject.toml"
    # Put the current directory in front of an already configured PYTHONPATH.
    inherited_python_path = env.get("PYTHONPATH")
    if inherited_python_path is None:
        env["PYTHONPATH"] = "."
    else:
        env["PYTHONPATH"] = "." + os.pathsep + inherited_python_path

    def limit_virtual_memory() -> None:
        limit_bytes = 1_000_000 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (limit_bytes, limit_bytes))

    return pdf_path.resolve().as_posix(), env, limit_virtual_memory


@pytest.mark.enable_socket
@pytest.mark.skipif(condition=resource is None, reason="Does not have 'resource' module.")
@pytest.mark.skipif(sys.platform == "darwin", reason="RLIMIT_AS is unreliable.")
def test_dictionary_object__read_from_stream__no_limit(tmp_path):
    """
    Read the 2 GB length sample from a file object without a length limit.

    The script runs in a subprocess with restricted address space and
    `filters.MAX_DECLARED_STREAM_LENGTH` raised to `sys.maxsize`. It is
    expected to exit with a `MemoryError`.
    """
    pdf_path_str, env, limit_virtual_memory = _prepare_test_dictionary_object__read_from_stream__no_limit(tmp_path)

    source_file = tmp_path / "script.py"
    source_file.write_text(
        f"""
import sys
from pypdf import filters, PdfReader

filters.MAX_DECLARED_STREAM_LENGTH = sys.maxsize

with open({pdf_path_str!r}, mode="rb") as fd:
    reader = PdfReader(fd)
    print(reader.pages[0].extract_text())
"""
    )

    # `preexec_fn` applies the memory limit inside the child process only.
    result = subprocess.run(  # noqa: S603  # We have the control here.
        [sys.executable, source_file],
        capture_output=True,
        env=env,
        text=True,
        preexec_fn=limit_virtual_memory,
    )
    assert result.returncode == 1
    assert result.stdout == ""
    assert result.stderr.replace("\r", "").endswith("\nMemoryError\n")


@pytest.mark.enable_socket
@pytest.mark.skipif(condition=resource is None, reason="Does not have 'resource' module.")
@pytest.mark.skipif(sys.platform == "darwin", reason="RLIMIT_AS is unreliable.")
def test_dictionary_object__read_from_stream__no_limit__path(tmp_path):
    """
    Read the 2 GB length sample from a path without a length limit.

    The script runs in a subprocess with restricted address space and
    `filters.MAX_DECLARED_STREAM_LENGTH` raised to `sys.maxsize`. Unlike
    the file object variant, it is expected to succeed and print the text.
    """
    pdf_path_str, env, limit_virtual_memory = _prepare_test_dictionary_object__read_from_stream__no_limit(tmp_path)

    source_file = tmp_path / "script.py"
    source_file.write_text(
        f"""
import sys
from pypdf import filters, PdfReader

filters.MAX_DECLARED_STREAM_LENGTH = sys.maxsize

reader = PdfReader({pdf_path_str!r})
print(reader.pages[0].extract_text())
"""
    )

    # `preexec_fn` applies the memory limit inside the child process only.
    result = subprocess.run(  # noqa: S603  # We have the control here.
        [sys.executable, source_file],
        capture_output=True,
        env=env,
        text=True,
        preexec_fn=limit_virtual_memory,
    )
    assert result.returncode == 0
    assert result.stdout.replace("\r", "") == "Hello from pypdf\n"
    assert result.stderr == ""


def _get_array_based_buffer(stream_count: int, chunk_bytes: int) -> BytesIO:
    """
    Create an in-memory PDF whose single page has an array of content streams.

    Args:
        stream_count: number of content streams in the /Contents array
        chunk_bytes: number of ``A`` bytes between ``q`` and ``Q`` in each stream

    Returns:
        The buffer the PDF file has been written to

    """
    writer = PdfWriter()
    page = writer.add_blank_page(width=10, height=10)

    # All streams carry the same data.
    chunk = b"q\n" + (b"A" * chunk_bytes) + b"\nQ\n"
    streams = [ContentStream(stream=None, pdf=writer) for _ in range(stream_count)]
    for stream in streams:
        stream.set_data(chunk)
    contents = ArrayObject([writer._add_object(stream) for stream in streams])
    page[NameObject("/Contents")] = contents

    buffer = BytesIO()
    writer.write(buffer)
    buffer.flush()
    return buffer


@pytest.mark.timeout(10)
def test_content_stream__array_based__performance():
    """Get the contents of a page with 10,000 content streams within the timeout."""
    buffer = _get_array_based_buffer(stream_count=10_000, chunk_bytes=7000)
    reader = PdfReader(buffer)
    _ = reader.pages[0].get_contents()


def test_content_stream__array_based__length():
    """Expect a `LimitReachedError` for a /Contents array with 11,000 elements."""
    buffer = _get_array_based_buffer(stream_count=11_000, chunk_bytes=1)
    reader = PdfReader(buffer)
    with pytest.raises(
            expected_exception=LimitReachedError, match=r"^Array\-based stream has 11000 > 10000 elements\.$"
    ):
        _ = reader.pages[0].get_contents()


@pytest.mark.timeout(10)
def test_content_stream__array_based__output_length():
    """
    Expect a `LimitReachedError` if the combined stream data gets too large.

    The element count stays within the limit here; the error has to report
    the output size instead, and it has to be raised within the timeout.
    """
    buffer = _get_array_based_buffer(stream_count=10_000, chunk_bytes=8192)
    reader = PdfReader(buffer)
    with pytest.raises(
            expected_exception=LimitReachedError,
            match=r"^Array\-based stream has at least 75003501 > 75000000 output bytes\.$"
    ):
        _ = reader.pages[0].get_contents()


================================================
FILE: tests/generic/test_files.py
================================================
"""Test the pypdf.generic._files module."""
import datetime
import shutil
import subprocess
from io import BytesIO

import pytest

from pypdf import PdfReader, PdfWriter
from pypdf.constants import AFRelationship
from pypdf.errors import PdfReadError, PyPdfError
from pypdf.generic import (
    ArrayObject,
    ByteStringObject,
    DictionaryObject,
    EmbeddedFile,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    TextStringObject,
    create_string_object,
)
from tests import SAMPLE_ROOT, get_data_from_url

# Path of the `pdfattach` executable, or None if it is not on the PATH.
PDFATTACH_BINARY = shutil.which("pdfattach")


@pytest.mark.skipif(PDFATTACH_BINARY is None, reason="Requires poppler-utils")
def test_embedded_file__basic(tmpdir):
    """
    Attach a text file to a sample PDF using `pdfattach` and read it back.

    The first loaded `EmbeddedFile` has to expose the name and content of
    the attached file; the optional metadata is expected to be unset.
    """
    clean_path = SAMPLE_ROOT / "002-trivial-libre-office-writer" / "002-trivial-libre-office-writer.pdf"
    attached_path = tmpdir / "attached.pdf"
    file_path = tmpdir / "test.txt"
    file_path.write_binary(b"Hello World\n")
    # The exit status of `pdfattach` is not checked here; a failure would
    # only show up when reading `attached_path` below.
    subprocess.run([PDFATTACH_BINARY, clean_path, file_path, attached_path])  # noqa: S603
    with PdfReader(str(attached_path)) as reader:
        attachment = next(iter(EmbeddedFile._load(reader.root_object)))

        assert attachment.name == "test.txt"
        assert attachment.alternative_name == "test.txt"
        assert attachment.description is None
        assert attachment.associated_file_relationship == AFRelationship.UNSPECIFIED
        assert attachment.subtype is None
        assert attachment.content == b"Hello World\n"
        assert attachment.size == 12
        assert attachment.creation_date is None
        assert attachment.modification_date is None
        assert attachment.checksum is None
        assert repr(attachment) == "<EmbeddedFile name='test.txt'>"


def test_embedded_file__artificial():
    """Exercise ``EmbeddedFile`` accessors against hand-built file specification dictionaries."""
    # No alternative name.
    spec = DictionaryObject(answer=42)
    attachment = EmbeddedFile(name="dummy", pdf_object=spec)
    assert attachment.alternative_name is None

    # No /EF.
    with pytest.raises(PdfReadError, match=f"/EF entry not found: {spec}"):
        _ = attachment._embedded_file

    # Empty /EF dictionary.
    spec = DictionaryObject()
    spec[NameObject("/EF")] = DictionaryObject()
    attachment = EmbeddedFile(name="dummy", pdf_object=spec)
    with pytest.raises(PdfReadError, match=r"No /\(U\)F key found in file dictionary: {}"):
        _ = attachment._embedded_file

    # Missing /Params key: /EF now has an /F entry, but that entry carries no /Params.
    embedded = DictionaryObject(answer=42)
    ef_dictionary = DictionaryObject()
    ef_dictionary[NameObject("/F")] = embedded
    spec[NameObject("/EF")] = ef_dictionary
    assert attachment._params == DictionaryObject()

    # An actual checksum is set.
    # Generated using `hashlib.md5(b"Hello World!\n").digest()`
    digest = b"\x8d\xdd\x8b\xe4\xb1y\xa5)\xaf\xa5\xf2\xff\xaeK\x98X"
    params = DictionaryObject()
    params[NameObject("/CheckSum")] = ByteStringObject(digest)
    embedded[NameObject("/Params")] = params
    assert attachment.checksum == digest


@pytest.mark.enable_socket
def test_embedded_file__kids():
    """Load an attachment from a sample PDF and check that removing /Names from the first kid yields no files."""
    # Generated using the instructions available from
    # https://medium.com/@pymupdf/zugferd-and-ghostscript-how-to-create-industry-standard-and-compliant-pdf-e-invoices-83c9fde31ee5
    # Notes:
    #   * Yes, we need the full paths. Otherwise, the output file will only have an empty page.
    #   * The XML file has been a custom basic text file.
    #   * The input PDF file has been the `002-trivial-libre-office-writer.pdf` file.
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18691309/embedded_files_kids.pdf",
        name="embedded_files_kids.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    loaded = list(EmbeddedFile._load(reader.root_object))
    assert len(loaded) == 1
    attachment = loaded[0]

    expected_modification_date = datetime.datetime(
        2013, 1, 21, 8, 14, 33, tzinfo=datetime.timezone(datetime.timedelta(hours=1))
    )
    assert attachment.name == "factur-x.xml"
    assert attachment.alternative_name == "factur-x.xml"
    assert attachment.description == "ZUGFeRD electronic invoice"
    assert attachment.associated_file_relationship == AFRelationship.ALTERNATIVE
    assert attachment.subtype == "/text/xml"
    assert attachment.content.startswith(b"Hello World!\n\nLorem ipsum dolor sit amet, ")
    assert attachment.content.endswith(b"\ntakimata sanctus est Lorem ipsum dolor sit amet.\n")
    assert attachment.size == 606
    assert attachment.creation_date is None
    assert attachment.modification_date == expected_modification_date
    assert attachment.checksum is None
    assert repr(attachment) == "<EmbeddedFile name='factur-x.xml'>"

    # No /Names in /Kids.
    embedded_files = reader.root_object[NameObject("/Names")][NameObject("/EmbeddedFiles")]
    first_kid = embedded_files[NameObject("/Kids")][0].get_object()
    del first_kid[NameObject("/Names")]
    assert list(EmbeddedFile._load(reader.root_object)) == []


@pytest.mark.enable_socket
def test_embedded_file__ensure_params__existing_params():
    """``_ensure_params`` must hand out the already existing /Params dictionary and keep its entries."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18691309/embedded_files_kids.pdf",
        name="embedded_files_kids.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    loaded = list(EmbeddedFile._load(reader.root_object))
    assert len(loaded) == 1
    attachment = loaded[0]

    mod_date_key = NameObject("/ModDate")
    custom_key = NameObject("/TestParam")
    custom_value = TextStringObject("test_value")

    assert "/Params" in attachment._embedded_file
    first_params = attachment._ensure_params
    assert isinstance(first_params, DictionaryObject)
    assert mod_date_key in first_params

    # Adding a new entry must not disturb the existing modification date.
    previous_mod_date = first_params.get(mod_date_key)
    first_params[custom_key] = TextStringObject("test_value")
    assert first_params[custom_key] == custom_value
    assert first_params[mod_date_key] == previous_mod_date

    # A second access returns the very same dictionary, including the new entry.
    second_params = attachment._ensure_params
    assert first_params is second_params
    assert second_params[custom_key] == custom_value


def test_embedded_file__name_is_read_only():
    """Assigning to ``EmbeddedFile.name`` must raise ``AttributeError``."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")
    assert attachment.name == "test.txt"

    with pytest.raises(AttributeError):
        attachment.name = "new_name.txt"


def test_embedded_file__alternative_name_setter():
    """Set, reset to ``None`` and set again the alternative name of a fresh attachment."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")

    attachment.alternative_name = TextStringObject("Alternative Name")
    assert attachment.alternative_name == "Alternative Name"

    # Resetting stores a null object under whichever of the two keys is present.
    attachment.alternative_name = None
    for key in (NameObject("/UF"), NameObject("/F")):
        if key in attachment.pdf_object:
            assert attachment.pdf_object[key] == NullObject()
    assert attachment.alternative_name is None

    attachment.alternative_name = TextStringObject("PDF String")
    assert attachment.alternative_name == "PDF String"


def test_embedded_file__alternative_name__uf_key_only():
    """Alternative name handling when only /UF is present in the file specification."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")
    spec = attachment.pdf_object
    uf_key = NameObject("/UF")
    f_key = NameObject("/F")

    # Arrange: keep /UF only.
    spec[uf_key] = create_string_object("original_uf")
    del spec[f_key]
    assert uf_key in spec
    assert f_key not in spec

    # Resetting nulls /UF and does not create /F.
    attachment.alternative_name = None
    assert spec[uf_key] == NullObject()
    assert f_key not in spec

    # Setting a value fills both keys.
    attachment.alternative_name = TextStringObject("new_uf")
    expected = create_string_object("new_uf")
    assert spec[uf_key] == expected
    assert spec[f_key] == expected


def test_embedded_file__alternative_name__f_key_only():
    """Alternative name handling when only /F is present in the file specification."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")
    spec = attachment.pdf_object
    f_key = NameObject("/F")
    uf_key = NameObject("/UF")

    # Arrange: keep /F only.
    spec[f_key] = create_string_object("original_f")
    if uf_key in spec:
        del spec[uf_key]
    assert f_key in spec
    assert uf_key not in spec

    # Resetting nulls /F and does not create /UF.
    attachment.alternative_name = None
    assert spec[f_key] == NullObject()
    assert uf_key not in spec

    # Setting a value fills both keys.
    attachment.alternative_name = TextStringObject("new_f")
    expected = create_string_object("new_f")
    assert spec[f_key] == expected
    assert spec[uf_key] == expected


def test_embedded_file__alternative_name__both_f_and_uf():
    """Alternative name handling when both /F and /UF are present in the file specification."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")
    spec = attachment.pdf_object
    f_key = NameObject("/F")
    uf_key = NameObject("/UF")

    spec[f_key] = create_string_object("original_f")
    spec[uf_key] = create_string_object("original_uf")

    # Setting a value overwrites both keys.
    attachment.alternative_name = TextStringObject("new_name")
    expected = create_string_object("new_name")
    assert spec[f_key] == expected
    assert spec[uf_key] == expected
    assert attachment.alternative_name == "new_name"

    # Resetting nulls both keys.
    attachment.alternative_name = None
    assert spec[f_key] == NullObject()
    assert spec[uf_key] == NullObject()
    assert attachment.alternative_name is None


def test_embedded_file__description_setter():
    """Set, reset to ``None`` and set again the description of a fresh attachment."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")

    attachment.description = TextStringObject("Test Description")
    assert attachment.description == "Test Description"

    # Resetting stores a null object under /Desc.
    attachment.description = None
    assert attachment.pdf_object[NameObject("/Desc")] == NullObject()

    attachment.description = TextStringObject("PDF Description")
    assert attachment.description == "PDF Description"


def test_embedded_file__subtype_setter():
    """Set, reset to ``None`` and set again the subtype of a fresh attachment."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")

    attachment.subtype = NameObject("/text/plain")
    assert attachment.subtype == "/text/plain"

    # Resetting stores a null object under /Subtype of the embedded file stream.
    attachment.subtype = None
    assert attachment._embedded_file[NameObject("/Subtype")] == NullObject()

    attachment.subtype = NameObject("/application#2Fjson")
    assert attachment.subtype == "/application#2Fjson"


def test_embedded_file__content_setter():
    """The content setter accepts both bytes and str; the getter always returns bytes."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")
    assert attachment.content == b"content"

    # Bytes input.
    attachment.content = b"Hello World!"
    assert attachment.content == b"Hello World!"

    # String input is read back as bytes.
    attachment.content = "Lorem ipsum dolor sit amet"
    assert attachment.content == b"Lorem ipsum dolor sit amet"


def test_embedded_file__size_setter():
    """Set, reset to ``None`` and set again the size of a fresh attachment."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")

    attachment.size = NumberObject(1024)
    assert attachment.size == 1024

    # Resetting stores a null object under /Size of the parameters dictionary.
    attachment.size = None
    assert attachment._ensure_params[NameObject("/Size")] == NullObject()

    attachment.size = NumberObject(2048)
    assert attachment.size == 2048


def test_embedded_file__size_getter():
    """The size getter maps a null /Size to ``None`` and returns numeric values as they are."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")
    size_key = NameObject("/Size")

    attachment._ensure_params[size_key] = NullObject()
    assert attachment.size is None

    attachment._ensure_params[size_key] = NumberObject(4096)
    assert attachment.size == 4096


def test_embedded_file__creation_date_setter():
    """Set the creation date and reset it to ``None`` afterwards."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")

    created = datetime.datetime(2023, 1, 1, 12, 0, 0)
    attachment.creation_date = created
    assert attachment.creation_date == created

    # Resetting stores a null object under /CreationDate of the parameters dictionary.
    attachment.creation_date = None
    assert attachment._ensure_params[NameObject("/CreationDate")] == NullObject()


def test_embedded_file__modification_date_setter():
    """Set the modification date and reset it to ``None`` afterwards."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")

    modified = datetime.datetime(2023, 1, 2, 12, 0, 0)
    attachment.modification_date = modified
    assert attachment.modification_date == modified

    # Resetting stores a null object under /ModDate of the parameters dictionary.
    attachment.modification_date = None
    assert attachment._ensure_params[NameObject("/ModDate")] == NullObject()


def test_embedded_file__checksum_setter():
    """Set, reset to ``None`` and set again the checksum of a fresh attachment."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")

    attachment.checksum = ByteStringObject(b"checksum_value")
    assert attachment.checksum == b"checksum_value"

    # Resetting stores a null object under /CheckSum of the parameters dictionary.
    attachment.checksum = None
    assert attachment._ensure_params[NameObject("/CheckSum")] == NullObject()

    attachment.checksum = ByteStringObject(b"pdf_checksum")
    assert attachment.checksum == b"pdf_checksum"


def test_embedded_file__associated_file_relationship_setter():
    """A relationship assigned as name object can be read back as ``"/Data"``."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")
    relationship = NameObject("/Data")

    attachment.associated_file_relationship = relationship
    assert attachment.associated_file_relationship == "/Data"


def test_embedded_file__setters_integration():
    """Apply all setters to one attachment, write the document and find the attachment again on reading."""
    writer = PdfWriter()
    writer.add_blank_page(100, 100)

    attachment = writer.add_attachment("test.txt", b"Hello, World!")
    attachment.alternative_name = TextStringObject("Alternative Name")
    attachment.description = TextStringObject("Test Description")
    attachment.subtype = NameObject("/text/plain")
    attachment.size = NumberObject(13)
    attachment.creation_date = datetime.datetime(2023, 1, 1, 12, 0, 0)
    attachment.modification_date = datetime.datetime(2023, 1, 2, 12, 0, 0)
    attachment.checksum = ByteStringObject(b"checksum123")
    attachment.associated_file_relationship = NameObject(AFRelationship.DATA)

    # Make sure that this is an indirect object for PDF/A-3 compliance.
    expected_reference = IndirectObject(6, 0, writer)
    assert attachment.pdf_object.indirect_reference == expected_reference

    # Round trip through an in-memory file.
    output = BytesIO()
    writer.write(output)
    assert "test.txt" in PdfReader(output).attachments


def test_embedded_file__null_object_handling():
    """Properties which have been set and then reset to ``None`` must read back as ``None``."""
    attachment = PdfWriter().add_attachment("test.txt", b"content")

    # Populate the properties first.
    attachment.alternative_name = TextStringObject("Name")
    attachment.description = TextStringObject("Description")
    attachment.subtype = NameObject("/text/plain")
    attachment.size = NumberObject(1024)
    attachment.checksum = ByteStringObject(b"checksum")

    # Reset all of them.
    attachment.alternative_name = None
    attachment.description = None
    attachment.subtype = None
    attachment.size = None
    attachment.checksum = None

    # The getters must report `None` for each of them.
    assert attachment.alternative_name is None
    assert attachment.description is None
    assert attachment.subtype is None
    assert attachment.size is None
    assert attachment.checksum is None


def test_embedded_file__delete_without_parent():
    """Deleting an ``EmbeddedFile`` created without a parent must raise ``PyPdfError``."""
    orphan = EmbeddedFile(name="test.txt", pdf_object=DictionaryObject())
    expected_message = r"^Parent required to delete file from document\.$"
    with pytest.raises(PyPdfError, match=expected_message):
        orphan.delete()


def test_embedded_file__delete_known():
    """Delete one of two attachments; deleting the same attachment again must fail."""
    writer = PdfWriter()
    writer.add_blank_page(100, 100)
    removed = writer.add_attachment("test.txt", b"content")
    writer.add_attachment("test2.txt", b"content2")
    assert len(list(writer.attachment_list)) == 2

    # After deletion, the content of the removed attachment is not accessible anymore.
    removed.delete()
    with pytest.raises(PdfReadError, match=r"^/EF entry not found: {}$"):
        _ = removed.content

    # Only the second attachment is left.
    remaining = list(writer.attachment_list)
    assert len(remaining) == 1
    assert remaining[0].name == "test2.txt"

    # Delete second time.
    with pytest.raises(PyPdfError, match=r"^File not found in parent object\.$"):
        removed.delete()


def test_embedded_file__delete__no_indirect_reference():
    """Deletion must also work when the name tree holds the dictionary itself instead of a reference."""
    writer = PdfWriter()
    writer.add_blank_page(100, 100)

    # Add an attachment and replace the indirect reference in the name tree
    # by the dictionary itself. This is how pypdf <= 6.1.0 would embed files
    # and thus should be supported as well.
    attachment = writer.add_attachment("test.txt", b"Hello, World!")
    expected_reference = IndirectObject(6, 0, writer)
    assert attachment.pdf_object.indirect_reference == expected_reference
    direct_dictionary = attachment.pdf_object.get_object()
    attachment._parent[-1] = direct_dictionary

    attachment.delete()
    remaining = list(writer.attachment_list)
    assert len(remaining) == 0


@pytest.mark.enable_socket
def test_embedded_file__create__kids_based_name_tree():
    """Test for issue #3473."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18691309/embedded_files_kids.pdf",
        name="embedded_files_kids.pdf",
    )
    writer = PdfWriter(clone_from=BytesIO(pdf_data))

    writer.add_attachment("test.pdf", b"content")

    # Content of the attachment which has already been part of the source file.
    existing_content = (
        b"Hello World!\n\nLorem ipsum dolor sit amet, consetetur sad"
        b"ipscing elitr, sed diam nonumy eirmod tempor\ninvidunt ut"
        b" labore et dolore magna aliquyam erat, sed diam voluptua"
        b". At vero eos et accusam\net justo duo dolores et ea rebu"
        b"m. Stet clita kasd gubergren, no sea takimata sanctus es"
        b"t Lorem\nipsum dolor sit amet. Lorem ipsum dolor sit amet"
        b", consetetur sadipscing elitr, sed diam\nnonumy eirmod te"
        b"mpor invidunt ut labore et dolore magna aliquyam erat, s"
        b"ed diam voluptua.\nAt vero eos et accusam et justo duo do"
        b"lores et ea rebum. Stet clita kasd gubergren, no sea\ntak"
        b"imata sanctus est Lorem ipsum dolor sit amet.\n"
    )
    assert dict(writer.attachments) == {
        "factur-x.xml": [existing_content],
        "test.pdf": [b"content"]
    }

    # Both files are listed in the /Names array, each name followed by its reference.
    attachments = list(writer.attachment_list)
    assert len(attachments) == 2
    existing_file, new_file = attachments[0], attachments[1]
    names_array = writer.root_object["/Names"]["/EmbeddedFiles"]["/Names"]
    assert names_array == [
        "factur-x.xml", existing_file.pdf_object.indirect_reference,
        "test.pdf", new_file.pdf_object.indirect_reference,
    ]


def test_embedded_file__create__neither_kids_nor_names():
    """Adding an attachment must fail when the embedded files tree has neither /Names nor /Kids."""
    writer = PdfWriter()
    writer.add_blank_page(100, 100)

    # Add an attachment and remove the corresponding /Names key.
    writer.add_attachment("test.txt", b"Hello, World!")
    embedded_files_tree = writer.root_object["/Names"]["/EmbeddedFiles"]
    del embedded_files_tree["/Names"]

    expected_message = r"^Got neither Names nor Kids in embedded files tree\.$"
    with pytest.raises(expected_exception=PdfReadError, match=expected_message):
        writer.add_attachment("test2.txt", b"content2")


def test_embedded_file__get_insertion_index():
    """Check ``EmbeddedFile._get_insertion_index`` for various existing names and new names."""

    def insertion_index(existing_names, new_name):
        # Build the flat array used by the tests: each name is followed by a null object as value.
        entries = ArrayObject()
        for existing_name in existing_names:
            entries.append(TextStringObject(existing_name))
            entries.append(NullObject())
        return EmbeddedFile._get_insertion_index(entries, new_name)

    # Empty list.
    assert insertion_index([], "test.txt") == 0

    # One mismatching entry.
    assert insertion_index(["dummy.txt"], "test.txt") == 2
    assert insertion_index(["xxx.txt"], "test.txt") == 0

    # Multiple entries.
    assert insertion_index(["dummy.txt", "xxx.txt"], "test.txt") == 2
    assert insertion_index(["xxx.txt", "yyy.txt"], "test.txt") == 0
    assert insertion_index(["aaa.txt", "bbb.txt"], "test.txt") == 4
    assert insertion_index(["aaa.txt", "test.txt", "zzz.txt"], "test.txt") == 4

    # Length.
    assert insertion_index(["a"], "aa") == 2
    assert insertion_index(["a"], "a") == 2
    assert insertion_index(["aaa"], "aa") == 0

    # Special characters.
    assert insertion_index(["café"], "cafe") == 0
    assert insertion_index(["Tun"], "Tür") == 2


def test_embedded_file__order():
    """Attachments added in arbitrary order must end up sorted by name in the /Names array."""
    writer = PdfWriter()
    writer.add_blank_page(100, 100)

    first_test = writer.add_attachment("test.txt", "content")
    abc = writer.add_attachment("abc.txt", "content")
    xyz = writer.add_attachment("xyz.txt", "content")
    second_test = writer.add_attachment("test.txt", "content2")

    # Files sharing a name are grouped, with their contents in insertion order.
    assert dict(writer.attachments) == {
        "abc.txt": [b"content"],
        "test.txt": [b"content", b"content2"],
        "xyz.txt": [b"content"]
    }

    # Expected flat array: each name is followed by the reference of its file specification.
    expected_names = []
    for file_name, attachment in (
        ("abc.txt", abc),
        ("test.txt", first_test),
        ("test.txt", second_test),
        ("xyz.txt", xyz),
    ):
        expected_names.append(file_name)
        expected_names.append(attachment.pdf_object.indirect_reference)
    assert writer.root_object["/Names"]["/EmbeddedFiles"]["/Names"] == expected_names


================================================
FILE: tests/generic/test_image_inline.py
================================================
"""Test the pypdf.generic._image_inline module."""
from io import BytesIO

import pytest

from pypdf import PdfReader
from pypdf.errors import PdfReadError
from pypdf.generic._image_inline import is_followed_by_binary_data
from tests import get_data_from_url


def test_is_followed_by_binary_data():
    """Check ``is_followed_by_binary_data`` against a set of small hand-written byte streams."""

    def looks_binary(data):
        # Run the check on a fresh in-memory stream for the given bytes.
        return is_followed_by_binary_data(BytesIO(data))

    # Empty/too short stream.
    assert not looks_binary(b"")
    assert not looks_binary(b" q\n")

    # byte < 32 and no whitespace.
    # The same stream is checked repeatedly; the reads show that the check keeps the position.
    stream = BytesIO(b"\x00\x11\x13\x37")
    for expected_byte in (b"\x00", b"\x11"):
        assert is_followed_by_binary_data(stream)
        assert stream.read(1) == expected_byte
    assert is_followed_by_binary_data(stream)
    assert stream.read() == b"\x13\x37"

    # byte < 32, but whitespace.
    assert not looks_binary(b" q\n")

    # Whitespace only.
    assert not looks_binary(b" \n\n\n  \n")

    # No `operator_end`.
    assert not looks_binary(b"\n\n\n\n\n\n\n\nBT\n")

    # Operator length is <= 3.
    assert not looks_binary(b"\n\n\n\n\n\n\nBT\n")

    # Operator length is > 3.
    assert looks_binary(b"\n\n\n\n\nTEST\n")

    # Just characters.
    assert looks_binary(b" ABCDEF")

    # No `operator_start`.
    assert looks_binary(b"ABCDEFG")

    # Name object.
    assert not looks_binary(b"/R10 gs\n/R12 cs\n")

    # Numbers.
    assert not looks_binary(b"1337 42 m\n")
    assert not looks_binary(b"1234.56 42 13 37 10 20 c\n")


@pytest.mark.enable_socket
def test_extract_inline_dct__early_end_of_file():
    """Loading the first image of the sample file must raise ``PdfReadError`` for the unexpected stream end."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/23056988/inline_dct__early_eof.pdf",
        name="inline_dct__early_eof.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_data)).pages[0]

    with pytest.raises(expected_exception=PdfReadError, match=r"^Unexpected end of stream$"):
        first_page.images[0].image.load()


@pytest.mark.enable_socket
def test_extract_inline_dct__multiple_eod():
    """Every image on every page of the sample file (cached as ``issue3517.pdf``) must load without error."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/23900687/cedolini_esempio-1.pdf",
        name="issue3517.pdf",
    )
    document = PdfReader(BytesIO(pdf_data))

    for current_page in document.pages:
        for image_file in current_page.images:
            _ = image_file.image.load()


================================================
FILE: tests/generic/test_image_xobject.py
================================================
"""Test the pypdf.generic._image_xobject module."""
from io import BytesIO

import pytest
from PIL import Image

from pypdf import PdfReader
from pypdf._utils import Version
from pypdf.constants import FilterTypes, ImageAttributes, StreamAttributes
from pypdf.errors import EmptyImageDataError, PdfReadError
from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject, StreamObject, TextStringObject
from pypdf.generic._image_xobject import _extended_image_from_bytes, _handle_flate, _xobj_to_image

from .. import RESOURCE_ROOT, get_data_from_url
from ..utils import get_image_data


@pytest.mark.enable_socket
def test_get_imagemode_recursion_depth():
    """Avoid infinite recursion for nested color spaces."""
    original_pdf = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12814018/out1.pdf",
        name="issue2240.pdf",
    )
    # Simple example: Just let the color space object reference itself.
    # The alternative would be to generate a chain of referencing objects.
    regular_object = b"\n10 0 obj\n[ /DeviceN [ /HKS#2044#20K /Magenta /Yellow /Black ] 7 0 R 11 0 R 12 0 R ]\nendobj\n"
    cyclic_object = b"\n10 0 obj\n[ /DeviceN [ /HKS#2044#20K /Magenta /Yellow /Black ] 10 0 R 11 0 R 12 0 R ]\nendobj\n"
    patched_pdf = original_pdf.replace(regular_object, cyclic_object)
    reader = PdfReader(BytesIO(patched_pdf))
    expected_message = (
        r"Color spaces nested too deeply\. If required, consider increasing MAX_IMAGE_MODE_NESTING_DEPTH\."
    )
    with pytest.raises(PdfReadError, match=expected_message):
        reader.pages[0].images[0]


def test_handle_flate__image_mode_1(caplog):
    """Check ``_handle_flate`` for mode "1" with an indexed color space and lookup tables of varying length."""
    packed_pixels = b"\x00\xe0\x00"
    lookup = DecodedStreamObject()
    grey = (66, 66, 66)
    blue = (0, 19, 55)
    expected_data = (grey,) * 3 + (blue,) * 3 + (grey,) * 3

    def decode_pixels():
        # Decode the 3x3 image with whatever lookup data is currently set on `lookup`.
        color_space = ArrayObject(
            [NameObject("/Indexed"), NameObject("/DeviceRGB"), NumberObject(1), lookup]
        )
        result = _handle_flate(
            size=(3, 3),
            data=packed_pixels,
            mode="1",
            color_space=color_space,
            colors=2,
            obj_as_text="dummy",
        )
        return get_image_data(result[0])

    # No trailing data.
    lookup.set_data(b"\x42\x42\x42\x00\x13\x37")
    assert expected_data == decode_pixels()
    assert not caplog.text

    # Trailing whitespace.
    lookup.set_data(b"\x42\x42\x42\x00\x13\x37  \x0a")
    assert expected_data == decode_pixels()
    assert not caplog.text

    # Trailing non-whitespace character.
    lookup.set_data(b"\x42\x42\x42\x00\x13\x37\x12")
    assert expected_data == decode_pixels()
    assert "Too many lookup values: Expected 6, got 7." in caplog.text

    # Not enough lookup data.
    # `\xe0` of the original input (the middle part) does not use `0x37 = 55` for the lookup
    # here, but received a custom padding of `0`.
    lookup.set_data(b"\x42\x42\x42\x00\x13")
    caplog.clear()
    padded_blue = (0, 19, 0)
    expected_short_data = (grey,) * 3 + (padded_blue,) * 3 + (grey,) * 3
    assert expected_short_data == decode_pixels()
    assert "Not enough lookup values: Expected 6, got 5." in caplog.text


def test_extended_image_frombytes_zero_data():
    """Empty input data must be rejected with ``EmptyImageDataError``."""
    expected_message = r"Data is 0 bytes, cannot process an image from empty data\."
    with pytest.raises(EmptyImageDataError, match=expected_message):
        # Positional arguments: mode, size, data.
        _extended_image_from_bytes("RGB", (1, 1), b"")


def test_handle_flate__autodesk_indexed():
    """Load the images of the AutoCAD sample; a patched color space with only 3 values must fail."""
    pdf_path = RESOURCE_ROOT / "AutoCad_Diagram.pdf"

    # Unmodified file: all images of the first page can be loaded.
    first_page = PdfReader(pdf_path).pages[0]
    for image_name, image_file in first_page.images.items():
        assert image_name.startswith("/")
        image_file.image.load()

    # Patched file: dropping `\x00255` after `/DeviceRGB` leaves the color space with 3 values.
    patched_pdf = pdf_path.read_bytes().replace(b"/DeviceRGB\x00255", b"/DeviceRGB")
    first_page = PdfReader(BytesIO(patched_pdf)).pages[0]
    expected_message = r"^Expected color space with 4 values, got 3: \['/Indexed', '/DeviceRGB', '\\x00\\x80\\x00\\x80\\x80耀"  # noqa: E501
    with pytest.raises(PdfReadError, match=expected_message):
        for image_name, _image in first_page.images.items():  # noqa: PERF102
            assert image_name.startswith("/")


@pytest.mark.enable_socket
def test_get_mode_and_invert_color():
    """All images on the page at index 12 of the sample file must load without error."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381726/tika-957721.pdf",
        name="tika-957721.pdf",
    )
    target_page = PdfReader(BytesIO(pdf_data)).pages[12]
    for _name, image_file in target_page.images.items():  # noqa: PERF102
        image_file.image.load()


@pytest.mark.enable_socket
def test_get_imagemode__empty_array():
    """Loading the first image of the sample file (cached as ``issue3499.pdf``) must raise ``PdfReadError``."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/23050451/poc.pdf",
        name="issue3499.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_data)).pages[0]

    expected_message = r"^ColorSpace field not found in .+"
    with pytest.raises(expected_exception=PdfReadError, match=expected_message):
        first_page.images[0].image.load()


def test_p_image_with_alpha_mask():
    """
    Convert a palette image with an /SMask entry and check the alpha channel of the result.

    The base image is a 10x10 "P" image filled with color 0, the mask a 10x10 "1" image
    with a diagonal stripe set. In the converted image, only the diagonal pixels are
    expected to have an alpha value of 255.
    """
    # Generate the base image. Use TIFF as this is easy to do on the fly.
    image = Image.new(mode="P", size=(10, 10), color=0)
    image_data = BytesIO()
    image.save(image_data, format="tiff")

    # Set the common values.
    x_object = StreamObject()
    mask_object = StreamObject()
    for obj in [x_object, mask_object]:
        obj[NameObject(ImageAttributes.WIDTH)] = NumberObject(image.width)
        obj[NameObject(ImageAttributes.HEIGHT)] = NumberObject(image.height)
        obj[NameObject(StreamAttributes.FILTER)] = NameObject(FilterTypes.CCITT_FAX_DECODE)

    # Set the basic image data.
    x_object.set_data(image_data.getvalue())
    x_object[NameObject(ImageAttributes.COLOR_SPACE)] = TextStringObject("palette")

    # Generate the mask image. Will be a diagonal white stripe.
    image = Image.new(mode="1", size=(image.width, image.height))
    # Plain loop instead of a list comprehension: `putpixel` is called for its side effect only.
    for i in range(10):
        image.putpixel((i, i), 1)
    image_data = BytesIO()
    image.save(image_data, format="tiff")

    # Set the mask data.
    mask_object.set_data(image_data.getvalue())
    mask_object[NameObject(ImageAttributes.COLOR_SPACE)] = TextStringObject("1bit")

    # Add the mask to the image.
    x_object[NameObject("/SMask")] = mask_object

    # Generate the output image and make sure that the diagonal stripe is present.
    extension, data, image = _xobj_to_image(x_object)
    assert extension == ".png"
    assert data.startswith(b"\x89PNG")
    for i in range(10):
        for j in range(10):
            assert image.getpixel((i, j)) == (0, 0, 0, 255 * (i == j))


@pytest.mark.enable_socket
def test_handle_flate__icc_based__image_mode_1():
    """The first image of the sample file must decode to a 64x64 mode "1" chessboard of 8x8 pixel squares."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/23756943/pypdf_bug_3534_iccbased.pdf",
        name="issue3534.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_data)).pages[0]

    image = first_page.images[0].image
    assert image is not None
    image.load()
    assert image.size == (64, 64)
    assert image.mode == "1"

    for y in range(64):
        square_y = y // 8
        for x in range(64):
            # Determine which chess square this pixel belongs to
            square_x = x // 8
            # Squares with an odd coordinate sum are black (0), all others white (255).
            expected_pixel = 0 if (square_x + square_y) % 2 == 1 else 255
            assert image.getpixel((x, y)) == expected_pixel


@pytest.mark.skipif(
    condition=Version(Image.__version__) < Version("12.1.0"),
    reason="Unsuitable Pillow version."
)
def test_handle_jpx__explicit_decode():
    """
    An explicit ``/Decode`` array of ``1 0`` pairs inverts every component of a CMYK JPX image.

    The source image is generated on the fly with only the first component set on
    the main diagonal. The extracted image has to be its exact inversion, while the
    source image itself has to stay untouched.
    """
    size = 16

    stream = StreamObject()
    stream[NameObject("/BitsPerComponent")] = NumberObject(8)
    stream[NameObject("/ColorSpace")] = NameObject("/DeviceCMYK")
    stream[NameObject("/Decode")] = ArrayObject([1, 0, 1, 0, 1, 0, 1, 0])
    stream[NameObject("/Filter")] = NameObject("/JPXDecode")
    stream[NameObject("/Height")] = NumberObject(size)
    stream[NameObject("/Width")] = NumberObject(size)

    image = Image.new(mode="CMYK", size=(size, size))
    for i in range(size):
        image.putpixel((i, i), 255)
    # Encode once; the encoded bytes are only needed as the stream data.
    image_data = BytesIO()
    image.save(image_data, format="JPEG2000")
    stream.set_data(image_data.getvalue())

    result = _xobj_to_image(x_object=stream)[2]
    for y in range(size):
        for x in range(size):
            assert result.getpixel((x, y)) == (255 * (x != y), 255, 255, 255), (x, y)
            assert image.getpixel((x, y)) == (255 * (x == y), 0, 0, 0), (x, y)


================================================
FILE: tests/generic/test_link.py
================================================
"""Test the pypdf.generic._link module."""
from io import BytesIO

import pytest

from pypdf import PageObject, PdfReader, PdfWriter
from pypdf.generic import ArrayObject, NameObject, NullObject, extract_links
from tests import get_data_from_url


@pytest.mark.enable_socket
def test_extract_links__null_object_in_old_page():
    """Appending the issue3656 sample to a writer must complete without raising."""
    url = "https://github.com/user-attachments/files/25507697/sample.pdf"
    name = "issue3656.pdf"
    pdf_bytes = get_data_from_url(url=url, name=name)
    source = PdfReader(BytesIO(pdf_bytes))

    # There is nothing to assert: the test passes if the append does not raise.
    target = PdfWriter()
    target.append(source)


def test_extract_links(caplog):
    """``extract_links`` returns no links and logs nothing for missing or null-only ``/Annots`` entries."""
    annots = NameObject("/Annots")
    page1 = PageObject()
    page2 = PageObject()

    def assert_no_links_and_no_logs():
        # Every scenario below expects an empty result without any log message.
        assert extract_links(page1, page2) == []
        assert caplog.messages == []

    # No annotations.
    assert_no_links_and_no_logs()

    # Only old annotations.
    page1[annots] = NullObject()
    assert_no_links_and_no_logs()
    caplog.clear()

    page1[annots] = ArrayObject([NullObject()])
    assert_no_links_and_no_logs()
    caplog.clear()

    # Both old and new annotations. Both arrays have the same size here.
    page2[annots] = ArrayObject([NullObject()])
    assert_no_links_and_no_logs()
    caplog.clear()

    page2[annots] = NullObject()
    assert_no_links_and_no_logs()
    caplog.clear()

    # Only new annotations.
    del page1[annots]
    page2[annots] = ArrayObject([NullObject()])
    assert_no_links_and_no_logs()


================================================
FILE: tests/scripts/__init__.py
================================================


================================================
FILE: tests/scripts/data/commits__version_4_0_1.json
================================================
[
  {
    "sha": "b7bfd0d7eddfd0865a94cc9e7027df6596242cf7",
    "node_id": "C_kwDOAC-ZndoAKGI3YmZkMGQ3ZWRkZmQwODY1YTk0Y2M5ZTcwMjdkZjY1OTYyNDJjZjc",
    "commit": {
      "author": {
        "name": "rsinger417",
        "email": "159086296+rsinger417@users.noreply.github.com",
        "date": "2024-02-13T21:42:56Z"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "date": "2024-02-13T21:42:56Z"
      },
      "message": "BUG: Use NumberObject for /Border elements of annotations (#2451)\n\nAs defined in Table 164 – Entries common to all annotation dictionaries, the /Border Array consists of NumberObjects. Previously, pypdf used NameObject which is wrong.\r\n\r\nThe previous version caused a warning in the class NameObject: \"Incorrect first char in NameObject:({self})\".\r\n\r\nFixes #2444",
      "tree": {
        "sha": "e75b96ca1bb3e60a696bd57c5bb5aac9e7c5651b",
        "url": "https://api.github.com/repos/py-pdf/pypdf/git/trees/e75b96ca1bb3e60a696bd57c5bb5aac9e7c5651b"
      },
      "url": "https://api.github.com/repos/py-pdf/pypdf/git/commits/b7bfd0d7eddfd0865a94cc9e7027df6596242cf7",
      "comment_count": 0,
      "verification": {
        "verified": true,
        "reason": "valid",
        "signature": "-----BEGIN PGP SIGNATURE-----\n\nwsFcBAABCAAQBQJly+JgCRC1aQ7uu5UhlAAA56IQAKmAJws3vIKAR1dTx6e8fgp+\nWFMSZub7HGMi0Wz6MKF+hp1fxTNheBwlWVvVHuIGggtg9QSFkpHmi5Qqn+IUHJq1\nI5Jst6Il3lCF2UXUboyN+XbS/lo6rXHriz+Yi7Xwgj+JulHnruFvFEU40AdKnI1w\n88Wh94KXEJqQ6nyP4R2qpDLLlhQ0/4FTIZCWfw8XK1vPmTQwP0ZroL5N7s1pq2s9\nBBDtvcxTE1EbWIyyMzAiNByxdaTakqNLRMq80saiArR4t6f1H4v8dgYep/6R5dxU\n2GGXjh6JOS6xNObrSNvuFanrgAxZoft255OGsU5Y/2yxryp+Bs1QO/PXYXch2ERN\nXyYQKxp886PRcL1vGukksqx5t8Oc781z7RHV/QCIJ5Ry66vC7zDmkk2+Eq6gwWMr\nHzTg3eQ2DL+I4CsNIezb470UOKdIWu9SdQmOrGeUAnQ0rB0V7VOe9n1buPmMP/e0\ngXcq/BNFaNWTCyIHv1XgB6G516k4zM1F5j1BF0GCrhrdX8lXMZUB+WI3V8CsRObI\naKdnE9aGBJPZCFN5O+92ntKt7tUQQLmLNPDgYZktzBg73ejRlpQ4zOBRBFiSNfPj\nRrNzBn1LFtSLxc7/MsP1lLl5NtI0oLWrZMjym3CcAJQYmqsZCv22b94R4hSorAwW\nGr2/wRH3JQVIToDU1/W4\n=JYMh\n-----END PGP SIGNATURE-----\n",
        "payload": "tree e75b96ca1bb3e60a696bd57c5bb5aac9e7c5651b\nparent 8cacb0fc8fee9920b0515d1289e6ee8191eb3f21\nauthor rsinger417 <159086296+rsinger417@users.noreply.github.com> 1707860576 -0600\ncommitter GitHub <noreply@github.com> 1707860576 +0100\n\nBUG: Use NumberObject for /Border elements of annotations (#2451)\n\nAs defined in Table 164 – Entries common to all annotation dictionaries, the /Border Array consists of NumberObjects. Previously, pypdf used NameObject which is wrong.\r\n\r\nThe previous version caused a warning in the class NameObject: \"Incorrect first char in NameObject:({self})\".\r\n\r\nFixes #2444"
      }
    },
    "url": "https://api.github.com/repos/py-pdf/pypdf/commits/b7bfd0d7eddfd0865a94cc9e7027df6596242cf7",
    "html_url": "https://github.com/py-pdf/pypdf/commit/b7bfd0d7eddfd0865a94cc9e7027df6596242cf7",
    "comments_url": "https://api.github.com/repos/py-pdf/pypdf/commits/b7bfd0d7eddfd0865a94cc9e7027df6596242cf7/comments",
    "author": {
      "login": "rsinger417",
      "id": 159086296,
      "node_id": "U_kgDOCXt22A",
      "avatar_url": "https://avatars.githubusercontent.com/u/159086296?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/rsinger417",
      "html_url": "https://github.com/rsinger417",
      "followers_url": "https://api.github.com/users/rsinger417/followers",
      "following_url": "https://api.github.com/users/rsinger417/following{/other_user}",
      "gists_url": "https://api.github.com/users/rsinger417/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/rsinger417/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/rsinger417/subscriptions",
      "organizations_url": "https://api.github.com/users/rsinger417/orgs",
      "repos_url": "https://api.github.com/users/rsinger417/repos",
      "events_url": "https://api.github.com/users/rsinger417/events{/privacy}",
      "received_events_url": "https://api.github.com/users/rsinger417/received_events",
      "type": "User",
      "site_admin": false
    },
    "committer": {
      "login": "web-flow",
      "id": 19864447,
      "node_id": "MDQ6VXNlcjE5ODY0NDQ3",
      "avatar_url": "https://avatars.githubusercontent.com/u/19864447?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/web-flow",
      "html_url": "https://github.com/web-flow",
      "followers_url": "https://api.github.com/users/web-flow/followers",
      "following_url": "https://api.github.com/users/web-flow/following{/other_user}",
      "gists_url": "https://api.github.com/users/web-flow/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/web-flow/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/web-flow/subscriptions",
      "organizations_url": "https://api.github.com/users/web-flow/orgs",
      "repos_url": "https://api.github.com/users/web-flow/repos",
      "events_url": "https://api.github.com/users/web-flow/events{/privacy}",
      "received_events_url": "https://api.github.com/users/web-flow/received_events",
      "type": "User",
      "site_admin": false
    },
    "parents": [
      {
        "sha": "8cacb0fc8fee9920b0515d1289e6ee8191eb3f21",
        "url": "https://api.github.com/repos/py-pdf/pypdf/commits/8cacb0fc8fee9920b0515d1289e6ee8191eb3f21",
        "html_url": "https://github.com/py-pdf/pypdf/commit/8cacb0fc8fee9920b0515d1289e6ee8191eb3f21"
      }
    ]
  },
  {
    "sha": "8cacb0fc8fee9920b0515d1289e6ee8191eb3f21",
    "node_id": "C_kwDOAC-ZndoAKDhjYWNiMGZjOGZlZTk5MjBiMDUxNWQxMjg5ZTZlZTgxOTFlYjNmMjE",
    "commit": {
      "author": {
        "name": "Stefan",
        "email": "96178532+stefan6419846@users.noreply.github.com",
        "date": "2024-02-13T21:33:37Z"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "date": "2024-02-13T21:33:37Z"
      },
      "message": "DOC: Document easier way to update metadata (#2454)",
      "tree": {
        "sha": "79408055102933a8d62a3d1ec49df9f25fd5e963",
        "url": "https://api.github.com/repos/py-pdf/pypdf/git/trees/79408055102933a8d62a3d1ec49df9f25fd5e963"
      },
      "url": "https://api.github.com/repos/py-pdf/pypdf/git/commits/8cacb0fc8fee9920b0515d1289e6ee8191eb3f21",
      "comment_count": 0,
      "verification": {
        "verified": true,
        "reason": "valid",
        "signature": "-----BEGIN PGP SIGNATURE-----\n\nwsFcBAABCAAQBQJly+AxCRC1aQ7uu5UhlAAAd2kQAB8venK8xBYafzASXTRV2ye/\nOkGIVobepYja0lKIgZpipPlmDbDnHB2UptWRMpAd7rNiL9iYnSqBNxOmCvfux99/\nqx0h9XuYzSZ1KJ6cK43ab1ErSsrjvLpO/LsMmtakzZR7BrFUjO6mIE3YuU0GmhKM\nNUPngT+A6/Lxz6Z+UwqkeylkcDj+90gNAPiKY2yr+mKmg99RI5Xqvm7j++vT3bPF\nJQmr46w0aiGW30Von0JAtu/IvprGksrfHWALFIYMHnJCaXJdv2mPJ8mwiLew/o4L\n0uicPmwnDvS7VdCObi6EKbEP4ptgierco8pAMVRpkUpnmu8ObgT7ZzPLT6iay6U1\n2Gtc0zYXlcVSo4JQW9iE9zrGMk91m+BmIOZAhJsgfdz4DewCWCBxmz4+u0wlIlzN\n6JwwZQsW3Yq/P/gJ9qxBUKPe3SAcs3jz2VG3fiOt/HzAA6YLAUPUDxnhwvWhju5i\nLiQEApEnIri4OeNhqYmOjsEI3aV/3s6jE2fEiGPDkQW61yMAAiSVgZk3BcnFwZzL\nHrf+JWTRnosPFOhkRoTH3AOzmOWOKUCCUmVdC8nKn4Sp0tp+31HIH/h3LmVflBLy\nXHwPT/6OwW1yBzueYM6LWwovNlk3AS2g19fgylOmokIkrnlmi4nCwD30hM8plEFk\ni7hsSGE/rfsjTt5lTBip\n=1RO9\n-----END PGP SIGNATURE-----\n",
        "payload": "tree 79408055102933a8d62a3d1ec49df9f25fd5e963\nparent 3fb63f7e3839ce39ac98978c996f3086ba230a20\nauthor Stefan <96178532+stefan6419846@users.noreply.github.com> 1707860017 +0100\ncommitter GitHub <noreply@github.com> 1707860017 +0100\n\nDOC: Document easier way to update metadata (#2454)\n\n"
      }
    },
    "url": "https://api.github.com/repos/py-pdf/pypdf/commits/8cacb0fc8fee9920b0515d1289e6ee8191eb3f21",
    "html_url": "https://github.com/py-pdf/pypdf/commit/8cacb0fc8fee9920b0515d1289e6ee8191eb3f21",
    "comments_url": "https://api.github.com/repos/py-pdf/pypdf/commits/8cacb0fc8fee9920b0515d1289e6ee8191eb3f21/comments",
    "author": {
      "login": "stefan6419846",
      "id": 96178532,
      "node_id": "U_kgDOBbuRZA",
      "avatar_url": "https://avatars.githubusercontent.com/u/96178532?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/stefan6419846",
      "html_url": "https://github.com/stefan6419846",
      "followers_url": "https://api.github.com/users/stefan6419846/followers",
      "following_url": "https://api.github.com/users/stefan6419846/following{/other_user}",
      "gists_url": "https://api.github.com/users/stefan6419846/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/stefan6419846/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/stefan6419846/subscriptions",
      "organizations_url": "https://api.github.com/users/stefan6419846/orgs",
      "repos_url": "https://api.github.com/users/stefan6419846/repos",
      "events_url": "https://api.github.com/users/stefan6419846/events{/privacy}",
      "received_events_url": "https://api.github.com/users/stefan6419846/received_events",
      "type": "User",
      "site_admin": false
    },
    "committer": {
      "login": "web-flow",
      "id": 19864447,
      "node_id": "MDQ6VXNlcjE5ODY0NDQ3",
      "avatar_url": "https://avatars.githubusercontent.com/u/19864447?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/web-flow",
      "html_url": "https://github.com/web-flow",
      "followers_url": "https://api.github.com/users/web-flow/followers",
      "following_url": "https://api.github.com/users/web-flow/following{/other_user}",
      "gists_url": "https://api.github.com/users/web-flow/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/web-flow/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/web-flow/subscriptions",
      "organizations_url": "https://api.github.com/users/web-flow/orgs",
      "repos_url": "https://api.github.com/users/web-flow/repos",
      "events_url": "https://api.github.com/users/web-flow/events{/privacy}",
      "received_events_url": "https://api.github.com/users/web-flow/received_events",
      "type": "User",
      "site_admin": false
    },
    "parents": [
      {
        "sha": "3fb63f7e3839ce39ac98978c996f3086ba230a20",
        "url": "https://api.github.com/repos/py-pdf/pypdf/commits/3fb63f7e3839ce39ac98978c996f3086ba230a20",
        "html_url": "https://github.com/py-pdf/pypdf/commit/3fb63f7e3839ce39ac98978c996f3086ba230a20"
      }
    ]
  },
  {
    "sha": "3fb63f7e3839ce39ac98978c996f3086ba230a20",
    "node_id": "C_kwDOAC-ZndoAKDNmYjYzZjdlMzgzOWNlMzlhYzk4OTc4Yzk5NmYzMDg2YmEyMzBhMjA",
    "commit": {
      "author": {
        "name": "Stefan",
        "email": "96178532+stefan6419846@users.noreply.github.com",
        "date": "2024-02-04T20:32:49Z"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "date": "2024-02-04T20:32:49Z"
      },
      "message": "TST: Avoid catching not emitted warnings (#2429)\n\nFix compatibility with pytest==8. \r\n\r\nRelevant upstream change: pytest-dev/pytest#9288\r\n\r\nFixes #2427",
      "tree": {
        "sha": "c96cab2f682f6db4c84440e26869b4d9de6a2bab",
        "url": "https://api.github.com/repos/py-pdf/pypdf/git/trees/c96cab2f682f6db4c84440e26869b4d9de6a2bab"
      },
      "url": "https://api.github.com/repos/py-pdf/pypdf/git/commits/3fb63f7e3839ce39ac98978c996f3086ba230a20",
      "comment_count": 0,
      "verification": {
        "verified": true,
        "reason": "valid",
        "signature": "-----BEGIN PGP SIGNATURE-----\n\nwsFcBAABCAAQBQJlv/RxCRC1aQ7uu5UhlAAATI8QAB/yRz+hoeJVtjW/CePJo2Jv\n451gPAo66s7JMG+PwcCiI8KAAUEusDbrJAmdq8rfqnShSB83h/7g/s5oFr/1lFyh\noKkwoeMt6hGKtwEkTpa877gAWJ4ssRb1ymJoy7quPNlbFYtKngMC60Vc5TNEY1ZX\nQ1FdIG5rVRBsA5H7fP7k0q2QC6w/Ns6nftpPFIf3JSVnillJ/RKDLhEfPw6/PMi0\nnIJ2moTgTs6uyc4R0blR44BoElPd46ot/SQDcnHEwIQlWpfa2RIpulhF8qkO9fe3\neCRBQ7TZXjedsG+Da71QKxRWRFdwPqO+HI4u5EHNLIaw8z9450jtbz5H1NhNIB1s\nkIDTMgFXxGVuFKXfneduA6TAxrrJ12ONHcrUkN30y9AQ7Qe/B8LJ50iXQvo81SwZ\nqTFBluB6WiVuMMMT0pHgNCjsAEPvaagPa10qvjVokXh1rXlzQiNwqBWCbwj2b6f4\n8i3Vf9ufrK5p2WhsfO1aCW7Yc2C620sq66ic2Ck5cT2HLJA+cF1j7d7PT3/N0veo\ncnpPpAFeUs2A6R/zL0yJSoPV+BLM0BdahxfsBlT9pdrdqvBA7JIGOC9c3msSWBZY\n6GmfmsmWp0xdwYDJEzUL06shKjH6GlzhWvkjYuYH3myJBCoAjlUWCsJCvXWOD3iX\nPID6Cv+BtDfu80muR94A\n=bK1N\n-----END PGP SIGNATURE-----\n",
        "payload": "tree c96cab2f682f6db4c84440e26869b4d9de6a2bab\nparent 61b73d49778e8f0fb172d5323e67677c9974e420\nauthor Stefan <96178532+stefan6419846@users.noreply.github.com> 1707078769 +0100\ncommitter GitHub <noreply@github.com> 1707078769 +0100\n\nTST: Avoid catching not emitted warnings (#2429)\n\nFix compatibility with pytest==8. \r\n\r\nRelevant upstream change: pytest-dev/pytest#9288\r\n\r\nFixes #2427"
      }
    },
    "url": "https://api.github.com/repos/py-pdf/pypdf/commits/3fb63f7e3839ce39ac98978c996f3086ba230a20",
    "html_url": "https://github.com/py-pdf/pypdf/commit/3fb63f7e3839ce39ac98978c996f3086ba230a20",
    "comments_url": "https://api.github.com/repos/py-pdf/pypdf/commits/3fb63f7e3839ce39ac98978c996f3086ba230a20/comments",
    "author": {
      "login": "stefan6419846",
      "id": 96178532,
      "node_id": "U_kgDOBbuRZA",
      "avatar_url": "https://avatars.githubusercontent.com/u/96178532?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/stefan6419846",
      "html_url": "https://github.com/stefan6419846",
      "followers_url": "https://api.github.com/users/stefan6419846/followers",
      "following_url": "https://api.github.com/users/stefan6419846/following{/other_user}",
      "gists_url": "https://api.github.com/users/stefan6419846/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/stefan6419846/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/stefan6419846/subscriptions",
      "organizations_url": "https://api.github.com/users/stefan6419846/orgs",
      "repos_url": "https://api.github.com/users/stefan6419846/repos",
      "events_url": "https://api.github.com/users/stefan6419846/events{/privacy}",
      "received_events_url": "https://api.github.com/users/stefan6419846/received_events",
      "type": "User",
      "site_admin": false
    },
    "committer": {
      "login": "web-flow",
      "id": 19864447,
      "node_id": "MDQ6VXNlcjE5ODY0NDQ3",
      "avatar_url": "https://avatars.githubusercontent.com/u/19864447?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/web-flow",
      "html_url": "https://github.com/web-flow",
      "followers_url": "https://api.github.com/users/web-flow/followers",
      "following_url": "https://api.github.com/users/web-flow/following{/other_user}",
      "gists_url": "https://api.github.com/users/web-flow/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/web-flow/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/web-flow/subscriptions",
      "organizations_url": "https://api.github.com/users/web-flow/orgs",
      "repos_url": "https://api.github.com/users/web-flow/repos",
      "events_url": "https://api.github.com/users/web-flow/events{/privacy}",
      "received_events_url": "https://api.github.com/users/web-flow/received_events",
      "type": "User",
      "site_admin": false
    },
    "parents": [
      {
        "sha": "61b73d49778e8f0fb172d5323e67677c9974e420",
        "url": "https://api.github.com/repos/py-pdf/pypdf/commits/61b73d49778e8f0fb172d5323e67677c9974e420",
        "html_url": "https://github.com/py-pdf/pypdf/commit/61b73d49778e8f0fb172d5323e67677c9974e420"
      }
    ]
  },
  {
    "sha": "61b73d49778e8f0fb172d5323e67677c9974e420",
    "node_id": "C_kwDOAC-ZndoAKDYxYjczZDQ5Nzc4ZThmMGZiMTcyZDUzMjNlNjc2NzdjOTk3NGU0MjA",
    "commit": {
      "author": {
        "name": "CWKSC",
        "email": "cwksc.person@gmail.com",
        "date": "2024-02-03T08:02:35Z"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "date": "2024-02-03T08:02:35Z"
      },
      "message": "DOC: Typo `Polyline` → `PolyLine` in adding-pdf-annotations.md (#2426)",
      "tree": {
        "sha": "9fb79466999d9d73c6ba15afdc76ce4d6f59c470",
        "url": "https://api.github.com/repos/py-pdf/pypdf/git/trees/9fb79466999d9d73c6ba15afdc76ce4d6f59c470"
      },
      "url": "https://api.github.com/repos/py-pdf/pypdf/git/commits/61b73d49778e8f0fb172d5323e67677c9974e420",
      "comment_count": 0,
      "verification": {
        "verified": true,
        "reason": "valid",
        "signature": "-----BEGIN PGP SIGNATURE-----\n\nwsFcBAABCAAQBQJlvfMbCRC1aQ7uu5UhlAAA1X0QADKiCwRr4WJNPYlwgJKp/I4l\nO/6H/uQ5XO6fSvkLNchzU+017kgwEfaPoEunTvb0rpAVfwjJknytCCaR5duQQ7np\naP23J6gIViawM15qp20C53q+5r6NUZnerOIrMKMGLaRtsDMIePYT6zd5Q9KTnx5/\nhF6X+LMx5zKDuXHmRV8Jhmii+8IQA4Ekgv/t+UNmkqpVQig603/IzPTVnUkY+Gcu\nNEHb1W66bS5/BvMyrqwDx//Z0kpxJltNAoaVNAAz1+KgUm/NncBJcuR95U7ffGkO\neoi9UqlF06YO4mkA7ZbAUfgujWEDsbCsnFuVsKe5RJLeRvidHQl7YJQg36mWV+He\nNTMttZX2UJOiFLDeWeEoJ+DixBmXO5EbYsZlFDhGFizNAtY14zW/7RUioBao20DZ\ny8RmYmmJW5p39h4gEvDD6+62lYqz+2SIPPSQdPNmANn2OOge43KArfyNYHbg4M13\n6yLzMZuY61B5arfV0JdDlBdLncws3C7JjKljOfSCYCJ0/Bq8fKL5206k60U3jyru\nRCoTtHFIWn1vzHgOf9cJMiIPWTa8HxH2+2mvZbDxmT+p4J5qgfRJ0BUw1i/klWqt\n1OfmSgMgdkgPxczSjHd2gnnasClNy4yyrWsdDjRKaTEOMSIsb7DUm8UnD3oDs+nC\nKMudDi6gn5ASiZf+ZsA5\n=MAdq\n-----END PGP SIGNATURE-----\n",
        "payload": "tree 9fb79466999d9d73c6ba15afdc76ce4d6f59c470\nparent f851a532a5ec23b572d86bd7185b327a3fac6b58\nauthor CWKSC <cwksc.person@gmail.com> 1706947355 +0800\ncommitter GitHub <noreply@github.com> 1706947355 +0100\n\nDOC: Typo `Polyline` → `PolyLine` in adding-pdf-annotations.md (#2426)\n\n"
      }
    },
    "url": "https://api.github.com/repos/py-pdf/pypdf/commits/61b73d49778e8f0fb172d5323e67677c9974e420",
    "html_url": "https://github.com/py-pdf/pypdf/commit/61b73d49778e8f0fb172d5323e67677c9974e420",
    "comments_url": "https://api.github.com/repos/py-pdf/pypdf/commits/61b73d49778e8f0fb172d5323e67677c9974e420/comments",
    "author": {
      "login": "CWKSC",
      "id": 53114952,
      "node_id": "MDQ6VXNlcjUzMTE0OTUy",
      "avatar_url": "https://avatars.githubusercontent.com/u/53114952?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/CWKSC",
      "html_url": "https://github.com/CWKSC",
      "followers_url": "https://api.github.com/users/CWKSC/followers",
      "following_url": "https://api.github.com/users/CWKSC/following{/other_user}",
      "gists_url": "https://api.github.com/users/CWKSC/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/CWKSC/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/CWKSC/subscriptions",
      "organizations_url": "https://api.github.com/users/CWKSC/orgs",
      "repos_url": "https://api.github.com/users/CWKSC/repos",
      "events_url": "https://api.github.com/users/CWKSC/events{/privacy}",
      "received_events_url": "https://api.github.com/users/CWKSC/received_events",
      "type": "User",
      "site_admin": false
    },
    "committer": {
      "login": "web-flow",
      "id": 19864447,
      "node_id": "MDQ6VXNlcjE5ODY0NDQ3",
      "avatar_url": "https://avatars.githubusercontent.com/u/19864447?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/web-flow",
      "html_url": "https://github.com/web-flow",
      "followers_url": "https://api.github.com/users/web-flow/followers",
      "following_url": "https://api.github.com/users/web-flow/following{/other_user}",
      "gists_url": "https://api.github.com/users/web-flow/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/web-flow/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/web-flow/subscriptions",
      "organizations_url": "https://api.github.com/users/web-flow/orgs",
      "repos_url": "https://api.github.com/users/web-flow/repos",
      "events_url": "https://api.github.com/users/web-flow/events{/privacy}",
      "received_events_url": "https://api.github.com/users/web-flow/received_events",
      "type": "User",
      "site_admin": false
    },
    "parents": [
      {
        "sha": "f851a532a5ec23b572d86bd7185b327a3fac6b58",
        "url": "https://api.github.com/repos/py-pdf/pypdf/commits/f851a532a5ec23b572d86bd7185b327a3fac6b58",
        "html_url": "https://github.com/py-pdf/pypdf/commit/f851a532a5ec23b572d86bd7185b327a3fac6b58"
      }
    ]
  },
  {
    "sha": "f851a532a5ec23b572d86bd7185b327a3fac6b58",
    "node_id": "C_kwDOAC-ZndoAKGY4NTFhNTMyYTVlYzIzYjU3MmQ4NmJkNzE4NWIzMjdhM2ZhYzZiNTg",
    "commit": {
      "author": {
        "name": "dependabot[bot]",
        "email": "49699333+dependabot[bot]@users.noreply.github.com",
        "date": "2024-02-03T08:00:35Z"
      },
      "committer": {
        "name": "GitHub",
        "email": "noreply@github.com",
        "date": "2024-02-03T08:00:35Z"
      },
      "message": "DEV: Bump codecov/codecov-action from 3 to 4 (#2430)\n\nBumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 3 to 4.\r\n- [Release notes](https://github.com/codecov/codecov-action/releases)\r\n- [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md)\r\n- [Commits](https://github.com/codecov/codecov-action/compare/v3...v4)\r\n\r\n---\r\nupdated-dependencies:\r\n- dependency-name: codecov/codecov-action\r\n  dependency-type: direct:production\r\n  update-type: version-update:semver-major\r\n...\r\n\r\nSigned-off-by: dependabot[bot] <support@github.com>\r\nCo-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>",
      "tree": {
        "sha": "fb40fe05c5f1a6679bc1e7a24b0f9fc55c150c88",
        "url": "https://api.github.com/repos/py-pdf/pypdf/git/trees/fb40fe05c5f1a6679bc1e7a24b0f9fc55c150c88"
      },
      "url": "https://api.github.com/repos/py-pdf/pypdf/git/commits/f851a532a5ec23b572d86bd7185b327a3fac6b58",
      "comment_count": 0,
      "verification": {
        "verified": true,
        "reason": "valid",
        "signature": "-----BEGIN PGP SIGNATURE-----\n\nwsFcBAABCAAQBQJlvfKjCRC1aQ7uu5UhlAAA9NUQAGTOt3JzejSo6o5fHUrLreus\nv8TScA1B4nuWsJLH0nvGArZ8y8L/9JqG2fUTs3WGjY3PL9Dgn9fhmO+3dMcUDEav\nEtBXdNHsodAUvNHKh1d9ZwCK+jSzbO9tSKiY4enxqUHnr+0m0q3XQHkYLf9eUklE\n9/vi/OCV8JSptRkiS+VOsSrqfO+zqNUfnOxpNy6UNLPaNDwZyom6WROZE6yXLm1W\nE0rsG10rBEXyvhjF2E4znoEcN/5+OIJr87h1Jys7y3qMXOo61my6bEpHY+gZpBRQ\nN3xo3ptu4BhP0a4oI8iDjnQMQLS4cLN++LeMuUbWIEpKtiKkF5q/bGP3s1wniLTD\nSYh14z0jIaJ7QPdkOEK2/Fv9lx5tno66bFe4vKC4DSmX3itcqh/XOiPFPkgRAalj\nAd5g6hs1QlJErAwQShe6lzNDRnIDGoD6ZOaTMdxlbRNdwInr83Qz4Gt92D+dX4eQ\njln9Welx4xTuPnYv6Qhmdc69Kk2nyhRuTnCsI0jaoqDRSLQxlzCuuQMn7u5XyqSS\npSkWUYOw8zjrJd7ItPVe3YII5JIiRLEkHrDzTwGZAcy2E6GPMDLeXsx4K6GUhsfC\nXenOpPuoo6BDk/bhrkWb7klyYG09JQtum31bCpDp1qxafXh5jh9Y0mztZJ4gWjaF\n0NawJ3AozsNrioHxf6xz\n=0OMP\n-----END PGP SIGNATURE-----\n",
        "payload": "tree fb40fe05c5f1a6679bc1e7a24b0f9fc55c150c88\nparent 757932944f54ba661b89e0629ed3fc9d8345dbab\nauthor dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> 1706947235 +0100\ncommitter GitHub <noreply@github.com> 1706947235 +0100\n\nDEV: Bump codecov/codecov-action from 3 to 4 (#2430)\n\nBumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 3 to 4.\r\n- [Release notes](https://github.com/codecov/codecov-action/releases)\r\n- [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md)\r\n- [Commits](https://github.com/codecov/codecov-action/compare/v3...v4)\r\n\r\n---\r\nupdated-dependencies:\r\n- dependency-name: codecov/codecov-action\r\n  dependency-type: direct:production\r\n  update-type: version-update:semver-major\r\n...\r\n\r\nSigned-off-by: dependabot[bot] <support@github.com>\r\nCo-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>"
      }
    },
    "url": "https://api.github.com/repos/py-pdf/pypdf/commits/f851a532a5ec23b572d86bd7185b327a3fac6b58",
    "html_url": "https://github.com/py-pdf/pypdf/commit/f851a532a5ec23b572d86bd7185b327a3fac6b58",
    "comments_url": "https://api.github.com/repos/py-pdf/pypdf/commits/f851a532a5ec23b572d86bd7185b327a3fac6b58/comments",
    "author": {
      "login": "dependabot[bot]",
      "id": 49699333,
      "node_id": "MDM6Qm90NDk2OTkzMzM=",
      "avatar_url": "https://avatars.githubusercontent.com/in/29110?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/dependabot%5Bbot%5D",
      "html_url": "https://github.com/apps/dependabot",
      "followers_url": "https://api.github.com/users/dependabot%5Bbot%5D/followers",
      "following_url": "https://api.github.com/users/dependabot%5Bbot%5D/following{/other_user}",
      "gists_url": "https://api.github.com/users/dependabot%5Bbot%5D/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/dependabot%5Bbot%5D/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/dependabot%5Bbot%5D/subscriptions",
      "organizations_url": "https://api.github.com/users/dependabot%5Bbot%5D/orgs",
      "repos_url": "https://api.github.com/users/dependabot%5Bbot%5D/repos",
      "events_url": "https://api.github.com/users/dependabot%5Bbot%5D/events{/privacy}",
      "received_events_url": "https://api.github.com/users/dependabot%5Bbot%5D/received_events",
      "type": "Bot",
      "site_admin": false
    },
    "committer": {
      "login": "web-flow",
      "id": 19864447,
      "node_id": "MDQ6VXNlcjE5ODY0NDQ3",
      "avatar_url": "https://avatars.githubusercontent.com/u/19864447?v=4",
      "gravatar_id": "",
      "url": "https://api.github.com/users/web-flow",
      "html_url": "https://github.com/web-flow",
      "followers_url": "https://api.github.com/users/web-flow/followers",
      "following_url": "https://api.github.com/users/web-flow/following{/other_user}",
      "gists_url": "https://api.github.com/users/web-flow/gists{/gist_id}",
      "starred_url": "https://api.github.com/users/web-flow/starred{/owner}{/repo}",
      "subscriptions_url": "https://api.github.com/users/web-flow/subscriptions",
      "organizations_url": "https://api.github.com/users/web-flow/orgs",
      "repos_url": "https://api.github.com/users/web-flow/repos",
      "events_url": "https://api.github.com/users/web-flow/events{/privacy}",
      "received_events_url": "https://api.github.com/users/web-flow/received_events",
      "type": "User",
      "site_admin": false
    },
    "parents": [
      {
        "sha": "757932944f54ba661b89e0629ed3fc9d8345dbab",
        "url": "https://api.github.com/repos/py-pdf/pypdf/commits/757932944f54ba661b89e0629ed3fc9d8345dbab",
        "html_url": "https://github.com/py-pdf/pypdf/commit/757932944f54ba661b89e0629ed3fc9d8345dbab"
      }
    ]
  }
]


================================================
FILE: tests/scripts/test_example_files.py
================================================
"""Tests related to the example files."""
from operator import itemgetter
from pathlib import Path

from tests import read_yaml_to_list_of_dicts


def test_consistency():
    """Every entry of ``example_files.yaml`` has a unique local filename and a unique URL."""
    yaml_path = Path(__file__).parent.parent / "example_files.yaml"
    entries = read_yaml_to_list_of_dicts(yaml_path)
    total = len(entries)

    # A set drops duplicates, so equal sizes mean that no value occurs twice.
    get_local_filename = itemgetter("local_filename")
    local_filenames = {get_local_filename(entry) for entry in entries}
    assert total == len(local_filenames)

    get_url = itemgetter("url")
    urls = {get_url(entry) for entry in entries}
    assert total == len(urls)


================================================
FILE: tests/scripts/test_make_release.py
================================================
"""Test the `make_release.py` script."""
import sys
from pathlib import Path
from unittest import mock

import pytest

# Directory next to this module which holds the fixture files.
DATA_PATH = Path(__file__).parent.resolve() / "data"

# Raw bytes handed out by the mocked `subprocess.check_output` call. Each line describes
# one commit in the form `<hash>:::<message>:::<author>`.
# Lines starting with \ and ending with " have been observed on Windows, thus one of the
# lines below carries both.
GIT_LOG__VERSION_4_0_1 = """
b7bfd0d7eddfd0865a94cc9e7027df6596242cf7:::BUG: Use NumberObject for /Border elements of annotations (#2451):::rsinger417
8cacb0fc8fee9920b0515d1289e6ee8191eb3f21:::DOC: Document easier way to update metadata (#2454):::Stefan
3fb63f7e3839ce39ac98978c996f3086ba230a20:::TST: Avoid catching not emitted warnings (#2429):::Stefan
\\61b73d49778e8f0fb172d5323e67677c9974e420:::DOC: Typo `Polyline` → `PolyLine` in adding-pdf-annotations.md (#2426):::CWKSC"
f851a532a5ec23b572d86bd7185b327a3fac6b58:::DEV: Bump codecov/codecov-action from 3 to 4 (#2430):::dependabot[bot]""".encode()  # noqa: E501

# JSON fixture which is opened and served as the result of the mocked `urllib.request.urlopen` call.
COMMITS__VERSION_4_0_1 = DATA_PATH.joinpath("commits__version_4_0_1.json")
# Used to skip tests which rely on functionality added in Python 3.9.
VERSION_3_9_PLUS = sys.version_info[:2] >= (3, 9)


@pytest.mark.skipif(not VERSION_3_9_PLUS, reason="Function uses method removeprefix added in Python 3.9")
@pytest.mark.parametrize(
    ("data", "expected"),
    [
        ("", ""),
        ("# CHANGELOG", ""),
        ("# CHANGELOG ", ""),
        ("# CHANGELOG  ", ""),
        ("## CHANGELOG", "## CHANGELOG"),
        ("CHANGELOG", "CHANGELOG"),
        ("# CHANGELOG #", "#"),
    ],
)
def test_strip_header(data, expected):
    """Check `strip_header` against each parametrized input/expected pair."""
    make_release = pytest.importorskip("make_release")
    stripped = make_release.strip_header(data)
    assert stripped == expected


def test_get_git_commits_since_tag():
    """Check the `Change` objects built from the mocked `git log` output and commits file."""
    make_release = pytest.importorskip("make_release")

    # One tuple per expected change: (commit_hash, prefix, message, author, author_login).
    expected_fields = [
        (
            "b7bfd0d7eddfd0865a94cc9e7027df6596242cf7",
            "BUG",
            "Use NumberObject for /Border elements of annotations (#2451)",
            "rsinger417",
            "rsinger417",
        ),
        (
            "8cacb0fc8fee9920b0515d1289e6ee8191eb3f21",
            "DOC",
            "Document easier way to update metadata (#2454)",
            "Stefan",
            "stefan6419846",
        ),
        (
            "3fb63f7e3839ce39ac98978c996f3086ba230a20",
            "TST",
            "Avoid catching not emitted warnings (#2429)",
            "Stefan",
            "stefan6419846",
        ),
        (
            "61b73d49778e8f0fb172d5323e67677c9974e420",
            "DOC",
            "Typo `Polyline` → `PolyLine` in adding-pdf-annotations.md (#2426)",
            "CWKSC",
            "CWKSC",
        ),
        (
            "f851a532a5ec23b572d86bd7185b327a3fac6b58",
            "DEV",
            "Bump codecov/codecov-action from 3 to 4 (#2430)",
            "dependabot[bot]",
            "dependabot[bot]",
        ),
    ]
    expected_changes = [
        make_release.Change(
            commit_hash=commit_hash,
            prefix=prefix,
            message=message,
            author=author,
            author_login=author_login,
        )
        for commit_hash, prefix, message, author, author_login in expected_fields
    ]

    # The mocked `urlopen` hands out the opened commits file; `check_output` returns the canned log.
    with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_file:
        patch_urlopen = mock.patch("urllib.request.urlopen", side_effect=lambda _: commits_file)
        patch_git_log = mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1)
        with patch_urlopen, patch_git_log:
            changes = make_release.get_git_commits_since_tag("4.0.1")

    assert changes == expected_changes


def test_get_formatted_changes():
    """Check both formatted change lists (without and with author mention) for the mocked commits."""
    make_release = pytest.importorskip("make_release")

    expected_output = """
### Bug Fixes (BUG)
- Use NumberObject for /Border elements of annotations (#2451)

### Documentation (DOC)
- Document easier way to update metadata (#2454)
- Typo `Polyline` → `PolyLine` in adding-pdf-annotations.md (#2426)

### Developer Experience (DEV)
- Bump codecov/codecov-action from 3 to 4 (#2430)

### Testing (TST)
- Avoid catching not emitted warnings (#2429)
"""
    expected_output_with_user = """
### Bug Fixes (BUG)
- Use NumberObject for /Border elements of annotations (#2451) by @rsinger417

### Documentation (DOC)
- Document easier way to update metadata (#2454) by @stefan6419846
- Typo `Polyline` → `PolyLine` in adding-pdf-annotations.md (#2426) by @CWKSC

### Developer Experience (DEV)
- Bump codecov/codecov-action from 3 to 4 (#2430) by @dependabot[bot]

### Testing (TST)
- Avoid catching not emitted warnings (#2429) by @stefan6419846
"""

    # The mocked `urlopen` hands out the opened commits file; `check_output` returns the canned log.
    with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_file:
        patch_urlopen = mock.patch("urllib.request.urlopen", side_effect=lambda _: commits_file)
        patch_git_log = mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1)
        with patch_urlopen, patch_git_log:
            output, output_with_user = make_release.get_formatted_changes("4.0.1")

    assert output == expected_output
    assert output_with_user == expected_output_with_user


def test_get_formatted_changes__other():
    """Check that changes with an empty or `FIX` prefix are listed under the `Other` heading."""
    make_release = pytest.importorskip("make_release")

    # One tuple per change: (commit_hash, prefix, message, author); the login equals the author here.
    change_fields = [
        ("f20c36eabd59ea661f30c5da35af7c9e435c7de9", "", "Improve lossless compression example (#2488)", "j-t-1"),
        ("afbee382f8fd2b39588db6470b9b2b2c82905318", "ENH", "Add reattach_fields function (#2480)", "pubpub-zz"),
        (
            "cd705f959064d8125397ddf4f7bdd2ea296f889f",
            "FIX",
            "Broken test due to expired test file URL (#2468)",
            "pubpub-zz",
        ),
    ]
    changes = [
        make_release.Change(
            commit_hash=commit_hash,
            prefix=prefix,
            message=message,
            author=author,
            author_login=author,
        )
        for commit_hash, prefix, message, author in change_fields
    ]

    expected_output = """
### New Features (ENH)
- Add reattach_fields function (#2480)

### Other
- : Improve lossless compression example (#2488)
- FIX: Broken test due to expired test file URL (#2468)
"""
    expected_output_with_user = """
### New Features (ENH)
- Add reattach_fields function (#2480) by @pubpub-zz

### Other
- : Improve lossless compression example (#2488) by @j-t-1
- FIX: Broken test due to expired test file URL (#2468) by @pubpub-zz
"""

    patch_commits = mock.patch.object(make_release, "get_git_commits_since_tag", return_value=changes)
    with patch_commits:
        output, output_with_user = make_release.get_formatted_changes("dummy")

    assert output == expected_output
    assert output_with_user == expected_output_with_user


================================================
FILE: tests/test_annotations.py
================================================
"""Test the pypdf.annotations submodule."""

from io import BytesIO
from pathlib import Path

import pytest

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import (
    AnnotationDictionary,
    Ellipse,
    FreeText,
    Highlight,
    Line,
    Link,
    Polygon,
    PolyLine,
    Popup,
    Rectangle,
    Text,
)
from pypdf.errors import PdfReadError
from pypdf.generic import ArrayObject, FloatObject, NumberObject

from . import RESOURCE_ROOT, get_data_from_url


def test_ellipse(pdf_file_path):
    """Add an `Ellipse` annotation with an interior color to the first page and write the file."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    # Act
    writer.add_annotation(
        0,
        Ellipse(rect=(50, 550, 500, 650), interior_color="ff0000"),
    )

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_file:
        writer.write(output_file)


def test_text(pdf_file_path):
    """Add an opened two-line `Text` annotation to the first page and write the file."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "outline-without-title.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    # Act
    note = Text(
        rect=(50, 550, 500, 650),
        text="Hello World\nThis is the second line!",
        open=True,
    )
    writer.add_annotation(0, note)

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_file:
        writer.write(output_file)


def test_free_text(pdf_file_path):
    """Add two `FreeText` annotations with different styling to the first page and write the file."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    # Act
    # First annotation: bold and italic, without border or background color.
    bold_italic = FreeText(
        rect=(50, 550, 200, 650),
        text="Hello World - bold and italic\nThis is the second line!",
        font="Arial",
        font_size="20pt",
        font_color="00ff00",
        bold=True,
        italic=True,
        border_color=None,
        background_color=None,
    )
    writer.add_annotation(0, bold_italic)

    # Second annotation: regular weight, with border and background color.
    regular = FreeText(
        rect=(500, 550, 200, 650),
        text="Another free text annotation (not bold, not italic)",
        font="Arial",
        font_size="20pt",
        font_color="00ff00",
        bold=False,
        italic=False,
        border_color="0000ff",
        background_color="cdcdcd",
    )
    writer.add_annotation(0, regular)

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_file:
        writer.write(output_file)


def test_free_text__font_specifier():
    """Check the `/DS` entry of `FreeText` for default and for explicit font settings."""
    # Only the required arguments: the default font values are expected.
    with_defaults = FreeText(text="Hello World", rect=(0, 0, 0, 0))
    expected_default = "font: normal normal 14pt Helvetica;text-align:left;color:#000000"
    assert with_defaults["/DS"] == expected_default

    # Explicit font, style, size and color.
    customized = FreeText(
        rect=(50, 550, 200, 650),
        text="Hello World",
        font="Arial",
        font_size="20pt",
        font_color="00ff00",
        bold=True,
        italic=True,
        border_color=None,
        background_color=None,
    )
    expected_custom = "font: italic bold 20pt Arial;text-align:left;color:#00ff00"
    assert customized["/DS"] == expected_custom


def test_annotation_dictionary():
    """Check that a value assigned to `AnnotationDictionary.flags` can be read back."""
    annotation = AnnotationDictionary()
    annotation.flags = 123
    assert annotation.flags == 123


def test_polygon(pdf_file_path):
    """Check that `Polygon` rejects an empty vertex list and add a four-vertex polygon to a page."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    with pytest.raises(ValueError):
        Polygon(vertices=[])

    corner_points = [(50, 550), (200, 650), (70, 750), (50, 700)]
    writer.add_annotation(0, Polygon(vertices=corner_points))

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_file:
        writer.write(output_file)


def test_polyline(pdf_file_path):
    """Check that `PolyLine` rejects an empty vertex list and add a four-vertex polyline to a page."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    expected_message = r"A polyline needs at least 1 vertex with two coordinates"
    with pytest.raises(ValueError, match=expected_message):
        PolyLine(vertices=[])

    corner_points = [(50, 550), (200, 650), (70, 750), (50, 700)]
    writer.add_annotation(0, PolyLine(vertices=corner_points))

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_file:
        writer.write(output_file)


def test_line(pdf_file_path):
    """Add a `Line` annotation with a two-line text to the first page and write the file."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    # Act
    start_point = (50, 550)
    end_point = (200, 650)
    writer.add_annotation(
        0,
        Line(
            rect=(50, 550, 200, 650),
            p1=start_point,
            p2=end_point,
            text="Hello World\nLine2",
        ),
    )

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_file:
        writer.write(output_file)


def test_rectangle(pdf_file_path):
    """Add two `Rectangle` annotations, with and without interior color, and write the file."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    # Act
    filled = Rectangle(rect=(50, 550, 200, 650), interior_color="ff0000")
    writer.add_annotation(0, filled)

    unfilled = Rectangle(rect=(40, 400, 150, 450))
    writer.add_annotation(0, unfilled)

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_file:
        writer.write(output_file)


def test_highlight(pdf_file_path):
    """Check the `/F` entry of `Highlight` annotations created with `printing` off and on."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    source_page = reader.pages[0]
    writer = PdfWriter()
    writer.add_page(source_page)

    quad_point_values = (
        100.060779,
        723.55398,
        134.29033,
        723.55398,
        100.060779,
        705.4493,
        134.29033,
        705.4493,
    )

    def build_highlight(printing):
        # Both annotations only differ in the `printing` argument.
        return Highlight(
            rect=(95.79332, 704.31777, 138.55779, 724.6855),
            highlight_color="ff0000",
            quad_points=ArrayObject([FloatObject(value) for value in quad_point_values]),
            printing=printing,
        )

    def highlights_on_page(page_index):
        # Resolve the annotations of the page and keep only the highlights.
        resolved = [reference.get_object() for reference in writer.pages[page_index]["/Annots"]]
        return [annotation for annotation in resolved if annotation["/Subtype"] == "/Highlight"]

    # Act
    writer.add_annotation(0, build_highlight(printing=False))
    for highlight in highlights_on_page(0):
        assert "/F" not in highlight or highlight["/F"] == NumberObject(0)

    writer.add_page(source_page)
    # Act
    writer.add_annotation(1, build_highlight(printing=True))
    for highlight in highlights_on_page(1):
        assert highlight["/F"] == NumberObject(4)

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_file:
        writer.write(output_file)


def test_link(pdf_file_path):
    """Check the argument validation of `Link` and add an external and an internal link."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "outline-without-title.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    # Act
    # Part 1: Too many args
    with pytest.raises(ValueError):
        Link(
            rect=(50, 550, 200, 650),
            url="https://martin-thoma.com/",
            target_page_index=3,
        )

    # Part 2: Too few args
    with pytest.raises(ValueError):
        Link(rect=(50, 550, 200, 650))

    # Part 3: External Link
    external_link = Link(
        rect=(50, 50, 100, 100),
        border=[1, 0, 6, [3, 2]],
        url="https://martin-thoma.com/",
    )
    writer.add_annotation(0, external_link)

    # Part 4: Internal Link
    internal_link = Link(
        rect=(100, 100, 300, 200),
        border=[50, 10, 4],
        target_page_index=1,
    )
    writer.add_annotation(0, internal_link)

    # Append the remaining pages of the source document.
    for remaining_page in reader.pages[1:]:
        writer.add_page(remaining_page)

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_file:
        writer.write(output_file)


def test_popup(caplog):
    """Attach a `Popup` to a registered text annotation and check the log for an invalid parent."""
    # Arrange
    reader = PdfReader(RESOURCE_ROOT / "outline-without-title.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    # Act
    registered_text = writer.add_annotation(
        0,
        Text(
            rect=(50, 550, 200, 650),
            title_bar="hello world",
            text="Hello World\nThis is the second line!",
            open=True,
        ),
    )
    popup = Popup(
        rect=(50, 550, 200, 650),
        open=True,
        parent=registered_text,  # prefer to use for evolutivity
    )
    writer.add_annotation(writer.pages[0], popup)

    # A parent which is not a registered annotation is expected to be reported in the log.
    Popup(
        rect=(50, 550, 200, 650),
        open=True,
        parent=True,  # broken parameter  # type: ignore
    )
    assert "Unregistered Parent object : No Parent field set" in caplog.text

    output_name = "annotated-pdf-popup.pdf"
    writer.write(output_name)
    Path(output_name).unlink()  # comment this out for manual inspection


def test_markup_annotation_in_reply_to():
    """Check `/IRT`, `/RT` and `/NM` of a reply annotation, both in memory and after a write/read cycle."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")

    parent_ref = writer.add_annotation(
        0,
        Text(text="Parent comment", rect=(50, 550, 200, 650), open=True),
    )

    reply = Text(
        rect=(50, 550, 200, 650),
        text="Reply to parent",
        in_reply_to=parent_ref,
    )
    writer.add_annotation(0, reply)

    # The reply carries the reference, the reply type and a name ...
    assert "/IRT" in reply
    assert reply["/IRT"].get_object() is parent_ref
    assert reply["/RT"] == "/R"
    assert "/NM" in reply

    # ... while the parent does not get a name.
    assert "/NM" not in parent_ref

    buffer = BytesIO()
    writer.write(buffer)

    reread = PdfReader(buffer)
    reread_annotations = reread.pages[0]["/Annots"]
    assert len(reread_annotations) == 2

    reread_reply = reread_annotations[1].get_object()
    assert reread_reply["/IRT"].get_object()["/Contents"] == "Parent comment"
    assert reread_reply["/NM"] == reply["/NM"]


def test_markup_annotation_in_reply_to_group_type():
    """Check that `reply_type="Group"` results in `/RT` being `/Group`."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")

    parent_ref = writer.add_annotation(
        0,
        Text(text="Parent", rect=(50, 550, 200, 650)),
    )

    grouped = Text(
        rect=(50, 550, 200, 650),
        text="Grouped with parent",
        in_reply_to=parent_ref,
        reply_type="Group",
    )
    writer.add_annotation(0, grouped)

    assert grouped["/RT"] == "/Group"
    assert "/IRT" in grouped
    assert "/NM" in grouped


def test_markup_annotation_name_without_reply():
    """Check that passing `annotation_name` without `in_reply_to` raises `ValueError`."""
    expected_message = "annotation_name is only supported when in_reply_to is set"
    with pytest.raises(ValueError, match=expected_message):
        Text(
            rect=(50, 550, 200, 650),
            text="Named but not a reply",
            annotation_name="my-unique-name",
        )


def test_markup_annotation_reply_type_without_reply():
    """Check that passing `reply_type="Group"` without `in_reply_to` raises `ValueError`."""
    expected_message = "reply_type is only meaningful when in_reply_to is set"
    with pytest.raises(ValueError, match=expected_message):
        Text(
            rect=(50, 550, 200, 650),
            text="Grouped but not a reply",
            reply_type="Group",
        )


def test_markup_annotation_in_reply_to_custom_name():
    """Check that an explicit `annotation_name` is stored as `/NM` of a reply."""
    writer = PdfWriter()
    writer.add_blank_page(width=200, height=200)

    parent_ref = writer.add_annotation(0, Text(text="Parent", rect=(0, 0, 100, 100)))

    custom_name = "custom-reply-name"
    reply = Text(
        rect=(0, 0, 100, 100),
        text="Reply",
        in_reply_to=parent_ref,
        annotation_name=custom_name,
    )
    writer.add_annotation(0, reply)

    assert reply["/NM"] == custom_name
    assert "/IRT" in reply


def test_markup_annotation_in_reply_to_unregistered():
    """Check that replying to an annotation never added to a writer raises `ValueError`."""
    not_registered = Text(text="Not added to writer", rect=(0, 0, 100, 100))
    expected_message = "in_reply_to must be a registered annotation"
    with pytest.raises(ValueError, match=expected_message):
        Text(
            rect=(0, 0, 100, 100),
            text="Reply",
            in_reply_to=not_registered,
        )


def test_markup_annotation_in_reply_to_indirect_object():
    """Check that the `indirect_reference` of a registered annotation is accepted as `in_reply_to`."""
    writer = PdfWriter()
    writer.add_blank_page(width=200, height=200)

    parent_ref = writer.add_annotation(0, Text(text="Parent", rect=(0, 0, 100, 100)))
    parent_indirect = parent_ref.indirect_reference

    reply = Text(
        rect=(0, 0, 100, 100),
        text="Reply via IndirectObject",
        in_reply_to=parent_indirect,
    )
    writer.add_annotation(0, reply)

    assert "/IRT" in reply
    assert reply["/RT"] == "/R"
    assert "/NM" in reply

    buffer = BytesIO()
    writer.write(buffer)

    # Read the written document back and inspect the second annotation, i.e. the reply.
    reread = PdfReader(buffer)
    reread_annotations = reread.pages[0]["/Annots"]
    assert len(reread_annotations) == 2
    reread_reply = reread_annotations[1].get_object()
    assert reread_reply["/IRT"].get_object()["/Contents"] == "Parent"
    assert reread_reply["/NM"] == reply["/NM"]


@pytest.mark.enable_socket
def test_outline_action_without_d_lenient():
    """Check that the outline of `iss3268.pdf` has two entries with the default reader settings."""
    pdf_bytes = get_data_from_url(name="iss3268.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    outline = reader.outline
    assert len(outline) == 2


@pytest.mark.enable_socket
def test_outline_action_without_d_strict(pdf_file_path):
    """
    Check that reading the outline of `iss3268.pdf` raises `PdfReadError` in strict mode.

    The `pdf_file_path` fixture is requested but not used by the test body.
    """
    reader = PdfReader(BytesIO(get_data_from_url(name="iss3268.pdf")))
    reader.strict = True
    # Keep only the raising call inside the block (an `assert` around it would never be
    # evaluated) and match against the exception message itself rather than the string
    # form of the `ExceptionInfo` wrapper.
    with pytest.raises(PdfReadError, match="Outline Action Missing /D"):
        len(reader.outline)


================================================
FILE: tests/test_appearance_stream.py
================================================
"""Test the pypdf.generic._appearance_stream module."""

from pypdf.generic._appearance_stream import BaseStreamConfig, TextStreamAppearance


def test_comb():
    """Check the exact appearance stream data produced for comb text."""
    layout = BaseStreamConfig(rectangle=(0.0, 0.0, 197.285, 18.455))

    # Eight characters with a maximum length of ten: one `Tj` per character is expected.
    stream = TextStreamAppearance(
        layout=layout,
        text="01234567",
        font_size=10.0,
        is_comb=True,
        max_length=10,
    )
    expected_data = (
        b"q\n/Tx BMC \nq\n2 1 193.285 16.455 re\nW\nBT\n/Helv 10.0 Tf 0 g\n"
        b"7.084250000000001 5.637499999999999 Td\n(0) Tj\n"
        b"19.7285 0.0 Td\n(1) Tj\n"
        b"19.728500000000004 0.0 Td\n(2) Tj\n"
        b"19.728499999999997 0.0 Td\n(3) Tj\n"
        b"19.728499999999997 0.0 Td\n(4) Tj\n"
        b"19.728499999999997 0.0 Td\n(5) Tj\n"
        b"19.72850000000001 0.0 Td\n(6) Tj\n"
        b"19.728499999999997 0.0 Td\n(7) Tj\nET\nQ\nEMC\nQ\n"
    )
    assert stream.get_data() == expected_data

    # Two characters with a maximum length of one: only the first character is expected.
    layout.rectangle = (0.0, 0.0, 20.852, 20.84)
    stream = TextStreamAppearance(
        layout=layout,
        text="AA",
        font_size=10.0,
        is_comb=True,
        max_length=1,
    )
    expected_data = (
        b"q\n/Tx BMC \nq\n2 1 16.852 18.84 re\nW\nBT\n/Helv 10.0 Tf 0 g\n7.091 6.83 Td\n(A) Tj\nET\nQ\nEMC\nQ\n"
    )
    assert stream.get_data() == expected_data


def test_scale_text():
    """Check the font size written to the appearance stream for several texts and rectangles."""
    layout=BaseStreamConfig(rectangle=(0, 0, 9.1, 55.4))
    font_size = 10.1
    text = "Hello World"
    is_multiline = False
    appearance_stream = TextStreamAppearance(
        layout=layout, text=text, font_size=font_size, is_multiline=is_multiline
    )
    # An explicit font size is expected to be used as-is.
    assert b"10.1 Tf" in appearance_stream.get_data()

    text = "This is a very very long sentence that probably will scale below the minimum font size"
    # From here on, the font size passed in is 0.0 and the asserted sizes differ from it.
    font_size = 0.0
    appearance_stream = TextStreamAppearance(
        layout=layout, text=text, font_size=font_size, is_multiline=is_multiline
    )
    assert b"4.0 Tf" in appearance_stream.get_data()

    # The same layout object is reused; only its rectangle is replaced.
    layout.rectangle = (0, 0, 160, 360)
    font_size = 0.0
    text = """Welcome to pypdf
pypdf is a free and open source pure-python PDF library capable of splitting, merging, cropping, and
transforming the pages of PDF files. It can also add custom data, viewing options, and passwords to PDF
files. pypdf can retrieve text and metadata from PDFs as well.

See pdfly for a CLI application that uses pypdf to interact with PDFs.
    """
    is_multiline = True
    appearance_stream = TextStreamAppearance(
        layout=layout, text=text, font_size=font_size, is_multiline=is_multiline
    )
    assert b"12 Tf" in appearance_stream.get_data()
    assert b"pypdf is a free and open" in appearance_stream.get_data()

    # Same multiline text in a smaller rectangle.
    layout.rectangle = (0, 0, 160, 160)
    appearance_stream = TextStreamAppearance(
        layout=layout, text=text, font_size=font_size, is_multiline=is_multiline
    )
    assert b"9.8 Tf" in appearance_stream.get_data()

    # Same multiline text in a rectangle which is only 12 units high.
    layout.rectangle = (0, 0, 160, 12)
    appearance_stream = TextStreamAppearance(
        layout=layout, text=text, font_size=font_size, is_multiline=is_multiline
    )
    # NOTE(review): this `text` value is overwritten below before it is ever passed to
    # `TextStreamAppearance`; only `selection` is used later. The assertion after it still
    # checks the stream built from the previous text - confirm whether a stream for the
    # option list was meant to be created here.
    text = """Option A
Option B
Option C
Option D
"""
    selection = "Option A"
    assert b"4.0 Tf" in appearance_stream.get_data()

    # A single long word, combined with the selection defined above.
    text = "pneumonoultramicroscopicsilicovolcanoconiosis"
    appearance_stream = TextStreamAppearance(
        layout=layout, text=text, selection=selection, font_size=font_size, is_multiline=is_multiline
    )
    assert b"7.3 Tf" in appearance_stream.get_data()

    # A single word in a rectangle which is only 10 units wide must still be part of the stream.
    layout.rectangle = (0, 0, 10, 100)
    text = "OneWord"
    appearance_stream = TextStreamAppearance(
        layout=layout, text=text, font_size=font_size, is_multiline=is_multiline
    )
    assert b"OneWord" in appearance_stream.get_data()


================================================
FILE: tests/test_cmap.py
================================================
"""Test the pypdf._cmap module."""
from io import BytesIO

import pytest

from pypdf import PdfReader, PdfWriter
from pypdf._cmap import get_encoding, parse_bfchar, parse_bfrange
from pypdf._codecs import charset_encoding
from pypdf._font import Font
from pypdf.errors import LimitReachedError
from pypdf.generic import ArrayObject, DictionaryObject, IndirectObject, NameObject, NullObject, StreamObject

from . import RESOURCE_ROOT, get_data_from_url


@pytest.mark.enable_socket
@pytest.mark.slow
@pytest.mark.parametrize(
    ("url", "name", "strict"),
    [
        # compute_space_width:
        (None, "tika-923406.pdf", False),
        # _parse_to_unicode_process_rg:
        (None, "tika-959173.pdf", False),
        (None, "tika-959173.pdf", True),
        # issue #1718:
        (None, "iss1718.pdf", False),
    ],
)
def test_text_extraction_slow(caplog, url: str, name: str, strict: bool):
    """Text extraction of every page runs without exceptions and without log output (slow files)"""
    pdf_bytes = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(pdf_bytes), strict=strict)
    for current_page in reader.pages:
        current_page.extract_text()
    assert caplog.text == ""


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name", "strict"),
    [
        # bfchar_on_2_chars: issue #1293
        (None, "ASurveyofImageClassificationBasedTechniques.pdf", False),
        # L40, get_font_width_from_default
        (None, "tika-908104.pdf", False),
        # multiline_bfrange / regression test for issue #1285:
        (None, "The%20lean%20times%20in%20the%20Peruvian%20economy.pdf", False),
        (None, "Giacalone.pdf", False),
    ],
)
def test_text_extraction_fast(caplog, url: str, name: str, strict: bool):
    """Text extraction of every page runs without exceptions and without log output"""
    pdf_bytes = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(pdf_bytes), strict=strict)
    for current_page in reader.pages:
        current_page.extract_text()
    assert caplog.text == ""


@pytest.mark.enable_socket
def test_parse_encoding_advanced_encoding_not_implemented(caplog):
    """Check that the misspelled encoding name of `tika-957144.pdf` is reported in the log."""
    pdf_bytes = get_data_from_url(name="tika-957144.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    for current_page in reader.pages:
        current_page.extract_text()
    # The correctly spelled encoding is /WinAnsiEncoding
    expected_message = "Advanced encoding /WinAnsEncoding not implemented yet"
    assert expected_message in caplog.text


@pytest.mark.enable_socket
def test_ascii_charset():
    """Check that the text of the first page does not contain "/a" (issue #1312)."""
    # Issue #1312
    pdf_bytes = get_data_from_url(name="ascii charset.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted = reader.pages[0].extract_text()
    assert "/a" not in extracted


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name", "page_nb", "within_text"),
    [
        (None, "cmap1370.pdf", 0, ""),
        (None, "02voc.pdf", 2, "Document delineation and character sequence decoding"),
    ],
    ids=["iss1370", "iss1379"],
)
def test_text_extraction_of_specific_pages(
    url: str, name: str, page_nb: int, within_text
):
    """Check that the text extracted from the given page contains the expected snippet."""
    pdf_bytes = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted = reader.pages[page_nb].extract_text()
    assert within_text in extracted


@pytest.mark.enable_socket
def test_iss1533():
    """Check text extraction and the character map entry for "\\x01" of font `/F` in `iss1533.pdf`."""
    pdf_bytes = get_data_from_url(name="iss1533.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[0].extract_text()  # no error
    font_resource = reader.pages[0]["/Resources"]["/Font"]["/F"]
    font = Font.from_font_resource(font_resource)
    assert font.character_map["\x01"] == "Ü"


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name", "page_index", "within_text", "caplog_text"),
    [
        (None, "tstUCS2.pdf", 1, ["2 / 12", "S0490520090001", "于博"], ""),
        (
            None,
            "tst-GBK_EUC.pdf",
            0,
            ["NJA", "中华男科学杂志"],
            "Multiple definitions in dictionary at byte 0x5cb42 for key /MediaBox\n",
        ),
    ],
)
def test_cmap_encodings(caplog, url, name, page_index, within_text, caplog_text):
    """Check that all expected snippets are extracted and the expected text is part of the log."""
    pdf_bytes = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted = reader.pages[page_index].extract_text()  # no error
    for snippet in within_text:
        assert snippet in extracted
    assert caplog_text in caplog.text


@pytest.mark.enable_socket
def test_latex():
    """Check that the mathematical symbols of `math_latex.pdf` are part of the extracted text."""
    pdf_bytes = get_data_from_url(name="math_latex.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted = reader.pages[0].extract_text()  # no error
    # Note: ϕ and φ actually seem to be swapped in LaTeX.
    expected_symbols = ("α", "β", "γ", "ϕ", "φ", "ℏ", "∫", "∂", "·", "×")
    for symbol in expected_symbols:
        assert symbol in extracted


@pytest.mark.enable_socket
def test_unixxx_glyphs():
    """Check that the expected snippets of `unixxx_glyphs.pdf` are part of the extracted text."""
    pdf_bytes = get_data_from_url(name="unixxx_glyphs.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted = reader.pages[0].extract_text()  # no error
    expected_snippets = ("闫耀庭", "龚龑", "张江水", "1′′.2")
    for snippet in expected_snippets:
        assert snippet in extracted


@pytest.mark.enable_socket
def test_cmap_compute_space_width():
    """Check that text extraction of the first page of `TextAttack_paper.pdf` does not fail (issue 2137)."""
    # issue 2137
    # original file URL:
    # url = "https://arxiv.org/pdf/2005.05909.pdf"
    # URL from github issue is too long to pass code type check, use original arxiv URL instead
    # url = "https://github.com/py-pdf/pypdf/files/12489914/Morris.et.al.-.2020.-.TextAttack.A.Framework.for.Adversarial.Attacks.Data.Augmentation.and.Adversarial.Training.in.NLP.pdf"
    pdf_bytes = get_data_from_url(name="TextAttack_paper.pdf")
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    first_page.extract_text()  # no error


@pytest.mark.enable_socket
def test_tabs_in_cmap():
    """Issue #2173: text extraction of the first page must not fail."""
    pdf_bytes = get_data_from_url(name="iss2173.pdf")
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    first_page.extract_text()


@pytest.mark.enable_socket
def test_ignoring_non_put_entries():
    """Issue #2290: text extraction of the first page must not fail."""
    pdf_bytes = get_data_from_url(name="iss2290.pdf")
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    first_page.extract_text()


@pytest.mark.enable_socket
def test_eten_b5():
    """Issue #2356: the extracted text of the first page starts with the expected characters."""
    # NOTE(review): the file is named after issue #2290 although this test refers to
    # issue #2356 - confirm that "iss2290.pdf" is the intended example file.
    reader = PdfReader(BytesIO(get_data_from_url(name="iss2290.pdf")))
    # The result of `startswith` has to be asserted; as a bare expression it was discarded
    # and the test could not fail on wrong text.
    assert reader.pages[0].extract_text().startswith("1/7 \n富邦新終身壽險")


def test_missing_entries_in_cmap():
    """
    Issue #2702: this issue has been observed on damaged PDF files.

    The original file is not used here as the test would be too slow. Instead,
    the same situation is created from crazyones.pdf by pointing /ToUnicode of
    a font to an indirect object with a very high object number.
    """
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    first_page = reader.pages[0]
    font = first_page["/Resources"]["/Font"]["/F1"]
    font[NameObject("/ToUnicode")] = IndirectObject(99999999, 0, reader)
    first_page.extract_text()


def test_null_missing_width():
    """For coverage of #2792: extract text with empty /Widths and a null /MissingWidth."""
    writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf")
    first_page = writer.pages[0]
    font = first_page["/Resources"]["/Font"]["/F1"]
    font[NameObject("/Widths")] = ArrayObject()
    font_descriptor = font["/FontDescriptor"]
    font_descriptor[NameObject("/MissingWidth")] = NullObject()
    first_page.extract_text()


@pytest.mark.enable_socket
def test_unigb_utf16():
    """Cf #2812: the expected Chinese text is part of the text extracted from the second page."""
    url = "https://github.com/user-attachments/files/16767536/W020240105322424121296.pdf"
    pdf_bytes = get_data_from_url(url, name="iss2812.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted = reader.pages[1].extract_text()
    assert "《中国能源展望 2060（2024 年版）》编写委员会" in extracted


@pytest.mark.enable_socket
def test_too_many_differences():
    """Cf #2836: text extraction of the first page yields an empty string."""
    url = "https://github.com/user-attachments/files/16911741/dumb_extract_text_crash.pdf"
    pdf_bytes = get_data_from_url(url, name="iss2836.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted = reader.pages[0].extract_text()
    assert extracted == ""


@pytest.mark.enable_socket
def test_iss2925():
    """Check that the expected sentence fragment is part of the text extracted from the fourth page."""
    url = "https://github.com/user-attachments/files/17621508/2305.09315.pdf"
    pdf_bytes = get_data_from_url(url, name="iss2925.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted = reader.pages[3].extract_text()
    assert "slicing on the PDG to extract the relevant contextual" in extracted


@pytest.mark.enable_socket
def test_iss2966():
    """Regression test for issue #2966: indirect objects in fonts"""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/17904233/repro_out.pdf",
        name="iss2966.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    extracted = reader.pages[0].extract_text()
    assert "Lorem ipsum dolor sit amet" in extracted


@pytest.mark.enable_socket
def test_binascii_odd_length_string(caplog):
    """Tests for #2216: a broken line is skipped and logged while the text is still extracted."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18199642/iss2216.pdf",
        name="iss2216.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_data)).pages[0]

    extracted = first_page.extract_text()
    assert "\n(Many other theorems may\n" in extracted
    assert "Skipping broken line b'143f   143f   10300': Odd-length string\n" in caplog.text


@pytest.mark.enable_socket
def test_standard_encoding(caplog):
    """Tests for #3156: the text is extracted without an "Advanced encoding" log message."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18983503/standard-encoding.pdf",
        name="issue3156.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_data)).pages[0]

    extracted = first_page.extract_text()
    assert extracted == "Lorem ipsum"
    assert "Advanced encoding" not in caplog.text


@pytest.mark.enable_socket
def test_function_in_font_widths(caplog):
    """Tests for #3153: a non-numeric width value is logged while the text is still extracted."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18945709/Marseille_pypdf_level_0.2._compressed.pdf",
        name="issue3153.pdf",
    )
    target_page = PdfReader(BytesIO(pdf_data)).pages[455]

    extracted = target_page.extract_text()
    assert "La vulnérabilité correspond aux conséquences potentielles" in extracted
    assert "Expected numeric value for width, got {'/Bounds': [0.25, 0.25]," in caplog.text


def test_get_encoding__encoding_value_is_none():
    """A null /Encoding entry yields the /StandardEncoding table and an empty second mapping."""
    font = DictionaryObject({NameObject("/Encoding"): NullObject()})
    standard_encoding = dict(zip(range(256), charset_encoding["/StandardEncoding"]))
    assert get_encoding(font) == (standard_encoding, {})


def test_parse_bfchar(caplog):
    """An odd-length hex value is logged and mapped to an empty string; valid entries are kept."""
    mapping = {}
    entries = []
    for raw_line in (b"057e   1337", b"056e   1f310"):
        parse_bfchar(line=raw_line, map_dict=mapping, int_entry=entries)

    assert caplog.messages == ["Got invalid hex string: Odd-length string (b'1f310')"]
    assert entries == [1406, 1390]
    assert mapping == {-1: 2, "ծ": "", "վ": "ጷ"}


def test_parse_bfrange__iteration_limit():
    """
    The /ToUnicode size limit is enforced while handling ``bfrange`` entries.

    The first case goes through the text extraction of a page, the remaining
    ones call ``parse_bfrange`` directly with pre-filled entry lists.
    """

    def limit_reached(pattern):
        # All cases expect the same exception type and differ in the message only.
        return pytest.raises(expected_exception=LimitReachedError, match=pattern)

    writer = PdfWriter()

    cmap_stream = StreamObject()
    cmap_stream.set_data(b"beginbfrange\n<00000000> <001FFFFF> <00000000>\nendbfrange\n")
    font_dictionary = DictionaryObject({
        NameObject("/Type"): NameObject("/Font"),
        NameObject("/Subtype"): NameObject("/Type1"),
        NameObject("/BaseFont"): NameObject("/Helvetica"),
        NameObject("/ToUnicode"): cmap_stream,
    })
    font = writer._add_object(font_dictionary)

    page = writer.add_blank_page(width=100, height=100)
    fonts = DictionaryObject({NameObject("/F1"): font.indirect_reference})
    page[NameObject("/Resources")] = DictionaryObject({NameObject("/Font"): fonts})

    # Case without list, exceeding list directly.
    with limit_reached(r"^Maximum /ToUnicode size limit reached: 2097152 > 100000\.$"):
        page.extract_text()

    # Use a pre-filled dummy list to simulate multiple calls where the upper bound does
    # not overflow, but the overall size does. Case without list.
    entries = [0] * 99_999
    mapping = {}
    with limit_reached(r"^Maximum /ToUnicode size limit reached: 165535 > 100000\.$"):
        parse_bfrange(line=b"0000 FFFF 0000", map_dict=mapping, int_entry=entries, multiline_rg=None)
    assert mapping == {-1: 2}

    # Exceeding from previous call: the very same list gets one more entry.
    entries.append(1)
    mapping = {}
    with limit_reached(r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"):
        parse_bfrange(
            line=b"00000000 00000000 00000000", map_dict=mapping, int_entry=entries, multiline_rg=None
        )
    assert mapping == {-1: 4}

    # multiline_rg
    entries = [0] * 99_995
    mapping = {-1: 1}
    with limit_reached(r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"):
        parse_bfrange(
            line=b"0020  0021  0022  0023  0024  0025  0026  2019",
            map_dict=mapping,
            int_entry=entries,
            multiline_rg=(32, 251),
        )
    assert mapping == {-1: 1, " ": " ", "!": "!", '"': '"', "#": "#", "$": "$"}

    # No multiline_rg, but list.
    entries = [0] * 99_995
    mapping = {}
    with limit_reached(r"^Maximum /ToUnicode size limit reached: 100001 > 100000\.$"):
        parse_bfrange(
            line=b"01 8A [ FFFD FFFD FFFD FFFF FFAB AAAA BBBB",
            map_dict=mapping,
            int_entry=entries,
            multiline_rg=None,
        )
    assert mapping == {-1: 1, "\x01": "�", "\x02": "�", "\x03": "�", "\x04": "\uffff", "\x05": "ﾫ"}


def test_parse_bfchar__iteration_limit():
    """The /ToUnicode size limit is checked by ``parse_bfchar`` before anything gets mapped."""
    entries = [0] * 99_995
    mapping = {}
    raw_line = b"0003   0020   0008   0025   0009   0026   000A   0027   000B   0028   000C   0029   000D   002A"
    expected_error = pytest.raises(
        expected_exception=LimitReachedError,
        match=r"^Maximum /ToUnicode size limit reached: 100002 > 100000\.$",
    )
    with expected_error:
        parse_bfchar(line=raw_line, map_dict=mapping, int_entry=entries)
    assert mapping == {}


================================================
FILE: tests/test_codecs.py
================================================
"""Test LZW-related code."""
from io import BytesIO

import pytest

from pypdf import PdfReader
from pypdf._codecs._codecs import LzwCodec
from pypdf.errors import LimitReachedError

from . import RESOURCE_ROOT, get_data_from_url

test_cases = [
    pytest.param(b"", id="Empty input"),
    pytest.param(b"A", id="Single character"),
    pytest.param(b"AAAAAA", id="Repeating character"),
    pytest.param(b"Hello, World!", id="Simple text"),
    pytest.param(b"ABABABABABAB", id="Repeating pattern"),
    pytest.param(b"The quick brown fox jumps over the lazy dog", id="Longer text"),
    pytest.param(b"\x00\xFF\x00\xFF", id="Binary data"),
    pytest.param(
        b"BBBCBDBEBFBGBHBIBJBKBLBMBNBOBPBQBRBSBTBUBVBWBXBYBZB[B\\B]B^B_B`BaBbBcBdBeBfBgBhBiBjBkBlBmBnBoBpBqBrBsBtBuBvBwBxByCBCCCDCECFCGCHCICJCKCLCMCNCOCPCQCRCSCTCUCVCWCXCYCZC[C\\C]C^C_C`CaCbCcCdCeCfCgChCiCjCkClCmCnCoCpCqCrCsCtCuCvCwCxCyDBDCDDDEDFDGDHDIDJDKDLDMDNDODPDQDRDSDTDUDVDWDXDYDZD[D\\D]D^D_D`DaDbDcDdDeDfDgDhDiDjDkDlDmDnDoDpDqDrDsDtDuDvDwDxDyEBECEDEEEFEGEHEIEJEKELEMENEOEPEQERESETEUEVEWEXEYEZE[E\\E]E^E_E`EaEbEcEdEeEfEgEhEiEjEkElEmEnEoEpEqErEsEtEuEvEwExEyFBFCFDFEFFFGFHFIFJFKFLFMFNFOFPFQFRFSFTFUFVFWFXFYFZF[F\\F]F^F_F`FaFbFcFdFeFfFgFhFiFjFkFlFmFnFoFpFqFrFsFtFuFvFwFxFyGBGCGDGEGFGGGHGIGJGKGLGMGNGOGPGQGRGSGTGUGVGWGXGYGZG[G\\G]G^G_G`GaGbGcGdGeGfGgGhGiGjGkGlGmGnGoGpGqGrGsGtGuGvGwGxGyHBHCHDHEHFHGHHHIHJHKHLHMHNHOHPHQHRHSHTHUHVHWHXHYHZH[H\\H]H^H_H`HaHbHcHdHeHfHgHhHiHjHkHlHmHnHoHpHqHrHsHtHuHvHwHxHyIBICIDIEIFIGIHIIIJIKILIMINIOIPIQIRISITIUIVIWIXIYIZI[I\\I]I^I_I`IaIbIcIdIeIfIgIhIiIjIkIlImInIoIpIqIrIsItIuIvIwIxIyJBJCJDJEJFJGJHJIJJJKJLJMJNJOJPJQJRJSJTJUJVJWJXJYJZJ[J\\J]J^J_J`JaJbJcJdJeJfJgJhJiJjJkJlJmJnJoJpJqJrJsJtJuJvJwJxJyKBKCKDKEKFKGKHKIKJKKKLKMKNKOKPKQKRKSKTKUKVKWKXKYKZK[K\\K]K^K_K`KaKbKcKdKeKfKgKhKiKjKkKlKmKnKoKpKqKrKsKtKuKvKwKxKyLBLCLDLELFLGLHLILJLKLLLMLNLOLPLQLRLSLTLULVLWLXLYLZL[L\\L]L^L_L`LaLbLcLdLeLfLgLhLiLjLkLlLmLnLoLpLqLrLsLtLuLvLwLxLyMBMCMDMEMFMGMHMIMJMKMLMMMNMOMPMQMRMSMTMUMVMWMXMYMZM[M\\M]M^M_M`MaMbMcMdMeMfMgMhMiMjMkMlMmMnMoMpMqMrMsMtMuMvMwMxMyNBNCNDNENFNGNHNINJNKNLNMNNNONPNQNRNSNTNUNVNWNXNYNZN[N\\N]N^N_N`NaNbNcNdNeNfNgNhNiNjNkNlNmNnNoNpNqNrNsNtNuNvNwNxNyOBOCODOEOFOGOHOIOJOKOLOMONOOOPOQOROSOTOUOVOWOXOYOZO[O\\O]O^O_O`OaObOcOdOeOfOgOhOiOjOkOlOmOnOoOpOqOrOsOtOuOvOwOxOyPBPCPDPEPFPGPHPIPJPKPLPMPNPOPPPQPRPSPTPUPVPWPXPYPZP[P\\P]P^P_P`PaPbPcPdPePfPgPhPiPjPkPlPmPnPoPpPqPrPsPtPuPvPwPxPyQBQCQDQEQFQGQHQIQJQKQLQMQNQOQPQQQRQSQTQUQVQWQXQYQZQ[Q\\Q]Q^Q_Q`QaQbQcQdQeQfQgQhQiQjQkQlQmQnQoQpQqQrQsQtQuQvQwQxQyRBRCRDRERFRGRHRIRJRKRLRMRNRORPRQRRRSRTRURVRWRXRYRZR[R\\R]R^R_R`RaRbRcRdReRfRgRhRiRjRkRlRmRnRoRpRqRrRsRtRuRvRwRxRySBSCSDSESFSGSHSISJSKSLSMSNSOSPSQSRSSSTSUSVSWSXSYSZS[S\\S]S^S_S`SaSbSc
SdSeSfSgShSiSjSkSlSmSnSoSpSqSrSsStSuSvSwSxSyTBTCTDTETFTGTHTITJTKTLTMTNTOTPTQTRTSTTTUTVTWTXTYTZT[T\\T]T^T_T`TaTbTcTdTeTfTgThTiTjTkTlTmTnToTpTqTrTsTtTuTvTwTxTyUBUCUDUEUFUGUHUIUJUKULUMUNUOUPUQURUSUTUUUVUWUXUYUZU[U\\U]U^U_U`UaUbUcUdUeUfUgUhUiUjUkUlUmUnUoUpUqUrUsUtUuUvUwUxUyVBVCVDVEVFVGVHVIVJVKVLVMVNVOVPVQVRVSVTVUVVVWVXVYVZV[V\\V]V^V_V`VaVbVcVdVeVfVgVhViVjVkVlVmVnVoVpVqVrVsVtVuVvVwVxVyWBWCWDWEWFWGWHWIWJWKWLWMWNWOWPWQWRWSWTWUWVWWWXWYWZW[W\\W]W^W_W`WaWbWcWdWeWfWgWhWiWjWkWlWmWnWoWpWqWrWsWtWuWvWwWxWyXBXCXDXEXFXGXHXIXJXKXLXMXNXOXPXQXRXSXTXUXVXWXXXYXZX[X\\X]X^X_X`XaXbXcXdXeXfXgXhXiXjXkXlXmXnXoXpXqXrXsXtXuXvXwXxXyYBYCYDYEYFYGYHYIYJYKYLYMYNYOYPYQYRYSYTYUYVYWYXYYYZY[Y\\Y]Y^Y_Y`YaYbYcYdYeYfYgYhYiYjYkYlYmYnYoYpYqYrYsYtYuYvYwYxYyZBZCZDZEZFZGZHZIZJZKZLZMZNZOZPZQZRZSZTZUZVZWZXZYZZZ[Z\\Z]Z^Z_Z`ZaZbZcZdZeZfZgZhZiZjZkZlZmZnZoZpZqZrZsZtZuZvZwZxZy[B[C[D[E[F[G[H[I[J[K[L[M[N[O[P[Q[R[S[T[U[V[W[X[Y[Z[[[\\[][^[_[`[a[b[c[d[e[f[g[h[i[j[k[l[m[n[o[p[q[r[s[t[u[v[w[x[y\\B\\C\\D\\E\\F\\G\\H\\I\\J\\K\\L\\M\\N\\O\\P\\Q\\R\\S\\T\\U\\V\\W\\X\\Y\\Z\\[\\\\\\]\\^\\_\\`\\a\\b\\c\\d\\e\\f\\g\\h\\i\\j\\k\\l\\m\\n\\o\\p\\q\\r\\s\\t\\u\\v\\w\\x\\y]B]C]D]E]F]G]H]I]J]K]L]M]N]O]P]Q]R]S]T]U]V]W]X]Y]Z][]\\]]]^]_]`]a]b]c]d]e]f]g]h]i]j]k]l]m]n]o]p]q]r]s]t]u]v]w]x]y^B^C^D^E^F^G^H^I^J^K^L^M^N^O^P^Q^R^S^T^U^V^W^X^Y^Z^[^\\^]^^^_^`^a^b^c^d^e^f^g^h^i^j^k^l^m^n^o^p^q^r^s^t^u^v^w^x^y_B_C_D_E_F_G_H_I_J_K_L_M_N_O_P_Q_R_S_T_U_V_W_X_Y_Z_[_\\_]_^___`_a_b_c_d_e_f_g_h_i_j_k_l_m_n_o_p_q_r_s_t_u_v_w_x_y`B`C`D`E`F`G`H`I`J`K`L`M`N`O`P`Q`R`S`T`U`V`W`X`Y`Z`[`\\`]`^`_```a`b`c`d`e`f`g`h`i`j`k`l`m`n`o`p`q`r`s`t`u`v`w`x`yaBaCaDaEaFaGaHaIaJaKaLaMaNaOaPaQaRaSaTaUaVaWaXaYaZa[a\\a]a^a_a`aaabacadaeafagahaiajakalamanaoapaqarasatauavawaxaybBbCbDbEbFbGbHbIbJbKbLbMbNbObPbQbRbSbTbUbVbWbXbYbZb[b\\b]b^b_b`babbbcbdbebfbgbhbibjbkblbmbnbobpbqbrbsbtbubvbwbxbycBcCcDcEcFcGcHcIcJcKcLcMcNcOcPcQcRcScTcUcVcWcXcYcZc[c\\c]c^c_c`cacbcccdcecfcgchcicjckclcmcncocpcqcrcsctcucvcwcxcydBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdnd
odpdqdrdsdtdudvdwdxdyeBeCeDeEeFeGeHeIeJeKeLeMeNeOePeQeReSeTeUeVeWeXeYeZe[e\\e]e^e_e`eaebecedeeefegeheiejekelemeneoepeqereseteuevewexeyfBfCfDfEfFfGfHfIfJfKfLfMfNfOfPfQfRfSfTfUfVfWfXfYfZf[f\\f]f^f_f`fafbfcfdfefffgfhfifjfkflfmfnfofpfqfrfsftfufvfwfxfygBgCgDgEgFgGgHgIgJgKgLgMgNgOgPgQgRgSgTgUgVgWgXgYgZg[g\\g]g^g_g`gagbgcgdgegfggghgigjgkglgmgngogpgqgrgsgtgugvgwgxgyhBhChDhEhFhGhHhIhJhKhLhMhNhOhPhQhRhShThUhVhWhXhYhZh[h\\h]h^h_h`hahbhchdhehfhghhhihjhkhlhmhnhohphqhrhshthuhvhwhxhyiBiCiDiEiFiGiHiIiJiKiLiMiNiOiPiQiRiSiTiUiViWiXiYiZi[i\\i]i^i_i`iaibicidieifigihiiijikiliminioipiqirisitiuiviwixiyjBjCjDjEjFjGjHjIjJjKjLjMjNjOjPjQjRjSjTjUjVjWjXjYjZj[j\\j]j^j_j`jajbjcjdjejfjgjhjijjjkjljmjnjojpjqjrjsjtjujvjwjxjykBkCkDkEkFkGkHkIkJkKkLkMkNkOkPkQkRkSkTkUkVkWkXkYkZk[k\\k]k^k_k`kakbkckdkekfkgkhkikjkkklkmknkokpkqkrksktkukvkwkxkylBlClDlElFlGlHlIlJlKlLlMlNlOlPlQlRlSlTlUlVlWlXlYlZl[l\\l]l^l_l`lalblcldlelflglhliljlklllmlnlolplqlrlsltlulvlwlxlymBmCmDmEmFmGmHmImJmKmLmMmNmOmPmQmRmSmTmUmVmWmXmYmZm[m\\m]m^m_m`mambmcmdmemfmgmhmimjmkmlmmmnmompmqmrmsmtmumvmwmxmynBnCnDnEnFnGnHnInJnKnLnMnNnOnPnQnRnSnTnUnVnWnXnYnZn[n\\n]n^n_n`nanbncndnenfngnhninjnknlnmnnnonpnqnrnsntnunvnwnxnyoBoCoDoEoFoGoHoIoJoKoLoMoNoOoPoQoRoSoToUoVoWoXoYoZo[o\\o]o^o_o`oaobocodoeofogohoiojokolomonooopoqorosotouovowoxoypBpCpDpEpFpGpHpIpJpKpLpMpNpOpPpQpRpSpTpUpVpWpXpYpZp[p\\p]p^p_p`papbpcpdpepfpgphpipjpkplpmpnpopppqprpsptpupvpwpxpyqBqCqDqEqFqGqHqIqJqKqLqMqNqOqPqQqRqSqTqUqVqWqXqYqZq[q\\q]q^q_q`qaqbqcqdqeqfqgqhqiqjqkqlqmqnqoqpqqqrqsqtquqvqwqxqyrBrCrDrErFrGrHrIrJrKrLrMrNrOrPrQrRrSrTrUrVrWrXrYrZr[r\\r]r^r_r`rarbrcrdrerfrgrhrirjrkrlrmrnrorprqrrrsrtrurvrwrxrysBsCsDsEsFsGsHsIsJsKsLsMsNsOsPsQsRsSsTsUsVsWsXsYsZs[s\\s]s^s_s`sasbscsdsesfsgshsisjskslsmsnsospsqsrssstsusvswsxsytBtCtDtEtFtGtHtItJtKtLtMtNtOtPtQtRtStTtUtVtWtXtYtZt[t\\t]t^t_t`tatbtctdtetftgthtitjtktltmtntotptqtrtstttutvtwtxtyuBuCuDuEuFuGuHuIuJuKuLuMuNuOuPuQuRuSuTuUuVuWuXuYuZu[u\\u]u^u_u`uaubucudueufuguhuiujukulumunuoupuqurusutuuuvuwuxuyvBvCvDvEvFvGvHvIvJvKvLvMvNvOvPvQvRvSvTvUvVvWvXvYvZv[v\\v]v
^v_v`vavbvcvdvevfvgvhvivjvkvlvmvnvovpvqvrvsvtvuvvvwvxvywBwCwDwEwFwGwHwIwJwKwLwMwNwOwPwQwRwSwTwUwVwWwXwYwZw[w\\w]w^w_w`wawbwcwdwewfwgwhwiwjwkwlwmwnwowpwqwrwswtwuwvwwwxwyxBxCxDxExFxGxHxIxJxKxLxMxNxOxPxQxRxSxTxUxVxWxXxYxZx[x\\x]x^x_x`xaxbxcxdxexfxgxhxixjxkxlxmxnxoxpxqxrxsxtxuxvxwxxxyyByCyDyEyFyGyHyIyJyKyLyMyNyOyPyQyRySyTyUyVyWyXyYyZy[y\\y]y^y_y`yaybycydyeyfygyhyiyjykylymynyoypyqyrysytyuyvywyxyyBBBBBCBBDBBEBBFBBGBBHBBIBBJBBKBBLBBMBBNBBOBBPBBQBBRBBSBBTBBUBBVBBWBBXBBYBBZBB[BB\\BB]BB^BB_BB`BBaBBbBBcBBdBBeBBfBBgBBhBBiBBjBBkBBlBBmBBnBBoBBpBBqBBrBBsBBtBBuBBvBBwBBxBByBCBBCCBCDBCEBCFBCGBCHBCIBCJBCKBCLBCMBCNBCOBCPBCQBCRBCSBCTBCUBCVBCWBCXBCYBCZBC[BC\\BC]BC^BC_BC`BCaBCbBCcBCdBCeBCfBCgBChBCiBCjBCkBClBCmBCnBCoBCpBCqBCrBCsBCtBCuBCvBCwBCxBCyBDBBDCBDDBDEBDFBDGBDHBDIBDJBDKBDLBDMBDNBDOBDPBDQBDRBDSBDTBDUBDVBDWBDXBDYBDZBD[BD\\BD]BD^BD_BD`BDaBDbBDcBDdBDeBDfBDgBDhBDiBDjBDkBDlBDmBDnBDoBDpBDqBDrBDsBDtBDuBDvBDwBDxBDyBEBBECBEDBEEBEFBEGBEHBEIBEJBEKBELBEMBENBEOBEPBEQBERBESBETBEUBEVBEWBEXBEYBEZBE[BE\\BE]BE^BE_BE`BEaBEbBEcBEdBEeBEfBEgBEhBEiBEjBEkBElBEmBEnBEoBEpBEqBErBEsBEtBEuBEvBEwBExBEyBFBBFCBFDBFEBFFBFGBFHBFIBFJBFKBFLBFMBFNBFOBFPBFQBFRBFSBFTBFUBFVBFWBFXBFYBFZBF[BF\\BF]BF^BF_BF`BFaBFbBFcBFdBFeBFfBFgBFhBFiBFjBFkBFlBFmBFnBFoBFpBFqBFrBFsBFtBFuBFvBFwBFxBFyBGBBGCBGDBGEBGFBGGBGHBGIBGJBGKBGLBGMBGNBGOBGPBGQBGRBGSBGTBGUBGVBGWBGXBGYBGZBG[BG\\BG]BG^BG_BG`BGaBGbBGcBGdBGeBGfBGgBGhBGiBGjBGkBGlBGmBGnBGoBGpBGqBGrBGsBGtBGuBGvBGwBGxBGyBHBBHCBHDBHEBHFBHGBHHBHIBHJBHKBHLBHMBHNBHOBHPBHQBHRBHSBHTBHUBHVBHWBHXBHYBHZBH[BH\\BH]BH^BH_BH`BHaBHbBHcBHdBHeBHfBHgBHhBHiBHjBHkBHlBHmBHnBHoBHpBHqBHrBHsBHtBHuBHvBHwBHxBHyBIBBICBIDBIEBIFBIGBIHBIIBIJBIKBILBIMBINBIOBIPBIQBIRBISBITBIUBIVBIWBIXBIYBIZBI[BI\\BI]BI^BI_BI`BIaBIbBIcBIdBIeBIfBIgBIhBIiBIjBIkBIlBImBInBIoBIpBIqBIrBIsBItBIuBIvBIwBIxBIyBJBBJCBJDBJEBJFBJGBJHBJIBJJBJKBJLBJMBJNBJOBJPBJQBJRBJSBJTBJUBJVBJWBJXBJYBJZBJ[BJ\\BJ]BJ^BJ_BJ`BJaBJbBJcBJdBJeBJfBJgBJhBJiBJjBJkBJlBJmBJnBJoBJpBJqBJrBJsBJtBJuBJvBJwBJxBJyBKBBKCBKDBKEBKFBKGBKHBKIBKJBKKBKLBKMBKNBKOBKPBKQBKRBKSBKTBKUBKVBKWBKXBKYBKZBK[BK\\BK]
BK^BK_BK`BKaBKbBKcBKdBKeBKfBKgBKhBKiBKjBKkBKlBKmBKnBKoBKpBKqBKrBKsBKtBKuBKvBKwBKxBKyBLBBLCBLDBLEBLFBLGBLHBLIBLJBLKBLLBLMBLNBLOBLPBLQBLRBLSBLTBLUBLVBLWBLXBLYBLZBL[BL\\BL]BL^BL_BL`BLaBLbBLcBLdBLeBLfBLgBLhBLiBLjBLkBLlBLmBLnBLoBLpBLqBLrBLsBLtBLuBLvBLwBLxBLyBMBBMCBMDBMEBMFBMGBMHBMIBMJBMKBMLBMMBMNBMOBMPBMQBMRBMSBMTBMUBMVBMWBMXBMYBMZBM[BM\\BM]BM^BM_BM`BMaBMbBMcBMdBMeBMfBMgBMhBMiBMjBMkBMlBMmBMnBMoBMpBMqBMrBMsBMtBMuBMvBMwBMxBMyBNBBNCBNDBNEBNFBNGBNHBNIBNJBNKBNLBNMBNNBNOBNPBNQBNRBNSBNTBNUBNVBNWBNXBNYBNZBN[BN\\BN]BN^BN_",
        id="Table overflow",
    ),
]


@pytest.mark.parametrize("data", test_cases)
def test_encode_decode(data):
    """Encoding followed by decoding yields the original data."""
    codec = LzwCodec()
    roundtrip = codec.decode(codec.encode(data))
    assert roundtrip == data


@pytest.mark.parametrize(
    ("plain", "expected_encoded"),
    [
        (b"", b"\x80@@"),
        (b"A", b"\x80\x10` "),
        (b"AAAAAA", b"\x80\x10`P8\x08"),
        (b"Hello, World!", b"\x80\x12\x0c\xa6\xc3a\xbcX +\x9b\xceF\xc3 \x86\x02"),
    ],
)
def test_encode_lzw(plain, expected_encoded):
    """The encoder produces the known byte sequences for the given inputs."""
    assert LzwCodec().encode(plain) == expected_encoded


@pytest.mark.parametrize(
    ("encoded", "expected_decoded"),
    [
        # _pack_codes_into_bytes([256, 65, 66, 67, 68, 256, 256, 69, 70, 71, 72, 257])
        (b"\x80\x10HD2$\x02\x00E#\x11\xc9\x10\x10", b"ABCDEFGH"),  # Clear twice.
        # _pack_codes_into_bytes([65, 66, 67, 68, 257])
        (b" \x90\x88dH\x08", b"ABCD"),  # No explicit initial clear marker.
    ],
)
def test_decode_lzw(encoded, expected_decoded):
    """The decoder produces the known plain data for the given byte sequences."""
    assert LzwCodec().decode(encoded) == expected_decoded


def test_lzw_decoder_table_overflow(caplog):
    """Too large table indexes are ignored with a log message; the table has 4096 entries afterwards."""
    encoded = (RESOURCE_ROOT / "lzw_decoder_table_overflow.bin").read_bytes()
    expected_prefix = b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@'

    codec = LzwCodec()
    decoded = codec.decode(encoded)

    assert decoded.startswith(expected_prefix)
    assert len(codec.decoding_table) == 4096
    assert "Ignoring too large LZW table index." in caplog.text


@pytest.mark.enable_socket
@pytest.mark.timeout(timeout=15, method="thread")
def test_lzw_decoder_large_stream_performance(caplog):
    """Fetching and decoding a large LZW stream has to finish within the timeout set by the marker."""
    encoded = get_data_from_url(name="large_lzw_example_encoded.dat")
    codec = LzwCodec()
    codec.decode(encoded)


@pytest.mark.enable_socket
def test_lzw_decoder__output_limit():
    """Loading the first image of the page runs into the decompression output limit."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/23057035/lzw__output_limit.pdf",
        name="lzw__output_limit.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_data)).pages[0]

    expected_error = pytest.raises(
        expected_exception=LimitReachedError,
        match=r"^Limit reached while decompressing: 75000828 > 75000000$",
    )
    with expected_error:
        first_page.images[0].image.load()


================================================
FILE: tests/test_constants.py
================================================
"""Test the pypdf.constants module."""
import re
from typing import Callable

import pytest

from pypdf.constants import PDF_KEYS, GraphicsStateParameters, UserAccessPermissions


def test_slash_prefix():
    """
    Check the naming conventions of the constants in the PDF_KEYS classes.

    Every non-callable, non-dunder attribute value has to:
    - Start with a slash "/"
    - Be followed by an uppercase letter
    - Contain alphanumeric characters (letters and digits) only
    - Match its attribute name case-insensitively, with underscores removed

    The attributes "ca" and "op" of GraphicsStateParameters are exempt from
    the pattern check as they may be lowercase.
    """
    pattern = re.compile(r"^\/[A-Z]+[a-zA-Z0-9]*$")
    for cls in PDF_KEYS:
        # Magic methods are of no interest here.
        candidates = (
            name for name in dir(cls)
            if not (name.startswith("__") and name.endswith("__"))
        )
        for attr in candidates:
            constant_value = getattr(cls, attr)
            # Methods are of no interest either.
            if isinstance(constant_value, Callable):
                continue

            assert constant_value.startswith("/")
            assert attr.replace("_", "").casefold() == constant_value[1:].casefold()

            # There are a few exceptions that may be lowercase.
            may_be_lowercase = cls == GraphicsStateParameters and attr in ["ca", "op"]
            if not may_be_lowercase:
                assert pattern.match(constant_value)


def test_user_access_permissions__dict_handling():
    """Conversions between UserAccessPermissions values and their dictionary form."""
    flag_names = (
        "add_or_modify",
        "assemble_doc",
        "extract",
        "extract_text_and_graphics",
        "fill_form_fields",
        "modify",
        "print",
        "print_to_representation",
    )
    nothing_allowed = dict.fromkeys(flag_names, False)

    # Value is mix of configurable and reserved bits.
    # Reserved bits should not be part of the dictionary.
    as_dict = UserAccessPermissions(512 + 64 + 8).to_dict()
    assert as_dict == {**nothing_allowed, "extract_text_and_graphics": True, "modify": True}

    # Convert the dictionary back to an integer.
    # This should add the reserved bits automatically.
    assert UserAccessPermissions.from_dict(as_dict) == 4294963912

    # Roundtrip for valid dictionary.
    data = {
        **nothing_allowed,
        "add_or_modify": True,
        "extract_text_and_graphics": True,
        "modify": True,
        "print_to_representation": True,
    }
    assert UserAccessPermissions.from_dict(data).to_dict() == data

    # Empty inputs.
    assert UserAccessPermissions.from_dict({}) == 4294963392  # Reserved bits.
    assert UserAccessPermissions(0).to_dict() == nothing_allowed

    # Unknown dictionary keys.
    unknown = {
        "key1": False,
        "key2": True,
    }
    data = {"add_or_modify": True, **unknown}
    with pytest.raises(ValueError, match=f"Unknown dictionary keys: {unknown!r}"):
        UserAccessPermissions.from_dict(data)


def test_user_access_permissions__all():
    """`all()` is a 32-bit value with R1 and R2 cleared and PRINT, R7 and R31 set."""
    granted = UserAccessPermissions.all()
    as_int = int(granted)
    as_binary = bin(granted)

    assert as_binary.startswith("0b")
    digits = as_binary[2:]
    assert len(digits) == 32  # 32-bit integer

    for cleared in (UserAccessPermissions.R1, UserAccessPermissions.R2):
        assert as_int & cleared == 0
    for enabled in (UserAccessPermissions.PRINT, UserAccessPermissions.R7, UserAccessPermissions.R31):
        assert as_int & enabled == enabled


================================================
FILE: tests/test_doc_common.py
================================================
"""Test the pypdf._doc_common module."""
import itertools
import re
import shutil
import subprocess
from io import BytesIO
from operator import itemgetter
from pathlib import Path
from unittest import mock

import pytest

from pypdf import PdfReader, PdfWriter
from pypdf.errors import LimitReachedError, PdfReadError
from pypdf.filters import FlateDecode
from pypdf.generic import (
    ArrayObject,
    DictionaryObject,
    EmbeddedFile,
    EncodedStreamObject,
    NameObject,
    NullObject,
    TextStringObject,
    ViewerPreferences,
)
from tests import RESOURCE_ROOT, SAMPLE_ROOT, get_data_from_url

PDFATTACH_BINARY = shutil.which("pdfattach")


@pytest.mark.skipif(PDFATTACH_BINARY is None, reason="Requires poppler-utils")
def test_attachments(tmpdir):
    """
    Attachments are listed and retrieved for different /UF and /F combinations.

    A text file gets attached to a sample PDF file with the ``pdfattach``
    binary. The remaining variants are derived from the result by rewriting
    the ``/UF`` key inside the raw bytes.
    """
    tmpdir = Path(tmpdir)

    # No attachments.
    clean_path = SAMPLE_ROOT / "002-trivial-libre-office-writer" / "002-trivial-libre-office-writer.pdf"
    with PdfReader(clean_path) as pdf:
        assert pdf._list_attachments() == []
        assert list(pdf.attachment_list) == []

    # UF = name.
    attached_path = tmpdir / "attached.pdf"
    file_path = tmpdir / "test.txt"
    file_path.write_bytes(b"Hello World\n")
    # Fail right here with the exit status of the tool if attaching does not work,
    # instead of running into an unrelated error when reading the output file.
    subprocess.run([PDFATTACH_BINARY, clean_path, file_path, attached_path], check=True)  # noqa: S603
    with PdfReader(attached_path) as pdf:
        assert pdf._list_attachments() == ["test.txt"]
        assert pdf._get_attachments("test.txt") == {"test.txt": b"Hello World\n"}
        assert [(x.name, x.content) for x in pdf.attachment_list] == [("test.txt", b"Hello World\n")]
        assert next(pdf.attachment_list).alternative_name == "test.txt"

    # UF != name.
    different_path = tmpdir / "different.pdf"
    different_path.write_bytes(re.sub(rb" /UF [^/]+ /", b" /UF(my-file.txt) /", attached_path.read_bytes()))
    with PdfReader(different_path) as pdf:
        assert pdf._list_attachments() == ["test.txt", "my-file.txt"]
        assert pdf._get_attachments("test.txt") == {"test.txt": b"Hello World\n"}
        assert pdf._get_attachments("my-file.txt") == {"my-file.txt": b"Hello World\n"}
        assert [(x.name, x.content) for x in pdf.attachment_list] == [("test.txt", b"Hello World\n")]
        assert next(pdf.attachment_list).alternative_name == "my-file.txt"

    # Only name.
    no_f_path = tmpdir / "no-f.pdf"
    no_f_path.write_bytes(re.sub(rb" /UF [^/]+ /", b" /", attached_path.read_bytes()))
    with PdfReader(no_f_path) as pdf:
        assert pdf._list_attachments() == ["test.txt"]
        assert pdf._get_attachments("test.txt") == {"test.txt": b"Hello World\n"}
        assert [(x.name, x.content) for x in pdf.attachment_list] == [("test.txt", b"Hello World\n")]
        assert next(pdf.attachment_list).alternative_name is None

    # UF and F.
    uf_f_path = tmpdir / "uf-f.pdf"
    uf_f_path.write_bytes(attached_path.read_bytes().replace(b" /UF ", b"/F(file.txt) /UF "))
    with PdfReader(uf_f_path) as pdf:
        assert pdf._list_attachments() == ["test.txt"]
        assert pdf._get_attachments("test.txt") == {"test.txt": b"Hello World\n"}
        assert [(x.name, x.content) for x in pdf.attachment_list] == [("test.txt", b"Hello World\n")]
        assert next(pdf.attachment_list).alternative_name == "test.txt"

    # Only F.
    only_f_path = tmpdir / "f.pdf"
    only_f_path.write_bytes(attached_path.read_bytes().replace(b" /UF ", b" /F "))
    with PdfReader(only_f_path) as pdf:
        assert pdf._list_attachments() == ["test.txt"]
        assert pdf._get_attachments("test.txt") == {"test.txt": b"Hello World\n"}
        assert [(x.name, x.content) for x in pdf.attachment_list] == [("test.txt", b"Hello World\n")]
        assert next(pdf.attachment_list).alternative_name == "test.txt"


def test_get_attachments__same_attachment_more_than_twice():
    """Five attachments sharing one name are returned as a list, in the order they were added."""
    writer = PdfWriter()
    writer.add_blank_page(100, 100)

    contents = [f"content{index}" for index in range(5)]
    for content in contents:
        writer.add_attachment("test.txt", content)
    encoded_contents = [content.encode() for content in contents]

    assert writer._get_attachments("test.txt") == {"test.txt": encoded_contents}
    listed = [(attachment.name, attachment.content) for attachment in writer.attachment_list]
    assert listed == [("test.txt", data) for data in encoded_contents]


def test_get_attachments__alternative_name_is_none():
    """An attachment without an alternative name is returned under its regular name."""
    writer = PdfWriter()
    attachment = EmbeddedFile(name="test.txt", pdf_object=writer.root_object)
    assert attachment.alternative_name is None

    # Replace the attachment list and the content lookup by fixed values.
    patched_list = mock.patch(
        "pypdf._writer.PdfWriter.attachment_list",
        new_callable=mock.PropertyMock(return_value=[attachment]),
    )
    patched_content = mock.patch(
        "pypdf.generic._files.EmbeddedFile.content",
        new_callable=mock.PropertyMock(return_value=b"content"),
    )
    with patched_list, patched_content:
        assert writer._get_attachments() == {"test.txt": b"content"}


@pytest.mark.enable_socket
def test_byte_encoded_named_destinations():
    """Named destinations of /GoTo link actions are found, both as `str` and as `TextStringObject`."""
    pdf_data = get_data_from_url(url="https://github.com/user-attachments/files/19820164/pypdf_issue.pdf", name="issue3261.pdf")
    reader = PdfReader(BytesIO(pdf_data))
    page = reader.pages[0]

    def xyz_destination(title, left, top):
        # All expected destinations are /XYZ ones on the first page without a zoom value.
        return {
            "/Title": title,
            "/Page": page.indirect_reference,
            "/Type": "/XYZ",
            "/Left": left,
            "/Top": top,
            "/Zoom": NullObject(),
        }

    for annotation in page.annotations:
        if annotation.get("/Subtype") != "/Link":
            continue
        action = annotation["/A"]
        if action["/S"] != "/GoTo":
            continue
        named_dest = action["/D"]
        assert str(named_dest) in reader.named_destinations
        assert TextStringObject(named_dest) in reader.named_destinations

    cite_title = "cite.dacÃ\xadk2025racerflightweightstaticdata"
    assert reader.named_destinations == {
        "Doc-Start": xyz_destination("Doc-Start", 133.768, 667.198),
        cite_title: xyz_destination(cite_title, 133.768, 614.424),
        # This is the same as the previous entry, but with `str(name)` instead of the title.
        "楣整搮捡귃㉫㈰爵捡牥汦杩瑨敷杩瑨瑳瑡捩慤慴": xyz_destination(cite_title, 133.768, 614.424),
        "page.1": xyz_destination("page.1", 132.768, 705.06),
        "section*.1": xyz_destination("section*.1", 133.768, 642.222),
    }


def test_viewer_preferences__indirect_reference():
    """The viewer preferences are resolved as object (0, 24) on first access and reused afterwards."""
    reader = PdfReader(RESOURCE_ROOT / "git.pdf")
    object_key = (0, 24)

    assert object_key not in reader.resolved_objects
    viewer_preferences = reader.viewer_preferences
    assert isinstance(viewer_preferences, ViewerPreferences)
    assert viewer_preferences == {"/DisplayDocTitle": True}
    assert object_key in reader.resolved_objects

    # Repeated access and the cache of resolved objects yield the very same instance.
    assert viewer_preferences is reader.viewer_preferences
    assert viewer_preferences is reader.resolved_objects[object_key]


@pytest.mark.enable_socket
def test_named_destinations__tree_is_null_object():
    """The named destinations of the file from issue #3330 are an empty mapping."""
    pdf_data = get_data_from_url(
        url="https://github.com/user-attachments/files/20885216/test.pdf",
        name="issue3330.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))

    named_destinations = reader.named_destinations
    assert named_destinations == {}


@pytest.mark.enable_socket
def test_outline__issue3462():
    """The outline of the ``issue3462.pdf`` sample is read completely and in document order."""
    pdf_bytes = get_data_from_url(
        url="https://github.com/user-attachments/files/22293402/e371fffe0b_a7cccde95a.pdf",
        name="issue3462.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # Outline entries are either single items or lists of child items; flatten one level.
    flattened = []
    for entry in reader.outline:
        if isinstance(entry, list):
            flattened.extend(entry)
        else:
            flattened.append(entry)
    titles = [item["/Title"] for item in flattened]

    # Each section title is followed by its children "Page 1" ... "Page <count>".
    sections = [
        ("AR 2021 - Daftar Isi", 5),
        ("AR 2021 Book 001 (Highlights - Ikhtisar Saham)", 5),
        ("AR 2021 Book 002 (Laporan Manajemen)", 9),
        ("AR 2021 Book 003-1 (Profil Perusahaan)", 47),
        ("AR 2021 Book 003-2 (Sumber Daya Manusia)", 12),
        ("AR 2021 Book 003-3 (Komposisi pemegang saham)", 6),
        ("AR 2021 Book 003-4 (Kronologis Pencatatan Saham)", 2),
        ("AR 2021 Book 003-5 (Akuntan Publik Independen)", 3),
        ("AR 2021 Book 004 (Analisa dan Pembahasan Manajemen)", 21),
        ("AR 2021 Book 005-1 (Tata Kelola Perusahaan)", 12),
        ("AR 2021 Book 005-2 (Direksi-Komisaris)", 38),
        ("AR 2021 Book 005-3 (Komite Audit)", 9),
        ("AR 2021 Book 005-4 (Sekretaris Perusahaan)", 10),
        ("AR 2021 Book 005-5 (Unit Audit Internal)", 6),
        ("AR 2021 Book 005-6 (Sistem Pengendalian Internal)", 8),
        ("AR 2021 Book 005-7 (Program Saham)", 1),
        ("AR 2021 Book 005-8 ( Whistleblowing)", 25),
        ("AR 2021 Book 006 (Tanggung Jawab Sosial - CSR)", 2),
        ("AR 2021 Book 007-1 (LAPORAN KEUANGAN KONSOLIDASIAN)", 1),
        ("AR 2021 Book 007-2 (Isi Laporan Keuangan)", 0),
        ("AR 2021 Book 008 (Tanggung Jawab Atas Laporan Tahunan)", 2),
    ]
    expected_titles = []
    for section_title, page_count in sections:
        expected_titles.append(section_title)
        expected_titles.extend(f"Page {number}" for number in range(1, page_count + 1))

    assert titles == expected_titles


def test_flatten__cyclic_references():
    """Flattening a page tree whose first kid points back at the tree itself raises ``PdfReadError``."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    assert len(reader.pages) == 1
    reader._flatten()

    # Redirect the first child of object 10 to object 10 itself to create the cycle.
    pages_object = reader.get_object(10)
    first_kid = pages_object[NameObject("/Kids")][0]
    first_kid.indirect_reference.idnum = 10
    reader.resolved_objects[(10, 0)] = pages_object

    with pytest.raises(expected_exception=PdfReadError, match=r"^Detected cyclic page references\.$"):
        reader._flatten()


@pytest.mark.enable_socket
@pytest.mark.timeout(10)
def test_get_outline__cyclic_references(caplog):
    """An outline with a cycle yields each bookmark once and logs a cycle warning first."""
    pdf_bytes = get_data_from_url(
        url="https://github.com/user-attachments/files/24859044/circular_outline.pdf",
        name="circular_outline.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # Read the outline before touching the pages, so the first log record stems from the outline.
    outline = reader.outline
    expected = [
        {
            "/%is_open%": True,
            "/Page": reader.pages[0].indirect_reference,
            "/Title": title,
            "/Type": "/Fit",
        }
        for title in ("Bookmark A", "Bookmark B")
    ]

    assert outline == expected
    assert caplog.messages[0].startswith("Detected cycle in outline structure for {")


@pytest.mark.enable_socket
@pytest.mark.timeout(10)
def test_get_outline__cyclic_references__nested_handling(caplog):
    """A child entry shared by both bookmarks of the cyclic outline is reported under each of them."""
    pdf_bytes = get_data_from_url(
        url="https://github.com/user-attachments/files/24859044/circular_outline.pdf",
        name="circular_outline.pdf",
    )
    writer = PdfWriter(clone_from=BytesIO(pdf_bytes))

    # Register an additional outline item and hook it in as the first child of objects 5 and 6.
    nested_outline = DictionaryObject()
    writer._add_object(nested_outline)
    object_5 = writer.get_object(5)
    destination = ArrayObject([writer.pages[0].indirect_reference, NameObject("/Fit")])
    object_6 = writer.get_object(6)
    nested_outline.update({
        NameObject("/Title"): TextStringObject("Nested entry"),
        NameObject("/Parent"): object_5,
        NameObject("/Dest"): destination,
        NameObject("/Next"): object_6,
    })
    writer.get_object(5)[NameObject("/First")] = nested_outline.indirect_reference
    writer.get_object(6)[NameObject("/First")] = nested_outline.indirect_reference

    # Read the outline before building the expectation, so the log order matches the outline walk.
    outline = writer.outline

    def entry(title):
        # All expected entries only differ in their title.
        return {
            "/%is_open%": True,
            "/Page": writer.pages[0].indirect_reference,
            "/Title": title,
            "/Type": "/Fit",
        }

    assert outline == [
        entry("Bookmark A"),
        [entry("Nested entry"), entry("Bookmark B")],
        entry("Bookmark B"),
        [entry("Nested entry")],
    ]
    assert caplog.messages[0].startswith("Detected cycle in outline structure for {")


def test_xfa__decompression_limit():
    """Reading ``reader.xfa`` raises ``LimitReachedError`` when the XFA stream exceeds the zlib output limit."""
    # One million identical bytes: compresses well, but expands far beyond the limit patched in below.
    payload = b"A" * 1_000_000
    compressed = FlateDecode.encode(payload, 9)

    writer = PdfWriter()
    writer.add_blank_page(width=72, height=72)

    # Store the compressed payload as a Flate-encoded stream object.
    stream = EncodedStreamObject()
    stream._data = compressed
    stream[NameObject("/Filter")] = NameObject("/FlateDecode")
    stream_reference = writer._add_object(stream)

    # Reference the stream from the /XFA array of a new AcroForm dictionary.
    acro = DictionaryObject()
    acro[NameObject("/XFA")] = ArrayObject([TextStringObject("datasets"), stream_reference])
    writer.root_object[NameObject("/AcroForm")] = writer._add_object(acro)

    data = BytesIO()
    writer.write(data)
    data.flush()

    reader = PdfReader(data)
    with mock.patch("pypdf.filters.ZLIB_MAX_OUTPUT_LENGTH", 75_000), pytest.raises(
            expected_exception=LimitReachedError,
            # The dots are escaped so that the message is matched literally instead of as wildcards.
            match=r"^Limit reached while decompressing\. 902 bytes remaining\.$",
    ):
        _ = reader.xfa


================================================
FILE: tests/test_encryption.py
================================================
"""Test the pypdf._encryption module."""
import secrets
from io import BytesIO

import pytest

import pypdf
from pypdf import PasswordType, PdfReader, PdfWriter
from pypdf._crypt_providers import crypt_provider
from pypdf._crypt_providers._fallback import _DEPENDENCY_ERROR_STR
from pypdf._encryption import AlgV5, CryptAES, CryptRC4
from pypdf.errors import DependencyError, PdfReadError
from tests import RESOURCE_ROOT, SAMPLE_ROOT

# Flags derived from the first element of `crypt_provider`, compared against the backend names.
USE_CRYPTOGRAPHY = crypt_provider[0] == "cryptography"
USE_PYCRYPTODOME = crypt_provider[0] == "pycryptodome"
# True when either backend is active; tests below skip or expect a DependencyError otherwise.
HAS_AES = USE_CRYPTOGRAPHY or USE_PYCRYPTODOME


@pytest.mark.parametrize(
    ("name", "requires_aes"),
    [
        # unencrypted pdf
        ("unencrypted.pdf", False),
        # created by:
        # qpdf --encrypt "" "" 40 -- unencrypted.pdf r2-empty-password.pdf
        ("r2-empty-password.pdf", False),
        # created by:
        # qpdf --encrypt "" "" 128 -- unencrypted.pdf r3-empty-password.pdf
        ("r3-empty-password.pdf", False),
        # created by:
        # qpdf --encrypt "asdfzxcv" "" 40 -- unencrypted.pdf r2-user-password.pdf
        ("r2-user-password.pdf", False),
        # created by:
        # qpdf --encrypt "" "asdfzxcv" 40 -- unencrypted.pdf r2-owner-password.pdf
        ("r2-owner-password.pdf", False),
        # created by:
        # qpdf --encrypt "asdfzxcv" "" 128 -- unencrypted.pdf r3-user-password.pdf
        ("r3-user-password.pdf", False),
        # created by:
        # qpdf --encrypt "asdfzxcv" "" 128 --force-V4 -- unencrypted.pdf r4-user-password.pdf
        ("r4-user-password.pdf", False),
        # created by:
        # qpdf --encrypt "" "asdfzxcv" 128 --force-V4 -- unencrypted.pdf r4-owner-password.pdf
        ("r4-owner-password.pdf", False),
        # created by:
        # qpdf --encrypt "asdfzxcv" "" 128 --use-aes=y -- unencrypted.pdf r4-aes-user-password.pdf
        ("r4-aes-user-password.pdf", True),
        # created by:
        # qpdf --encrypt "" "" 256 --force-R5 -- unencrypted.pdf r5-empty-password.pdf
        ("r5-empty-password.pdf", True),
        # created by:
        # qpdf --encrypt "asdfzxcv" "" 256 --force-R5 -- unencrypted.pdf r5-user-password.pdf
        ("r5-user-password.pdf", True),
        # created by:
        # qpdf --encrypt "" "asdfzxcv" 256 --force-R5 -- unencrypted.pdf r5-owner-password.pdf
        ("r5-owner-password.pdf", True),
        # created by:
        # qpdf --encrypt "" "" 256 -- unencrypted.pdf r6-empty-password.pdf
        ("r6-empty-password.pdf", True),
        # created by:
        # qpdf --encrypt "asdfzxcv" "" 256 -- unencrypted.pdf r6-user-password.pdf
        ("r6-user-password.pdf", True),
        # created by:
        # qpdf --encrypt "" "asdfzxcv" 256 -- unencrypted.pdf r6-owner-password.pdf
        ("r6-owner-password.pdf", True),
    ],
)
def test_encryption(name, requires_aes):
    """
    Sample PDFs of every supported encryption revision can be opened and read.

    For each sample this checks that:
    - a DependencyError with the expected message is raised when AES is needed but no backend is available
    - the encryption state of the file is detected correctly
    - the file has one page after decrypting with "asdfzxcv"
    - the non-empty metadata entries match the known values
    """
    pdf_path = RESOURCE_ROOT / "encryption" / name

    if requires_aes and not HAS_AES:
        # Without an AES backend, one of the steps below has to fail with the dependency hint.
        with pytest.raises(DependencyError) as exc:
            reader = pypdf.PdfReader(pdf_path)
            reader.decrypt("asdfzxcv")
            dict(reader.metadata)
        assert exc.value.args[0] == _DEPENDENCY_ERROR_STR
        return

    reader = pypdf.PdfReader(pdf_path)
    if not str(pdf_path).endswith("unencrypted.pdf"):
        assert reader.is_encrypted
        reader.decrypt("asdfzxcv")
    else:
        assert not reader.is_encrypted
    assert len(reader.pages) == 1

    # Entries with empty values are ignored for the comparison.
    metadata = {key: value for key, value in dict(reader.metadata).items() if value}
    assert metadata == {
        "/Author": "cheng",
        "/CreationDate": "D:20220414132421+05'24'",
        "/Creator": "WPS Writer",
        "/ModDate": "D:20220414132421+05'24'",
        "/SourceModified": "D:20220414132421+05'24'",
        "/Trapped": "/False",
    }


@pytest.mark.parametrize(
    ("name", "user_passwd", "owner_passwd"),
    [
        # created by
        # qpdf --encrypt "foo" "bar" 256 -- unencrypted.pdf r6-both-passwords.pdf
        ("r6-both-passwords.pdf", "foo", "bar"),
    ],
)
@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_pdf_with_both_passwords(name, user_passwd, owner_passwd):
    """
    A PDF protected by distinct user and owner passwords accepts both of them.

    The file must be detected as encrypted, each password must be reported with
    its matching ``PasswordType``, and the single page must be accessible afterwards.
    """
    reader = pypdf.PdfReader(RESOURCE_ROOT / "encryption" / name)
    assert reader.is_encrypted

    user_result = reader.decrypt(user_passwd)
    assert user_result == PasswordType.USER_PASSWORD
    owner_result = reader.decrypt(owner_passwd)
    assert owner_result == PasswordType.OWNER_PASSWORD

    assert len(reader.pages) == 1


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_aesv2_without_length_in_encrypt_dict():
    """
    An AESV2-encrypted PDF lacking /Length in its encrypt dict can be decrypted.

    Such files omit /Length in the main encrypt dict (which defaults to 40 bits),
    although AESV2 needs 128 bits. The key length should then be taken from the
    crypt filter dict.
    """
    sample = RESOURCE_ROOT / "encryption" / "r4-aes-v2-no-key-length.pdf"
    reader = PdfReader(sample)
    assert reader.is_encrypted

    # The empty password may be accepted as either the user or the owner password.
    accepted_types = (PasswordType.USER_PASSWORD, PasswordType.OWNER_PASSWORD)
    assert reader.decrypt("") in accepted_types
    assert len(reader.pages) == 1


@pytest.mark.parametrize(
    ("pdffile", "password"),
    [
        ("crazyones-encrypted-256.pdf", "password"),
        ("crazyones-encrypted-256.pdf", b"password"),
    ],
)
@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_read_page_from_encrypted_file_aes_256(pdffile, password):
    """
    A page can be read from an encrypted file, with the password given as str or bytes.

    This is a regression test for issue 327:
    IndexError for get_page() of decrypted file
    """
    reader = pypdf.PdfReader(RESOURCE_ROOT / pdffile, password=password)
    # Accessing the first page is the whole check: it must not raise.
    reader.pages[0]


@pytest.mark.parametrize(
    "names",
    [
        (
            [
                "unencrypted.pdf",
                "r3-user-password.pdf",
                "r4-aes-user-password.pdf",
                "r5-user-password.pdf",
            ]
        ),
    ],
)
@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_merge_encrypted_pdfs(names):
    """Readers of encrypted PDFs can be appended to a writer once they are decrypted."""
    writer = pypdf.PdfWriter()
    encryption_dir = RESOURCE_ROOT / "encryption"
    # Open all inputs first, then decrypt and append them one after another.
    readers = [pypdf.PdfReader(encryption_dir / file_name) for file_name in names]
    for reader in readers:
        if reader.is_encrypted:
            reader.decrypt("asdfzxcv")
        writer.append(reader)
    # The merged result is intentionally not written anywhere.
    writer.close()


@pytest.mark.skipif(
    USE_CRYPTOGRAPHY,
    reason="Limitations of cryptography. see https://github.com/pyca/cryptography/issues/2494",
)
@pytest.mark.parametrize(
    "cryptcls",
    [
        CryptRC4,
    ],
)
def test_encrypt_decrypt_with_cipher_class(cryptcls):
    """Decrypting what a cipher class encrypted returns the original message."""
    plaintext = b"Hello World"
    # A key of 128 zero bytes.
    cipher = cryptcls(bytes(128))
    ciphertext = cipher.encrypt(plaintext)
    assert cipher.decrypt(ciphertext) == plaintext


def test_attempt_decrypt_unencrypted_pdf():
    """Passing a password for a PDF that is not encrypted raises ``PdfReadError``."""
    with pytest.raises(PdfReadError) as exc:
        PdfReader(RESOURCE_ROOT / "crazyones.pdf", password="nonexistent")
    (message, *_) = exc.value.args
    assert message == "Not an encrypted file"


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_alg_v5_generate_values():
    """
    ``AlgV5.generate_values`` runs without error and returns exactly the expected entries.

    Only the presence of the keys is checked; the generated contents are not verified.
    """
    values = AlgV5.generate_values(
        R=5,
        user_password=b"foo",
        owner_password=b"bar",
        key=b"0123456789123451",
        p=0,
        metadata_encrypted=True,
    )
    # Comparing against a copy restricted to the expected keys fails if any other key is present.
    expected_keys = ("/U", "/UE", "/O", "/OE", "/Perms")
    assert values == {entry: values[entry] for entry in expected_keys}


@pytest.mark.parametrize(
    ("alg", "requires_aes"),
    [
        ("RC4-40", False),
        ("RC4-128", False),
        ("AES-128", True),
        ("AES-256-R5", True),
        ("AES-256", True),
        ("ABCD", False),
    ],
)
def test_pdf_encrypt(pdf_file_path, alg, requires_aes):
    """
    Encrypting with each algorithm produces a file that both passwords can open.

    An unknown algorithm name raises ``ValueError``; an AES algorithm without an
    AES backend raises ``DependencyError``. Otherwise the text extracted after a
    write/read round trip must equal the text of the source page.
    """
    user_password = secrets.token_urlsafe(10)
    owner_password = secrets.token_urlsafe(10)
    encrypt_arguments = {
        "user_password": user_password,
        "owner_password": owner_password,
        "algorithm": alg,
    }

    source_reader = PdfReader(RESOURCE_ROOT / "encryption" / "unencrypted.pdf")
    source_page = source_reader.pages[0]
    original_text = source_page.extract_text()

    writer = PdfWriter()
    writer.add_page(source_page)

    # Unknown algorithm names are rejected immediately.
    if alg == "ABCD":
        with pytest.raises(ValueError) as exc:
            writer.encrypt(**encrypt_arguments)
        assert exc.value.args[0] == "Algorithm 'ABCD' NOT supported"
        return

    # Without an AES backend, encrypting or writing has to fail with the dependency hint.
    if requires_aes and not HAS_AES:
        with pytest.raises(DependencyError) as exc:
            writer.encrypt(**encrypt_arguments)
            with open(pdf_file_path, "wb") as output_stream:
                writer.write(output_stream)
        assert exc.value.args[0] == _DEPENDENCY_ERROR_STR
        return

    writer.encrypt(**encrypt_arguments)
    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)

    encrypted_reader = PdfReader(pdf_file_path)
    assert encrypted_reader.is_encrypted
    assert encrypted_reader.decrypt(owner_password) == PasswordType.OWNER_PASSWORD
    assert encrypted_reader.decrypt(user_password) == PasswordType.USER_PASSWORD

    round_trip_text = encrypted_reader.pages[0].extract_text()
    assert original_text == round_trip_text


@pytest.mark.parametrize(
    "count",
    [1, 2, 3, 4, 5, 10],
)
def test_pdf_encrypt_multiple(pdf_file_path, count):
    """Calling ``encrypt`` several times on one writer still yields a readable encrypted file."""
    user_password = secrets.token_urlsafe(10)
    generated_owner_password = secrets.token_urlsafe(10)

    source_reader = PdfReader(RESOURCE_ROOT / "encryption" / "unencrypted.pdf")
    source_page = source_reader.pages[0]
    original_text = source_page.extract_text()

    writer = PdfWriter()
    writer.add_page(source_page)

    # The single-call case additionally covers a missing owner password.
    owner_password = None if count == 1 else generated_owner_password

    for _ in range(count):
        writer.encrypt(
            user_password=user_password,
            owner_password=owner_password,
            algorithm="RC4-128",
        )
    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)

    encrypted_reader = PdfReader(pdf_file_path)
    assert encrypted_reader.is_encrypted
    if owner_password is not None:
        assert encrypted_reader.decrypt(owner_password) == PasswordType.OWNER_PASSWORD
        assert encrypted_reader.decrypt(user_password) == PasswordType.USER_PASSWORD
    else:
        # NOTICE: without an owner password, the user password is reported as the owner password
        assert encrypted_reader.decrypt(user_password) == PasswordType.OWNER_PASSWORD

    round_trip_text = encrypted_reader.pages[0].extract_text()
    assert original_text == round_trip_text


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_aes_decrypt_corrupted_data():
    """Robustness check: decrypting random data of lengths 0, 17 and 32 must not raise."""
    cipher = CryptAES(secrets.token_bytes(16))
    for length in (0, 17, 32):
        garbage = secrets.token_bytes(length)
        cipher.decrypt(garbage)


@pytest.mark.samples
def test_encrypt_stream_dictionary(pdf_file_path):
    """An entry of an image's /ColorSpace array survives an encrypt/decrypt round trip unchanged."""
    user_password = secrets.token_urlsafe(10)

    source_reader = PdfReader(SAMPLE_ROOT / "023-cmyk-image/cmyk-image.pdf")
    source_page = source_reader.pages[0]
    image_before = source_reader.get_object(source_page.images["/I"].indirect_reference)

    writer = PdfWriter()
    writer.add_page(source_reader.pages[0])
    writer.encrypt(
        user_password=user_password,
        owner_password=None,
        algorithm="RC4-128",
    )
    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)

    encrypted_reader = PdfReader(pdf_file_path)
    assert encrypted_reader.is_encrypted
    # No owner password was given, so the user password is reported as the owner password.
    assert encrypted_reader.decrypt(user_password) == PasswordType.OWNER_PASSWORD
    decrypted_page = encrypted_reader.pages[0]
    image_after = encrypted_reader.get_object(decrypted_page.images["/I"].indirect_reference)

    assert image_after["/ColorSpace"][3] == image_before["/ColorSpace"][3]


def test_are_permissions_valid_none_for_unencrypted():
    """An unencrypted document reports ``None`` for are_permissions_valid."""
    sample = RESOURCE_ROOT / "encryption" / "unencrypted.pdf"
    permissions_state = PdfReader(sample).are_permissions_valid
    assert permissions_state is None


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_are_permissions_valid_none_before_decrypt():
    """An encrypted document reports ``None`` for are_permissions_valid until decrypt() is called."""
    sample = RESOURCE_ROOT / "encryption" / "r6-both-passwords.pdf"
    permissions_state = PdfReader(sample).are_permissions_valid
    assert permissions_state is None


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_are_permissions_valid_true_for_valid_r6():
    """are_permissions_valid is True when the /Perms integrity check passes."""
    sample = RESOURCE_ROOT / "encryption" / "r6-owner-password.pdf"
    reader = PdfReader(sample)
    # NOTE(review): the parametrization of test_encryption describes this file as created with
    # an empty user password and the owner password "asdfzxcv" - confirm that "usersecret" is
    # the intended password here.
    reader.decrypt("usersecret")
    assert reader.are_permissions_valid is True


def test_are_permissions_valid_true_for_v4():
    """For V4 encryption (no /Perms field), are_permissions_valid defaults to True."""
    source = RESOURCE_ROOT / "encryption" / "unencrypted.pdf"
    writer = PdfWriter(clone_from=source)
    writer.encrypt(user_password="user", owner_password="owner", algorithm="RC4-128")

    buffer = BytesIO()
    writer.write(buffer)

    reader = PdfReader(buffer)
    reader.decrypt("user")
    assert reader.are_permissions_valid is True


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_are_permissions_valid_false_when_tampered():
    """are_permissions_valid is False once the /Perms entry of the file has been modified."""
    source = RESOURCE_ROOT / "encryption" / "unencrypted.pdf"
    writer = PdfWriter(clone_from=source)
    writer.encrypt(user_password="user", owner_password="owner", algorithm="AES-256")
    buffer = BytesIO()
    writer.write(buffer)

    # Manipulate the serialized file directly.
    raw = bytearray(buffer.getvalue())
    perms_offset = raw.find(b"/Perms ")
    assert perms_offset != -1, "/Perms not found in PDF"
    # Locate the "<" following /Perms and invert all bits of the byte two positions after it.
    opening = raw.index(b"<", perms_offset)
    raw[opening + 2] ^= 0xFF

    reader = PdfReader(BytesIO(bytes(raw)))
    reader.decrypt("user")
    assert reader.are_permissions_valid is False


================================================
FILE: tests/test_filters.py
================================================
"""Test the pypdf.filters module."""
import os
import string
import subprocess
import sys
import zlib
from io import BytesIO
from itertools import product as cartesian_product
from pathlib import Path
from typing import cast
from unittest import mock

import pytest
from PIL import Image, ImageOps

from pypdf import PdfReader, PdfWriter
from pypdf.errors import DependencyError, DeprecationError, LimitReachedError, PdfReadError, PdfStreamError
from pypdf.filters import (
    ASCII85Decode,
    ASCIIHexDecode,
    CCITParameters,
    CCITTFaxDecode,
    CCITTParameters,
    FlateDecode,
    JBIG2Decode,
    RunLengthDecode,
    decode_stream_data,
    decompress,
)
from pypdf.generic import (
    ArrayObject,
    BooleanObject,
    ContentStream,
    DictionaryObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    StreamObject,
    TextStringObject,
)

from . import RESOURCE_ROOT, PILContext, get_data_from_url
from .test_encryption import HAS_AES
from .test_images import image_similarity
from .utils import get_image_data

# Character sets from the `string` module, used as sample inputs by the
# FlateDecode tests below (each entry is encoded to bytes before use).
filter_inputs = (
    string.ascii_letters,
    string.ascii_lowercase,
    string.ascii_uppercase,
    string.digits,
    string.hexdigits,
    string.octdigits,
    string.punctuation,
    string.printable,
    string.whitespace,  # Add more
)


@pytest.mark.parametrize(
    ("predictor", "s"), list(cartesian_product([1], filter_inputs))
)
def test_flate_decode_encode(predictor, s):
    """Data encoded with FlateDecode.encode() is restored by FlateDecode.decode()."""
    flate = FlateDecode()
    payload = s.encode()
    decode_parameters = DictionaryObject({"/Predictor": predictor})

    compressed = flate.encode(payload)
    assert flate.decode(compressed, decode_parameters) == payload


def test_flatedecode_unsupported_predictor():
    """
    Decoding with an unsupported predictor raises PdfReadError.

    Predictor values outside the ranges [1, 2] and [10, 15] are not supported.
    Every such value is combined with every sample input.
    """
    flate = FlateDecode()
    unsupported_predictors = (-10, -1, 0, 3, 9, 16, 20, 100)

    for predictor in unsupported_predictors:
        for text in filter_inputs:
            payload = text.encode()
            with pytest.raises(PdfReadError):
                flate.decode(
                    flate.encode(payload),
                    DictionaryObject({NameObject("/Predictor"): NumberObject(predictor)}),
                )


@pytest.mark.parametrize(
    ("data", "expected"),
    [
        (">", b""),
        (
            "6162636465666768696a6b6c6d6e6f707172737475767778797a>",
            string.ascii_lowercase.encode(),
        ),
        (
            "4142434445464748494a4b4c4d4e4f505152535455565758595a>",
            string.ascii_uppercase.encode(),
        ),
        (
            "6162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a>",
            string.ascii_letters.encode(),
        ),
        ("30313233343536373839>", string.digits.encode()),
        (
            "3  031323334353637   3839>",
            string.digits.encode(),
        ),  # Same as previous, but whitespaced
        ("30313233343536373839616263646566414243444546>", string.hexdigits.encode()),
        ("20090a0d0b0c>", string.whitespace.encode()),
        # Odd number of hexadecimal digits behaves as if a 0 (zero) followed the last digit
        ("3938373635343332313>", string.digits[::-1].encode()),
    ],
    ids=[
        "empty",
        "ascii_lowercase",
        "ascii_uppercase",
        "ascii_letters",
        "digits",
        "digits_whitespace",
        "hexdigits",
        "whitespace",
        "odd_number",
    ],
)
def test_ascii_hex_decode_method(data, expected):
    """ASCIIHexDecode.decode() returns the expected bytes for each hexadecimal input."""
    decoded = ASCIIHexDecode.decode(data)
    assert decoded == expected


def test_ascii_hex_decode_missing_eod(caplog):
    """Decoding input without an EOD character makes ASCIIHexDecode.decode() log a warning."""
    expected_warning = "missing EOD in ASCIIHexDecode, check if output is OK"
    ASCIIHexDecode.decode("")
    assert expected_warning in caplog.text


@pytest.mark.enable_socket
def test_decode_ahx():
    """
    See #1979
    Gray Image in CMYK : requiring reverse

    Listing the image names of every page must not raise.
    """
    pdf_bytes = get_data_from_url(name="NewJersey.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    for page in reader.pages:
        list(page.images.keys())


def test_ascii85decode_with_overflow():
    """Each of the listed single characters, followed by the ``~>`` marker, is rejected with ValueError."""
    rejected_characters = (
        "\x01\x02\x03\x04\x05\x06\x07\x08\x0e\x0f"
        "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a"
        "\x1b\x1c\x1d\x1e\x1fvwxy{|}~\x7f\x80\x81\x82"
        "\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d"
        "\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98"
        "\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0¡¢£¤¥¦§¨©ª«¬"
        "\xad®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇ"
    )

    for character in rejected_characters:
        encoded = character + "~>"
        with pytest.raises(ValueError):
            ASCII85Decode.decode(encoded)


def test_ascii85decode_five_zero_bytes():
    """
    ASCII85Decode treats ``z`` as the shorthand for an all-zero group.

    ISO 32000-1:2008 §7.4.3:

    «As a special case, if all five bytes are 0, they shall be represented by
    the character with code 122 (z) instead of by five exclamation points
    (!!!!!).»
    """
    assert ASCII85Decode.decode("!!!!!~>") == ASCII85Decode.decode("z~>")

    # One, two and three "z" characters decode to that many groups of four zero bytes.
    for repetitions in (1, 2, 3):
        encoded = "z" * repetitions + "~>"
        assert ASCII85Decode.decode(encoded) == b"\x00\x00\x00\x00" * repetitions


def test_ccitparameters():
    """Instantiating the removed ``CCITParameters`` raises DeprecationError naming the replacement."""
    expected_message = r"CCITParameters is deprecated and was removed in pypdf 6\.0\.0\. Use CCITTParameters instead"
    with pytest.raises(DeprecationError, match=expected_message):
        CCITParameters()


def test_ccittparameters():
    """A default-constructed CCITTParameters has K == 0, BlackIs1 False and group 3."""
    defaults = CCITTParameters()
    assert defaults.K == 0  # zero is the default according to page 78
    assert defaults.BlackIs1 is False
    assert defaults.group == 3


@pytest.mark.parametrize(
    ("parameters", "expected_k", "expected_black_is_1"),
    [
        (None, 0, False),
        (
            ArrayObject([{"/K": NumberObject(1)}, {"/Columns": NumberObject(13)}, {"/BlackIs1": BooleanObject(True)}]),
            1, True
        ),
    ],
)
def test_ccitt_get_parameters(parameters, expected_k, expected_black_is_1):
    """CCITTFaxDecode._get_parameters() yields the expected K and BlackIs1 for missing and array input."""
    resolved = CCITTFaxDecode._get_parameters(parameters=parameters, rows=0)
    assert resolved.K == expected_k  # noqa: SIM300
    assert resolved.BlackIs1 == expected_black_is_1


def test_ccitt_get_parameters__indirect_object():
    """An ``IndirectObject`` passed as ``rows`` ends up as its resolved value."""

    class FakePdf:
        # Minimal stand-in: every reference resolves to the number 42.
        def get_object(self, reference) -> NumberObject:
            return NumberObject(42)

    rows_reference = IndirectObject(13, 1, FakePdf())
    resolved = CCITTFaxDecode._get_parameters(parameters=None, rows=rows_reference)
    assert resolved.rows == 42


def test_ccitt_fax_decode():
    """Decoding empty CCITT data yields only the TIFF header."""
    decode_parms = DictionaryObject(
        {"/K": NumberObject(-1), "/Columns": NumberObject(17)}
    )

    # This is the header of an empty TIFF image.
    expected_header = (
        b"II*\x00\x08\x00\x00\x00\x08\x00\x00\x01\x04\x00\x01\x00\x00\x00\x11\x00"
        b"\x00\x00\x01\x01\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x01"
        b"\x03\x00\x01\x00\x00\x00\x01\x00\x00\x00\x03\x01\x03\x00\x01\x00"
        b"\x00\x00\x04\x00\x00\x00\x06\x01\x03\x00\x01\x00\x00\x00\x00\x00"
        b"\x00\x00\x11\x01\x04\x00\x01\x00\x00\x00l\x00\x00\x00\x16\x01"
        b"\x04\x00\x01\x00\x00\x00\x00\x00\x00\x00\x17\x01\x04\x00\x01\x00"
        b"\x00\x00\x00\x00\x00\x00\x00\x00"
    )

    assert CCITTFaxDecode.decode(b"", decode_parms) == expected_header


@pytest.mark.enable_socket
def test_decompress_zlib_error(caplog):
    """Text extraction works for all pages and the startxref warning is logged."""
    pdf_bytes = get_data_from_url(name="tika-952445.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    for current_page in reader.pages:
        current_page.extract_text()
    assert "incorrect startxref pointer(3)" in caplog.text


@pytest.mark.enable_socket
def test_lzw_decode_neg1():
    """The text of the page at index 47 starts with "Chapter 2"."""
    pdf_bytes = get_data_from_url(name="tika-921632.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted_text = reader.pages[47].extract_text()
    assert extracted_text.startswith("Chapter 2")


@pytest.mark.enable_socket
def test_issue_399():
    """Extracting text from the second page of the sample file does not raise."""
    pdf_bytes = get_data_from_url(name="tika-976970.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[1].extract_text()


@pytest.mark.enable_socket
def test_image_without_pillow(tmp_path):
    """Accessing page images without pillow raises an ``ImportError`` with an installation hint."""
    name = "tika-914102.pdf"
    pdf_path_str = (Path(__file__).parent / "pdf_cache" / name).resolve().as_posix()

    # The check runs in a separate interpreter in which importing ``PIL`` fails.
    source_file = tmp_path / "script.py"
    source_file.write_text(
        f"""
import sys
from pypdf import PdfReader

import pytest


sys.modules["PIL"] = None
reader = PdfReader("{pdf_path_str}", strict=True)

for page in reader.pages:
    with pytest.raises(ImportError) as exc:
        page.images[0]
    assert exc.value.args[0] == (
        "pillow is required to do image extraction. "
        "It can be installed via 'pip install pypdf[image]'"
    ), exc.value.args[0]
"""
    )

    env = os.environ.copy()
    env["COVERAGE_PROCESS_START"] = "pyproject.toml"
    # Put the current directory in front of any inherited ``PYTHONPATH``.
    inherited_path = env.get("PYTHONPATH")
    if inherited_path is None:
        env["PYTHONPATH"] = "."
    else:
        env["PYTHONPATH"] = "." + os.pathsep + inherited_path

    result = subprocess.run(  # noqa: S603  # We have the control here.
        [sys.executable, source_file],
        capture_output=True,
        env=env,
    )
    assert result.returncode == 0
    assert result.stdout == b""
    # Carriage returns are dropped so that the comparison does not depend on the line ending style.
    normalized_stderr = result.stderr.replace(b"\r", b"")
    assert (
        normalized_stderr
        == b"Superfluous whitespace found in object header b'4' b'0'\n"
    )


@pytest.mark.enable_socket
def test_issue_1737():
    """The data of the three image XObjects on the first page can be retrieved."""
    reader = PdfReader(BytesIO(get_data_from_url(name="iss1737.pdf")))
    for xobject_name in ("/Im0", "/Im1", "/Im2"):
        reader.pages[0]["/Resources"]["/XObject"][xobject_name].get_data()


@pytest.mark.enable_socket
def test_pa_image_extraction():
    """
    PNG images with PA mode can be extracted.

    Regression test for issue #1801.
    """
    pdf_bytes = get_data_from_url(name="issue-1801.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))

    extracted = reader.pages[0].images
    assert len(extracted) == 1
    assert extracted[0].name == "Im1.png"

    # Ensure visual appearance
    reference = BytesIO(get_data_from_url(name="issue-1801.png"))
    assert image_similarity(reference, extracted[0].image) == 1


@pytest.mark.enable_socket
def test_1bit_image_extraction():
    """Cf issue #1814: accessing the images of every page does not raise."""
    pdf_bytes = get_data_from_url(name="grimm10")
    reader = PdfReader(BytesIO(pdf_bytes))
    for current_page in reader.pages:
        current_page.images


@pytest.mark.enable_socket
def test_png_transparency_reverse():
    """Cf issue #1599: the extracted image matches the reference PNG."""
    reader = PdfReader(RESOURCE_ROOT / "labeled-edges-center-image.pdf")
    reference_bytes = get_data_from_url(name="labeled-edges-center-image.png")
    reference_image = Image.open(BytesIO(reference_bytes))

    extracted = reader.pages[0].images[0]
    extracted_image = Image.open(BytesIO(extracted.data))

    assert ".jp2" in extracted.name
    assert get_image_data(extracted_image) == get_image_data(reference_image)


@pytest.mark.enable_socket
def test_iss1787():
    """Cf issue #1787: the image matches the reference; changed /Columns leads to a filter error."""
    reader = PdfReader(BytesIO(get_data_from_url(name="pdf_font_garbled.pdf")))
    reference_image = Image.open(BytesIO(get_data_from_url(name="watermark1.png")))

    extracted = reader.pages[0].images[0]
    extracted_image = Image.open(BytesIO(extracted.data))
    assert ".png" in extracted.name
    assert get_image_data(extracted_image) == get_image_data(reference_image)

    # Change /Columns and reset the cached decoded stream before extracting again.
    image_object = extracted.indirect_reference.get_object()
    image_object["/DecodeParms"][NameObject("/Columns")] = NumberObject(1000)
    image_object.decoded_self = None
    with pytest.raises(expected_exception=PdfReadError, match=r"^Unsupported PNG filter 244$"):
        _ = reader.pages[0].images[0]


@pytest.mark.enable_socket
def test_tiff_predictor():
    """Decode Tiff Predictor 2 Images"""
    reader = PdfReader(BytesIO(get_data_from_url(name="tika-977609.pdf")))
    reference_image = Image.open(BytesIO(get_data_from_url(name="tifimage.png")))

    extracted = reader.pages[0].images[0]
    extracted_image = Image.open(BytesIO(extracted.data))

    assert ".png" in extracted.name
    assert get_image_data(extracted_image) == get_image_data(reference_image)


@pytest.mark.enable_socket
def test_rgba():
    """Decode RGB with transparency"""
    with PILContext():
        pdf_bytes = get_data_from_url(name="tika-972174.pdf")
        reader = PdfReader(BytesIO(pdf_bytes))
        extracted = reader.pages[0].images[0]
        assert ".jp2" in extracted.name

        reference = BytesIO(get_data_from_url(name="tika-972174_p0-im0.png"))
        assert image_similarity(extracted.image, reference) > 0.99


@pytest.mark.enable_socket
@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_cmyk():
    """Decode CMYK"""
    # (PDF name, reference image name, page index, expected name part, similarity threshold)
    cases = (
        # JPEG compression
        ("Vitocal.pdf", "VitocalImage.png", 1, ".jpg", 0.99),
        # deflate; lossless compression expected
        ("cmyk_deflate.pdf", "cmyk_deflate.tif", 0, ".tif", 0.999),
    )
    for pdf_name, reference_name, page_index, name_part, threshold in cases:
        reader = PdfReader(BytesIO(get_data_from_url(name=pdf_name)))
        reference = BytesIO(get_data_from_url(name=reference_name))
        extracted = reader.pages[page_index].images[0]
        assert extracted.image.mode == "CMYK"
        assert name_part in extracted.name
        assert image_similarity(extracted.image, reference) > threshold


@pytest.mark.enable_socket
def test_iss1863():
    """Test doc from iss1863: the name of every image on every page is accessible."""
    pdf_bytes = get_data_from_url(name="o1whh9b3.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    for current_page in reader.pages:
        for image in current_page.images:
            image.name


@pytest.mark.enable_socket
def test_read_images():
    """Iterating over all images of the first page does not raise."""
    pdf_bytes = get_data_from_url(name="selbst.72916.pdf")
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    for _ in first_page.images:
        pass


@pytest.mark.enable_socket
def test_cascaded_filters_images():
    """Name and image of every image on every page are accessible."""
    pdf_bytes = get_data_from_url(name="iss1912.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    # for focus, analyse the page 23
    for current_page in reader.pages:
        for image in current_page.images:
            _ = image.name, image.image


@pytest.mark.enable_socket
def test_calrgb():
    """The first image of the first page of the sample file can be accessed."""
    pdf_bytes = get_data_from_url(name="calRGB.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[0].images[0]


@pytest.mark.enable_socket
def test_index_lookup():
    """The lookup is provided as an str and bytes"""
    reader = PdfReader(BytesIO(get_data_from_url(name="2023USDC.pdf")))
    # (reference image name, page index) for the last image of the respective page
    rgb_cases = (
        # TextStringObject Lookup
        ("iss1982_im1.png", 0),
        # ByteStringObject Lookup
        ("iss1982_im2.png", -1),
    )
    for reference_name, page_index in rgb_cases:
        reference = BytesIO(get_data_from_url(name=reference_name))
        extracted = reader.pages[page_index].images[-1]
        assert extracted.image.mode == "RGB"
        assert image_similarity(extracted.image, reference) > 0.999

    # indexed CMYK images
    # currently with a TODO as we convert the palette to RGB
    reader = PdfReader(BytesIO(get_data_from_url(name="tika-972174.pdf")))
    reference_image = Image.open(BytesIO(get_data_from_url(name="usa.png")))
    extracted = reader.pages[0].images["/Im3"]
    # assert extracted.image.mode == "PA" but currently "RGBA"
    assert image_similarity(extracted.image, reference_image) > 0.999


@pytest.mark.enable_socket
def test_2bits_image():
    """From #1954, test with 2bits image. TODO: 4bits also"""
    reader = PdfReader(BytesIO(get_data_from_url(name="paid.pdf")))

    reference_url = "https://user-images.githubusercontent.com/4083478/253568117-ca95cc85-9dea-4145-a5e0-032f1c1aa322.png"
    reference = BytesIO(get_data_from_url(reference_url, name="Paid.png"))

    extracted = reader.pages[0].images[0]
    assert image_similarity(extracted.image, reference) > 0.99


@pytest.mark.enable_socket
def test_gray_devicen_cmyk():
    """
    Cf #1979
    Gray Image in CMYK : requiring reverse
    """
    pdf_url = "https://github.com/py-pdf/pypdf/files/12080338/example_121.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(pdf_url, name="gray_cmyk.pdf")))

    reference_url = "https://user-images.githubusercontent.com/4083478/254545494-42df4949-1557-4f2d-acca-6be6e8de1122.png"
    reference = BytesIO(get_data_from_url(reference_url, name="velo.png"))

    extracted = reader.pages[0].images[0]
    assert extracted.image.mode == "L"
    assert image_similarity(extracted.image, reference) > 0.999


@pytest.mark.enable_socket
def test_runlengthdecode():
    """
    Images using RunLengthDecode can be extracted.

    The first file is compared against a reference image; for the two
    ``FailedRLE`` files it is only checked that the first image of the
    first page can be accessed without raising.
    """
    url = "https://github.com/py-pdf/pypdf/files/12159941/out.pdf"
    name = "RunLengthDecode.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    url_png = "https://user-images.githubusercontent.com/4083478/255940800-6d63972e-a3d6-4cf9-aa6f-0793af24cded.png"
    name_png = "RunLengthDecode.png"
    refimg = BytesIO(get_data_from_url(url_png, name=name_png))
    data = reader.pages[0].images[0]
    assert image_similarity(data.image, refimg) > 0.999

    url = "https://github.com/py-pdf/pypdf/files/12162905/out.pdf"
    name = "FailedRLE1.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    reader.pages[0].images[0]

    url = "https://github.com/py-pdf/pypdf/files/12162926/out.pdf"
    name = "FailedRLE2.pdf"
    # A fresh reader is required here; otherwise the previous file would be checked a second time.
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    reader.pages[0].images[0]


@pytest.mark.enable_socket
def test_gray_separation_cmyk():
    """
    Cf #1955
    Gray Image in Separation/RGB : requiring reverse
    """
    pdf_url = "https://github.com/py-pdf/pypdf/files/12143372/tt.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(pdf_url, name="TestWithSeparationBlack.pdf")))

    reference_url = "https://user-images.githubusercontent.com/4083478/254545494-42df4949-1557-4f2d-acca-6be6e8de1122.png"
    # The reference image name "velo.png" is reused from another test.
    reference = BytesIO(get_data_from_url(reference_url, name="velo.png"))

    extracted = reader.pages[0].images[0]
    assert extracted.image.mode == "L"
    assert image_similarity(extracted.image, reference) > 0.999


@pytest.mark.enable_socket
def test_singleton_device():
    """From #2023"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12177287/tt.pdf",
        name="pypdf_with_arr_deviceRGB.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[0].images[0]


@pytest.mark.enable_socket
def test_jpx_no_spacecode():
    """From #2061"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12253581/tt2.pdf",
        name="jpx_no_spacecode.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    first_image = reader.pages[0].images[0]

    # create an object without filter and without colorspace
    # just for coverage
    image_object = first_image.indirect_reference.get_object()
    del image_object["/Filter"]
    with pytest.raises(PdfReadError) as exc_info:
        reader.pages[0].images[0]
    message = exc_info.value.args[0]
    assert message.startswith("ColorSpace field not found")


@pytest.mark.enable_socket
def test_encodedstream_lookup():
    """From #2124"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12455580/10.pdf",
        name="iss2124.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[12].images[0]


@pytest.mark.enable_socket
def test_convert_1_to_la():
    """From #2165"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12543290/whitepaper.WBT.token.blockchain.whitepaper.pdf",
        name="iss2165.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    # Iterating is enough: every image of the page has to be loadable.
    for image in reader.pages[13].images:
        _ = image


@pytest.mark.enable_socket
def test_nested_device_n_color_space():
    """From #2240"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12814018/out1.pdf",
        name="issue2240.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[0].images[0]


@pytest.mark.enable_socket
@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_flate_decode_with_image_mode_1():
    """From #2248"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12847339/Prototype-Declaration-VDE4110-HYD-5000-20000-ZSS-DE.pdf",
        name="issue2248.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    # Iterating is enough: every image of the page has to be loadable.
    for current_image in reader.pages[7].images:
        _ = current_image


@pytest.mark.enable_socket
def test_flate_decode_with_image_mode_1__whitespace_at_end_of_lookup():
    """From #2331"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13611048/out1.pdf",
        name="issue2331.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[0].images[0]


@pytest.mark.enable_socket
def test_ascii85decode__invalid_end__recoverable(caplog):
    """From #2996"""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18050808/1af7d56a-5c8c-4914-85b3-b2536a5525cd.pdf",
        name="issue2996.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    second_page = reader.pages[1]
    assert second_page.extract_text() == ""
    # The missing end marker is only logged, not raised.
    assert "Ignoring missing Ascii85 end marker." in caplog.text


def test_ascii85decode__non_recoverable(caplog):
    """Invalid digits raise a ``ValueError`` with and without the end marker."""
    # Without our custom handling, this would complain about the final `~>` being missing.
    without_marker = "äöüß"
    with pytest.raises(ValueError, match="Non-Ascii85 digit found: Ã"):
        ASCII85Decode.decode(without_marker)
    assert "Ignoring missing Ascii85 end marker." in caplog.text
    caplog.clear()

    # With the end marker present, nothing is logged.
    with_marker = without_marker + "~>"
    with pytest.raises(ValueError, match="Non-Ascii85 digit found: Ã"):
        ASCII85Decode.decode(with_marker)
    assert caplog.text == ""


def test_ascii85decode__ignore_whitespaces(caplog):
    """Whitespace characters must be silently ignored"""
    # The newline sits between the two characters of the `~>` end marker.
    encoded = b"Cqa;:3k~\n>"
    assert ASCII85Decode.decode(encoded) == b"l\xbe`\x8d:"


@pytest.mark.enable_socket
def test_ccitt_fax_decode__black_is_1():
    """The image with ``/BlackIs1 true`` equals the inverted image of the local sample file."""
    url = "https://github.com/user-attachments/files/19288881/imagemagick-CCITTFaxDecode_BlackIs1-true.pdf"
    name = "issue3193.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    reference_reader = PdfReader(RESOURCE_ROOT / "imagemagick-CCITTFaxDecode.pdf")

    actual_image = reader.pages[0].images[0].image
    inverted_reference = ImageOps.invert(reference_reader.pages[0].images[0].image)
    expected_pixels = get_image_data(inverted_reference)
    actual_pixels = get_image_data(actual_image)
    assert expected_pixels == actual_pixels

    # AttributeError: 'NullObject' object has no attribute 'get'
    original_parms = b"/DecodeParms [ << /K -1 /BlackIs1 true /Columns 16 /Rows 16 >> ]"
    null_parms = b"/DecodeParms [ null ]"
    data_modified = get_data_from_url(url, name=name).replace(original_parms, null_parms)
    reader = PdfReader(BytesIO(data_modified))
    _ = reader.pages[0].images[0].image


@pytest.mark.enable_socket
def test_flate_decode__image_is_none_due_to_size_limit(caplog):
    """The image is ``None`` and the failure gets logged when the size limit is exceeded."""
    pdf_bytes_source = (
        "https://github.com/user-attachments/files/19464256/file.pdf",
        "issue3220.pdf",
    )
    expected_message = (
        "Failed loading image: Image size (180000000 pixels) exceeds limit of "
        "178956970 pixels, could be decompression bomb DOS attack."
    )

    with mock.patch("pypdf.filters.ZLIB_MAX_OUTPUT_LENGTH", 0):
        url, name = pdf_bytes_source
        reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
        page_images = reader.pages[0].images
        assert len(page_images) == 1
        only_image = page_images[0]
        assert only_image.name == "Im0.png"
        assert only_image.image is None

    assert expected_message in caplog.messages


@pytest.mark.enable_socket
def test_flate_decode__not_rectangular(caplog):
    """Non-rectangular image data gets padded with a warning and still matches the reference."""
    compressed = get_data_from_url(
        "https://github.com/user-attachments/files/19663603/issue3241_compressed.txt",
        name="issue3241.txt",
    )
    decode_parms = DictionaryObject()
    decode_parms[NameObject("/Predictor")] = NumberObject(15)
    decode_parms[NameObject("/Columns")] = NumberObject(4881)
    decoded = FlateDecode.decode(data=compressed, decode_parms=decode_parms)
    actual_image = Image.frombytes(mode="1", size=(4881, 81), data=decoded)

    reference_bytes = get_data_from_url(
        "https://github.com/user-attachments/assets/c5695850-c076-4255-ab72-7c86851a4a04",
        name="issue3241.png",
    )
    assert image_similarity(BytesIO(reference_bytes), actual_image) == 1
    assert caplog.messages == ["Image data is not rectangular. Adding padding."]


def test_jbig2decode__binary_errors():
    """A missing binary and unrecognized command line options are reported as ``DependencyError``."""
    with mock.patch("pypdf.filters.JBIG2DEC_BINARY", None), \
            pytest.raises(DependencyError, match=r"jbig2dec binary is not available\."):
        JBIG2Decode.decode(b"dummy")

    # stderr outputs of a binary complaining about an unrecognized option.
    unrecognized_option_outputs = (
        (
            b"jbig2dec: unrecognized option '--embedded'\n"
            b"Usage: jbig2dec [options] <file.jbig2>\n"
            b"   or  jbig2dec [options] <global_stream> <page_stream>\n"
        ),
        (
            b"jbig2dec: unrecognized option '-M'\n"
            b"Usage: jbig2dec [options] <file.jbig2>\n"
            b"   or  jbig2dec [options] <global_stream> <page_stream>\n"
        ),
    )
    for stderr_output in unrecognized_option_outputs:
        completed = subprocess.CompletedProcess(
            args=["dummy"], returncode=0, stdout=b"", stderr=stderr_output
        )
        with mock.patch("pypdf.filters.subprocess.run", return_value=completed), \
                mock.patch("pypdf.filters.JBIG2DEC_BINARY", "/usr/bin/jbig2dec"), \
                pytest.raises(DependencyError, match=r"jbig2dec>=0.19 is required\."):
            JBIG2Decode.decode(b"dummy")


@pytest.mark.skipif(condition=not JBIG2Decode._is_binary_compatible(), reason="Requires recent jbig2dec")
def test_jbig2decode__edge_cases(caplog):
    """JBIG2 decoding with valid, missing and unusable globals as well as invalid input."""
    image_data = (
        b'\x00\x00\x00\x010\x00\x01\x00\x00\x00\x13\x00\x00\x00\x05\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x06"'
        b'\x00\x01\x00\x00\x00\x1c\x00\x00\x00\x05\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x9f\xa8_\xff\xac'
    )
    jbig2_globals = b"\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x18\x00\x00\x03\xff\xfd\xff\x02\xfe\xfe\xfe\x00\x00\x00\x01\x00\x00\x00\x01R\xd0u7\xff\xac"  # noqa: E501

    # Warnings expected whenever no usable globals stream is available.
    missing_symbols_warnings = [
        "jbig2dec WARNING text region refers to no symbol dictionaries (segment 0x00000002)",
        "jbig2dec WARNING ignoring out of range symbol ID (0/0) (segment 0x00000002)"
    ]

    def check_pixels(decoded, expected_for_column):
        # Compare each pixel of the 5x5 image with the value expected for its column.
        image = Image.open(BytesIO(decoded), formats=("PNG", "PPM"))
        for x in range(5):
            for y in range(5):
                assert image.getpixel((x, y)) == expected_for_column(x), (x, y)

    # Validation: Is our image data valid?
    content_stream = ContentStream(stream=None, pdf=None)
    content_stream.set_data(jbig2_globals)
    result = JBIG2Decode.decode(image_data, decode_parms=DictionaryObject({"/JBIG2Globals": content_stream}))
    check_pixels(result, lambda x: 255 if x < 3 else 0)
    assert caplog.messages == []

    # No decode_params. Completely white image.
    result = JBIG2Decode.decode(image_data)
    check_pixels(result, lambda x: 255)
    assert caplog.messages == missing_symbols_warnings
    caplog.clear()

    # JBIG2Globals is NULL. Completely white image.
    result = JBIG2Decode.decode(image_data, decode_parms=DictionaryObject({"/JBIG2Globals": NullObject()}))
    check_pixels(result, lambda x: 255)
    assert caplog.messages == missing_symbols_warnings
    caplog.clear()

    # JBIG2Globals is DictionaryObject. Completely white image.
    result = JBIG2Decode.decode(image_data, decode_parms=DictionaryObject({"/JBIG2Globals": DictionaryObject()}))
    check_pixels(result, lambda x: 255)
    assert caplog.messages == missing_symbols_warnings
    caplog.clear()

    # Invalid input.
    with pytest.raises(PdfStreamError, match=r"Unable to decode JBIG2 data\. Exit code: 1"):
        JBIG2Decode.decode(b"aaaaaa")
    assert caplog.messages == [
        "jbig2dec FATAL ERROR page has no image, cannot be completed",
        "jbig2dec WARNING unable to complete page"
    ]


@pytest.mark.timeout(timeout=30, method="thread")
@pytest.mark.enable_socket
def test_flate_decode_stream_with_faulty_tail_bytes():
    """
    Test for #3332

    Two things are ensured here:
        1. the stream can be decoded at all
        2. decoding does not fall through to the last fallback of the
           try-except blocks, which is too slow and takes ages for this stream
    """
    pdf_bytes = get_data_from_url(
        url="https://github.com/user-attachments/files/20901522/faulty_stream_tail_example.1.pdf",
        name="faulty_stream_tail_example.1.pdf"
    )
    expected_decoded = get_data_from_url(
        url="https://github.com/user-attachments/files/20941717/decoded.dat.txt",
        name="faulty_stream_tail_example.1.decoded.dat"
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    stream_object = cast(StreamObject, reader.get_object(IndirectObject(182, 0, reader)))
    assert stream_object.get_data() == expected_decoded


@pytest.mark.enable_socket
def test_rle_decode_with_faulty_tail_byte_in_multi_encoded_stream(caplog):
    """
    Test for #3355

    The inner RLE encoded stream has to be decodable although it ends
    with an extra faulty newline byte, which can be ignored during
    decoding.
    """
    pdf_bytes = get_data_from_url(
        url="https://github.com/user-attachments/files/21038398/test_data_rle.txt",
        name="multi_decoding_example_with_faulty_tail_byte.pdf"
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    stream_object = cast(StreamObject, reader.get_object(IndirectObject(60, 0, reader)))
    stream_object.get_data()
    assert "Found trailing newline in stream data, check if output is OK" in caplog.messages


@pytest.mark.enable_socket
def test_rle_decode_exception_with_corrupted_stream(caplog):
    """
    Additional Test to #3355

    The EOD warning has to be reported during RLE decoding. The test also
    ensures that we do not fail during code coverage analyses in the git
    PR pipeline.
    """
    encoded = get_data_from_url(
        url="https://github.com/user-attachments/files/21052626/rle_stream_with_error.txt",
        name="rle_stream_with_error.txt"
    )
    expected_start = b"\x01\x01\x01\x01\x01\x01\x01\x02\x02\x02\x02\x02\x02\x02\x03\x03"
    expected_end = b"\x87\x83\x83\x83\x83\x83\x83\x83]]]]]]]RRRRRRRX\xa5"

    decoded = RunLengthDecode.decode(encoded)
    assert decoded.startswith(expected_start)
    assert decoded.endswith(expected_end)
    assert len(decoded) == 1048576
    assert caplog.messages == ["Early EOD in RunLengthDecode, check if output is OK"]


def test_decompress():
    """``decompress`` works regularly and byte-wise, and enforces its output and input limits."""
    printable = string.printable
    payload = printable.encode("utf-8") + printable[::-1].encode("utf-8")
    compressed = FlateDecode.encode(payload)

    # Decompress regularly.
    assert decompress(compressed) == payload

    # Decompress byte-wise.
    with mock.patch("pypdf.filters._decompress_with_limit", side_effect=zlib.error):
        assert decompress(compressed) == payload

    # Decompress byte-wise with very low output limit.
    output_limit_message = r"^Limit reached while decompressing\. 12 bytes remaining\.$"
    with mock.patch("pypdf.filters._decompress_with_limit", side_effect=zlib.error), \
            mock.patch("pypdf.filters.ZLIB_MAX_OUTPUT_LENGTH", len(compressed) - 13), \
            pytest.raises(LimitReachedError, match=output_limit_message):
        decompress(compressed)

    # Decompress byte-wise with input limit.
    input_limit_message = r"^Recovery limit reached while decompressing\. 336 bytes remaining\.$"
    with mock.patch("pypdf.filters.ZLIB_MAX_RECOVERY_INPUT_LENGTH", 1000), \
            pytest.raises(LimitReachedError, match=input_limit_message):
        decompress(b"A" * 1337)


def test_decompress__logging_on_invalid_data(caplog):
    """We do not like suddenly getting empty outputs for non-empty inputs without a warning."""
    flate = FlateDecode()
    compressed = flate.encode(b"My test string")
    assert len(compressed) > 5

    # Dropping the first five bytes makes the data undecodable.
    truncated = compressed[5:]
    assert flate.decode(truncated) == b""
    assert caplog.messages == ["Error -3 while decompressing data: incorrect header check"]


def test_ccittfaxdecode__ccf_inline():
    """An inline image using the ``/CCF`` filter abbreviation is decoded like the reference image."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "jpeg.pdf")
    page = writer.pages[0]
    writer.remove_images()

    inline_image = (
        b"\nBI\n  /W 16\n  /H 16\n  /CS /G\n  /BPC 1\n  /F [/CCF]\n"
        b"  /DP [ << /K -1 /BlackIs1 false /Columns 16 /Rows 16 >> ]\nID\n"
        b"&\xa0\xbf\xcc9\x14|G#\x1f\xff\xf1\xcc9\x18\xfe\xbbX\xfc\x00@\x04"
        b"\nEI\n"
    )

    # Drop the XObject call and put the inline image where "\nET" was.
    contents = page.get_contents()
    updated_data = contents.get_data().replace(b"/Im4 Do", b"")
    updated_data = updated_data.replace(b"\nET", inline_image)
    contents.set_data(updated_data)
    page.replace_contents(contents)

    reference_reader = PdfReader(RESOURCE_ROOT / "imagemagick-CCITTFaxDecode.pdf")
    expected = reference_reader.pages[0].images[0].image
    assert get_image_data(expected) == get_image_data(page.images[0].image)


def test_dctdecode__dct_inline():
    """An inline image using the ``/DCT`` filter abbreviation is decoded like the reference image."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "jpeg.pdf")
    page = writer.pages[0]
    writer.remove_images()

    inline_image = (
        b"\nBI\n  /W 16\n  /H 16\n  /CS /G\n  /BPC 8\n  /F [/DCT]\nID\n"
        b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x01,\x01,\x00\x00\xff\xfe\x00\x13Created with GIMP\xff\xe2"
        b"\x02\xb0ICC_PROFILE\x00\x01\x01\x00\x00\x02\xa0lcms\x040\x00\x00mntrRGB XYZ \x07\xe6\x00\x04\x00\x0f\x00"
        b"\t\x00\x1d\x007acspAPPL\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x00\x00\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-lcms\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\rdesc\x00\x00\x01 \x00\x00\x00@cprt\x00\x00\x01`"
        b"\x00\x00\x006wtpt\x00\x00\x01\x98\x00\x00\x00\x14chad\x00\x00\x01\xac\x00\x00\x00,rXYZ\x00\x00\x01\xd8"
        b"\x00\x00\x00\x14bXYZ\x00\x00\x01\xec\x00\x00\x00\x14gXYZ\x00\x00\x02\x00\x00\x00\x00\x14rTRC\x00\x00"
        b"\x02\x14\x00\x00\x00 gTRC\x00\x00\x02\x14\x00\x00\x00 bTRC\x00\x00\x02\x14\x00\x00\x00 chrm\x00\x00"
        b"\x024\x00\x00\x00$dmnd\x00\x00\x02X\x00\x00\x00$dmdd\x00\x00\x02|\x00\x00\x00$mluc\x00\x00\x00\x00"
        b"\x00\x00\x00\x01\x00\x00\x00\x0cenUS\x00\x00\x00$\x00\x00\x00\x1c\x00G\x00I\x00M\x00P\x00 \x00b\x00"
        b"u\x00i\x00l\x00t\x00-\x00i\x00n\x00 \x00s\x00R\x00G\x00Bmluc\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00"
        b"\x00\x0cenUS\x00\x00\x00\x1a\x00\x00\x00\x1c\x00P\x00u\x00b\x00l\x00i\x00c\x00 \x00D\x00o\x00m\x00a"
        b"\x00i\x00n\x00\x00XYZ \x00\x00\x00\x00\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-sf32\x00\x00\x00"
        b"\x00\x00\x01\x0cB\x00\x00\x05\xde\xff\xff\xf3%\x00\x00\x07\x93\x00\x00\xfd\x90\xff\xff\xfb\xa1\xff"
        b"\xff\xfd\xa2\x00\x00\x03\xdc\x00\x00\xc0nXYZ \x00\x00\x00\x00\x00\x00o\xa0\x00\x008\xf5\x00\x00\x03"
        b"\x90XYZ \x00\x00\x00\x00\x00\x00$\x9f\x00\x00\x0f\x84\x00\x00\xb6\xc4XYZ \x00\x00\x00\x00\x00\x00b"
        b"\x97\x00\x00\xb7\x87\x00\x00\x18\xd9para\x00\x00\x00\x00\x00\x03\x00\x00\x00\x02ff\x00\x00\xf2\xa7"
        b"\x00\x00\rY\x00\x00\x13\xd0\x00\x00\n[chrm\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\xa3\xd7\x00\x00T|"
        b"\x00\x00L\xcd\x00\x00\x99\x9a\x00\x00&g\x00\x00\x0f\\mluc\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00"
        b"\x00\x0cenUS\x00\x00\x00\x08\x00\x00\x00\x1c\x00G\x00I\x00M\x00Pmluc\x00\x00\x00\x00\x00\x00\x00"
        b"\x01\x00\x00\x00\x0cenUS\x00\x00\x00\x08\x00\x00\x00\x1c\x00s\x00R\x00G\x00B\xff\xdb\x00C\x00\x01"
        b"\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
        b"\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
        b"\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xff\xc0\x00\x0b\x08\x00\x10\x00\x10"
        b"\x01\x01\x11\x00\xff\xc4\x00\x17\x00\x00\x03\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
        b"\x00\x06\x07\x08\n\xff\xc4\x00\x1d\x10\x00\x03\x00\x03\x01\x01\x01\x01\x00\x00\x00\x00\x00\x00"
        b"\x00\x00\x05\x06\x07\x01\x04\x08\x02\x03\x13\x15\xff\xda\x00\x08\x01\x01\x00\x00?\x00\xc4D\x0eA"
        b"\x8e\x91\xa8\xf3\xcf5N\xb5\x7f\x87k\xbc_\x96\xe3\x83]\x9c\\\xff\x00\x19f1^=:A\x98jm.\x03\x9f\x10"
        b"mW\xc2\xcbYF\xd2T\x06\xef,OXfX`^\x18\x0ez\xb4U \x91\x17\xd4\xf6\xbe\xc2\xb7\x85s:{\xa1\x8f\xec;}"
        b"\x8f-l/1|\x19\x86|\x14\xc5+j\x8cm\xf0\xde\x10\xba\x7f\xa5=\xe2\x86\xd8\x18\r\xed$o\xab2h\xbc\xad"
        b"\x8cS\x18\xba\xd8,\xb2\xa3\xbf\xd9\xd8I\x84+\x07\x9d\x1ay\x1cr\xba\x81\nu\x0f\xa7yk\xa0%5\xf2\xf4"
        b"\xf4\x9e\x8d\xe6\x19\x90+s;P\xfd\xd1\xb3\x8f\xac\xf8\x0e@5\xf5\x8f(i\xc3\x0e\xf3\xd3\xbc\xf5\xa5"
        b"\xed:\x85<$\xee\xd1@%i\xde\x1ao\xdaF$\t?Vq\xce\x92\xde\xe1\xbd\x14H\x8a'\"\x8d\xbf75\xaef\x90\xc3|"
        b"\xe8~\x82\x04\xab+3O.\xdeX&\xac\xf2t\x89\xcf\xd3\xfa\x85\xbdFu=\x8e*\xa9\xfb!\x96\xed\xfa\xe3S\xe5A"
        b"\xf2\xa8\xf5\xe8\xd7\x85\xa5\x05\t\xf8a\xff\x00\xff\xd9"
        b"\nEI\n"
    )

    # Drop the XObject call and put the inline image where "\nET" was.
    contents = page.get_contents()
    updated_data = contents.get_data().replace(b"/Im4 Do", b"")
    updated_data = updated_data.replace(b"\nET", inline_image)
    contents.set_data(updated_data)
    page.replace_contents(contents)

    reference_reader = PdfReader(RESOURCE_ROOT / "imagemagick-images.pdf")
    expected = reference_reader.pages[3].images[0].image
    assert get_image_data(expected) == get_image_data(page.images[0].image)


def test_deprecate_inline_image_filters():
    """The ``/CCF`` filter name emits a deprecation warning; ``/CCITTFaxDecode`` decodes."""
    ccitt_stream = ContentStream(stream=None, pdf=None)
    ccitt_stream.set_data(b"&\xa0\xbf\xcc9\x14|G#\x1f\xff\xf1\xcc9\x18\xfe\xbbX\xfc\x00@\x04")

    decode_parameters = DictionaryObject(
        {
            NameObject("/K"): NumberObject(-1),
            NameObject("/BlackIs1"): TextStringObject("false"),
            NameObject("/Columns"): NumberObject(16),
            NameObject("/Rows"): NumberObject(16),
        }
    )

    # The abbreviations do not work here, which is one of the reasons for the deprecation.
    stream_entries = {
        "/Width": NumberObject(16),
        "/Height": NumberObject(16),
        "/ColorSpace": NameObject("/DeviceGray"),
        "/BitsPerComponent": NumberObject(1),
        "/Filter": NameObject("/CCF"),
        "/DecodeParams": ArrayObject([decode_parameters]),
    }
    for entry_name, entry_value in stream_entries.items():
        ccitt_stream[NameObject(entry_name)] = entry_value

    with pytest.warns(
            expected_warning=DeprecationWarning,
            match=r"^The filter name /CCF is deprecated and will be removed in pypdf 7\.0\.0\. Use /CCITTFaxDecode instead\.$"  # noqa: E501
    ):
        decode_stream_data(ccitt_stream)

    # With the full filter name the data decodes to something starting with a TIFF-style "II*" marker.
    ccitt_stream[NameObject("/Filter")] = NameObject("/CCITTFaxDecode")
    decoded = decode_stream_data(ccitt_stream)
    assert decoded.startswith(b"II*")


def test_flatedecode__columns_is_zero():
    """Decoding with ``/Predictor`` 13 and ``/Columns`` 0 raises ``PdfReadError``."""
    flate = FlateDecode()
    decode_parms = DictionaryObject(
        {
            NameObject("/Predictor"): NumberObject(13),
            NameObject("/Columns"): NumberObject(0),
        }
    )
    compressed = flate.encode(b"Hello World!")

    with pytest.raises(expected_exception=PdfReadError, match=r"^Expected positive number for /Columns, got 0!$"):
        flate.decode(compressed, decode_parms)


def test_runlengthdecode__decode_limit():
    """Oversized run-length output hits the limit; output exactly at a patched limit decodes."""

    def build_payload(target_size):
        # One b"\x81A" pair per 128 output bytes, terminated by b"\x80".
        return (b"\x81A" * (target_size // 128)) + b"\x80"

    too_large = 76 * 1024 * 1024  # 76 MB target
    oversized_payload = build_payload(too_large)
    with pytest.raises(expected_exception=LimitReachedError, match=r"^Limit reached while decompressing\.$"):
        RunLengthDecode.decode(oversized_payload)

    small = 5 * 1024
    small_payload = build_payload(small)

    # Use a very low limit for this exact comparison, otherwise *pytest* takes ages to render a failure diff.
    with mock.patch("pypdf.filters.RUN_LENGTH_MAX_OUTPUT_LENGTH", small):
        assert RunLengthDecode.decode(small_payload) == b"A" * small


@pytest.mark.timeout(10)
def test_asciihexdecode__speed():
    """Decoding 1.2 million hex pairs must complete within the test timeout."""
    hex_pairs = b"41" * 1_200_000
    ASCIIHexDecode.decode(hex_pairs + b">")


================================================
FILE: tests/test_font.py
================================================
"""Test font-related functionality."""

from pypdf._font import Font
from pypdf.generic import DictionaryObject, NameObject


def test_font_descriptor():
    """Check descriptor values and text width for fonts built from a font resource."""
    resource = DictionaryObject(
        {
            NameObject("/BaseFont"): NameObject("/Helvetica"),
            NameObject("/Subtype"): NameObject("/Type1"),
        }
    )

    helvetica = Font.from_font_resource(resource)
    assert helvetica.font_descriptor.family == "Helvetica"
    assert helvetica.font_descriptor.weight == "Medium"
    assert helvetica.font_descriptor.ascent == 718
    assert helvetica.font_descriptor.descent == -207

    sample_text = "This is a long sentence. !@%%^€€€. çûįö¶´"
    assert helvetica.text_width(sample_text) == 19251

    # A base font name of /Palatino yields an "Unknown" weight.
    resource[NameObject("/BaseFont")] = NameObject("/Palatino")
    palatino = Font.from_font_resource(resource)
    assert palatino.font_descriptor.weight == "Unknown"

    resource[NameObject("/BaseFont")] = NameObject("/Courier-Bold")
    courier_bold = Font.from_font_resource(resource)
    assert courier_bold.font_descriptor.italic_angle == 0
    assert courier_bold.font_descriptor.flags == 33
    assert courier_bold.font_descriptor.bbox == (-113.0, -250.0, 749.0, 801.0)


================================================
FILE: tests/test_forms.py
================================================
"""Test form-related functionality. Separate file to keep overview."""

from io import BytesIO

import pytest

from pypdf import PdfReader, PdfWriter
from tests import get_data_from_url


@pytest.mark.enable_socket
def test_form_button__v_value_should_be_name_object():
    """Updating the "Other" field to "/On" must serialise ``/V`` as a name, not a string."""
    source = get_data_from_url(
        "https://github.com/user-attachments/files/18736500/blank-form.pdf",
        name="issue3115.pdf",
    )
    writer = PdfWriter(clone_from=PdfReader(BytesIO(source)))
    writer.update_page_form_field_values(
        writer.pages[0],
        {"Other": "/On"},
        auto_regenerate=False,
    )
    output = BytesIO()
    writer.write(output)
    written = output.getvalue()

    # Wrong: `/V (/On)`.
    assert b"\n/V /On\n" in written


================================================
FILE: tests/test_generic.py
================================================
"""Test the pypdf.generic module."""

import codecs
import gc
import weakref
from base64 import a85encode
from copy import deepcopy
from io import BytesIO

import pytest

from pypdf import PdfReader, PdfWriter
from pypdf.constants import CheckboxRadioButtonAttributes
from pypdf.errors import DeprecationError, PdfReadError, PdfStreamError
from pypdf.generic import (
    ArrayObject,
    BooleanObject,
    ByteStringObject,
    ContentStream,
    DecodedStreamObject,
    Destination,
    DictionaryObject,
    Fit,
    FloatObject,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    OutlineItem,
    PdfObject,
    RectangleObject,
    StreamObject,
    TextStringObject,
    TreeObject,
    create_string_object,
    encode_pdfdocencoding,
    is_null_or_none,
    read_hex_string_from_stream,
    read_object,
    read_string_from_stream,
)
from pypdf.generic._image_inline import (
    extract_inline__ascii85_decode,
    extract_inline__ascii_hex_decode,
    extract_inline__dct_decode,
    extract_inline__run_length_decode,
)

from . import RESOURCE_ROOT, get_data_from_url
from .utils import ReaderDummy


class ChildDummy(DictionaryObject):
    """Dictionary test double whose ``indirect_reference`` is the object itself."""

    @property
    def indirect_reference(self):
        # Return the instance directly instead of a separate reference object.
        return self


def test_float_object_exception(caplog):
    """An unparsable float literal compares equal to 0 and leaves a log entry."""
    parsed = FloatObject("abc")
    assert parsed == 0
    assert caplog.text != ""


def test_number_object_exception(caplog):
    """An unparsable integer literal compares equal to 0 and leaves a log entry."""
    parsed = NumberObject("0,0")
    assert parsed == 0
    assert caplog.text != ""


def test_number_object_no_exception():
    """Constructing a NumberObject from a very large integer must not raise."""
    huge_value = 2**100_000_000
    NumberObject(huge_value)


def test_create_string_object_exception():
    """Passing an int to create_string_object raises TypeError with one of two messages."""
    with pytest.raises(TypeError) as exc_info:
        create_string_object(123)
    message = exc_info.value.args[0]

    # typeguard is not running
    if message == "create_string_object should have str or unicode arg":
        return
    # typeguard is enabled
    assert 'type of argument "string" must be one of (str, bytes); got int instead' in message


@pytest.mark.parametrize(
    ("value", "expected", "tell"), [(b"true", b"true", 4), (b"false", b"false", 5)]
)
def test_boolean_object(value, expected, tell):
    """Reading a boolean yields the matching value; re-reading the stream gives the raw bytes."""
    source = BytesIO(value)
    parsed = BooleanObject.read_from_stream(source)
    should_be_true = expected == b"true"
    assert parsed.value == should_be_true

    # Rewind and consume everything to check content and final position.
    source.seek(0, 0)
    raw = source.read()
    assert raw == expected
    assert source.tell() == tell


def test_boolean_object_write():
    """A BooleanObject built from None is written as ``false``."""
    sink = BytesIO()
    BooleanObject(None).write_to_stream(sink)
    sink.seek(0, 0)
    written = sink.read()
    assert written == b"false"


def test_boolean_eq():
    """Compare BooleanObject against bools and a string, and check hash behaviour."""
    flag = BooleanObject(True)
    assert (flag == True) is True  # noqa: E712
    assert (flag == False) is False  # noqa: E712
    assert (flag == "True") is False
    true_hash = hash(flag)
    # Hashing the same object twice gives the same value.
    assert true_hash == hash(flag)

    flag = BooleanObject(False)
    assert (flag == True) is False  # noqa: E712
    assert (flag == False) is True  # noqa: E712
    assert (flag == "True") is False
    assert true_hash != hash(flag)


def test_boolean_object_exception():
    """A capitalised ``False`` is not accepted as a boolean."""
    source = BytesIO(b"False")
    with pytest.raises(PdfReadError) as exc_info:
        BooleanObject.read_from_stream(source)
    message = exc_info.value.args[0]
    assert message == "Could not read Boolean object"


def test_array_object_exception():
    """Input that is not an array raises PdfReadError when read as one."""
    source = BytesIO(b"False")
    with pytest.raises(PdfReadError) as exc_info:
        ArrayObject.read_from_stream(source, None)
    message = exc_info.value.args[0]
    assert message == "Could not read array"


def test_null_object_exception():
    """Input other than a null token raises PdfReadError when read as null."""
    source = BytesIO(b"notnull")
    with pytest.raises(PdfReadError) as exc_info:
        NullObject.read_from_stream(source)
    message = exc_info.value.args[0]
    assert message == "Could not read Null object"


@pytest.mark.parametrize("value", [b"", b"False", b"foo ", b"foo  ", b"foo bar"])
def test_indirect_object_premature(value):
    """Each of these inputs ends before an indirect reference is complete."""
    source = BytesIO(value)
    with pytest.raises(PdfStreamError) as exc_info:
        IndirectObject.read_from_stream(source, None)
    message = exc_info.value.args[0]
    assert message == "Stream has ended unexpectedly"


def test_read_hex_string_from_stream():
    """Reading ``a1>`` as a hex string yields the single character ``\\x10``."""
    source = BytesIO(b"a1>")
    decoded = read_hex_string_from_stream(source)
    assert decoded == "\x10"


def test_read_hex_string_from_stream_exception():
    """An empty stream raises PdfStreamError when read as a hex string."""
    source = BytesIO(b"")
    with pytest.raises(PdfStreamError) as exc_info:
        read_hex_string_from_stream(source)
    message = exc_info.value.args[0]
    assert message == "Stream has ended unexpectedly"


def test_read_string_from_stream_exception():
    """A one-byte stream ends before the string is terminated."""
    source = BytesIO(b"x")
    with pytest.raises(PdfStreamError) as exc_info:
        read_string_from_stream(source)
    message = exc_info.value.args[0]
    assert message == "Stream has ended unexpectedly"


def test_read_string_from_stream_not_in_escapedict_no_digit():
    """A backslash followed by ``y`` and then end of data raises a read error."""
    source = BytesIO(b"x\\y")
    with pytest.raises(PdfReadError) as exc_info:
        read_string_from_stream(source)
    message = exc_info.value.args[0]
    assert message == "Stream has ended unexpectedly"


def test_read_string_from_stream_multichar_eol():
    """A backslash-newline pair is dropped, leaving only the following space."""
    source = BytesIO(b"x\\\n )")
    decoded = read_string_from_stream(source)
    assert decoded == " "


def test_read_string_from_stream_multichar_eol2():
    """A backslash followed by two newlines produces an empty string."""
    source = BytesIO(b"x\\\n\n)")
    decoded = read_string_from_stream(source)
    assert decoded == ""


def test_read_string_from_stream_excape_digit():
    """A ``\\1`` escape followed by a non-digit decodes to ``\\x01``."""
    source = BytesIO(b"x\\1a )")
    decoded = read_string_from_stream(source)
    assert decoded == "\x01a "


def test_read_string_from_stream_excape_digit2():
    """Consecutive single-digit escapes each decode to their own character."""
    source = BytesIO(b"(hello \\1\\2\\3\\4)")
    decoded = read_string_from_stream(source)
    assert decoded == "hello \x01\x02\x03\x04"


def test_name_object(caplog):
    """Exercise NameObject reading (escapes, encodings) and writing (escaping, deprecation)."""

    def parse(raw, pdf=None):
        # Read a name from raw bytes wrapped in a fresh stream.
        return NameObject.read_from_stream(BytesIO(raw), pdf)

    def serialise(text):
        # Write a name built from ``text`` and return the bytes produced.
        buffer = BytesIO()
        NameObject(text).write_to_stream(buffer)
        return bytes(buffer.getbuffer())

    with pytest.raises(PdfReadError) as exc:
        parse(b"x")
    assert exc.value.args[0] == "Name read error"

    with pytest.raises(
        DeprecationError,
        match=r"surfix is deprecated and was removed in pypdf 5\.0\.0\. Use prefix instead\.",
    ):
        _ = NameObject.surfix

    parsed = parse(b"/A;Name_With-Various***Characters?")
    assert parsed == "/A;Name_With-Various***Characters?"

    parsed = parse(b"/paired#28#29parentheses")
    assert parsed == "/paired()parentheses"

    assert parse(b"/A#42") == "/AB"

    parsed = parse(
        b"/#f1j#d4#aa#0c#ce#87#b4#b3#b0#23J#86#fe#2a#b2jYJ#94",
        ReaderDummy(),
    )
    assert parsed == "/ñjÔª\x0cÎ\x87´³°#J\x86þ*²jYJ\x94"

    assert parse(b"/#JA#231f") == "/#JA#1f"

    parsed = parse(b"/#e4#bd#a0#e5#a5#bd#e4#b8#96#e7#95#8c")
    assert parsed == "/你好世界"

    # test PDFDocEncoding (latin-1)
    assert parse(b"/DocuSign\xae") == "/DocuSign®"

    # test write
    assert serialise("/hello") == b"/hello"

    caplog.clear()
    with pytest.raises(
            expected_exception=DeprecationError,
            match=r"Incorrect first char in NameObject, should start with '/': \(hello\) is deprecated and was"
    ):
        serialise("hello")

    # Each entry: name to write, expected serialised bytes; nothing may be logged.
    write_cases = [
        ("/DIJMAC+Arial Black#1", b"/DIJMAC+Arial#20Black#231"),
        ("/你好世界 (%)", b"/#E4#BD#A0#E5#A5#BD#E4#B8#96#E7#95#8C#20#28#25#29"),
        ("/{foo}<bar>(baz)[qux]#/%", b"/#7Bfoo#7D#3Cbar#3E#28baz#29#5Bqux#5D#23#2F#25"),
    ]
    for text, expected_bytes in write_cases:
        caplog.clear()
        assert serialise(text) == expected_bytes
        assert caplog.text == ""


def test_destination_fit_r():
    """A ``/FitR`` destination exposes its four edges, has no zoom and no children."""
    fit = Fit.fit_rectangle(0, 0, 0, 0)
    destination = Destination(TextStringObject("title"), NullObject(), fit)

    assert destination.title == NameObject("title")
    assert destination.typ == "/FitR"
    assert destination.zoom is None
    for edge in ("left", "right", "top", "bottom"):
        assert getattr(destination, edge) == FloatObject(0)
    assert list(destination) == []
    destination.empty_tree()


def test_destination_fit_v():
    """Build ``/FitV`` destinations with and without a ``left`` value; add the first to a writer."""
    d = Destination(NameObject("title"), NullObject(), Fit.fit_vertically(left=0))

    writer = PdfWriter()
    writer.add_named_destination_object(d)

    # Trigger Exception
    # NOTE(review): no exception is asserted here; as written, the test only passes
    # if this constructor call completes. Presumably an exception is handled
    # internally - confirm against Destination.
    Destination(NameObject("title"), NullObject(), Fit.fit_vertically(left=None))


def test_outline_item_write_to_stream():
    """An outline item with a ``/FitV`` fit serialises to the expected dictionary bytes."""
    item = OutlineItem(NameObject("title"), NullObject(), Fit.fit_vertically(left=0))
    sink = BytesIO()
    item.write_to_stream(sink)
    sink.seek(0, 0)
    written = sink.read()
    assert written == b"<<\n/Title (title)\n/Dest [ null /FitV 0.0 ]\n>>"


def test_encode_pdfdocencoding_keyerror():
    """An emoji cannot be encoded and raises UnicodeEncodeError naming the codec."""
    with pytest.raises(UnicodeEncodeError) as exc_info:
        encode_pdfdocencoding("😀")
    codec_name = exc_info.value.args[0]
    assert codec_name == "pdfdocencoding"


@pytest.mark.parametrize("test_input", ["", "data"])
def test_encode_pdfdocencoding_returns_bytes(test_input):
    """
    encode_pdfdocencoding() must return real ``bytes`` for every input.

    mypy treats bytearray as duck type compatible with bytes, so this is
    checked at runtime.
    """
    encoded = encode_pdfdocencoding(test_input)
    assert isinstance(encoded, bytes)


def test_read_object_comment_exception():
    """A stream holding only a comment ends before any object is found."""
    source = BytesIO(b"% foobar")
    with pytest.raises(PdfStreamError) as exc_info:
        read_object(source, None)
    message = exc_info.value.args[0]
    assert message == "File ended unexpectedly."


def test_read_object_empty():
    """Reading at an ``endobj`` token yields a NullObject."""
    source = BytesIO(b"endobj")
    parsed = read_object(source, None)
    assert isinstance(parsed, NullObject)


def test_read_object_empty_in_array():
    """An array opened right before ``endobj`` contains a single NullObject."""
    source = BytesIO(b"[endobj")
    parsed = read_object(source, None)
    assert isinstance(parsed, ArrayObject)
    assert len(parsed) == 1
    only_item = parsed[0]
    assert isinstance(only_item, NullObject)


def test_read_object_invalid():
    """An unrecognised token raises PdfReadError mentioning the token."""
    source = BytesIO(b"hello")
    with pytest.raises(PdfReadError) as exc_info:
        read_object(source, None)
    message = exc_info.value.args[0]
    assert "hello" in message


def test_read_object_comment():
    """A leading comment line is skipped and the following number is read."""
    source = BytesIO(b"% foobar\n1 ")
    parsed = read_object(source, None)
    assert parsed == 1


def test_bytestringobject():
    """A ByteStringObject is written as a hex string."""
    byte_string = ByteStringObject("stream", encoding="utf-8")
    sink = BytesIO(b"")
    byte_string.write_to_stream(sink)
    sink.seek(0, 0)
    written = sink.read()
    assert written == b"<73747265616d>"  # TODO: how can we verify this?


def test_dictionaryobject_key_is_no_pdfobject():
    """Item assignment with a plain ``str`` key raises ValueError."""
    dictionary = DictionaryObject({NameObject("/S"): NameObject("/GoTo")})
    with pytest.raises(ValueError) as exc_info:
        dictionary["foo"] = NameObject("/GoTo")
    message = exc_info.value.args[0]
    assert message == "Key must be a PdfObject"


def test_dictionaryobject_xmp_meta():
    """A dictionary holding only ``/S`` reports no XMP metadata."""
    dictionary = DictionaryObject({NameObject("/S"): NameObject("/GoTo")})
    metadata = dictionary.xmp_metadata
    assert metadata is None


def test_dictionaryobject_value_is_no_pdfobject():
    """Item assignment with a plain ``str`` value raises ValueError."""
    dictionary = DictionaryObject({NameObject("/S"): NameObject("/GoTo")})
    with pytest.raises(ValueError) as exc_info:
        dictionary[NameObject("/S")] = "/GoTo"
    message = exc_info.value.args[0]
    assert message == "Value must be a PdfObject"


def test_dictionaryobject_setdefault_key_is_no_pdfobject():
    """``setdefault`` with a plain ``str`` key raises ValueError."""
    dictionary = DictionaryObject({NameObject("/S"): NameObject("/GoTo")})
    with pytest.raises(ValueError) as exc_info:
        dictionary.setdefault("foo", NameObject("/GoTo"))
    message = exc_info.value.args[0]
    assert message == "Key must be a PdfObject"


def test_dictionaryobject_setdefault_value_is_no_pdfobject():
    """``setdefault`` with a plain ``str`` value raises ValueError."""
    dictionary = DictionaryObject({NameObject("/S"): NameObject("/GoTo")})
    with pytest.raises(ValueError) as exc_info:
        dictionary.setdefault(NameObject("/S"), "/GoTo")
    message = exc_info.value.args[0]
    assert message == "Value must be a PdfObject"


def test_dictionaryobject_setdefault_value():
    """``setdefault`` with PdfObject key and value must not raise."""
    key = NameObject("/S")
    dictionary = DictionaryObject({key: NameObject("/GoTo")})
    dictionary.setdefault(NameObject("/S"), NameObject("/GoTo"))


def test_dictionaryobject_read_from_stream():
    """A minimal dictionary is parsed into the matching key/value pair."""
    source = BytesIO(b"<< /S /GoTo >>")
    parsed = DictionaryObject.read_from_stream(source, None)
    expected = {NameObject("/S"): NameObject("/GoTo")}
    assert parsed.get_object() == expected


def test_dictionaryobject_read_from_stream_broken():
    """A dictionary opened with a single ``<`` raises a read error with the byte position."""
    source = BytesIO(b"< /S /GoTo >>")
    with pytest.raises(PdfReadError) as exc_info:
        DictionaryObject.read_from_stream(source, None)
    message = exc_info.value.args[0]
    assert message == "Dictionary read error at byte 0x2: stream must begin with '<<'"


def test_dictionaryobject_read_from_stream_unexpected_end():
    """A dictionary without a closing ``>>`` raises PdfStreamError."""
    source = BytesIO(b"<< \x00/S /GoTo")
    with pytest.raises(PdfStreamError) as exc_info:
        DictionaryObject.read_from_stream(source, None)
    message = exc_info.value.args[0]
    assert message == "Stream has ended unexpectedly"


def test_dictionaryobject_read_from_stream_stream_no_newline():
    """A ``stream`` keyword that is not followed by a newline raises a read error."""
    source = BytesIO(b"<< /S /GoTo >>stream")
    with pytest.raises(PdfReadError) as exc_info:
        DictionaryObject.read_from_stream(source, None)
    message = exc_info.value.args[0]
    assert message == "Stream data must be followed by a newline"


@pytest.mark.parametrize("strict", [True, False])
def test_dictionaryobject_read_from_stream_stream_no_stream_length(strict, caplog):
    """Without ``/Length``: strict mode raises, non-strict mode logs and still reads the data."""
    source = BytesIO(b"<< /S /GoTo >>stream\n123456789endstream abcd")

    class Tst:  # to replace pdf
        strict = False

    pdf = Tst()
    pdf.strict = strict

    if not strict:
        parsed = DictionaryObject.read_from_stream(source, pdf)
        assert "Stream length not defined" in caplog.text
        assert parsed.get_data() == b"123456789"
        return

    with pytest.raises(PdfReadError) as exc_info:
        DictionaryObject.read_from_stream(source, pdf)
    assert exc_info.value.args[0] == "Stream length not defined"


@pytest.mark.parametrize(
    ("strict", "length", "should_fail"),
    [
        (True, 6, False),
        (True, 10, False),
        (True, 4, True),
        (False, 6, False),
        (False, 10, False),
    ],
)
def test_dictionaryobject_read_from_stream_stream_stream_valid(
    strict, length, should_fail
):
    """
    Read a stream dictionary whose ``/Length`` varies.

    When ``should_fail`` is set, reading must raise ``PdfReadError``.
    Otherwise the dictionary and its stream data are checked directly, so an
    unexpected error or a failed assertion is reported as itself rather than
    through a sentinel exception.
    """
    stream = BytesIO(b"<< /S /GoTo /Length %d >>stream\nBT /F1\nendstream\n" % length)

    class Tst:  # to replace pdf
        strict = True

    pdf = Tst()
    pdf.strict = strict

    if should_fail:
        with pytest.raises(PdfReadError):
            DictionaryObject.read_from_stream(stream, pdf)
        return

    do = DictionaryObject.read_from_stream(stream, pdf)
    # TODO: What should happen with the stream?
    assert do == {"/S": "/GoTo"}
    if length in (6, 10):
        assert b"BT /F1" in do.get_data()


def test_rectangleobject():
    """Corner properties and edge attributes of RectangleObject can be read and updated."""
    rect = RectangleObject((1, 2, 3, 4))
    corners = (rect.lower_left, rect.lower_right, rect.upper_left, rect.upper_right)
    assert corners[0] == (1, 2)
    assert corners[1] == (3, 2)
    assert corners[2] == (1, 4)
    assert corners[3] == (3, 4)

    rect.lower_left = (5, 6)
    assert rect.lower_left == (5, 6)

    # In-place edge arithmetic is reflected in the corner.
    rect.bottom -= 2
    rect.left -= 2
    assert rect.lower_left == (3, 4)

    rect.lower_right = (7, 8)
    assert rect.lower_right == (7, 8)

    rect.upper_left = (9, 11)
    assert rect.upper_left == (9, 11)

    rect.upper_right = (13, 17)
    assert rect.upper_right == (13, 17)
    rect.top += 1
    rect.right += 1
    assert rect.upper_right == (14, 18)


def test_textstringobject_exc():
    """The original bytes of a plain ASCII text string are its ASCII encoding."""
    text = TextStringObject("foo")
    original = text.get_original_bytes()
    assert original == b"foo"


def test_textstringobject_autodetect_utf16():
    """With UTF-16 autodetection on, the configured BOM selects the byte order of the output."""
    text = TextStringObject("foo")
    text.autodetect_utf16 = True

    text.utf16_bom = codecs.BOM_UTF16_BE
    big_endian = text.get_original_bytes()
    assert big_endian == b"\xfe\xff\x00f\x00o\x00o"

    text.utf16_bom = codecs.BOM_UTF16_LE
    little_endian = b"\xff\xfef\x00o\x00o\x00"
    assert text.get_original_bytes() == little_endian
    assert text.get_encoded_bytes() == little_endian


def test_textstringobject__numbers_as_input():
    """Constructing a TextStringObject from an int or a float must not raise."""
    for number in (42, 13.37):
        _ = TextStringObject(number)


def test_remove_child_not_in_tree():
    """Removing a child that was never added to any tree raises ValueError."""
    empty_tree = TreeObject()
    stranger = ChildDummy()
    with pytest.raises(ValueError) as exc_info:
        empty_tree.remove_child(stranger)
    message = exc_info.value.args[0]
    assert message == "Removed child does not appear to be a tree item"


def test_remove_child_not_in_that_tree():
    """Check the error messages for removing a child before and after it is added to a tree."""
    parent = TreeObject()
    parent.indirect_reference = NullObject()
    node = TreeObject()
    node.indirect_reference = NullObject()

    # Not attached to any tree yet.
    with pytest.raises(ValueError) as exc_info:
        node.remove_from_tree()
    assert exc_info.value.args[0] == "Removed child does not appear to be a tree item"

    parent.add_child(node, ReaderDummy())
    with pytest.raises(ValueError) as exc_info:
        parent.remove_child(node)
    assert exc_info.value.args[0] == "Removed child is not a member of this tree"


def test_remove_child_not_found_in_tree():
    """
    Removing a child that names the tree as ``/Parent`` but was never added fails.

    Uses the module-level ``ChildDummy`` helper (its ``indirect_reference`` is
    the object itself) instead of redefining an identical local class.
    """
    tree = TreeObject()
    tree.indirect_reference = NullObject()
    child = ChildDummy(TreeObject())
    tree.add_child(child, ReaderDummy())

    # Second child claims the tree as parent without going through add_child.
    child2 = ChildDummy(TreeObject())
    child2[NameObject("/Parent")] = tree
    with pytest.raises(ValueError) as exc:
        tree.remove_child(child2)
    assert exc.value.args[0] == "Removal couldn't find item in tree"


def test_remove_child_found_in_tree():
    """Add children and remove the last, first and middle one, checking the count each time."""
    writer = PdfWriter()

    # Add Tree
    tree = TreeObject()
    writer._add_object(tree)

    def attach_child(foo_value):
        # It's important to set a value, otherwise the writer.get_reference will
        # return the same object when a further child is added.
        node = TreeObject()
        node[NameObject("/Foo")] = TextStringObject(foo_value)
        reference = writer._add_object(node)
        tree.add_child(reference, writer)
        return node, reference

    def assert_child_count(expected):
        assert tree[NameObject("/Count")] == expected
        assert len(list(tree.children())) == expected

    # Add first child
    attach_child("bar")
    assert_child_count(1)

    # Add second child
    _, second_ref = attach_child("baz")
    assert_child_count(2)

    # Remove last child
    tree.remove_child(second_ref)
    assert_child_count(1)

    # Add new child
    attach_child("3")
    assert_child_count(2)

    # Remove first child
    first_child = tree[NameObject("/First")]
    tree.remove_child(first_child)
    assert_child_count(1)

    fourth_child, _ = attach_child("4")
    assert_child_count(2)

    attach_child("5")
    assert_child_count(3)

    # Remove middle child
    fourth_child.remove_from_tree()
    assert_child_count(2)

    tree.empty_tree()


def test_remove_child_in_tree():
    """Add, remove and re-add the writer's newest object as a tree child, then empty the tree."""
    form_path = RESOURCE_ROOT / "form.pdf"

    tree = TreeObject()
    reader = PdfReader(form_path)
    writer = PdfWriter()
    writer._add_object(tree)
    writer.add_page(reader.pages[0])
    writer.add_outline_item("foo", page_number=0)

    # The most recently registered writer object is used as the child.
    newest = writer._objects[-1]
    tree.add_child(newest, writer)
    tree.remove_child(newest)
    tree.add_child(newest, writer)
    tree.empty_tree()


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name", "caplog_content"),
    [
        (  # parse_content_stream_peek_percentage
            "https://github.com/user-attachments/files/18381763/tika-985770.pdf",
            "tika-985770.pdf",
            "",
        ),
        (  # read_inline_image_no_has_q
            "https://github.com/user-attachments/files/18381775/tika-998719.pdf",
            "tika-998719.pdf",
            "",
        ),
        (  # read_inline_image_loc_neg_1
            "https://github.com/user-attachments/files/18381706/tika-935066.pdf",
            "tika-935066.pdf",
            "",
        ),
        (  # object_read_from_stream_unicode_error
            "https://github.com/user-attachments/files/18381750/tika-974966.pdf",
            "tika-974966.pdf",
            "",
        ),
        (  # dict_read_from_stream
            "https://github.com/user-attachments/files/18381762/tika-984877.pdf",
            "tika-984877.pdf",
            "Multiple definitions in dictionary at byte 0x1084 for key /Length",
        ),
    ],
    ids=[
        "parse_content_stream_peek_percentage",
        "read_inline_image_no_has_q",
        "read_inline_image_loc_neg_1",
        "object_read_from_stream_unicode_error",
        "dict_read_from_stream",
    ],
)
def test_extract_text(caplog, url: str, name: str, caplog_content: str):
    """Extract text from every page and check what was logged.

    An empty ``caplog_content`` means nothing may be logged at all; otherwise
    the given text must appear somewhere in the captured log.
    """
    pdf_bytes = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(pdf_bytes))
    for page in reader.pages:
        page.extract_text()

    if caplog_content != "":
        assert caplog_content in caplog.text
    else:
        assert caplog_content == caplog.text


@pytest.mark.slow
@pytest.mark.enable_socket
def test_text_string_write_to_stream():
    """Compressing content streams of a cloned document must not raise."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381698/tika-924562.pdf",
        name="tika-924562.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    writer = PdfWriter()
    writer.clone_document_from_reader(reader)
    for cloned_page in writer.pages:
        cloned_page.compress_content_streams()


@pytest.mark.enable_socket
def test_bool_repr(tmp_path):
    """The field report written by ``get_fields`` starts with the signature field details."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381703/tika-932449.pdf",
        name="tika-932449.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    report_path = tmp_path / "tmp-fields-report.txt"
    with report_path.open("w") as report:
        fields = reader.get_fields(fileobj=report)
    assert fields
    assert list(fields.keys()) == ["USGPOSignature"]

    expected_prefix = (
        "Field Name: USGPOSignature\nField Type: Signature\nField Flags: 1\n"
        "Value: {'/Type': '/Sig', '/Filter': '/Adobe.PPKLite', "
        "'/SubFilter':"
    )
    report_text = report_path.read_text()
    assert report_text.startswith(expected_prefix)


@pytest.mark.enable_socket
def test_issue_997(pdf_file_path):
    """Append the downloaded PDF to a fresh writer and write it to ``pdf_file_path``, twice."""
    url = (
        "https://github.com/py-pdf/pypdf/files/8908874/"
        "Exhibit_A-2_930_Enterprise_Zone_Tax_Credits_final.pdf"
    )
    name = "gh-issue-997.pdf"

    merger = PdfWriter()
    merger.append(BytesIO(get_data_from_url(url, name=name)))  # here the error raises
    with open(pdf_file_path, "wb") as f:
        merger.write(f)
    merger.close()

    # Strict
    # NOTE(review): this pass is labelled "Strict" but is built exactly like the
    # first one; no strict flag is set anywhere in this block. Confirm whether a
    # strict reader was intended here.
    merger = PdfWriter()
    merger.append(BytesIO(get_data_from_url(url, name=name)))  # here the error raises
    with open(pdf_file_path, "wb") as f:
        merger.write(f)
    merger.close()


def test_checkboxradiobuttonattributes_opt():
    """``/Opt`` is listed among the checkbox/radio button attributes."""
    attributes = CheckboxRadioButtonAttributes.attributes_dict()
    assert "/Opt" in attributes


def test_name_object_invalid_decode():
    """With only UTF-8 allowed, an undecodable name raises in strict mode and is read otherwise."""
    saved_charsets = deepcopy(NameObject.CHARSETS)
    try:
        NameObject.CHARSETS = ("utf-8",)
        raw = BytesIO(b"/\x80\x02\x03")

        # strict:
        with pytest.raises(PdfReadError) as exc_info:
            NameObject.read_from_stream(raw, ReaderDummy(strict=True))
        message = exc_info.value.args[0]
        assert "Illegal character in NameObject " in message

        # non-strict:
        raw.seek(0)
        NameObject.read_from_stream(raw, ReaderDummy(strict=False))
    finally:
        # Always restore the class-level charsets for the other tests.
        NameObject.CHARSETS = saved_charsets


def test_indirect_object_invalid_read():
    """A reference ending in ``s`` instead of ``R`` raises a read error with the byte position."""
    source = BytesIO(b"0 1 s")
    with pytest.raises(PdfReadError) as exc_info:
        IndirectObject.read_from_stream(source, ReaderDummy())
    message = exc_info.value.args[0]
    assert message == "Error reading indirect object reference at byte 0x5"


def test_create_string_object_utf16_bom():
    """UTF-16 input with a BOM is detected, decoded, and re-encoded to the same bytes."""
    big_endian = b"\xfe\xff\x00P\x00a\x00p\x00e\x00r\x00P\x00o\x00r\x00t\x00 \x001\x004\x00\x00"
    little_endian = b"\xff\xfeP\x00a\x00p\x00e\x00r\x00P\x00o\x00r\x00t\x00 \x001\x004\x00\x00\x00"

    def check_round_trip(text, raw, bom):
        assert text == "PaperPort 14\x00"
        assert text.autodetect_utf16 is True
        assert text.utf16_bom == bom
        assert text.get_encoded_bytes() == raw

    # utf16-be
    check_round_trip(create_string_object(big_endian), big_endian, b"\xfe\xff")

    # utf16-le
    check_round_trip(create_string_object(little_endian), little_endian, b"\xff\xfe")
    check_round_trip(TextStringObject(little_endian), little_endian, b"\xff\xfe")

    # utf16-be without bom
    no_bom = TextStringObject("ÿ")
    no_bom.autodetect_utf16 = True
    no_bom.utf16_bom = b""
    assert no_bom.get_encoded_bytes() == b"\x00\xFF"
    assert no_bom.original_bytes == b"\x00\xFF"


def test_create_string_object_force():
    """The second argument (empty list, byte mapping or codec name) controls the decoding."""
    raw = b"Hello World"
    from_empty_list = create_string_object(raw, [])
    assert from_empty_list == "Hello World"
    # Byte 72 ("H") is mapped to "A".
    from_mapping = create_string_object(raw, {72: "A"})
    assert from_mapping == "Aello World"
    from_codec_name = create_string_object(raw, "utf8")
    assert from_codec_name == "Hello World"


@pytest.mark.parametrize(
    ("value", "expected"),
    [
        ("0.000000", "0.0"),
        ("0.0", "0.0"),
        ("1.0", "1"),
        ("0.123000", "0.123"),
        ("0.000123000", "0.000123"),
        ("0.0", "0.0"),
        ("0", "0.0"),
        ("1", "1"),
        ("1.0", "1"),
        ("1.01", "1.01"),
        ("1.010", "1.01"),
        ("0000.0000", "0.0"),
        ("0.10101010", "0.1010101"),
        ("50000000000", "50000000000"),
        ("99900000000000000123", "99900000000000000000"),
        ("99900000000000000123.456000", "99900000000000000000"),
        ("0.00000000000000000000123", "0.00000000000000000000123"),
        ("0.00000123", "0.00000123"),
        ("0.00000000000000000000123000", "0.00000000000000000000123"),
        ("-4.6", "-4.6"),  # from #1910
        # (
        #    "50032481330523882508234.00000000000000000000123000",
        #    "50032481330523882508234.00000000000000000000123",
        # ),
        # (
        #    "928457298572093487502198745102973402987412908743.75249875981374981237498213740000",
        #    "928457298572093487502198745102973402987412908743.7524987598137498123749821374",
        # ),
    ],
)
def test_float_object_decimal_to_string(value, expected):
    """The ``repr`` of a FloatObject built from ``value`` equals ``expected``."""
    rendered = repr(FloatObject(value))
    assert rendered == expected


def test_cloning(caplog):
    """
    Exercise `clone` on several generic objects.

    Along the way, the number of objects registered in the writer is
    checked to see when a clone adds a new object and when it does not.
    """
    writer = PdfWriter()
    # The base PdfObject does not provide a clone implementation.
    with pytest.raises(Exception) as exc:
        PdfObject().clone(writer)
    assert "PdfObject does not implement .clone so far" in exc.value.args[0]

    obj1 = DictionaryObject()
    obj1.indirect_reference = None
    n = len(writer._objects)
    # The first clone adds exactly one object to the writer.
    obj2 = obj1.clone(writer)
    assert len(writer._objects) == n + 1
    # Cloning the clone into the same writer does not add another object.
    obj3 = obj2.clone(writer)
    assert len(writer._objects) == n + 1
    assert obj2.indirect_reference == obj3.indirect_reference
    # The same holds when cloning through the indirect reference.
    obj3 = obj2.indirect_reference.clone(writer)
    assert len(writer._objects) == n + 1
    assert obj2.indirect_reference == obj3.indirect_reference
    assert (
        obj2.indirect_reference
        == obj2._reference_clone(obj2, writer).indirect_reference
    )
    assert len(writer._objects) == n + 1
    assert obj2.indirect_reference == obj3.indirect_reference

    # Passing True as second argument results in a new, distinct object.
    obj3 = obj2.indirect_reference.clone(writer, True)
    assert len(writer._objects) == n + 2
    assert obj2.indirect_reference != obj3.indirect_reference

    # Arrays: cloning a clone yields an equal array.
    arr1 = ArrayObject([obj2])
    arr2 = arr1.clone(writer)
    arr3 = arr2.clone(writer)
    assert arr2 == arr3
    obj10 = StreamObject()
    arr1 = ArrayObject([obj10])
    obj11 = obj10.clone(writer)
    assert arr1[0] == obj11

    # Dictionaries: a contained stream is an IndirectObject in the clone.
    obj20 = DictionaryObject(
        {NameObject("/Test"): NumberObject(1), NameObject("/Test2"): StreamObject()}
    )
    obj21 = obj20.clone(writer, ignore_fields=None)
    assert "/Test" in obj21
    assert isinstance(obj21.get("/Test2"), IndirectObject)


def test_cloning_indirect_obj_keeps_hard_reference():
    """
    Reported in #3450

    Cloning an IndirectObject must keep a hard reference to the underlying
    object. Without it, the object could be deallocated, which could allow
    `id(obj)` to return the same value for different objects.
    """
    source_writer = PdfWriter()
    reference = IndirectObject(1, 0, source_writer)

    # Weak reference to the underlying object: it is used below to find out
    # whether that object is still alive in memory.
    underlying = weakref.ref(reference.pdf)
    assert underlying() is not None

    target_writer = PdfWriter()
    reference.clone(target_writer)

    # Mimic the reference and its writer going out of scope and being
    # garbage collected. The clone should have kept a hard reference,
    # preventing the deallocation.
    del reference
    del source_writer
    gc.collect()
    assert underlying() is not None


def test_cloning_null_obj_keeps_hard_reference():
    """
    Cloning a NullObject must keep a hard reference to the underlying
    object. Without it, the object could be deallocated, which could allow
    `id(obj)` to return the same value for different objects.
    """
    source_writer = PdfWriter()
    reference = IndirectObject(1, 0, source_writer)
    null_value = NullObject()
    null_value.indirect_reference = reference

    # Weak reference to the underlying object: it is used below to find out
    # whether that object is still alive in memory.
    underlying = weakref.ref(reference.pdf)
    assert underlying() is not None

    target_writer = PdfWriter()
    null_value.clone(target_writer)

    # Mimic the reference, its writer and the null object going out of scope
    # and being garbage collected. The clone should have kept a hard
    # reference, preventing the deallocation.
    del reference
    del source_writer
    del null_value
    gc.collect()
    assert underlying() is not None


@pytest.mark.enable_socket
def test_append_with_indirectobject_not_pointing(caplog):
    """
    Reported in #1631

    Object 43 0 is referenced but not defined: appending the document in
    non-strict mode has to log "Object 43 0 not defined." instead of failing.
    """
    url = "https://github.com/py-pdf/pypdf/files/10729142/document.pdf"
    name = "tst_iss1631.pdf"
    data = BytesIO(get_data_from_url(url, name=name))
    reader = PdfReader(data, strict=False)
    writer = PdfWriter()
    writer.append(reader)
    assert "Object 43 0 not defined." in caplog.text


@pytest.mark.enable_socket
def test_iss1615_1673():
    """
    Cover documents where /N does not indicate chains of objects, and
    where /N,... are not part of chains.
    """
    # #1615
    letter_reader = PdfReader(
        BytesIO(
            get_data_from_url(
                "https://github.com/py-pdf/pypdf/files/10671366/graph_letter.pdf",
                name="graph_letter.pdf",
            )
        )
    )
    letter_writer = PdfWriter()
    letter_writer.append(letter_reader)
    annotation = letter_writer.pages[0]["/Annots"][0].get_object()
    color_space = annotation["/AP"]["/N"]["/Resources"]["/ColorSpace"]["/Cs1"]
    assert "/N" in color_space[1].get_object()

    # #1673
    form_reader = PdfReader(
        BytesIO(
            get_data_from_url(
                "https://github.com/py-pdf/pypdf/files/10848750/budgeting-loan-form-sf500.pdf",
                name="budgeting-loan-form-sf500.pdf",
            )
        )
    )
    form_writer = PdfWriter()
    form_writer.clone_document_from_reader(form_reader)


@pytest.mark.enable_socket
def test_destination_withoutzoom():
    """Cf issue #1832: read the outline, then clone and write the document."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/15605648/2021_book_security.pdf",
        name="2021_book_security.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    # Accessing the outline must not raise.
    reader.outline

    target = BytesIO()
    PdfWriter(clone_from=reader).write(target)


def test_encodedstream_set_data():
    """
    EncodedStreamObject.set_data to extend data stream works.

    Checks also the flate_encode.
    """
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    reader = PdfReader(pdf_path)
    co = reader.pages[0]["/Contents"][0].get_object()
    # Prepend a comment line to the existing data and check it is readable.
    co.set_data(b"%hello\n" + co.get_data())
    assert b"hello" in co.get_data()
    # Round trip: serialize the stream and read it back.
    b = BytesIO()
    co.write_to_stream(b)
    b.seek(0)
    aa = read_object(b, None)
    assert b"hello" in aa.get_data()
    assert aa["/Filter"] == "/FlateDecode"
    assert "/DecodeParms" not in aa
    # Each flate_encode call adds one more /FlateDecode entry to the result.
    bb = aa.flate_encode()
    assert b"hello" in bb.get_data()
    assert bb["/Filter"] == ["/FlateDecode", "/FlateDecode"]
    assert str(bb["/DecodeParms"]) == "[NullObject, NullObject]"
    bb[NameObject("/Test")] = NameObject("/MyTest")
    cc = bb.flate_encode()
    # The object being encoded keeps its own filter list unchanged.
    assert bb["/Filter"] == ["/FlateDecode", "/FlateDecode"]
    assert b"hello" in cc.get_data()
    assert cc["/Filter"] == ["/FlateDecode", "/FlateDecode", "/FlateDecode"]
    assert str(cc["/DecodeParms"]) == "[NullObject, NullObject, NullObject]"
    # Additional keys are carried over to the encoded result.
    assert cc[NameObject("/Test")] == "/MyTest"

    # Only bytes are accepted as data.
    with pytest.raises(TypeError):
        aa.set_data("toto")

    # With the filter set to /JPXEncode, set_data raises PdfReadError.
    aa[NameObject("/Filter")] = NameObject("/JPXEncode")
    with pytest.raises(PdfReadError):
        aa.set_data(b"toto")


@pytest.mark.enable_socket
def test_set_data_2():
    """
    Modify a stream which has not been loaded yet and
    whose filter is ["/FlateDecode"].
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/16796095/f5471sm-2.pdf",
        name="iss2780.pdf",
    )
    writer = PdfWriter(BytesIO(pdf_bytes))

    def xfa_entry():
        # Looked up afresh for every statement that needs it.
        return writer.root_object["/AcroForm"]["/XFA"][7]

    xfa_entry().set_data(b"test")
    assert xfa_entry().get_object()["/Filter"] == ["/FlateDecode"]
    assert xfa_entry().get_object().get_data() == b"test"


@pytest.mark.enable_socket
def test_calling_indirect_objects():
    """Cope with cases where attributes/items are called from indirectObject"""
    url = "https://github.com/user-attachments/files/15605648/2021_book_security.pdf"
    name = "2021_book_security.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    # Item access and method calls are evaluated only to check they do not raise.
    reader.trailer.get("/Info")["/Creator"]
    reader.pages[0]["/Contents"][0].get_data()
    writer = PdfWriter(clone_from=reader)
    # Register the writer itself as an object: attribute access on the
    # returned reference gives the writer's attribute.
    ind = writer._add_object(writer)
    assert ind.fileobj == writer.fileobj
    with pytest.raises(AttributeError):
        ind.not_existing_attribute
    # create an IndirectObject referencing an IndirectObject.
    writer._objects.append(writer.pages[0].indirect_reference)
    ind = IndirectObject(len(writer._objects), 0, writer)
    with pytest.raises(PdfStreamError):
        ind["/Type"]


@pytest.mark.enable_socket
def test_indirect_object_page_dimensions():
    """The media box of the first page of issue2287.pdf is (0, 0, 792, 612)."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13302338/Zymeworks_Corporate.Presentation_FINAL1101.pdf.pdf",
        name="issue2287.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes), strict=False)
    first_page = reader.pages[0]
    assert first_page.mediabox == RectangleObject((0, 0, 792, 612))


def test_indirect_object_contains():
    """The `in` operator works on an IndirectObject pointing to object 1 of a new writer."""
    pdf_writer = PdfWriter()
    reference = IndirectObject(1, 0, pdf_writer)
    assert "/Producer" in reference
    assert "foo" not in reference


def test_indirect_object_iter():
    """Iterating an IndirectObject pointing to object 1 of a new writer yields its keys."""
    pdf_writer = PdfWriter()
    reference = IndirectObject(1, 0, pdf_writer)
    assert "/Producer" in list(reference)
    assert "foo" not in list(reference)


def test_array_operators():
    """Check `+`, `+=` and `-=` on an ArrayObject with various operand types."""
    a = ArrayObject(
        [
            NumberObject(1),
            NumberObject(2),
            NumberObject(3),
            NumberObject(4),
        ]
    )
    # `+` returns a new ArrayObject and leaves the left operand untouched.
    b = a + 5
    assert isinstance(b, ArrayObject)
    assert b == [1, 2, 3, 4, 5]
    assert a == [1, 2, 3, 4]
    # In-place operators accept single values as well as tuples and lists.
    a -= 2
    a += "abc"
    a -= (3, 4)
    a += ["d", "e"]
    a += BooleanObject(True)
    assert a == [1, "abc", "d", "e", True]
    # A string starting with "/" is stored as a NameObject, other strings
    # as TextStringObject and bytes as ByteStringObject.
    a += "/toto"
    assert isinstance(a[-1], NameObject)
    assert isinstance(a[1], TextStringObject)
    a += b"1234"
    assert a[-1] == ByteStringObject(b"1234")
    # Removing a value which is not in the array leaves the length unchanged.
    la = len(a)
    a -= 300
    assert len(a) == la


def test_unitary_extract_inline_buffer_invalid():
    """Every inline extractor raises `PdfReadError` for each of the buffers listed below."""
    invalid_buffers = (
        (extract_inline__ascii_hex_decode, b""),
        (extract_inline__ascii_hex_decode, 4095 * b"00" + b"   "),
        (extract_inline__ascii_hex_decode, b"00"),
        (extract_inline__ascii85_decode, b""),
        (extract_inline__ascii85_decode, a85encode(b"1")),
        (extract_inline__ascii85_decode, a85encode(b"1") + b"~> Q"),
        (extract_inline__ascii85_decode, a85encode(b"1234578" * 990)),
        (extract_inline__run_length_decode, b""),
        (extract_inline__run_length_decode, b"\x01\x01\x80"),
        (extract_inline__dct_decode, b"\xFF\xD9"),
    )
    for extractor, payload in invalid_buffers:
        with pytest.raises(PdfReadError):
            extractor(BytesIO(payload))


def test_unitary_extract_inline():
    """
    Check the inline image extractors directly and through `ContentStream`.

    The content streams below embed an inline image (BI ... ID ... EI) with
    different filters and differently terminated image data.
    """
    # AHx
    b = 16000 * b"00"
    assert len(extract_inline__ascii_hex_decode(BytesIO(b + b" EI"))) == len(b)
    with pytest.raises(PdfReadError):
        extract_inline__ascii_hex_decode(BytesIO(b + b"> "))
    # RL
    b = 8200 * b"\x00\xAB" + b"\x80"
    assert len(extract_inline__run_length_decode(BytesIO(b + b" EI"))) == len(b)

    # default
    # EIDD instead of EI; using A85
    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
BI\n/W 16 /H 16 /BPC 8 /CS /RGB /F [/A85 /Fl]\nID
Gar8O(o6*is8QV#;;JAuTq2lQ8J;%6#\'d5b"Q[+ZD?\'\\+CGj9~>
EIDD
Q\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
    ec = DecodedStreamObject()
    ec.set_data(b)
    co = ContentStream(ec, None)
    # The operations are parsed on access, which is where the error is raised.
    with pytest.raises(PdfReadError) as exc:
        co.operations
    assert "EI stream not found" in exc.value.args[0]
    # EIDD instead of EI; using /Fl (default extraction)
    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
BI\n/W 16 /H 16 /BPC 8 /CS /RGB /F /Fl \nID
Gar8O(o6*is8QV#;;JAuTq2lQ8J;%6#\'d5b"Q[+ZD?\'\\+CGj9~>
EIDD
Q\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
    ec = DecodedStreamObject()
    ec.set_data(b)
    co = ContentStream(ec, None)
    with pytest.raises(PdfReadError) as exc:
        co.operations
    assert "Unexpected end of stream" in exc.value.args[0]

    # EI directly after the data, followed by BT on the next line; using /Fl
    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
BI\n/W 16 /H 16 /BPC 8 /CS /RGB /F /Fl \nID
Gar8O(o6*is8QV#;;JAuTq2lQ8J;%6#\'d5b"Q[+ZD?\'\\+CGj9~>EI
BT\nQ\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
    ec = DecodedStreamObject()
    ec.set_data(b)
    co = ContentStream(ec, None)
    with pytest.raises(PdfReadError) as exc:
        co.operations
    assert "Unexpected end of stream" in exc.value.args[0]

    # No filter, /CS /G: the 16 data bytes are returned as they are
    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
BI\n/W 4 /H 4 /CS /G \nID
abcdefghijklmnopEI
Q\nQ\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
    ec = DecodedStreamObject()
    ec.set_data(b)
    co = ContentStream(ec, None)
    assert co.operations[7][0]["data"] == b"abcdefghijklmnop"

    # No filter and no /CS entry: same result
    b = b"""1 0 0 1 0 0 cm  BT /F1 12 Tf 14.4 TL ET\nq 100 0 0 100 100 100 cm
BI\n/W 4 /H 4 \nID
abcdefghijklmnopEI
Q\nQ\nBT 1 0 0 1 200 100 Tm (Test) Tj T* ET\n \n"""
    ec = DecodedStreamObject()
    ec.set_data(b)
    co = ContentStream(ec, None)
    assert co.operations[7][0]["data"] == b"abcdefghijklmnop"


def test_missing_hashbin():
    """`hash_bin` of NullObject and ByteStringObject is the hash of a tuple starting with the class."""
    expected_null_hash = hash((NullObject,))
    assert NullObject().hash_bin() == expected_null_hash
    assert hash(NullObject()) == NullObject().hash_bin()

    expected_bytes_hash = hash((ByteStringObject, b"123"))
    assert ByteStringObject(b"123").hash_bin() == expected_bytes_hash


def test_is_null_or_none():
    """`is_null_or_none` is true for NullObject and missing values, false for a plain PdfObject."""
    assert is_null_or_none(NullObject())
    assert not is_null_or_none(PdfObject())

    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    # used with get
    missing_entry = reader.root_object.get("/do_no_exist")
    assert is_null_or_none(missing_entry)
    # object unknown...
    unknown_object = IndirectObject(99999, 0, reader).get_object()
    assert is_null_or_none(unknown_object)
    # ... or which has been replaced with NullObject
    writer = PdfWriter(reader)
    contents = writer.pages[0]["/Contents"]
    contents.append(writer._add_object(NullObject()))
    assert is_null_or_none(writer.pages[0]["/Contents"][-1])


def test_coverage_arrayobject():
    """Cover `replicate`, `clone` and `items` of ArrayObject."""
    writer = PdfWriter()
    # Plain Python values inside the array stay as they are.
    a = ArrayObject([1])
    assert isinstance(a.replicate(writer)[0], int)
    assert isinstance(a.clone(writer)[0], int)
    # Same check with an indirect reference set on the array.
    a.indirect_reference = IndirectObject(1, 0, writer)
    assert isinstance(a.clone(writer)[0], int)
    r = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    # An object taken from a reader is an IndirectObject in the cloned array.
    a = ArrayObject([r.pages[0]["/Contents"][0].get_object()])
    aa = a.clone(writer)
    assert isinstance(aa[0], IndirectObject)
    # `items` yields pairs of an integer and a PdfObject.
    for k, v in aa.items():
        assert isinstance(k, int)
        assert isinstance(v, PdfObject)


def test_coverage_streamobject():
    """Cover `replicate` and `clone` of StreamObject and ContentStream."""
    writer = PdfWriter()
    # Both calls have to work when the `decoded_self` attribute is missing.
    s = StreamObject()
    del s.decoded_self
    s.replicate(writer)
    s.clone(writer)

    # ContentStream created without stream and without pdf.
    co = ContentStream(None, None)
    co.replicate(writer)
    co.clone(writer, False, None)
    co.indirect_reference = IndirectObject(1, 0, writer)
    assert co == co.clone(writer)

    # Additional keys survive `replicate`, with `decoded_self` being None ...
    r = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    co = r.pages[0].get_contents()
    co[NameObject("/testkey")] = NameObject("/test")
    co.decoded_self = None
    assert "/testkey" in co.replicate(writer)
    # ... as well as with `decoded_self` being a DecodedStreamObject.
    co = r.pages[0].get_contents()
    co[NameObject("/testkey")] = NameObject("/test")
    co.decoded_self = DecodedStreamObject()
    assert "/testkey" in co.replicate(writer)


def test_contentstream_arrayobject_containing_nullobject(caplog):
    """A NullObject inside the array given to ContentStream is handled without any log output."""
    payload = DecodedStreamObject()
    payload.set_data(b"Hello World!")

    content_stream = ContentStream(
        stream=ArrayObject([NullObject(), payload]),
        pdf=None,
    )
    assert content_stream.get_data() == b"Hello World!\n"
    assert caplog.text == ""


@pytest.mark.enable_socket
def test_build_link__go_to_action_without_destination():
    """Every page of issue-3419.pdf can be added to a new writer."""
    pdf_bytes = get_data_from_url(name="issue-3419.pdf")
    source = PdfReader(BytesIO(pdf_bytes))
    target = PdfWriter()
    for source_page in source.pages:
        target.add_page(source_page)
    assert len(target.pages) == len(source.pages)


@pytest.mark.enable_socket
def test_dictionaryobject__length_0_stream():
    """Test for issue #3052: object 8 is written as a stream with /Length 0."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18734105/correct.pdf",
        name="issue3052.pdf",
    )
    writer = PdfWriter(clone_from=BytesIO(pdf_bytes))
    target = BytesIO()
    writer.write(target)
    expected_object = b"\n8 0 obj\n<<\n/Length 0\n>>\nstream\n\nendstream\nendobj\n"
    assert expected_object in target.getvalue()


================================================
FILE: tests/test_images.py
================================================
"""
Tests which ensure that image extraction works properly go here.

Typically, tests in here should compare the extracted images count, names,
and/or the actual image data with the expected value.
"""

from io import BytesIO
from pathlib import Path
from typing import Union
from unittest import mock
from zipfile import ZipFile

import pytest
from PIL import Image, ImageChops, ImageDraw

from pypdf import PageObject, PdfReader, PdfWriter
from pypdf.errors import LimitReachedError
from pypdf.filters import JBIG2Decode
from pypdf.generic import ContentStream, NameObject, NullObject

from . import RESOURCE_ROOT, SAMPLE_ROOT, get_data_from_url
from .utils import get_image_data


def open_image(path: Union[Path, Image.Image, BytesIO]) -> Image.Image:
    """
    Return *path* as a Pillow image.

    An `Image.Image` is returned unchanged. Anything else is opened with
    Pillow and a copy of the opened image is returned; a `Path` has to exist.
    """
    if isinstance(path, Image.Image):
        return path
    if isinstance(path, Path):
        assert path.exists()
    # The copy is taken while the source is still open, to avoid issues
    # with the file being closed afterwards.
    with Image.open(path) as opened:
        return opened.copy()


def image_size(image: Image.Image):
    """Save *image* in its own format to memory and return the resulting stream position."""
    sink = BytesIO()
    image.save(sink, format=image.format)
    position = sink.tell()
    return position


def image_similarity(
    path1: Union[Path, Image.Image, BytesIO], path2: Union[Path, Image.Image, BytesIO]
) -> float:
    """
    Compute how similar two images are.

    The result is 0 when the images differ in size or in color mode.
    Otherwise it is 1 minus the mean squared error of the pixel differences,
    each difference being scaled by 255: 1 means identical images and a
    value above 0.9 means they are almost the same.

    This can be used to ensure visual similarity.
    """
    first, second = open_image(path1), open_image(path2)

    # Images of different dimensions or color modes are never compared.
    if first.size != second.size or first.mode != second.mode:
        return 0

    pixels = get_image_data(ImageChops.difference(first, second))

    def squared(value):
        return (value / 255.0) ** 2

    # Single-channel data: one number per pixel.
    if not isinstance(pixels[0], tuple):
        return 1 - sum(squared(p) for p in pixels) / len(pixels)

    # Multi-channel data: one tuple per pixel, averaged over all channels.
    total = sum(sum(squared(c) for c in p) for p in pixels)
    return 1 - total / (len(pixels) * len(pixels[0]))


@pytest.mark.samples
def test_image_similarity_one():
    """Comparing an image file with itself gives a similarity of 1."""
    sample = SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png"
    assert image_similarity(sample, sample) == 1


@pytest.mark.samples
def test_image_similarity_zero():
    """Comparing these two different sample images gives a similarity of 0."""
    base64_sample = SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png"
    geotopo_sample = SAMPLE_ROOT / "009-pdflatex-geotopo/page-23-Im2.png"
    assert image_similarity(base64_sample, geotopo_sample) == 0


@pytest.mark.samples
def test_image_similarity_mid():
    """Painting a growing black rectangle onto a copy lowers the similarity step by step."""
    original_path = SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png"
    modified = Image.open(original_path)
    painter = ImageDraw.Draw(modified)
    black = (0, 0, 0)

    # Small black rectangle: almost the same image, but not identical.
    painter.rectangle([0, 0, 100, 100], fill=black)
    small_change = image_similarity(original_path, modified)
    assert 0.9 < small_change < 1

    # Larger black rectangle: less similar than before, but still above 0.
    painter.rectangle([0, 0, 200, 200], fill=black)
    large_change = image_similarity(original_path, modified)
    assert 0 < large_change < small_change


@pytest.mark.enable_socket
def test_image_new_property():
    """Check keys, items and the supported index types of the page `images` property."""
    name = "pdf_font_garbled.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(name=name)))
    # Plain string keys come first, followed by two-element list keys.
    assert reader.pages[0].images.keys() == [
        "/I0",
        "/I1",
        "/I2",
        "/I3",
        "/I4",
        "/I5",
        "/I6",
        "/I7",
        "/I8",
        "/I9",
        ["/TPL1", "/Image5"],
        ["/TPL2", "/Image53"],
        ["/TPL2", "/Image37"],
        ["/TPL2", "/Image49"],
        ["/TPL2", "/Image51"],
        ["/TPL2", "/Image39"],
        ["/TPL2", "/Image57"],
        ["/TPL2", "/Image55"],
        ["/TPL2", "/Image43"],
        ["/TPL2", "/Image30"],
        ["/TPL2", "/Image22"],
        ["/TPL2", "/Image41"],
        ["/TPL2", "/Image47"],
        ["/TPL2", "/Image45"],
        ["/TPL3", "/Image65"],
        ["/TPL3", "/Image30"],
        ["/TPL3", "/Image61"],
        ["/TPL4", "/Image30"],
        ["/TPL5", "/Image30"],
        ["/TPL6", "/Image30"],
        ["/TPL7", "/Image30"],
        ["/TPL8", "/Image30"],
        ["/TPL9", "/Image30"],
        ["/TPL10", "/Image30"],
        ["/TPL11", "/Image30"],
        ["/TPL12", "/Image30"],
    ]
    assert len(reader.pages[0].images.items()) == 36
    assert reader.pages[0].images[0].name == "I0.png"

    # The last image has to be identical to the downloaded reference image.
    expected_image_url = "https://github.com/user-attachments/assets/3bf25760-2113-4e25-b4c2-fc1d3a84a263"
    expected_image_name = "pdf_font_garbled_image30.png"
    expected_image_data = BytesIO(get_data_from_url(url=expected_image_url, name=expected_image_name))
    assert image_similarity(
        expected_image_data,
        reader.pages[0].images[-1].image
    ) == 1

    # Access with a pair of names, with a single name and with a slice.
    assert reader.pages[0].images["/TPL1", "/Image5"].image.format == "JPEG"
    assert (
        reader.pages[0].images["/I0"].indirect_reference.get_object()
        == reader.pages[0]["/Resources"]["/XObject"]["/I0"]
    )
    list(reader.pages[0].images[0:2])
    # Unsupported key type and out-of-range index.
    with pytest.raises(TypeError):
        reader.pages[0].images[b"0"]
    with pytest.raises(IndexError):
        reader.pages[0].images[9999]
    # just for test coverage:
    with pytest.raises(KeyError):
        reader.pages[0]._get_image(["test"], reader.pages[0])
    assert list(PageObject(None, None).images) == []


@pytest.mark.parametrize(
    ("src", "page_index", "image_key", "expected"),
    [
        (
            SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf",
            23,
            "/Im2",
            SAMPLE_ROOT / "009-pdflatex-geotopo/page-23-Im2.png",
        ),
        (
            SAMPLE_ROOT / "003-pdflatex-image/pdflatex-image.pdf",
            0,
            "/Im1",
            SAMPLE_ROOT / "003-pdflatex-image/page-0-Im1.jpg",
        ),
        (
            SAMPLE_ROOT / "018-base64-image/base64image.pdf",
            0,
            "/QuickPDFImd32aa1ab",
            SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png",
        ),
        (
            SAMPLE_ROOT / "019-grayscale-image/grayscale-image.pdf",
            0,
            "/X0",
            SAMPLE_ROOT / "019-grayscale-image/page-0-X0.png",
        ),
    ],
    ids=[
        "009-pdflatex-geotopo/page-23-Im2.png",
        "003-pdflatex-image/page-0-Im1.jpg",
        "018-base64-image/page-0-QuickPDFImd32aa1ab.png",
        "019-grayscale-image/page-0-X0.png",
    ],
)
@pytest.mark.samples
def test_image_extraction(src, page_index, image_key, expected):
    """The image stored under *image_key* on the given page matches the *expected* file."""
    page = PdfReader(src).pages[page_index]
    extracted = page.images[image_key]
    if not expected.exists():
        # A little helper for test generation: the extracted data is
        # written to the current working directory.
        Path(f"page-{page_index}-{extracted.name}").write_bytes(extracted.data)
    similarity = image_similarity(BytesIO(extracted.data), expected)
    assert similarity >= 0.99


def test_get_inline_image_without_xobject_resources():
    """An inline image key is looked up in the inline images of a page created without resources."""
    blank_page = PageObject(None, None)
    sentinel = object()
    inline_images_patch = mock.patch.object(
        blank_page,
        "_get_inline_images",
        return_value={"~0~": sentinel},
    )

    with inline_images_patch:
        assert blank_page._get_image("~0~") is sentinel


def test_get_inline_image_without_xobject_resources_raises_when_missing():
    """A KeyError is raised for an inline image key when `_get_inline_images` returns None."""
    blank_page = PageObject(None, None)
    no_inline_images = mock.patch.object(
        blank_page, "_get_inline_images", return_value=None
    )

    with no_inline_images:
        with pytest.raises(KeyError, match="No inline image can be found"):
            blank_page._get_image("~0~")


def test_get_xobject_image_without_xobject_resources_raises():
    """A KeyError is raised for an XObject image key on a page created without resources."""
    blank_page = PageObject(None, None)
    expected_message = "Cannot access image object /Im0 without XObject resources"

    with pytest.raises(KeyError, match=expected_message):
        blank_page._get_image("/Im0")


@pytest.mark.enable_socket
@pytest.mark.timeout(30)
def test_loop_in_image_keys():
    """Cf #2077: listing the image keys has to finish within the timeout."""
    pdf_bytes = get_data_from_url(name="iss2077.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    x_objects = reader.pages[0]["/Resources"]["/XObject"]
    # An additional NullObject entry must not disturb the listing.
    x_objects[NameObject("/toto")] = NullObject()
    reader.pages[0].images.keys()


@pytest.mark.enable_socket
def test_devicen_cmyk_black_only():
    """Cf #2321: the first image of pages 5 and 10 matches its reference image."""
    reader = PdfReader(
        BytesIO(
            get_data_from_url(
                "https://github.com/py-pdf/pypdf/files/13501846/Addressing_Adversarial_Attacks.pdf",
                name="iss2321.pdf",
            )
        )
    )
    references = (
        (
            5,
            "https://github.com/py-pdf/pypdf/assets/4083478/cc2dabc1-86e6-4179-a8a4-2b0efea124be",
            "iss2321_img0.pdf",
        ),
        (
            10,
            "https://github.com/py-pdf/pypdf/assets/4083478/6b64a949-42be-40d5-9eea-95707f350d89",
            "iss2321_img1.pdf",
        ),
    )
    for page_index, image_url, image_name in references:
        expected = Image.open(BytesIO(get_data_from_url(image_url, name=image_name)))
        extracted = reader.pages[page_index].images[0].image
        assert image_similarity(extracted, expected) >= 0.99


@pytest.mark.enable_socket
def test_bi_in_text():
    """Cf #2456: the first page has exactly one image, the inline image "~0~"."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/14322910/BI_text_with_one_image.pdf",
        name="BI_text_with_one_image.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    assert first_page.images.keys() == ["~0~"]
    assert first_page.images[0].name == "~0~.png"


@pytest.mark.enable_socket
def test_cmyk_no_filter():
    """Cf #2522: the first image of the first page can be decoded."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/14614887/out3.pdf",
        name="iss2522.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    # Only checks that accessing the decoded image does not raise.
    first_page.images[0].image


@pytest.mark.enable_socket
def test_separation_1byte_to_rgb_inverted():
    """Cf #2343: the image matches its reference; one extra data byte makes extraction fail."""
    reader = PdfReader(
        BytesIO(
            get_data_from_url(
                "https://github.com/py-pdf/pypdf/files/13679585/test2_P038-038.pdf",
                name="iss2343.pdf",
            )
        )
    )
    expected = Image.open(
        BytesIO(
            get_data_from_url(
                "https://github.com/py-pdf/pypdf/assets/4083478/b7f41897-96ef-4ea6-b165-5ef307a92b87",
                name="iss2343.png",
            )
        )
    )
    assert image_similarity(reader.pages[0].images[0].image, expected) >= 0.99

    # Append one byte to the image data: the image can no longer be built.
    image_object = reader.pages[0].images[0].indirect_reference.get_object()
    image_object.set_data(image_object.get_data() + b"\x00")
    with pytest.raises(ValueError):
        reader.pages[0].images[0]


@pytest.mark.enable_socket
def test_data_with_lf():
    """Cf #2343: image 9 of page 8 is identical to the reference image."""
    reader = PdfReader(
        BytesIO(
            get_data_from_url(
                "https://github.com/py-pdf/pypdf/files/13946477/panda.pdf",
                name="iss2343b.pdf",
            )
        )
    )
    expected = Image.open(
        BytesIO(
            get_data_from_url(
                "https://github.com/py-pdf/pypdf/assets/4083478/1120b0cf-a67a-403f-aa1a-9a191cbc087f",
                name="iss2343b0.png",
            )
        )
    )
    extracted = reader.pages[8].images[9].image
    assert image_similarity(extracted, expected) == 1.0


@pytest.mark.enable_socket
def test_oserror():
    """Cf #2265: image 1 of page 2 can be extracted without an error."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13127130/Binance.discovery.responses.2.gov.uscourts.dcd.256060.140.1.pdf",
        name="iss2265.pdf",
    )
    third_page = PdfReader(BytesIO(pdf_bytes)).pages[2]
    third_page.images[1]
    # Due to errors in translation in pillow we may not get
    # the correct image. Therefore we cannot use `image_similarity`.


@pytest.mark.parametrize(
    ("pdf", "pdf_name", "images", "images_name", "filtr"),
    [
        (
            "https://github.com/py-pdf/pypdf/files/13127197/FTX.Claim.SC30.01072023101624File595287144.pdf",
            "iss2266a.pdf",
            "https://github.com/py-pdf/pypdf/files/14967061/iss2266a_images.zip",
            "iss2266a_images.zip",
            ((0, 0), (1, 0), (4, 0), (9, 0)),  # random pick-up to speed up test
        ),
        (
            "https://github.com/py-pdf/pypdf/files/13127242/FTX.Claim.Skybridge.Capital.30062023113350File971325116.pdf",
            "iss2266b.pdf",
            "https://github.com/py-pdf/pypdf/files/14967099/iss2266b_images.zip",
            "iss2266b_images.zip",
            ((0, 0), (1, 0), (4, 0), (9, 0)),  # random pick-up to speed up test
        ),
    ],
)
@pytest.mark.enable_socket
def test_corrupted_jpeg_iss2266(pdf, pdf_name, images, images_name, filtr):
    """
    Compare extracted images with the reference images stored in a zip file.

    Only the (page, image) index pairs listed in *filtr* are compared;
    with *filtr* being None, every file of the archive is used.

    Code to create the zip file:

        import zipfile
        from io import BytesIO

        import pypdf

        with pypdf.PdfReader("____inputfile___") as r:
            with zipfile.ZipFile("__outputzip___", "w") as z:
                for p in r.pages:
                    for ii, i in enumerate(p.images):
                        print(i.name)
                        b = BytesIO()
                        i.image.save(b, "JPEG")
                        z.writestr(f"image_{p.page_number}_{ii}_{i.name}", b.getbuffer())
    """
    url = pdf
    name = pdf_name
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    url = images
    name = images_name
    print(pdf_name, images_name)  # noqa: T201
    with ZipFile(BytesIO(get_data_from_url(url, name=name)), "r") as zf:
        for fn in zf.namelist():
            # The second and third "_"-separated parts of the file name are
            # the page index and the image index.
            sp = fn.split("_")
            p, i = int(sp[1]), int(sp[2])
            if filtr is not None and (p, i) not in filtr:
                continue
            print(fn)  # noqa: T201
            img = Image.open(BytesIO(zf.read(fn)))
            assert image_similarity(reader.pages[p].images[i].image, img) >= 0.99


@pytest.mark.enable_socket
@pytest.mark.timeout(30)
def test_large_compressed_image():
    """All images of the first page are extracted within the timeout."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/15306199/file_with_large_compressed_image.pdf",
        name="file_with_large_compressed_image.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    list(first_page.images)


@pytest.mark.enable_socket
def test_ff_fe_starting_lut():
    """Cf issue #2660: the image is identical before and after writing the document."""
    writer = PdfWriter(
        BytesIO(
            get_data_from_url(
                "https://github.com/py-pdf/pypdf/files/15385628/original_before_merge.pdf",
                name="iss2660.pdf",
            )
        )
    )
    written = BytesIO()
    writer.write(written)
    reader = PdfReader(written)

    expected = Image.open(
        BytesIO(
            get_data_from_url(
                "https://github.com/py-pdf/pypdf/assets/4083478/6150700d-87fd-43a2-8695-c2c05a44838c",
                name="iss2660.png",
            )
        )
    )
    # Same check on the writer and on the document read back from its output.
    assert image_similarity(writer.pages[1].images[0].image, expected) == 1.0
    assert image_similarity(reader.pages[1].images[0].image, expected) == 1.0


@pytest.mark.enable_socket
def test_inline_image_extraction():
    """
    Cf #2598

    Text extraction and image extraction on several documents containing
    inline images, plus the reset of `inline_images` after merging pages.
    """
    url = "https://github.com/py-pdf/pypdf/files/14982414/lebo102.pdf"
    name = "iss2598.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    # there is no error because images are correctly extracted
    reader.pages[1].extract_text()
    reader.pages[2].extract_text()
    reader.pages[3].extract_text()

    url = "https://github.com/py-pdf/pypdf/files/15210011/Pages.62.73.from.0560-22_WSP.Plan_July.2022_Version.1.pdf"
    name = "iss2598a.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    reader.pages[0].extract_text()
    reader.pages[1].extract_text()

    url = "https://github.com/mozilla/pdf.js/raw/master/test/pdfs/issue14256.pdf"
    name = "iss2598b.pdf"
    writer = PdfWriter(BytesIO(get_data_from_url(url, name=name)))
    url = "https://github.com/py-pdf/pypdf/assets/4083478/71bc5053-cfc7-44ba-b7be-8e2333e2c749"
    name = "iss2598b.png"
    img = Image.open(BytesIO(get_data_from_url(url, name=name)))
    # The first eight images are all identical to the same reference image.
    for i in range(8):
        assert image_similarity(writer.pages[0].images[i].image, img) == 1
    writer.pages[0].extract_text()
    # check recalculation of inline images
    assert writer.pages[0].inline_images is not None
    writer.pages[0].merge_scaled_page(writer.pages[0], 0.25)
    assert writer.pages[0].inline_images is None
    reader = PdfReader(RESOURCE_ROOT / "imagemagick-ASCII85Decode.pdf")
    writer.pages[0].merge_page(reader.pages[0])
    # After both merges: one XObject image and sixteen inline images.
    assert list(writer.pages[0].images.keys()) == [
        "/Im0",
        "~0~",
        "~1~",
        "~2~",
        "~3~",
        "~4~",
        "~5~",
        "~6~",
        "~7~",
        "~8~",
        "~9~",
        "~10~",
        "~11~",
        "~12~",
        "~13~",
        "~14~",
        "~15~",
    ]

    url = "https://github.com/py-pdf/pypdf/files/15233597/bug1065245.pdf"
    name = "iss2598c.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    url = "https://github.com/py-pdf/pypdf/assets/4083478/bfb221be-11bd-46fe-8129-55a58088a4b6"
    name = "iss2598c.jpg"
    img = Image.open(BytesIO(get_data_from_url(url, name=name)))
    assert image_similarity(reader.pages[0].images[0].image, img) >= 0.99

    url = "https://github.com/py-pdf/pypdf/files/15282904/tt.pdf"
    name = "iss2598d.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    url = "https://github.com/py-pdf/pypdf/assets/4083478/1a770e1b-9ad2-4125-89ae-6069992dda23"
    name = "iss2598d.png"
    img = Image.open(BytesIO(get_data_from_url(url, name=name)))
    assert image_similarity(reader.pages[0].images[0].image, img) == 1


@pytest.mark.enable_socket
def test_extract_image_from_object(caplog):
    """
    Decode an image XObject directly and check the warning for non-image streams.

    The page content stream is not an image, so ``decode_as_image`` is expected
    to raise and to log a "does not seem to be an Image" message, both without
    and with an ``indirect_reference`` set on the stream.
    """
    url = "https://github.com/py-pdf/pypdf/files/15176076/B2.pdf"
    name = "iss2613.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    image = reader.pages[0]["/Resources"]["/Pattern"]["/P1"]["/Resources"]["/XObject"][
        "/X1"
    ].decode_as_image()
    assert isinstance(image, Image.Image)

    # Fetch the content stream once, outside of the ``raises`` blocks, so that
    # only ``decode_as_image`` is allowed to raise.
    co = reader.pages[0].get_contents()
    with pytest.raises(Exception):
        co.decode_as_image()
    assert "does not seem to be an Image" in caplog.text
    caplog.clear()

    # Reuse the very same object: re-fetching the contents here would replace
    # ``co`` and the marker set below would never reach ``decode_as_image``.
    co.indirect_reference = "for_test"
    with pytest.raises(Exception):
        co.decode_as_image()
    assert "does not seem to be an Image" in caplog.text


def test_extract_jpeg_with_explicit_quality():
    """Decoding ``/Im1`` with ``quality=75`` must give a smaller ``image_size`` than the default."""
    reader = PdfReader(RESOURCE_ROOT / "side-by-side-subfig.pdf")
    jpeg_xobject = reader.pages[0]["/Resources"]["/XObject"]["/Im1"]
    assert jpeg_xobject["/Filter"] == "/DCTDecode"

    default_image = jpeg_xobject.decode_as_image()
    assert isinstance(default_image, Image.Image)
    assert default_image.format == "JPEG"

    # Extra keyword arguments for Pillow are handed over via ``pillow_parameters``.
    reduced_image = jpeg_xobject.decode_as_image(pillow_parameters={"quality": 75})
    assert image_size(reduced_image) < image_size(default_image)


@pytest.mark.enable_socket
def test_4bits_images(caplog):
    """The second image on page 0 of ``iss2411.pdf`` must match the reference PNG exactly."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/16624406/tt.pdf", name="iss2411.pdf"
    )
    reader = PdfReader(BytesIO(pdf_data))
    reference_data = get_data_from_url(
        "https://github.com/user-attachments/assets/53058564-9a28-4e4a-818f-a6528013d7dc",
        name="iss2411.png",
    )
    reference = Image.open(BytesIO(reference_data))
    assert image_similarity(reader.pages[0].images[1].image, reference) == 1.0


@pytest.mark.enable_socket
def test_no_filter_with_colorspace_as_list():
    """Tests for #2998"""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18058571/9bf7a2e2-72c8-4ac1-b8ae-164df16c8cef.pdf",
        name="iss2998.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))

    # No assertion: listing the images of the first page just has to succeed.
    reader.pages[0].images.items()


def test_contentstream__read_inline_image__fallback_is_successful():
    """Check that ``_get_inline_images`` finds the one inline image of a hand-written stream."""
    expected_png = (
        b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x0f"
        b"IDATx\x9cc\xe8\xee\xee\xfe\xf7\xef\x1f\x00\x0e \x04\x9cpr_\x96\x00\x00\x00\x00IEND\xaeB`\x82"
    )
    content = ContentStream(stream=None, pdf=None)
    content.set_data(
        b"""Q
q 9.6 0 0 4.8 5523.6 1031 cm
BI
/CS /RGB
/W 2
/H 1
/BPC 8
ID \x8b\x8b\x8b\xfe\xfe\xfe
EI Q
/R413 gs
        """
    )
    blank_page = PageObject(pdf=None)
    # The page has no real contents, so hand it the stream built above.
    with mock.patch.object(blank_page, "get_contents", return_value=content):
        inline_images = blank_page._get_inline_images()
        assert list(inline_images) == ["~0~"]
        assert inline_images["~0~"].data == expected_png


@pytest.mark.enable_socket
def test_inline_image_containing_ei_in_body():
    """Tests for #3107"""
    expected = """\nID ><8d>£^H<8e><8b>¢AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA¡^BêMEI E^N^^<8a>^AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA^D
<8b>²: AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA5>^D
é^EAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD<98>AAAAAA<8d><82>
AAAAAAAA^B
EI\nQ\n""".encode("latin1")  # noqa: E501
    url = "https://github.com/user-attachments/files/18943249/testing.pdf"
    name = "issue3107.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    writer = PdfWriter(clone_from=reader)
    for page in writer.pages:
        page.transfer_rotation_to_content()
    output = BytesIO()
    writer.write(output)
    assert expected in output.getvalue()


@pytest.mark.enable_socket
@pytest.mark.skipif(condition=not JBIG2Decode._is_binary_compatible(), reason="Requires recent jbig2dec")
def test_jbig2decode():
    """Check size, mode, format and reference similarity of the first image of ``jbig2.pdf``."""
    pdf_url = "https://github.com/py-pdf/pypdf/files/12090692/New.Jersey.Coinbase.staking.securities.charges.2023-0606_Coinbase-Penalty-and-C-D.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(pdf_url, name="jbig2.pdf")))

    extracted = next(iter(reader.pages[0].images))
    assert extracted.image.size == (5138, 6630)
    assert extracted.image.mode == "1"
    assert extracted.image.format == "PNG"

    reference_url = "https://github.com/user-attachments/assets/d6f88c80-a2e0-4ea9-b1e0-34442041d004"
    reference = Image.open(BytesIO(get_data_from_url(reference_url, name="jbig2.png")))

    assert image_similarity(extracted.image, reference) >= 0.999


@pytest.mark.enable_socket
@pytest.mark.skipif(condition=not JBIG2Decode._is_binary_compatible(), reason="Requires recent jbig2dec")
def test_jbig2decode__jbig2globals():
    """Check size, mode, format and reference similarity of the first image of ``jbig2_globals.pdf``."""
    pdf_url = "https://github.com/user-attachments/files/20119148/out.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(pdf_url, name="jbig2_globals.pdf")))

    extracted = next(iter(reader.pages[0].images))
    assert extracted.image.size == (1067, 1067)
    assert extracted.image.mode == "1"
    assert extracted.image.format == "PNG"

    reference_url = "https://github.com/user-attachments/assets/7ac41ee3-9c13-44cf-aa74-8f106287e354"
    reference = Image.open(BytesIO(get_data_from_url(reference_url, name="jbig2_globals.png")))

    # Wrong image: 0.9618265964800714
    assert image_similarity(extracted.image, reference) >= 0.999


@pytest.mark.enable_socket
@pytest.mark.skipif(condition=not JBIG2Decode._is_binary_compatible(), reason="Requires recent jbig2dec")
def test_jbig2decode__memory_limit():
    """With a patched ``JBIG2_MAX_OUTPUT_LENGTH``, reading the image must raise ``LimitReachedError``."""
    url = "https://github.com/py-pdf/pypdf/files/12090692/New.Jersey.Coinbase.staking.securities.charges.2023-0606_Coinbase-Penalty-and-C-D.pdf"
    name = "jbig2.pdf"

    # Both accepted messages share the first and the last line.
    first_line = r"^Memory limit reached while reading JBIG2 data:\n"
    last_line = r"jbig2dec FATAL ERROR failed to allocate image data buffer \(stride=643, height=6630\)"
    # Version 0.20
    message_v020 = (
        first_line
        + r"jbig2dec FATAL ERROR memory: limit reached: limit: 5000000 \(4 Mbyte\) used: 4329386 \(4 Mbyte\) allocation: 4263106 \(4 Mbyte\)\n"  # noqa: E501
        + last_line
    )
    # Version 0.19
    message_v019 = first_line + last_line
    expected_pattern = "(" + "|".join((message_v020, message_v019)) + ")"

    with mock.patch("pypdf.filters.JBIG2_MAX_OUTPUT_LENGTH", 5_000_000):
        reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
        first_page = reader.pages[0]
        with pytest.raises(expected_exception=LimitReachedError, match=expected_pattern):
            _ = next(iter(first_page.images))


@pytest.mark.enable_socket
def test_get_ids_image__resources_is_none():
    """The third page of ``tika-957721.pdf`` must report an empty list of images."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381726/tika-957721.pdf",
        name="tika-957721.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    third_page = reader.pages[2]
    assert list(third_page.images.items()) == []


================================================
FILE: tests/test_javascript.py
================================================
"""Test topics around the usage of JavaScript in PDF documents."""
from typing import Any

import pytest

from pypdf import PdfReader, PdfWriter
from tests import RESOURCE_ROOT


@pytest.fixture
def pdf_file_writer():
    """Provide a ``PdfWriter`` filled with the pages of ``issue-604.pdf``."""
    source = PdfReader(RESOURCE_ROOT / "issue-604.pdf")
    prepared_writer = PdfWriter()
    prepared_writer.append_pages_from_reader(source)
    return prepared_writer


def test_add_js(pdf_file_writer):
    """After ``add_js`` the root object must contain ``/Names`` with a ``/JavaScript`` entry."""
    pdf_file_writer.add_js("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

    root = pdf_file_writer._root_object
    assert "/Names" in root, "add_js should add a name catalog in the root object."

    name_catalog = pdf_file_writer._root_object["/Names"]
    assert (
        "/JavaScript" in name_catalog
    ), "add_js should add a JavaScript name tree under the name catalog."


def test_added_js(pdf_file_writer):
    """Adding the same script twice must yield two different entries at index -2."""
    script = "this.print({bUI:true,bSilent:false,bShrinkToFit:true});"

    def get_javascript_name() -> Any:
        root = pdf_file_writer._root_object
        assert "/Names" in root
        name_catalog = root["/Names"]
        assert "/JavaScript" in name_catalog
        javascript_tree = name_catalog["/JavaScript"]
        assert "/Names" in javascript_tree
        # Index -2 is used in order to get the latest javascript.
        return javascript_tree["/Names"][-2]

    pdf_file_writer.add_js(script)
    name_after_first_call = get_javascript_name()

    pdf_file_writer.add_js(script)
    name_after_second_call = get_javascript_name()

    assert (
        name_after_first_call != name_after_second_call
    ), "add_js should add to the previous script in the catalog."


================================================
FILE: tests/test_merger.py
================================================
"""Test merging PDF functionality."""
from io import BytesIO
from pathlib import Path

import pytest

import pypdf
from pypdf import PdfReader, PdfWriter
from pypdf.generic import ArrayObject, Destination, DictionaryObject, Fit, NameObject, NullObject

from . import RESOURCE_ROOT, get_data_from_url
from .test_encryption import HAS_AES


def merger_operate(merger):
    """
    Run a fixed sequence of append, merge, outline and metadata calls on ``merger``.

    Args:
        merger: The writer object to operate on. It is modified in place.
    """
    crazyones = RESOURCE_ROOT / "crazyones.pdf"
    with_outline = RESOURCE_ROOT / "pdflatex-outline.pdf"
    with_forms = RESOURCE_ROOT / "pdflatex-forms.pdf"
    with_password = RESOURCE_ROOT / "libreoffice-writer-password.pdf"

    merger.append(crazyones)
    merger.append(with_outline)
    merger.append(crazyones, pages=pypdf.pagerange.PageRange(slice(0, 0)))
    merger.append(with_forms)
    merger.merge(0, crazyones, import_outline=False)

    # Raw bytes are not accepted as a source.
    with open(crazyones, "rb") as fp:
        raw_bytes = fp.read()
    with pytest.raises(NotImplementedError) as exc:
        merger.append(raw_bytes)
    assert exc.value.args[0].startswith(
        "Merging requires an object that PdfReader can parse. "
        "Typically, that is a Path"
    )

    # Merging an encrypted file
    encrypted_reader = pypdf.PdfReader(with_password)
    encrypted_reader.decrypt("openpassword")
    merger.append(encrypted_reader)

    # PdfReader object:
    plain_reader = pypdf.PdfReader(crazyones)
    merger.append(
        plain_reader, outline_item="foo", pages=list(range(len(plain_reader.pages)))
    )

    # File handle
    with open(crazyones, "rb") as fh:
        merger.append(fh)

    # to force to build outlines and ensure the add_outline_item is
    # at end of the list
    merger.write(BytesIO())
    outline_item = merger.add_outline_item("An outline item", 0)
    nested_item = merger.add_outline_item(
        "deeper", 0, parent=outline_item, italic=True, bold=True
    )
    merger.add_outline_item(
        "Let's see", 2, nested_item, (255, 255, 0), True, True, Fit.fit_box_vertically(left=12)
    )

    # One child of ``outline_item`` per remaining fit type; all other arguments are shared.
    fits_by_title = (
        ("The XYZ fit", Fit.xyz(left=10, top=20, zoom=3)),
        ("The FitH fit", Fit.fit_horizontally(top=10)),
        ("The FitV fit", Fit.fit_vertically(left=10)),
        ("The FitR fit", Fit.fit_rectangle(left=10, bottom=20, right=30, top=40)),
        ("The FitB fit", Fit.fit_box()),
        ("The FitBH fit", Fit.fit_box_horizontally(top=10)),
        ("The FitBV fit", Fit.fit_box_vertically(left=10)),
    )
    for title, fit in fits_by_title:
        merger.add_outline_item(title, 0, outline_item, (255, 0, 15), True, True, fit)

    assert merger.find_outline_item("nothing here") is None
    assert merger.find_outline_item("foo") == [9]

    merger.add_metadata({"/Author": "Martin Thoma"})
    merger.add_named_destination("/Title", 0)
    merger.set_page_layout("/SinglePage")
    merger.page_mode = "/UseThumbs"


def check_outline(tmp_path):
    """
    Assert the titles of the top-level ``Destination`` entries in the outline.

    Args:
        tmp_path: Location of the PDF file to read.
    """
    reader = pypdf.PdfReader(tmp_path)
    top_level_titles = [
        entry.title for entry in reader.outline if isinstance(entry, Destination)
    ]
    expected_titles = ["Foo", "Bar", "Baz"] * 3
    expected_titles.append("foo")
    # TODO: "An outline item" ends up last here - is this the intended position?
    expected_titles.append("An outline item")
    assert top_level_titles == expected_titles

    # TODO: There seem to be no destinations for those links?


# File name of the merged PDF which the tests below create inside pytest's ``tmp_path``.
tmp_filename = "dont_commit_merged.pdf"


def test_merger_operations_by_traditional_usage_with_writer(tmp_path):
    """Use the writer without a context manager: operate, write, close, then check the outline."""
    output_path = tmp_path / tmp_filename
    writer = PdfWriter()
    merger_operate(writer)

    writer.write(output_path)
    writer.close()

    check_outline(output_path)


def test_merger_operations_by_semi_traditional_usage_with_writer(tmp_path):
    """Use the writer as a context manager and call ``write`` explicitly inside it."""
    output_path = tmp_path / tmp_filename

    with PdfWriter() as writer:
        merger_operate(writer)
        writer.write(output_path)

    assert Path(output_path).is_file()
    check_outline(output_path)


def test_merger_operation_by_new_usage_with_writer(tmp_path):
    """Pass the output path as ``fileobj``; the file must exist after the ``with`` block."""
    output_path = tmp_path / tmp_filename
    with PdfWriter(fileobj=output_path) as writer:
        # No explicit ``write`` call in this variant.
        merger_operate(writer)

    assert Path(output_path).is_file()
    check_outline(output_path)


def test_merge_page_exception_with_writer():
    """Passing a string as ``pages`` to ``merge`` must raise a ``TypeError`` with a fixed message."""
    writer = pypdf.PdfWriter()
    source_path = RESOURCE_ROOT / "crazyones.pdf"
    with pytest.raises(TypeError) as exc:
        writer.merge(0, source_path, pages="a:b")
    error_message = exc.value.args[0]
    assert error_message == '"pages" must be a tuple of (start, stop[, step]) or a list'
    writer.close()


def test_merge_page_tuple_with_writer():
    """Passing ``pages`` as a tuple to ``merge`` must not raise."""
    writer = pypdf.PdfWriter()
    writer.merge(0, RESOURCE_ROOT / "crazyones.pdf", pages=(0, 1))
    writer.close()


def test_merge_write_closed_fh_with_writer(pdf_file_path):
    """Keep calling writer methods after ``close()``; none of them may raise."""
    writer = pypdf.PdfWriter()
    writer.append(RESOURCE_ROOT / "crazyones.pdf")

    writer.close()

    # Everything below is executed on the already closed writer.
    writer.write(pdf_file_path)
    writer.add_metadata({"author": "Martin Thoma"})
    writer.set_page_layout("/SinglePage")
    writer.page_mode = "/UseNone"
    writer.add_outline_item("An outline item", 0)


@pytest.mark.enable_socket
def test_trim_outline_list_with_writer(pdf_file_path):
    """Append ``tika-995175.pdf``, re-add its first outline entry and write the result."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381771/tika-995175.pdf",
        name="tika-995175.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    writer.append(reader)
    first_outline_entry = writer.outline[0]
    writer.add_outline_item_dict(first_outline_entry)
    writer.write(pdf_file_path)
    writer.close()


@pytest.mark.enable_socket
def test_zoom_with_writer(pdf_file_path):
    """Append ``tika-994759.pdf`` to a writer and write it; passes when nothing raises."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381769/tika-994759.pdf",
        name="tika-994759.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    writer.append(reader)
    writer.write(pdf_file_path)
    writer.close()


@pytest.mark.enable_socket
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_zoom_xyz_no_left_with_add_page(pdf_file_path):
    """Add the pages of ``tika-933322.pdf`` one by one and write; passes when nothing raises."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381704/tika-933322.pdf",
        name="tika-933322.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    for source_page in reader.pages:
        writer.add_page(source_page)
    writer.write(pdf_file_path)
    writer.close()


@pytest.mark.enable_socket
def test_zoom_xyz_no_left_with_writer(pdf_file_path):
    """Append ``tika-933322.pdf`` as a whole and write; passes when nothing raises."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381704/tika-933322.pdf",
        name="tika-933322.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    writer.append(reader)
    writer.write(pdf_file_path)
    writer.close()


@pytest.mark.enable_socket
@pytest.mark.slow
def test_outline_item_with_writer(pdf_file_path):
    """Append ``tika-997511.pdf`` to a writer and write it; passes when nothing raises."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381773/tika-997511.pdf",
        name="tika-997511.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    writer.append(reader)
    writer.write(pdf_file_path)
    writer.close()


@pytest.mark.enable_socket
@pytest.mark.slow
def test_trim_outline_with_writer(pdf_file_path):
    """Append ``tika-982336.pdf`` to a writer and write it; passes when nothing raises."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381759/tika-982336.pdf",
        name="tika-982336.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    writer.append(reader)
    writer.write(pdf_file_path)
    writer.close()


@pytest.mark.enable_socket
@pytest.mark.slow
def test1_with_writer(pdf_file_path):
    """Append ``tika-923621.pdf`` to a writer and write it; passes when nothing raises."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381696/tika-923621.pdf",
        name="tika-923621.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    writer.append(reader)
    writer.write(pdf_file_path)
    writer.close()


@pytest.mark.enable_socket
@pytest.mark.slow
def test_sweep_recursion1_with_writer(pdf_file_path):
    """Append ``tika-924546.pdf``, write it and access the pages of the written file."""
    # TODO: This test looks like an infinite loop.
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381697/tika-924546.pdf",
        name="tika-924546.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    writer.append(reader)
    writer.write(pdf_file_path)
    writer.close()

    # Re-open the result; only accessing ``pages`` is checked, not its content.
    written = PdfReader(pdf_file_path)
    written.pages


@pytest.mark.enable_socket
@pytest.mark.slow
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            # TODO: This test looks like an infinite loop.
            "https://github.com/user-attachments/files/18381700/tika-924794.pdf",
            "tika-924794.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381697/tika-924546.pdf",
            "tika-924546.pdf",
        ),
    ],
)
def test_sweep_recursion2_with_writer(url, name, pdf_file_path):
    """Append the downloaded file, write it and access the pages of the written file."""
    pdf_data = get_data_from_url(url, name=name)
    writer = PdfWriter()
    writer.append(PdfReader(BytesIO(pdf_data)))
    writer.write(pdf_file_path)
    writer.close()

    # Re-open the result; only accessing ``pages`` is checked, not its content.
    written = PdfReader(pdf_file_path)
    written.pages


@pytest.mark.enable_socket
def test_sweep_indirect_list_newobj_is_none_with_writer(caplog, pdf_file_path):
    """Append ``tika-906769.pdf``, write it and access the pages of the written file."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381681/tika-906769.pdf",
        name="tika-906769.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    writer.append(reader)
    writer.write(pdf_file_path)
    writer.close()
    # used to be: assert "Object 21 0 not defined." in caplog.text

    written = PdfReader(pdf_file_path)
    written.pages


@pytest.mark.enable_socket
def test_iss1145_with_writer():
    """Appending ``iss1145.pdf`` must not raise."""
    # issue with FitH destination with null param
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9164743/file-0.pdf", name="iss1145.pdf"
    )
    writer = PdfWriter()
    writer.append(PdfReader(BytesIO(pdf_data)))
    writer.close()


@pytest.mark.enable_socket
def test_iss1344_with_writer(caplog):
    """After append and write, page 0 must still expose the expected font name and text."""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9549001/input.pdf", name="iss1344.pdf"
    )
    writer = PdfWriter()
    writer.append(PdfReader(BytesIO(pdf_data)))
    buffer = BytesIO()
    writer.write(buffer)

    first_page = PdfReader(buffer).pages[0]
    assert "/DIJMAC+Arial Black" in first_page._debug_for_extract()
    assert "adresse où le malade peut être visité" in first_page.extract_text()


@pytest.mark.enable_socket
def test_articles_with_writer(caplog):
    """Append pages ``(2, 10)`` of ``924666.pdf`` and check the threads of the written file."""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/18381699/tika-924666.pdf",
        name="924666.pdf",
    )
    writer = PdfWriter()
    writer.append(PdfReader(BytesIO(pdf_data)), (2, 10))
    buffer = BytesIO()
    writer.write(buffer)

    written = PdfReader(buffer)
    assert len(written.threads) == 4
    first_thread = written.threads[0].get_object()
    assert first_thread["/F"]["/P"] == written.pages[0]


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
@pytest.mark.enable_socket
def test_null_articles_with_writer():
    """Appending ``issue-3508.pdf`` must give a writer with 98 pages."""
    pdf_data = get_data_from_url(name="issue-3508.pdf")
    writer = PdfWriter()
    writer.append(BytesIO(pdf_data))
    page_count = len(writer.pages)
    assert page_count == 98


def test_get_reference():
    """``get_reference`` of the first page must equal that page's ``indirect_reference``."""
    writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf")
    looked_up = writer.get_reference(writer.pages[0])
    assert looked_up == writer.pages[0].indirect_reference


@pytest.mark.enable_socket
def test_direct_link_preserved(pdf_file_path):
    """The link on written page 3 must point at the indirect reference of page 4."""
    # this could be any PDF -- we don't care which
    base_reader = PdfReader(BytesIO(get_data_from_url(name="iss3268.pdf")))
    writer = PdfWriter(clone_from=base_reader)

    # this PDF has a direct link from p1 to p2
    linked_reader = PdfReader(BytesIO(get_data_from_url(name="direct-link.pdf")))
    for linked_page in linked_reader.pages:
        writer.add_page(linked_page)

    writer.write(pdf_file_path)

    written = PdfReader(pdf_file_path)
    link = written.pages[2]["/Annots"][0].get_object()
    assert link["/Subtype"] == "/Link"
    # indirect reference of page referred to
    target = link["/Dest"][0]

    fourth_page = written.flattened_pages[3]
    assert target == fourth_page.indirect_reference, "Link from page 3 to page 4 is broken"


@pytest.mark.enable_socket
def test_direct_link_preserved_reordering(pdf_file_path):
    """After inserting a page at index 3, the link on page 3 must point at page 5."""
    # this could be any PDF -- we don't care which
    base_reader = PdfReader(BytesIO(get_data_from_url(name="iss3268.pdf")))
    writer = PdfWriter(clone_from=base_reader)

    # this PDF has a direct link from p1 to p2
    linked_reader = PdfReader(BytesIO(get_data_from_url(name="direct-link.pdf")))
    for linked_page in linked_reader.pages:
        writer.add_page(linked_page)

    # let's insert a page to mess up the page order
    writer.insert_page(base_reader.pages[0], 3)

    writer.write(pdf_file_path)

    written = PdfReader(pdf_file_path)
    link = written.pages[2]["/Annots"][0].get_object()
    assert link["/Subtype"] == "/Link"
    # indirect reference of page referred to
    target = link["/Dest"][0]

    # it moved one out
    fifth_page = written.flattened_pages[4]
    assert target == fifth_page.indirect_reference, "Link from page 3 to page 5 is broken"


@pytest.mark.enable_socket
def test_direct_link_page_missing(pdf_file_path):
    """Writing must not crash when only the first page of ``direct-link.pdf`` is added."""
    # this could be any PDF -- we don't care which
    base_reader = PdfReader(BytesIO(get_data_from_url(name="iss3268.pdf")))
    writer = PdfWriter(clone_from=base_reader)

    # this PDF has a direct link from p1 to p2
    linked_reader = PdfReader(BytesIO(get_data_from_url(name="direct-link.pdf")))
    # but we're not adding page 2
    first_linked_page = linked_reader.pages[0]
    writer.add_page(first_linked_page)

    # verify nothing crashes
    writer.write(pdf_file_path)


@pytest.mark.enable_socket
def test_named_reference_preserved(pdf_file_path):
    """Every ``/GoTo`` link on written page 5 must resolve, by name, to page 7."""
    # this could be any PDF -- we don't care which
    base_reader = PdfReader(BytesIO(get_data_from_url(name="iss3268.pdf")))
    writer = PdfWriter(clone_from=base_reader)

    # this PDF has a named reference from p3 to p5
    named_reader = PdfReader(BytesIO(get_data_from_url(name="named-reference.pdf")))
    for named_page in named_reader.pages:
        writer.add_page(named_page)

    writer.write(pdf_file_path)

    written = PdfReader(pdf_file_path)
    fifth_page = written.pages[4]
    seventh_page = written.flattened_pages[6]
    for annotation in fifth_page["/Annots"]:
        action = annotation["/A"]
        assert action.get("/S") == "/GoTo"
        destination_name = str(action["/D"])
        assert destination_name in written.named_destinations
        target = written.named_destinations[destination_name].page

        assert target == seventh_page.indirect_reference, "Link from page 5 to page 7 is broken"


@pytest.mark.enable_socket
def test_named_ref_to_page_that_is_gone(pdf_file_path):
    """Writing a page whose named reference target was dropped must not crash."""
    original = PdfReader(BytesIO(get_data_from_url(name="named-reference.pdf")))
    intermediate_buffer = BytesIO()
    intermediate_writer = PdfWriter()
    # we add only the page with the reference
    intermediate_writer.add_page(original.pages[2])
    intermediate_writer.write(intermediate_buffer)

    reduced = PdfReader(intermediate_buffer)

    final_writer = PdfWriter()
    # now references to non-existent page
    final_writer.add_page(reduced.pages[0])
    # don't crash
    final_writer.write(pdf_file_path)


def test_merge__null_destination():
    """Tests for issue #3444."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
    source_writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")

    # A /GoTo action whose /D entry is what ``_add_object`` returns for a ``NullObject``.
    action = DictionaryObject()
    action[NameObject("/S")] = NameObject("/GoTo")
    action[NameObject("/D")] = writer._add_object(NullObject())

    link_annotation = DictionaryObject()
    link_annotation[NameObject("/Subtype")] = NameObject("/Link")
    link_annotation[NameObject("/A")] = action

    source_page = source_writer.pages[0]
    source_page[NameObject("/Annots")] = ArrayObject([link_annotation])

    serialized = BytesIO()
    source_writer.write(serialized)
    serialized.seek(0)

    writer.merge(position=1, fileobj=serialized)
    assert writer.pages[0].annotations is None


================================================
FILE: tests/test_page.py
================================================
"""Test the pypdf._page module."""
import json
import math
import os
import re
import shutil
import subprocess
import sys
from copy import deepcopy
from io import BytesIO
from pathlib import Path
from random import shuffle
from typing import Any
from unittest import mock

import pytest

from pypdf import PdfReader, PdfWriter, Transformation
from pypdf._page import PageObject
from pypdf.constants import PageAttributes
from pypdf.constants import PageAttributes as PG
from pypdf.errors import PdfReadError, PdfReadWarning, PyPdfError
from pypdf.generic import (
    ArrayObject,
    ContentStream,
    DictionaryObject,
    FloatObject,
    IndirectObject,
    NameObject,
    NullObject,
    RectangleObject,
    TextStringObject,
)

from . import RESOURCE_ROOT, SAMPLE_ROOT, get_data_from_url, normalize_warnings
from .test_images import image_similarity
from .utils import extract_cell_text, extract_table, extract_text_and_rectangles

# Result of looking up the ``gs`` executable on the search path; ``shutil.which``
# returns ``None`` when no such executable is found.
GHOSTSCRIPT_BINARY = shutil.which("gs")


def get_all_sample_files():
    """
    Load the metadata of the sample files from ``files.json`` in ``SAMPLE_ROOT``.

    Returns:
        The parsed JSON content, or ``{"data": []}`` when the metadata file
        does not exist.
    """
    meta_file = SAMPLE_ROOT / "files.json"
    if not Path(meta_file).is_file():
        return {"data": []}
    # Decode explicitly as UTF-8: without ``encoding`` the platform's locale
    # encoding would be used, which is not necessarily UTF-8.
    with open(meta_file, encoding="utf-8") as fp:
        return json.load(fp)


# Loaded once at import time; the ``parametrize`` decorator of ``test_read`` iterates over it.
all_files_meta = get_all_sample_files()


@pytest.mark.samples
@pytest.mark.parametrize(
    "meta",
    [m for m in all_files_meta["data"] if not m["encrypted"]],
    ids=[m["path"] for m in all_files_meta["data"] if not m["encrypted"]],
)
@pytest.mark.filterwarnings("ignore::pypdf.errors.PdfReadWarning")
def test_read(meta):
    """Compare the page count of a non-encrypted sample file with its metadata entry."""
    reader = PdfReader(SAMPLE_ROOT / meta["path"])
    try:
        reader.pages[0]
    except Exception:
        # If the first page cannot be accessed, the test ends here without failing.
        return
    actual_page_count = len(reader.pages)
    assert actual_page_count == meta["pages"]


@pytest.mark.samples
@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("pdf_path", "password"),
    [
        ("crazyones.pdf", None),
        ("attachment.pdf", None),
        (
            "libreoffice-writer-password.pdf",
            "openpassword",
        ),
        ("imagemagick-images.pdf", None),
        ("imagemagick-lzw.pdf", None),
        ("reportlab-inline-image.pdf", None),
        ("https://arxiv.org/pdf/2201.00029.pdf", None),
    ],
)
def test_page_operations(pdf_path, password):
    """
    Smoke test: run several page operations and only require that none raises.

    A more thorough test would also verify that the output is as expected.
    """
    if pdf_path.startswith("http"):
        # Remote file: the last URL segment is passed on as the name.
        file_name = pdf_path.split("/")[-1]
        source = BytesIO(get_data_from_url(pdf_path, file_name))
    else:
        source = RESOURCE_ROOT / pdf_path
    reader = PdfReader(source)
    writer = PdfWriter()

    if password:
        reader.decrypt(password)

    writer.clone_document_from_reader(reader)
    page: PageObject = writer.pages[0]

    translated_then_rotated = Transformation().translate(50, 100).rotate(90)
    assert abs(translated_then_rotated.ctm[4] + 100) < 0.01
    assert abs(translated_then_rotated.ctm[5] - 50) < 0.01

    inner = Transformation((1, 0, 0, -1, 0, 0))
    combined = Transformation().rotate(90).scale(1).translate(1, 1).transform(inner)
    page.add_transformation(combined, expand=True)
    page.add_transformation((1, 0, 0, 0, 0, 0))

    page.scale(2, 2)
    page.scale_by(0.5)
    page.scale_to(100, 100)
    page.compress_content_streams()
    page.extract_text()

    # Scale and extract once more after the content streams were compressed.
    page.scale_by(0.5)
    page.scale_to(100, 100)
    page.extract_text()


@pytest.mark.parametrize(
    ("angle", "expected_width", "expected_height"),
    [
        (175, 680, 844),
        (45, 994, 994),
        (-80, 888, 742),
    ],
)
def test_mediabox_expansion_after_rotation(
    angle: float, expected_width: int, expected_height: int
):
    """
    Mediabox dimensions after rotation at a non-right angle with expansion are correct.

    The test was validated against pillow (see PR #2282)
    """
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")

    rotation = Transformation().rotate(angle)
    for rotated_page in writer.pages:
        rotated_page.add_transformation(rotation, expand=True)

    first_mediabox = writer.pages[0].mediabox

    # Deviation of up to 2 pixels is acceptable
    tolerance = 2
    assert math.isclose(first_mediabox.width, expected_width, abs_tol=tolerance)
    assert math.isclose(first_mediabox.height, expected_height, abs_tol=tolerance)


def test_transformation_equivalence():
    """Merge a transformed page in two different ways and require equal resulting pages."""
    pdf_path = RESOURCE_ROOT / "labeled-edges-center-image.pdf"
    writer_base = PdfWriter(clone_from=pdf_path)
    page_base = writer_base.pages[0]

    pdf_path = RESOURCE_ROOT / "box.pdf"
    writer_add = PdfWriter(clone_from=pdf_path)
    page_box = writer_add.pages[0]

    # The transformation under test, shared by both options.
    op = Transformation().scale(2).rotate(45)

    # Option 1: The new way
    # Work on deep copies so that both options start from the same page objects.
    page_box1 = deepcopy(page_box)
    page_base1 = deepcopy(page_base)
    page_box1.add_transformation(op, expand=True)
    page_base1.merge_page(page_box1, expand=False)

    # Option 2: The old way
    # NOTE(review): this calls merge_transformed_page and afterwards also
    # add_transformation + merge_page on the same copies - confirm that both
    # steps are intended here.
    page_box2 = deepcopy(page_box)
    page_base2 = deepcopy(page_base)
    page_base2.merge_transformed_page(page_box2, op, expand=False)
    page_box2.add_transformation(op)
    page_base2.merge_page(page_box2)

    # Should be the same
    assert page_base1[NameObject(PG.CONTENTS)] == page_base2[NameObject(PG.CONTENTS)]
    assert page_base1.mediabox == page_base2.mediabox
    assert page_base1.trimbox == page_base2.trimbox
    assert page_base1.get(NameObject(PG.ANNOTS)) == page_base2.get(NameObject(PG.ANNOTS))
    # Resources are compared key by key, descending into nested dictionaries.
    compare_dict_objects(
        page_base1[NameObject(PG.RESOURCES)], page_base2[NameObject(PG.RESOURCES)]
    )


def test_transformation_equivalence2():
    """Run ``merge_transformed_page`` with several argument combinations; mostly a visual check."""
    reader_base = PdfReader(RESOURCE_ROOT / "labeled-edges-center-image.pdf")
    reader_add = PdfReader(RESOURCE_ROOT / "box.pdf")

    writer = PdfWriter()
    writer.append(reader_base)
    target_page = writer.pages[0]
    rotated = Transformation().scale(2).rotate(-45)
    target_page.merge_transformed_page(reader_add.pages[0], rotated, False, False)
    translated = Transformation().scale(2).translate(100, 100)
    writer.pages[0].merge_transformed_page(reader_add.pages[0], translated, True, False)
    # No special assert: the test should be visual in a viewer; 2 boxes with an arrow, rotated and translated

    writer = PdfWriter()
    writer.append(reader_add)
    identity = Transformation()
    writer.pages[0].merge_transformed_page(reader_base.pages[0], identity, True, True)
    # No special assert: Visual check the page has been increased and all is visible (box + graph)

    writer = PdfWriter()
    writer.append(reader_add)
    height = reader_add.pages[0].mediabox.height
    flipped = Transformation().transform(Transformation((1, 0, 0, -1, 0, height)))
    writer.pages[0].merge_transformed_page(reader_base.pages[0], flipped, False, False)
    # No special assert: Visual check the page has been increased and all is visible (box + graph)

    reader_comments = PdfReader(RESOURCE_ROOT / "commented-xmp.pdf")

    writer = PdfWriter()
    writer.append(reader_base)
    writer.pages[0].merge_transformed_page(
        reader_comments.pages[0], Transformation().rotate(-15), True, True
    )
    annotation_count_after_first_merge = len(writer.pages[0]["/Annots"])
    writer.pages[0].merge_transformed_page(
        reader_comments.pages[0], Transformation().rotate(-30), True, True
    )
    # Merging the commented page a second time must double the annotation count.
    assert len(writer.pages[0]["/Annots"]) == 2 * annotation_count_after_first_merge
    # No special assert: Visual check the overlay has its comments at the good position


def test_get_user_unit_property():
    """The first page of crazyones.pdf reports a ``user_unit`` of 1."""
    first_page = PdfReader(RESOURCE_ROOT / "crazyones.pdf").pages[0]
    assert first_page.user_unit == 1


def compare_dict_objects(d1, d2):
    """
    Assert that two dictionaries are equal, recursing into nested dictionaries.

    Both dictionaries must have the same set of keys. Values that are
    ``DictionaryObject`` instances in ``d1`` are compared recursively; all
    other values are compared with ``==``.
    """
    keys_left = sorted(d1.keys())
    keys_right = sorted(d2.keys())
    assert keys_left == keys_right
    for key in d1:
        # Values are read through item access on purpose rather than via
        # ``items()``, so both sides are looked up the same way.
        if not isinstance(d1[key], DictionaryObject):
            assert d1[key] == d2[key]
            continue
        compare_dict_objects(d1[key], d2[key])


@pytest.mark.slow
def test_page_transformations():
    """
    Smoke-test the merge/transform helpers by merging a page onto itself.

    Nothing is asserted; every call only has to complete without raising.
    """
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
    page: PageObject = writer.pages[0]

    page.merge_rotated_page(page, 90, expand=True)

    rotate_only = Transformation().rotate(90).scale(1, 1)
    page.merge_transformed_page(page, rotate_only, expand=True)

    rotate_and_shift = Transformation().rotate(90).scale(1, 1).translate(1, 1)
    page.merge_transformed_page(page, rotate_and_shift, expand=True)

    # Rotation around the point (100, 100).
    rotate_about_point = (
        Transformation().translate(-100, -100).rotate(90).translate(100, 100)
    )
    page.merge_transformed_page(page, rotate_about_point, expand=False)

    page.merge_scaled_page(page, 2, expand=False)

    shift_only = Transformation().scale(1, 1).translate(1, 1)
    page.merge_transformed_page(page, shift_only)

    page.merge_translated_page(page, 100, 100, expand=False)
    # A raw six-element matrix is accepted as well.
    page.add_transformation((1, 0, 0, 0, 0, 0))


@pytest.mark.parametrize(
    ("pdf_path", "password"),
    [
        (RESOURCE_ROOT / "crazyones.pdf", None),
        (RESOURCE_ROOT / "attachment.pdf", None),
        (RESOURCE_ROOT / "side-by-side-subfig.pdf", None),
        (
            RESOURCE_ROOT / "libreoffice-writer-password.pdf",
            "openpassword",
        ),
    ],
)
def test_compress_content_streams(pdf_path, password):
    """
    Compress the content streams of every page of a cloned document.

    Also checks that ``page_number`` matches the page position for both the
    reader and the writer, and that compressing a page that belongs to a
    reader raises ``ValueError``.
    """
    reader = PdfReader(pdf_path)
    if password:
        reader.decrypt(password)
    writer = PdfWriter()

    for position, reader_page in enumerate(reader.pages):
        assert reader_page.page_number == position
    assert isinstance(reader.pages[0].get_contents(), ContentStream)

    writer.clone_document_from_reader(reader)
    assert isinstance(writer.pages[0].get_contents(), ContentStream)
    for position, writer_page in enumerate(writer.pages):
        assert writer_page.page_number == position
        writer_page.compress_content_streams()

    # Compressing from the reader side must fail: objects cannot be added
    # outside of a PdfWriter.
    with pytest.raises(ValueError):
        reader.pages[0].compress_content_streams()


def test_page_properties():
    """Check the five page boxes of crazyones.pdf and the ``bleedbox`` setter."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    page = reader.pages[0]

    # All boxes are expected to have the same extent.
    full_page = (0, 0, 612, 792)
    assert page.mediabox == RectangleObject(full_page)
    assert page.cropbox == RectangleObject(full_page)
    assert page.bleedbox == RectangleObject(full_page)
    assert page.trimbox == RectangleObject(full_page)
    assert page.artbox == RectangleObject(full_page)

    # Assigning a new bleed box must be reflected by the getter.
    new_bleed = (0, 1, 100, 101)
    page.bleedbox = RectangleObject(new_bleed)
    assert page.bleedbox == RectangleObject(new_bleed)


def test_page_rotation():
    """
    Check ``rotate`` validation, the ``rotation`` property and
    ``transfer_rotation_to_content``.
    """
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
    page = writer.pages[0]

    # Angles that are not a multiple of 90 are rejected by rotate().
    with pytest.raises(ValueError) as exc_info:
        page.rotate(91)
    assert exc_info.value.args[0] == "Rotation angle must be a multiple of 90"

    # The rotation property can be read, assigned and updated in place.
    assert page.rotation == 0
    page.rotation = 180
    assert page.rotation == 180
    page.rotation += 190
    assert page.rotation == 0

    # After transferring the rotation into the content, the media box is
    # expected to be landscape (792 x 612) and anchored at the origin.
    page.rotation -= 90
    page.transfer_rotation_to_content()
    box = page.mediabox
    assert math.isclose(box.left, 0, abs_tol=0.1)
    assert math.isclose(box.bottom, 0, abs_tol=0.1)
    assert math.isclose(box.right, 792, abs_tol=0.1)
    assert math.isclose(box.top, 612, abs_tol=0.1)


def test_page_indirect_rotation():
    """The first page of indirect-rotation.pdf reports a rotation of 0."""
    first_page = PdfReader(RESOURCE_ROOT / "indirect-rotation.pdf").pages[0]

    assert first_page.rotation == 0


def test_page_scale():
    """``Transformation.scale`` needs at least one factor and mirrors a single one."""
    identity = Transformation()

    # Calling scale() without any factor is an error.
    with pytest.raises(ValueError) as exc_info:
        identity.scale()
    assert exc_info.value.args[0] == "Either sx or sy must be specified"

    # A single factor is applied to both axes.
    scaled_by_sx = identity.scale(sx=2)
    assert scaled_by_sx.ctm == (2, 0, 0, 2, 0, 0)
    scaled_by_sy = identity.scale(sy=3)
    assert scaled_by_sy.ctm == (3, 0, 0, 3, 0, 0)


def test_add_transformation_on_page_without_contents():
    """``add_transformation`` must not fail on a page that has no contents."""
    empty_page = PageObject()
    assert empty_page.get_contents() is None

    # Must be a no-op rather than an error.
    empty_page.add_transformation(Transformation())

    # Once a content stream is attached, it is returned by get_contents().
    empty_page[NameObject("/Contents")] = ContentStream(None, None)
    assert isinstance(empty_page.get_contents(), ContentStream)


@pytest.mark.enable_socket
def test_iss_1142():
    """Check the fix for the graphics-state save/restore (q/Q) problem."""
    url = "https://github.com/py-pdf/pypdf/files/9150656/ST.2019.PDF"
    data = get_data_from_url(url, name="st2019.pdf")
    reader = PdfReader(BytesIO(data))
    txt = reader.pages[3].extract_text()

    expected_fragments = (
        # The following two texts are contained in two different cells:
        "有限公司",  # limited company
        "郑州分公司",  # branch office in Zhengzhou
        # First cell (see page 4/254):
        "郑州药素电子商务有限公司",
        # Next cell (first cell in next line):
        "郑州分公司",
    )
    for fragment in expected_fragments:
        assert txt.find(fragment) > 0


@pytest.mark.enable_socket
@pytest.mark.slow
@pytest.mark.parametrize(
    ("url", "name"),
    [
        # keyerror_potentially_empty_page
        (
            "https://github.com/user-attachments/files/18381736/tika-964029.pdf",
            "tika-964029.pdf",
        ),
        # 1140 / 1141:
        (
            "https://github.com/user-attachments/files/18381702/tika-932446.pdf",
            "tika-932446.pdf",
        ),
        # iss 1134:
        (
            "https://github.com/py-pdf/pypdf/files/9150656/ST.2019.PDF",
            "iss_1134.pdf",
        ),
        # iss 1:
        (
            "https://github.com/py-pdf/pypdf/files/9432350/Work.Flow.From.Check.to.QA.pdf",
            "WFCA.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381736/tika-964029.pdf",
            "tika-964029.pdf",
        ),  # single_quote_op
        (
            # The cache name must be unique per file: it was previously
            # "tika-964029.pdf", which collides with the entries above, so a
            # different document than the one behind this URL could be read.
            "https://github.com/py-pdf/pypdf/files/9428434/TelemetryTX_EM.pdf",
            "TelemetryTX_EM.pdf",
        ),  # no_resources
        (
            # https://www.itu.int/rec/T-REC-X.25-199610-I/en
            "https://github.com/py-pdf/pypdf/files/12423313/T-REC-X.25-199610-I.PDF-E.pdf",
            "T-REC-X.25-199610-I!!PDF-E.pdf",
        ),
    ],
)
def test_extract_text(url, name):
    """
    Extract the text of every page of several downloaded documents.

    Nothing is asserted about the extracted text; each extraction only has
    to complete without raising.

    Args:
        url: Location the document is fetched from.
        name: File name passed to ``get_data_from_url`` for this document;
            it has to be distinct for distinct documents.
    """
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    for page in reader.pages:
        page.extract_text()


@pytest.mark.enable_socket
@pytest.mark.slow
def test_extract_text_page_pdf_impossible_decode_xform(caplog):
    """Extracting text from tika-972962.pdf must not log any warning."""
    url = "https://github.com/user-attachments/files/18381748/tika-972962.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-972962.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))

    for page in reader.pages:
        page.extract_text()

    # Text extraction recognises no text and emits no warning.
    assert normalize_warnings(caplog.text) == [""]


@pytest.mark.enable_socket
@pytest.mark.slow
def test_extract_text_operator_t_star():  # L1266, L1267
    """Text extraction of tika-967943.pdf has to run without raising."""
    url = "https://github.com/user-attachments/files/18381740/tika-967943.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-967943.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))

    for page in reader.pages:
        page.extract_text()


def test_extract_text_visitor_callbacks():
    """
    Extract text in rectangle-objects or simple tables.

    This test uses GeoBase_NHNC1_Data_Model_UML_EN.pdf.
    It extracts the labels of package-boxes in Figure 2.
    It extracts the texts in table "REVISION HISTORY".
    A third part uses Sample_Td-matrix.pdf to check the text matrix that is
    passed to the operand visitor after each ``Td`` operation.
    """
    # Test 1: We test the analysis of page 7 "2.1 LRS model".
    reader = PdfReader(RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf")
    page_lrs_model = reader.pages[6]

    # We ignore the invisible large rectangles.
    def ignore_large_rectangles(r) -> bool:
        return r.w < 400 and r.h < 400

    (texts, rectangles) = extract_text_and_rectangles(
        page_lrs_model, rect_filter=ignore_large_rectangles
    )

    # We see ten rectangles (5 tabs, 5 boxes) but there are 64 rectangles
    # (including some invisible ones).
    assert len(rectangles) == 60
    # Group the stripped text pieces by the first rectangle containing them.
    # A dedicated name is used for the per-rectangle list so that the list
    # being iterated (``texts``) is not rebound inside its own loop.
    rectangle2texts = {}
    for t in texts:
        for r in rectangles:
            if r.contains(t.x, t.y):
                rectangle2texts.setdefault(r, []).append(t.text.strip())
                break
    # Five boxes and the figure-description below.
    assert len(rectangle2texts) == 6
    box_texts = [" ".join(parts) for parts in rectangle2texts.values()]
    assert "Hydro Network" in box_texts
    assert "Hydro Events" in box_texts
    assert "Metadata" in box_texts
    assert "Hydrography" in box_texts
    assert "Toponymy (external model)" in box_texts

    # Test 2: Parse table "REVISION HISTORY" on page 3.
    page_revisions = reader.pages[2]
    # We ignore the second table, therefore: r.y > 350

    def filter_first_table(r) -> bool:
        return r.w > 1 and r.h > 1 and r.w < 400 and r.h < 400 and r.y > 350

    (texts, rectangles) = extract_text_and_rectangles(
        page_revisions, rect_filter=filter_first_table
    )
    rows = extract_table(texts, rectangles)

    assert len(rows) == 9
    assert extract_cell_text(rows[0][0]) == "Date"
    assert extract_cell_text(rows[0][1]) == "Version"
    assert extract_cell_text(rows[0][2]) == "Description"
    assert extract_cell_text(rows[1][0]) == "September 2002"
    # The line break between "English review;"
    # and "Remove" is not detected.
    assert (
        extract_cell_text(rows[6][2])
        == "English review;Remove the UML model for the Segmented view."
    )
    assert extract_cell_text(rows[7][2]) == "Update from the March Workshop comments."

    # Check the fonts. We check: /F2 9.96 Tf [...] [(Dat)-2(e)] TJ
    text_dat_of_date = rows[0][0][0]
    assert text_dat_of_date.font_dict is not None
    assert text_dat_of_date.font_dict["/Name"] == "/F2"
    assert text_dat_of_date.get_base_font() == "/Arial,Bold"
    assert text_dat_of_date.font_dict["/Encoding"] == "/WinAnsiEncoding"
    assert text_dat_of_date.font_size == 9.96
    # Check: /F1 9.96 Tf [...] [(S)4(ep)4(t)-10(em)-20(be)4(r)-3( 20)4(02)] TJ
    text_of_september = rows[1][0][0]
    assert text_of_september.font_dict is not None
    assert text_of_september.font_dict["/Name"] == "/F1"
    assert text_of_september.get_base_font() == "/Arial"
    assert text_of_september.font_dict["/Encoding"] == "/WinAnsiEncoding"
    # The font size of *this* cell is checked (the header cell's size was
    # previously asserted a second time here by mistake).
    assert text_of_september.font_size == 9.96

    # Test 3: Read a table in a document using a non-translating
    #         but scaling Tm-operand
    reader = PdfReader(RESOURCE_ROOT / "Sample_Td-matrix.pdf")
    page_td_model = reader.pages[0]
    # We store the translations of the Td-executions.
    list_td = []

    def visitor_td(op, args, cm, tm) -> None:
        if op == b"Td":
            list_td.append((tm[4], tm[5]))

    page_td_model.extract_text(visitor_operand_after=visitor_td)
    assert len(list_td) == 4
    # Check the translations of the four Td-executions.
    assert list_td[0] == (210.0, 110.0)
    assert list_td[1] == (410.0, 110.0)
    assert list_td[2] == (210.0, 210.0)
    assert list_td[3] == (410.0, 210.0)


@pytest.mark.parametrize(
    ("pdf_path", "password", "embedded", "unembedded"),
    [
        (
            RESOURCE_ROOT / "crazyones.pdf",
            None,
            {
                "/HHXGQB+SFTI1440",
                "/TITXYI+SFRM0900",
                "/YISQAD+SFTI1200",
            },
            set(),
        ),
        (
            RESOURCE_ROOT / "attachment.pdf",
            None,
            {
                "/HHXGQB+SFTI1440",
                "/TITXYI+SFRM0900",
                "/YISQAD+SFTI1200",
            },
            set(),
        ),
        (
            RESOURCE_ROOT / "libreoffice-writer-password.pdf",
            "openpassword",
            {"/BAAAAA+DejaVuSans"},
            set(),
        ),
        (
            RESOURCE_ROOT / "imagemagick-images.pdf",
            None,
            set(),
            {"/Helvetica"},
        ),
        (RESOURCE_ROOT / "imagemagick-lzw.pdf", None, set(), set()),
        (
            RESOURCE_ROOT / "reportlab-inline-image.pdf",
            None,
            set(),
            {"/Helvetica"},
        ),
        # fonts in annotations
        (
            RESOURCE_ROOT / "FormTestFromOo.pdf",
            None,
            {"/CAAAAA+LiberationSans", "/EAAAAA+SegoeUI", "/BAAAAA+LiberationSerif"},
            {"/LiberationSans", "/ZapfDingbats"},
        ),
    ],
)
def test_get_fonts(pdf_path, password, embedded, unembedded):
    """
    Collect the fonts reported by ``_get_fonts`` over all pages of a document.

    The union of the first and of the second element returned for each page
    must match ``embedded`` and ``unembedded`` respectively.
    """
    reader = PdfReader(pdf_path, password=password)
    found_embedded = set()
    found_unembedded = set()
    for page in reader.pages:
        page_embedded, page_unembedded = page._get_fonts()
        found_embedded.update(page_embedded)
        found_unembedded.update(page_unembedded)
    assert (found_embedded, found_unembedded) == (embedded, unembedded)


@pytest.mark.enable_socket
def test_get_fonts2():
    """Check the fonts reported for the second and third page of WS_T.483.8-2016.pdf."""
    url = "https://github.com/py-pdf/pypdf/files/12618104/WS_T.483.8-2016.pdf"
    pdf_bytes = get_data_from_url(url, name="WS_T.483.8-2016.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))

    # Fonts that both pages are expected to report in the first set.
    fonts_on_both_pages = {
        "/E-BZ9-PK748344-Identity-H",
        "/E-FZ9-PK74836f-Identity-H",
        "/E-HZ9-PK7483a5-Identity-H",
        "/F-BZ9-PK7483cb-Identity-H",
        "/H-HT9-PK748200005e1-Identity-H",
        "/H-SS9-PK748200005e0-Identity-H",
        "/O9-PK748464-Identity-H",
    }
    fonts_only_on_page_1 = {
        "/QGNGZR+FzBookMaker0DlFont00536872414",
        "/QGNGZS+FzBookMaker1DlFont10536872415",
        "/SSJ-PK748200005d9-Identity-H",
        "/SSJ-PK748200005da-Identity-H",
        "/SSJ-PK748200005db-Identity-H",
    }
    fonts_only_on_page_2 = {
        "/QGNGZT+FzBookMaker0DlFont00536872418",
        "/QGNGZU+FzBookMaker1DlFont10536872420",
    }

    # The second returned set is expected to be empty on both pages.
    assert reader.pages[1]._get_fonts() == (
        fonts_on_both_pages | fonts_only_on_page_1,
        set(),
    )
    assert reader.pages[2]._get_fonts() == (
        fonts_on_both_pages | fonts_only_on_page_2,
        set(),
    )


def test_annotation_getter():
    """
    Read the first annotation of commented.pdf through ``page.annotations``.

    Back-references (``/P`` and the popup's ``/Parent``) are removed before
    the remaining content is compared with the expected values.
    """
    reader = PdfReader(RESOURCE_ROOT / "commented.pdf")
    annotations = reader.pages[0].annotations
    assert annotations is not None
    first_annotation = annotations[0]
    assert isinstance(first_annotation, IndirectObject)

    annot_dict = dict(first_annotation.get_object())
    assert "/P" in annot_dict
    assert isinstance(annot_dict["/P"], IndirectObject)
    del annot_dict["/P"]

    # Replace the popup entry by the object it refers to and drop its
    # references back to the page and to the parent annotation.
    popup = annot_dict["/Popup"].get_object()
    annot_dict["/Popup"] = popup
    del popup["/P"]
    del popup["/Parent"]

    expected_popup = DictionaryObject(
        {
            "/M": "D:20220406191847+02'00",
            "/Rect": ArrayObject([294.75, 446.25, 494.75, 596.25]),
            "/Subtype": "/Popup",
            "/Type": "/Annot",
        }
    )
    expected_annotation = {
        "/Type": "/Annot",
        "/Subtype": "/Text",
        "/Rect": ArrayObject([270.75, 596.25, 294.75, 620.25]),
        "/Contents": "Note in second paragraph",
        "/C": ArrayObject([1, 1, 0]),
        "/M": "D:20220406191858+02'00",
        "/Popup": expected_popup,
        "/T": "moose",
    }
    assert annot_dict == expected_annotation


def test_annotation_setter(pdf_file_path):
    """
    Exercise the ``annotations`` setter/deleter and write the result to disk.

    The written file is not checked automatically; it is meant to be
    inspected manually.

    Args:
        pdf_file_path: Path of the output file (pytest fixture).
    """
    # Arrange
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    reader = PdfReader(pdf_path)
    page = reader.pages[0]
    writer = PdfWriter()
    writer.add_page(page)
    # Only page objects may be added to the writer.
    with pytest.raises(ValueError):
        writer.add_page(DictionaryObject())

    # Act
    page_number = 0
    page_link = writer.get_object(writer._pages)["/Kids"][page_number]
    annot_dict = {
        NameObject("/P"): page_link,
        NameObject("/Type"): NameObject("/Annot"),
        NameObject("/Subtype"): NameObject("/Text"),
        NameObject("/Rect"): ArrayObject(
            [
                FloatObject(270.75),
                FloatObject(596.25),
                FloatObject(294.75),
                FloatObject(620.25),
            ]
        ),
        NameObject("/Contents"): TextStringObject("Note in second paragraph"),
        NameObject("/C"): ArrayObject([FloatObject(1), FloatObject(1), FloatObject(0)]),
        NameObject("/M"): TextStringObject("D:20220406191858+02'00"),
        NameObject("/Popup"): DictionaryObject(
            {
                NameObject("/M"): TextStringObject("D:20220406191847+02'00"),
                NameObject("/Rect"): ArrayObject(
                    [
                        FloatObject(294.75),
                        FloatObject(446.25),
                        FloatObject(494.75),
                        FloatObject(596.25),
                    ]
                ),
                NameObject("/Subtype"): NameObject("/Popup"),
                # /Type is a PDF name, exactly like on the parent annotation
                # above; it was previously built as a text string.
                NameObject("/Type"): NameObject("/Annot"),
            }
        ),
        NameObject("/T"): TextStringObject("moose"),
    }
    arr = ArrayObject()
    page.annotations = arr

    # Delete Annotations
    page.annotations = None

    d = DictionaryObject(annot_dict)
    ind_obj = writer._add_object(d)
    arr.append(ind_obj)

    # Assert manually
    with open(pdf_file_path, "wb") as fp:
        writer.write(fp)


@pytest.mark.enable_socket
@pytest.mark.xfail(reason="#1091")
def test_text_extraction_issue_1091():
    """Open tika-966635.pdf (expecting a read warning) and extract all text."""
    url = "https://github.com/user-attachments/files/18381737/tika-966635.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-966635.pdf")
    stream = BytesIO(pdf_bytes)

    # Creating the reader is expected to emit a PdfReadWarning.
    with pytest.warns(PdfReadWarning):
        reader = PdfReader(stream)

    for page in reader.pages:
        page.extract_text()


@pytest.mark.enable_socket
def test_empyt_password_1088():
    """Counting the pages of tika-941536.pdf has to work without raising."""
    url = "https://github.com/user-attachments/files/18381712/tika-941536.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-941536.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))

    # Only the access itself is under test; the count is not checked.
    len(reader.pages)


@pytest.mark.samples
def test_old_habibi():
    """
    Extract mixed Latin/Arabic text from the habibi sample file.

    The document is read from the local sample files and nothing is
    downloaded, hence the ``samples`` marker rather than ``enable_socket``.
    """
    # this habibi has multiple characters associated with the h
    reader = PdfReader(SAMPLE_ROOT / "015-arabic/habibi.pdf")
    txt = reader.pages[0].extract_text()  # very odd file
    # extract from acrobat reader "حَبيبي habibi􀀃􀏲􀎒􀏴􀎒􀎣􀋴
    assert "habibi" in txt
    assert "حَبيبي" in txt


@pytest.mark.samples
def test_read_link_annotation():
    """
    Read the single link annotation of the LibreOffice sample.

    The ``/Rect`` entry only has to be present; its value is excluded from
    the comparison.
    """
    reader = PdfReader(SAMPLE_ROOT / "016-libre-office-link/libre-office-link.pdf")
    assert len(reader.pages[0].annotations) == 1
    annot = dict(reader.pages[0].annotations[0].get_object())

    expected_action = DictionaryObject(
        {
            "/S": "/URI",
            "/Type": "/Action",
            "/URI": "https://martin-thoma.com/",
        }
    )
    expected = {
        "/Type": "/Annot",
        "/Subtype": "/Link",
        "/A": expected_action,
        "/Border": ArrayObject([0, 0, 0]),
        "/Rect": [92.043, 771.389, 217.757, 785.189],
    }

    # Same keys on both sides, then compare everything except /Rect.
    assert set(expected.keys()) == set(annot.keys())
    for side in (expected, annot):
        del side["/Rect"]
    assert annot == expected


@pytest.mark.enable_socket
def test_no_resources():
    """Merging the first page of 108.pdf onto itself must not raise."""
    url = "https://github.com/py-pdf/pypdf/files/9572045/108.pdf"
    pdf_bytes = get_data_from_url(url, name="108.pdf")
    writer = PdfWriter(clone_from=BytesIO(pdf_bytes))

    target_page = writer.pages[0]
    overlay_page = writer.pages[0]
    target_page.merge_page(overlay_page)


def test_merge_page_reproducible_with_proc_set():
    """Merging pages yields a sorted ``/ProcSet`` regardless of the input order."""
    target = PageObject.create_blank_page(width=100, height=100)
    overlay = PageObject.create_blank_page(width=100, height=100)

    ordered = sorted(NameObject(f"/{x}") for x in range(20))
    shuffled = ordered.copy()
    shuffle(shuffled)

    # Each page gets a partly overlapping slice of the shuffled names, so
    # both /ProcSet arrays start out in a weird order.
    resources_key = NameObject("/Resources")
    proc_set_key = NameObject("/ProcSet")
    target[resources_key][proc_set_key] = ArrayObject(shuffled[:15])
    overlay[resources_key][proc_set_key] = ArrayObject(shuffled[5:])

    target.merge_page(overlay)

    assert target[resources_key][proc_set_key] == ordered


@pytest.mark.parametrize(
    ("apage1", "apage2", "expected_result", "expected_renames"),
    [
        # simple cases:
        pytest.param({}, {}, {}, {}, id="no resources"),
        pytest.param(
            {"/1": "/v1"},
            {"/2": "/v2"},
            {"/1": "/v1", "/2": "/v2"},
            {},
            id="no overlap",
        ),
        pytest.param(
            {"/x": "/v"}, {"/x": "/v"}, {"/x": "/v"}, {}, id="overlap, matching values"
        ),
        pytest.param(
            {"/x": "/v1"},
            {"/x": "/v2"},
            {"/x": "/v1", "/x-0": "/v2"},
            {"/x": "/x-0"},
            id="overlap, different values",
        ),
        # carefully crafted names that match the renaming pattern:
        pytest.param(
            {"/x": "/v1", "/x-0": "/v1", "/x-1": "/v1"},
            {"/x": "/v2"},
            {
                "/x": "/v1",
                "/x-0": "/v1",
                "/x-1": "/v1",
                "/x-2": "/v2",
            },
            {"/x": "/x-2"},
            id="crafted, different values",
        ),
        pytest.param(
            {"/x": "/v1", "/x-0": "/v1", "/x-1": "/v"},
            {"/x": "/v"},
            {"/x": "/v1", "/x-0": "/v1", "/x-1": "/v"},
            {"/x": "/x-1"},
            id="crafted, matching value in chain",
        ),
        pytest.param(
            {"/x": "/v1"},
            {"/x": "/v2.1", "/x-0": "/v2.2"},
            {"/x": "/v1", "/x-0": "/v2.1", "/x-0-0": "/v2.2"},
            {"/x": "/x-0", "/x-0": "/x-0-0"},
            id="crafted, overlaps with previous rename, different value",
        ),
        pytest.param(
            {"/x": "/v1"},
            {"/x": "/v2", "/x-0": "/v2"},
            {"/x": "/v1", "/x-0": "/v2"},
            {"/x": "/x-0"},
            id="crafted, overlaps with previous rename, matching value",
        ),
    ],
)
def test_merge_resources(apage1, apage2, expected_result, expected_renames):
    """
    Check ``PageObject._merge_resources`` for both values of ``new_res``.

    Args:
        apage1: Resource entries (name -> value) of the first page.
        apage2: Resource entries (name -> value) of the second page.
        expected_result: Expected merged resource dictionary.
        expected_renames: Expected mapping of renamed resource names.
    """
    for new_res in (False, True):
        # Arrange
        page1 = PageObject()
        page1[NameObject(PG.RESOURCES)] = DictionaryObject()
        for k, v in apage1.items():
            page1[PG.RESOURCES][NameObject(k)] = NameObject(v)

        page2 = PageObject()
        page2[NameObject(PG.RESOURCES)] = DictionaryObject()
        for k, v in apage2.items():
            page2[PG.RESOURCES][NameObject(k)] = NameObject(v)

        # Act
        result, renames = page1._merge_resources(page1, page2, PG.RESOURCES, new_res)

        # Assert
        # Both checks run inside the loop so that the renames are verified
        # for every value of new_res, not only for the last iteration.
        assert result == expected_result
        assert renames == expected_renames


def test_merge_page_resources_smoke_test():
    """
    Merge two pages whose ``/Properties`` resources partly overlap.

    Checks the merged resource dictionary and that the operands of the
    second page's content are renamed where the values differ.
    """
    # Arrange
    NO = NameObject
    page1 = PageObject.create_blank_page(width=100, height=100)
    page2 = PageObject.create_blank_page(width=100, height=100)

    # set up some dummy resources that overlap (or not) between the two pages
    # (note, all the edge cases are tested in test_merge_resources)
    props1 = DictionaryObject(
        {
            NO("/just1"): NO("/just1-value"),
            NO("/overlap-matching"): NO("/overlap-matching-value"),
            NO("/overlap-different"): NO("/overlap-different-value1"),
        }
    )
    page1[NO("/Resources")][NO("/Properties")] = props1
    props2 = DictionaryObject(
        {
            NO("/just2"): NO("/just2-value"),
            NO("/overlap-matching"): NO("/overlap-matching-value"),
            NO("/overlap-different"): NO("/overlap-different-value2"),
        }
    )
    page2[NO("/Resources")][NO("/Properties")] = props2

    # use these keys for some "operations", to validate renaming
    # (the operand name doesn't matter)
    contents1 = ContentStream(None, None)
    page1[NO("/Contents")] = contents1
    contents1.operations = [(ArrayObject(props1.keys()), b"page1-contents")]
    contents2 = ContentStream(None, None)
    page2[NO("/Contents")] = contents2
    contents2.operations = [(ArrayObject(props2.keys()), b"page2-contents")]

    expected_properties = {
        "/just1": "/just1-value",
        "/just2": "/just2-value",
        "/overlap-matching": "/overlap-matching-value",
        "/overlap-different": "/overlap-different-value1",
        "/overlap-different-0": "/overlap-different-value2",
    }
    # page 1 operands stay untouched ...
    untouched_operation = (ArrayObject(props1.keys()), b"page1-contents")
    # ... whereas the conflicting name of page 2 is renamed.
    renamed_operands = ArrayObject(
        [
            NO("/just2"),
            NO("/overlap-matching"),
            NO("/overlap-different-0"),
        ]
    )
    renamed_operation = (renamed_operands, b"page2-contents")
    expected_operations = [untouched_operation, renamed_operation]

    # Act
    page1.merge_page(page2)

    # Assert
    assert page1[NO("/Resources")][NO("/Properties")] == expected_properties

    markers = (b"page1-contents", b"page2-contents")
    relevant_operations = []
    for operands, operator in page1.get_contents().operations:
        if operator in markers:
            relevant_operations.append((operands, operator))
    assert relevant_operations == expected_operations


@pytest.mark.enable_socket
def test_merge_transformed_page_into_blank():
    """
    Merge translated pages into blank pages and check ``page_number``.

    The first part verifies that identically named fonts of two different
    documents end up as distinct objects; the last part checks how
    ``page_number`` behaves for detached, inserted and removed pages.
    """
    url = "https://github.com/py-pdf/pypdf/files/10768334/badges_3vjrh_7LXDZ_1-1.pdf"
    r1 = PdfReader(BytesIO(get_data_from_url(url, name="badges_3vjrh_7LXDZ_1.pdf")))
    url = "https://github.com/py-pdf/pypdf/files/10768335/badges_3vjrh_7LXDZ_2-1.pdf"
    r2 = PdfReader(BytesIO(get_data_from_url(url, name="badges_3vjrh_7LXDZ_2.pdf")))

    writer = PdfWriter()
    writer.add_blank_page(100, 100)
    writer.pages[0].merge_translated_page(r1.pages[0], 0, 0, True, True)
    writer.pages[0].merge_translated_page(r2.pages[0], 1000, 1000, True, True)
    # The font of the second document is stored under a renamed key and
    # must refer to a different object than the font of the first one.
    fonts = writer.pages[0]["/Resources"]["/Font"]
    first_font_id = fonts.raw_get("/F2+0").idnum
    second_font_id = writer.pages[0]["/Resources"]["/Font"].raw_get("/F2+0-0").idnum
    assert first_font_id != second_font_id

    # Tile the first badge 4 x 7 times on a second blank page.
    writer.add_blank_page(100, 100)
    for column in range(4):
        for row in range(7):
            writer.pages[1].merge_translated_page(
                r1.pages[0],
                column * r1.pages[0].trimbox[2],
                row * r1.pages[0].trimbox[3],
                True,
                True,
            )

    # page_number bookkeeping.
    blank = PageObject.create_blank_page(width=100, height=100)
    assert blank.page_number is None
    inserted_blank = writer.add_page(blank)
    assert blank.page_number is None  # the inserted page is a clone
    assert inserted_blank.page_number == len(writer.pages) - 1
    writer.remove_page(inserted_blank.indirect_reference)
    assert inserted_blank.page_number is None
    inserted_blank = writer.add_page(blank)
    # Remove the entry directly from /Kids, bypassing the writer's API.
    kids = writer._pages.get_object()["/Kids"]
    del kids[-1]
    assert inserted_blank.page_number is not None


def test_pages_printing():
    """Check ``str(reader.pages)`` and image access on a page without images."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")

    assert str(reader.pages) == "[PageObject(0)]"

    first_page = reader.pages[0]
    assert len(first_page.images) == 0
    # Looking up an unknown image name raises KeyError.
    with pytest.raises(KeyError):
        reader.pages[0].images["~1~"]


@pytest.mark.enable_socket
def test_del_pages():
    """
    Delete pages by index, negative index, slice and invalid keys.

    Also covers deletion from a reader with a damaged page tree and the
    private ``_get_page_in_node`` lookup of an incremental writer.
    """
    url = "https://github.com/user-attachments/files/18381712/tika-941536.pdf"
    name = "tika-941536.pdf"
    writer = PdfWriter(clone_from=BytesIO(get_data_from_url(url, name=name)))

    # Delete a single page by index.
    count_before = len(writer.pages)
    removed_ref = writer.pages[1].indirect_reference
    del writer.pages[1]
    assert len(writer.pages) == count_before - 1
    pages = writer._pages.get_object()
    assert pages["/Count"] == count_before - 1
    assert len(pages["/Kids"]) == count_before - 1
    assert removed_ref not in pages["/Kids"]

    # Negative index is supported; invalid keys are rejected.
    del writer.pages[-2]
    with pytest.raises(TypeError):
        del writer.pages["aa"]
    with pytest.raises(IndexError):
        del writer.pages[9999]

    # Delete a slice of pages.
    removed_refs = tuple(p.indirect_reference for p in writer.pages[3:5])
    count_before = len(writer.pages)
    del writer.pages[3:5]
    assert len(writer.pages) == count_before - 2
    for ref in removed_refs:
        assert ref not in pages["/Kids"]

    # del whole arborescence
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    # error case: the page is no longer listed in its parent's /Kids
    orphan = reader.pages[2]
    parent_kids = orphan["/Parent"].get_object()["/Kids"]
    position = parent_kids.index(orphan.indirect_reference)
    del orphan["/Parent"].get_object()["/Kids"][position]
    with pytest.raises(PdfReadError):
        del reader.pages[2]

    url = "https://github.com/py-pdf/pypdf/files/13946477/panda.pdf"
    name = "iss2343b.pdf"
    writer = PdfWriter(BytesIO(get_data_from_url(url, name=name)), incremental=True)
    node, idx = writer._get_page_in_node(53)
    assert (node.indirect_reference.idnum, idx) == (11776, 1)
    node, idx = writer._get_page_in_node(10000)
    assert (node.indirect_reference.idnum, idx) == (11769, -1)
    with pytest.raises(PyPdfError):
        writer._get_page_in_node(-1)

    del writer.pages[4]  # to propagate among /Pages
    # Deleting the full slice empties the document.
    del writer.pages[:]
    assert len(writer.pages) == 0
    assert len(writer.root_object["/Pages"]["/Kids"]) == 0
    assert len(writer.flattened_pages) == 0


def test_pdf_pages_missing_type():
    """Pages stay accessible when the first ``/Kids`` entry lacks ``/Type``."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")

    # Remove /Type from the first kid of the page tree root.
    first_kid = reader.trailer["/Root"]["/Pages"]["/Kids"][0].get_object()
    del first_kid["/Type"]

    # Page access must work for the reader and for a writer cloned from it.
    reader.pages[0]
    writer = PdfWriter(clone_from=reader)
    writer.pages[0]


@pytest.mark.enable_socket
def test_merge_with_stream_wrapped_in_save_restore():
    """Test for issue #2587"""
    url = "https://github.com/py-pdf/pypdf/files/14895914/blank_portrait.pdf"
    pdf_bytes = get_data_from_url(url, name="blank_portrait.pdf")
    writer = PdfWriter(clone_from=BytesIO(pdf_bytes))

    # The page content consists of an empty save/restore pair only.
    target_page = writer.pages[0]
    assert target_page.get_contents().get_data() == b"q Q"

    # Merging the page onto itself must not produce two adjacent "Q".
    overlay_page = writer.pages[0]
    target_page.merge_page(overlay_page)
    merged_data = target_page.get_contents().get_data()
    assert b"QQ" not in merged_data


@pytest.mark.samples
def test_compression():
    """Test for issue #1897"""

    def create_stamp_pdf() -> BytesIO:
        """Build a one-page "Hello World!" PDF with fpdf (skips if missing)."""
        pytest.importorskip("fpdf")
        from fpdf import FPDF  # noqa: PLC0415

        stamp = FPDF()
        stamp.add_page()
        stamp.set_font("helvetica", "B", 16)
        stamp.cell(40, 10, "Hello World!")
        return BytesIO(stamp.output())

    template_page = PdfReader(create_stamp_pdf()).pages[0]
    writer = PdfWriter()
    writer.append(SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf", [1])
    objects_before = len(writer._objects)
    # font is added; +1 streamobjects + 1 ArrayObject
    expected_objects = objects_before + 1 + 2

    # 1 page only is modified
    for page in writer.pages:
        page.merge_page(template_page)
    assert len(writer._objects) == expected_objects

    # Compressing must recycle the objects instead of adding new ones.
    for page in writer.pages:
        page.compress_content_streams()
    assert len(writer._objects) == expected_objects

    # Removing the contents (twice) leaves a NullObject in the former slot.
    contents = writer.pages[0]["/Contents"]
    writer.pages[0].replace_contents(None)
    writer.pages[0].replace_contents(None)
    former_slot = writer._objects[contents.indirect_reference.idnum - 1]
    assert isinstance(former_slot, NullObject)


def test_merge_with_no_resources():
    """Test for issue #2147"""
    writer = PdfWriter()

    # Two blank pages, both stripped of their /Resources entry.
    first = writer.add_blank_page(900, 1200)
    del first["/Resources"]
    second = writer.add_blank_page(900, 1200)
    del second["/Resources"]

    # Merging has to cope with the missing resources.
    writer.pages[0].merge_page(second)


def test_get_contents_from_nullobject():
    """Issue #2157"""
    writer = PdfWriter()

    # A page whose /Contents refers to a NullObject has no contents.
    target = writer.add_blank_page(100, 100)
    null_reference = writer._add_object(NullObject())
    target[NameObject("/Contents")] = null_reference
    assert target.get_contents() is None

    # Merging another page over it must still work.
    overlay = writer.add_blank_page(100, 100)
    target.merge_page(overlay, over=True)


@pytest.mark.enable_socket
def test_pos_text_in_textvisitor():
    """See #2200"""
    url = "https://github.com/py-pdf/pypdf/files/12675974/page_178.pdf"
    pdf_bytes = get_data_from_url(url, name="test_text_pos.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    # Position taken from the text matrix for the text starting with
    # "5425."; the last match wins.
    position = ()

    def remember_position(text, cm, tm, fontdict, fontsize) -> None:
        nonlocal position
        if not text.startswith("5425."):
            return
        position = (tm[4], tm[5])

    reader.pages[0].extract_text(visitor_text=remember_position)
    assert abs(position[0] - 323.5) < 0.1
    assert abs(position[1] - 457.4) < 0.1


@pytest.mark.enable_socket
def test_pos_text_in_textvisitor2():
    """Select text by its horizontal position through a text visitor (see #2075)."""
    url = "https://github.com/py-pdf/pypdf/files/12318042/LegIndex-page6.pdf"
    pdf_data = get_data_from_url(url, name="LegIndex-page6.pdf")
    reader = PdfReader(BytesIO(pdf_data))
    target_x = 26
    collected = []

    def collect_at_level(text, cm, tm, fontdict, fontsize) -> None:
        # Both names get rebound by the enclosing test between the two runs.
        nonlocal target_x, collected
        if abs(tm[4] - target_x) < 2 and 210 < tm[5] < 740:
            collected.append(text.strip(" \n"))

    # First run: the top-level index entries.
    reader.pages[0].extract_text(visitor_text=collect_at_level)
    assert collected == [
        "ACUPUNCTURE BOARD",
        "ACUPUNCTURISTS AND ACUPUNCTURE",
        "ADMINISTRATIVE LAW AND PROCEDURE",
        "ADMINISTRATIVE LAW, OFFICE OF",
        "ADOPTION",
        "ADULT EDUCATION",
        "ADVERTISING. See also MARKETING; and particular subject matter (e.g.,",
    ]

    # Second run: the entries indented one level deeper.
    target_x = 35
    collected = []
    reader.pages[0].extract_text(visitor_text=collect_at_level)
    assert collected == [
        "members,  AB 1264",
        "assistants, acupuncture,  AB 1264",
        "complaints, investigations, etc.,  AB 1264",
        "day, california acupuncture,  HR 48",
        "massage services, asian,  AB 1264",
        "supervising acupuncturists,  AB 1264",
        "supportive acupuncture services, basic,  AB 1264",
        "rules and regulations—",
        "professional assistants and employees: employment and compensation,  AB 916",
        "adults, adoption of,  AB 1756",
        "agencies, organizations, etc.: requirements, prohibitions, etc.,  SB 807",
        "assistance programs, adoption: nonminor dependents,  SB 9",
        "birth certificates,  AB 1302",
        "contact agreements, postadoption—",
        "facilitators, adoption,  AB 120",
        "failed adoptions: reproductive loss leave,  SB 848",
        "hearings, adoption finalization: remote proceedings, technology, etc.,  SB 21",
        "native american tribes,  AB 120",
        "parental rights, reinstatement of,  AB 20",
        "parents, prospective adoptive: criminal background checks,  SB 824",
        "services, adult educational,  SB 877",
        "week, adult education,  ACR 31",
        "alcoholic beverages: tied-house restrictions,  AB 546",
        "campaign re social equity, civil rights, etc.,  SB 447",
        "cannabis,  AB 794",
        "elections. See ELECTIONS.",
        "false, misleading, etc., advertising—",
        "hotels, short-term rentals, etc., advertised rates: mandatory fee disclosures,  SB 683",
        "housing rental properties advertised rates: disclosures,  SB 611",
    ]


@pytest.mark.enable_socket
def test_missing_basefont_in_type3():
    """Collecting the fonts of the referenced document must not raise (cf. #2289)."""
    url = "https://github.com/py-pdf/pypdf/files/13307713/missing-base-font.pdf"
    pdf_data = get_data_from_url(url, name="missing-base-font.pdf")
    first_page = PdfReader(BytesIO(pdf_data)).pages[0]
    first_page._get_fonts()


def test_invalid_index():
    """Indexing the page list with a string raises a `TypeError`."""
    reader = PdfReader(RESOURCE_ROOT / "git.pdf")
    with pytest.raises(TypeError):
        _ = reader.pages["0"]


def test_negative_index():
    """For this document, index `-1` addresses the same page as index `0`."""
    reader = PdfReader(RESOURCE_ROOT / "git.pdf")
    first_page = reader.pages[0]
    last_page = reader.pages[-1]
    assert first_page == last_page


def test_get_contents_as_bytes():
    """`_get_contents_as_bytes` handles array, single-stream and missing `/Contents`."""
    writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf")

    # `/Contents` is indexed like an array here; take its first entry.
    first_stream = writer.pages[0]["/Contents"][0]
    expected = first_stream.get_data()
    assert writer.pages[0]._get_contents_as_bytes() == expected

    # Replace the array by its first entry.
    writer.pages[0][NameObject("/Contents")] = writer.pages[0]["/Contents"][0]
    assert writer.pages[0]._get_contents_as_bytes() == expected

    # Without any `/Contents` entry there is nothing to return.
    del writer.pages[0]["/Contents"]
    assert writer.pages[0]._get_contents_as_bytes() is None


def test_recursive_get_page_from_node():
    """A self-referencing `/Pages` node and an out-of-range insert position are rejected."""
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"

    # Make the page tree root its own parent.
    writer = PdfWriter(pdf_path, incremental=True)
    pages_node = writer.root_object["/Pages"]
    pages_node.get_object()[NameObject("/Parent")] = pages_node.indirect_reference
    with pytest.raises(PyPdfError):
        writer.add_page(writer.pages[0])

    # Start over with an untouched document for the insert positions.
    writer = PdfWriter(pdf_path, incremental=True)
    writer.insert_page(writer.pages[0], -1)
    with pytest.raises(ValueError):
        writer.insert_page(writer.pages[0], -10)


def test_get_contents__none_type():
    """`get_contents` returns `None` when the `/Contents` lookup itself yields `None`."""

    # We can observe this in reality as well, but these documents might be
    # confidential. A value of `None` cannot be assigned from code, only read
    # from PDF files, thus fake the lookups with a dummy page implementation.
    class MyPage(PageObject):
        def __contains__(self, item) -> bool:
            assert item == "/Contents"
            return True

        def __getitem__(self, item) -> Any:
            assert item == "/Contents"
            return None

    assert MyPage().get_contents() is None


def test_extract_text__none_type():
    """Text extraction yields an empty string when contents and fonts resolve to `None`."""

    class MyPage(PageObject):
        def __getitem__(self, item) -> Any:
            # Only the contents lookup is faked; everything else is delegated.
            if item != "/Contents":
                return super().__getitem__(item)
            return None

    page = MyPage()
    resources = DictionaryObject()
    font_reference = IndirectObject(1, 0, None)
    resources[NameObject("/Font")] = font_reference
    page[NameObject("/Resources")] = resources
    with mock.patch.object(font_reference, "get_object", return_value=None):
        assert page.extract_text() == ""


@pytest.mark.enable_socket
def test_scale_by():
    """Scaling a page scales all five of its boxes (tests for #3487)."""
    url = "https://github.com/user-attachments/files/22685841/input.pdf"
    pdf_data = get_data_from_url(url, name="issue3487.pdf")
    writer = PdfWriter(clone_from=BytesIO(pdf_data))

    box_names = ("artbox", "bleedbox", "cropbox", "mediabox", "trimbox")
    original_box = RectangleObject((0, 0, 595.275604, 841.88974))
    expected_box = RectangleObject((0.0, 0.0, 297.637802, 420.94487))
    for page in writer.pages:
        for box_name in box_names:
            assert getattr(page, box_name) == original_box

        page.scale_by(0.5)
        for box_name in box_names:
            assert getattr(page, box_name) == expected_box


@pytest.mark.enable_socket
@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
def test_box_rendering(tmp_path):
    """Render each box of the scaled document and compare it to a reference image (issue #3487)."""
    url = "https://github.com/user-attachments/files/22685841/input.pdf"
    pdf_data = get_data_from_url(url, name="issue3487.pdf")
    writer = PdfWriter(clone_from=BytesIO(pdf_data))

    for page in writer.pages:
        page.scale_by(0.5)

    # Reference rendering every box is compared against.
    target_png_path = tmp_path / "target.png"
    url = "https://github.com/user-attachments/assets/e9c2271c-bfc3-4a6f-8c91-ffefa24502e2"
    target_png_path.write_bytes(get_data_from_url(url, name="issue3487.png"))

    pdf_path = tmp_path / "out.pdf"
    writer.write(pdf_path)

    for box in ("Art", "Bleed", "Crop", "Media", "Trim"):
        png_path = tmp_path / f"{box}.png"
        command = [
            GHOSTSCRIPT_BINARY,
            f"-dUse{box}Box",
            "-dFirstPage=1",
            "-dLastPage=1",
            "-sDEVICE=pngalpha",
            "-o",
            png_path,
            pdf_path,
        ]
        # False positive: https://github.com/PyCQA/bandit/issues/333
        subprocess.run(command)  # noqa: S603
        assert png_path.is_file(), box
        assert image_similarity(png_path, target_png_path) >= 0.95, box


def test_delete_non_existent_annotations():
    """Setting `annotations` to `None` on a page without annotations keeps it `None`."""
    writer = PdfWriter()
    writer.add_blank_page(width=100, height=100)
    blank_page = writer.pages[0]
    assert blank_page.annotations is None

    blank_page.annotations = None
    assert blank_page.annotations is None


def test_replace_contents_on_reader():
    """Replacing the contents of a reader page emits the deprecation warning."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    page = reader.pages[0]
    content_stream = ContentStream(stream=None, pdf=reader)
    content_stream.set_data(b"Test data")

    expected_message = (
        "Calling `PageObject.replace_contents()` for pages not assigned to a writer is deprecated and "
        "will be removed in pypdf 7.0.0. Attach the page to the writer first or use `PdfWriter(clone_from=...)` "
        "directly. The existing approach has proved being unreliable."
    )
    # Anchor the escaped text to require a match of the complete message.
    pattern = rf"^{re.escape(expected_message)}$"
    with pytest.warns(DeprecationWarning, match=pattern):
        page.replace_contents(content_stream)


@pytest.mark.enable_socket
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_replace_contents_on_reader__indirect_reference():
    """Adding a reader page after merging onto it must not fail (file named for issue 3568)."""
    url = "https://github.com/user-attachments/files/24195534/test.pdf"
    pdf_data = get_data_from_url(url, name="issue3568.pdf")
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()

    # An unmodified page goes in first.
    untouched_page = reader.get_page(3)
    writer.add_page(untouched_page)

    # The second page is changed on the reader side before being added.
    merged_page = reader.get_page(1)
    merged_page.merge_page(PageObject.create_blank_page(reader))
    writer.add_page(merged_page)


def test_merge_page__coverage():
    """Exercise merge code paths which are otherwise untested."""
    # The receiving page has no resources.
    receiver = PageObject.create_blank_page(width=10, height=10)
    del receiver[PageAttributes.RESOURCES]
    receiver.merge_page(PageObject.create_blank_page(width=10, height=10))

    # The page being merged in has no resources.
    stripped = PageObject.create_blank_page(width=10, height=10)
    del stripped[PageAttributes.RESOURCES]
    PageObject.create_blank_page(width=10, height=10).merge_page(stripped)

    # Without expansion, the media box stays as it is.
    receiver = PageObject.create_blank_page(width=10, height=10)
    receiver.merge_page(PageObject.create_blank_page(width=20, height=30))
    assert receiver.mediabox == RectangleObject((0.0, 0.0, 10, 10))

    # With expansion, the media box grows to cover both pages.
    receiver = PageObject.create_blank_page(width=10, height=10)
    receiver.merge_page(PageObject.create_blank_page(width=20, height=5), expand=True)
    assert receiver.mediabox == RectangleObject((0.0, 0.0, 20, 10))

    # With transformation: once as `Transformation`, once as its raw matrix.
    path = RESOURCE_ROOT / "crazyones.pdf"
    source = PdfWriter(clone_from=path).pages[0]
    source.indirect_reference = None
    transformation = Transformation().rotate(90)
    for ctm in (transformation, transformation.ctm):
        receiver = PageObject.create_blank_page(width=20, height=5)
        receiver.merge_transformed_page(source, ctm=ctm, expand=True)
        assert receiver.mediabox == RectangleObject((-792, 0.0, 20, 612))

    # Merging below the existing content instead of over it.
    source = PdfWriter(clone_from=path).pages[0]
    source.indirect_reference = None
    receiver = PageObject.create_blank_page(width=20, height=5)
    receiver.merge_page(source, over=False)


@pytest.mark.enable_socket
def test_importing_without_pillow(tmp_path):
    """Importing pypdf in a subprocess where `PIL` is blocked sets `pil_not_imported`."""
    source_file = tmp_path / "script.py"
    source_file.write_text(
        """
import sys
sys.modules["PIL"] = None

from pypdf import PageObject
from pypdf._page import pil_not_imported

print(pil_not_imported)
"""
    )

    env = os.environ.copy()
    env["COVERAGE_PROCESS_START"] = "pyproject.toml"
    # Put the current directory first on the module search path of the child.
    inherited_path = env.get("PYTHONPATH")
    if inherited_path is None:
        env["PYTHONPATH"] = "."
    else:
        env["PYTHONPATH"] = "." + os.pathsep + inherited_path

    result = subprocess.run(  # noqa: S603  # We have the control here.
        [sys.executable, source_file],
        capture_output=True,
        env=env,
    )
    assert result.returncode == 0
    assert result.stdout.replace(b"\r\n", b"\n") == b"True\n"
    assert result.stderr == b""


@pytest.mark.enable_socket
def test_replace_contents__null_object_cloning_error():
    """A document with scaled pages can be written and read back (file named for issue 3632)."""
    url = "https://github.com/user-attachments/files/25240822/ML-4.30.24.pdf"
    pdf_data = get_data_from_url(url=url, name="issue3632.pdf")
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()

    for source_page in reader.pages:
        writer.add_page(source_page).scale_by(1)

    # Inspect the fourth page and the object directly following it.
    page4_idnum = writer.pages[3].indirect_reference.idnum
    assert isinstance(writer.get_object(page4_idnum)["/Contents"], ContentStream)
    assert isinstance(writer.get_object(page4_idnum + 1), NullObject)

    output = BytesIO()
    writer.write(output)

    reader = PdfReader(output)
    assert len(reader.pages) == 10


def test_get_rectangle__size_handling(caplog):
    """
    Page boxes with more than four entries are still accepted.

    See issue #2991 and related ones. We would previously generate invalid page boxes when they
    were part of the `/Pages` instead of the `/Page` due to re-using the same target object,
    while appending to the existing "full" object. To keep compatibility with our old code,
    allow these boxes to have more than four entries.
    """
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"

    # Regular box with four entries: nothing gets logged.
    reader = PdfReader(pdf_path)
    page = reader.pages[0]
    assert page.mediabox == RectangleObject((0, 0, 612, 792))
    assert caplog.messages == []

    # Box with eight entries: the first four are used and a message is logged.
    reader = PdfReader(pdf_path)
    page = reader.pages[0]
    page[NameObject("/MediaBox")] = ArrayObject([0, 0, 13, 37, 0, 0, 13, 37])
    assert page.mediabox == RectangleObject((0, 0, 13, 37))
    assert "Expected four values, got 8: [0, 0, 13, 37, 0, 0, 13, 37]\n" in caplog.text


================================================
FILE: tests/test_page_labels.py
================================================
"""Test the pypdf._page_labels module."""
from io import BytesIO

import pytest

from pypdf import PdfReader
from pypdf._page_labels import (
    get_label_from_nums,
    index2label,
    number2lowercase_letter,
    number2lowercase_roman_numeral,
    number2uppercase_letter,
    number2uppercase_roman_numeral,
    nums_clear_range,
    nums_insert,
    nums_next,
)
from pypdf.generic import (
    ArrayObject,
    DictionaryObject,
    NameObject,
    NullObject,
    NumberObject,
)

from . import RESOURCE_ROOT, get_data_from_url


@pytest.mark.parametrize(
    ("number", "expected"),
    [
        (1, "I"), (2, "II"), (3, "III"), (4, "IV"), (5, "V"),
        (6, "VI"), (7, "VII"), (8, "VIII"), (9, "IX"), (10, "X"),
    ],
)
def test_number2uppercase_roman_numeral(number, expected):
    """The numbers 1 to 10 are converted to their uppercase roman numerals."""
    numeral = number2uppercase_roman_numeral(number)
    assert numeral == expected


def test_number2lowercase_roman_numeral():
    """A three-digit number is converted to its lowercase roman numeral."""
    numeral = number2lowercase_roman_numeral(123)
    assert numeral == "cxxiii"


@pytest.mark.parametrize(
    ("number", "expected"),
    [(1, "a"), (2, "b"), (3, "c"), (25, "y"), (26, "z"), (27, "aa"), (28, "ab")],
)
def test_number2lowercase_letter(number, expected):
    """Letter labels run from `a` to `z` and continue with `aa`, `ab`."""
    label = number2lowercase_letter(number)
    assert label == expected


def test_number2uppercase_letter():
    """A negative number is rejected with a `ValueError`."""
    negative_number = -1
    with pytest.raises(ValueError):
        number2uppercase_letter(negative_number)


@pytest.mark.enable_socket
def test_index2label(caplog):
    """Exercise `index2label` and the `/Nums` helpers, including malformed label data."""
    name = "waarom-meisjes-het-beter-doen-op-HAVO-en-VWO-ROA.pdf"
    r = PdfReader(BytesIO(get_data_from_url(name=name)))
    assert index2label(r, 1) == "ii"
    assert index2label(r, 9) == "6"

    page_labels = r.trailer["/Root"]["/PageLabels"]
    nums = page_labels["/Nums"]
    # very silly data to get test cover
    nums.append(8)
    nums.append(NullObject())
    assert index2label(r, 9) == "10"

    with pytest.raises(ValueError):
        nums_clear_range(NumberObject(10), 8, nums)
    nums.append(8)
    with pytest.raises(ValueError):
        nums_next(NumberObject(10), nums)
    with pytest.raises(ValueError):
        nums_clear_range(NumberObject(10), 8, nums)
    with pytest.raises(ValueError):
        nums_insert(NumberObject(10), DictionaryObject(), nums)

    # Without `/Nums`, and afterwards with a null `/Kids` entry.
    del page_labels["/Nums"]
    assert index2label(r, 1) == "2"
    caplog.clear()
    page_labels[NameObject("/Kids")] = NullObject()
    assert index2label(r, 1) == "2"
    assert caplog.text != ""


@pytest.mark.enable_socket
def test_index2label_kids():
    """Page labels are resolved for the referenced document."""
    url = "https://github.com/py-pdf/pypdf/files/14858124/Terminologie_Epochen.Schwerpunkte.Umsetzungen.pdf"
    r = PdfReader(BytesIO(get_data_from_url(url=url, name="index2label_kids.pdf")))

    # Some page labels are unused. Skipping them is still easier than copying
    # the whole list itself here.
    unused = {"20", "44", "58", "82", "94", "116", "154", "166", "192", "224", "250"}
    roman_part = [
        "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX",
        "X", "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII",
    ]
    decimal_part = [str(number) for number in range(1, 284) if str(number) not in unused]
    expected = ["C1", *roman_part, *decimal_part]
    assert r.page_labels == expected


@pytest.mark.enable_socket
def test_index2label_kids__recursive(caplog):
    """The expected page labels are produced and something gets logged along the way."""
    url = "https://github.com/py-pdf/pypdf/files/14842446/tt1.pdf"
    pdf_data = get_data_from_url(url=url, name="index2label_kids_recursive.pdf")
    r = PdfReader(BytesIO(pdf_data))

    # Sixteen letter labels, followed by three decimal labels.
    expected = [*"ABCDEFGHIJKLMNOP", "17", "18", "19"]
    assert r.page_labels == expected
    assert caplog.text != ""


def test_get_label_from_nums__empty_nums_list():
    """With an empty `/Nums` array, the label is the one-based page number."""
    number_tree = DictionaryObject()
    number_tree[NameObject("/Nums")] = ArrayObject()
    label = get_label_from_nums(number_tree, 13)
    assert label == "14"


def test_index2label__empty_kids_list():
    """With an empty `/Kids` array, the label is the one-based page number."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    empty_tree = DictionaryObject()
    empty_tree[NameObject("/Kids")] = ArrayObject()
    reader.root_object[NameObject("/PageLabels")] = empty_tree

    assert index2label(reader, 42) == "43"


================================================
FILE: tests/test_pagerange.py
================================================
"""Test the pypdf.pagerange module."""
import pytest

from pypdf.pagerange import PageRange, ParseError, parse_filename_page_ranges


def test_equality():
    """Two page ranges built from equal slices compare equal."""
    first = PageRange(slice(0, 5))
    second = PageRange(slice(0, 5))
    assert first == second


def test_hash():
    """Hashes agree for equal slices and differ for different ones."""
    same_a = PageRange(slice(0, 5))
    same_b = PageRange(slice(0, 5))
    assert hash(same_a) == hash(same_b)

    without_step = PageRange(slice(10, 11))
    with_step = PageRange(slice(10, 11, 1))
    assert hash(same_a) != hash(without_step)
    # Consider this different for now, although slicing with step size of 1 and `None` should be identical.
    assert hash(without_step) != hash(with_step)


@pytest.mark.parametrize(
    ("page_range", "expected"),
    [
        (slice(0, 5), "0:5"),
        (slice(0, 5, 2), "0:5:2"),
        ("-1", "-1:"),
        ("0", "0"),
    ],
)
def test_str(page_range, expected):
    """`str()` of a page range gives the expected range expression."""
    rendered = str(PageRange(page_range))
    assert rendered == expected


@pytest.mark.parametrize(
    ("page_range", "expected"),
    [
        (slice(0, 5), "PageRange('0:5')"),
        (slice(0, 5, 2), "PageRange('0:5:2')"),
    ],
)
def test_repr(page_range, expected):
    """`repr()` of a page range gives the expected constructor-like text."""
    rendered = repr(PageRange(page_range))
    assert rendered == expected


def test_equality_other_objectc():
    """A page range is not equal to a plain string."""
    page_range = PageRange(slice(0, 5))
    plain_string = "PageRange(slice(0, 5))"
    assert page_range != plain_string


def test_idempotency():
    """Building a page range from another page range gives an equal one."""
    original = PageRange(slice(0, 5))
    rebuilt = PageRange(original)
    assert original == rebuilt


@pytest.mark.parametrize(
    ("range_str", "expected"),
    [
        ("42", slice(42, 43)),
        ("1:2", slice(1, 2)),
    ],
)
def test_str_init(range_str, expected):
    """A valid range string passes `PageRange.valid` and is parsed into the expected slice."""
    # `valid` has to be called: the bare function object is always truthy,
    # so asserting it without arguments would never fail.
    assert PageRange.valid(range_str)
    pr = PageRange(range_str)
    assert pr._slice == expected


def test_str_init_error():
    """An invalid range string is reported as invalid and rejected on construction."""
    invalid_range = "1-2"
    assert PageRange.valid(invalid_range) is False
    with pytest.raises(ParseError) as exc:
        PageRange(invalid_range)
    # The offending input is the first argument of the error.
    first_argument = exc.value.args[0]
    assert first_argument == "1-2"


@pytest.mark.parametrize(
    ("params", "expected"),
    [
        (
            ["foo.pdf", "1:5"],
            [("foo.pdf", PageRange("1:5"))],
        ),
        (
            ["foo.pdf", "1:5", "bar.pdf"],
            [("foo.pdf", PageRange("1:5")), ("bar.pdf", PageRange(":"))],
        ),
    ],
)
def test_parse_filename_page_ranges(params, expected):
    """Filenames are paired with their page range, `:` if none is given."""
    parsed = parse_filename_page_ranges(params)
    assert parsed == expected


def test_parse_filename_page_ranges_err():
    """A page range in first position is rejected with a descriptive `ValueError`."""
    arguments = ["1:5", "foo.pdf"]
    with pytest.raises(ValueError) as exc:
        parse_filename_page_ranges(arguments)
    message = exc.value.args[0]
    assert message == "The first argument must be a filename, not a page range."


@pytest.mark.parametrize(
    ("a", "b", "expected"),
    [
        (PageRange(slice(0, 5)), PageRange(slice(2, 10)), slice(0, 10)),
        (PageRange(slice(0, 5)), PageRange(slice(2, 3)), slice(0, 5)),
        (PageRange(slice(0, 5)), PageRange(slice(5, 10)), slice(0, 10)),
    ],
)
def test_addition(a, b, expected):
    """Adding two page ranges gives the expected range, in either operand order."""
    left = PageRange(a)
    right = PageRange(b)
    combined = PageRange(expected)
    assert left + right == combined
    assert right + left == combined  # addition is commutative


@pytest.mark.parametrize(
    ("a", "b"),
    [
        (PageRange(slice(0, 5)), PageRange(slice(7, 10))),
        (PageRange(slice(7, 10)), PageRange(slice(0, 5))),
    ],
)
def test_addition_gap(a: PageRange, b: PageRange):
    """Page ranges with a gap between them cannot be added."""
    with pytest.raises(ValueError) as exc:
        a + b
    message = exc.value.args[0]
    assert message == "Can't add PageRanges with gap"


def test_addition_non_page_range():
    """Adding a string to a page range raises a `TypeError` naming the type."""
    page_range = PageRange(slice(0, 5))
    with pytest.raises(TypeError) as exc:
        page_range + "2:7"
    message = exc.value.args[0]
    assert message == "Can't add PageRange and <class 'str'>"


def test_addition_stride():
    """A page range with a stride cannot be added to another one."""
    strided = PageRange(slice(0, 5, 2))
    plain = PageRange(slice(7, 9))
    with pytest.raises(ValueError) as exc:
        strided + plain
    message = exc.value.args[0]
    assert message == "Can't add PageRange with stride"


================================================
FILE: tests/test_papersizes.py
================================================
"""Test the pypdf.papersizes module."""
import pytest

from pypdf import papersizes


def test_din_a0_paper_size():
    """The dimensions and area of the DIN A0 paper size are correct."""
    dim = papersizes.PaperSize.A0
    area_square_pixels = float(dim.width) * dim.height

    # 72 pixels are 1 inch
    area_square_inch = area_square_pixels / 72**2

    # 25.4 millimeters are equal to 1 inch
    area_square_mm = area_square_inch * (25.4) ** 2
    assert abs(area_square_mm - 999949) < 100

    # DIN A0 measures 841 mm x 1189 mm. Compare absolute differences, as a
    # signed difference would let any too-small value pass, and check the
    # long edge against the height instead of the width.
    conversion_factor = 72 / 25.4
    assert abs(dim.width - 841 * conversion_factor) < 1
    assert abs(dim.height - 1189 * conversion_factor) < 1


@pytest.mark.parametrize("dimensions", papersizes._din_a)
def test_din_a_aspect_ratio(dimensions):
    """The height of each DIN A size is about the square root of two times its width."""
    ideal_height = dimensions.width * 2**0.5
    assert abs(dimensions.height - ideal_height) <= 2.5


@pytest.mark.parametrize(
    ("dimensions_a", "dimensions_b"),
    list(zip(papersizes._din_a, papersizes._din_a[1:])),
)
def test_din_a_size_doubling(dimensions_a, dimensions_b):
    """The height of a DIN A size is about twice the width of the size following it."""
    doubled_width = 2 * dimensions_b.width
    assert abs(dimensions_a.height - doubled_width) <= 4


================================================
FILE: tests/test_pdfa.py
================================================
"""Ensure that pypdf doesn't break PDF/A compliance."""

from io import BytesIO
from pathlib import Path
from typing import Optional

import pytest

from pypdf import PdfReader, PdfWriter
from tests import SAMPLE_ROOT


def is_pdfa1b_compliant(src: BytesIO):
    """
    Check if a PDF is PDF/A-1b compliant.

    Only one aspect is inspected: whether the document information has an
    analogous entry in the XMP metadata.
    """

    def document_information_has_analogous_xml(src: BytesIO) -> bool:
        reader = PdfReader(src)
        meta, xmp = reader.metadata, reader.xmp_metadata
        if not meta:
            # No document information: there is nothing which needs an analog.
            return True
        if not xmp:
            return False
        title_without_analog = meta.title and not xmp.dc_title
        if title_without_analog:
            return meta.title == xmp.dc_title
        return True

    return document_information_has_analogous_xml(src)


@pytest.mark.samples
@pytest.mark.parametrize(
    ("src", "diagnostic_write_name"),
    [
        (SAMPLE_ROOT / "021-pdfa/crazyones-pdfa.pdf", None),
    ],
)
def test_pdfa(src: Path, diagnostic_write_name: Optional[str]):
    """A compliant document stays compliant after cloning it into a writer."""
    original = BytesIO(src.read_bytes())
    reader = PdfReader(src)
    assert is_pdfa1b_compliant(original)

    writer = PdfWriter()
    writer.clone_document_from_reader(reader)
    stream = BytesIO()
    writer.write(stream)
    stream.seek(0)
    assert is_pdfa1b_compliant(stream)

    # Optionally keep the written document for manual inspection.
    if diagnostic_write_name:
        stream.seek(0)
        with open(diagnostic_write_name, "wb") as fp:
            fp.write(stream.read())


================================================
FILE: tests/test_protocols.py
================================================
"""Test the pypdf._protocols module."""
from pypdf._protocols import PdfObjectProtocol


class IPdfObjectProtocol(PdfObjectProtocol):
    """Subclass of the protocol without any overrides, to allow instantiating it."""


def test_pdfobjectprotocol():
    """Each protocol method of the bare subclass returns `None`."""
    protocol_object = IPdfObjectProtocol()
    assert protocol_object.clone(None, False, None) is None
    assert protocol_object._reference_clone(None, None) is None
    assert protocol_object.get_object() is None
    assert protocol_object.hash_value() is None
    assert protocol_object.write_to_stream(None) is None


================================================
FILE: tests/test_reader.py
================================================
"""Test the pypdf._reader module."""
import io
import sys
import time
from io import BytesIO
from pathlib import Path
from typing import Union

import pytest

from pypdf import PdfReader, PdfWriter
from pypdf._crypt_providers import crypt_provider
from pypdf._reader import convert_to_int
from pypdf.constants import ImageAttributes as IA
from pypdf.constants import PageAttributes as PG
from pypdf.constants import UserAccessPermissions as UAP
from pypdf.errors import (
    DeprecationError,
    EmptyFileError,
    FileNotDecryptedError,
    LimitReachedError,
    PdfReadError,
    PdfStreamError,
    WrongPasswordError,
)
from pypdf.generic import (
    ArrayObject,
    Destination,
    DictionaryObject,
    IndirectObject,
    NameObject,
    NumberObject,
    TextStringObject,
)

from . import RESOURCE_ROOT, SAMPLE_ROOT, get_data_from_url, normalize_warnings

# True when the first entry of `crypt_provider` names one of the two backends
# listed here (presumably the ones offering AES support - confirm against
# `pypdf._crypt_providers`).
HAS_AES = crypt_provider[0] in ["pycryptodome", "cryptography"]


# Recursive type alias: an int, `None`, or a list whose items are `NestedList` again.
NestedList = Union[int, None, list["NestedList"]]


@pytest.mark.parametrize(
    ("src", "num_pages"),
    [
        ("selenium-pypdf-issue-177.pdf", 1),
        ("pdflatex-outline.pdf", 4),
    ],
)
def test_get_num_pages(src, num_pages):
    """The reader reports the expected page count and a trailer with `/Size`."""
    pdf_path = RESOURCE_ROOT / src
    with PdfReader(pdf_path) as reader:
        page_count = len(reader.pages)
        assert page_count == num_pages
        # from #1911
        assert "/Size" in reader.trailer


@pytest.mark.parametrize(
    ("pdf_path", "expected"),
    [
        (
            RESOURCE_ROOT / "crazyones.pdf",
            {
                "/CreationDate": "D:20150604133406-06'00'",
                "/Creator": " XeTeX output 2015.06.04:1334",
                "/Producer": "xdvipdfmx (20140317)",
            },
        ),
        (
            RESOURCE_ROOT / "metadata.pdf",
            {
                "/CreationDate": "D:20220415093243+02'00'",
                "/ModDate": "D:20220415093243+02'00'",
                "/Creator": "pdflatex, or other tool",
                "/Producer": "Latex with hyperref, or other system",
                "/Author": "Martin Thoma",
                "/Keywords": "Some Keywords, other keywords; more keywords",
                "/Subject": "The Subject",
                "/Title": "The Title",
                "/Trapped": "/False",
                "/PTEX.Fullbanner": (
                    "This is pdfTeX, Version "
                    "3.141592653-2.6-1.40.23 (TeX Live 2021) "
                    "kpathsea version 6.3.3"
                ),
            },
        ),
    ],
    ids=["crazyones", "metadata"],
)
def test_read_metadata(pdf_path, expected):
    """The document information matches the expectation and every accessor can be read."""
    # Read in this order; only checked for not raising.
    accessor_names = (
        "title",
        "title_raw",
        "author",
        "author_raw",
        "creator",
        "creator_raw",
        "producer",
        "producer_raw",
        "subject",
        "subject_raw",
        "creation_date",
        "creation_date_raw",
        "modification_date",
        "modification_date_raw",
        "keywords",
        "keywords_raw",
    )
    with open(pdf_path, "rb") as inputfile:
        reader = PdfReader(inputfile)
        docinfo = reader.metadata
        assert docinfo is not None
        metadict = dict(docinfo)
        assert metadict == expected
        for accessor_name in accessor_names:
            getattr(docinfo, accessor_name)
        if "/Title" in metadict:
            assert isinstance(docinfo.title, str)
            assert metadict["/Title"] == docinfo.title


def test_read_metadata_title_is_utf8():
    """The title of `bytes.pdf` is returned as a correctly decoded string."""
    pdf_path = RESOURCE_ROOT / "bytes.pdf"
    with open(pdf_path, "rb") as inputfile:
        metadata = PdfReader(inputfile).metadata
        title = metadata.title
        # Should be a str.
        assert title == "Microsoft Word - トランスバース社買収電話会議英語Final.docx"


def test_iss1943():
    """Unusual date values can be read, and a numeric creation date yields `None`."""
    with PdfReader(RESOURCE_ROOT / "crazyones.pdf") as reader:
        docinfo = reader.metadata
        date_text = "D:20230705005151Z00'00'"
        docinfo.update(
            {
                NameObject("/CreationDate"): TextStringObject(date_text),
                NameObject("/ModDate"): TextStringObject(date_text),
            }
        )
        # Only checked for not raising.
        docinfo.creation_date
        docinfo.creation_date_raw
        docinfo.modification_date
        docinfo.modification_date_raw

        docinfo.update({NameObject("/CreationDate"): NumberObject(1)})
        assert docinfo.creation_date is None


@pytest.mark.samples
@pytest.mark.parametrize(
    "pdf_path", [SAMPLE_ROOT / "017-unreadable-meta-data/unreadablemetadata.pdf"]
)
def test_broken_meta_data(pdf_path):
    """Unreadable metadata yields `None`; an `/Info` entry which is no reference raises."""
    with open(pdf_path, "rb") as f:
        reader = PdfReader(f)
        assert reader.metadata is None

    # Break the `/Info` reference of a valid document.
    pdf_data = (RESOURCE_ROOT / "crazyones.pdf").read_bytes()
    broken_data = pdf_data.replace(b"/Info 2 0 R", b"/Info 2    ")
    reader = PdfReader(BytesIO(broken_data))
    with pytest.raises(PdfReadError) as exc:
        reader.metadata
    assert "does not point to a document information dictionary" in repr(exc)


@pytest.mark.parametrize(
    "src",
    [
        RESOURCE_ROOT / "crazyones.pdf",
        RESOURCE_ROOT / "commented.pdf",
    ],
)
def test_get_annotations(src):
    """The contents of every text annotation can be accessed."""
    with PdfReader(src) as reader:
        for page in reader.pages:
            if PG.ANNOTS not in page:
                continue
            for annot in page[PG.ANNOTS]:
                subtype = annot.get_object()[IA.SUBTYPE]
                if subtype == "/Text":
                    # Only checked for not raising.
                    annot.get_object()[PG.CONTENTS]


@pytest.mark.parametrize(
    ("src", "nb_attachments"),
    [
        (RESOURCE_ROOT / "attachment.pdf", 1),
        (RESOURCE_ROOT / "crazyones.pdf", 0),
    ],
)
def test_get_attachments(src, nb_attachments):
    """The number of file attachment annotations matches the expectation."""
    reader = PdfReader(src)

    # Maps the file name to the data of the embedded file.
    attachments = {}
    for page in reader.pages:
        if PG.ANNOTS not in page:
            continue
        for annotation in page[PG.ANNOTS]:
            annotobj = annotation.get_object()
            if annotobj[IA.SUBTYPE] == "/FileAttachment":
                fileobj = annotobj["/FS"]
                attachments[fileobj["/F"]] = fileobj["/EF"]["/F"].get_data()
    assert len(attachments) == nb_attachments


@pytest.mark.parametrize(
    ("src", "outline_elements"),
    [
        (RESOURCE_ROOT / "pdflatex-outline.pdf", 9),
        (RESOURCE_ROOT / "crazyones.pdf", 0),
    ],
)
def test_get_outline(src, outline_elements):
    """The outline has the expected number of top-level elements."""
    reader = PdfReader(src)
    element_count = len(reader.outline)
    assert element_count == outline_elements


@pytest.mark.samples
@pytest.mark.parametrize(
    ("src", "expected_images"),
    [
        ("pdflatex-outline.pdf", []),
        ("crazyones.pdf", []),
        ("git.pdf", ["Image9.png"]),
        pytest.param(
            "imagemagick-lzw.pdf",
            ["Im0.png"],
            marks=pytest.mark.xfail(reason="broken image extraction"),
        ),
        pytest.param(
            "imagemagick-ASCII85Decode.pdf",
            ["Im0.png"],
            # marks=pytest.mark.xfail(reason="broken image extraction"),
        ),
        ("imagemagick-CCITTFaxDecode.pdf", ["Im0.tiff"]),
        (SAMPLE_ROOT / "019-grayscale-image/grayscale-image.pdf", ["X0.png"]),
    ],
)
def test_get_images(src, expected_images):
    """The first page yields the expected images, named after their actual format."""
    from PIL import Image  # noqa: PLC0415

    reader = PdfReader(RESOURCE_ROOT / src)
    first_page = reader.pages[0]
    images_extracted = first_page.images

    assert len(images_extracted) == len(expected_images)
    for image, expected_name in zip(images_extracted, expected_images):
        assert image.name == expected_name
        # The file extension has to agree with the format Pillow detects.
        extension = image.name.split(".")[-1].upper()
        assert extension == Image.open(io.BytesIO(image.data)).format


@pytest.mark.parametrize(
    ("strict", "with_prev_0", "startx_correction", "should_fail", "warning_msgs"),
    [
        (
            True,
            False,
            -1,
            False,
            [
                "startxref on same line as offset",
                "Xref table not zero-indexed. "
                "ID numbers for objects will be corrected.",
            ],
        ),  # all nominal => no fail
        (True, True, -1, True, ""),  # Prev=0 => fail expected
        (
            False,
            False,
            -1,
            False,
            [
                "startxref on same line as offset",
            ],
        ),
        (
            False,
            True,
            -1,
            False,
            [
                "startxref on same line as offset",
                "/Prev=0 in the trailer - assuming there is no previous xref table",
            ],
        ),  # Prev =0 => no strict so tolerant
        (True, False, 0, True, ""),  # error on startxref, in strict => fail expected
        (True, True, 0, True, ""),
        (
            False,
            False,
            0,
            False,
            [
                "startxref on same line as offset",
                "incorrect startxref pointer(1)",
                "parsing for Object Streams",
            ],
        ),  # error on startxref, but no strict => xref rebuilt,no fail
        (
            False,
            True,
            0,
            False,
            [
                "startxref on same line as offset",
                "incorrect startxref pointer(1)",
                "parsing for Object Streams",
            ],
        ),
    ],
)
def test_get_images_raw(
    caplog, strict, with_prev_0, startx_correction, should_fail, warning_msgs
):
    """
    Open a hand-built PDF with different xref/trailer defects.

    The parameters select strict mode, whether the trailer carries
    ``/Prev 0``, and an offset correction applied to the ``startxref``
    value. Depending on ``should_fail`` the reader must either raise
    ``PdfReadError`` or emit exactly the warnings in ``warning_msgs``.
    """
    # Template: the %-placeholders are filled below with object offsets,
    # the optional /Prev entry and the startxref value.
    pdf_data = (
        b"%%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
        b"xref 1 5\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"trailer << %s/Root 5 0 R /Size 6 >>\n"
        b"startxref %d\n"
        b"%%%%EOF"
    )
    pdf_data = pdf_data % (
        # - 1 below in the find because of the double %
        pdf_data.find(b"1 0 obj") - 1,
        pdf_data.find(b"2 0 obj") - 1,
        pdf_data.find(b"3 0 obj") - 1,
        pdf_data.find(b"4 0 obj") - 1,
        pdf_data.find(b"5 0 obj") - 1,
        b"/Prev 0 " if with_prev_0 else b"",
        # startx_correction should be -1 due to double % at the beginning
        # inducing an error on startxref computation
        pdf_data.find(b"xref") + startx_correction,
    )
    pdf_stream = io.BytesIO(pdf_data)
    if should_fail:
        with pytest.raises(PdfReadError) as exc:
            PdfReader(pdf_stream, strict=strict)
        assert exc.type == PdfReadError
        # The message is only pinned when startxref itself is correct,
        # i.e. when /Prev 0 is the reason for the failure.
        if startx_correction == -1:
            assert (
                exc.value.args[0]
                == "/Prev=0 in the trailer (try opening with strict=False)"
            )
    else:
        PdfReader(pdf_stream, strict=strict)
        assert normalize_warnings(caplog.text) == warning_msgs


def test_issue297(caplog):
    """Strict mode rejects the broken xref table; non-strict mode warns and reads."""
    path = RESOURCE_ROOT / "issue-297.pdf"

    with pytest.raises(PdfReadError) as exc:
        PdfReader(path, strict=True)
    assert caplog.text == ""
    assert "Broken xref table" in exc.value.args[0]

    lenient_reader = PdfReader(path, strict=False)
    expected_warnings = [
        "incorrect startxref pointer(1)",
        "parsing for Object Streams",
    ]
    assert normalize_warnings(caplog.text) == expected_warnings
    lenient_reader.pages[0]


@pytest.mark.parametrize(
    ("pdffile", "password", "should_fail"),
    [
        ("encrypted-file.pdf", "test", False),
        ("encrypted-file.pdf", b"test", False),
        ("encrypted-file.pdf", "qwerty", True),
        ("encrypted-file.pdf", b"qwerty", True),
    ],
)
def test_get_page_of_encrypted_file(pdffile, password, should_fail):
    """
    Check if we can read a page of an encrypted file.

    This is a regression test for issue 327:
    IndexError for get_page() of decrypted file
    """
    path = RESOURCE_ROOT / pdffile
    if not should_fail:
        PdfReader(path, password=password).pages[0]
        return

    # A wrong password must already be rejected by the constructor.
    with pytest.raises(PdfReadError):
        PdfReader(path, password=password)


@pytest.mark.parametrize(
    ("src", "expected", "expected_get_fields"),
    [
        (
            "form.pdf",
            {"foo": ""},
            {"foo": {"/DV": "", "/FT": "/Tx", "/T": "foo", "/V": ""}},
        ),
        (
            "form_acrobatReader.pdf",
            {"foo": "Bar"},
            {"foo": {"/DV": "", "/FT": "/Tx", "/T": "foo", "/V": "Bar"}},
        ),
        (
            "form_evince.pdf",
            {"foo": "bar"},
            {"foo": {"/DV": "", "/FT": "/Tx", "/T": "foo", "/V": "bar"}},
        ),
        (
            "crazyones.pdf",
            {},
            None,
        ),
    ],
)
def test_get_form(src, expected, expected_get_fields, txt_file_path):
    """Check if we can read out form data."""
    reader = PdfReader(RESOURCE_ROOT / src)
    assert reader.get_form_text_fields() == expected

    # get_fields() additionally writes a report to the given file object.
    with open(txt_file_path, "w") as report:
        all_fields = reader.get_fields(fileobj=report)
    assert all_fields == expected_get_fields
    if not all_fields:
        return

    # Just access the attributes
    attribute_names = (
        "field_type",
        "parent",
        "kids",
        "name",
        "alternate_name",
        "mapping_name",
        "flags",
        "value",
        "default_value",
        "additional_actions",
    )
    for field in all_fields.values():
        for attribute_name in attribute_names:
            getattr(field, attribute_name)


@pytest.mark.enable_socket
def test_reading_choice_field_without_opt_key():
    """Tests reading a choice field in a PDF without an /Opt key."""
    url = "https://github.com/user-attachments/files/23853677/Musterservicevertrag-HNRAGB_Okt2022-Blanko.pdf"
    pdf_bytes = get_data_from_url(url, name="Musterservicevertrag-HNRAGB_Okt2022-Blanko.pdf")
    fields = PdfReader(BytesIO(pdf_bytes)).get_fields()

    choice_field = fields.get("TN_Anrede")
    assert choice_field is not None

    # Ensure that parsing of a choice field without /Opt key worked
    assert choice_field.get("/Opt") is None


@pytest.mark.parametrize(
    ("src", "page_number"),
    [
        ("form.pdf", 0),
        ("pdflatex-outline.pdf", 2),
    ],
)
def test_get_page_number(src, page_number):
    """A page taken from ``reader.pages`` must map back to its own index."""
    reader = PdfReader(RESOURCE_ROOT / src)
    reader.get_page(0)
    selected_page = reader.pages[page_number]
    assert reader.get_page_number(selected_page) == page_number


@pytest.mark.parametrize(
    ("src", "expected"),
    [("form.pdf", None), ("AutoCad_Simple.pdf", "/SinglePage")],
)
def test_get_page_layout(src, expected):
    """``page_layout`` must report the expected value for each document."""
    layout = PdfReader(RESOURCE_ROOT / src).page_layout
    assert layout == expected


@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("form.pdf", "/UseNone"),
        ("crazyones.pdf", None),
    ],
)
def test_get_page_mode(src, expected):
    """``page_mode`` must report the expected value for each document."""
    mode = PdfReader(RESOURCE_ROOT / src).page_mode
    assert mode == expected


def test_read_empty():
    """An empty stream must be rejected with ``EmptyFileError``."""
    empty_stream = io.BytesIO()
    with pytest.raises(EmptyFileError) as exc_info:
        PdfReader(empty_stream)
    assert exc_info.value.args[0] == "Cannot read an empty file"


def test_read_malformed_header(caplog):
    """A bad header raises in strict mode and is logged in non-strict mode."""
    bad_header = b"foo"

    with pytest.raises(PdfReadError) as exc_info:
        PdfReader(io.BytesIO(bad_header), strict=True)
    assert exc_info.value.args[0] == "PDF starts with 'foo', but '%PDF-' expected"

    caplog.clear()
    # Only the logged message matters here; whatever the non-strict
    # reader raises afterwards is deliberately ignored.
    try:
        PdfReader(io.BytesIO(bad_header), strict=False)
    except Exception:
        pass
    first_message = caplog.messages[0]
    assert first_message.startswith("invalid pdf header")


def test_read_malformed_body():
    """A header-only stream must fail in strict mode for lack of an EOF marker."""
    header_only = io.BytesIO(b"%PDF-")
    with pytest.raises(PdfReadError) as exc_info:
        PdfReader(header_only, strict=True)
    # used to be:STREAM_TRUNCATED_PREMATURELY
    assert exc_info.value.args[0] == "EOF marker not found"


def test_read_prev_0_trailer():
    """A trailer containing ``/Prev 0`` must raise ``PdfReadError`` in strict mode."""
    # Template: the %-placeholders are filled below with object offsets,
    # the /Prev entry and the startxref value.
    pdf_data = (
        b"%%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
        b"xref 1 5\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"trailer << %s/Root 5 0 R /Size 6 >>\n"
        b"startxref %d\n"
        b"%%%%EOF"
    )
    # Always True here, so the trailer always receives the "/Prev 0 " entry.
    with_prev_0 = True
    pdf_data = pdf_data % (
        pdf_data.find(b"1 0 obj"),
        pdf_data.find(b"2 0 obj"),
        pdf_data.find(b"3 0 obj"),
        pdf_data.find(b"4 0 obj"),
        pdf_data.find(b"5 0 obj"),
        b"/Prev 0 " if with_prev_0 else b"",
        # - 1 because the leading "%%" of the template collapses to a
        # single "%" once the template has been formatted.
        pdf_data.find(b"xref") - 1,
    )
    pdf_stream = io.BytesIO(pdf_data)
    with pytest.raises(PdfReadError) as exc:
        PdfReader(pdf_stream, strict=True)
    assert exc.value.args[0] == "/Prev=0 in the trailer (try opening with strict=False)"


def test_circular_xref_prev_reference(caplog):
    """Circular /Prev in trailer should be detected, not loop forever (#3654)."""
    # Template: the %-placeholders are filled below with object offsets,
    # the /Prev value and the startxref value.
    pdf_data = (
        b"%%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
        b"xref 1 5\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"trailer << /Prev %d /Root 5 0 R /Size 6 >>\n"
        b"startxref %d\n"
        b"%%%%EOF"
    )
    # - 1 because the leading "%%" of the template collapses to a single
    # "%" once the template has been formatted.
    xref_offset = pdf_data.find(b"xref") - 1
    pdf_data = pdf_data % (
        pdf_data.find(b"1 0 obj"),
        pdf_data.find(b"2 0 obj"),
        pdf_data.find(b"3 0 obj"),
        pdf_data.find(b"4 0 obj"),
        pdf_data.find(b"5 0 obj"),
        xref_offset,  # /Prev points to same xref = circular
        xref_offset,  # startxref
    )
    # Constructing the reader must return and log the detection message.
    PdfReader(io.BytesIO(pdf_data))
    assert "Circular xref chain detected" in caplog.text


def test_read_missing_startxref():
    """A file without a ``startxref`` line must raise ``PdfReadError`` in strict mode."""
    # Template: the %-placeholders are filled below with the object offsets.
    pdf_data = (
        b"%%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
        b"xref 1 5\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"trailer << /Root 5 0 R /Size 6 >>\n"
        # Removed for this test: b"startxref %d\n"
        b"%%%%EOF"
    )
    pdf_data = pdf_data % (
        pdf_data.find(b"1 0 obj"),
        pdf_data.find(b"2 0 obj"),
        pdf_data.find(b"3 0 obj"),
        pdf_data.find(b"4 0 obj"),
        pdf_data.find(b"5 0 obj"),
        # Removed for this test: pdf_data.find(b"xref") - 1,
    )
    pdf_stream = io.BytesIO(pdf_data)
    with pytest.raises(PdfReadError) as exc:
        PdfReader(pdf_stream, strict=True)
    assert exc.value.args[0] == "startxref not found"


def test_read_unknown_zero_pages(caplog):
    """
    Read a document whose catalog references ``/Pages 0 0 R``.

    Accessing the pages must raise ``PdfReadError`` in both strict and
    non-strict mode, with a different message in each mode.
    """
    # Template: the %-placeholders are filled below with object offsets
    # and the startxref value.
    pdf_data = (
        b"%%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        # Pages 0 0 is the key point:
        b"5 0 obj << /Pages 0 0 R /Type /Catalog >> endobj\n"
        b"xref 1 5\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"trailer << /Root 5 1 R /Size 6 >>\n"
        b"startxref %d\n"
        b"%%%%EOF"
    )
    # - 1 on every offset because the leading "%%" of the template
    # collapses to a single "%" once the template has been formatted.
    pdf_data = pdf_data % (
        pdf_data.find(b"1 0 obj") - 1,
        pdf_data.find(b"2 0 obj") - 1,
        pdf_data.find(b"3 0 obj") - 1,
        pdf_data.find(b"4 0 obj") - 1,
        pdf_data.find(b"5 0 obj") - 1,
        pdf_data.find(b"xref") - 1,
    )
    pdf_stream = io.BytesIO(pdf_data)
    reader = PdfReader(pdf_stream, strict=True)
    warnings = [
        "startxref on same line as offset",
        "Xref table not zero-indexed. ID numbers for objects will be corrected.",
    ]
    assert normalize_warnings(caplog.text) == warnings
    with pytest.raises(PdfReadError) as exc:
        len(reader.pages)

    assert exc.value.args[0] == "Could not find object."
    # Second pass in non-strict mode on the same stream; caplog is not
    # cleared, so the expected list is extended rather than replaced.
    reader = PdfReader(pdf_stream, strict=False)
    warnings += [
        "Object 5 1 not defined.",
        "startxref on same line as offset",
    ]
    assert normalize_warnings(caplog.text) == warnings
    with pytest.raises(PdfReadError) as exc:
        len(reader.pages)
    assert exc.value.args[0] == "Invalid object in /Pages"


def test_read_encrypted_without_decryption():
    """Accessing pages of an undecrypted file must raise ``FileNotDecryptedError``."""
    reader = PdfReader(RESOURCE_ROOT / "libreoffice-writer-password.pdf")
    with pytest.raises(FileNotDecryptedError) as exc_info:
        len(reader.pages)
    assert exc_info.value.args[0] == "File has not been decrypted"


def test_get_destination_page_number():
    """Resolve the page number of every top-level outline item without error."""
    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
    for entry in reader.outline:
        # Nested lists hold child items and are skipped here.
        if isinstance(entry, list):
            continue
        reader.get_destination_page_number(entry)


def test_do_not_get_stuck_on_large_files_without_start_xref():
    """
    Tests for the absence of a DoS bug, where a large file without a
    startxref mark would cause the library to hang for minutes to hours.
    """
    # Measure with the monotonic clock: unlike time.time(), it cannot jump
    # backwards or forwards when the system clock is adjusted mid-test.
    start_time = time.monotonic()
    broken_stream = BytesIO(b"\0" * 5 * 1000 * 1000)
    with pytest.raises(PdfReadError):
        PdfReader(broken_stream)
    parse_duration = time.monotonic() - start_time
    # parsing is expected take less than a second on a modern cpu, but include
    # a large tolerance to account for busy or slow systems
    assert parse_duration < 60


@pytest.mark.enable_socket
def test_decrypt_when_no_id():
    """
    Decrypt an encrypted file that's missing the 'ID' value in its trailer.

    https://github.com/py-pdf/pypdf/issues/608
    """
    expected_metadata = {"/Producer": "European Patent Office"}
    with open(RESOURCE_ROOT / "encrypted_doc_no_id.pdf", "rb") as stream:
        reader = PdfReader(stream)
        reader.decrypt("")
        assert reader.metadata == expected_metadata


def test_reader_properties():
    """Check the basic reader properties of a simple one-page document."""
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    reader = PdfReader(pdf_path)

    assert reader.outline == []
    page_count = len(reader.pages)
    assert page_count == 1
    assert reader.page_layout is None
    assert reader.page_mode is None
    assert reader.is_encrypted is False


@pytest.mark.parametrize(
    "strict",
    [True, False],
)
def test_issue604(caplog, strict):
    """Test with invalid destinations."""
    with open(RESOURCE_ROOT / "issue-604.pdf", "rb") as stream:
        reader = PdfReader(stream, strict=strict)
        if strict:
            with pytest.raises(PdfReadError) as exc:
                reader.outline
            if "Unknown Destination" not in exc.value.args[0]:
                raise Exception("Expected exception not raised")
            return  # outline is not correct

        outline = reader.outline
        expected_warnings = [
            "Unknown destination: 'ms_Thyroid_2_2020_071520_watermarked.pdf' [0, 1]"
        ]
        assert normalize_warnings(caplog.text) == expected_warnings

        def to_page_numbers(item) -> NestedList:
            """Map an outline item, or nested list of items, to 1-based page numbers."""
            if isinstance(item, list):
                return [to_page_numbers(child) for child in item]
            zero_based = reader.get_destination_page_number(item)
            return None if zero_based is None else zero_based + 1

        # Each entry is either a destination or a nested list of them;
        # resolving all of them must simply not raise.
        for entry in outline:
            to_page_numbers(entry)


def test_decode_permissions():
    """Calling the removed ``decode_permissions`` must raise ``DeprecationError``."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    deprecation_pattern = (
        r"decode_permissions is deprecated and was removed in pypdf 5\.0\.0\. "
        r"Use user_access_permissions instead"
    )
    no_permissions = {
        "accessability": False,  # Do not fix typo, as part of official, but deprecated API.
        "annotations": False,
        "assemble": False,
        "copy": False,
        "forms": False,
        "modify": False,
        "print_high_quality": False,
        "print": False,
    }

    only_print = {**no_permissions, "print": True}
    with pytest.raises(DeprecationError, match=deprecation_pattern):
        assert reader.decode_permissions(4) == only_print

    only_modify = {**no_permissions, "modify": True}
    with pytest.raises(DeprecationError, match=deprecation_pattern):
        assert reader.decode_permissions(8) == only_modify


@pytest.mark.skipif(not HAS_AES, reason="No AES implementation")
def test_user_access_permissions():
    """Check ``user_access_permissions`` for plain, encrypted and re-written files."""

    def reread_with_permissions(flag):
        """Encrypt a clone of crazyones.pdf with *flag* and read it back."""
        writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
        writer.encrypt(
            user_password="",
            owner_password="abc",
            permissions_flag=flag,
        )
        buffer = BytesIO()
        writer.write(buffer)
        return PdfReader(buffer)

    # Not encrypted.
    plain_reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    assert plain_reader.user_access_permissions is None

    # Encrypted.
    encrypted_reader = PdfReader(RESOURCE_ROOT / "encryption" / "r6-owner-password.pdf")
    assert encrypted_reader.user_access_permissions == UAP.all()

    # Custom writer permissions.
    custom_reader = reread_with_permissions(UAP.PRINT | UAP.FILL_FORM_FIELDS)
    assert custom_reader.user_access_permissions == (UAP.PRINT | UAP.FILL_FORM_FIELDS)

    # All writer permissions.
    full_reader = reread_with_permissions(UAP.all())
    assert full_reader.user_access_permissions == UAP.all()


def test_pages_attribute():
    """Check slicing and out-of-range indexing of ``reader.pages``."""
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    reader = PdfReader(pdf_path)

    # Test if getting as slice throws an error
    assert len(reader.pages[:]) == 1

    # Capture the exception of each access separately, so that the message
    # check applies to that access and not to a previously raised error.
    for index in (-1000, 1000):
        with pytest.raises(IndexError) as exc:
            reader.pages[index]
        assert exc.value.args[0] == "Sequence index out of range"


def test_convert_to_int():
    """A single ``0x01`` byte with size 8 converts to the integer 1."""
    converted = convert_to_int(b"\x01", 8)
    assert converted == 1


def test_convert_to_int_error():
    """A size of 16 must be rejected with ``PdfReadError``."""
    with pytest.raises(PdfReadError) as exc_info:
        convert_to_int(b"256", 16)
    message = exc_info.value.args[0]
    assert message == "Invalid size in convert_to_int"


@pytest.mark.enable_socket
def test_iss925():
    """Resolve every annotation object of every page without error."""
    url = "https://github.com/py-pdf/pypdf/files/8796328/1.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name="iss925.pdf")))

    for page in reader.pages:
        # Extracts the PDF's Annots (Annotations and Commenting):
        annots = page.get_object().get("/Annots")
        if annots is None:
            continue
        for annot in annots:
            annot.get_object()


def test_get_object():
    """Object 22 of hello-world.pdf is the catalog through both lookup methods."""
    reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf")
    via_public_api = reader.get_object(22)
    assert via_public_api["/Type"] == "/Catalog"
    via_private_api = reader._get_indirect_object(22, 0)
    assert via_private_api["/Type"] == "/Catalog"


def test_extract_text_hello_world():
    """Extract the multilingual text of the first page, line by line."""
    expected_lines = [
        "English:",
        "Hello World",
        "Arabic:",
        "مرحبا بالعالم",
        "Russian:",
        "Привет, мир",
        "Chinese (traditional):",
        "你好世界",
        "Thai:",
        "สวัสดีชาวโลก",
        "Japanese:",
        "こんにちは世界",
    ]
    first_page = PdfReader(RESOURCE_ROOT / "hello-world.pdf").pages[0]
    extracted_lines = first_page.extract_text().split("\n")
    assert extracted_lines == expected_lines


def test_read_path():
    """``PdfReader`` accepts a ``pathlib.Path`` as input."""
    pdf_path = Path(RESOURCE_ROOT, "crazyones.pdf")
    page_count = len(PdfReader(pdf_path).pages)
    assert page_count == 1


def test_read_not_binary_mode(caplog):
    """A text-mode file object triggers a warning and then fails to be read."""
    expected_warning = (
        "PdfReader stream/file object is not in binary mode. "
        "It may not be read correctly."
    )
    # Deliberately opened without "rb".
    with open(RESOURCE_ROOT / "crazyones.pdf") as text_stream:
        with pytest.raises(io.UnsupportedOperation):
            PdfReader(text_stream)
    assert normalize_warnings(caplog.text) == [expected_warning]


@pytest.mark.enable_socket
@pytest.mark.skipif(not HAS_AES, reason="No AES algorithm available")
def test_read_form_416():
    """The downloaded form must expose at least one text field."""
    url = (
        "https://www.fda.gov/downloads/AboutFDA/ReportsManualsForms/Forms/UCM074728.pdf"
    )
    pdf_bytes = get_data_from_url(url, name="issue_416.pdf")
    text_fields = PdfReader(BytesIO(pdf_bytes)).get_form_text_fields()
    assert len(text_fields) > 0


def test_form_topname_with_and_without_acroform(caplog):
    """Exercise add/rename of the form top name with and without /AcroForm."""
    # Document without /AcroForm: the calls must not create one.
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    reader.add_form_topname("no")
    reader.rename_form_topname("renamed")
    assert "/AcroForm" not in reader.trailer["/Root"]

    # Same document with an empty /AcroForm: there are still no fields.
    reader.trailer["/Root"][NameObject("/AcroForm")] = DictionaryObject()
    reader.add_form_topname("toto")
    reader.rename_form_topname("renamed")
    assert len(reader.get_fields()) == 0

    # Real form: the top name is added first and renamed afterwards.
    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
    reader.add_form_topname("top")
    field_names = reader.get_fields()
    assert "top" in field_names
    assert "top.foo" in field_names
    reader.rename_form_topname("renamed")
    field_names = reader.get_fields()
    assert "renamed" in field_names
    assert "renamed.foo" in field_names

    # A field that already has a /Parent entry must be reported in the log.
    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
    foo_field = reader.get_fields()["foo"].indirect_reference.get_object()
    foo_field[NameObject("/Parent")] = DictionaryObject()
    reader.add_form_topname("top")
    assert "have a non-expected parent" in caplog.text


@pytest.mark.enable_socket
def test_extract_text_xref_issue_2(caplog):
    """Text extraction works and logs exactly the expected xref warnings."""
    # pdf/0264cf510015b2a4b395a15cb23c001e.pdf
    url = "https://github.com/user-attachments/files/18381758/tika-981961.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-981961.pdf")
    for page in PdfReader(BytesIO(pdf_bytes)).pages:
        page.extract_text()
    expected_warnings = [
        "incorrect startxref pointer(2)",
        "parsing for Object Streams",
    ]
    assert normalize_warnings(caplog.text) == expected_warnings


@pytest.mark.enable_socket
@pytest.mark.slow
def test_extract_text_xref_issue_3(caplog):
    """Text extraction works and logs exactly the expected xref warning."""
    # pdf/0264cf510015b2a4b395a15cb23c001e.pdf
    url = "https://github.com/user-attachments/files/18381755/tika-977774.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-977774.pdf")
    for page in PdfReader(BytesIO(pdf_bytes)).pages:
        page.extract_text()
    expected_warnings = [
        "incorrect startxref pointer(3)",
    ]
    assert normalize_warnings(caplog.text) == expected_warnings


@pytest.mark.enable_socket
def test_extract_text_pdf15():
    """Extract the text of every page without error."""
    # pdf/0264cf510015b2a4b395a15cb23c001e.pdf
    url = "https://github.com/user-attachments/files/18381751/tika-976030.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-976030.pdf")
    for page in PdfReader(BytesIO(pdf_bytes)).pages:
        page.extract_text()


@pytest.mark.enable_socket
def test_extract_text_xref_table_21_bytes_clrf():
    """Extract the text of every page without error."""
    # pdf/0264cf510015b2a4b395a15cb23c001e.pdf
    url = "https://github.com/user-attachments/files/18381723/tika-956939.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-956939.pdf")
    for page in PdfReader(BytesIO(pdf_bytes)).pages:
        page.extract_text()


@pytest.mark.enable_socket
def test_get_fields():
    """``get_fields`` returns the ``c1-1`` button field with its states."""
    url = "https://github.com/user-attachments/files/18381747/tika-972486.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-972486.pdf")
    fields = PdfReader(BytesIO(pdf_bytes)).get_fields()
    assert fields is not None
    assert "c1-1" in fields
    expected_field = {"/FT": "/Btn", "/T": "c1-1", "/_States_": ["/On", "/Off"]}
    assert dict(fields["c1-1"]) == expected_field


@pytest.mark.enable_socket
def test_get_full_qualified_fields():
    """Field names are fully qualified only when this is requested."""
    url = "https://github.com/py-pdf/pypdf/files/10142389/fields_with_dots.pdf"
    pdf_bytes = get_data_from_url(url, name="fields_with_dots.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))

    qualified_text_fields = reader.get_form_text_fields(True)
    assert qualified_text_fields is not None
    assert "customer.name" in qualified_text_fields

    short_text_fields = reader.get_form_text_fields(False)
    assert short_text_fields is not None
    assert "customer.name" not in short_text_fields
    assert "name" in short_text_fields

    all_fields = reader.get_fields(True)
    assert all_fields is not None
    assert "customer.name" in all_fields
    # The field's own /T entry keeps the short, unqualified name.
    assert all_fields["customer.name"]["/T"] == "name"


@pytest.mark.enable_socket
@pytest.mark.filterwarnings("ignore::pypdf.errors.PdfReadWarning")
def test_get_fields_read_else_block():
    """Constructing a reader for this document must not raise."""
    # covers also issue 1089
    url = "https://github.com/user-attachments/files/18381705/tika-934771.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-934771.pdf")
    PdfReader(BytesIO(pdf_bytes))


@pytest.mark.enable_socket
def test_get_fields_read_else_block2():
    """``get_fields`` returns ``None`` for this document."""
    url = "https://github.com/user-attachments/files/18381689/tika-914902.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-914902.pdf")
    assert PdfReader(BytesIO(pdf_bytes)).get_fields() is None


@pytest.mark.enable_socket
@pytest.mark.filterwarnings("ignore::pypdf.errors.PdfReadWarning")
def test_get_fields_read_else_block3():
    """Constructing a reader for this document must not raise."""
    url = "https://github.com/user-attachments/files/18381726/tika-957721.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-957721.pdf")
    PdfReader(BytesIO(pdf_bytes))


@pytest.mark.enable_socket
def test_metadata_is_none():
    """``metadata`` is ``None`` for this document."""
    url = "https://github.com/user-attachments/files/18381735/tika-963692.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-963692.pdf")
    assert PdfReader(BytesIO(pdf_bytes)).metadata is None


@pytest.mark.enable_socket
def test_get_fields_read_write_report(txt_file_path):
    """``get_fields`` returns fields while writing its report to a file object."""
    url = "https://github.com/user-attachments/files/18381683/tika-909655.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-909655.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    with open(txt_file_path, "w") as report:
        found_fields = reader.get_fields(fileobj=report)
    assert found_fields


@pytest.mark.parametrize(
    "src",
    [
        RESOURCE_ROOT / "crazyones.pdf",
        RESOURCE_ROOT / "commented.pdf",
    ],
)
def test_xfa(src):
    """``xfa`` is ``None`` for these documents."""
    xfa_data = PdfReader(src).xfa
    assert xfa_data is None


@pytest.mark.enable_socket
def test_xfa_non_empty():
    """``xfa`` exposes the expected keys in the expected order."""
    url = "https://github.com/user-attachments/files/18381713/tika-942050.pdf"
    pdf_bytes = get_data_from_url(url, name="tika-942050.pdf")
    expected_keys = [
        "preamble",
        "config",
        "template",
        "PDFSecurity",
        "datasets",
        "postamble",
    ]
    assert list(PdfReader(BytesIO(pdf_bytes)).xfa.keys()) == expected_keys


@pytest.mark.parametrize(
    ("src", "pdf_header"),
    [
        (RESOURCE_ROOT / "attachment.pdf", "%PDF-1.5"),
        (RESOURCE_ROOT / "crazyones.pdf", "%PDF-1.5"),
    ],
)
def test_header(src, pdf_header):
    """``pdf_header`` must report the expected header string."""
    actual_header = PdfReader(src).pdf_header
    assert actual_header == pdf_header


@pytest.mark.enable_socket
def test_outline_color():
    """The first outline item of the document must be blue."""
    # Pass the download URL explicitly; presumably a URL-less call only
    # works when the file is already in the local cache (to be confirmed
    # against get_data_from_url).
    url = "https://github.com/user-attachments/files/18381697/tika-924546.pdf"
    name = "tika-924546.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    assert reader.outline[0].color == [0, 0, 1]


@pytest.mark.enable_socket
def test_outline_font_format():
    """The first outline item of the document must have font format 2."""
    # Pass the download URL explicitly; presumably a URL-less call only
    # works when the file is already in the local cache (to be confirmed
    # against get_data_from_url).
    url = "https://github.com/user-attachments/files/18381697/tika-924546.pdf"
    name = "tika-924546.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    assert reader.outline[0].font_format == 2


def get_outline_property(outline, attribute_name: str):
    """
    Collect one attribute from every destination of a nested outline.

    Args:
        outline: A list whose items are ``Destination`` objects or nested
            lists of the same shape.
        attribute_name: Name of the attribute to read from each destination.

    Returns:
        A list mirroring the nesting of ``outline``, with every destination
        replaced by the value of its attribute.

    Raises:
        ValueError: If ``outline`` (at any nesting level) is not a list.
    """
    if not isinstance(outline, list):
        raise ValueError(f"got {type(outline)}")
    return [
        getattr(entry, attribute_name)
        if isinstance(entry, Destination)
        else get_outline_property(entry, attribute_name)
        for entry in outline
    ]


@pytest.mark.samples
def test_outline_title_issue_1121():
    """The nested outline titles must match the expected structure."""
    expected_titles = [
        "First",
        [
            "Second",
            "Third",
            "Fourth",
            [
                "Fifth",
                "Sixth",
            ],
            "Seventh",
            [
                "Eighth",
                "Ninth",
            ],
        ],
        "Tenth",
        [
            "Eleventh",
            "Twelfth",
            "Thirteenth",
            "Fourteenth",
        ],
        "Fifteenth",
        [
            "Sixteenth",
            "Seventeenth",
        ],
        "Eighteenth",
        "Nineteenth",
        [
            "Twentieth",
            "Twenty-first",
            "Twenty-second",
            "Twenty-third",
            "Twenty-fourth",
            "Twenty-fifth",
            "Twenty-sixth",
            "Twenty-seventh",
        ],
    ]
    reader = PdfReader(SAMPLE_ROOT / "014-outlines/mistitled_outlines_example.pdf")
    actual_titles = get_outline_property(reader.outline, "title")
    assert actual_titles == expected_titles


@pytest.mark.samples
def test_outline_count():
    """The ``outline_count`` of every outline item must match the expected structure."""
    expected_counts = [
        5,
        [
            None,
            None,
            2,
            [
                None,
                None,
            ],
            -2,
            [
                None,
                None,
            ],
        ],
        4,
        [
            None,
            None,
            None,
            None,
        ],
        -2,
        [
            None,
            None,
        ],
        None,
        8,
        [
            None,
            None,
            None,
            None,
            None,
            None,
            None,
            None,
        ],
    ]
    reader = PdfReader(SAMPLE_ROOT / "014-outlines/mistitled_outlines_example.pdf")
    actual_counts = get_outline_property(reader.outline, "outline_count")
    assert actual_counts == expected_counts


def test_outline_missing_title(caplog):
    """A missing /Title raises in strict mode and reads as "" otherwise."""
    pdf_path = RESOURCE_ROOT / "outline-without-title.pdf"

    # Strict
    strict_reader = PdfReader(pdf_path, strict=True)
    with pytest.raises(PdfReadError) as exc_info:
        strict_reader.outline
    message = exc_info.value.args[0]
    assert message.startswith("Outline Entry Missing /Title attribute:")

    # Non-strict : no errors
    lenient_reader = PdfReader(pdf_path, strict=False)
    assert lenient_reader.outline[0]["/Title"] == ""


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name"),
    [
        # 1st case : the named_dest are stored directly as a dictionary, PDF 1.1 style
        (
            "https://github.com/py-pdf/pypdf/files/9197028/lorem_ipsum.pdf",
            "lorem_ipsum.pdf",
        ),
        # 2nd case : Dest below names and with Kids...
        (
            "https://github.com/py-pdf/pypdf/files/11714214/PDF32000_2008.pdf",
            "PDF32000_2008.pdf",
        ),
        # 3rd case : Dests with Name tree (TODO: Add this case)
    ],
    ids=["stored_directly", "dest_below_names_with_kids"],
)
def test_named_destination(url, name):
    """The document must expose at least one named destination."""
    pdf_bytes = get_data_from_url(url, name=name)
    destinations = PdfReader(BytesIO(pdf_bytes)).named_destinations
    assert len(destinations) > 0


@pytest.mark.enable_socket
def test_outline_with_missing_named_destination():
    """The outline stays readable when items reference an undefined named destination."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381686/tika-913678.pdf",
        name="tika-913678.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    # Outline items in this document reference a named destination that is not defined.
    nested_item = reader.outline[1][0]
    assert nested_item.title.startswith("Report for 2002AZ3B: Microbial")


@pytest.mark.enable_socket
def test_outline_with_empty_action():
    """Outline items whose action (/A) is empty are still returned with their title."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381697/tika-924546.pdf",
        name="tika-924546.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    # The items entitled "Tables" and "Figures" use an empty action (/A)
    # that has neither a type nor a destination.
    fourth_from_last = reader.outline[-4]
    assert fourth_from_last.title == "Tables"


def test_outline_with_invalid_destinations():
    """All nine outline items are returned even though six have invalid destinations."""
    pdf_path = RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf"
    reader = PdfReader(pdf_path)
    # The file contains 9 outline items; 6 of them carry invalid destinations
    # caused by different malformations.
    outline = reader.outline
    assert len(outline) == 9


@pytest.mark.enable_socket
def test_pdfreader_multiple_definitions(caplog):
    """A key defined several times in one dictionary is reported as a warning (iss325)."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9176644/multipledefs.pdf",
        name="multipledefs.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[0].extract_text()

    expected_warnings = [
        "Multiple definitions in dictionary at byte 0xb5 for key /Group"
    ]
    assert normalize_warnings(caplog.text) == expected_warnings


def test_wrong_password_error():
    """Opening an encrypted file with a wrong password raises WrongPasswordError."""
    wrong_password = "definitely_the_wrong_password!"
    with pytest.raises(WrongPasswordError):
        PdfReader(RESOURCE_ROOT / "encrypted-file.pdf", password=wrong_password)


def test_get_page_number_by_indirect():
    """Smoke test: ``_get_page_number_by_indirect(1)`` runs on crazyones.pdf without raising."""
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    PdfReader(pdf_path)._get_page_number_by_indirect(1)


@pytest.mark.enable_socket
def test_corrupted_xref_table():
    """Text extraction works on both sample files attached to issue #1292."""
    samples = (
        (
            "https://github.com/py-pdf/pypdf/files/9444747/BreezeManual.orig.pdf",
            "BreezeMan1.pdf",
        ),
        (
            "https://github.com/py-pdf/pypdf/files/9444748/BreezeManual.failed.pdf",
            "BreezeMan2.pdf",
        ),
    )
    for url, name in samples:
        reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
        reader.pages[0].extract_text()


@pytest.mark.enable_socket
def test_reader(caplog):
    """Warn once about the unreadable previous trailer, then extract silently (iss #1273)."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9464742/shiv_resume.pdf",
        name="shiv_resume.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    assert "Previous trailer cannot be read" in caplog.text

    # The first extraction may still log while repairs are carried out ...
    caplog.clear()
    reader.pages[0].extract_text()

    # ... whereas a second extraction must not log anything.
    caplog.clear()
    reader.pages[0].extract_text()
    assert caplog.text == ""


@pytest.mark.enable_socket
def test_zeroing_xref():
    """The page count of the sample attached to iss #328 can be computed."""
    pdf_url = (
        "https://github.com/py-pdf/pypdf/files/9066120/"
        "UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf"
    )
    pdf_bytes = get_data_from_url(pdf_url, name="UTA_OSHA.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    len(reader.pages)


@pytest.mark.enable_socket
def test_thread():
    """``threads`` is None for the first sample and a non-empty ArrayObject for the second."""
    # First document: ``threads`` is expected to be None.
    no_thread_url = (
        "https://github.com/py-pdf/pypdf/files/9066120/"
        "UTA_OSHA_3115_Fall_Protection_Training_09162021_.pdf"
    )
    no_thread_bytes = get_data_from_url(no_thread_url, name="UTA_OSHA.pdf")
    reader = PdfReader(BytesIO(no_thread_bytes))
    assert reader.threads is None

    # Second document: ``threads`` is expected to be an array with at least one entry.
    threaded_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381699/tika-924666.pdf",
        name="tika-924666.pdf",
    )
    reader = PdfReader(BytesIO(threaded_bytes))
    assert isinstance(reader.threads, ArrayObject)
    assert len(reader.threads) >= 1


@pytest.mark.enable_socket
def test_build_outline_item(caplog):
    """A numeric /Dest is dropped with a warning, or rejected in strict mode."""

    def item_with_numeric_dest():
        # Outline dictionary whose /Dest is a plain number.
        return DictionaryObject(
            {
                NameObject("/Title"): TextStringObject("Toto"),
                NameObject("/Dest"): NumberObject(2),
            }
        )

    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9464742/shiv_resume.pdf",
        name="shiv_resume.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # Default mode: the destination is removed and a message is logged.
    outline = reader._build_outline_item(item_with_numeric_dest())
    assert "Removed unexpected destination 2 from destination" in caplog.text
    assert outline["/Title"] == "Toto"

    # Strict mode: the same input raises.
    reader.strict = True
    with pytest.raises(PdfReadError) as exc:
        reader._build_outline_item(item_with_numeric_dest())
    assert "Unexpected destination 2" in exc.value.args[0]


@pytest.mark.samples
@pytest.mark.parametrize(
    ("src", "page_labels"),
    [
        (RESOURCE_ROOT / "selenium-pypdf-issue-177.pdf", ["1"]),
        (RESOURCE_ROOT / "encrypted_doc_no_id.pdf", ["1", "2", "3"]),
        (RESOURCE_ROOT / "pdflatex-outline.pdf", ["1", "2", "3", "4"]),
        (
            SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf",
            ["i", "ii", "iii", "1", "2", "3"],
        ),
    ],
    ids=[
        "selenium-pypdf-issue-177.pdf",
        "encrypted_doc_no_id.pdf",
        "pdflatex-outline.pdf",
        "GeoTopo.pdf",
    ],
)
def test_page_labels(src, page_labels):
    """Compare the first (at most six) page labels of each document with the expected ones."""
    max_indices = 6
    actual_labels = PdfReader(src).page_labels[:max_indices]
    expected_labels = page_labels[:max_indices]
    assert actual_labels == expected_labels


@pytest.mark.enable_socket
def test_iss1559():
    """Text extraction runs on every page of the iss1559 sample without raising."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/10441992/default.pdf",
        name="iss1559.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    for page in reader.pages:
        page.extract_text()


@pytest.mark.enable_socket
def test_iss1652():
    """Named destinations can be read from the iss1652 sample without raising."""
    # The sample has an annotation (link) stored directly in the page's /Annots.
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/10818844/tt.pdf",
        name="invalidNamesDest.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.named_destinations


@pytest.mark.enable_socket
def test_iss1689():
    """The first page of the iss1689 sample can be accessed without raising."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/10948283/error_file_without_data.pdf",
        name="iss1689.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[0]


@pytest.mark.enable_socket
def test_iss1710():
    """The outline of the iss1710 sample can be read without raising."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/15234776/irbookonlinereading.pdf",
        name="irbookonlinereading.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.outline


def test_broken_file_header():
    """A hand-built PDF with a malformed ``%PDF-`` header line can be opened."""
    # %-format template: five xref offsets, an optional /Prev entry and the
    # startxref value are filled in below.
    template = (
        b"%%PDF-\xa0sd\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
        b"xref 1 5\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"trailer << %s/Root 5 0 R /Size 6 >>\n"
        b"startxref %d\n"
        b"%%%%EOF"
    )
    with_prev_0 = True
    prev_entry = b"/Prev 0 " if with_prev_0 else b""

    # Offsets are looked up in the unformatted template, as in the original test.
    object_offsets = [template.find(b"%d 0 obj" % number) for number in range(1, 6)]
    startxref = template.find(b"xref") - 1

    pdf_data = template % (*object_offsets, prev_entry, startxref)
    PdfReader(io.BytesIO(pdf_data))


@pytest.mark.enable_socket
def test_iss1756():
    """The trailer of the iss1756 sample exposes an /ID entry."""
    pdf_url = "https://github.com/py-pdf/pypdf/files/11105591/641-Attachment-B-Pediatric-Cardiac-Arrest-8-1-2019.pdf"
    pdf_bytes = get_data_from_url(pdf_url, name="iss1756.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.trailer["/ID"]
    # removed to cope with missing cryptodome during commit check : len(reader.pages)


@pytest.mark.enable_socket
@pytest.mark.timeout(30)
def test_iss1825():
    """Text extraction of the first page of the iss1825 sample finishes within the timeout."""
    pdf_url = "https://github.com/py-pdf/pypdf/files/11367871/MiFO_LFO_FEIS_NOA_Published.3.pdf"
    pdf_bytes = get_data_from_url(pdf_url, name="iss1825.pdf")
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    first_page.extract_text()


@pytest.mark.enable_socket
def test_iss2082():
    """The sample reads fine; overwriting one byte of its ``xref`` keyword makes reading fail."""
    original = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12317939/test.pdf",
        name="iss2082.pdf",
    )
    reader = PdfReader(BytesIO(original))
    reader.pages[0].extract_text()

    # Replace the third byte of the first "xref" occurrence with "E".
    corrupted = bytearray(original)
    corrupted[original.find(b"xref") + 2] = ord(b"E")
    with pytest.raises(PdfReadError):
        PdfReader(BytesIO(corrupted))


@pytest.mark.enable_socket
def test_issue_140():
    """The issue-140 sample is read as a 54-page document."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf",
        name="issue-140.pdf",
    )
    page_count = len(PdfReader(BytesIO(pdf_bytes)).pages)
    assert page_count == 54


@pytest.mark.enable_socket
def test_xyz_with_missing_param():
    """Check /Left and /Top of the first outline items of the #2236 sample."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12795356/tt1.pdf",
        name="issue2236.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    first_item = reader.outline[0]
    assert first_item["/Left"] == 820
    assert first_item["/Top"] == 0

    second_item = reader.outline[1]
    assert second_item["/Left"] == 0
    # NOTE(review): this re-checks item 0; it was presumably meant to check
    # reader.outline[1]["/Top"] -- confirm the expected value before changing it.
    assert reader.outline[0]["/Top"] == 0


@pytest.mark.enable_socket
def test_corrupted_xref():
    """The root object of the iss2516 sample is identified as the catalog."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/14628314/iss2516.pdf",
        name="iss2516.pdf",
    )
    root = PdfReader(BytesIO(pdf_bytes)).root_object
    assert root["/Type"] == "/Catalog"


@pytest.mark.enable_socket
def test_truncated_xref(caplog):
    """Opening the iss2575 sample logs that the xref table is being rebuilt."""
    pdf_url = "https://github.com/py-pdf/pypdf/files/14843553/002-trivial-libre-office-writer-broken.pdf"
    pdf_bytes = get_data_from_url(pdf_url, name="iss2575.pdf")
    PdfReader(BytesIO(pdf_bytes))
    expected_message = "Invalid/Truncated xref table. Rebuilding it."
    assert expected_message in caplog.text


@pytest.mark.enable_socket
def test_damaged_pdf():
    """Pages can be counted in non-strict mode; strict mode reports the object ID mismatch."""
    url = "https://github.com/py-pdf/pypdf/files/15186107/malformed_pdf.pdf"
    name = "malformed_pdf.pdf"

    # Non-strict: counting the pages must simply work.
    lenient_reader = PdfReader(BytesIO(get_data_from_url(url, name=name)), strict=False)
    len(lenient_reader.pages)

    # Strict: the same operation raises with a precise message.
    strict_reader = PdfReader(BytesIO(get_data_from_url(url, name=name)), strict=True)
    with pytest.raises(PdfReadError) as exc:
        len(strict_reader.pages)
    expected_message = "Expected object ID (21 0) does not match actual (-1 -1)."
    assert exc.value.args[0] == expected_message


@pytest.mark.enable_socket
@pytest.mark.timeout(10)
def test_looping_form(caplog):
    """Field collection terminates and reports an already parsed field (iss 2643)."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/15306053/inheritance.pdf",
        name="iss2643.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes), strict=False)
    reader_fields = reader.get_fields()
    expected_names = (
        "Text10",
        "Text10.0.0.1.1.1.1.1.1.1.1.1.1.1.1.1.1.1",
        "amt1.0",
        "amt1.1",
        "DSS#3pg3#0hgu7",
    )
    assert all(field_name in reader_fields for field_name in expected_names)

    # Append the first kid of field 5 to its own /Kids array a second time.
    writer = PdfWriter(reader)
    kids = writer.root_object["/AcroForm"]["/Fields"][5]["/Kids"]
    kids.append(kids[0])

    writer_fields = writer.get_fields()
    assert "Text68.0 already parsed" in caplog.text
    assert list(reader_fields.keys()) == list(writer_fields.keys())


def test_context_manager_with_stream():
    """Using PdfReader as a context manager on a caller-supplied stream leaves it open."""
    # %-format template: five xref offsets and the startxref value are filled in below.
    template = (
        b"%%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [4 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Contents 3 0 R /CropBox [0.0 0.0 2550.0 3508.0]"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        b"5 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
        b"xref 1 5\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"trailer << /Root 5 0 R /Size 6 >>\n"
        b"startxref %d\n"
        b"%%%%EOF"
    )
    # Offsets are looked up in the unformatted template, as in the original test.
    object_offsets = [template.find(b"%d 0 obj" % number) for number in range(1, 6)]
    startxref = template.find(b"xref") - 1
    pdf_data = template % (*object_offsets, startxref)

    pdf_stream = io.BytesIO(pdf_data)
    with PdfReader(pdf_stream) as reader:
        assert not reader.stream.closed
    assert not pdf_stream.closed


@pytest.mark.enable_socket
@pytest.mark.timeout(10)
def test_iss2761():
    """Text extraction on the iss2761 crash sample raises PdfReadError within the timeout."""
    pdf_url = "https://github.com/user-attachments/files/16312198/crash-b26d05712a29b241ac6f9dc7fff57428ba2d1a04.pdf"
    pdf_bytes = get_data_from_url(pdf_url, name="iss2761.pdf")
    reader = PdfReader(BytesIO(pdf_bytes), strict=False)
    with pytest.raises(PdfReadError):
        reader.pages[0].extract_text()


@pytest.mark.enable_socket
def test_iss2817():
    """Test for rebuilding Xref_ObjStm: the first annotation's /Contents is readable."""
    pdf_url = "https://github.com/user-attachments/files/16764070/crash-7e1356f1179b4198337f282304cb611aea26a199.pdf"
    pdf_bytes = get_data_from_url(pdf_url, name="iss2817.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))
    first_annotation = reader.pages[0]["/Annots"][0].get_object()
    expected_contents = "A\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0\xa0 B"
    assert first_annotation["/Contents"] == expected_contents


@pytest.mark.enable_socket
def test_truncated_files(caplog):
    """Check how truncation of the trailing EOF marker is handled (Cf #2853)."""
    url = "https://github.com/user-attachments/files/16796095/f5471sm-2.pdf"
    name = "iss2780.pdf"  # reused
    full_data = get_data_from_url(url, name=name)

    # Complete file: nothing is logged.
    reader = PdfReader(BytesIO(full_data))
    assert caplog.text == ""

    # Only the final "\n" removed: still nothing is logged.
    reader = PdfReader(BytesIO(full_data[:-1]))
    assert caplog.text == ""

    # Truncated marker that is still detectable.
    for cut in (-2, -3, -4, -5):
        caplog.clear()
        reader = PdfReader(BytesIO(full_data[:cut]))
        assert "EOF marker seems truncated" in caplog.text
        assert reader._startxref == 100993

    # EOF marker removed completely: the last section will not be read.
    caplog.clear()
    reader = PdfReader(BytesIO(full_data[:-6]))
    assert "CAUTION: startxref found while searching for %%EOF" in caplog.text
    assert reader._startxref < 100993


@pytest.mark.enable_socket
def test_comments_in_array(caplog):
    """Comments inside arrays are handled; a cut-off stream raises (Cf #2843)."""
    url = "https://github.com/user-attachments/files/16992416/crash-2347912aa2a6f0fab5df4ebc8a424735d5d0d128.pdf"
    name = "iss2843.pdf"  # reused
    pdf_bytes = get_data_from_url(url, name=name)

    # Intact data: the first page loads and nothing is logged.
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.pages[0]
    assert caplog.text == ""

    # Swap in a stream cut after 1149 bytes: page access raises PdfStreamError.
    reader = PdfReader(BytesIO(pdf_bytes))
    reader.stream = BytesIO(pdf_bytes[:1149])
    with pytest.raises(PdfStreamError):
        reader.pages[0]


@pytest.mark.enable_socket
def test_space_in_names_to_continue_processing(caplog):
    """
    Names containing an unencoded space must not stop processing.

    Also covers the case where a dictionary key is not a NameObject.
    """
    url = "https://github.com/user-attachments/files/17095516/crash-e108c4f677040b61e12fa9f1cfde025d704c9b0d.pdf"
    name = "iss2866.pdf"  # reused
    pdf_bytes = get_data_from_url(url, name=name)
    invalid_object_message = 'PdfReadError("Invalid Elementary Object starting with'

    reader = PdfReader(BytesIO(pdf_bytes))
    font = reader.get_object(70)
    expected_keys = (
        "/BaseFont",
        "/DescendantFonts",
        "/Encoding",
        "/Subtype",
        "/ToUnicode",
        "/Type",
    )
    assert all(key in font for key in expected_keys)
    assert font["/BaseFont"] == "/AASGAA+Arial,Unicode"  # MS is missing to meet spec
    assert invalid_object_message in caplog.text

    caplog.clear()

    # Replace bytes 264..272 so that a string appears where a key name is expected.
    pdf_bytes = pdf_bytes[:264] + b"(Inv) /d " + pdf_bytes[273:]
    reader = PdfReader(BytesIO(pdf_bytes))
    font = reader.get_object(70)
    remaining_keys = ["/DescendantFonts", "/Encoding", "/Subtype", "/ToUnicode", "/Type"]
    assert all(key in font for key in remaining_keys)
    expected_messages = (
        "Expecting a NameObject for key but",
        invalid_object_message,
    )
    assert all(message in caplog.text for message in expected_messages)

    # Strict mode refuses the modified data.
    reader = PdfReader(BytesIO(pdf_bytes), strict=True)
    with pytest.raises(PdfReadError):
        font = reader.get_object(70)


@pytest.mark.enable_socket
def test_unbalanced_brackets_in_dictionary_object(caplog):
    """The iss2877 sample is read as a 43-page document (Cf #2877)."""
    url = "https://github.com/user-attachments/files/17162634/7f40cb209fb97d1782bffcefc5e7be40.pdf"
    name = "iss2877.pdf"  # reused
    pdf_bytes = get_data_from_url(url, name=name)
    page_count = len(PdfReader(BytesIO(pdf_bytes)).pages)
    assert page_count == 43  # note:  /Count = 46 but 3 kids are None


@pytest.mark.enable_socket
def test_repair_root(caplog):
    """
    Exercise Root object recovery on several variations of one sample file.

    Each scenario rewrites bytes of the downloaded file, counts the pages (or
    expects PdfReadError) and checks the logged recovery messages.

    NOTE(review): the original docstring said "Cf #2877" while the sample is
    stored as iss2875.pdf -- confirm which issue this test belongs to.
    """
    url = "https://github.com/user-attachments/files/17162216/crash-6620e8b1abfe3da639b654595da859b87f985748.pdf"
    name = "iss2875.pdf"

    # Unmodified sample: the Root is reported invalid and found by searching.
    b = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(b))
    assert len(reader.pages) == 1
    assert all(
        msg in caplog.text
        for msg in (
            "Invalid Root object",
            'Searching object with "/Catalog" key',
            "Root found at IndirectObject(2, 0,",
        )
    )

    # no /Root Entry
    reader = PdfReader(BytesIO(b.replace(b"/Root", b"/Roo ")))
    # The log is cleared after construction, so only messages emitted while
    # counting the pages are checked below.
    caplog.clear()
    assert len(reader.pages) == 1
    assert all(
        msg in caplog.text
        for msg in (
            'Cannot find "/Root" key in trailer',
            'Searching object with "/Catalog" key',
            "Root found at IndirectObject(2, 0,",
        )
    )

    # Invalid /Root Entry
    caplog.clear()
    reader = PdfReader(
        BytesIO(
            b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog/Pages 3 0 R", b"/Catalo ")
        )
    )
    with pytest.raises(PdfReadError):
        len(reader.pages)
    assert all(
        msg in caplog.text
        for msg in (
            "Invalid Root object in trailer",
            'Searching object with "/Catalog" key',
        )
    )

    # Invalid /Root Entry + error in get_object
    caplog.clear()
    data = b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog/Pages 3 0 R", b"/Catalo ")
    # Overwrite the single byte at offset 5124 with "A".
    data = data[:5124] + b"A" + data[5125:]
    reader = PdfReader(BytesIO(data))
    with pytest.raises(PdfReadError):
        len(reader.pages)
    assert all(
        msg in caplog.text
        for msg in (
            "Invalid Root object in trailer",
            'Searching object with "/Catalog" key',
        )
    )

    # Invalid /Root Entry without /Type, but /Pages.
    caplog.clear()
    reader = PdfReader(
        BytesIO(
            b.replace(b"/Root 1 0 R", b"/Root 2 0 R").replace(b"/Catalog", b"/Catalo ")
        )
    )
    assert len(reader.pages) == 1
    assert all(
        msg in caplog.text
        for msg in (
            "Invalid Root object in trailer",
            'Searching object with "/Catalog" key',
            # The expected message embeds id(reader) of this specific reader instance.
            f"Possible root found at IndirectObject(2, 0, {id(reader)}), but missing /Catalog key"
        )
    )


@pytest.mark.enable_socket
def test_issue3151(caplog):
    """The #3151 sample is read as a 742-page document."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18941494/bible.pdf",
        name="issue3151.pdf",
    )
    page_count = len(PdfReader(BytesIO(pdf_bytes)).pages)
    assert page_count == 742


@pytest.mark.enable_socket
def test_issue2886(caplog):
    """Opening the #2886 crash sample raises for an empty line in the xref table."""
    url = "https://github.com/user-attachments/files/17187711/crash-e8a85d82de01cab5eb44e7993304d8b9d1544970.pdf"
    name = "issue2886.pdf"
    expected_error = r"Unexpected empty line in Xref table\."

    with pytest.raises(PdfReadError, match=expected_error):
        PdfReader(BytesIO(get_data_from_url(url, name=name)))


@pytest.mark.enable_socket
def test_infinite_loop_for_length_value():
    """Adding the first page of the #3112 sample to a writer reports a self-reference loop."""
    pdf_url = "https://github.com/user-attachments/files/19106009/Special.n.15.du.jeudi.22.fevrier.2024.pdf"
    pdf_bytes = get_data_from_url(pdf_url, name="issue3112.pdf")
    expected_error = r"^Detected loop with self reference for IndirectObject\(165, 0, \d+\)\.$"

    reader = PdfReader(BytesIO(pdf_bytes))
    writer = PdfWriter()
    with pytest.raises(PdfReadError, match=expected_error):
        writer.add_page(reader.pages[0])


def test_trailer_cannot_be_read():
    """Replacing ``/Type/XRef`` by ``/Type/Invalid`` in crazyones.pdf makes reading fail."""
    original = (RESOURCE_ROOT / "crazyones.pdf").read_bytes()
    data = original.replace(b"/Type/XRef", b"/Type/Invalid")
    expected_error = r"^Trailer cannot be read: Unexpected type '/Invalid'$"
    # Both statements stay inside the block: either one may be the one that raises.
    with pytest.raises(PdfReadError, match=expected_error):
        reader = PdfReader(BytesIO(data))
        list(reader.pages)


@pytest.mark.enable_socket
def test_read_pdf15_xref_stream():
    """Check two failures while reading the xref stream of the issue-3429 sample."""
    data = get_data_from_url(name="issue-3429.pdf")

    # As downloaded: the XRef stream has no /Size entry.
    missing_size_error = r"^Trailer cannot be read: Size missing from XRef stream {"
    with pytest.raises(PdfReadError, match=missing_size_error):
        PdfReader(BytesIO(data))

    # With "/Size/2" inserted after "/XRef": decompression hits a limit instead.
    data_modified = data.replace(b"/XRef/", b"/XRef/Size/2/")
    limit_error = r"^Trailer cannot be read: Limit reached while decompressing\. 1545392 bytes remaining\.$"
    with pytest.raises(PdfReadError, match=limit_error):
        PdfReader(BytesIO(data_modified))


@pytest.mark.enable_socket
def test_read_standard_xref_table__two_whitespace_characters_between_offset_and_generation():
    """The #3482 sample yields one page whose text is "Hello World!"."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/22591813/helloworld.pdf",
        name="issue3482.pdf",
    )

    reader = PdfReader(BytesIO(pdf_bytes))
    assert len(reader.pages) == 1
    extracted = reader.pages[0].extract_text()
    assert extracted == "Hello World!"


@pytest.mark.enable_socket
def test_root_object_recovery_limit(caplog):
    """Check the default, custom and disabled Root recovery limit, plus strict mode."""
    url = "https://github.com/user-attachments/files/24525509/root_object_recovery_limit.pdf"
    name = "root_object_recovery_limit.pdf"
    data = get_data_from_url(url, name=name)
    limit_error = r"^Maximum Root object recovery limit reached\.$"

    def undefined_object_numbers():
        # Object numbers taken from the logged "Object <n> 0 not defined." messages.
        return {
            int(message.split(" ", maxsplit=2)[1])
            for message in caplog.messages
            if message.startswith("Object ") and message.endswith(" 0 not defined.")
        }

    # Default limit.
    reader = PdfReader(BytesIO(data))
    with pytest.raises(expected_exception=LimitReachedError, match=limit_error):
        _ = list(reader.pages)
    assert sorted(undefined_object_numbers()) == list(range(5, 10001))

    # Custom limit.
    caplog.clear()
    reader = PdfReader(BytesIO(data), root_object_recovery_limit=42)
    with pytest.raises(expected_exception=LimitReachedError, match=limit_error):
        _ = list(reader.pages)
    assert sorted(undefined_object_numbers()) == list(range(5, 43))

    # No limit. Do not run actual process for speed reasons.
    reader = PdfReader(BytesIO(data), root_object_recovery_limit=None)
    assert reader._root_object_recovery_limit == sys.maxsize

    # Strict mode.
    with pytest.raises(expected_exception=PdfReadError, match=r"^Broken xref table$"):
        reader = PdfReader(BytesIO(data), strict=True)
        _ = list(reader.pages)


@pytest.mark.timeout(10)
def test_rebuild_xref_table__speed():
    """A ~2 MB blob of spaces around one ``startxref`` fails with a Root error within the timeout."""
    total_len = 2_000_790
    leading_len = 0x55E  # 1374
    middle = b"\nstartxref   1\n % "
    trailing_len = total_len - leading_len - len(middle)
    data = b" " * leading_len + middle + b" " * trailing_len

    reader = PdfReader(BytesIO(data))
    expected_error = r"^Cannot find Root object in pdf$"
    with pytest.raises(expected_exception=PdfReadError, match=expected_error):
        _ = list(reader.pages)


def test_find_pdf_objects():
    """``_find_pdf_objects`` yields the expected triple for each well-formed ``obj`` header."""
    lines = [
        b"     \n",
        b"  11 0 obj\n",
        b"  12 0 obj\n",
        b"13  1  obj\n",
        b"ob\n",
        b"ab obj\n",
        b"42 1337 obj \n",
        b"\n",
    ]
    data = b"".join(lines)

    found = list(PdfReader._find_pdf_objects(data))
    assert found == [(11, 0, 8), (12, 0, 19), (13, 1, 28), (42, 1337, 49)]


@pytest.mark.parametrize(
    ("data", "expected"),
    [
        (b"\n\ntrailer", []),
        (b"\n\ntrailer abc", []),
        (b"\n\ntrailer <<", [10]),
        (b"\n\ntrailer << /Key null >>\n\n  trailer << /Key 42 >>\n", [10, 37]),
    ],
)
def test_find_pdf_trailers(data: bytes, expected: list[int]):
    """``_find_pdf_trailers`` yields the expected values for each input."""
    found = list(PdfReader._find_pdf_trailers(data))
    assert found == expected


def test_objstm_batch_parse_caches_all_objects():
    """Resolving one ObjStm object should leave all of its siblings in the cache."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    assert len(reader.xref_objStm) > 0

    first_id, *sibling_ids = list(reader.xref_objStm.keys())
    assert reader.get_object(first_id) is not None

    for idnum in sibling_ids:
        cached = reader.cache_get_indirect_object(0, idnum)
        assert cached is not None, f"Object {idnum} was not batch-cached"


def test_objstm_cache_hit_returns_target():
    """A later ``_get_object_from_stream`` call should hand back the cached object."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    obj_ids = list(reader.xref_objStm.keys())

    # Resolve the first object; this is expected to trigger the batch parse.
    reader.get_object(obj_ids[0])

    # Ask for a different object of the stream and compare with the cache entry.
    second_id = obj_ids[1]
    result = reader._get_object_from_stream(IndirectObject(second_id, 0, reader))
    assert result is reader.cache_get_indirect_object(0, second_id)


def test_objstm_skips_cache_for_overridden_objects():
    """Objects no longer listed in ``xref_objStm`` should not be cached by the batch parse."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    obj_ids = list(reader.xref_objStm.keys())
    assert len(obj_ids) >= 2
    first_id, removed_id = obj_ids[0], obj_ids[-1]

    # Simulate an incremental update overriding one object.
    saved_entry = reader.xref_objStm.pop(removed_id)
    reader.resolved_objects.clear()

    assert reader.get_object(first_id) is not None
    assert reader.cache_get_indirect_object(0, removed_id) is None
    assert reader.cache_get_indirect_object(0, first_id) is not None

    # Put the removed entry back.
    reader.xref_objStm[removed_id] = saved_entry


================================================
FILE: tests/test_text_extraction.py
================================================
"""
Testing the text-extraction submodule and ensuring the quality of text extraction.

The tested code might be in _page.py.
"""

import re
from dataclasses import asdict
from io import BytesIO
from unittest.mock import patch

import pytest

from pypdf import PdfReader, PdfWriter, mult
from pypdf._font import Font
from pypdf._text_extraction import set_custom_rtl
from pypdf._text_extraction._layout_mode._fixed_width_page import text_show_operations
from pypdf.errors import PdfReadError
from pypdf.generic import ContentStream

from . import RESOURCE_ROOT, SAMPLE_ROOT, get_data_from_url


@pytest.mark.samples
@pytest.mark.parametrize(("visitor_text"), [None, lambda a, b, c, d, e: None])  # noqa: ARG005
def test_multi_language(visitor_text):
    """
    Check text extraction for several scripts and the custom RTL settings.

    Runs once without a visitor and once with a no-op ``visitor_text`` callback.
    """
    reader = PdfReader(RESOURCE_ROOT / "multilang.pdf")
    txt = reader.pages[0].extract_text(visitor_text=visitor_text)
    assert "Hello World" in txt, "English not correctly extracted"
    # iss #1296
    assert "مرحبا بالعالم" in txt, "Arabic not correctly extracted"
    assert "Привет, мир" in txt, "Russian not correctly extracted"
    assert "你好世界" in txt, "Chinese not correctly extracted"
    assert "สวัสดีชาวโลก" in txt, "Thai not correctly extracted"
    assert "こんにちは世界" in txt, "Japanese not correctly extracted"

    # check customizations
    # The original reset the settings only after all assertions had passed, so
    # a failing assertion left them changed for whatever ran afterwards; the
    # reset now lives in ``finally``.
    try:
        set_custom_rtl(None, None, "Russian:")
        assert ":naissuR" in reader.pages[0].extract_text(
            visitor_text=visitor_text
        ), "(1) CUSTOM_RTL_SPECIAL_CHARS failed"
        set_custom_rtl(None, None, [ord(x) for x in "Russian:"])
        assert ":naissuR" in reader.pages[0].extract_text(
            visitor_text=visitor_text
        ), "(2) CUSTOM_RTL_SPECIAL_CHARS failed"
        set_custom_rtl(0, 255, None)
        assert ":hsilgnE" in reader.pages[0].extract_text(
            visitor_text=visitor_text
        ), "CUSTOM_RTL_MIN/MAX failed"
        set_custom_rtl("A", "z", [])
        assert ":hsilgnE" in reader.pages[0].extract_text(
            visitor_text=visitor_text
        ), "CUSTOM_RTL_MIN/MAX failed"
    finally:
        set_custom_rtl(-1, -1, [])  # to prevent further errors

    reader = PdfReader(SAMPLE_ROOT / "015-arabic/habibi-rotated.pdf")
    # Each of the four pages must contain both the Latin and the Arabic word.
    for page_index in range(4):
        assert "habibi" in reader.pages[page_index].extract_text(visitor_text=visitor_text)
        assert "حَبيبي" in reader.pages[page_index].extract_text(visitor_text=visitor_text)


@pytest.mark.parametrize(
    ("file_name", "constraints"),
    [
        (
            "inkscape-abc.pdf",
            {
                "A": lambda x, y: 0 < x < 94 and 189 < y < 283,  # In upper left
                "B": lambda x, y: 94 < x < 189 and 94 < y < 189,  # In the center
                "C": lambda x, y: 189 < x < 283 and 0 < y < 94,  # In lower right
            },
        )
    ],
)
def test_visitor_text_matrices(file_name, constraints):
    """
    Verify the matrices handed to ``visitor_text`` during ``extract_text``.

    The first page of `file_name` is extracted with a recording visitor.
    `constraints` maps a line of text to a predicate that must evaluate to
    `True` for the x,y-coordinates recorded for that line.
    """
    recorded = []

    def visitor_text(text, cm, tm, font_dict, font_size) -> None:
        # Entries 4 and 5 of mult(tm, cm) are recorded as x and y.
        ctm = mult(tm, cm)
        recorded.append({"text": text, "x": ctm[4], "y": ctm[5]})

    reader = PdfReader(RESOURCE_ROOT / file_name)
    reader.pages[0].extract_text(visitor_text=visitor_text)

    for text, constraint in constraints.items():
        matches = [entry for entry in recorded if entry["text"].strip() == text]
        assert len(matches) <= 1, f"Multiple lines match {text}"
        assert len(matches) >= 1, f"No lines match {text}"

        match = matches[0]
        x, y = match["x"], match["y"]
        assert constraint(x, y), f'Line "{text}" is wrong at x:{x}, y:{y}'


@pytest.mark.xfail(reason="known whitespace issue #2336")
@pytest.mark.enable_socket
def test_issue_2336():
    """The text of the first page should contain "Beira Rio" (currently expected to fail)."""
    pdf_bytes = get_data_from_url(
        name="Pesquisa-de-Precos-Combustiveis-novembro-2023.pdf"
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    actual_text = first_page.extract_text()
    assert "Beira Rio" in actual_text


def test_font_class_to_dict():
    """``asdict`` on a minimally constructed Font yields the expected nested dictionary."""
    font = Font(
        name="Unknown",
        space_width=8,
        character_map={},
        encoding="utf-16-be",
    )
    expected_descriptor = {
        "name": "Unknown",
        "family": "Unknown",
        "weight": "Unknown",
        "ascent": 700.0,
        "descent": -200.0,
        "cap_height": 600.0,
        "x_height": 500.0,
        "italic_angle": 0.0,
        "flags": 32,
        "bbox": (-100.0, -200.0, 1000.0, 900.0),
    }
    expected = {
        "name": "Unknown",
        "character_map": {},
        "encoding": "utf-16-be",
        "sub_type": "Unknown",
        "font_descriptor": expected_descriptor,
        "character_widths": {"default": 500},
        "space_width": 8,
        "interpretable": True,
    }
    assert asdict(font) == expected


@pytest.mark.enable_socket
@patch("pypdf._text_extraction._layout_mode._fixed_width_page.logger_warning")
def test_uninterpretable_type3_font(mock_logger_warning):
    """Layout mode must return an empty string and log a warning for an uninterpretable font."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18551904/UninterpretableType3Font.pdf",
        name="UninterpretableType3Font.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    first_page = reader.pages[0]

    layout_text = first_page.extract_text(extraction_mode="layout")

    assert layout_text == ""
    mock_logger_warning.assert_called_with(
        "PDF contains an uninterpretable font. Output will be incomplete.",
        "pypdf._text_extraction._layout_mode._fixed_width_page"
    )


@pytest.mark.enable_socket
def test_layout_mode_epic_page_fonts():
    """Layout-mode text of the "Epic Page" PDF must equal the stored reference file."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13836944/Epic.Page.PDF",
        name="Epic Page.PDF",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reference_path = RESOURCE_ROOT / "Epic.Page.layout.txt"
    expected = reference_path.read_text(encoding="utf-8")
    actual = reader.pages[0].extract_text(extraction_mode="layout")
    assert expected == actual


def test_layout_mode_uncommon_operators():
    """Layout-mode text of ``toy.pdf`` must equal the stored reference file."""
    # Coverage for layout mode Tc, Tz, Ts, ', ", TD, TL, and Tw
    reference_path = RESOURCE_ROOT / "toy.layout.txt"
    reader = PdfReader(RESOURCE_ROOT / "toy.pdf")
    expected = reference_path.read_text(encoding="utf-8")
    actual = reader.pages[0].extract_text(extraction_mode="layout")
    assert expected == actual


@pytest.mark.enable_socket
def test_layout_mode_type0_font_widths():
    """Layout-mode text of the "Claim Maker Alerts Guide" page must equal the reference file."""
    # Cover both the 'int int int' and 'int [int int ...]' formats for Type0
    # /DescendantFonts /W array entries.
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/13533204/Claim.Maker.Alerts.Guide_pg2.PDF",
        name="Claim Maker Alerts Guide_pg2.PDF",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    reference_path = RESOURCE_ROOT / "Claim Maker Alerts Guide_pg2.layout.txt"
    expected = reference_path.read_text(encoding="utf-8")
    actual = reader.pages[0].extract_text(extraction_mode="layout")
    assert expected == actual


@pytest.mark.enable_socket
def test_layout_mode_indirect_sequence_font_widths(caplog):
    """Exercise layout mode on a well-formed and a malformed font-width example PDF."""
    # Cover the situation where the sequence for font widths is an IndirectObject
    # https://github.com/py-pdf/pypdf/pull/2788
    valid_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/16491621/2788_example.pdf",
        name="2788_example.pdf",
    )
    valid_reader = PdfReader(BytesIO(valid_bytes))
    assert valid_reader.pages[0].extract_text(extraction_mode="layout") == ""

    # The malformed variant only needs to produce the log message.
    malformed_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/16491619/2788_example_malformed.pdf",
        name="2788_example_malformed.pdf",
    )
    malformed_reader = PdfReader(BytesIO(malformed_bytes))
    malformed_reader.pages[0].extract_text(extraction_mode="layout")
    assert "Invalid font width definition" in caplog.text


def dummy_visitor_text(text, ctm, tm, fd, fs):
    """Do nothing with the five arguments; used as a placeholder ``visitor_text`` callback."""


@patch("pypdf._page.logger_warning")
def test_layout_mode_warnings(mock_logger_warning):
    """A warning is logged when ``visitor_text`` is passed in layout mode, but not in plain mode."""
    first_page = PdfReader(RESOURCE_ROOT / "hello-world.pdf").pages[0]

    # Plain mode honours the visitor, so nothing is logged.
    first_page.extract_text(extraction_mode="plain", visitor_text=dummy_visitor_text)
    mock_logger_warning.assert_not_called()

    # Layout mode ignores the visitor and says so.
    first_page.extract_text(extraction_mode="layout", visitor_text=dummy_visitor_text)
    mock_logger_warning.assert_called_with(
        "Argument visitor_text is ignored in layout mode", "pypdf._page"
    )


@pytest.mark.enable_socket
def test_space_with_one_unit_smaller_than_font_width():
    """Tests for #1328"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9498481/0004.pdf",
        name="iss1328.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    page_text = reader.pages[0].extract_text()
    assert "Reporting crude oil leak.\n" in page_text


@pytest.mark.enable_socket
def test_space_position_calculation():
    """Tests for #1153"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9164743/file-0.pdf",
        name="iss1153.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    fourth_page_text = reader.pages[3].extract_text()
    assert "Shortly after the Geneva BOF session, the" in fourth_page_text


def test_text_leading_height_unit():
    """Tests for #2262"""
    reader = PdfReader(RESOURCE_ROOT / "toy.pdf")
    page_text = reader.pages[0].extract_text()
    assert "Something[cited]\n" in page_text


def test_layout_mode_space_vertically_font_height_weight():
    """Tests layout mode with vertical space and font height weight (issue #2915)"""
    # Each case pairs a reference file with the extra keyword arguments to use.
    cases = (
        # Normal behaviour
        ("crazyones_layout_vertical_space.txt", {}),
        # Blank lines are added to truly separate paragraphs
        (
            "crazyones_layout_vertical_space_font_height_weight.txt",
            {"layout_mode_font_height_weight": 0.85},
        ),
    )

    with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as pdf_stream:
        reader = PdfReader(pdf_stream)
        page = reader.pages[0]

        for reference_name, extra_options in cases:
            expected = (RESOURCE_ROOT / reference_name).read_bytes()
            actual = page.extract_text(
                extraction_mode="layout",
                layout_mode_space_vertically=True,
                **extra_options,
            ).encode("utf-8")

            # Line-by-line first, so a failure points at the offending line.
            for actual_line, expected_line in zip(actual.splitlines(), expected.splitlines()):
                assert actual_line == expected_line

            # Then the whole text, with Windows line endings normalized.
            assert actual == expected.replace(b"\r\n", b"\n")


@pytest.mark.enable_socket
def test_infinite_loop_arrays():
    """Tests for #2928"""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/17576546/arrayabruptending.pdf",
        name="arrayabruptending.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    page_text = reader.pages[0].extract_text()
    assert "RNA structure comparison" in page_text


@pytest.mark.enable_socket
def test_content_stream_is_dictionary_object(caplog):
    """Tests for #2995"""
    data = get_data_from_url(
        "https://github.com/user-attachments/files/18049322/6fa5fd46-5f98-4a67-800d-5e2362b0164f.pdf",
        name="iss2995.pdf",
    )

    # Default reader: text is still extracted and the problem is only logged.
    lenient_reader = PdfReader(BytesIO(data))
    lenient_text = lenient_reader.pages[0].extract_text()
    assert "\nYours faithfully   \n" in lenient_text
    assert "Expected StreamObject, got DictionaryObject instead. Data might be wrong." in caplog.text
    caplog.clear()

    # Strict reader: extraction raises instead.
    strict_reader = PdfReader(BytesIO(data), strict=True)
    strict_page = strict_reader.pages[0]
    with pytest.raises(PdfReadError) as exception:
        strict_page.extract_text()
    expected_fragment = (
        "Invalid Elementary Object starting with b\\'\\\\x18\\' @3557: b\\'ateDecode/Length 629\\\\x18ck["
    )
    assert expected_fragment in exception.value.args[0]


@pytest.mark.enable_socket
def test_tz_with_no_operands():
    """Tests for #2975"""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/17974120/9E5E080E-C8DB-4A6B-822B-9A67DC04E526-120438.pdf",
        name="iss2975.pdf",
    )

    reader = PdfReader(BytesIO(pdf_bytes))
    second_page_text = reader.pages[1].extract_text()
    assert "\nThankyouforyourattentiontothismatter.\n" in second_page_text


@pytest.mark.enable_socket
def test_iss3060():
    """Test for not throwing 'font not set: is PDF missing a Tf operator'"""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18482531/test-anon.pdf",
        name="iss3060.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    # pypdf.errors.PdfReadError: font not set: is PDF missing a Tf operator?
    layout_text = reader.pages[0].extract_text(extraction_mode="layout")
    assert layout_text.startswith(" *******")


@pytest.mark.enable_socket
def test_iss3074():
    """Test for not throwing 'ZeroDivisionError: float division by zero'"""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18533211/test-anon.pdf",
        name="iss3074.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    # pypdf.errors.PdfReadError: ZeroDivisionError: float division by zero
    layout_text = reader.pages[0].extract_text(extraction_mode="layout")
    stripped = layout_text.strip()
    assert stripped.startswith("AAAAAA")


@pytest.mark.enable_socket
def test_layout_mode_text_state():
    """Ensure the text state is stored and reset with q/Q operators."""
    # Get the PDF from issue #3212
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/19396790/garbled.pdf",
        name="garbled-font.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # Get the txt from issue #3212 and normalize line endings
    reference_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/19510731/garbled-font.layout.txt",
        name="garbled-font.layout.txt",
    )
    expected = reference_bytes.decode("utf-8").replace("\r\n", "\n")

    # Ignore differences in rendering of spaces to work around older differences between the
    # old layout mode Font code and the new Font class in calculating and dealing with the
    # fallback width for a character that has no width defined in character_widths.
    expected_without_spaces = expected.replace(" ", "")
    actual = reader.pages[0].extract_text(extraction_mode="layout")
    assert expected_without_spaces == actual.replace(" ", "")


@pytest.mark.enable_socket
def test_rotated_line_wrap():
    """Ensure correct 2D translation of rotated text after a line wrap."""
    # Get the PDF from issue #3247
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/19696918/link16-line-wrap.sanitized.pdf",
        name="link16-line-wrap.sanitized.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # Get the txt from issue #3247 and normalize line endings
    reference_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/19696917/link16-line-wrap.sanitized.expected.txt",
        name="link16-line-wrap.sanitized.expected.txt",
    )
    expected = reference_bytes.decode("utf-8").replace("\r\n", "\n")

    actual = reader.pages[0].extract_text()
    assert expected == actual


@pytest.mark.parametrize(
    ("op", "msg"),
    [
        (b"BT", "Unbalanced target operations, expected b'ET'."),
        (b"q", "Unbalanced target operations, expected b'Q'."),
    ],
)
def test_layout_mode_warns_on_malformed_content_stream(op, msg, caplog):
    """
    Ensure that an unbalanced q/Q or BT/ET pair is handled gracefully.

    A content stream holding only the opening operator ``op`` is passed to
    ``text_show_operations``; the last captured log record must carry ``msg``.
    """
    # A single operation without operands: the opener is never closed.
    text_show_operations(ops=iter([([], op)]), fonts={})
    assert caplog.records
    assert caplog.records[-1].msg == msg


def test_process_operation__cm_multiplication_issue():
    """Test for #3262."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
    page = writer.pages[0]

    # Swap the original ``cm`` operation for one with only three operands.
    original_data = page.get_contents().get_data()
    patched_data = original_data.replace(b" 1 0 0 1 72 720 cm ", b" 0.70278 65.3 163.36 cm ")

    replacement = ContentStream(stream=None, pdf=writer)
    replacement.set_data(patched_data)
    page.replace_contents(replacement)

    extracted = page.extract_text()
    assert extracted.startswith("The Crazy Ones\nOctober 14, 1998\n")


@pytest.mark.enable_socket
def test_rotated_layout_mode(caplog):
    """Ensures text extraction of rotated pages, as in issue #3270."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/19981120/rotated-page.pdf",
        name="rotated-page.pdf",
    )
    writer = PdfWriter(clone_from=BytesIO(pdf_bytes))
    first_page = writer.pages[0]

    first_page.transfer_rotation_to_content()
    text = first_page.extract_text(extraction_mode="layout")

    assert not caplog.records, "No warnings should be issued"
    assert text, "Text matching the page rotation should be extracted"
    trailing_lines = re.search(r"\r?\n +69\r?\n +UNCLASSIFIED$", text)
    assert trailing_lines, "Contents should be in expected layout"


@pytest.mark.enable_socket
@pytest.mark.filterwarnings("ignore::pypdf.errors.PdfReadWarning")
def test_extract_text__none_objects():
    """Extracting text from pages 0 and 8 of ``tika-957721.pdf`` must not raise."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381726/tika-957721.pdf",
        name="tika-957721.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    for page_index in (0, 8):
        reader.pages[page_index].extract_text()


@pytest.mark.enable_socket
def test_extract_text__with_visitor_text():
    """Extracting text with a no-op ``visitor_text`` must not raise for two sample PDFs."""
    def visitor_text(*args, **kwargs):  # noqa: ANN002, ANN003, ANN202
        pass

    first_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381718/tika-952016.pdf",
        name="tika-952016.pdf",
    )
    first_reader = PdfReader(BytesIO(first_bytes))
    first_reader.pages[0].extract_text(visitor_text=visitor_text)

    second_bytes = get_data_from_url(name="TextAttack_paper.pdf")
    second_reader = PdfReader(BytesIO(second_bytes))
    second_reader.pages[0].extract_text(visitor_text=visitor_text)


@pytest.mark.enable_socket
def test_extract_text__restore_cm_stack_pop_error():
    """Extracting page 10 of ``tika-966635.pdf`` raises ``IndexError`` ("list index out of range")."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381737/tika-966635.pdf",
        name="tika-966635.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    eleventh_page = reader.pages[10]

    # There is a previous error we already omit ("pop from empty list"), thus
    # check for the message explicitly here.
    with pytest.raises(IndexError, match="list index out of range"):
        eleventh_page.extract_text()


@pytest.mark.timeout(60)
@pytest.mark.enable_socket
def test_slow_huge_string():
    """Tests for #3541"""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/23855795/file.pdf",
        name="issue-3541.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    first_page = reader.pages[0]

    # Only completion within the timeout matters; the text itself is discarded.
    first_page.extract_text(extraction_mode="layout")


@pytest.mark.enable_socket
def test_extract_text_with_missing_font_bbox():
    """The emoji on the first page of ``issue-3599.pdf`` must appear in the extracted text."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/24611650/bbox_bug_emoji.pdf",
        name="issue-3599.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    extracted = reader.pages[0].extract_text()
    assert "🎉" in extracted


================================================
FILE: tests/test_utils.py
================================================
"""Test the pypdf._utils module."""
import functools
import io
import re
import subprocess
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Callable

import pytest

import pypdf._utils
from pypdf._utils import (
    File,
    Version,
    _get_max_pdf_version_header,
    _human_readable_bytes,
    check_if_whitespace_only,
    classproperty,
    deprecate_with_replacement,
    deprecation_no_replacement,
    format_iso8824_date,
    logger_error,
    mark_location,
    matrix_multiply,
    parse_iso8824_date,
    read_block_backwards,
    read_previous_line,
    read_until_regex,
    read_until_whitespace,
    rename_kwargs,
    skip_over_comment,
    skip_over_whitespace,
)
from pypdf.errors import DeprecationError, PdfReadError, PdfStreamError
from pypdf.generic import DictionaryObject, NameObject, TextStringObject

from . import is_sublist


@pytest.mark.parametrize(
    ("stream", "expected"),
    [
        (io.BytesIO(b"foo"), False),
        (io.BytesIO(b""), False),
        (io.BytesIO(b" "), True),
        (io.BytesIO(b"  "), True),
        (io.BytesIO(b"  \n"), True),
        (io.BytesIO(b"    \n"), True),
        (io.BytesIO(b"\f"), True),
    ],
)
def test_skip_over_whitespace(stream, expected):
    """``skip_over_whitespace`` must return ``expected`` for the given stream."""
    outcome = skip_over_whitespace(stream)
    assert outcome == expected


@pytest.mark.parametrize(
    ("value", "expected"),
    [
        (b"foo", False),
        (b" a", False),
        (b" a\n b", False),
        (b"", True),
        (b" ", True),
        (b"  ", True),
        (b"  \n", True),
        (b"    \n", True),
        (b"\f", True),
    ],
)
def test_check_if_whitespace_only(value, expected):
    """``check_if_whitespace_only`` must return exactly the boolean ``expected``."""
    outcome = check_if_whitespace_only(value)
    assert outcome is expected


def test_read_until_whitespace():
    """With ``maxchars=1`` only the first byte of the stream is returned."""
    stream = io.BytesIO(b"foo")
    first_byte = read_until_whitespace(stream, maxchars=1)
    assert first_byte == b"f"


@pytest.mark.parametrize(
    ("stream", "remainder"),
    [
        (io.BytesIO(b"% foobar\n"), b""),
        (io.BytesIO(b""), b""),
        (io.BytesIO(b" "), b" "),
        (io.BytesIO(b"% foo%\nbar"), b"bar"),
    ],
)
def test_skip_over_comment(stream, remainder):
    """After ``skip_over_comment`` the unread part of the stream must equal ``remainder``."""
    skip_over_comment(stream)
    unread = stream.read()
    assert unread == remainder


def test_read_until_regex_premature_ending_name():
    """An empty stream yields an empty result."""
    empty_stream = io.BytesIO(b"")
    any_byte = re.compile(b".")
    assert read_until_regex(empty_stream, any_byte) == b""


def test_read_until_regex_match_in_first_chunk():
    """Match within the first 16-byte chunk."""
    stream = io.BytesIO(b"hello world")
    space = re.compile(b" ")
    prefix = read_until_regex(stream, space)
    assert prefix == b"hello"
    # The stream is left positioned at the delimiter.
    assert stream.tell() == 5


def test_read_until_regex_match_in_second_chunk():
    """Match lands in the second chunk (past first 16 bytes)."""
    payload = b"0123456789abcdefghij"
    assert len(payload) == 20
    stream = io.BytesIO(payload + b" rest")
    space = re.compile(b" ")
    prefix = read_until_regex(stream, space)
    assert prefix == payload
    assert stream.tell() == 20


def test_read_until_regex_match_at_chunk_boundary():
    """Delimiter sits exactly at byte 16 (first byte of second chunk)."""
    payload = b"0123456789abcdef"
    assert len(payload) == 16
    stream = io.BytesIO(payload + b" after")
    space = re.compile(b" ")
    prefix = read_until_regex(stream, space)
    assert prefix == payload
    assert stream.tell() == 16


def test_read_until_regex_multi_byte_spanning_boundary():
    """Multi-byte regex pattern spans across a chunk boundary."""
    # "X" at byte 15 (last byte of first chunk), "Y" at byte 16 (first of second)
    payload = b"0123456789abcde"
    assert len(payload) == 15
    stream = io.BytesIO(payload + b"XYafter")
    delimiter = re.compile(b"XY")
    prefix = read_until_regex(stream, delimiter)
    assert prefix == payload
    assert stream.tell() == 15


def test_read_until_regex_no_match_exhausted():
    """No match - stream is fully consumed and all data returned."""
    data = b"0123456789" * 10
    never_matching = re.compile(b"ZZZ")
    everything = read_until_regex(io.BytesIO(data), never_matching)
    assert everything == data


def test_read_until_regex_exponential_chunk_growth():
    """Verify correctness with long input that exercises chunk doubling."""
    payload = (b"0123456789abcdef" * 3125)[:50_000]
    assert len(payload) == 50_000
    stream = io.BytesIO(payload + b"|done")
    pipe = re.compile(rb"\|")
    prefix = read_until_regex(stream, pipe)
    assert prefix == payload
    assert stream.tell() == 50_000


def test_read_until_regex_match_spanning_later_boundary():
    """Multi-byte match spanning a boundary after chunk size has grown."""
    # Chunk 1: 16 bytes, chunk 2: 32 bytes → total 48 after two reads.
    # Place "END" at offset 47 so it spans bytes 47-49.
    payload = (b"0123456789abcdef" * 3)[:47]
    assert len(payload) == 47
    stream = io.BytesIO(payload + b"ENDrest")
    delimiter = re.compile(b"END")
    prefix = read_until_regex(stream, delimiter)
    assert prefix == payload
    assert stream.tell() == 47


def test_read_until_regex_tail_overlap_is_fixed():
    """Tail overlap is 16 bytes regardless of chunk size growth.

    Chunk reads are 16, 32 and 64 bytes (112 in total), so the second chunk
    ends at offset 48. A 16-byte pattern starting at offset 47 begins one byte
    before that boundary and continues into the third chunk; it can only be
    found if enough of the second chunk's tail is kept for matching.
    """
    pattern = b"ABCDEFGHIJKLMNOP"  # 16 bytes
    assert len(pattern) == 16
    # Chunk 1: 16 bytes, chunk 2: 32 bytes -> boundary at offset 48.
    # Pattern starts at 47, spanning bytes 47-62.
    payload = b"x" * 47
    stream = io.BytesIO(payload + pattern + b"rest")
    literal_pattern = re.compile(re.escape(pattern))
    prefix = read_until_regex(stream, literal_pattern)
    assert prefix == payload
    assert stream.tell() == 47


@pytest.mark.parametrize(
    ("a", "b", "expected"),
    [
        (((3,),), ((7,),), ((21,),)),
        (((3, 7),), ((5,), (13,)), ((3 * 5.0 + 7 * 13,),)),
        (((3,), (7,)), ((5, 13),), ((3 * 5, 3 * 13), (7 * 5, 7 * 13))),
    ],
)
def test_matrix_multiply(a, b, expected):
    """``matrix_multiply(a, b)`` must equal ``expected`` for small tuple matrices."""
    product = matrix_multiply(a, b)
    assert product == expected


def test_mark_location():
    """Call ``mark_location`` on a stream, then delete ``pypdf_pdfLocation.txt``."""
    stream = io.BytesIO(b"abde" * 6000)
    mark_location(stream)
    location_file = Path("pypdf_pdfLocation.txt")
    location_file.unlink()  # cleanup


def test_deprecate_no_replacement():
    """``deprecate_no_replacement`` must emit a ``DeprecationWarning`` with the expected text."""
    # ``match`` is a regular expression, so the dots are escaped to be matched literally.
    with pytest.warns(
            expected_warning=DeprecationWarning,
            match=r"foo is deprecated and will be removed in pypdf 3\.0\.0\."
    ):
        pypdf._utils.deprecate_no_replacement("foo", removed_in="3.0.0")


@pytest.mark.parametrize(
    ("dat", "pos", "to_read", "expected", "expected_pos"),
    [
        (b"abc", 1, 0, b"", 1),
        (b"abc", 1, 1, b"a", 0),
        (b"abc", 2, 1, b"b", 1),
        (b"abc", 2, 2, b"ab", 0),
        (b"abc", 3, 1, b"c", 2),
        (b"abc", 3, 2, b"bc", 1),
        (b"abc", 3, 3, b"abc", 0),
        (b"", 0, 1, None, 0),
        (b"a", 0, 1, None, 0),
        (b"abc", 0, 10, None, 0),
    ],
)
def test_read_block_backwards(dat, pos, to_read, expected, expected_pos):
    """
    Check the bytes returned by ``read_block_backwards`` and the resulting stream position.

    An ``expected`` of ``None`` means the call must raise ``PdfStreamError``.
    """
    stream = io.BytesIO(dat)
    stream.seek(pos)
    if expected is None:
        with pytest.raises(PdfStreamError):
            read_block_backwards(stream, to_read)
    else:
        assert read_block_backwards(stream, to_read) == expected
    assert stream.tell() == expected_pos


def test_read_block_backwards_at_start():
    """``read_previous_line`` raises ``PdfStreamError`` on a freshly created (unseeked) stream."""
    stream = io.BytesIO(b"abc")
    with pytest.raises(PdfStreamError):
        read_previous_line(stream)


@pytest.mark.parametrize(
    ("dat", "pos", "expected", "expected_pos"),
    [
        (b"abc", 1, b"a", 0),
        (b"abc", 2, b"ab", 0),
        (b"abc", 3, b"abc", 0),
        (b"abc\n", 3, b"abc", 0),
        (b"abc\n", 4, b"", 3),
        (b"abc\n\r", 4, b"", 3),
        (b"abc\nd", 5, b"d", 3),
        # Skip over multiple CR/LF bytes
        (b"abc\n\r\ndef", 9, b"def", 3),
    ],
    ids=list(range(8)),
)
def test_read_previous_line(dat, pos, expected, expected_pos):
    """Check the line returned by ``read_previous_line`` and the resulting stream position."""
    stream = io.BytesIO(dat)
    stream.seek(pos)
    line = read_previous_line(stream)
    assert line == expected
    assert stream.tell() == expected_pos


# For an unknown reason, errors are reported if these parameters are passed through pytest.
def test_read_previous_line2():
    """Run ``test_read_previous_line`` directly with inputs spanning two default buffer sizes."""
    block = 2 * io.DEFAULT_BUFFER_SIZE
    newlines = b"\n" * block
    letters = b"d" * block

    # Include a block full of newlines...
    test_read_previous_line(b"abc" + newlines + b"d", block + 4, b"d", 3)
    # Include a block full of non-newline characters
    test_read_previous_line(b"abc\n" + letters, block + 4, letters, 3)
    # Both
    test_read_previous_line(
        b"abcxyz" + newlines + letters,
        4 * io.DEFAULT_BUFFER_SIZE + 6,
        letters,
        6,
    )


def test_get_max_pdf_version_header():
    """Two headers that are both rejected must produce a ``ValueError`` naming both."""
    with pytest.raises(ValueError) as exc:
        _get_max_pdf_version_header(b"", b"PDF-1.2")
    message = exc.value.args[0]
    assert message == "Neither b'' nor b'PDF-1.2' are proper headers"


def test_read_block_backwards_exception():
    """Asking for 7 bytes backwards from offset 6 must raise ``PdfReadError`` with a fixed message."""
    stream = io.BytesIO(b"foobar")
    stream.seek(6)
    with pytest.raises(PdfReadError) as exc:
        read_block_backwards(stream, 7)
    message = exc.value.args[0]
    assert message == "Could not read malformed PDF file"


def test_deprecate_with_replacement():
    """``deprecate_with_replacement`` must emit a ``DeprecationWarning`` naming the replacement."""
    def foo() -> None:
        deprecate_with_replacement("foo", "bar", removed_in="4.3.2")

    # ``match`` is a regular expression, so the dots are escaped to be matched literally.
    with pytest.warns(
        DeprecationWarning,
        match=r"foo is deprecated and will be removed in pypdf 4\.3\.2\. Use bar instead\.",
    ):
        foo()


def test_deprecation_no_replacement():
    """``deprecation_no_replacement`` must raise ``DeprecationError`` with the expected text."""
    expected_pattern = r"foo is deprecated and was removed in pypdf 4\.3\.2\."

    def foo() -> None:
        deprecation_no_replacement("foo", removed_in="4.3.2")

    with pytest.raises(DeprecationError, match=expected_pattern):
        foo()


def test_logger_error(caplog):
    """``logger_error`` must interpolate keyword arguments into the message template."""
    template = "Advanced encoding %(encoding)s not implemented yet"

    # A plain name object.
    name_encoding = NameObject("/Invalid")
    logger_error(template, source=__name__, encoding=name_encoding)
    assert "Advanced encoding /Invalid not implemented yet" in caplog.text

    # A dictionary object.
    dict_encoding = DictionaryObject({NameObject("/key"): TextStringObject("value")})
    logger_error(template, source=__name__, encoding=dict_encoding)
    assert "Advanced encoding {'/key': 'value'} not implemented yet" in caplog.text


def test_rename_kwargs():
    """Check ``rename_kwargs`` with ``fail=False``: a clash raises, the old name alone only warns."""
    def deprecation_bookmark_nofail(**aliases: str) -> Callable:
        """
        Decorator for deprecated term "bookmark".

        To be used for methods and function arguments
            outline_item = a bookmark
            outline = a collection of outline items.
        """

        def decoration(func: Callable) -> Any:  # type: ignore
            @functools.wraps(func)
            def wrapper(*args: Any, **kwargs: Any) -> Any:  # type: ignore
                rename_kwargs(func.__name__, kwargs, aliases, fail=False)
                return func(*args, **kwargs)

            return wrapper

        return decoration

    @deprecation_bookmark_nofail(old_param="new_param")
    def foo(old_param: int = 1, baz: int = 2, new_param: int = 1) -> None:
        pass

    # Passing both the old and the new name is an error.
    clash_message = (
        "foo received both old_param and new_param as an argument. "
        "old_param is deprecated. Use new_param instead."
    )
    with pytest.raises(TypeError, match=clash_message):
        foo(old_param=12, new_param=13)

    # Passing only the old name still works but warns.
    rename_message = "old_param is deprecated as an argument. Use new_param instead"
    with pytest.warns(DeprecationWarning, match=rename_message):
        foo(old_param=12)


def test_rename_kwargs__stacklevel(tmp_path: Path) -> None:
    """The warning from ``rename_kwargs`` must be reported at the caller's line in a script."""
    script_path = tmp_path / "script.py"
    # The asserted line number (23) depends on this literal's exact layout.
    script_path.write_text("""
import functools
import warnings

from pypdf._utils import rename_kwargs

def deprecation(**aliases: str):
    def decoration(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            rename_kwargs(func.__name__, kwargs, aliases, fail=False)
            return func(*args, **kwargs)

        return wrapper

    return decoration

@deprecation(old_param="new_param")
def foo(old_param: int = 1, baz: int = 2, new_param: int = 1) -> None:
    pass

warnings.simplefilter("always")
foo(old_param=12)
    """)

    command = [sys.executable, script_path]
    completed = subprocess.run(command, capture_output=True, text=True)  # noqa: S603
    expected_stderr = (
        f"{script_path}:23: DeprecationWarning: old_param is deprecated as an argument. "
        f"Use new_param instead\n  foo(old_param=12)\n"
    )
    assert completed.returncode == 0
    assert completed.stderr == expected_stderr


@pytest.mark.parametrize(
    ("input_int", "expected_output"),
    [
        (123, "123 Byte"),
        (1234, "1.2 kB"),
        (123_456, "123.5 kB"),
        (1_234_567, "1.2 MB"),
        (1_234_567_890, "1.2 GB"),
        (1_234_567_890_000, "1234.6 GB"),
    ],
)
def test_human_readable_bytes(input_int, expected_output):
    """_human_readable_bytes correctly transforms the integer to a string."""
    rendered = _human_readable_bytes(input_int)
    assert rendered == expected_output


def test_file_class():
    """File class can be instantiated and string representation is ok."""
    empty_file = File(name="image.png", data=b"")
    assert str(empty_file) == "File(name=image.png, data: 0 Byte)"
    # hash(b"") varies between CPython and PyPy
    expected_repr = f"File(name=image.png, data: 0 Byte, hash: {hash(b'')})"
    assert repr(empty_file) == expected_repr


@pytest.mark.parametrize(
    ("text", "expected"),
    [
        ("D:20210318000756", "2021-03-18T00:07:56"),
        ("20210318000756", "2021-03-18T00:07:56"),
        ("D:2021", "2021-01-01T00:00:00"),
        ("D:202103", "2021-03-01T00:00:00"),
        ("D:20210304", "2021-03-04T00:00:00"),
        ("D:2021030402", "2021-03-04T02:00:00"),
        ("D:20210408054711", "2021-04-08T05:47:11"),
        ("D:20210408054711Z", "2021-04-08T05:47:11+00:00"),
        ("D:20210408054711Z00", "2021-04-08T05:47:11+00:00"),
        ("D:20210408054711Z0000", "2021-04-08T05:47:11+00:00"),
        ("D:20210408075331+02'00'", "2021-04-08T07:53:31+02:00"),
        ("D:20210408075331-03'00'", "2021-04-08T07:53:31-03:00"),
    ],
)
def test_parse_datetime(text, expected):
    """The parsed date, rendered as ISO text and cut to ``expected``'s length, must equal ``expected``."""
    parsed = parse_iso8824_date(text)
    rendered = parsed.isoformat() + parsed.strftime("%z")
    assert rendered[: len(expected)] == expected


@pytest.mark.parametrize(
    ("text", "expected"),
    [
        ("", None),
        (None, None),
    ],
)
def test_parse_datetime_edge_cases(text, expected):
    """An empty string and ``None`` must both parse to ``None``."""
    parsed = parse_iso8824_date(text)
    assert parsed == expected


def test_parse_datetime_err():
    """An unparsable date raises ``ValueError``; a date without an offset has no ``tzinfo``."""
    with pytest.raises(ValueError) as ex:
        parse_iso8824_date("D:20210408T054711Z")
    message = ex.value.args[0]
    assert message == "Can not convert date: D:20210408T054711Z"

    naive = parse_iso8824_date("D:20210408054711")
    assert naive.tzinfo is None


def test_format_iso8824_date():
    """Test format_iso8824_date function with timezone handling."""
    # (tzinfo, expected formatted string) for the same wall-clock time.
    cases = [
        (None, "D:20210318120756"),
        (timezone.utc, "D:20210318120756+00'00'"),
        (timezone(timedelta(hours=2, minutes=30)), "D:20210318120756+02'30'"),
        (timezone(timedelta(hours=-5, minutes=-30)), "D:20210318120756-05'30'"),
    ]
    for tzinfo, expected in cases:
        moment = datetime(2021, 3, 18, 12, 7, 56, tzinfo=tzinfo)
        assert format_iso8824_date(moment) == expected


def test_format_iso8824_date_roundtrip():
    """Formatting and re-parsing must give back the same datetime, naive or timezone-aware."""
    zones = (
        None,
        timezone.utc,
        timezone(timedelta(hours=2, minutes=30)),
        timezone(timedelta(hours=-5, minutes=-30)),
    )
    for tzinfo in zones:
        original = datetime(2021, 3, 18, 12, 7, 56, tzinfo=tzinfo)
        formatted = format_iso8824_date(original)
        assert parse_iso8824_date(formatted) == original


def test_is_sublist():
    """Check ``is_sublist`` against a table of (candidate, container, expected) cases."""
    cases = [
        # Basic checks:
        ([0, 1], [0, 1, 2], True),
        ([0, 2], [0, 1, 2], True),
        ([1, 2], [0, 1, 2], True),
        ([0, 3], [0, 1, 2], False),
        # Ensure order is checked:
        ([1, 0], [0, 1, 2], False),
        # Ensure duplicates are handled:
        ([0, 1, 1], [0, 1, 1, 2], True),
        ([0, 1, 1], [0, 1, 2], False),
        # Edge cases with empty lists:
        ([], [0, 1, 2], True),
        ([0, 1], [], False),
        # Self-sublist edge case:
        ([0, 1, 2], [0, 1, 2], True),
    ]
    for candidate, container, expected in cases:
        assert is_sublist(candidate, container) is expected


@pytest.mark.parametrize(
    ("left", "right", "is_less_than"),
    [
        ("1", "2", True),
        ("2", "1", False),
        ("1", "1", False),
        ("1.0", "1.1", True),
        ("1", "1.1", True),
        # Suffix left
        ("1a", "2", True),
        ("2a", "1", False),
        ("1a", "1", False),
        ("1.0a", "1.1", True),
        # I'm not sure about that, but seems special enough that it
        # probably doesn't matter:
        ("1a", "1.1", False),
        # Suffix right
        ("1", "2a", True),
        ("2", "1a", False),
        ("1", "1a", True),
        ("1.0", "1.1a", True),
        ("1", "1.1a", True),
        ("", "0.0.0", True),
        # Just suffix matters ... hm, I think this is actually wrong:
        ("1.0a", "1.0", False),
        ("1.0", "1.0a", True),
    ],
)
def test_version_compare(left, right, is_less_than):
    """``Version(left) < Version(right)`` must be exactly the boolean ``is_less_than``."""
    lower = Version(left)
    upper = Version(right)
    assert (lower < upper) is is_less_than


def test_version_compare_equal_str():
    """A ``Version`` must not compare equal to the plain string it was built from."""
    version = Version("1.0")
    assert version != "1.0"


def test_version_compare_lt_str():
    """Ordering a ``Version`` against a plain string must raise ``ValueError``."""
    version = Version("1.0")
    with pytest.raises(ValueError) as exc:
        version < "1.0"  # noqa: B015
    message = exc.value.args[0]
    assert message == "Version cannot be compared against <class 'str'>"


def test_bad_version():
    """A purely alphabetic version string yields the single component ``(0, "a")``."""
    components = Version("a").components
    assert components == [(0, "a")]


def test_version_eq_hash():
    """Equal versions compare and hash alike; different versions do neither."""
    first = Version("1.0")
    same_as_first = Version("1.0")
    other = Version("1.1")

    # Equality
    assert first == same_as_first
    assert first != other

    # Hashing must agree with equality
    assert hash(first) == hash(same_as_first)
    assert hash(first) != hash(other)


def test_classproperty():
    """Check ``classproperty`` values on the class and on instances, including a ``.getter`` override."""

    class Container:
        @classproperty
        def value1(cls) -> int:  # noqa: N805
            return 42

        @classproperty
        def value2(cls) -> int:  # noqa: N805
            return 1337

        @classproperty
        def value3(cls) -> int:  # noqa: N805
            return 1

        # Re-registering the getter replaces the value returned above.
        @value3.getter
        def value3(cls) -> int:  # noqa: N805
            return 2

    expected_values = {"value1": 42, "value2": 1337, "value3": 2}

    # The properties must resolve identically on the class and on an instance.
    for owner in (Container, Container()):
        for attribute, expected in expected_values.items():
            assert getattr(owner, attribute) == expected


================================================
FILE: tests/test_workflows.py
================================================
"""
Tests in this module behave like user code.

They don't mock/patch anything, they cover typical user needs.
"""

import binascii
from io import BytesIO
from pathlib import Path
from re import findall

import pytest
from PIL import Image, ImageChops
from PIL import __version__ as pil_version

from pypdf import PdfReader, PdfWriter, Transformation
from pypdf.constants import PageAttributes as PG
from pypdf.errors import PdfReadError, PdfReadWarning
from pypdf.generic import (
    ArrayObject,
    ContentStream,
    DictionaryObject,
    NameObject,
    TextStringObject,
    read_object,
)

from . import PROJECT_ROOT, RESOURCE_ROOT, SAMPLE_ROOT, PILContext, get_data_from_url, normalize_warnings
from .utils import get_image_data


def test_basic_features(tmp_path):
    """
    Run a typical writer workflow on a single-page document.

    The same input page is added unchanged, rotated, with a modified media
    box, and merged with a watermark. JavaScript is attached, the document is
    encrypted twice with the same password, and the result is written to disk.
    """
    source_pdf = RESOURCE_ROOT / "crazyones.pdf"
    reader = PdfReader(source_pdf)
    writer = PdfWriter()

    assert len(reader.pages) == 1

    # Output page 1: the input page, unchanged.
    writer.add_page(reader.pages[0])

    # Output page 2: the input page rotated clockwise by 90 degrees.
    rotated_page = reader.pages[0].rotate(90)
    writer.add_page(rotated_page)
    assert writer.pages[0].rotation == 0
    assert writer.pages[1].rotation == 90

    # Output page 3: halve the media box, then delete the media box attribute.
    cropped_page = writer.add_page(reader.pages[0])
    half_size = (
        cropped_page.mediabox.right / 2,
        cropped_page.mediabox.top / 2,
    )
    cropped_page.mediabox.upper_right = half_size
    del cropped_page.mediabox

    # Output page 4: the input page with a watermark taken from another reader.
    watermarked_page = writer.add_page(reader.pages[0])
    watermark_reader = PdfReader(source_pdf)
    watermarked_page.merge_page(watermark_reader.pages[0])

    # Add some JavaScript that launches the print window when the PDF opens.
    # The password dialog may prevent the print dialog from being shown;
    # comment out the encryption lines to try this out in that case.
    writer.add_js("this.print({bUI:true,bSilent:false,bShrinkToFit:true});")

    # Encrypt the new PDF; encrypting a second time should not change anything.
    password = "secret"
    for _ in range(2):
        writer.encrypt(password)

    # Finally, write the result to pypdf-output.pdf.
    target = tmp_path / "pypdf-output.pdf"
    with open(target, "wb") as output_stream:
        writer.write(output_stream)


def test_dropdown_items():
    """The "Nationality" form field must expose its options under ``/Opt``."""
    reader = PdfReader(RESOURCE_ROOT / "libreoffice-form.pdf")
    nationality = reader.get_fields()["Nationality"]
    assert "/Opt" in nationality


def test_pdfreader_file_load():
    """
    Test loading and parsing of a file.

    The text of the first page is extracted and compared with a reference
    text file, first line by line and then as a whole.
    """
    with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as pdf_stream:
        # Load the PDF from the open file object
        first_page = PdfReader(pdf_stream).pages[0]

        # Known-good text, read as raw bytes
        reference = (RESOURCE_ROOT / "crazyones.txt").read_bytes()

        extracted = first_page.extract_text().encode("utf-8")

        # Line-wise comparison first, so a mismatch points at the offending line
        line_pairs = zip(extracted.splitlines(), reference.splitlines())
        for extracted_line, reference_line in line_pairs:
            assert extracted_line == reference_line

        reference = reference.replace(b"\r\n", b"\n")  # fix for windows
        assert extracted == reference


def test_pdfreader_jpeg_image():
    """
    Test loading a file and reading the data of an embedded image.

    The data of the ``/Im4`` XObject on the first page is hex-encoded and
    compared with the contents of a reference text file.
    """
    with open(RESOURCE_ROOT / "jpeg.pdf", "rb") as pdf_stream:
        # Load the PDF from the open file object
        reader = PdfReader(pdf_stream)

        # Hex dump of the expected image data
        expected_hex = (RESOURCE_ROOT / "jpeg.txt").read_text()

        first_page = reader.pages[0]
        x_objects = first_page[PG.RESOURCES]["/XObject"].get_object()
        image_bytes = x_objects["/Im4"].get_data()

        # Compare the hex-encoded image data with the known source
        actual_hex = binascii.hexlify(image_bytes).decode()
        assert actual_hex == expected_hex


def test_decrypt():
    """Decrypt a password-protected document and check its page count and metadata."""
    expected_metadata = {
        "/CreationDate": "D:20220403203552+02'00'",
        "/Creator": "Writer",
        "/Producer": "LibreOffice 6.4",
    }
    with open(RESOURCE_ROOT / "libreoffice-writer-password.pdf", "rb") as pdf_stream:
        reader = PdfReader(pdf_stream)
        assert reader.is_encrypted is True

        reader.decrypt("openpassword")
        assert len(reader.pages) == 1
        # is_encrypted still reports True after decrypt() has been called.
        assert reader.is_encrypted is True

        assert dict(reader.metadata) == expected_metadata


def test_text_extraction_encrypted():
    """Text of an encrypted document can be extracted once it has been decrypted."""
    reader = PdfReader(RESOURCE_ROOT / "libreoffice-writer-password.pdf")
    assert reader.is_encrypted is True
    reader.decrypt("openpassword")

    extracted = reader.pages[0].extract_text()
    assert extracted.strip().startswith("Lorem ipsum dolor sit amet")


@pytest.mark.parametrize("degree", [0, 90, 180, 270, 360, -90])
def test_rotate(degree):
    """Rotating a page by a multiple of 90 degrees must not raise."""
    with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as pdf_stream:
        first_page = PdfReader(pdf_stream).pages[0]
        first_page.rotate(degree)


def test_rotate_45():
    """Rotating by an angle that is not a multiple of 90 raises ``ValueError``."""
    with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as pdf_stream:
        first_page = PdfReader(pdf_stream).pages[0]
        with pytest.raises(ValueError) as exc:
            first_page.rotate(45)
        message = exc.value.args[0]
        assert message == "Rotation angle must be a multiple of 90"


@pytest.mark.enable_socket
@pytest.mark.slow
@pytest.mark.parametrize(
    ("enable", "url", "pages"),
    [
        (True, "https://arxiv.org/pdf/2201.00214.pdf", [0, 1, 5, 10]),
        (
            True,
            "https://github.com/py-pdf/sample-files/raw/main/009-pdflatex-geotopo/GeoTopo.pdf",
            [0, 1, 5, 10],
        ),
        (True, "https://arxiv.org/pdf/2201.00151.pdf", [0, 1, 5, 10]),
        (True, "https://arxiv.org/pdf/1707.09725.pdf", [0, 1, 5, 10]),
        (True, "https://arxiv.org/pdf/2201.00021.pdf", [0, 1, 5, 8]),
        (True, "https://arxiv.org/pdf/2201.00037.pdf", [0, 1, 5, 10]),
        (True, "https://arxiv.org/pdf/2201.00069.pdf", [0, 1, 5, 10]),
        (True, "https://arxiv.org/pdf/2201.00178.pdf", [0, 1, 5, 10]),
        (True, "https://arxiv.org/pdf/2201.00201.pdf", [0, 1, 5, 8]),
        (True, "https://arxiv.org/pdf/1602.06541.pdf", [0, 1, 5, 10]),
        (True, "https://arxiv.org/pdf/2201.00200.pdf", [0, 1, 5, 6]),
        (True, "https://arxiv.org/pdf/2201.00022.pdf", [0, 1, 5, 10]),
        (True, "https://arxiv.org/pdf/2201.00029.pdf", [0, 1, 6, 10]),
        # #1145
        (True, "https://github.com/py-pdf/pypdf/files/9174594/2017.pdf", [0]),
        # #1145, remaining issue (empty arguments for FlateEncoding)
        (
            True,
            "https://github.com/py-pdf/pypdf/files/9175966/2015._pb_decode_pg0.pdf",
            [0],
        ),
        # 6 instead of 5: as there is an issue in page 5 (missing objects)
        # and too complex to handle the warning without hiding real regressions
        (True, "https://arxiv.org/pdf/1601.03642.pdf", [0, 1, 5, 7]),
        (
            True,
            "https://github.com/py-pdf/pypdf/files/3796761/17343_2008_Order_09-Jan-2019.pdf",
            [0, 1],
        ),
        (
            True,
            "https://github.com/py-pdf/pypdf/files/8884471/ssi_manwaring.pdf",
            [0, 1],
        ),
        (True, "https://github.com/py-pdf/pypdf/files/8884469/999092.pdf", [0, 1]),
        (
            True,
            "file://" + str(RESOURCE_ROOT / "test Orient.pdf"),
            [0],
        ),  # TODO: preparation of text orientation validation
        (
            True,
            "https://github.com/py-pdf/pypdf/files/8884470/fdocuments.in_sweet-fundamentals-of-crystallography.pdf",
            [0, 1, 34, 35, 36, 118, 119, 120, 121],
        ),
        (True, "https://github.com/py-pdf/pypdf/files/8884493/998167.pdf", [0]),
        (
            True,
            "https://github.com/user-attachments/files/18382039/971703.pdf",
            [0, 1, 5, 8, 14],
        ),
        (  # faulty PDF, wrongly linearized and with 2 trailer, second with /Root
            True,
            "https://github.com/user-attachments/files/18382034/989691.pdf",
            [0],
        ),
    ],
)
def test_extract_textbench(enable, url, pages):
    """
    Extract text from selected pages of a collection of documents.

    Args:
        enable: Entries with a falsy value are skipped without doing anything.
        url: Location of the document; its last path segment is passed to
            ``get_data_from_url`` as the file name.
        pages: Zero-based indices of the pages to extract text from.

    A ``PdfReadWarning`` raised while reading or extracting is ignored.
    """
    if not enable:
        return

    # Flip to True to dump the extracted text while debugging.
    print_result = False
    file_name = url.split("/")[-1]
    try:
        reader = PdfReader(BytesIO(get_data_from_url(url, file_name)))
        for page_number in pages:
            if print_result:
                print(f"**************** {url} / page {page_number} ****************")
            extracted = reader.pages[page_number].extract_text()
            if print_result:
                print(f"{extracted}\n*****************************\n")
    except PdfReadWarning:
        pass


def test_transform_compress_identical_objects():
    """Scaled pages must keep their distinct text after ``compress_identical_objects``."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "two-different-pages.pdf")

    # Apply the same scaling to every page.
    for page in writer.pages:
        page.add_transformation(Transformation().scale(sx=0.8, sy=0.8))
    writer.compress_identical_objects()

    buffer = BytesIO()
    writer.write(buffer)

    written = PdfReader(buffer)
    first_text = written.pages[0].extract_text()
    second_text = written.pages[1].extract_text()
    assert first_text.strip() == "1"
    assert second_text.strip() == "2"


@pytest.mark.slow
def test_orientations():
    """
    Exercise the positional arguments of ``extract_text`` and orientation filtering.

    Checks which argument combinations are accepted, which raise, and that
    each requested orientation yields only the matching markers.
    """
    page = PdfReader(RESOURCE_ROOT / "test Orient.pdf").pages[0]
    marker_pattern = "\\((.)\\)"

    # Argument combinations that must be accepted.
    for accepted in (("", ""), ("", "", 0), ("", "", 0, 200), ()):
        page.extract_text(*accepted)
    assert findall(marker_pattern, page.extract_text()) == ["T", "B", "L", "R"]

    with pytest.raises(Exception):
        page.extract_text(None)
    page.extract_text("", 0)

    # Argument combinations that must be rejected.
    for rejected in (("", "", None), ("", "", 0, ""), (0, "")):
        with pytest.raises(Exception):
            page.extract_text(*rejected)

    page.extract_text(0, 0)
    page.extract_text(orientations=0)

    # Requested orientation(s) -> markers expected in the extracted text.
    expectations = (
        (0, ["T"]),
        (90, ["L"]),
        (180, ["B"]),
        (270, ["R"]),
        ((0,), ["T"]),
        ((0, 180), ["T", "B"]),
        ((45,), []),
    )
    for req, rst in expectations:
        found = findall(marker_pattern, page.extract_text(req))
        assert found == rst, f"extract_text({req}) => {rst}"


@pytest.mark.samples
@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("base_path", "overlay_path"),
    [
        (
            "resources/crazyones.pdf",
            "sample-files/013-reportlab-overlay/reportlab-overlay.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381707/tika-935981.pdf",
            "sample-files/013-reportlab-overlay/reportlab-overlay.pdf",
        ),
    ],
)
def test_overlay(pdf_file_path, base_path, overlay_path):
    """
    Merge the first page of an overlay document onto every page of a base document.

    ``base_path`` is either a path relative to the project root or an HTTP(S)
    URL; the result is written to ``pdf_file_path``.
    """
    base_source = (
        BytesIO(get_data_from_url(base_path, name="tika-935981.pdf"))
        if base_path.startswith("http")
        else PROJECT_ROOT / base_path
    )
    writer = PdfWriter(clone_from=base_source)

    overlay_page = PdfReader(PROJECT_ROOT / overlay_path).pages[0]
    for page in writer.pages:
        page.merge_page(overlay_page)

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


@pytest.mark.enable_socket
@pytest.mark.slow
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381697/tika-924546.pdf",
            "tika-924546.pdf",
        )
    ],
)
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_merge_with_warning(tmp_path, url, name):
    """Append a downloaded document to a writer and write the result to disk."""
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    writer = PdfWriter()
    writer.append(reader)
    # This could actually be a performance bottleneck:
    output_path = tmp_path / "tmp.merged.pdf"
    writer.write(output_path)


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381757/tika-980613.pdf",
            "tika-980613.pdf",
        )
    ],
)
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_merge(tmp_path, url, name):
    """Append a downloaded document to a writer and write the result to disk."""
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    writer = PdfWriter()
    writer.append(reader)
    output_path = tmp_path / "tmp.merged.pdf"
    writer.write(output_path)


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name", "expected_metadata"),
    [
        (
            "https://github.com/user-attachments/files/18381708/tika-935996.pdf",
            "tika-935996.pdf",
            {
                "/Author": "Unknown",
                "/CreationDate": "Thursday, May 06, 1999 3:56:54 PM",
                "/Creator": r"C:\DEB\6338",
                "/Keywords": "",
                "/Producer": "Acrobat PDFWriter 3.02 for Windows",
                "/Subject": "",
                "/Title": r"C:\DEB\6338-6R.PDF",
            },
        )
    ],
)
def test_get_metadata(url, name, expected_metadata):
    """The document information of the downloaded file must equal ``expected_metadata``."""
    pdf_bytes = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(pdf_bytes))
    metadata = reader.metadata
    assert expected_metadata == metadata


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name", "strict", "exception"),
    [
        (
            "https://github.com/user-attachments/files/16624503/tika-938702.pdf",
            "tika-938702.pdf",
            False,
            None,  # iss #1090 is now fixed
        ),
        (
            "https://github.com/user-attachments/files/18381715/tika-942358.pdf",
            "tika-942358.pdf",
            False,
            None,
        ),
        (
            "https://github.com/user-attachments/files/18381684/tika-911260.pdf",
            "tika-911260.pdf",
            False,
            None,
        ),
        (
            "https://github.com/user-attachments/files/18381766/tika-992472.pdf",
            "tika-992472.pdf",
            False,
            None,
        ),
        (
            "https://github.com/user-attachments/files/18381756/tika-978477.pdf",
            "tika-978477.pdf",
            False,
            None,
        ),
        (
            "https://github.com/user-attachments/files/18381731/tika-960317.pdf",
            "tika-960317.pdf",
            False,
            None,
        ),
        (
            "https://github.com/user-attachments/files/18381701/tika-930513.pdf",
            "tika-930513.pdf",
            False,
            None,
        ),
        (
            "https://github.com/user-attachments/files/18381691/tika-918113.pdf",
            "tika-918113.pdf",
            True,
            None,
        ),
        (
            "https://github.com/user-attachments/files/18381711/tika-940704.pdf",
            "tika-940704.pdf",
            True,
            None,
        ),
        (
            "https://github.com/user-attachments/files/18381752/tika-976488.pdf",
            "tika-976488.pdf",
            True,
            None,
        ),
        (
            "https://github.com/user-attachments/files/18381716/tika-948176.pdf",
            "tika-948176.pdf",
            True,
            None,
        ),
    ],
)
def test_extract_text(url, name, strict, exception):
    """
    Extract the text of every page of a downloaded document.

    Args:
        url: Download location of the document.
        name: File name passed to ``get_data_from_url``.
        strict: Value forwarded to ``PdfReader(strict=...)``.
        exception: Falsy when extraction must succeed, otherwise a pair of
            ``(exception type, expected first exception argument)``.
    """
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)), strict=strict)

    if exception:
        expected_type, expected_message = exception
        with pytest.raises(expected_type) as ex_info:
            for page in reader.pages:
                page.extract_text()
        assert ex_info.value.args[0] == expected_message
        return

    for page in reader.pages:
        page.extract_text()


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381710/tika-938702.pdf",
            "tika-938702.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381725/tika-957304.pdf",
            "tika-957304.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381690/tika-915194.pdf",
            "tika-915194.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381717/tika-950337.pdf",
            "tika-950337.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381734/tika-962292.pdf",
            "tika-962292.pdf",
        ),
    ],
)
def test_compress_raised(url, name):
    """Compress the content streams of every page of a cloned document."""
    pdf_bytes = get_data_from_url(url, name=name)
    source = PdfReader(BytesIO(pdf_bytes))

    writer = PdfWriter()
    writer.clone_document_from_reader(source)

    # no more error since iss #1090 fix
    for cloned_page in writer.pages:
        cloned_page.compress_content_streams()


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381733/tika-961883.pdf",
            "tika-961883.pdf",
        ),
    ],
)
def test_get_fields_warns(tmp_path, caplog, url, name):
    """``get_fields`` returns no fields for this document and logs the expected warnings."""
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))

    report_path = tmp_path / "tmp.txt"
    with open(report_path, "w") as report:
        fields = reader.get_fields(fileobj=report)

    expected_warnings = [
        "Ignoring wrong pointing object 1 65536 (offset 0)",
        "Ignoring wrong pointing object 2 65536 (offset 0)",
        "Object 2 0 not defined.",
    ]
    assert fields == {}
    assert normalize_warnings(caplog.text) == expected_warnings


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381713/tika-942050.pdf",
            "tika-942050.pdf",
        ),
    ],
)
def test_get_fields_no_warning(tmp_path, url, name):
    """``get_fields`` finds ten fields in this document while writing its report to a file."""
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))

    report_path = tmp_path / "tmp.txt"
    with open(report_path, "w") as report:
        fields = reader.get_fields(fileobj=report)

    assert len(fields) == 10


@pytest.mark.enable_socket
def test_scale_rectangle_indirect_object():
    """Scaling every page of the cloned sample document must not raise."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381778/tika-999944.pdf",
        name="tika-999944.pdf",
    )
    writer = PdfWriter(clone_from=BytesIO(pdf_bytes))

    for cloned_page in writer.pages:
        cloned_page.scale(sx=2, sy=3)


def test_merge_output(caplog):
    """
    Merge two local documents and compare the output bytes with a stored reference.

    The writer is closed in a ``finally`` block so that cleanup also happens
    when the comparison fails or any merge step raises.
    """
    # Arrange
    base = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR.pdf"
    crazy = RESOURCE_ROOT / "crazyones.pdf"
    expected = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf"

    merger = PdfWriter()
    try:
        # Act
        merger.append(base)
        merger.merge(1, crazy)
        stream = BytesIO()
        merger.write(stream)

        # Assert
        actual = stream.getvalue()
        with open(expected, "rb") as fp:
            expected_data = fp.read()
        if actual != expected_data:
            # Only report the lengths instead of diffing the binary data.
            # See https://github.com/pytest-dev/pytest/issues/9124
            pytest.fail(
                f"len(actual) = {len(actual):,} vs len(expected) = {len(expected_data):,}"
            )
    finally:
        # Cleanup
        merger.close()


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381767/tika-994636.pdf",
            "tika-994636.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381719/tika-952133.pdf",
            "tika-952133.pdf",
        ),
        (  # JPXDecode
            "https://github.com/user-attachments/files/18381688/tika-914568.pdf",
            "tika-914568.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381718/tika-952016.pdf",
            "tika-952016.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18382223/965118.pdf",
            "tika-965118.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381729/tika-959184.pdf",
            "tika-959184.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381727/tika-958496.pdf",
            "tika-958496.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381744/tika-972174.pdf",
            "tika-972174.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381745/tika-972243.pdf",
            "tika-972243.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381743/tika-969502.pdf",
            "tika-969502.pdf",
        ),
        ("https://arxiv.org/pdf/2201.00214.pdf", "arxiv-2201.00214.pdf"),
    ],
)
def test_image_extraction(url, name):
    """
    Extract every image of a downloaded document into ``extracted-images``.

    The written files are removed afterwards, including when extraction
    fails part-way, so a failing run does not leave files behind.
    """
    data = BytesIO(get_data_from_url(url, name=name))
    reader = PdfReader(data)

    images_extracted = []
    root = Path("extracted-images")
    # exist_ok avoids the check-then-create race when several tests share the directory.
    root.mkdir(exist_ok=True)

    do_cleanup = True  # set this to False for manual inspection
    try:
        with PILContext():
            for page in reader.pages:
                for image in page.images:
                    filename = root / image.name
                    # Register before writing so a partially written file is cleaned up too.
                    images_extracted.append(filename)
                    with open(filename, "wb") as img:
                        img.write(image.data)
    finally:
        # Cleanup
        if do_cleanup:
            for filepath in images_extracted:
                if filepath.exists():
                    filepath.unlink()


@pytest.mark.enable_socket
def test_image_extraction_strict():
    """
    Extract every image of a document opened with ``strict=True``.

    The written files are removed afterwards, including when extraction
    fails part-way, so a failing run does not leave files behind.
    """
    # Emits log messages
    url = "https://github.com/user-attachments/files/18381687/tika-914102.pdf"
    name = "tika-914102.pdf"
    data = BytesIO(get_data_from_url(url, name=name))
    reader = PdfReader(data, strict=True)

    images_extracted = []
    root = Path("extracted-images")
    # exist_ok avoids the check-then-create race when several tests share the directory.
    root.mkdir(exist_ok=True)

    do_cleanup = True  # set this to False for manual inspection
    try:
        for page in reader.pages:
            for image in page.images:
                filename = root / image.name
                # Register before writing so a partially written file is cleaned up too.
                images_extracted.append(filename)
                with open(filename, "wb") as fp:
                    fp.write(image.data)
    finally:
        # Cleanup
        if do_cleanup:
            for filepath in images_extracted:
                if filepath.exists():
                    filepath.unlink()


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381754/tika-977609.pdf",
            "tika-977609.pdf",
        ),
    ],
)
def test_image_extraction2(url, name):
    """
    Extract every image of a downloaded document into ``extracted-images``.

    The written files are removed afterwards, including when extraction
    fails part-way, so a failing run does not leave files behind.
    """
    data = BytesIO(get_data_from_url(url, name=name))
    reader = PdfReader(data)

    images_extracted = []
    root = Path("extracted-images")
    # exist_ok avoids the check-then-create race when several tests share the directory.
    root.mkdir(exist_ok=True)

    do_cleanup = True  # set this to False for manual inspection
    try:
        for page in reader.pages:
            for image in page.images:
                filename = root / image.name
                # Register before writing so a partially written file is cleaned up too.
                images_extracted.append(filename)
                with open(filename, "wb") as img:
                    img.write(image.data)
    finally:
        # Cleanup
        if do_cleanup:
            for filepath in images_extracted:
                if filepath.exists():
                    filepath.unlink()


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381692/tika-918137.pdf",
            "tika-918137.pdf",
        ),
        (
            "https://github.com/user-attachments/files/22596566/7552c42e9280b4476e59e77acc0bc812.pdf",
            "7552c42e9280b4476e59e77acc0bc812.pdf",
        ),
    ],
)
def test_get_outline(url, name):
    """Reading the outline of the downloaded document must not raise."""
    pdf_bytes = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(pdf_bytes))
    # Only the attribute access is exercised; the value itself is not checked.
    reader.outline


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381707/tika-935981.pdf",
            "tika-935981.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381709/tika-937334.pdf",
            "tika-937334.pdf",
        ),
    ],
)
def test_get_xfa(url, name):
    """Reading the ``xfa`` attribute of the downloaded document must not raise."""
    pdf_bytes = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(pdf_bytes))
    # Only the attribute access is exercised; the value itself is not checked.
    reader.xfa


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name", "strict"),
    [
        (
            "https://github.com/user-attachments/files/18381765/tika-988698.pdf",
            "tika-988698.pdf",
            False,
        ),
        (
            "https://github.com/user-attachments/files/18382162/914133.pdf",
            "tika-914133.pdf",
            False,
        ),
        (
            "https://github.com/user-attachments/files/18381685/tika-912552.pdf",
            "tika-912552.pdf",
            False,
        ),
        (
            "https://github.com/user-attachments/files/18381687/tika-914102.pdf",
            "tika-914102.pdf",
            True,
        ),
    ],
)
def test_get_fonts(url, name, strict):
    """Collecting the fonts of every page must not raise, in strict or non-strict mode."""
    pdf_bytes = get_data_from_url(url, name=name)
    reader = PdfReader(BytesIO(pdf_bytes), strict=strict)
    for current_page in reader.pages:
        current_page._get_fonts()


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name", "strict"),
    [
        (
            "https://github.com/user-attachments/files/18382060/tika-942303.pdf",
            "tika-942303.pdf",
            True,
        ),
        (
            "https://github.com/user-attachments/files/18381707/tika-935981.pdf",
            "tika-935981.pdf",
            True,
        ),
        (
            "https://github.com/user-attachments/files/18381738/tika-967399.pdf",
            "tika-967399.pdf",
            True,
        ),
        (
            "https://github.com/user-attachments/files/18381707/tika-935981.pdf",
            "tika-935981.pdf",
            False,
        ),
    ],
)
def test_get_xmp(url, name, strict):
    """
    Read every XMP metadata attribute of a downloaded document.

    Nothing is asserted about the values; the test only checks that each
    attribute can be read without raising. Documents without XMP metadata
    are accepted as they are.
    """
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)), strict=strict)
    xmp_info = reader.xmp_metadata
    if not xmp_info:
        return

    # Read in this fixed order.
    attribute_names = (
        "dc_contributor",
        "dc_coverage",
        "dc_creator",
        "dc_date",
        "dc_description",
        "dc_format",
        "dc_identifier",
        "dc_language",
        "dc_publisher",
        "dc_relation",
        "dc_rights",
        "dc_source",
        "dc_subject",
        "dc_title",
        "dc_type",
        "pdf_keywords",
        "pdf_pdfversion",
        "pdf_producer",
        "xmp_create_date",
        "xmp_modify_date",
        "xmp_metadata_date",
        "xmp_creator_tool",
        "xmpmm_document_id",
        "xmpmm_instance_id",
        "custom_properties",
    )
    for attribute_name in attribute_names:
        getattr(xmp_info, attribute_name)


@pytest.mark.enable_socket
def test_tounicode_is_identity():
    """Text extraction from the first page of the sample thesis must not raise."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9998335/FP_Thesis.pdf",
        name="FP_Thesis.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes), strict=False)
    first_page = reader.pages[0]
    first_page.extract_text()


@pytest.mark.enable_socket
def test_append_forms():
    """
    Append two form documents under distinct top-level names.

    The merged document must expose as many form text fields as both
    sources together.
    """
    # from #1538
    writer = PdfWriter()

    # (download URL, cache file name, top-level form name)
    sources = (
        ("https://github.com/py-pdf/pypdf/files/10367412/pdfa.pdf", "form_a.pdf", "form_a"),
        ("https://github.com/py-pdf/pypdf/files/10367413/pdfb.pdf", "form_b.pdf", "form_b"),
    )
    source_readers = []
    for url, name, top_name in sources:
        source_reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
        source_reader.add_form_topname(top_name)
        writer.append(source_reader)
        source_readers.append(source_reader)

    buffer = BytesIO()
    writer.write(buffer)
    merged = PdfReader(buffer)

    merged_count = len(merged.get_form_text_fields())
    source_count = sum(len(src.get_form_text_fields()) for src in source_readers)
    assert merged_count == source_count


@pytest.mark.enable_socket
def test_extra_test_iss1541():
    """
    Round-trip content streams with an extra operator and read a damaged copy.

    An ``EMC`` and an ``E!C`` operator are each inserted into the page
    content, which is written out and parsed again. Afterwards every
    ``EI \\n`` sequence of the raw file is replaced with ``E! \\n`` and text
    extraction must fail with "Unexpected end of stream".
    """
    url = "https://github.com/py-pdf/pypdf/files/10418158/tst_iss1541.pdf"
    name = "tst_iss1541.pdf"
    data = BytesIO(get_data_from_url(url, name=name))
    reader = PdfReader(data, strict=False)
    reader.pages[0].extract_text()

    for extra_operator in (b"EMC", b"E!C"):
        content = ContentStream(reader.pages[0]["/Contents"], None, None)
        content.operations.insert(-1, ([], extra_operator))
        buffer = BytesIO()
        content.write_to_stream(buffer)
        buffer.seek(0)
        # Accessing .operations re-parses the stream that was just written.
        ContentStream(read_object(buffer, None, None), None, None).operations

    damaged_bytes = data.getvalue().replace(b"EI \n", b"E! \n")
    reader = PdfReader(BytesIO(damaged_bytes), strict=False)
    with pytest.raises(PdfReadError) as exc:
        reader.pages[0].extract_text()
    assert exc.value.args[0] == "Unexpected end of stream"


@pytest.mark.enable_socket
def test_fields_returning_stream():
    """This problem was reported in #424"""
    pdf_bytes = get_data_from_url(
        "https://github.com/mstamy2/PyPDF2/files/1948267/Simple.form.pdf",
        name="tst_iss424.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes), strict=False)
    text_fields = reader.get_form_text_fields()
    assert "BtchIssQATit_time" in text_fields["TimeStampData"]


def test_replace_image(tmp_path):
    """
    Replace an image inside a writer and check the result and the error paths.

    Covers the visual distance after a plain replacement, the larger distance
    after a low-quality replacement, and the errors raised for reader-owned
    images, non-PIL arguments, inline images and a missing PIL import.
    """

    def mean_difference(left, right):
        # Very simple image distance: mean of the grayscale difference image.
        delta = ImageChops.difference(left, right)
        width, height = delta.size
        return sum(get_image_data(delta.convert("L"))) / (width * height)

    pil_major = int(pil_version.split(".")[0])

    writer = PdfWriter(clone_from=RESOURCE_ROOT / "labeled-edges-center-image.pdf")
    reader = PdfReader(RESOURCE_ROOT / "jpeg.pdf")
    new_image = reader.pages[0].images[0].image
    if pil_major < 9:
        new_image = new_image.convert("RGB")
    writer.pages[0].images[0].replace(new_image)

    buffer = BytesIO()
    writer.write(buffer)
    written = PdfReader(buffer)
    if pil_major >= 9:
        assert written.pages[0].images[0].image.mode == "RGBA"

    baseline = mean_difference(written.pages[0].images[0].image, new_image)
    assert baseline < 1.5

    new_image = new_image.convert("RGB")  # quality does not apply to RGBA/JP2
    writer.pages[0].images[0].replace(new_image, quality=20)
    low_quality = mean_difference(writer.pages[0].images[0].image, new_image)
    assert low_quality > baseline

    # extra tests for coverage
    with pytest.raises(TypeError) as exc:
        reader.pages[0].images[0].replace(new_image)
    assert exc.value.args[0] == "Cannot update an image not belonging to a PdfWriter."

    writer_image = writer.pages[0].images[0]
    with pytest.raises(TypeError) as exc:
        writer_image.replace(reader.pages[0].images[0])  # missing .image
    assert exc.value.args[0] == "new_image shall be a PIL Image"

    writer_image.indirect_reference = None  # to behave like an inline image
    with pytest.raises(TypeError) as exc:
        writer_image.replace(reader.pages[0].images[0].image)
    assert exc.value.args[0] == "Cannot update an inline image."

    import pypdf  # noqa: PLC0415

    # Pretend PIL could not be imported; always restore the flag afterwards.
    try:
        pypdf._page.pil_not_imported = True
        with pytest.raises(ImportError):
            writer_image.replace(reader.pages[0].images[0].image)
    finally:
        pypdf._page.pil_not_imported = False


@pytest.mark.enable_socket
def test_inline_images():
    """
    Check access to inline images and the related error cases.

    This problem was reported in #424
    """
    url = "https://arxiv.org/pdf/2201.00151.pdf"
    name = "2201.00151.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    url = "https://github.com/py-pdf/pypdf/assets/4083478/28e8b87c-be2c-40d9-9c86-15c7819021bf"
    name = "inline4.png"
    img_ref = Image.open(BytesIO(get_data_from_url(url, name=name)))
    # The fifth image of the second page must equal the downloaded reference image.
    assert get_image_data(reader.pages[1].images[4].image) == get_image_data(img_ref)
    # Unknown image key
    with pytest.raises(KeyError):
        reader.pages[0].images["~999~"]
    # Remove a colour space entry and expect reading image "~1~" to fail.
    del reader.pages[1]["/Resources"]["/ColorSpace"]["/R124"]
    reader.pages[1].inline_images = None  # to force recalculation
    with pytest.raises(PdfReadError):
        reader.pages[1].images["~1~"]

    # Append a lone ``BI`` operation to the content; listing the image keys
    # afterwards is expected not to raise.
    co = reader.pages[0].get_contents()
    co.operations.append(([], b"BI"))
    reader.pages[0][NameObject("/Contents")] = co
    reader.pages[0].images.keys()

    with pytest.raises(TypeError) as exc:
        reader.pages[0].images[0].replace(img_ref)
    assert exc.value.args[0] == "Cannot update an inline image."

    # Iterate over the items of a slice of the images.
    _a = {}
    for x, y in reader.pages[2].images[0:-2].items():
        _a[x] = y  # noqa: PERF403  # Testing code and easier to read this way.
    # A tuple id that does not exist
    with pytest.raises(KeyError) as exc:
        reader.pages[2]._get_image(("test",))

    url = "https://github.com/py-pdf/pypdf/files/15233597/bug1065245.pdf"
    name = "iss2598c.pdf"  # test data also used in test_images.py/test_inline_image_extraction()
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    assert len(reader.pages[0].images) == 3


@pytest.mark.enable_socket
def test_issue1899():
    """
    Extract the text of every page of ``lv2018tconv.pdf``.

    The test passes when no page raises during extraction; the extracted
    text itself is not checked.
    """
    url = "https://github.com/py-pdf/pypdf/files/11801077/lv2018tconv.pdf"
    name = "lv2018tconv.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    for page in reader.pages:
        page.extract_text()


@pytest.mark.enable_socket
def test_cr_with_cm_operation():
    """Issue #2138"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12483807/AEO.1172.pdf",
        name="iss2138.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    expected_excerpt = """STATUS: FNL
STYLE: 1172 1172 KNIT SHORTIE SUMMER-B 2023
Company: AMERICAN EAGLE OUTFITTERS
Division / Dept: 50 / 170
Season: SUMMER-B 2023"""
    # Only an excerpt is compared: a white space is currently still missing
    # on the last line, so a full comparison is not possible.
    assert expected_excerpt in first_page.extract_text()


def remove_trailing_whitespace(text: str) -> str:
    """
    Strip the text as a whole and remove trailing whitespace from each line.

    Args:
        text: The text to normalize.

    Returns:
        The lines of the stripped text, each right-stripped, joined by ``\\n``.

    """
    cleaned_lines = []
    for raw_line in text.strip().splitlines():
        cleaned_lines.append(raw_line.rstrip())
    return "\n".join(cleaned_lines)


@pytest.mark.samples
@pytest.mark.parametrize(
    ("pdf_path", "expected_path"),
    [
        (
            SAMPLE_ROOT / "026-latex-multicolumn/multicolumn.pdf",
            RESOURCE_ROOT / "multicolumn-lorem-ipsum.txt",
        ),
        (
            SAMPLE_ROOT / "010-pdflatex-forms/pdflatex-forms.pdf",
            RESOURCE_ROOT / "010-pdflatex-forms.txt",
        ),
    ],
)
def test_text_extraction_layout_mode(pdf_path, expected_path):
    """Compare the layout-mode text of the first page with a stored reference text."""
    first_page = PdfReader(pdf_path).pages[0]
    extracted = first_page.extract_text(extraction_mode="layout")
    reference = expected_path.read_text(encoding="utf-8")
    # Trailing whitespace is irrelevant for this comparison.
    extracted_clean = remove_trailing_whitespace(extracted)
    reference_clean = remove_trailing_whitespace(reference)
    assert extracted_clean == reference_clean


@pytest.mark.enable_socket
def test_layout_mode_space_vertically():
    """Layout extraction with ``layout_mode_space_vertically=False`` matches the reference text."""
    pdf_bytes = get_data_from_url(name="iss2138.pdf")
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    reference_file = RESOURCE_ROOT / "AEO.1172.layout.txt"
    # rstrip() drops the automatically added final newline of the reference file
    reference_text = reference_file.read_text(encoding="utf-8").rstrip()
    extracted = first_page.extract_text(
        extraction_mode="layout", layout_mode_space_vertically=False
    )
    assert reference_text == extracted


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("rotation", "strip_rotated"), [(90, True), (180, False), (270, True)]
)
def test_layout_mode_rotations(rotation, strip_rotated):
    """
    Check layout extraction of a page whose rotation was moved into its content.

    With ``strip_rotated`` an empty text is expected, otherwise the stored
    reference text for the 180 degree rotation.
    """
    pdf_bytes = get_data_from_url(name="iss2138.pdf")
    writer = PdfWriter(clone_from=BytesIO(pdf_bytes))
    page = writer.pages[0].rotate(rotation)
    page.transfer_rotation_to_content()
    if strip_rotated:
        reference_text = ""
    else:
        reference_file = RESOURCE_ROOT / "AEO.1172.layout.rot180.txt"
        # rstrip() removes the automatically added final newline
        reference_text = reference_file.read_text(encoding="utf-8").rstrip()
    extracted = page.extract_text(
        extraction_mode="layout",
        layout_mode_space_vertically=False,
        layout_mode_strip_rotated=strip_rotated,
    )
    assert reference_text == extracted


def test_text_extraction_invalid_mode():
    """An unknown ``extraction_mode`` is expected to raise a ``ValueError``."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    with pytest.raises(ValueError, match="Invalid text extraction mode"):
        first_page = reader.pages[0]
        first_page.extract_text(extraction_mode="foo")  # type: ignore


@pytest.mark.enable_socket
def test_get_page_showing_field():
    """
    Check ``get_pages_showing_field`` on a reader and on a writer cloned from it.

    Uses testfile from #2452 in order to get fields on multiple pages,
        choices boxes,...
    """
    url = "https://github.com/py-pdf/pypdf/files/14031491/Form_Structure_v50.pdf"
    name = "iss2452.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name)))
    writer = PdfWriter(clone_from=reader)

    def page_numbers(document, field):
        """Return the page numbers of the pages ``document`` reports for ``field``."""
        return [p.page_number for p in document.get_pages_showing_field(field)]

    # The field arrays are queried many times below; bind them once.
    reader_fields = reader.trailer["/Root"]["/AcroForm"]["/Fields"]
    writer_fields = writer._root_object["/AcroForm"]["/Fields"]

    # validate with Field:  only works on Reader (no get_fields on writer yet)
    fld = reader.get_fields()
    assert page_numbers(reader, fld["FormVersion"]) == [0]

    # validate with dictionary object
    # NRCategory field is a radio box
    assert page_numbers(reader, reader_fields[8].get_object()) == [0, 0, 0, 0, 0]
    assert page_numbers(writer, writer_fields[8].get_object()) == [0, 0, 0, 0, 0]

    # validate with IndirectObject
    # SiteID field is a textbox on multiple pages
    assert page_numbers(reader, reader_fields[99]) == [0, 1]
    assert page_numbers(writer, writer_fields[99]) == [0, 1]
    # test directly on the widget:
    assert page_numbers(reader, reader_fields[99]["/Kids"][1]) == [1]
    assert page_numbers(writer, writer_fields[99]["/Kids"][1]) == [1]

    # Exceptions:
    # Invalid Object
    # The message is checked for the reader as well as for the writer; a
    # shared ``exc`` name would only keep the last exception.
    with pytest.raises(ValueError, match="Field type is invalid"):
        reader.get_pages_showing_field(None)
    with pytest.raises(ValueError, match="Field type is invalid"):
        writer.get_pages_showing_field(None)

    # Damage Field
    del reader_fields[1].get_object()["/FT"]
    del writer_fields[1].get_object()["/FT"]
    with pytest.raises(ValueError, match="Field is not valid"):
        reader.get_pages_showing_field(reader_fields[1])
    with pytest.raises(ValueError, match="Field is not valid"):
        writer.get_pages_showing_field(writer_fields[1])

    # missing Parent in field
    del reader_fields[99]["/Kids"][1].get_object()["/Parent"]
    del writer_fields[99]["/Kids"][1].get_object()["/Parent"]
    # NOTE(review): the calls below query field 1 (damaged above), not the
    # kid of field 99 whose "/Parent" was just removed - confirm the intent.
    with pytest.raises(ValueError):
        reader.get_pages_showing_field(reader_fields[1])
    with pytest.raises(ValueError):
        writer.get_pages_showing_field(writer_fields[1])

    # remove "/P" (optional)
    del reader_fields[8]["/Kids"][1].get_object()["/P"]
    del writer_fields[8]["/Kids"][1].get_object()["/P"]
    assert page_numbers(reader, reader_fields[8]["/Kids"][1]) == [0]
    assert page_numbers(writer, writer_fields[8]["/Kids"][1]) == [0]
    assert page_numbers(reader, reader_fields[8].get_object()) == [0, 0, 0, 0, 0]
    assert page_numbers(writer, writer_fields[8].get_object()) == [0, 0, 0, 0, 0]

    # Grouping fields
    # Reader: turn the last field into a pure grouping node with one kid.
    reader_fields[-1].get_object()[NameObject("/Kids")] = ArrayObject(
        [reader_fields[0]]
    )
    del reader_fields[-1].get_object()["/T"]
    del reader_fields[-1].get_object()["/P"]
    del reader_fields[-1].get_object()["/Subtype"]
    # Writer: append a new grouping field instead.
    writer_fields.append(
        writer._add_object(
            DictionaryObject(
                {
                    NameObject("/T"): TextStringObject("grouping"),
                    NameObject("/FT"): NameObject("/Tx"),
                    NameObject("/Kids"): ArrayObject([reader_fields[0]]),
                }
            )
        )
    )
    assert page_numbers(reader, reader_fields[-1]) == []
    assert page_numbers(writer, writer_fields[-1]) == []


@pytest.mark.enable_socket
def test_extract_empty_page():
    """Cf #2533"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/14718318/test.pdf", "iss2533.pdf"
    )
    second_page = PdfReader(BytesIO(pdf_bytes)).pages[1]
    # Layout extraction of this page is expected to give no text at all.
    extracted = second_page.extract_text(extraction_mode="layout")
    assert extracted == ""


@pytest.mark.enable_socket
def test_iss2815():
    """Cf #2815"""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/16760725/crash-c1920c7a064649e1191d7879952ec252473fc7e6.pdf",
        "iss2815.pdf",
    )
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    extracted = first_page.extract_text()
    assert extracted == "test command with wrong number of args"


================================================
FILE: tests/test_writer.py
================================================
"""Test the pypdf._writer module."""

import re
import shutil
import subprocess
from io import BytesIO
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any
from unittest import mock

import pytest

from pypdf import (
    ImageType,
    ObjectDeletionFlag,
    PageObject,
    PdfReader,
    PdfWriter,
    Transformation,
)
from pypdf.annotations import Link
from pypdf.errors import DeprecationError, PageSizeNotDefinedError, PdfReadError, PyPdfError
from pypdf.generic import (
    ArrayObject,
    ByteStringObject,
    ContentStream,
    DecodedStreamObject,
    Destination,
    DictionaryObject,
    Fit,
    IndirectObject,
    NameObject,
    NullObject,
    NumberObject,
    RectangleObject,
    StreamObject,
    TextStringObject,
)

from . import RESOURCE_ROOT, SAMPLE_ROOT, get_data_from_url, is_sublist
from .test_images import image_similarity

# Path of the ``gs`` executable as found by ``shutil.which``, or ``None`` when
# it is not on the search path.
GHOSTSCRIPT_BINARY = shutil.which("gs")


def _get_write_target(convert) -> Any:
    target = convert
    if callable(convert):
        with NamedTemporaryFile(suffix=".pdf", delete=False) as temporary:
            target = temporary.name
    return target


def test_writer_exception_non_binary(tmp_path, caplog):
    """Writing to a stream opened in text mode raises ``TypeError`` and logs a warning."""
    source = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
    writer = PdfWriter()
    writer.add_page(source.pages[0])

    text_file = tmp_path / "out.txt"
    with open(text_file, "w") as fp:
        with pytest.raises(TypeError):
            writer.write_stream(fp)

    # The log output has to end with the hint about the missing binary mode.
    expected_suffix = (
        "to write to is not in binary mode. It may not be written to correctly.\n"
    )
    assert caplog.text.endswith(expected_suffix)


def test_writer_clone():
    """Cloning works from a ``PdfReader`` as well as directly from a path."""
    src = RESOURCE_ROOT / "pdflatex-outline.pdf"

    for source in (PdfReader(src), src):
        writer = PdfWriter(clone_from=source)
        assert len(writer.pages) == 4
        first_page_type = str(type(writer.pages[0]))
        assert "PageObject" in first_page_type


def test_clone_metadata():
    """Check which metadata a writer holds after the different ways of cloning."""
    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")

    # clone_from: reader metadata plus the added entry
    cloned = PdfWriter(clone_from=reader)
    cloned.add_metadata({"/foo": "bar"})
    merged = {**reader.metadata, "/foo": "bar"}
    assert cloned.metadata == merged

    # clone_document_from_reader(): same expectation
    cloned = PdfWriter()
    cloned.clone_document_from_reader(reader)
    cloned.add_metadata({"/foo": "bar"})
    merged = {**reader.metadata, "/foo": "bar"}
    assert cloned.metadata == merged
    # after resetting the metadata only the added entry is left
    cloned.metadata = None
    cloned.add_metadata({"/foo": "bar"})
    assert cloned.metadata == {"/foo": "bar"}

    # clone_reader_document_root(): only the added entry
    cloned = PdfWriter()
    cloned.clone_reader_document_root(reader)
    cloned.add_metadata({"/foo": "bar"})
    assert cloned.metadata == {"/foo": "bar"}


def test_writer_clone_bookmarks():
    """Outline items can be added after cloning a document and after appending it."""
    reader = PdfReader(
        RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf"
    )

    def add_outline_and_verify(target) -> None:
        """Add two outline items, write to memory and verify the re-read file."""
        target.add_outline_item("Page 1", 0)
        target.add_outline_item("Page 2", 1)
        buffer = BytesIO()
        target.write(buffer)
        buffer.seek(0)
        reread = PdfReader(buffer)
        assert len(reread.pages) == len(reader.pages)
        assert len(reread.outline) == 2

    # Clone with a callback that records the repr of everything passed to it.
    writer = PdfWriter()
    seen = []
    writer.clone_document_from_reader(
        reader, lambda page: seen.append(page.__repr__())
    )
    assert "/Page" in "".join(seen)
    assert writer.pages[0].raw_get("/Parent") == writer._pages
    add_outline_and_verify(writer)

    # test with append
    writer = PdfWriter()
    writer.append(reader)
    add_outline_and_verify(writer)


def writer_operate(writer: PdfWriter) -> None:
    """
    Run a fixed series of operations on a writer.

    Shared by the tests that create the writer in each of the four usages.
    The first ``add_blank_page()`` call is expected to raise
    ``PageSizeNotDefinedError``.

    Args:
        writer: A PdfWriter object

    """
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    pdf_outline_path = RESOURCE_ROOT / "pdflatex-outline.pdf"

    reader = PdfReader(pdf_path)
    reader_outline = PdfReader(pdf_outline_path)

    page = reader.pages[0]
    with pytest.raises(PageSizeNotDefinedError) as exc:
        writer.add_blank_page()
    assert exc.value.args == ()
    writer.insert_page(page, 1)
    writer.insert_page(reader_outline.pages[0], 0)
    writer.add_outline_item_destination(page)
    writer.remove_links()
    writer.add_outline_item_destination(page)
    # One parent outline item followed by children using the different Fit variants.
    oi = writer.add_outline_item(
        "An outline item", 0, None, (255, 0, 15), True, True, Fit.fit_box_vertically(10)
    )
    writer.add_outline_item(
        "The XYZ fit", 0, oi, (255, 0, 15), True, True, Fit.xyz(left=10, top=20, zoom=3)
    )
    writer.add_outline_item(
        "The XYZ fit no args", 0, oi, (255, 0, 15), True, True, Fit.xyz()
    )
    writer.add_outline_item(
        "The FitH fit", 0, oi, (255, 0, 15), True, True, Fit.fit_horizontally(top=10)
    )
    writer.add_outline_item(
        "The FitV fit", 0, oi, (255, 0, 15), True, True, Fit.fit_vertically(left=10)
    )
    writer.add_outline_item(
        "The FitR fit",
        0,
        oi,
        (255, 0, 15),
        True,
        True,
        Fit.fit_rectangle(left=10, bottom=20, right=30, top=40),
    )
    writer.add_outline_item(
        "The FitB fit", 0, oi, (255, 0, 15), True, True, Fit.fit_box()
    )
    writer.add_outline_item(
        "The FitBH fit",
        0,
        oi,
        (255, 0, 15),
        True,
        True,
        Fit.fit_box_horizontally(top=10),
    )
    writer.add_outline_item(
        "The FitBV fit",
        0,
        oi,
        (255, 0, 15),
        True,
        True,
        Fit.fit_box_vertically(left=10),
    )
    # Pages exist now, so adding a blank page is expected to work.
    writer.add_blank_page()
    # The same URI is added twice to page index 2.
    writer.add_uri(2, "https://example.com", RectangleObject([0, 0, 100, 100]))
    writer.add_uri(2, "https://example.com", RectangleObject([0, 0, 100, 100]))
    writer.add_annotation(
        page_number=2,
        annotation=Link(target_page_index=1, rect=RectangleObject([0, 0, 100, 100])),
    )
    # Page layout: unset at first, then an arbitrary string, then a NameObject.
    assert writer._get_page_layout() is None
    writer.page_layout = "broken"
    assert writer.page_layout == "broken"
    writer.page_layout = NameObject("/SinglePage")
    assert writer._get_page_layout() == "/SinglePage"
    # Page mode: unset at first, then a string, then a NameObject.
    assert writer._get_page_mode() is None
    writer.page_mode = "/UseNone"
    assert writer._get_page_mode() == "/UseNone"
    writer.page_mode = NameObject("/UseOC")
    assert writer._get_page_mode() == "/UseOC"
    # Blank pages with both, one or none of the dimensions given.
    writer.insert_blank_page(width=100, height=100)
    page = writer.insert_blank_page(width=100)
    assert page.mediabox.height == 100
    page = writer.insert_blank_page(height=100)
    assert page.mediabox.width == 100
    writer.insert_blank_page()  # without parameters

    writer.remove_images()

    writer.add_metadata(reader.metadata)
    writer.add_metadata({"/Author": "Martin Thoma"})
    writer.add_metadata({"/MyCustom": 1234})

    writer.add_attachment("foobar.gif", b"foobarcontent")

    # Check that every key in _idnum_hash is correct
    objects_hash = [o.hash_value() for o in writer._objects]
    for k, v in writer._idnum_hash.items():
        assert v.pdf == writer
        assert k in objects_hash, f"Missing {v}"


def test_insert_blank_page():
    """
    Check ``insert_blank_page`` with different sizes and indices.

    Each step inserts one page and checks the page count and the size of the
    returned page. Where a dimension is missing or negative, the assertions
    expect the dimension of the page that was first before the insertion.
    """
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")

    old_page_count = len(writer.pages)

    # no dimensions given
    old_page = writer.pages[0]
    page = writer.insert_blank_page(index=0)
    assert len(writer.pages) == old_page_count + 1
    assert page.mediabox.width == old_page.mediabox.width
    assert page.mediabox.height == old_page.mediabox.height

    # width only
    old_page = writer.pages[0]
    page = writer.insert_blank_page(width=10, index=0)
    assert len(writer.pages) == old_page_count + 2
    assert page.mediabox.width == 10
    assert page.mediabox.height == old_page.mediabox.height

    # negative width
    old_page = writer.pages[0]
    page = writer.insert_blank_page(width=-10, index=0)
    assert len(writer.pages) == old_page_count + 3
    assert page.mediabox.width == old_page.mediabox.width
    assert page.mediabox.height == old_page.mediabox.height

    # height only
    old_page = writer.pages[0]
    page = writer.insert_blank_page(height=20, index=0)
    assert len(writer.pages) == old_page_count + 4
    assert page.mediabox.width == old_page.mediabox.width
    assert page.mediabox.height == 20

    # negative height
    old_page = writer.pages[0]
    page = writer.insert_blank_page(height=-20, index=0)
    assert len(writer.pages) == old_page_count + 5
    assert page.mediabox.width == old_page.mediabox.width
    assert page.mediabox.height == old_page.mediabox.height

    # both dimensions
    page = writer.insert_blank_page(width=30, height=40, index=0)
    assert len(writer.pages) == old_page_count + 6
    assert page.mediabox.width == 30
    assert page.mediabox.height == 40

    # both dimensions negative
    old_page = writer.pages[0]
    page = writer.insert_blank_page(width=-30, height=-40, index=0)
    assert len(writer.pages) == old_page_count + 7
    assert page.mediabox.width == old_page.mediabox.width
    assert page.mediabox.height == old_page.mediabox.height

    # index equal to the number of pages
    page = writer.insert_blank_page(width=50, height=60, index=len(writer.pages))
    assert len(writer.pages) == old_page_count + 8
    assert page.mediabox.width == 50
    assert page.mediabox.height == 60

    # index equal to minus the number of pages
    old_page = writer.pages[0]
    page = writer.insert_blank_page(width=-50, height=-60, index=-len(writer.pages))
    assert len(writer.pages) == old_page_count + 9
    assert page.mediabox.width == old_page.mediabox.width
    assert page.mediabox.height == old_page.mediabox.height

    # indices in the middle, positive and negative
    page = writer.insert_blank_page(width=70, height=80, index=len(writer.pages) // 2)
    assert len(writer.pages) == old_page_count + 10
    assert page.mediabox.width == 70
    assert page.mediabox.height == 80

    page = writer.insert_blank_page(width=70, height=80, index=-len(writer.pages) // 2)
    assert len(writer.pages) == old_page_count + 11
    assert page.mediabox.width == 70
    assert page.mediabox.height == 80

    num_pages = len(writer.pages)

    # one past the valid range at either end must raise IndexError
    with pytest.raises(
        IndexError,
        match=re.escape(f"Index should be in range [-{num_pages}, {num_pages}]"),
    ):
        page = writer.insert_blank_page(width=90, height=100, index=len(writer.pages) + 1)

    with pytest.raises(
        IndexError,
        match=re.escape(f"Index should be in range [-{num_pages}, {num_pages}]"),
    ):
        page = writer.insert_blank_page(width=-90, height=-100, index=-len(writer.pages) - 1)


@pytest.mark.parametrize(
    ("convert", "needs_cleanup"),
    [
        (str, True),
        (Path, True),
        (BytesIO(), False),
    ],
)
def test_writer_operations_by_traditional_usage(convert, needs_cleanup):
    """Create the writer without a context manager and write explicitly."""
    target = _get_write_target(convert)
    writer = PdfWriter()
    writer_operate(writer)

    if not needs_cleanup:
        # in-memory target: write directly, nothing to remove
        writer.write(target)
        return

    # file target: write through an opened stream, then remove the file
    with open(target, "wb") as output_stream:
        writer.write(output_stream)
    Path(target).unlink()


@pytest.mark.parametrize(
    ("convert", "needs_cleanup"),
    [
        (str, True),
        (Path, True),
        (BytesIO(), False),
    ],
)
def test_writer_operations_by_semi_traditional_usage(convert, needs_cleanup):
    """Use the writer as context manager, but write to an explicitly opened stream."""
    target = _get_write_target(convert)

    with PdfWriter() as writer:
        writer_operate(writer)

        if not needs_cleanup:
            # in-memory target is used as is
            writer.write(target)
        else:
            with open(target, "wb") as output_stream:
                writer.write(output_stream)

    # temporary files are only removed after the writer context was left
    if needs_cleanup:
        Path(target).unlink()


@pytest.mark.parametrize(
    ("convert", "needs_cleanup"),
    [
        (str, True),
        (Path, True),
        (BytesIO(), False),
    ],
)
def test_writer_operations_by_semi_new_traditional_usage(convert, needs_cleanup):
    """Use the writer as context manager and pass the target itself to ``write()``."""
    target = _get_write_target(convert)

    with PdfWriter() as writer:
        writer_operate(writer)
        # ``write()`` receives the target directly instead of an opened stream
        writer.write(target)

    if not needs_cleanup:
        return
    Path(target).unlink()


@pytest.mark.parametrize(
    ("convert", "needs_cleanup"),
    [
        (str, True),
        (Path, True),
        (BytesIO(), False),
    ],
)
def test_writer_operation_by_new_usage(convert, needs_cleanup):
    """Hand the target to the ``PdfWriter`` constructor; no explicit ``write()`` call."""
    target = _get_write_target(convert)

    # Leaving the context includes writing the output to the target.
    with PdfWriter(target) as writer:
        writer_operate(writer)

    if not needs_cleanup:
        return
    Path(target).unlink()


@pytest.mark.parametrize(
    "input_path",
    [
        "side-by-side-subfig.pdf",
        "reportlab-inline-image.pdf",
    ],
)
def test_remove_images(pdf_file_path, input_path):
    """Remove the images of a page and check that other content is still there."""
    source = PdfReader(RESOURCE_ROOT / input_path)
    writer = PdfWriter()
    writer.insert_page(source.pages[0], 0)
    writer.remove_images()

    # the content stream must not be empty after the removal
    remaining_content = writer.pages[0]["/Contents"]._data.strip()
    assert len(remaining_content)

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)

    with open(pdf_file_path, "rb") as input_stream:
        reread = PdfReader(input_stream)
        if input_path != "side-by-side-subfig.pdf":
            return
        # only this input is checked for its text
        extracted_text = reread.pages[0].extract_text()
        assert extracted_text
        assert "Lorem ipsum dolor sit amet" in extracted_text


@pytest.mark.enable_socket
def test_remove_images_sub_level():
    """Cf #2035"""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12394781/2210.03142-1.pdf",
        name="iss2103.pdf",
    )
    writer = PdfWriter(clone_from=BytesIO(pdf_bytes))
    writer.remove_images()

    # XObjects nested two levels below the page resources
    form_xobject = writer.pages[0]["/Resources"]["/XObject"]["/Fm1"]
    nested_image = form_xobject["/Resources"]["/XObject"]["/Im1"]
    innermost = nested_image["/Resources"]["/XObject"]

    # every remaining entry has to resolve to a NullObject
    remaining = []
    for entry in innermost.values():
        resolved = entry.get_object()
        if not isinstance(resolved, NullObject):
            remaining.append(resolved)
    assert len(remaining) == 0


@pytest.mark.parametrize(
    "input_path",
    [
        "side-by-side-subfig.pdf",
        "reportlab-inline-image.pdf",
    ],
)
def test_remove_text(input_path, pdf_file_path):
    """Remove the text of a page and write the result; passes when nothing raises."""
    source = PdfReader(RESOURCE_ROOT / input_path)
    writer = PdfWriter()
    writer.insert_page(source.pages[0], 0)
    writer.remove_text()

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


def test_remove_text_all_operators(pdf_file_path):
    """
    Run ``remove_text`` on a hand-built PDF using the ``Tj``, ``"`` and ``'`` operators.

    The test passes when removing the text and writing the result do not raise.
    """
    # Content stream showing text with each of the three operators.
    stream = (
        b"BT "
        b"/F0 36 Tf "
        b"50 706 Td "
        b"36 TL "
        b"(The Tj operator) Tj "
        b'1 2 (The double quote operator) " '
        b"(The single quote operator) ' "
        b"ET"
    )
    # Template of the file: the content stream is inserted right here, the
    # remaining placeholders (stream length, xref offsets, startxref) are
    # filled in below.
    pdf_data = (
        b"%%PDF-1.7\n"
        b"1 0 obj << /Count 1 /Kids [5 0 R] /Type /Pages >> endobj\n"
        b"2 0 obj << >> endobj\n"
        b"3 0 obj << >> endobj\n"
        b"4 0 obj << /Length %d >>\n"
        b"stream\n" + (b"%s\n" % stream) + b"endstream\n"
        b"endobj\n"
        b"5 0 obj << /Contents 4 0 R /CropBox [0.0 0.0 2550.0 3508.0]\n"
        b" /MediaBox [0.0 0.0 2550.0 3508.0] /Parent 1 0 R"
        b" /Resources << /Font << >> >>"
        b" /Rotate 0 /Type /Page >> endobj\n"
        b"6 0 obj << /Pages 1 0 R /Type /Catalog >> endobj\n"
        b"xref 1 6\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"%010d 00000 n\n"
        b"trailer << /Root 6 0 R /Size 6 >>\n"
        b"startxref\n%d\n"
        b"%%%%EOF"
    )
    startx_correction = -1
    # All offsets are looked up in the still unformatted template.
    pdf_data = pdf_data % (
        len(stream),
        pdf_data.find(b"1 0 obj") + startx_correction,
        pdf_data.find(b"2 0 obj") + startx_correction,
        pdf_data.find(b"3 0 obj") + startx_correction,
        pdf_data.find(b"4 0 obj") + startx_correction,
        pdf_data.find(b"5 0 obj") + startx_correction,
        pdf_data.find(b"6 0 obj") + startx_correction,
        # startx_correction should be -1 due to double % at the beginning
        # inducing an error on startxref computation
        pdf_data.find(b"xref"),
    )
    pdf_stream = BytesIO(pdf_data)

    # The file is read in non-strict mode.
    reader = PdfReader(pdf_stream, strict=False)
    writer = PdfWriter()

    page = reader.pages[0]
    writer.insert_page(page, 0)
    writer.remove_text()

    # finally, write "output" to pypdf-output.pdf
    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


def test_write_metadata(pdf_file_path):
    """Metadata added to a writer can be read back from the written file."""
    source = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter()

    # the first page is added once on its own, then all pages follow
    writer.add_page(source.pages[0])
    for page in source.pages:
        writer.add_page(page)

    # metadata of the source first, then the title on top
    writer.add_metadata(source.metadata)
    writer.add_metadata({"/Title": "The Crazy Ones"})

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)

    # the title must be present in the written file
    written_metadata = PdfReader(pdf_file_path).metadata
    assert written_metadata.get("/Title") == "The Crazy Ones"


def test_fill_form(pdf_file_path):
    """Fill form fields with and without flags/flatten and write the result."""
    form_reader = PdfReader(RESOURCE_ROOT / "form.pdf")
    writer = PdfWriter()

    writer.append(form_reader, [0])
    writer.append(RESOURCE_ROOT / "crazyones.pdf", [0])

    # page 0 is the form page; page 1 covers the case of a page without
    # any field to fill
    for page_index in (0, 1):
        writer.update_page_form_field_values(
            writer.pages[page_index],
            {"foo": "some filled in text"},
            flags=1,
            flatten=True,
        )

    # once more on the form page, this time with the default arguments
    writer.update_page_form_field_values(
        writer.pages[0], {"foo": "some filled in text"}
    )

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


def test_fill_form_with_qualified():
    """Fill a field addressed by its qualified name ``top.foo``."""
    source = PdfReader(RESOURCE_ROOT / "form.pdf")
    source.add_form_topname("top")

    writer = PdfWriter()
    writer.clone_document_from_reader(source)
    writer.add_page(source.pages[0])
    writer.update_page_form_field_values(
        writer.pages[0], {"top.foo": "filling"}, flags=1
    )

    # write to memory and read the value back
    buffer = BytesIO()
    writer.write(buffer)
    written_fields = PdfReader(buffer).get_fields()
    assert written_fields["top.foo"]["/V"] == "filling"


@pytest.mark.parametrize(
    ("use_128bit", "user_password", "owner_password"),
    [(True, "userpwd", "ownerpwd"), (False, "userpwd", "ownerpwd")],
)
def test_encrypt(use_128bit, user_password, owner_password, pdf_file_path):
    """
    Encrypt a document and read it back with each password.

    The written file must not contain ``foo`` in clear text and must open
    with the user and the owner password, given as ``str`` or as ``bytes``.
    """
    reader = PdfReader(RESOURCE_ROOT / "form.pdf")
    writer = PdfWriter()

    page = reader.pages[0]
    orig_text = page.extract_text()

    writer.add_page(page)

    # encrypt() is called twice with the same values, the keyword arguments
    # being given in a different order.
    writer.encrypt(
        owner_password=owner_password,
        user_password=user_password,
        use_128bit=use_128bit,
    )
    writer.encrypt(
        user_password=user_password,
        owner_password=owner_password,
        use_128bit=use_128bit,
    )

    # write "output" to pypdf-output.pdf
    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)

    # Test that the data is not there in clear text
    with open(pdf_file_path, "rb") as input_stream:
        data = input_stream.read()
    assert b"foo" not in data

    # Test the user and the owner password, first as str, then as bytes.
    # The passwords come from the parameters so that the check stays in
    # sync with the parametrization.
    for password in (
        user_password,
        owner_password,
        user_password.encode(),
        owner_password.encode(),
    ):
        reader = PdfReader(pdf_file_path, password=password)
        new_text = reader.pages[0].extract_text()
        assert reader.metadata.get("/Producer") == "pypdf"
        assert new_text == orig_text


def test_add_outline_item(pdf_file_path):
    """Add two top-level outline items with two children each and check counts."""
    source = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
    writer = PdfWriter()
    for source_page in source.pages:
        writer.add_page(source_page)

    # First top-level item is created closed and gets two children.
    closed_parent = writer.add_outline_item(
        "An outline item", 1, None, (255, 0, 15), True, True, Fit.fit(), is_open=False
    )
    writer.add_outline_item("Another", 2, closed_parent, None, False, False, Fit.fit())
    writer.add_outline_item(
        "Another bis", 2, closed_parent, None, False, False, Fit.fit()
    )

    # Second top-level item is created open and gets two children as well.
    open_parent = writer.add_outline_item(
        "An outline item 2", 1, None, (255, 0, 15), True, True, Fit.fit(), is_open=True
    )
    writer.add_outline_item("Another 2", 2, open_parent, None, False, False, Fit.fit())
    writer.add_outline_item(
        "Another 2bis", 2, open_parent, None, False, False, Fit.fit()
    )

    # Write the result and parse it again from the same file handle.
    with open(pdf_file_path, "w+b") as output_stream:
        writer.write(output_stream)
        output_stream.seek(0)
        result = PdfReader(output_stream)

        assert result.trailer["/Root"]["/Outlines"]["/Count"] == 3

        outline = result.outline
        closed_item, closed_children, opened_item = outline[0], outline[1], outline[2]
        assert closed_item["/Count"] == -2
        assert closed_item["/%is_open%"] == False  # noqa: E712
        assert opened_item["/Count"] == 2
        assert opened_item["/%is_open%"] == True  # noqa: E712
        assert closed_children[0]["/Count"] == 0


def test_add_named_destination(pdf_file_path):
    """Named destinations end up as name/reference pairs in the dest root."""
    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
    writer = PdfWriter()

    # The root is empty both before and after pages are added.
    assert writer.get_named_dest_root() == []
    for source_page in reader.pages:
        writer.add_page(source_page)
    assert writer.get_named_dest_root() == []

    writer.add_named_destination(TextStringObject("A named dest"), 2)
    writer.add_named_destination(TextStringObject("A named dest2"), 2)
    # The same name is added twice, using the keyword form.
    writer.add_named_destination(TextStringObject("A named dest3"), page_number=2)
    writer.add_named_destination(TextStringObject("A named dest3"), page_number=2)

    root = writer.get_named_dest_root()
    target_page_ref = writer.pages[2].indirect_reference
    # Entries alternate: name at an even index, its reference right after it.
    for name_index, expected_name in ((0, "A named dest"), (2, "A named dest2")):
        assert root[name_index] == expected_name
        dest_ref = root[name_index + 1]
        assert dest_ref.pdf == writer
        dest_dict = dest_ref.get_object()
        assert dest_dict["/S"] == NameObject("/GoTo")
        assert dest_dict["/D"][0] == target_page_ref
    assert root[4] == "A named dest3"

    # test get_object: by id number, by reference, and with a foreign reference
    first_ref = root[1]
    assert writer.get_object(first_ref.idnum) == writer.get_object(first_ref)
    with pytest.raises(ValueError) as excinfo:
        writer.get_object(reader.pages[0].indirect_reference)
    assert excinfo.value.args[0] == "PDF must be self"

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


def test_add_named_destination_sort_order(pdf_file_path):
    """
    Issue #1927 does not appear.

    add_named_destination() keeps the named destination list sorted by name,
    even when the names are added out of order.
    """
    writer = PdfWriter()
    assert writer.get_named_dest_root() == []

    writer.add_blank_page(200, 200)
    # Insert "b" first; "a" has to end up in front of it.
    for dest_name in ("b", "a"):
        writer.add_named_destination(dest_name, 0)

    root = writer.get_named_dest_root()

    # Two destinations, each stored as a (name, reference) pair.
    assert len(root) == 4
    first_name, second_name = root[0], root[2]
    assert (
        first_name == "a"
    ), '"a" was not inserted before "b" in the named destination root'
    assert second_name == "b"

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


def test_add_uri(pdf_file_path):
    """Add URI links using the different accepted rectangle and border forms."""
    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
    writer = PdfWriter()
    for source_page in reader.pages:
        writer.add_page(source_page)

    annotations_doc = (
        "https://pypdf.readthedocs.io/en/latest/user/adding-pdf-annotations.html"
    )
    # (page number, uri, rectangle, border); the rectangle is given once each as
    # RectangleObject, as a string and as a plain list.
    uri_specs = (
        (
            1,
            "http://www.example.com",
            RectangleObject([0, 0, 100, 100]),
            [1, 2, 3, [4]],
        ),
        (
            2,
            "https://pypdf.readthedocs.io/en/latest/",
            RectangleObject([20, 30, 50, 80]),
            [1, 2, 3],
        ),
        (3, annotations_doc, "[ 200 300 250 350 ]", [0, 0, 0]),
        (3, annotations_doc, [100, 200, 150, 250], [0, 0, 0]),
    )
    for page_number, uri, rect, border in uri_specs:
        writer.add_uri(page_number, uri, rect, border=border)

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


def test_link_annotation(pdf_file_path):
    """Add internal Link annotations with several fit modes and borders."""
    reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf")
    writer = PdfWriter()
    for source_page in reader.pages:
        writer.add_page(source_page)

    # (page the link is placed on, keyword arguments for Link).
    # The last entry deliberately passes no ``fit``.
    link_specs = [
        (
            1,
            {
                "target_page_index": 2,
                "rect": RectangleObject([0, 0, 100, 100]),
                "border": [1, 2, 3, [4]],
                "fit": Fit.fit(),
            },
        ),
        (
            2,
            {
                "target_page_index": 3,
                "rect": RectangleObject([0, 0, 100, 100]),
                "border": [1, 2, 3],
                "fit": Fit.fit_horizontally(),
            },
        ),
        (
            3,
            {
                "target_page_index": 0,
                "rect": RectangleObject([200, 300, 250, 350]),
                "border": [0, 0, 0],
                "fit": Fit.xyz(left=0, top=0, zoom=2),
            },
        ),
        (
            3,
            {
                "target_page_index": 0,
                "rect": RectangleObject([100, 200, 150, 250]),
                "border": [0, 0, 0],
            },
        ),
    ]
    for page_number, link_kwargs in link_specs:
        writer.add_annotation(page_number=page_number, annotation=Link(**link_kwargs))

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


def test_io_streams():
    """This is the example from the docs ("Streaming data")."""
    source_path = RESOURCE_ROOT / "pdflatex-outline.pdf"
    in_memory_pdf = BytesIO(source_path.read_bytes())

    # Reading works from an in-memory stream ...
    assert len(PdfReader(in_memory_pdf).pages) == 4

    # ... and so does writing (here: an empty writer).
    writer = PdfWriter()
    with BytesIO() as sink:
        writer.write(sink)


def test_regression_issue670(pdf_file_path):
    """Write the same source page twice, each time through a fresh writer."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf", strict=False)
    first_page = reader.pages[0]
    attempts = 2
    for _attempt in range(attempts):
        writer = PdfWriter()
        writer.add_page(first_page)
        with open(pdf_file_path, "wb") as output_stream:
            writer.write(output_stream)


def test_issue301():
    """Test with invalid stream length object."""
    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as source_file:
        writer = PdfWriter()
        # The source file has to stay open while its pages are copied and written.
        writer.append_pages_from_reader(PdfReader(source_file))
        writer.write(BytesIO())


def test_append_pages_from_reader_append():
    """Use append_pages_from_reader with a callable."""
    with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as source_file:
        writer = PdfWriter()
        # The builtin ``callable`` serves as the second (callback) argument.
        writer.append_pages_from_reader(PdfReader(source_file), callable)
        writer.write(BytesIO())


@pytest.mark.enable_socket
@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_sweep_indirect_references_nullobject_exception(pdf_file_path):
    """Append a downloaded sample document and write it without raising."""
    # TODO: Check this more closely... this looks weird
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381699/tika-924666.pdf",
        name="tika-924666.pdf",
    )
    writer = PdfWriter()
    writer.append(PdfReader(BytesIO(pdf_bytes)))
    writer.write(pdf_file_path)


@pytest.mark.enable_socket
@pytest.mark.slow
@pytest.mark.parametrize(
    ("url", "name"),
    [
        (
            "https://github.com/user-attachments/files/18381699/tika-924666.pdf",
            "test_sweep_indirect_references_nullobject_exception.pdf",
        ),
        (
            "https://github.com/user-attachments/files/18381694/tika-922840.pdf",
            "test_write_outline_item_on_page_fitv.pdf",
        ),
        ("https://github.com/py-pdf/pypdf/files/10715624/test.pdf", "iss1627.pdf"),
    ],
)
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
def test_some_appends(pdf_file_path, url, name):
    """Append each downloaded document to a fresh writer and write it out."""
    pdf_bytes = get_data_from_url(url, name=name)
    source = PdfReader(BytesIO(pdf_bytes))
    writer = PdfWriter()
    writer.append(source)
    writer.write(pdf_file_path)


def test_pdf_header():
    """Check pdf_header on an empty writer, after adding a page, and after setting it."""
    writer = PdfWriter()
    # Header of an empty writer.
    assert writer.pdf_header == "%PDF-1.3"

    # Adding a page from crazyones.pdf changes the reported header.
    source_page = PdfReader(RESOURCE_ROOT / "crazyones.pdf").pages[0]
    writer.add_page(source_page)
    assert writer.pdf_header == "%PDF-1.5"

    # Assigned as bytes, read back as str.
    writer.pdf_header = b"%PDF-1.6"
    assert writer.pdf_header == "%PDF-1.6"


def test_write_dict_stream_object(pdf_file_path):
    """A stream stored directly in a page dictionary is an indirect object after writing."""
    content = (
        b"BT "
        b"/F0 36 Tf "
        b"50 706 Td "
        b"36 TL "
        b"(The Tj operator) Tj "
        b'1 2 (The double quote operator) " '
        b"(The single quote operator) ' "
        b"ET"
    )

    stream_object = StreamObject()
    stream_object[NameObject("/Type")] = NameObject("/Text")
    stream_object._data = content

    writer = PdfWriter()

    # Put the stream object directly into the page dictionary;
    # the writer will replace it with an indirect object.
    blank_page = PageObject.create_blank_page(writer, 1000, 1000)
    blank_page[NameObject("/Test")] = stream_object

    added_page = writer.add_page(blank_page)
    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)

    # Collect the values through items(), as the original lookup did.
    raw_values = [raw for key, raw in added_page.items() if key == "/Test"]
    if not raw_values:
        pytest.fail("/Test not found")
    raw_test = raw_values[0]
    assert repr(raw_test) != repr(stream_object)
    assert isinstance(raw_test, IndirectObject)
    assert str(raw_test) == str(stream_object)  # expansion of IndirectObjects
    assert str(raw_test.get_object()) == str(stream_object)

    # Check that every key in _idnum_hash is correct
    known_hashes = [obj.hash_value() for obj in writer._objects]
    for hash_key, ref in writer._idnum_hash.items():
        assert ref.pdf == writer
        assert hash_key in known_hashes, f"Missing {ref}"


def test_add_single_annotation(pdf_file_path):
    """Add a text annotation, given as a plain dict with a nested popup, to a page."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter()
    writer.add_page(reader.pages[0])

    popup = {
        "/Type": "/Annot",
        "/Subtype": "/Popup",
        "/Rect": [294.75, 446.25, 494.75, 596.25],
        "/M": "D:20220406191847+02'00",
    }
    text_note = {
        "/Type": "/Annot",
        "/Subtype": "/Text",
        "/Rect": [270.75, 596.25, 294.75, 620.25],
        "/Contents": "Note in second paragraph",
        "/C": [1, 1, 0],
        "/M": "D:20220406191858+02'00",
        "/Popup": popup,
        "/T": "moose",
    }
    writer.add_annotation(0, text_note)

    # Inspect manually by adding 'assert False' and viewing the PDF
    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


@pytest.mark.samples
def test_colors_in_outline_item(pdf_file_path):
    """Outline colors given as hex strings or as an RGB tuple read back the same."""
    source = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
    writer = PdfWriter()
    writer.clone_document_from_reader(source)

    purple_rgb = (0.5019607843137255, 0.0, 0.5019607843137255)
    # The same purple three ways: bare hex, "#"-prefixed hex, float tuple.
    writer.add_outline_item("First Outline Item", page_number=2, color="800080")
    writer.add_outline_item("Second Outline Item", page_number=3, color="#800080")
    writer.add_outline_item("Third Outline Item", page_number=4, color=purple_rgb)

    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)

    # Compare components formatted to five decimals rather than as raw floats.
    expected_components = [f"{p:.5f}" for p in purple_rgb]
    for outline_item in PdfReader(pdf_file_path).outline:
        actual_components = [f"{c:.5f}" for c in outline_item.color]
        assert actual_components == expected_components


@pytest.mark.samples
def test_write_empty_stream():
    """Writing to an empty string as target raises a ValueError."""
    source = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf")
    writer = PdfWriter()
    writer.clone_document_from_reader(source)

    with pytest.raises(ValueError) as excinfo:
        writer.write("")
    message = excinfo.value.args[0]
    assert message == "Output(stream='') is empty."


def test_startup_dest():
    """Exercise the ``open_destination`` getter and setter of PdfWriter.

    The property is set in turn to a page, to the object returned by the
    getter, to a string and to ``None``; after each step the ``/OpenAction``
    entry of the root object and the getter result are checked.
    """
    pdf_file_writer = PdfWriter()
    pdf_file_writer.append_pages_from_reader(PdfReader(RESOURCE_ROOT / "issue-604.pdf"))

    # Nothing is set initially.
    assert pdf_file_writer.open_destination is None
    pdf_file_writer.open_destination = pdf_file_writer.pages[9]
    # checked also using Acrobat to verify the good page is opened
    op = pdf_file_writer.root_object["/OpenAction"]
    assert op[0] == pdf_file_writer.pages[9].indirect_reference
    assert op[1] == "/Fit"
    op = pdf_file_writer.open_destination
    assert op.raw_get("/Page") == pdf_file_writer.pages[9].indirect_reference
    assert op["/Type"] == "/Fit"
    # Assigning the value returned by the getter must round-trip.
    pdf_file_writer.open_destination = op
    assert pdf_file_writer.open_destination == op

    # irrelevant, just for coverage
    pdf_file_writer.root_object[NameObject("/OpenAction")][0] = NumberObject(0)
    pdf_file_writer.open_destination
    # With the first array entry removed, reading the property has to fail.
    with pytest.raises(Exception) as exc:
        del pdf_file_writer.root_object[NameObject("/OpenAction")][0]
        pdf_file_writer.open_destination
    assert "Invalid Destination" in str(exc.value)

    pdf_file_writer.open_destination = "Test"
    # checked also using Acrobat to verify open_destination
    op = pdf_file_writer.root_object["/OpenAction"]
    assert isinstance(op, TextStringObject)
    assert op == "Test"
    op = pdf_file_writer.open_destination
    assert isinstance(op, TextStringObject)
    assert op == "Test"

    # irrelevant, this is just for coverage
    pdf_file_writer.root_object[NameObject("/OpenAction")] = NumberObject(0)
    assert pdf_file_writer.open_destination is None
    # Setting None removes the entry; doing it again must not fail.
    pdf_file_writer.open_destination = None
    assert "/OpenAction" not in pdf_file_writer.root_object
    pdf_file_writer.open_destination = None


@pytest.mark.enable_socket
def test_iss471():
    """After appending, the /Dest of the first annotation is a text string."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9139245/book.pdf",
        name="book_471.pdf",
    )
    source = PdfReader(BytesIO(pdf_bytes))

    writer = PdfWriter()
    writer.append(source, excluded_fields=[])

    first_annotation = writer.pages[0]["/Annots"][0].get_object()
    assert isinstance(first_annotation["/Dest"], TextStringObject)


@pytest.mark.enable_socket
def test_reset_translation():
    """Check how ``reset_translation`` affects the objects added by ``append``.

    Appending the same page range twice adds only a few objects the second
    time. After ``reset_translation`` is called with the reader, with an
    indirect reference of that reader, or without argument, appending the
    range again adds at least 200 objects.
    """
    url = "https://github.com/user-attachments/files/18381699/tika-924666.pdf"
    name = "tika-924666.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    writer = PdfWriter()
    writer.append(reader, (0, 10))
    nb = len(writer._objects)
    # Second append of the same range, without any reset.
    writer.append(reader, (0, 10))
    assert (
        len(writer._objects) == nb + 11
    )  # +10 (pages) +1 because of the added outline
    nb += 1
    # Reset selected through the reader itself.
    writer.reset_translation(reader)
    writer.append(reader, (0, 10))
    assert len(writer._objects) >= nb + 200
    nb = len(writer._objects)
    # Reset selected through an indirect reference belonging to the reader.
    writer.reset_translation(reader.pages[0].indirect_reference)
    writer.append(reader, (0, 10))
    assert len(writer._objects) >= nb + 200
    nb = len(writer._objects)
    # Reset without argument.
    writer.reset_translation()
    writer.append(reader, (0, 10))
    assert len(writer._objects) >= nb + 200
    # From here on nb counts pages, not objects.
    nb = len(writer.pages)
    # Passing the same page object twice still adds two pages.
    writer.append(reader, [reader.pages[0], reader.pages[0]])
    assert len(writer.pages) == nb + 2


def test_threads_empty():
    """A fresh writer exposes an empty ArrayObject as threads, equal on every read."""
    writer = PdfWriter()
    first_read = writer.threads
    assert isinstance(first_read, ArrayObject)
    assert len(first_read) == 0
    # A second read has to compare equal to the first one.
    assert first_read == writer.threads


@pytest.mark.enable_socket
def test_append_without_annots_and_articles():
    """excluded_fields decides whether threads and annotations are appended."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381699/tika-924666.pdf",
        name="tika-924666.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    # Excluding "/B": no threads, neither with nor without the outline_item slot.
    without_threads = PdfWriter()
    without_threads.append(reader, None, (0, 10), True, ["/B"])
    without_threads.reset_translation()
    without_threads.append(reader, (0, 10), True, ["/B"])
    assert without_threads.threads == []

    # Excluding "/Annots": the page carries no annotations.
    without_annots = PdfWriter()
    without_annots.append(reader, None, (0, 10), True, ["/Annots"])
    assert "/Annots" not in without_annots.pages[5]

    # Excluding nothing: annotations and at least one thread are present.
    unfiltered = PdfWriter()
    unfiltered.append(reader, None, (0, 10), True, [])
    assert "/Annots" in unfiltered.pages[5]
    assert len(unfiltered.threads) >= 1


@pytest.mark.enable_socket
def test_append_multiple():
    """Appending the same page several times creates distinct page entries."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381699/tika-924666.pdf",
        name="tika-924666.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    writer = PdfWriter()
    # Two packs, each inserting the same source page three times at once.
    for _pack in range(2):
        writer.append(reader, [0, 0, 0])

    kids = writer.root_object["/Pages"]["/Kids"]
    first_kid, last_kid = kids[0], kids[-1]
    assert first_kid not in kids[1:]  # page not repeated
    assert last_kid not in kids[0:-1]  # page not repeated


@pytest.mark.samples
def test_set_page_label(pdf_file_path):
    """Set page labels in various orders and ranges and read them back."""

    def clone(source):
        # Fresh writer holding a clone of ``source``.
        cloned = PdfWriter()
        cloned.clone_document_from_reader(source)
        return cloned

    def apply_labels(target, ranges):
        # Each entry is (page_index_from, page_index_to, style).
        for index_from, index_to, style in ranges:
            target.set_page_label(index_from, index_to, style)

    def written_labels(target):
        # Write to the fixture path and return the labels a reader reports.
        target.write(pdf_file_path)
        return PdfReader(pdf_file_path).page_labels

    # File without labels
    reader = PdfReader(RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf")

    expected = (
        ["i", "ii", "1", "2", "A", "B", "1", "2", "3", "4"]
        + ["A", "i", "I", "II", "1", "2", "3", "I", "II"]
    )

    # Tests full length with labels assigned at first and last elements
    # Tests different labels assigned to consecutive ranges
    writer = PdfWriter(reader, full=True)
    apply_labels(
        writer,
        [
            (0, 1, "/r"),
            (4, 5, "/A"),
            (10, 10, "/A"),
            (11, 11, "/r"),
            (12, 13, "/R"),
            (17, 18, "/R"),
        ],
    )
    assert written_labels(writer) == expected

    # Same labels, different set order
    writer = clone(reader)
    apply_labels(
        writer,
        [
            (17, 18, "/R"),
            (4, 5, "/A"),
            (10, 10, "/A"),
            (0, 1, "/r"),
            (12, 13, "/R"),
            (11, 11, "/r"),
        ],
    )
    assert written_labels(writer) == expected

    # Tests labels assigned only in the middle
    # Tests label assigned to a range already containing labelled ranges
    expected = ["1", "2", "i", "ii", "iii", "iv", "v", "1"]
    writer = clone(reader)
    apply_labels(writer, [(3, 4, "/a"), (5, 5, "/A"), (2, 6, "/r")])
    assert written_labels(writer)[: len(expected)] == expected

    # Tests labels assigned inside a previously existing range
    # Ones repeat because user did not cover the entire original range
    expected = ["1", "2", "i", "a", "b", "A", "1", "1", "2"]
    writer = clone(reader)
    apply_labels(writer, [(2, 6, "/r"), (3, 4, "/a"), (5, 5, "/A")])
    assert written_labels(writer)[: len(expected)] == expected

    # Tests invalid user input
    writer = clone(reader)
    with pytest.raises(
        ValueError, match="At least one of style and prefix must be given"
    ):
        writer.set_page_label(0, 5, start=2)
    with pytest.raises(
        ValueError, match="page_index_from must be greater or equal than 0"
    ):
        writer.set_page_label(-1, 5, "/r")
    with pytest.raises(
        ValueError, match="page_index_to must be greater or equal than page_index_from"
    ):
        writer.set_page_label(5, 0, "/r")
    with pytest.raises(ValueError, match="page_index_to exceeds number of pages"):
        writer.set_page_label(0, 19, "/r")
    with pytest.raises(
        ValueError, match="If given, start must be greater or equal than one"
    ):
        writer.set_page_label(0, 5, "/r", start=-1)

    pdf_file_path.unlink()

    # File with pre existing labels
    reader = PdfReader(SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf")

    # Tests adding labels to existing ones
    expected = ["i", "ii", "A", "B", "1"]
    writer = clone(reader)
    apply_labels(writer, [(2, 3, "/A")])
    assert written_labels(writer)[: len(expected)] == expected

    # Tests replacing existing labels
    expected = ["A", "B", "1", "1", "2"]
    writer = clone(reader)
    apply_labels(writer, [(0, 1, "/A")])
    assert written_labels(writer)[: len(expected)] == expected

    pdf_file_path.unlink()

    # Tests prefix and start.
    reader = PdfReader(RESOURCE_ROOT / "issue-604.pdf")  # File without page labels
    writer = clone(reader)

    writer.set_page_label(0, 0, prefix="FRONT")
    writer.set_page_label(1, 2, "/D", start=2)
    writer.set_page_label(3, 6, prefix="UPDATES")
    # Decimal ranges that differ only in their prefix.
    prefixed_ranges = [
        (7, 10, "THYR-"),
        (11, 21, "PAP-"),
        (22, 30, "FOLL-"),
        (31, 39, "HURT-"),
    ]
    for index_from, index_to, prefix in prefixed_ranges:
        writer.set_page_label(index_from, index_to, "/D", prefix=prefix)
    writer.write(pdf_file_path)


@pytest.mark.enable_socket
def test_iss1601():
    """Merging onto a blank page keeps the source content operations as a sublist."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/10579503/badges-38.pdf",
        name="badge-38.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    source_page = reader.pages[0]
    source_operations = ContentStream(source_page.get_contents(), reader).operations

    writer = PdfWriter()

    # Variant 1: merge through an (identity) transformation.
    transformed_target = writer.add_blank_page(
        source_page.mediabox[2], source_page.mediabox[3]
    )
    transformed_target.merge_transformed_page(source_page, Transformation())
    assert is_sublist(source_operations, transformed_target.get_contents().operations)

    # Variant 2: plain merge.
    plain_target = writer.add_blank_page(
        source_page.mediabox[2], source_page.mediabox[3]
    )
    plain_target.merge_page(source_page)
    assert is_sublist(source_operations, plain_target.get_contents().operations)


def test_attachments():
    """Attachments added by the writer show up in the reader, duplicates included."""

    def reread(pdf_writer):
        # Write to memory and parse the result with a new reader.
        buffer = BytesIO()
        pdf_writer.write(buffer)
        buffer.seek(0)
        return PdfReader(buffer)

    writer = PdfWriter()
    writer.add_blank_page(100, 100)

    # Without attachments every accessor reports "empty".
    reader = reread(writer)
    assert reader.attachments == {}
    assert reader._list_attachments() == []
    assert reader._get_attachments() == {}

    # "foobar2.txt" is used twice, once with bytes and once with str content.
    to_add = [
        ("foobar.txt", b"foobarcontent"),
        ("foobar2.txt", b"foobarcontent2"),
        ("foobar2.txt", "2nd_foobarcontent"),
    ]
    for filename, content in to_add:
        writer.add_attachment(filename, content)
    added_names = [filename for filename, _ in to_add]

    reader = reread(writer)
    assert sorted(reader.attachments.keys()) == sorted(set(added_names))
    assert str(reader.attachments) == "LazyDict(keys=['foobar.txt', 'foobar2.txt'])"
    assert reader._list_attachments() == added_names

    # We've added the same key twice - hence only 2 and not 3:
    all_attachments = reader._get_attachments()
    assert len(all_attachments) == 2  # we have 2 keys, but 3 attachments!

    # The content for foobar.txt is clear and just a single value:
    assert all_attachments["foobar.txt"] == b"foobarcontent"

    # The content for foobar2.txt is a list!
    only_second = reader._get_attachments("foobar2.txt")
    assert len(only_second) == 1
    assert only_second["foobar2.txt"] == [b"foobarcontent2", b"2nd_foobarcontent"]

    # Let's do both cases with the public interface:
    assert reader.attachments["foobar.txt"][0] == b"foobarcontent"
    assert reader.attachments["foobar2.txt"][0] == b"foobarcontent2"
    assert reader.attachments["foobar2.txt"][1] == b"2nd_foobarcontent"


@pytest.mark.enable_socket
def test_iss1614():
    """Append both documents reported in #1614 to one writer without raising."""
    samples = (
        # test of an annotation(link) directly stored in the /Annots in the page
        ("https://github.com/py-pdf/pypdf/files/10669995/broke.pdf", "iss1614.pdf"),
        # test for 2nd error case reported in #1614
        ("https://github.com/py-pdf/pypdf/files/10696390/broken.pdf", "iss1614.2.pdf"),
    )
    writer = PdfWriter()
    for url, name in samples:
        reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
        writer.append(reader)


@pytest.mark.enable_socket
def test_new_removes():
    """Exercise the ``remove_*`` helpers of PdfWriter.

    Covers ``remove_images``, ``remove_text`` (all fonts, one existing font,
    one unknown font), ``remove_objects_from_page`` with several
    ``ObjectDeletionFlag`` values, ``remove_links`` and ``remove_annotations``.
    """
    url = "https://github.com/py-pdf/pypdf/files/10807951/tt.pdf"
    name = "iss1650.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))

    # Removing images keeps forms and text operators.
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)
    writer.remove_images()
    b = BytesIO()
    writer.write(b)
    bb = bytes(b.getbuffer())
    assert b"/Im0 Do" not in bb
    assert b"/Fm0 Do" in bb
    assert b" TJ" in bb

    # Removing text keeps the image resource.
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)
    writer.remove_text()
    b = BytesIO()
    writer.write(b)
    bb = bytes(b.getbuffer())
    assert b"/Im0" in bb
    assert b"Chap" not in bb
    assert b" TJ" not in bb

    # Test removing text in a specified font
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)
    b = BytesIO()
    writer.write(b)
    temp_reader = PdfReader(b)
    text = temp_reader.pages[0].extract_text()
    assert "Arbeitsschritt" in text
    assert "Modelltechnik" in text
    writer.remove_text(font_names=["LiberationSans-Bold"])
    b = BytesIO()
    writer.write(b)
    temp_reader = PdfReader(b)
    text = temp_reader.pages[0].extract_text()
    assert "Arbeitsschritt" not in text
    assert "Modelltechnik" in text

    # Test removing text in a specified font that doesn't exist (nothing should happen)
    writer = PdfWriter()
    writer.clone_document_from_reader(reader)
    b = BytesIO()
    writer.write(b)
    temp_reader = PdfReader(b)
    text = temp_reader.pages[0].extract_text()
    assert "Arbeitsschritt" in text
    assert "Modelltechnik" in text
    writer.remove_text(font_names=["ComicSans-Oblique"])
    b = BytesIO()
    writer.write(b)
    temp_reader = PdfReader(b)
    text = temp_reader.pages[0].extract_text()
    assert "Arbeitsschritt" in text
    assert "Modelltechnik" in text

    # Annotation removal: the second document is appended to the writer used
    # above, and the assertions below look at the annotations of pages 0 and 3.
    url = "https://github.com/py-pdf/pypdf/files/10832029/tt2.pdf"
    name = "GeoBaseWithComments.pdf"
    reader = PdfReader(BytesIO(get_data_from_url(url, name=name)))
    writer.append(reader)
    writer.remove_objects_from_page(writer.pages[0], [ObjectDeletionFlag.LINKS])
    # The subtype of link annotations is "/Link"; checking for "/Links" (as this
    # test used to) could never fail.
    assert "/Link" not in [
        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
    ]
    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.ATTACHMENTS)
    assert "/FileAttachment" not in [
        a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]
    ]

    # Inject a fake 3D annotation so that OBJECTS_3D has something to remove.
    writer.pages[0]["/Annots"].append(
        DictionaryObject({NameObject("/Subtype"): TextStringObject("/3D")})
    )
    assert "/3D" in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]
    writer.remove_objects_from_page(writer.pages[0], ObjectDeletionFlag.OBJECTS_3D)
    assert "/3D" not in [a.get_object()["/Subtype"] for a in writer.pages[0]["/Annots"]]

    writer.remove_links()
    assert len(writer.pages[0]["/Annots"]) == 0
    assert len(writer.pages[3]["/Annots"]) == 0

    # Only checks that the call does not raise.
    writer.remove_annotations("/Text")


@pytest.mark.enable_socket
def test_late_iss1654():
    """Compress the content streams of every cloned page, then write the document."""
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/10935632/bid1.pdf",
        name="bid1.pdf",
    )
    writer = PdfWriter()
    writer.clone_document_from_reader(PdfReader(BytesIO(pdf_bytes)))
    for cloned_page in writer.pages:
        cloned_page.compress_content_streams()
    writer.write(BytesIO())


@pytest.mark.enable_socket
def test_iss1723():
    """Append the page range (3, 5) of the sample document without raising."""
    # test of an annotation(link) directly stored in the /Annots in the page
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/11015242/inputFile.pdf",
        name="iss1723.pdf",
    )
    source = PdfReader(BytesIO(pdf_bytes))
    PdfWriter().append(source, (3, 5))


@pytest.mark.enable_socket
def test_iss1767():
    """Cloning a document with a duplicated object number must terminate."""
    # test with a pdf which is buggy because the object 389,0 exists 3 times:
    # twice to define catalog and one as an XObject inducing a loop when
    # cloning
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/11138472/test.pdf",
        name="iss1767.pdf",
    )
    source = PdfReader(BytesIO(pdf_bytes))
    PdfWriter(clone_from=source)


@pytest.mark.enable_socket
def test_named_dest_page_number():
    """
    Closes iss471
    tests appending with named destinations as integers
    """
    url = "https://github.com/py-pdf/pypdf/files/10704333/central.pdf"
    name = "central.pdf"

    writer = PdfWriter()

    def dest_names():
        # Name array of the named destinations, looked up again on every call.
        return writer.root_object["/Names"]["/Dests"]["/Names"]

    writer.add_blank_page(100, 100)
    writer.append(BytesIO(get_data_from_url(url, name=name)), pages=[0, 1, 2])
    assert len(dest_names()) == 2
    # NOTE(review): presumably source page index 1 shifted by the leading blank
    # page - confirm.
    assert dest_names()[-1][0] == (1 + 1)

    # Appending the whole document brings the count to six.
    writer.append(BytesIO(get_data_from_url(url, name=name)))
    assert len(dest_names()) == 6

    # A destination whose page entry is a NullObject must not be added.
    broken_source = PdfWriter()
    broken_source.add_blank_page(100, 100)
    broken_dest = broken_source.add_named_destination("toto", 0)
    broken_dest.get_object()[NameObject("/D")][0] = NullObject()
    buffer = BytesIO()
    broken_source.write(buffer)
    buffer.seek(0)
    writer.append(buffer)
    assert len(dest_names()) == 6


def test_update_form_fields(caplog, tmp_path):
    """Fill the form fields of FormTestFromOo.pdf in several passes.

    The first half updates fields repeatedly (with and without ``flatten``),
    writes the result and checks values, states and appearance streams through
    a reader. The second half works on a fresh clone with a missing ``/DA``
    entry and missing fonts and checks the logged warning for unsupported
    characters.
    """
    write_data_here = tmp_path / "out.pdf"
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf")
    # First pass: one value for each of the listed fields.
    writer.update_page_form_field_values(
        writer.pages[0],
        {
            "CheckBox1": "/Yes",
            "Text1": "mon Text1",
            "Text2": "ligne1\nligne2",
            "RadioGroup1": "/2",
            "RdoS1": "/",
            "Combo1": "!!monCombo!!",
            "Liste1": "Liste2",
            "Liste2": ["Lst1", "Lst3"],
            "DropList1": "DropListe3",
        },
        auto_regenerate=False,
        flatten=True,
    )
    # Drop one normal appearance stream and the page fonts before updating again.
    del writer.pages[0]["/Annots"][1].get_object()["/AP"]["/N"]
    del writer.pages[0]["/Resources"]["/Font"]
    writer.update_page_form_field_values(
        writer.pages[0],
        {"Text1": "my Text1", "Text2": "ligne1\nligne2\nligne3"},
        auto_regenerate=False,
    )
    # Third pass with None values; the assertions below still expect the
    # values set by the second pass.
    writer.update_page_form_field_values(
        writer.pages[0],
        {"Text1": None, "Text2": None},
        auto_regenerate=False,
        flatten=True,
    )

    # Round trip through a file and inspect the fields.
    writer.write(write_data_here)
    reader = PdfReader(write_data_here)
    flds = reader.get_fields()
    assert flds["CheckBox1"]["/V"] == "/Yes"
    assert flds["CheckBox1"].indirect_reference.get_object()["/AS"] == "/Yes"
    assert (
        b"(my Text1)"
        in flds["Text1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
    )
    assert flds["Text2"]["/V"] == "ligne1\nligne2\nligne3"
    assert (
        b"(ligne3)"
        in flds["Text2"].indirect_reference.get_object()["/AP"]["/N"].get_data()
    )
    assert flds["RadioGroup1"]["/V"] == "/2"
    assert flds["RadioGroup1"]["/Kids"][0].get_object()["/AS"] == "/Off"
    assert flds["RadioGroup1"]["/Kids"][1].get_object()["/AS"] == "/2"
    assert all(x in flds["Liste2"]["/V"] for x in ["Lst1", "Lst3"])

    # The reader reports the available states of the fields.
    assert all(x in flds["CheckBox1"]["/_States_"] for x in ["/Off", "/Yes"])
    assert all(x in flds["RadioGroup1"]["/_States_"] for x in ["/1", "/2", "/3"])
    assert all(x in flds["Liste1"]["/_States_"] for x in ["Liste1", "Liste2", "Liste3"])

    # Second half: fresh clone with an extra link annotation and a blank page
    # inserted in front, so the form page becomes pages[1].
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf")
    writer.add_annotation(
        page_number=0,
        annotation=Link(target_page_index=1, rect=RectangleObject([0, 0, 100, 100])),
    )
    writer.insert_blank_page(100, 100, 0)
    # Remove the /DA entry and the /DR fonts of the second AcroForm field.
    del writer.root_object["/AcroForm"]["/Fields"][1].get_object()["/DA"]
    del writer.root_object["/AcroForm"]["/Fields"][1].get_object()["/DR"]["/Font"]
    # Pages may also be passed as a list.
    writer.update_page_form_field_values(
        [writer.pages[0], writer.pages[1]],
        {"Text1": "!مرحبا بالعالم", "Text2": "ligne1\nligne2\nligne3"},
        auto_regenerate=False,
    )
    # The generated appearance stream uses Helvetica, and a warning is logged
    # for the characters the font encoding cannot represent.
    assert b"/Helvetica " in writer.pages[1]["/Annots"][1]["/AP"]["/N"].get_data()
    assert "Text string '!مرحبا بالعالم' contains characters not supported by font encoding." in caplog.text
    # ``None`` is accepted in place of a page.
    writer.update_page_form_field_values(
        None,
        {"Text1": "my Text1", "Text2": "ligne1\nligne2\nligne3"},
        auto_regenerate=False,
        flatten=True
    )

    Path(write_data_here).unlink()


def test_add_apstream_object():
    """Check that ``_add_apstream_object`` adds the XObject resource and the drawing commands to a page."""
    writer = PdfWriter()
    page = writer.add_blank_page(1000, 1000)
    # The freshly added blank page is expected to have no content yet.
    assert NameObject("/Contents") not in page

    form_entries = {
        NameObject("/Type"): NameObject("/XObject"),
        NameObject("/Subtype"): NameObject("/Form"),
        NameObject("/BBox"): RectangleObject([0.0, 0.0, 10.5, 10.5]),
        "__streamdata__": ByteStringObject(b"BT /F1 12 Tf (Hello World) Tj ET"),
    }
    apstream_object = DecodedStreamObject.initialize_from_dictionary(form_entries)
    writer._add_object(apstream_object)

    # The object name holds many special characters; the assertions below
    # expect each of them to show up as "_" in the resource name.
    writer._add_apstream_object(page, apstream_object, "AA2342!@#$% ^^##aa:-)", 200, 200)

    resources = page[NameObject("/Resources")]
    assert NameObject("/XObject") in resources
    assert "/Fm_AA2342__________aa_-_" in resources[NameObject("/XObject")]

    assert NameObject("/Contents") in page
    stream = page[NameObject("/Contents")].get_object()
    assert isinstance(stream, StreamObject)
    expected_data = (
        b"q\n1.0000 0.0000 0.0000 1.0000 200.0000 200.0000 cm\n/Fm_AA2342__________aa_-_ Do\nQ"
    )
    assert stream.get_data() == expected_data


def test_merge_content_stream_to_page():
    """
    Test that new content data is correctly added to page contents
    in the form of an ArrayObject or StreamObject.

    The test_add_apstream_object code already checks that
    _merge_content_stream_to_page works for an empty page.
    """
    first_chunk = b"BT /F1 12 Tf (Hello World) Tj ET"
    second_chunk = b"BT /F1 12 Tf (Hello Again, World) Tj ET"

    writer = PdfWriter()
    page = writer.add_blank_page(100, 100)

    # Case 1: merging twice is expected to leave one stream holding both
    # chunks separated by a newline.
    writer._merge_content_stream_to_page(page, first_chunk)
    writer._merge_content_stream_to_page(page, second_chunk)
    merged_stream = page[NameObject("/Contents")].get_object()
    assert isinstance(merged_stream, StreamObject)
    assert merged_stream.get_data() == b"BT /F1 12 Tf (Hello World) Tj ET\nBT /F1 12 Tf (Hello Again, World) Tj ET"

    # Case 2: the page contents are an array holding a single stream; the
    # merged data is expected to become a second array entry.
    seed_stream = StreamObject()
    seed_stream.set_data(first_chunk)
    seed_array = ArrayObject()
    seed_array.append(seed_stream)
    page[NameObject("/Contents")] = writer._add_object(seed_array)
    writer._merge_content_stream_to_page(page, second_chunk)

    contents = page[NameObject("/Contents")].get_object()
    assert isinstance(contents, ArrayObject)
    assert contents[0].get_object().get_data() == first_chunk
    assert contents[1].get_object().get_data() == second_chunk


@pytest.mark.enable_socket
def test_update_form_fields2(caplog):
    """Fill two downloaded forms, merge them and check the qualified text field values."""
    # Keys of this mapping are used as the top name added to each form.
    my_files = {
        "test1": {
            "name": "Test1 Form",
            "url": "https://github.com/py-pdf/pypdf/files/14817365/test1.pdf",
            "path": "iss2234a.pdf",
            "usage": {
                "fields": {
                    "First Name": "Reed",
                    "Middle Name": "R",
                    "MM": "04",
                    "DD": "21",
                    "YY": "24",
                    "Initial": "RRG",
                    # "I DO NOT Agree": null,
                    # "Last Name": null
                },
            },
        },
        "test2": {
            "name": "Test2 Form",
            "url": "https://github.com/py-pdf/pypdf/files/14817366/test2.pdf",
            "path": "iss2234b.pdf",
            "usage": {
                "fields": {
                    "p2 First Name": "Joe",
                    "p2 Middle Name": "S",
                    "p2 MM": "03",
                    "p2 DD": "31",
                    "p2 YY": "24",
                    "Initial": "JSS",
                    # "p2 I DO NOT Agree": "null",
                    "p2 Last Name": "Smith",
                    "p3 First Name": "شهرزاد",
                    "p3 Middle Name": "R",
                    "p3 MM": "01",
                    "p3 DD": "25",
                    "p3 YY": "21",
                },
            },
        },
    }
    merger = PdfWriter()

    for topname, spec in my_files.items():
        pdf_data = get_data_from_url(spec["url"], name=spec["path"])
        reader = PdfReader(BytesIO(pdf_data))
        reader.add_form_topname(topname)
        writer = PdfWriter(clone_from=reader)
        writer.update_page_form_field_values(None, spec["usage"]["fields"], auto_regenerate=True)
        merger.append(writer)

    expected_fields = {
        "test1.First Name": "Reed",
        "test1.Middle Name": "R",
        "test1.MM": "04",
        "test1.DD": "21",
        "test1.YY": "24",
        "test1.Initial": "RRG",
        "test1.I DO NOT Agree": None,
        "test1.Last Name": None,
        "test2.p2 First Name": "Joe",
        "test2.p2 Middle Name": "S",
        "test2.p2 MM": "03",
        "test2.p2 DD": "31",
        "test2.p2 YY": "24",
        "test2.Initial": "JSS",
        "test2.p2 I DO NOT Agree": None,
        "test2.p2 Last Name": "Smith",
        "test2.p3 First Name": "شهرزاد",
        "test2.p3 Middle Name": "R",
        "test2.p3 MM": "01",
        "test2.p3 DD": "25",
        "test2.p3 YY": "21",
    }
    assert merger.get_form_text_fields(True) == expected_fields
    assert "Text string 'شهرزاد' contains characters not supported by font encoding." in caplog.text


@pytest.mark.enable_socket
def test_iss1862():
    """
    Append a file whose font has a "/B" entry in an object below the page.

    The excluded field shall be considered only at first level (page) and
    not below.
    """
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/11708801/intro.pdf", name="iss1862.pdf"
    )
    writer = PdfWriter()
    writer.append(BytesIO(pdf_data))
    # check that "/B" is in the font
    char_procs = writer.pages[0]["/Resources"]["/Font"]["/F1"]["/CharProcs"]
    char_procs["/B"].get_data()


def test_empty_objects_before_cloning():
    """Compare the number of objects of a cloned writer with the count derived from the reader's xref data."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter(clone_from=reader)

    entries_in_xref = sum(len(entries) for entries in reader.xref.values())
    # Distinct first elements of the xref_objStm values (one per object stream).
    object_stream_ids = {stream_id for stream_id, _ in reader.xref_objStm.values()}

    nb_obj_reader = len(reader.xref_objStm) + entries_in_xref
    nb_obj_reader -= 1  # for trailer
    nb_obj_reader -= len(object_stream_ids)  # to remove object streams
    assert len(writer._objects) == nb_obj_reader


@pytest.mark.enable_socket
def test_watermark():
    """Merge a background page under each page of a document and check contents layout and output size."""
    background_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf", name="bgwatermark.pdf"
    )
    reader = PdfReader(BytesIO(background_data))
    source_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/11985888/source.pdf", name="srcwatermark.pdf"
    )
    writer = PdfWriter(clone_from=BytesIO(source_data))
    for page in writer.pages:
        page.merge_page(reader.pages[0], over=False)

    # Only the last processed page is inspected here.
    page_contents = page["/Contents"]
    assert isinstance(page_contents, ArrayObject)
    assert isinstance(page_contents[0], IndirectObject)

    output = BytesIO()
    writer.write(output)
    assert len(output.getvalue()) < 2.1 * 1024 * 1024


@pytest.mark.enable_socket
@pytest.mark.timeout(4)
def test_watermarking_speed():
    """Watermark every page of a downloaded document and check the size of the written result."""
    background_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf", name="bgwatermark.pdf"
    )
    reader = PdfReader(BytesIO(background_data))
    document_data = get_data_from_url(
        "https://arxiv.org/pdf/2201.00214.pdf", name="2201.00214.pdf"
    )
    writer = PdfWriter(clone_from=BytesIO(document_data))
    for page in writer.pages:
        page.merge_page(reader.pages[0], over=False)

    out_pdf_bytesio = BytesIO()
    writer.write(out_pdf_bytesio)
    written_size = len(out_pdf_bytesio.getvalue())
    pdf_size_in_mib = written_size / 1024 / 1024
    assert pdf_size_in_mib < 20


@pytest.mark.enable_socket
@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
def test_watermark_rendering(tmp_path):
    """Ensure the visual appearance of watermarking stays correct."""
    watermark_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/11985889/bg.pdf", name="bgwatermark.pdf"
    )
    watermark = PdfReader(BytesIO(watermark_data)).pages[0]
    source_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/11985888/source.pdf", name="srcwatermark.pdf"
    )
    source_page = PdfReader(BytesIO(source_data)).pages[0]

    writer = PdfWriter()
    merged_page = writer.add_page(source_page)
    merged_page.merge_page(watermark, over=False)

    # Reference image the rendering is compared against.
    target_png_path = tmp_path / "target.png"
    reference_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/assets/96178532/d5c72d0e-7047-4504-bbf6-bc591c80d7c0",
        name="dstwatermark.png",
    )
    target_png_path.write_bytes(reference_data)

    pdf_path = tmp_path / "out.pdf"
    png_path = tmp_path / "out.png"
    writer.write(pdf_path)

    render_command = [GHOSTSCRIPT_BINARY, "-sDEVICE=pngalpha", "-o", png_path, pdf_path]
    # False positive: https://github.com/PyCQA/bandit/issues/333
    subprocess.run(render_command)  # noqa: S603
    assert png_path.is_file()
    assert image_similarity(png_path, target_png_path) >= 0.95


@pytest.mark.samples
@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
def test_watermarking_reportlab_rendering(tmp_path):
    """
    Render a page merged with a reportlab overlay and compare it to a stored image.

    This test is showing a rotated+mirrored watermark in pypdf==3.15.4.
    Replacing the generate_base with e.g. the crazyones did not show the issue.
    """
    base_path = SAMPLE_ROOT / "022-pdfkit/pdfkit.pdf"
    watermark_path = SAMPLE_ROOT / "013-reportlab-overlay/reportlab-overlay.pdf"
    target_png_path = RESOURCE_ROOT / "test_watermarking_reportlab_rendering.png"
    pdf_path = tmp_path / "out.pdf"
    png_path = tmp_path / "test_watermarking_reportlab_rendering.png"

    source_page = PdfReader(base_path).pages[0]
    watermark = PdfReader(watermark_path).pages[0]

    writer = PdfWriter()
    merged_page = writer.add_page(source_page)
    merged_page.merge_page(watermark)
    writer.write(pdf_path)

    render_command = [
        GHOSTSCRIPT_BINARY,
        "-r120",
        "-sDEVICE=pngalpha",
        "-o",
        png_path,
        pdf_path,
    ]
    # False positive: https://github.com/PyCQA/bandit/issues/333
    subprocess.run(render_command)  # noqa: S603
    assert png_path.is_file()
    assert image_similarity(png_path, target_png_path) >= 0.999


@pytest.mark.enable_socket
def test_da_missing_in_annot():
    """Update text fields of a downloaded form, including one re-parented under another field."""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12136285/Building.Division.Permit.Application.pdf",
        name="BuildingDivisionPermitApplication.pdf",
    )
    writer = PdfWriter(clone_from=PdfReader(BytesIO(pdf_data)))
    writer.update_page_form_field_values(writer.pages[0], {"PCN-1": "0"}, auto_regenerate=False)

    # Round-trip through a written file to read the fields back.
    output = BytesIO()
    writer.write(output)
    fields = PdfReader(BytesIO(output.getvalue())).get_fields()

    # check for autosize processing
    appearance = fields["PCN-1"].indirect_reference.get_object()["/AP"]["/N"].get_data()
    assert b" 0 Tf" not in appearance

    # Make "PCN-2" a child of "PCN-1" in the writer, then update it.
    child_field = writer.get_object(fields["PCN-2"].indirect_reference.idnum)
    parent_field = writer.get_object(fields["PCN-1"].indirect_reference.idnum)
    child_field[NameObject("/Parent")] = parent_field.indirect_reference
    writer.update_page_form_field_values(writer.pages[0], {"PCN-2": "1"}, auto_regenerate=False)


def test_missing_fields(pdf_file_path):
    """Check the errors raised when /AcroForm or its /Fields entry is absent from the writer."""
    reader = PdfReader(RESOURCE_ROOT / "form.pdf")

    def update_error_message(target) -> str:
        """Return the first argument of the PyPdfError raised by the field update on ``target``."""
        with pytest.raises(PyPdfError) as exc:
            target.update_page_form_field_values(
                target.pages[0], {"foo": "some filled in text"}, flags=1
            )
        return exc.value.args[0]

    # Only the page is added here; the error message asserts there is no /AcroForm.
    writer = PdfWriter()
    writer.add_page(reader.pages[0])
    assert update_error_message(writer) == "No /AcroForm dictionary in PDF of PdfWriter Object"

    # After append the /AcroForm is present; drop its /Fields entry.
    writer = PdfWriter()
    writer.append(reader, [0])
    del writer.root_object["/AcroForm"]["/Fields"]
    assert update_error_message(writer) == "No /Fields dictionary in PDF of PdfWriter Object"


def test_missing_info():
    """Check writer metadata handling: absent, copied from a reader, emptied and reset to None."""
    reader = PdfReader(RESOURCE_ROOT / "missing_info.pdf")
    writer = PdfWriter(clone_from=reader)

    def written_bytes() -> bytes:
        """Write ``writer`` to a fresh buffer and return the produced bytes."""
        buffer = BytesIO()
        writer.write(buffer)
        return buffer.getvalue()

    assert len(writer.pages) == len(reader.pages)
    assert writer.metadata is None
    assert b"/Info" not in written_bytes()

    # Take over the metadata of another document.
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer.metadata = reader.metadata
    assert dict(writer._info) == dict(reader._info)
    assert writer.metadata == reader.metadata
    assert b"/Info" in written_bytes()

    # Empty metadata is still written as /Info.
    writer.metadata = {}
    writer._info = DictionaryObject()  # for code coverage
    assert b"/Info" in written_bytes()
    assert writer.metadata == {}

    writer.metadata = None
    writer.metadata = None  # for code coverage
    assert writer.metadata is None
    assert PdfWriter().metadata == {"/Producer": "pypdf"}
    assert b"/Info" not in written_bytes()


@pytest.mark.enable_socket
def test_germanfields():
    """Cf #2035"""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12194195/test.pdf", name="germanfields.pdf"
    )
    writer = PdfWriter(clone_from=PdfReader(BytesIO(pdf_data)))
    writer.update_page_form_field_values(
        writer.pages[0], {"Text Box 1": "test æ ø å"}, auto_regenerate=False
    )

    # Write and read back so the appearance stream comes from the saved file.
    bytes_stream = BytesIO()
    writer.write(bytes_stream)
    bytes_stream.seek(0)
    text_box = PdfReader(bytes_stream).get_fields()["Text Box 1"]
    appearance = text_box.indirect_reference.get_object()["/AP"]["/N"].get_data()
    assert b"test \xe6 \xf8 \xe5" in appearance


@pytest.mark.enable_socket
def test_no_t_in_articles():
    """Cf #2078"""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12311735/bad.pdf", name="iss2078.pdf"
    )
    reader = PdfReader(BytesIO(pdf_data))
    # The test passes as long as appending does not raise.
    PdfWriter().append(reader)


@pytest.mark.enable_socket
def test_no_i_in_articles():
    """Cf #2089"""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12352793/kim2002.pdf", name="iss2089.pdf"
    )
    reader = PdfReader(BytesIO(pdf_data))
    # The test passes as long as appending does not raise.
    PdfWriter().append(reader)


@pytest.mark.enable_socket
def test_damaged_pdf_length_returning_none():
    """
    Cf #140

    https://github.com/py-pdf/pypdf/issues/140#issuecomment-1685380549
    """
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12168578/bad_pdf_example.pdf",
        name="iss140_bad_pdf.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    # The test passes as long as appending does not raise.
    PdfWriter().append(reader)


@pytest.mark.enable_socket
def test_viewerpreferences():
    """Exercise reading and modifying the viewer preferences of a reader and of cloned writers."""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/9175966/2015._pb_decode_pg0.pdf",
        name="2015._pb_decode_pg0.pdf",
    )
    reader = PdfReader(BytesIO(pdf_data))
    assert reader.viewer_preferences.center_window == True  # noqa: E712

    # First clone: boolean and name valued entries.
    writer = PdfWriter(clone_from=reader)
    prefs = writer.viewer_preferences
    assert prefs.center_window == True  # noqa: E712
    prefs.center_window = False
    assert writer.root_object["/ViewerPreferences"]["/CenterWindow"] == False  # noqa: E712
    assert prefs.print_area == "/CropBox"
    for rejected_value in ("toto", "/toto"):
        with pytest.raises(ValueError):
            prefs.non_fullscreen_pagemode = rejected_value
    prefs.non_fullscreen_pagemode = "/UseOutlines"
    page_mode = writer.root_object["/ViewerPreferences"]["/NonFullScreenPageMode"]
    assert page_mode == "/UseOutlines"

    # Second clone: same boolean round trip on a fresh writer.
    writer = PdfWriter(clone_from=reader)
    prefs = writer.viewer_preferences
    assert prefs.center_window == True  # noqa: E712
    prefs.center_window = False
    assert writer.root_object["/ViewerPreferences"]["/CenterWindow"] == False  # noqa: E712

    # Third clone: the preferences are registered through _add_object first.
    writer = PdfWriter(clone_from=reader)
    registered_prefs = writer._add_object(writer.root_object["/ViewerPreferences"])
    writer.root_object[NameObject("/ViewerPreferences")] = registered_prefs
    prefs = writer.viewer_preferences
    prefs.center_window = False
    assert writer.root_object["/ViewerPreferences"]["/CenterWindow"] == False  # noqa: E712
    prefs.num_copies = 1
    assert prefs.num_copies == 1
    assert prefs.print_pagerange is None
    with pytest.raises(ValueError):
        prefs.print_pagerange = "toto"
    prefs.print_pagerange = ArrayObject()
    assert len(prefs.print_pagerange) == 0

    # Newly created preferences start empty.
    writer.create_viewer_preferences()
    assert len(writer.root_object["/ViewerPreferences"]) == 0
    writer.viewer_preferences.direction = "/R2L"
    assert len(writer.root_object["/ViewerPreferences"]) == 1

    # /Enforce handling.
    assert writer.viewer_preferences.enforce == []
    assert "/Enforce" not in writer.viewer_preferences
    writer.viewer_preferences.enforce += writer.viewer_preferences.PRINT_SCALING
    assert writer.viewer_preferences["/Enforce"] == ["/PrintScaling"]
    writer.viewer_preferences.enforce = None
    assert "/Enforce" not in writer.viewer_preferences
    writer.viewer_preferences.enforce = None

    # Without /ViewerPreferences in the source, both sides report None.
    del reader.trailer["/Root"]["/ViewerPreferences"]
    assert reader.viewer_preferences is None
    writer = PdfWriter(clone_from=reader)
    assert writer.viewer_preferences is None


def test_extra_spaces_in_da_text(caplog):
    """Check that extra spaces after the font name in /DA do not prevent the appearance update."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "form.pdf")

    # Insert additional spaces right after the font name of the first annotation.
    annotation = writer.pages[0]["/Annots"][0].get_object()
    padded_da = annotation["/DA"].replace("/Helv", "/Helv   ")
    annotation[NameObject("/DA")] = TextStringObject(padded_da)

    writer.update_page_form_field_values(
        writer.pages[0], {"foo": "abcd"}, auto_regenerate=False
    )

    appearance = writer.pages[0]["/Annots"][0].get_object()["/AP"]["/N"].get_data()
    assert "Font dictionary for  not found." not in caplog.text
    assert b"/Helv" in appearance
    assert b"(abcd)" in appearance


@pytest.mark.enable_socket
def test_object_contains_indirect_reference_to_self():
    """Merge one page of a downloaded file into a blank page, then append the whole file."""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12389243/testbook.pdf", name="iss2102.pdf"
    )
    reader = PdfReader(BytesIO(pdf_data))
    writer = PdfWriter()
    blank_page = writer.add_blank_page(595, 841)
    blank_page.merge_page(reader.pages[6])
    writer.append(reader)


def test_remove_image_per_type():
    """Remove images selectively by ImageType and check the remaining content operators."""
    # Inline images
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "reportlab-inline-image.pdf")
    writer.remove_images(ImageType.INLINE_IMAGES)
    first_page_data = writer.pages[0].get_contents().get_data()
    for operator in (b"BI", b"ID", b"EI"):
        assert operator not in first_page_data

    writer.remove_images()

    # Drawings
    drawing_operators = (b" re\n", b"W*", b"f*")
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf")
    writer.remove_images(ImageType.DRAWING_IMAGES)
    second_page_data = writer.pages[1].get_contents().get_data()
    for operator in drawing_operators:
        assert operator not in second_page_data
    # Text related operators are expected to remain.
    for operator in (b" TJ\n", b"rg", b"Tm"):
        assert operator in second_page_data
    xobject_data = writer.pages[9]["/Resources"]["/XObject"]["/Meta84"].get_data()
    for operator in drawing_operators:
        assert operator not in xobject_data

    # XObject images
    writer.remove_images(ImageType.XOBJECT_IMAGES)
    assert b"Do\n" not in writer.pages[0].get_contents().get_data()
    assert len(writer.pages[0]["/Resources"]["/XObject"]) == 0


@pytest.mark.enable_socket
def test_add_outlines_on_empty_dict():
    """Cf #2233"""

    def _get_parent_bookmark(current_indent, history_indent, bookmarks) -> Any:
        """The parent of A is the nearest bookmark whose indent is smaller than A's"""
        assert len(history_indent) == len(bookmarks)
        if current_indent == 0:
            return None
        # Scan from the most recently added bookmark back to the first one.
        for indent, bookmark in zip(reversed(history_indent), reversed(bookmarks)):
            if indent < current_indent:
                return bookmark
        return None

    # One outline entry per line: leading spaces give the nesting level and
    # the last token is the 1-based page number.
    bookmark_lines = """1 FUNDAMENTALS OF RADIATIVE TRANSFER 1
1.1 The Electromagnetic Spectrum; Elementary Properties of Radiation 1
1.2 Radiative Flux 2
    Macroscopic Description of the Propagation of Radiation 2
    Flux from an Isotropic Source-The Inverse Square Law 2
1.3 The Specific Intensity and Its Moments 3
    Definition of Specific Intensity or Brightness 3
    Net Flux and Momentum Flux 4
    Radiative Energy Density 5
    Radiation Pressure in an Enclosure Containing an Isotropic Radiation Field 6
    Constancy of Specific Zntensiw Along Rays in Free Space 7
    Proof of the Inverse Square Law for a Uniformly Bright Sphere 7
1.4 Radiative Transfer 8
    Emission 9
    Absorption 9
    The Radiative Transfer Equation 11
    Optical Depth and Source Function 12
    Mean Free Path 14
    Radiation Force 15
1.5 Thermal Radiation 15
    Blackbody Radiation 15
    Kirchhof's Law for Thermal Emission 16
    Thermodynamics of Blackbody Radiation 17
    The Planck Spectrum 20
    Properties of the Planck Law 23
    Characteristic Temperatures Related to Planck Spectrum 25
1.6 The Einstein Coefficients 27
    Definition of Coefficients 27
    Relations between Einstein Coefficients 29
    Absorption and Emission Coefficients in Terms of Einstein Coefficients 30
1.7 Scattering Effects; Random Walks 33
    Pure Scattering 33
    Combined Scattering and Absorption 36
1.8 Radiative Diffusion 39
    The Rosseland Approximation 39
    The Eddington Approximation; Two-Stream Approximation 42
PROBLEMS 45
REFERENCES 50
2 BASIC THEORY OF RADIATION FIELDS 51
2.1 Review of Maxwell’s Equations 51
2.2 Plane Electromagnetic Waves 55
2.3 The Radiation Spectrum 58
2.4 Polarization and Stokes Parameters 62
    Monochromatic Waves 62
    Quasi-monochromatic Waves 65
2.5 Electromagnetic Potentials 69
2.6 Applicability of Transfer Theory and the Geometrical Optics Limit 72
PROBLEMS 74
REFERENCES 76"""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/12797067/test-12.pdf", name="iss2233.pdf"
    )
    writer = PdfWriter(clone_from=PdfReader(BytesIO(pdf_data)))

    bookmarks = []
    history_indent = []
    for line in bookmark_lines.split("\n"):
        indent_size = len(line) - len(line.lstrip())
        parent = _get_parent_bookmark(indent_size, history_indent, bookmarks)
        history_indent.append(indent_size)
        *title_words, page_label = re.split(r"\s+", line.strip())
        new_bookmark = writer.add_outline_item(
            " ".join(title_words), int(page_label) - 1, parent=parent
        )
        bookmarks.append(new_bookmark)


def test_merging_many_temporary_files(caplog):
    """Merge 100 generated one-page PDFs and exercise the ``remove_page`` code paths."""

    def create_number_pdf(_n) -> BytesIO:
        """Return an in-memory PDF whose single page shows ``_n`` (skips the test without fpdf)."""
        pytest.importorskip("fpdf")
        from fpdf import FPDF  # noqa: PLC0415

        document = FPDF()
        document.add_page()
        document.set_font("helvetica", "B", 16)
        document.cell(40, 10, str(_n))
        return BytesIO(document.output())

    writer = PdfWriter()
    for number in range(100):
        reader = PdfReader(create_number_pdf(number))
        # Should only be one page.
        for page in reader.pages:
            writer.add_page(page)

    # ``blank_page`` is the object handed to add_page, ``added_page`` the one
    # add_page returned; only the latter is expected to be removable.
    blank_page = PageObject.create_blank_page(writer, 1000, 1000)
    added_page = writer.add_page(blank_page)
    assert len(writer.pages) == 101
    caplog.clear()
    writer.remove_page(blank_page)
    assert "Cannot find page in pages" in caplog.text
    assert len(writer.pages) == 101
    writer.remove_page(added_page)
    assert len(writer.pages) == 100

    output = BytesIO()
    writer.write(output)
    output.seek(0)
    reader = PdfReader(output)
    for number, page in enumerate(reader.pages):
        assert page.extract_text() == str(number)

    # test completed to validate remove_page
    writer.remove_page(writer.pages[-1], True)

    empty_writer = PdfWriter()
    empty_writer.remove_page(0)
    empty_writer.flattened_pages = None
    empty_writer.remove_page(0)

    # An indirect reference to something that is not a page
    caplog.clear()
    writer.remove_page(writer.pages[-1]["/Contents"].indirect_reference)
    assert "IndirectObject is not referencing a page" in caplog.text

    # A page object that was never added to the writer
    caplog.clear()
    blank_page = PageObject.create_blank_page(writer, 1000, 1000)
    writer.remove_page(blank_page)
    assert "Cannot find page in pages" in caplog.text

    caplog.clear()
    writer.remove_page(999999)
    assert "Page number is out of range" in caplog.text

    # A page only appended to ``flattened_pages`` after ``_add_object``
    blank_page = PageObject.create_blank_page(writer, 1000, 1000)
    blank_page = writer._add_object(blank_page)
    writer.flattened_pages.append(blank_page)
    caplog.clear()
    writer.remove_page(blank_page)
    assert "Cannot find page in pages" in caplog.text


@pytest.mark.enable_socket
def test_reattach_fields():
    """
    Test Reattach function
    addressed in #2453
    """
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/14241368/ExampleForm.pdf", name="iss2453.pdf"
    )
    reader = PdfReader(BytesIO(pdf_data))

    # Pages added one by one
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    assert len(writer.reattach_fields()) == 15
    assert len(writer.reattach_fields()) == 0  # nothing to append anymore
    assert len(writer.root_object["/AcroForm"]["/Fields"]) == 15

    # Cloned document
    writer = PdfWriter(clone_from=reader)
    assert len(writer.reattach_fields()) == 7
    writer.reattach_fields()
    assert len(writer.root_object["/AcroForm"]["/Fields"]) == 15

    # Annotation stored directly in /Annots instead of through a reference
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    first_page_annots = writer.pages[0]["/Annots"]
    direct_annotation = first_page_annots[0].get_object()
    del direct_annotation.indirect_reference
    writer.pages[0]["/Annots"][0] = direct_annotation
    assert isinstance(writer.pages[0]["/Annots"][0], DictionaryObject)
    assert len(writer.reattach_fields(writer.pages[0])) == 6
    assert isinstance(writer.pages[0]["/Annots"][0], IndirectObject)

    # Page without any /Annots
    del writer.pages[1]["/Annots"]
    assert len(writer.reattach_fields(writer.pages[1])) == 0


def test_get_pagenumber_from_indirectobject():
    """Check ``_get_page_number_by_indirect`` for None, NullObject, a page reference and raw object ids."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")

    for not_a_page in (None, NullObject()):
        assert writer._get_page_number_by_indirect(not_a_page) is None

    first_page_ref = writer.pages[0].indirect_reference
    assert writer._get_page_number_by_indirect(first_page_ref) == 0
    assert writer._get_page_number_by_indirect(first_page_ref.idnum) == 0
    assert writer._get_page_number_by_indirect(first_page_ref.idnum + 1) is None


def test_replace_object():
    """Exercise ``_replace_object`` on a writer and a reader with own and foreign references."""
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    reader = PdfReader(pdf_path)
    writer = PdfWriter(clone_from=reader)

    # A reference owned by the reader is rejected by the writer.
    with pytest.raises(ValueError):
        writer._replace_object(reader.pages[0].indirect_reference, reader.pages[0])
    writer._replace_object(writer.pages[0].indirect_reference, reader.pages[0])
    blank_page = PageObject.create_blank_page(writer, 1000, 1000)
    writer._replace_object(writer.pages[0].indirect_reference, blank_page)

    # mainly for coverage
    reader = PdfReader(pdf_path)  # reload a new instance
    rejected_references = (
        writer.pages[0].indirect_reference,
        IndirectObject(9999, 9999, reader),
    )
    for reference in rejected_references:
        with pytest.raises(ValueError):
            reader._replace_object(reference, reader.pages[0])
    reader._replace_object(reader.pages[0].indirect_reference, reader.pages[0])
    blank_page = PageObject.create_blank_page(writer, 1000, 1000)
    reader._replace_object(reader.pages[0].indirect_reference, blank_page)

    # A page created without a pdf, registered in the writer and then added.
    blank_page = PageObject.create_blank_page(None, 1000, 1000)
    blank_page[NameObject("/Contents")] = writer.pages[0]["/Contents"]
    writer._add_object(blank_page)
    writer.add_page(blank_page)


def test_mime_jupyter():
    """Check that ``_repr_mimebundle_`` returns an empty dict for the given include/exclude values."""
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    writer = PdfWriter(clone_from=reader)
    for document in (reader, writer):
        assert document._repr_mimebundle_(("include",), ("exclude",)) == {}


def test_init_without_named_arg():
    """Test to use file_obj argument and not clone_from"""
    pdf_path = RESOURCE_ROOT / "crazyones.pdf"
    reader = PdfReader(pdf_path)
    # Reference object count, obtained through the keyword argument.
    nb = len(PdfWriter(clone_from=reader)._objects)

    assert len(PdfWriter(reader)._objects) == nb

    with open(pdf_path, "rb") as file_handle:
        writer = PdfWriter(file_handle)
        # Rewind to keep a copy of the whole file for the BytesIO case below.
        file_handle.seek(0, 0)
        in_memory_copy = BytesIO(file_handle.read())
    assert len(writer._objects) == nb

    # Path, str and in-memory stream as first positional argument.
    for source in (pdf_path, str(pdf_path), in_memory_copy):
        writer = PdfWriter(source)
        assert len(writer._objects) == nb


@pytest.mark.enable_socket
def test_i_in_choice_fields():
    """Cf #2611"""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/15176321/FRA.F.6180.150.pdf", name="iss2611.pdf"
    )
    writer = PdfWriter(BytesIO(pdf_data))

    def state_field():
        """Return the object behind the "State" field of the writer."""
        return writer.get_fields()["State"].indirect_reference.get_object()

    assert "/I" in state_field()
    writer.update_page_form_field_values(
        writer.pages[0], {"State": "NY"}, auto_regenerate=False
    )
    # The "/I" entry is expected to be gone once the value has been set.
    assert "/I" not in state_field()


def test_selfont():
    """Update text fields with (value, font, size) tuples and check the generated appearance streams."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf")
    new_values = {"Text1": ("Text_1", "", 5), "Text2": ("Text_2", "/F3", 0)}
    writer.update_page_form_field_values(writer.pages[0], new_values, auto_regenerate=False)

    annotations = writer.pages[0]["/Annots"]

    text1_appearance = annotations[1].get_object()["/AP"]["/N"].get_data()
    assert b"/F3 5 Tf" in text1_appearance
    assert b"Text_1" in text1_appearance

    text2_appearance = annotations[2].get_object()["/AP"]["/N"].get_data()
    assert b"/F3 12.0 Tf" in text2_appearance
    assert b"Text_2" in text2_appearance


@pytest.mark.enable_socket
def test_no_resource_for_14_std_fonts():
    """Cf #2670"""
    pdf_data = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/15405390/f1040.pdf", name="iss2670.pdf"
    )
    writer = PdfWriter(BytesIO(pdf_data))
    first_page = writer.pages[0]
    for annotation_ref in first_page["/Annots"]:
        annotation = annotation_ref.get_object()
        # Only the text fields are updated and checked.
        if annotation["/FT"] == "/Tx":
            writer.update_page_form_field_values(
                first_page, {annotation["/T"]: "Brooks"}, auto_regenerate=False
            )
            font_resources = annotation["/AP"]["/N"]["/Resources"]["/Font"]
            assert "/Helvetica" in font_resources


@pytest.mark.enable_socket
def test_field_box_upside_down():
    """Cf #2724"""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/15996356/FRA.F.6180.55.pdf", name="iss2724.pdf"
    )
    writer = PdfWriter(BytesIO(pdf_data))
    writer.update_page_form_field_values(None, {"FreightTrainMiles": "0"})

    appearance = writer.pages[0]["/Annots"][13].get_object()["/AP"]["/N"]
    expected_stream = (
        b"q\n/Tx BMC \nq\n2 1 102.29520000000001 9.835000000000036 re\n"
        b"W\nBT\n/Arial 8.0 Tf 0 g\n2 3.0455000000000183 Td\n(0) Tj\nET\n"
        b"Q\nEMC\nQ\n"
    )
    assert appearance.get_data() == expected_stream

    # The last two /BBox entries must be positive.
    box = writer.pages[0]["/Annots"][13].get_object()["/AP"]["/N"]["/BBox"]
    assert box[2] > 0
    assert box[3] > 0


@pytest.mark.enable_socket
def test_matrix_entry_in_field_annots():
    """Cf #2731"""
    pdf_data = get_data_from_url(
        "https://github.com/user-attachments/files/16036514/template.pdf", name="iss2731.pdf"
    )
    writer = PdfWriter(BytesIO(pdf_data))
    new_values = {"Stellenbezeichnung_1": "some filled in text"}
    writer.update_page_form_field_values(writer.pages[0], new_values, auto_regenerate=False)

    appearance = writer.pages[0]["/Annots"][5].get_object()["/AP"]["/N"]
    assert "/Matrix" in appearance


@pytest.mark.enable_socket
def test_compress_identical_objects():
    """
    Cf #2728 and #2794

    Compares the written size after each of three steps: compression with
    ``remove_orphans=False``, removal of a page, and compression with
    ``remove_identicals=False``.
    """
    source = BytesIO(
        get_data_from_url(
            "https://github.com/user-attachments/files/16575458/tt2.pdf",
            name="iss2794.pdf",
        )
    )
    writer = PdfWriter(source)

    # Step 1: the output must be smaller than half of the input.
    writer.compress_identical_objects(remove_orphans=False)
    after_first_pass = BytesIO()
    writer.write(after_first_pass)
    assert len(after_first_pass.getvalue()) < 0.5 * len(source.getvalue())

    # Step 2: page0 contains fields which keep reference to the deleted page
    writer.remove_page(1)
    after_page_removal = BytesIO()
    writer.write(after_page_removal)
    assert len(after_page_removal.getvalue()) > len(after_first_pass.getvalue()) - 100

    # Step 3: the second pass must shrink the output further.
    writer.compress_identical_objects(remove_identicals=False)
    after_second_pass = BytesIO()
    writer.write(after_second_pass)
    assert len(after_second_pass.getvalue()) < len(after_page_removal.getvalue())


def test_set_need_appearances_writer():
    """Minimal test for coverage: the call only has to complete without raising."""
    empty_writer = PdfWriter()
    empty_writer.set_need_appearances_writer()


def test_utf16_metadata():
    """
    See #2754

    A subject containing a non-Latin character must round-trip through a
    write, and the raw output must contain the expected escaped bytes.
    """
    subject = "Invoice №AI_047"
    writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf")
    writer.add_metadata({"/Subject": subject})

    output = BytesIO()
    writer.write(output)
    output.seek(0)
    reader = PdfReader(output)
    assert reader.metadata.subject == subject

    # Inspect the raw serialization of the /Subject entry.
    raw = output.getvalue()
    start = raw.find(b"/Subject")
    expected_entry = (
        b"/Subject (\\376\\377\\000I\\000n\\000v\\000o\\000i\\000c\\000e"
        b"\\000 \\041\\026\\000A\\000I\\000\\137\\0000\\0004\\0007)"
    )
    assert raw[start : start + 100] == expected_entry


@pytest.mark.enable_socket
def test_increment_writer(caplog):
    """
    Tests for #2811

    Walks through several incremental-writer scenarios in sequence: writing an
    empty increment, modifying existing objects, invalid constructor input,
    inserting a page into a page tree and cloning a file without info.
    """
    writer = PdfWriter(
        RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf",
        incremental=True,
    )
    # Contains JBIG2 not decoded for the moment
    assert writer.list_objects_in_increment() == []  # no flowdown of properties

    # test writing with empty increment
    b = BytesIO()
    writer.write(b)
    # The output must be byte-identical to the file on disk.
    with open(
        RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf", "rb"
    ) as f:
        assert b.getvalue() == f.read(-1)
    b.seek(0)
    writer2 = PdfWriter(b, incremental=True)
    # Both writers must hold the same number of non-None objects.
    assert len([x for x in writer2._objects if x is not None]) == len(
        [x for x in writer._objects if x is not None]
    )
    writer2.add_metadata({"/Author": "test"})
    assert len(writer2.list_objects_in_increment()) == 1
    b = BytesIO()
    writer2.write(b)

    # modify one object
    writer.pages[0][NameObject("/MediaBox")] = ArrayObject(
        [NumberObject(0), NumberObject(0), NumberObject(864), NumberObject(648)]
    )
    assert writer.list_objects_in_increment() == [IndirectObject(4, 0, writer)]
    b = BytesIO()
    writer.write(b)
    # modify a second page: the increment now lists two objects
    writer.pages[5][NameObject("/MediaBox")] = ArrayObject(
        [NumberObject(0), NumberObject(0), NumberObject(864), NumberObject(648)]
    )
    assert len(writer.list_objects_in_increment()) == 2
    # modify object IndirectObject(5,0) : for coverage
    writer.get_object(5)[NameObject("/ForTestOnly")] = NameObject("/ForTestOnly")

    b = BytesIO()
    writer.write(b)
    # The output must start with the bytes of the source stream.
    assert b.getvalue().startswith(writer._reader.stream.getvalue())
    b.seek(0)
    reader = PdfReader(b)
    assert reader.pages[0]["/MediaBox"] == ArrayObject(
        [NumberObject(0), NumberObject(0), NumberObject(864), NumberObject(648)]
    )
    assert "/ForTestOnly" in reader.get_object(5)
    # An integer is not an accepted source for an incremental writer.
    with pytest.raises(PyPdfError):
        writer = PdfWriter(1, incremental=True)
    b.seek(0)
    writer = PdfWriter(b, incremental=True)
    assert writer.list_objects_in_increment() == []  # no flowdown of properties

    writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf", incremental=True)
    # 1 object is modified: page 0  inherits MediaBox so is changed
    assert len(writer.list_objects_in_increment()) == 1
    b = BytesIO()
    writer.write(b)

    writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf", incremental=False)
    # without incremental mode, every object is listed as part of the increment
    assert len(writer.list_objects_in_increment()) == len(writer._objects)

    # insert pages in a tree
    url = "https://github.com/py-pdf/pypdf/files/13946477/panda.pdf"
    name = "iss2343b.pdf"
    writer = PdfWriter(BytesIO(get_data_from_url(url, name=name)), incremental=True)
    reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf")
    pg = writer.insert_page(reader.pages[0], 4)
    # The new page's parent must be the first kid of the first kid of /Pages.
    assert (
        pg.raw_get("/Parent")
        == writer.root_object["/Pages"]["/Kids"][0].get_object()["/Kids"][0]
    )
    assert pg["/Parent"]["/Count"] == 8
    assert writer.root_object["/Pages"]["/Count"] == 285
    assert len(writer.flattened_pages) == 285

    # clone without info
    writer = PdfWriter(RESOURCE_ROOT / "missing_info.pdf", incremental=True)
    assert len(writer.list_objects_in_increment()) == 0
    assert writer.metadata is None
    # Setting empty metadata adds one object; resetting to None removes it again.
    writer.metadata = {}
    assert writer.metadata == {}
    assert len(writer.list_objects_in_increment()) == 1
    writer.metadata = None
    assert len(writer.list_objects_in_increment()) == 0
    assert writer.metadata is None
    b = BytesIO()
    writer.write(b)


@pytest.mark.enable_socket
def test_append_pdf_with_dest_without_page(caplog):
    """
    Tests for #2842

    After appending, ``/__WKANCHOR_8`` must be absent from the named
    destinations and exactly three destinations must remain.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/16990834/test.pdf",
        name="iss2842.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    writer = PdfWriter()
    writer.append(reader)

    assert "/__WKANCHOR_8" not in writer.named_destinations
    destination_count = len(writer.named_destinations)
    assert destination_count == 3


@pytest.mark.enable_socket
def test_destination_is_nullobject():
    """
    Tests for #2958

    Appending the downloaded document only has to complete without raising.
    """
    pdf_stream = BytesIO(
        get_data_from_url(
            "https://github.com/user-attachments/files/17822279/C0.00.-.COVER.SHEET.pdf",
            name="iss2958.pdf",
        )
    )
    merger = PdfWriter()
    merger.append(pdf_stream)


@pytest.mark.enable_socket
def test_destination_page_is_none():
    """
    Tests for #2963

    Appending the downloaded document only has to complete without raising.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/17879461/3.pdf",
        name="iss2963.pdf",
    )
    source_reader = PdfReader(BytesIO(pdf_bytes))
    merger = PdfWriter()
    merger.append(source_reader)


def test_stream_not_closed():
    """
    Tests for #2905

    Leaving the ``PdfWriter`` context must not close a file object that was
    handed to it, whether as a write target or as a constructor argument.
    """
    source_path = RESOURCE_ROOT / "pdflatex-outline.pdf"

    # Case 1: temporary file used as the target of an explicit write().
    with NamedTemporaryFile(suffix=".pdf") as write_target:
        with PdfReader(source_path) as reader, PdfWriter() as writer:
            writer.add_page(reader.pages[0])
            writer.write(write_target)
        assert not write_target.file.closed

    # Case 2: underlying temporary file object passed to the constructor.
    with NamedTemporaryFile(suffix=".pdf") as constructor_target:
        with PdfWriter(constructor_target.file) as writer:
            writer.add_blank_page(100, 100)
        assert not constructor_target.file.closed

    # Case 3: regular file opened in binary read mode passed to the constructor.
    with open(source_path, "rb") as opened_source:
        with PdfWriter(opened_source) as writer:
            pass
        assert not opened_source.closed


def test_auto_write(tmp_path):
    """
    Another test for #2905

    A writer constructed with a target path must leave a non-empty file
    behind once its context has exited.
    """
    output_path = tmp_path / "out.pdf"
    with PdfWriter(output_path) as writer:
        writer.add_blank_page(100, 100)
    written_size = output_path.stat().st_size
    assert written_size > 0


def test_deprecate_with_as():
    """
    Yet another test for #2905

    Both reading and assigning ``with_as_usage`` must raise
    ``DeprecationError`` with the removal message.
    """
    removal_message = r"with_as_usage is deprecated and was removed in pypdf 5\.0"
    with PdfWriter() as writer:
        with pytest.raises(DeprecationError, match=removal_message):
            _ = writer.with_as_usage

        # old code allowed setting this, so assignment is checked as well
        with pytest.raises(DeprecationError, match=removal_message):
            writer.with_as_usage = False


@pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript")
@pytest.mark.enable_socket
def test_inline_image_q_operator_handling(tmp_path):
    """
    Test for #2927

    Transfers page rotation into the content, renders the result with
    Ghostscript and compares the image against a reference PNG.
    """
    source_pdf = BytesIO(
        get_data_from_url(
            "https://github.com/user-attachments/files/17614880/test_clean.pdf",
            name="iss2927.pdf",
        )
    )

    reference_png = tmp_path / "expected.png"
    reference_png.write_bytes(
        get_data_from_url(
            "https://github.com/user-attachments/assets/abe16f48-9afa-4179-b1e8-62be27b95c26",
            name="iss2927.png",
        )
    )

    writer = PdfWriter()
    writer.append(source_pdf)
    for page in writer.pages:
        page.transfer_rotation_to_content()

    written_pdf = tmp_path / "out.pdf"
    rendered_png = tmp_path / "actual.png"
    writer.write(written_pdf)

    render_command = [
        GHOSTSCRIPT_BINARY,
        "-r120",
        "-sDEVICE=pngalpha",
        "-o",
        rendered_png,
        written_pdf,
    ]
    # False positive: https://github.com/PyCQA/bandit/issues/333
    subprocess.run(render_command)  # noqa: S603
    assert rendered_png.is_file()
    assert image_similarity(rendered_png, reference_png) >= 0.99999


def test_insert_filtered_annotations__annotations_are_none():
    """Passing ``annots=None`` must produce an empty list."""
    writer = PdfWriter()
    writer.add_blank_page(72, 72)
    buffer = BytesIO()
    writer.write(buffer)
    reader = PdfReader(buffer)

    inserted = writer._insert_filtered_annotations(
        annots=None, page=PageObject(), pages={}, reader=reader
    )
    assert inserted == []


def test_incremental_read():
    """
    Test for #3116

    Re-opens a freshly written one-page document in incremental mode and
    checks the object bookkeeping, both when nothing is modified and after
    another blank page has been added.
    """
    writer = PdfWriter()
    writer.add_blank_page(72, 72)
    stream0 = BytesIO()
    writer.write(stream0)

    reader = PdfReader(stream0)
    # 1 = Catalog, 2 = Pages, 3 = New Page, 4 = Info, Size == 5
    assert reader.trailer["/Size"] == 5

    stream0.seek(0, 0)
    writer = PdfWriter(stream0, incremental=True)
    assert len(writer._objects) == 4
    assert writer._objects[-1] is not None
    stream1 = BytesIO()
    writer.write(stream1)

    # nothing modified, so nothing added = ideal situation
    # Compare against the original bytes; comparing stream1 with itself
    # could never fail.
    assert stream1.getvalue() == stream0.getvalue()

    stream0.seek(0, 0)
    writer = PdfWriter(stream0, incremental=True)
    assert len(writer._objects) == 4
    assert writer._objects[-1] is not None
    writer.add_blank_page(72, 72)
    assert len(writer._objects) == 5
    stream1 = BytesIO()
    writer.write(stream1)
    # 2 = Pages, 5 = New Page, 6 = XRef, Size == 7
    # XRef is created on write and not counted
    assert len(writer._objects) == 5


def test_compress_identical_objects__after_remove_images():
    """
    Test for #3237

    Compressing after the images have been removed only has to complete
    without raising.
    """
    source_path = RESOURCE_ROOT / "AutoCad_Diagram.pdf"
    writer = PdfWriter(clone_from=source_path)
    writer.remove_images()
    writer.compress_identical_objects(remove_orphans=True, remove_identicals=True)


def test_merge__process_named_dests__no_dests_in_source_file():
    """
    Test for #3279

    Appends a reader whose named-destination lookup is patched to return a
    single destination, and checks the resulting named destinations.
    """
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")

    # Hacky solution to avoid attribute errors.
    names_dictionary = DictionaryObject()
    names_dictionary.indirect_reference = names_dictionary
    writer.root_object[NameObject("/Names")] = names_dictionary

    reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf")
    source_destination = Destination(
        title="test.pdf", page=reader.pages[0], fit=Fit("/Fit")
    )
    patched_lookup = mock.patch.object(
        reader,
        "_get_named_destinations",
        return_value={"test.pdf": source_destination},
    )
    with patched_lookup:
        writer.append(reader)
        actual = writer.named_destinations
        # The page now points to the appended one.
        expected_destination = Destination(
            title="test.pdf",
            page=writer.pages[1].indirect_reference,
            fit=Fit("/Fit"),
        )
        assert actual == {"test.pdf": expected_destination}


def test_insert_filtered_annotations__link_without_destination():
    """
    Test for #3211

    A ``/GoTo`` link whose ``/D`` entry is ``None``, and afterwards missing
    entirely, must give an empty result.
    """
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
    reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf")

    link = DictionaryObject({
        "/A": DictionaryObject({"/S": NameObject("/GoTo"), "/D": None}),
        "/BS": {"/S": "/S", "/Type": "/Border", "/W": 0},
        "/Border": [0, 0, 0],
        "/H": "/I",
        "/Rect": [68.6001, 653.405, 526.2, 671.054],
        "/StructParent": 9,
        "/Subtype": NameObject("/Link"),
        "/Type": NameObject("/Annot")
    })
    annotations = [link]

    # First run: "/D" is present but None.
    with_none_destination = writer._insert_filtered_annotations(
        annots=annotations, page=writer.pages[0], pages={}, reader=reader
    )
    assert with_none_destination == []

    # Second run on a fresh writer: "/D" is removed from the action.
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf")
    del annotations[0]["/A"]["/D"]
    without_destination = writer._insert_filtered_annotations(
        annots=annotations, page=writer.pages[0], pages={}, reader=reader
    )
    assert without_destination == []


@pytest.mark.enable_socket
def test_insert_filtered_annotations__annotations_are_no_list(caplog):
    """
    Tests for #3320

    Appending the downloaded document must log exactly two messages, both
    reporting that a dictionary was found where annotations were expected.
    """
    url = "https://github.com/user-attachments/files/20818089/bugpdf.pdf"
    name = "issue3320.pdf"
    source_data = BytesIO(get_data_from_url(url, name=name))
    reader = PdfReader(source_data)
    writer = PdfWriter()
    writer.append(reader)
    # Object 36 is referenced as '/FontFile2' in both expected messages below.
    font_file2 = reader.get_object(36).indirect_reference
    assert caplog.messages == [
        (
            f"Expected annotation arrays: {{'/FontFile2': {font_file2!r}, "
            "'/Descent': -269, '/CapHeight': 714, '/FontWeight': "
            "300, '/FontName': '/JQJGLF+OpenSans-Light', '/ItalicAngle': 0, '/StemV': "
            "48, '/Type': '/FontDescriptor', '/FontBBox': [-521, -269, 1140, 1048], "
            "'/FontFamily': 'Open Sans Light', '/Flags': 32, '/XHeight': 531, "
            "'/Ascent': 1048, '/FontStretch': '/Normal'} []. Ignoring annotations."
        ),
        (
            f"Expected list of annotations, got {{'/FontFile2': {font_file2!r}, "
            "'/Descent': -269, '/CapHeight': 714, '/FontWeight': 300, '/FontName': '/JQJGLF+OpenSans-Light', "
            "'/ItalicAngle': 0, '/StemV': 48, '/Type': '/FontDescriptor', '/FontBBox': [-521, -269, 1140, 1048], "
            "'/FontFamily': 'Open Sans Light', '/Flags': 32, '/XHeight': 531, '/Ascent': 1048, '/FontStretch': "
            "'/Normal'} of type DictionaryObject."
        )
    ]


def test_unterminated_object__with_incremental_writer():
    """
    Test for #3118

    The last 39 bytes of the incremental output must match the expected
    tail, ending in ``%%EOF``.
    """
    reader = PdfReader(RESOURCE_ROOT / "bytes.pdf")
    writer = PdfWriter(reader, incremental=True)
    writer.add_blank_page(72, 72)

    output = BytesIO()
    writer.write(output)
    expected_tail = b"\nendstream\nendobj\nstartxref\n1240\n%%EOF\n"
    assert output.getvalue()[-39:] == expected_tail


def test_wrong_size_in_incremental_pdf(caplog):
    """
    A trailer ``/Size`` below the object count must be reported.

    Checks the logged message for a non-strict writer, and the
    ``PdfReadError`` raised both by a strict clone and by an incremental
    writer.
    """
    pristine = (RESOURCE_ROOT / "crazyones.pdf").read_bytes()
    writer = PdfWriter(BytesIO(pristine), incremental=True)
    writer._add_object(DictionaryObject())

    increment_buffer = BytesIO()
    writer.write(increment_buffer)
    # Rewrite "/Size 25" in the written bytes to a value that is too small.
    broken = increment_buffer.getvalue().replace(b"/Size 25", b"/Size 2")

    writer = PdfWriter(BytesIO(broken), incremental=False)
    assert "Object count 19 exceeds defined trailer size 2" in caplog.text
    assert len(writer._objects) == 20

    caplog.clear()
    writer = PdfWriter(incremental=False, strict=True)
    size_error = r"^Object count 19 exceeds defined trailer size 2$"
    with pytest.raises(PdfReadError, match=size_error):
        writer.clone_reader_document_root(reader=PdfReader(BytesIO(broken)))

    flatten_error = r"^Got index error while flattening\.$"
    with pytest.raises(PdfReadError, match=flatten_error):
        PdfWriter(BytesIO(broken), incremental=True)


@pytest.mark.enable_socket
def test_flatten_form_field_without_font_in_resources():
    """
    This test is a regression test for issue #3553.
    Flatten form field with /Resources lacking /Font.
    """
    field_name = "Unique reference numberRow1"
    source = PdfReader(BytesIO(get_data_from_url(name="issue-3553.pdf")))
    writer = PdfWriter()
    writer.append(source)
    writer.update_page_form_field_values(
        writer.pages[0], {field_name: "test"}, flatten=True
    )
    output = BytesIO()
    writer.write(output)

    # Read the result back and check the stored field value.
    text_fields = PdfReader(output).get_form_text_fields()
    assert text_fields[field_name] == "test"


def test_merge_with_null_acroform_does_not_raise_typeerror():
    """
    Source PDFs may contain '/AcroForm null'.

    Test for issue #3598.
    """
    # Build an in-memory source document whose root has a null /AcroForm.
    source_writer = PdfWriter()
    source_writer.add_blank_page(72, 72)
    source_writer.root_object[NameObject("/AcroForm")] = NullObject()

    source_buffer = BytesIO()
    source_writer.write(source_buffer)
    source_buffer.seek(0)
    source_reader = PdfReader(source_buffer)

    merged = PdfWriter()
    merged.merge(0, source_reader)

    assert "/AcroForm" not in merged.root_object


def test_compress_identical_objects__info_is_none():
    """Compression must complete both before and after metadata is set to ``None``."""
    source_path = RESOURCE_ROOT / "crazyones.pdf"
    writer = PdfWriter(clone_from=source_path)
    writer.compress_identical_objects()

    # Repeat once the metadata has been cleared.
    writer.metadata = None
    writer.compress_identical_objects()


@pytest.mark.enable_socket
def test_flatten_form_field_with_signature():
    """
    This test is a regression test for issue #3633.
    Flatten form field with /Sig.
    """
    pdf_bytes = get_data_from_url(name="issue-3633.pdf")
    writer = PdfWriter(BytesIO(pdf_bytes))
    writer.update_page_form_field_values(
        writer.pages[0], {"signature": "test"}, flatten=True
    )
    output = BytesIO()
    writer.write(output)

    # The written document only has to be readable again.
    _ = PdfReader(output)


================================================
FILE: tests/test_xmp.py
================================================
"""Test the pypdf.xmp module."""
from datetime import datetime, timedelta, timezone
from io import BytesIO

import pytest

import pypdf.generic
import pypdf.xmp
from pypdf import PdfReader, PdfWriter
from pypdf.errors import PdfReadError, XmpDocumentError
from pypdf.generic import ContentStream, NameObject, StreamObject
from pypdf.xmp import XmpInformation

from . import RESOURCE_ROOT, SAMPLE_ROOT, get_data_from_url


@pytest.mark.samples
@pytest.mark.parametrize(
    "src",
    [
        (SAMPLE_ROOT / "020-xmp/output_with_metadata_pymupdf.pdf"),
    ],
)
def test_read_xmp_metadata_samples(src):
    """Read the XMP metadata of a sample file and check the Dublin Core and custom values."""
    reader = PdfReader(src)
    metadata = reader.xmp_metadata
    assert metadata

    assert metadata.dc_contributor == []
    assert metadata.dc_creator == ["John Doe"]
    assert metadata.dc_source == "Martin Thoma"  # attribute node
    assert metadata.dc_description == {"x-default": "This is a text"}
    assert metadata.dc_date == [datetime(1990, 4, 28, 0, 0)]
    assert metadata.dc_title == {"x-default": "Sample PDF with XMP Metadata"}

    expected_custom = {
        "Style": "FooBarStyle",
        "other": "worlds",
        "⏰": "time",
    }
    assert metadata.custom_properties == expected_custom


@pytest.mark.samples
def test_writer_xmp_metadata_samples():
    """
    Check the XMP metadata exposed by a writer, then replace it.

    The replacement stream differs from the original only in ``dc:source``,
    and the new value must be readable after writing.
    """
    writer = PdfWriter(SAMPLE_ROOT / "020-xmp/output_with_metadata_pymupdf.pdf")
    metadata = writer.xmp_metadata
    assert metadata
    assert metadata.dc_contributor == []
    assert metadata.dc_creator == ["John Doe"]
    assert metadata.dc_source == "Martin Thoma"  # attribute node
    assert metadata.dc_description == {"x-default": "This is a text"}
    assert metadata.dc_date == [datetime(1990, 4, 28, 0, 0)]
    assert metadata.dc_title == {"x-default": "Sample PDF with XMP Metadata"}
    expected_custom = {
        "Style": "FooBarStyle",
        "other": "worlds",
        "⏰": "time",
    }
    assert metadata.custom_properties == expected_custom

    # Build a replacement stream with a different dc:source attribute.
    replacement_stream = pypdf.generic.ContentStream(None, None)
    patched_xml = metadata.stream.get_data().replace(
        b'dc:source="Martin Thoma"', b'dc:source="Pubpub-Zz"'
    )
    replacement_stream.set_data(patched_xml)
    writer.xmp_metadata = pypdf.xmp.XmpInformation(replacement_stream)

    output = BytesIO()
    writer.write(output)
    reader = PdfReader(output)
    reread = reader.xmp_metadata
    assert reread.dc_source == "Pubpub-Zz"


@pytest.mark.parametrize(
    ("src", "has_xmp"),
    [
        (RESOURCE_ROOT / "commented-xmp.pdf", True),
        (RESOURCE_ROOT / "crazyones.pdf", False),
    ],
)
def test_read_xmp_metadata(src, has_xmp):
    """Read XMP metadata from PDF files."""
    reader = PdfReader(src)
    xmp = reader.xmp_metadata
    metadata_missing = xmp is None
    assert metadata_missing == (not has_xmp)
    if not has_xmp:
        return

    # Exhaust the iterator; the yielded elements themselves are not checked.
    for _element in xmp.get_element(
        about_uri="", namespace=pypdf.xmp.RDF_NAMESPACE, name="Artist"
    ):
        pass

    assert get_all_tiff(xmp) == {"tiff:Artist": ["me"]}
    assert xmp.dc_contributor == []


def get_all_tiff(xmp: pypdf.xmp.XmpInformation):
    """
    Return all TIFF metadata as a dictionary.

    Each node in the Adobe TIFF namespace is mapped from its tag name to the
    list of ``data`` values of its child nodes.
    """
    tiff_nodes = xmp.get_nodes_in_namespace(
        about_uri="", namespace="http://ns.adobe.com/tiff/1.0/"
    )
    return {
        node.tagName: [child.data for child in node.childNodes]
        for node in tiff_nodes
    }


def test_converter_date():
    """
    _converter_date returns the correct datetime.

    This is a regression test for issue #774.
    """
    # Fractional seconds with a "Z" suffix.
    with_fraction = pypdf.xmp._converter_date("2021-04-28T12:23:34.123Z")
    assert with_fraction == datetime(2021, 4, 28, 12, 23, 34, 123000)

    # Unparsable input.
    with pytest.raises(ValueError) as raised:
        pypdf.xmp._converter_date("today")
    message = raised.value.args[0]
    assert message.startswith("Invalid date format")

    # A -03:00 offset: 12:23:01 is expected to come back as 15:23:01.
    with_offset = pypdf.xmp._converter_date("2021-04-28T12:23:01-03:00")
    assert with_offset == datetime(2021, 4, 28, 15, 23, 1)


def test_modify_date():
    """
    xmp_modify_date is extracted correctly.

    This is a regression test for issue #914.
    """
    reader = PdfReader(RESOURCE_ROOT / "issue-914-xmp-data.pdf")
    modified = reader.xmp_metadata.xmp_modify_date
    assert modified == datetime(2022, 4, 9, 15, 22, 43)


@pytest.mark.parametrize(
    "x",
    ["a", 42, 3.141, False, True],
)
def test_identity_function(x):
    """The identity is returning its input."""
    returned = pypdf.xmp._identity(x)
    assert returned == x


@pytest.mark.enable_socket
@pytest.mark.parametrize(
    ("url", "name", "xmpmm_instance_id"),
    [
        (
            None,
            "tika-955562.pdf",
            "uuid:ca96e032-c2af-49bd-a71c-95889bafbf1d",
        )
    ],
)
def test_xmpmm_instance_id(url, name, xmpmm_instance_id):
    """XMPMM instance id is correctly extracted."""
    pdf_bytes = get_data_from_url(url, name=name)
    metadata = PdfReader(BytesIO(pdf_bytes)).xmp_metadata

    first_read = metadata.xmpmm_instance_id
    assert first_read == xmpmm_instance_id
    # cache hit:
    second_read = metadata.xmpmm_instance_id
    assert second_read == xmpmm_instance_id


@pytest.mark.enable_socket
def test_xmp_dc_description_extraction():
    """XMP dc_description is correctly extracted."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381721/tika-953770.pdf",
        name="tika-953770.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    metadata = reader.xmp_metadata

    expected = {"x-default": "U.S. Title 50 Certification Form"}
    assert metadata.dc_description == expected
    # cache hit:
    assert metadata.dc_description == expected


@pytest.mark.enable_socket
def test_dc_creator_extraction():
    """XMP dc_creator is correctly extracted."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381721/tika-953770.pdf",
        name="tika-953770.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    metadata = reader.xmp_metadata

    expected = ["U.S. Fish and Wildlife Service"]
    assert metadata.dc_creator == expected
    # cache hit:
    assert metadata.dc_creator == expected


@pytest.mark.enable_socket
def test_custom_properties_extraction():
    """XMP custom_properties is correctly extracted."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381764/tika-986065.pdf",
        name="tika-986065.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    metadata = reader.xmp_metadata

    expected = {"Style": "Searchable Image (Exact)"}
    assert metadata.custom_properties == expected
    # cache hit:
    assert metadata.custom_properties == expected


@pytest.mark.enable_socket
def test_dc_subject_extraction():
    """XMP dc_subject is correctly extracted."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381730/tika-959519.pdf",
        name="tika-959519.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    metadata = reader.xmp_metadata

    expected_subjects = [
        "P&P",
        "manual",
        "1240.2325",
        "CVM",
        "PROCEDURES ON MEDIA INQUIRIES",
        "animal",
        "media",
        "procedures",
        "inquiries",
    ]
    assert metadata.dc_subject == expected_subjects
    # Cache hit:
    assert metadata.dc_subject == expected_subjects


@pytest.mark.enable_socket
def test_invalid_xmp_information_handling():
    """
    Invalid XML in xmp_metadata is gracefully handled.

    This is a regression test for issue #585.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/py-pdf/pypdf/files/5536984/test.pdf",
        name="pypdf-5536984.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    # Accessing the property is what triggers the error.
    with pytest.raises(PdfReadError) as raised:
        _ = reader.xmp_metadata
    message = raised.value.args[0]
    assert message.startswith("XML in XmpInformation was invalid")

@pytest.mark.samples
def test_pdfa_xmp_metadata_with_values():
    """Test PDF/A XMP metadata extraction from a file with PDF/A metadata."""
    sample_path = SAMPLE_ROOT / "021-pdfa" / "crazyones-pdfa.pdf"
    metadata = PdfReader(sample_path).xmp_metadata

    assert metadata is not None
    assert metadata.pdfaid_part == "1"
    assert metadata.pdfaid_conformance == "B"


@pytest.mark.samples
def test_pdfa_xmp_metadata_without_values():
    """Test PDF/A XMP metadata extraction from a file without PDF/A metadata."""
    sample_path = SAMPLE_ROOT / "020-xmp" / "output_with_metadata_pymupdf.pdf"
    metadata = PdfReader(sample_path).xmp_metadata

    assert metadata is not None
    assert metadata.pdfaid_part is None
    assert metadata.pdfaid_conformance is None


@pytest.mark.enable_socket
def test_xmp_metadata__content_stream_is_dictionary_object():
    """Accessing ``xmp_metadata`` on this document must raise ``PdfReadError``."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18943249/testing.pdf",
        name="issue3107.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    expected_message = (
        "XML in XmpInformation was invalid: 'DictionaryObject' object has no attribute 'get_data'"
    )
    with pytest.raises(PdfReadError, match=expected_message):
        assert reader.xmp_metadata is not None


@pytest.mark.enable_socket
def test_dc_creator__bag_instead_of_seq():
    """The ``dc_creator`` value of the downloaded document must be read as a one-item list."""
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/18381698/tika-924562.pdf",
        name="tika-924562.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))

    assert reader.xmp_metadata is not None
    creators = reader.xmp_metadata.dc_creator
    assert creators == ["William J. Hussar"]


@pytest.mark.enable_socket
def test_dc_language__no_bag_container():
    """The ``dc_language`` value of the downloaded document must be read as a one-item list."""
    pdf_bytes = get_data_from_url(name="iss2138.pdf")
    reader = PdfReader(BytesIO(pdf_bytes))

    assert reader.xmp_metadata is not None
    languages = reader.xmp_metadata.dc_language
    assert languages == ["x-unknown"]


def test_reading_does_not_destroy_root_object():
    """
    Test for #3391.

    After ``xmp_metadata`` has been read, the root's ``/Metadata`` entry must
    still resolve to a stream and be written as a reference.
    """
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "commented-xmp.pdf")
    metadata = writer.xmp_metadata
    assert metadata is not None
    assert not isinstance(writer.root_object["/Metadata"], XmpInformation)
    assert isinstance(writer.root_object["/Metadata"].get_object(), StreamObject)

    buffer = BytesIO()
    writer.write(buffer)
    written = buffer.getvalue()
    assert b"\n/Metadata 27 0 R\n" in written


def test_xmp_information__write_to_stream():
    """``write_to_stream`` must emit a ``DeprecationWarning`` and still write the stream."""
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "commented-xmp.pdf")
    metadata = writer.xmp_metadata

    deprecation_pattern = (
        r"^XmpInformation\.write_to_stream is deprecated and will be removed in pypdf 6\.0\.0\. "
        r"Use PdfWriter\.xmp_metadata instead\.$"
    )
    buffer = BytesIO()
    with pytest.warns(DeprecationWarning, match=deprecation_pattern):
        metadata.write_to_stream(buffer)

    expected_prefix = b"<<\n/Type /Metadata\n/Subtype /XML\n/Length 2786\n>>\nstream\n<?xpacket begin"
    assert buffer.getvalue().startswith(expected_prefix)


def test_pdf_writer__xmp_metadata_setter():
    """
    Exercise the ``PdfWriter.xmp_metadata`` setter in five consecutive steps.

    Each step writes the document and reads it back; later steps clone from
    the reader produced by the previous one, so the order matters.
    """
    # Clear existing metadata.
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "commented-xmp.pdf")
    assert writer.xmp_metadata is not None
    # Keep the raw XMP bytes; they are set again in a later step.
    original_metadata = writer.xmp_metadata.stream.get_data()
    writer.xmp_metadata = None
    output = BytesIO()
    writer.write(output)
    output_bytes = output.getvalue()
    reader = PdfReader(BytesIO(output_bytes))
    assert reader.xmp_metadata is None

    # Attempt to clear again.
    writer = PdfWriter(clone_from=reader)
    assert writer.xmp_metadata is None
    writer.xmp_metadata = None
    output = BytesIO()
    writer.write(output)
    output_bytes = output.getvalue()
    reader = PdfReader(BytesIO(output_bytes))
    assert reader.xmp_metadata is None

    # Set new metadata from bytes.
    writer = PdfWriter(clone_from=reader)
    assert writer.xmp_metadata is None
    writer.xmp_metadata = original_metadata
    output = BytesIO()
    writer.write(output)
    output_bytes = output.getvalue()
    reader = PdfReader(BytesIO(output_bytes))
    assert get_all_tiff(reader.xmp_metadata) == {"tiff:Artist": ["me"]}

    # Set metadata from XmpInformation.
    writer = PdfWriter(clone_from=reader)
    xmp_metadata = writer.xmp_metadata
    assert get_all_tiff(xmp_metadata) == {"tiff:Artist": ["me"]}
    new_metadata = original_metadata.replace(b"<tiff:Artist>me</tiff:Artist>", b"<tiff:Artist>Foo Bar</tiff:Artist>")
    # The stream of the returned object is modified in place; the setter is not called here.
    xmp_metadata.stream.set_data(new_metadata)
    output = BytesIO()
    writer.write(output)
    output_bytes = output.getvalue()
    reader = PdfReader(BytesIO(output_bytes))
    assert get_all_tiff(reader.xmp_metadata) == {"tiff:Artist": ["Foo Bar"]}

    # Fix metadata not being an IndirectObject before.
    writer = PdfWriter(clone_from=RESOURCE_ROOT / "commented-xmp.pdf")
    # Replace the /Metadata entry with the resolved object itself.
    writer.root_object[NameObject("/Metadata")] = writer.root_object["/Metadata"].get_object()
    assert "/XML" in str(writer.root_object)
    writer.xmp_metadata = new_metadata
    output = BytesIO()
    writer.write(output)
    output_bytes = output.getvalue()
    reader = PdfReader(BytesIO(output_bytes))
    assert get_all_tiff(reader.xmp_metadata) == {"tiff:Artist": ["Foo Bar"]}
    assert "/XML" not in str(writer.root_object)


def test_xmp_information__create():
    """Test XmpInformation.create() classmethod."""
    fresh = XmpInformation.create()
    assert fresh is not None

    # A newly created instance has empty or missing values.
    assert fresh.dc_title == {}
    assert fresh.dc_creator == []
    assert fresh.dc_description == {}
    assert fresh.xmp_create_date is None
    assert fresh.pdf_producer is None


def test_xmp_information__set_dc_title():
    """Test setting dc:title metadata."""
    metadata = XmpInformation.create()

    localized_titles = {"x-default": "Test Title", "en": "Test Title EN"}
    metadata.dc_title = localized_titles
    assert metadata.dc_title == localized_titles

    # Resetting may yield either None or an empty dictionary.
    metadata.dc_title = None
    assert metadata.dc_title is None or metadata.dc_title == {}


def test_xmp_information__set_dc_creator():
    """Test setting dc:creator metadata."""
    metadata = XmpInformation.create()

    authors = ["Author One", "Author Two"]
    metadata.dc_creator = authors
    assert metadata.dc_creator == authors

    # Resetting may yield either None or an empty list.
    metadata.dc_creator = None
    assert metadata.dc_creator is None or metadata.dc_creator == []


def test_xmp_information__set_dc_description():
    """Test setting dc:description metadata."""
    metadata = XmpInformation.create()

    localized_descriptions = {"x-default": "Test Description", "en": "Test Description EN"}
    metadata.dc_description = localized_descriptions
    assert metadata.dc_description == localized_descriptions

    # Resetting may yield either None or an empty dictionary.
    metadata.dc_description = None
    assert metadata.dc_description is None or metadata.dc_description == {}


def test_xmp_information__set_dc_subject():
    """Test setting dc:subject metadata."""
    metadata = XmpInformation.create()

    keywords = ["keyword1", "keyword2", "keyword3"]
    metadata.dc_subject = keywords
    assert metadata.dc_subject == keywords

    # Resetting may yield either None or an empty list.
    metadata.dc_subject = None
    assert metadata.dc_subject is None or metadata.dc_subject == []


def test_xmp_information__set_dc_date():
    """
    Test setting dc:date metadata.

    Only the number of stored dates is checked, for a ``datetime`` input and
    for a string input.
    """
    metadata = XmpInformation.create()

    # A datetime instance as input.
    metadata.dc_date = [datetime(2023, 12, 25, 10, 30, 45)]
    from_datetime = metadata.dc_date
    assert len(from_datetime) == 1

    # A date string as input.
    metadata.dc_date = ["2023-12-25T10:30:45.000000Z"]
    from_string = metadata.dc_date
    assert len(from_string) == 1

    # Resetting may yield either None or an empty list.
    metadata.dc_date = None
    assert metadata.dc_date is None or metadata.dc_date == []


def test_xmp_information__set_single_fields():
    """Check set/get/clear for the single-value Dublin Core properties."""
    xmp = XmpInformation.create()

    cases = (
        ("dc_coverage", "Global coverage"),
        ("dc_format", "application/pdf"),
        ("dc_identifier", "unique-id-123"),
        ("dc_source", "Original Source"),
    )
    for field, value in cases:
        setattr(xmp, field, value)
        assert getattr(xmp, field) == value

        setattr(xmp, field, None)
        assert getattr(xmp, field) is None


def test_xmp_information__set_bag_fields():
    """Check set/get/clear for the bag (unordered array) properties."""
    xmp = XmpInformation.create()

    cases = (
        ("dc_contributor", ["Contributor One", "Contributor Two"]),
        ("dc_language", ["en", "fr", "de"]),
        ("dc_publisher", ["Publisher One", "Publisher Two"]),
        ("dc_relation", ["Related Doc 1", "Related Doc 2"]),
        ("dc_type", ["Document", "Text"]),
    )
    for field, values in cases:
        setattr(xmp, field, values)
        assert getattr(xmp, field) == values

        # Clearing may be reported either as None or as an empty list.
        setattr(xmp, field, None)
        cleared = getattr(xmp, field)
        assert cleared is None or cleared == []


def test_xmp_information__set_dc_rights():
    """Check that dc:rights stores a language map and can be cleared again."""
    xmp = XmpInformation.create()
    expected = {"x-default": "All rights reserved", "en": "All rights reserved EN"}

    xmp.dc_rights = expected
    assert xmp.dc_rights == expected

    # Clearing may be reported either as None or as an empty mapping.
    xmp.dc_rights = None
    cleared = xmp.dc_rights
    assert cleared is None or cleared == {}


def test_xmp_information__set_pdf_fields():
    """Check set/get/clear for the PDF namespace properties."""
    xmp = XmpInformation.create()

    cases = (
        ("pdf_keywords", "keyword1, keyword2, keyword3"),
        ("pdf_pdfversion", "1.4"),
        ("pdf_producer", "pypdf"),
    )
    for field, value in cases:
        setattr(xmp, field, value)
        assert getattr(xmp, field) == value

        setattr(xmp, field, None)
        assert getattr(xmp, field) is None


def test_xmp_information__set_xmp_date_fields():
    """Check the XMP date properties with naive, timezone-aware and None values."""
    xmp = XmpInformation.create()
    naive = datetime(2023, 12, 25, 10, 30, 45)
    utc_minus_five = timezone(timedelta(hours=-5))
    aware = datetime(2023, 1, 1, 12, 0, 0, tzinfo=utc_minus_five)
    # 12:00 at UTC-5 is expected to be read back as the naive value 17:00.
    expected_from_aware = datetime(2023, 1, 1, 17, 0, 0)

    for field in ("xmp_create_date", "xmp_modify_date", "xmp_metadata_date"):
        setattr(xmp, field, naive)
        assert isinstance(getattr(xmp, field), datetime)

        setattr(xmp, field, aware)
        assert getattr(xmp, field) == expected_from_aware

        setattr(xmp, field, None)
        assert getattr(xmp, field) is None


def test_xmp_information__set_xmp_creator_tool():
    """Check set/get/clear for xmp:CreatorTool."""
    xmp = XmpInformation.create()
    tool_name = "pypdf"

    xmp.xmp_creator_tool = tool_name
    assert xmp.xmp_creator_tool == tool_name

    xmp.xmp_creator_tool = None
    assert xmp.xmp_creator_tool is None


def test_xmp_information__set_xmpmm_fields():
    """Check set/get/clear for the XMPMM namespace properties."""
    xmp = XmpInformation.create()

    cases = (
        ("xmpmm_document_id", "uuid:12345678-1234-1234-1234-123456789abc"),
        ("xmpmm_instance_id", "uuid:87654321-4321-4321-4321-cba987654321"),
    )
    for field, identifier in cases:
        setattr(xmp, field, identifier)
        assert getattr(xmp, field) == identifier

        setattr(xmp, field, None)
        assert getattr(xmp, field) is None


def test_xmp_information__set_pdfaid_fields():
    """Check set/get/clear for the PDF/A ID namespace properties."""
    xmp = XmpInformation.create()

    for field, value in (("pdfaid_part", "1"), ("pdfaid_conformance", "B")):
        setattr(xmp, field, value)
        assert getattr(xmp, field) == value

        setattr(xmp, field, None)
        assert getattr(xmp, field) is None


def test_xmp_information__create_with_writer():
    """Write metadata built with XmpInformation.create() and read it back."""
    expected = {
        "dc_title": {"x-default": "Created with pypdf"},
        "dc_creator": ["pypdf user"],
        "pdf_producer": "pypdf library",
    }
    xmp = XmpInformation.create()
    for field, value in expected.items():
        setattr(xmp, field, value)

    writer = PdfWriter()
    writer.add_blank_page(612, 792)
    writer.xmp_metadata = xmp

    buffer = BytesIO()
    writer.write(buffer)

    # Read the written bytes back through a fresh stream.
    xmp_read = PdfReader(BytesIO(buffer.getvalue())).xmp_metadata
    assert xmp_read is not None
    for field, value in expected.items():
        assert getattr(xmp_read, field) == value


def test_xmp_information__namespace_prefix():
    """Check the prefix returned by _get_namespace_prefix for each namespace."""
    xmp = XmpInformation.create()

    expected_prefixes = (
        (pypdf.xmp.DC_NAMESPACE, "dc"),
        (pypdf.xmp.XMP_NAMESPACE, "xmp"),
        (pypdf.xmp.PDF_NAMESPACE, "pdf"),
        (pypdf.xmp.XMPMM_NAMESPACE, "xmpMM"),
        (pypdf.xmp.PDFAID_NAMESPACE, "pdfaid"),
        (pypdf.xmp.PDFX_NAMESPACE, "pdfx"),
        # A namespace that is not one of the constants above.
        ("unknown://namespace", "unknown"),
    )
    for namespace, prefix in expected_prefixes:
        assert xmp._get_namespace_prefix(namespace) == prefix


def test_xmp_information__owner_document_none_errors():
    """Check that XmpDocumentError is raised while rdf_root has no owner document."""
    xmp = XmpInformation.create()
    saved_owner = xmp.rdf_root.ownerDocument

    def drop_descriptions() -> None:
        # Remove every rdf:Description child so the next access must create one.
        for node in list(xmp.rdf_root.getElementsByTagNameNS(pypdf.xmp.RDF_NAMESPACE, "Description")):
            xmp.rdf_root.removeChild(node)

    def expect_document_error():
        return pytest.raises(XmpDocumentError, match="XMP Document is None")

    try:
        # Private helpers called directly, with no description and no owner.
        drop_descriptions()
        xmp.rdf_root.ownerDocument = None
        for helper in (xmp._get_or_create_description, xmp._update_stream):
            with expect_document_error():
                helper()

        # Public property setters, each starting again without a description.
        property_cases = (
            ("dc_coverage", "test coverage"),
            ("dc_contributor", ["contributor"]),
            ("dc_creator", ["creator"]),
            ("dc_title", {"x-default": "title"}),
        )
        for field, value in property_cases:
            xmp.rdf_root.ownerDocument = saved_owner
            drop_descriptions()
            xmp.rdf_root.ownerDocument = None
            with expect_document_error():
                setattr(xmp, field, value)

        # Single-value setter with a description that already has the attribute.
        xmp.rdf_root.ownerDocument = saved_owner
        xmp._get_or_create_description().setAttribute("test-attr", "test-value")
        xmp.rdf_root.ownerDocument = None
        with expect_document_error():
            xmp._set_single_value("test-namespace", "test-attr", "new-value")

        # Collection setters with a description present.
        collection_cases = (
            (xmp._set_bag_values, ["value"]),
            (xmp._set_seq_values, ["value"]),
            (xmp._set_langalt_values, {"x-default": "value"}),
        )
        for setter, payload in collection_cases:
            xmp.rdf_root.ownerDocument = saved_owner
            xmp._get_or_create_description()
            xmp.rdf_root.ownerDocument = None
            with expect_document_error():
                setter("test-namespace", "test-name", payload)
    finally:
        # Always re-attach the original owner document.
        xmp.rdf_root.ownerDocument = saved_owner


def test_xmp_information__remove_existing_attribute():
    """Check that setting dc:coverage replaces an attribute of the same name."""
    dc_namespace = pypdf.xmp.DC_NAMESPACE
    xmp = XmpInformation.create()

    for value in ("initial coverage", "updated coverage"):
        xmp.dc_coverage = value
        assert xmp.dc_coverage == value

    xmp.dc_coverage = None
    assert xmp.dc_coverage is None

    # Store the value as an attribute on the description element.
    description = xmp._get_or_create_description()
    description.setAttributeNS(dc_namespace, "dc:coverage", "original attribute")
    assert description.getAttributeNS(dc_namespace, "coverage") == "original attribute"

    xmp.dc_coverage = "new element value"
    assert xmp.dc_coverage == "new element value"

    # The attribute is gone and exactly one child element carries the value.
    assert description.getAttributeNS(dc_namespace, "coverage") == ""
    coverage_nodes = description.getElementsByTagNameNS(dc_namespace, "coverage")
    assert len(coverage_nodes) == 1
    assert coverage_nodes[0].firstChild.data == "new element value"


def test_xmp_information__edge_case_coverage():
    """Check that empty containers and None both read back as empty containers."""
    xmp = XmpInformation.create()

    # Property name paired with the factory of its empty value.
    fields = (
        ("dc_contributor", list),
        ("dc_creator", list),
        ("dc_title", dict),
    )

    for field, make_empty in fields:
        setattr(xmp, field, make_empty())
        assert getattr(xmp, field) == make_empty()

    for field, make_empty in fields:
        setattr(xmp, field, None)
        assert getattr(xmp, field) == make_empty()


def test_xmp_information__create_new_description():
    """Check the element created by _get_or_create_description when none exists."""
    rdf_namespace = pypdf.xmp.RDF_NAMESPACE
    xmp = XmpInformation.create()

    # Start without any rdf:Description element.
    existing = list(xmp.rdf_root.getElementsByTagNameNS(rdf_namespace, "Description"))
    for node in existing:
        xmp.rdf_root.removeChild(node)

    created = xmp._get_or_create_description("test-uri")

    assert created.getAttributeNS(rdf_namespace, "about") == "test-uri"
    assert created.tagName == "rdf:Description"
    assert created.namespaceURI == rdf_namespace


def test_xmp_information__get_text_skips_non_text_nodes():
    """Check that _get_text joins text children and ignores an element child."""
    xmp = XmpInformation.create()
    document = xmp.rdf_root.ownerDocument

    element = document.createElementNS(pypdf.xmp.DC_NAMESPACE, "dc:test")
    children = (
        document.createTextNode("hello"),
        document.createElement("ignored-node"),
        document.createTextNode(" world"),
    )
    for child in children:
        element.appendChild(child)

    assert xmp._get_text(element) == "hello world"


def test_xmp_information__get_or_create_description_mismatch_about_uri():
    """Check that a different about-URI yields a second description element."""
    rdf_namespace = pypdf.xmp.RDF_NAMESPACE
    xmp = XmpInformation.create()

    first = xmp._get_or_create_description()
    first.setAttributeNS(rdf_namespace, "rdf:about", "foo-uri")

    second = xmp._get_or_create_description("bar-uri")
    assert second is not first
    assert second.getAttributeNS(rdf_namespace, "about") == "bar-uri"

    # Both descriptions must be present under the RDF root afterwards.
    about_values = set()
    for description in xmp.rdf_root.getElementsByTagNameNS(rdf_namespace, "Description"):
        about_values.add(description.getAttributeNS(rdf_namespace, "about"))
    assert about_values >= {"foo-uri", "bar-uri"}


def test_xmp_information__attribute_handling():
    """Set several properties after all description elements were removed."""
    xmp = XmpInformation.create()

    for node in list(xmp.rdf_root.getElementsByTagNameNS(pypdf.xmp.RDF_NAMESPACE, "Description")):
        xmp.rdf_root.removeChild(node)

    # dc_format appears twice to cover overwriting an existing value.
    steps = (
        ("dc_coverage", "test coverage"),
        ("dc_contributor", ["contributor1", "contributor2"]),
        ("dc_creator", ["creator1", "creator2"]),
        ("dc_title", {"x-default": "Test Title", "en": "Test Title EN"}),
        ("dc_format", "application/pdf"),
        ("dc_format", "text/plain"),
    )
    for field, value in steps:
        setattr(xmp, field, value)
        assert getattr(xmp, field) == value


def test_xmp_information__create_and_set_metadata():
    """Set metadata after the description elements were removed and recreated."""
    xmp = XmpInformation.create()

    def drop_descriptions() -> None:
        for node in list(xmp.rdf_root.getElementsByTagNameNS(pypdf.xmp.RDF_NAMESPACE, "Description")):
            xmp.rdf_root.removeChild(node)

    # First pass: recreate a description by hand, then overwrite a single value.
    drop_descriptions()
    xmp._get_or_create_description().setAttribute("test", "value")
    for source in ("original", "modified"):
        xmp.dc_source = source
    assert xmp.dc_source == "modified"

    # Second pass: let the property setters create the description themselves.
    drop_descriptions()
    expected = {
        "dc_contributor": ["test1"],
        "dc_creator": ["test2"],
        "dc_title": {"x-default": "test3"},
    }
    for field, value in expected.items():
        setattr(xmp, field, value)
    for field, value in expected.items():
        assert getattr(xmp, field) == value


def test_xmp_information__external_entity_expansion(tmpdir):
    """Check that an external SYSTEM entity does not leak file content into dc:creator."""
    path = tmpdir / "secret.txt"
    path.write("VERY SECRET")

    # The entity points at the secret file; only the literal "abc" may survive.
    packet = f"""<?xml version="1.0"?>
<!DOCTYPE foo [
  <!ENTITY xxe SYSTEM "file://{path}">
]>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="">
      <dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">&xxe;abc</dc:creator>
    </rdf:Description>
  </rdf:RDF>
</x:xmpmeta>"""

    stream = ContentStream(pdf=None, stream=None)
    stream.set_data(packet.encode())

    creators = XmpInformation(stream).dc_creator
    assert creators == ["abc"]


@pytest.mark.timeout(10)
def test_xmp_information__exponential_entity_expansion():
    """Check that a packet with nested, multiplying entities raises PdfReadError."""
    packet = b"""<?xml version="1.0"?>
<!DOCTYPE lolz [
  <!ENTITY lol "lol">
  <!ENTITY lol2 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
  <!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
  <!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
  <!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
  <!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
  <!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
  <!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
  <!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
]>
<x:xmpmeta xmlns:x="adobe:ns:meta/">
  <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="">
      <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/">&lol9;</dc:title>
    </rdf:Description>
  </rdf:RDF>
</x:xmpmeta>"""
    expected_message = (
        r"^XML in XmpInformation was invalid: limit on input amplification factor "
        r"\(from DTD and entities\) breached: line 16, column 60$"
    )

    stream = ContentStream(pdf=None, stream=None)
    stream.set_data(packet)

    # Constructing the object is what is expected to fail.
    with pytest.raises(PdfReadError, match=expected_message):
        XmpInformation(stream)


================================================
FILE: tests/utils.py
================================================
"""Utility functions and classes for testing."""
import logging
from typing import Union

from PIL import Image

from pypdf import PageObject
from pypdf.generic import DictionaryObject, IndirectObject


class PositionedText:
    """
    A piece of text together with its coordinates, font-dictionary and font-size.

    The font-dictionary may be None in case of an unknown font.
    """

    def __init__(self, text, x, y, font_dict, font_size) -> None:
        # TODO: \0-replace: Encoding issue in some files?
        self.text = text.replace("\0", "")
        self.x, self.y = x, y
        self.font_dict, self.font_size = font_dict, font_size

    def get_base_font(self) -> str:
        """
        Returns the "/BaseFont" entry of the font-dictionary.

        Returns UNKNOWN if there is no font-dictionary or it has no such entry.
        """
        font = self.font_dict
        if font is not None and "/BaseFont" in font:
            return font["/BaseFont"]
        return "UNKNOWN"


class Rectangle:
    """An axis-aligned rectangle given by its origin (x, y), width and height."""

    def __init__(self, x, y, w, h) -> None:
        # Every argument must offer as_numeric(); the converted values are stored.
        self.x, self.y, self.w, self.h = (
            value.as_numeric() for value in (x, y, w, h)
        )

    def contains(self, x, y) -> bool:
        """Tell whether the point (x, y) lies inside the rectangle, borders included."""
        if not (self.x <= x <= self.x + self.w):
            return False
        return self.y <= y <= self.y + self.h


def extract_text_and_rectangles(
        page: PageObject, rect_filter=None
) -> tuple[list[PositionedText], list[Rectangle]]:
    """
    Collects the texts and rectangles of a page of type pypdf._page.PageObject.

    The page's text extraction is run with two visitor callbacks: one records
    a Rectangle for every "re" operator, the other records a PositionedText
    for every non-blank text.
    This function supports simple coordinate transformations only.

    rect_filter is optional. If given, it is called with each Rectangle and
    must return a boolean; only rectangles for which it returns a true value
    are kept.

    Returns a tuple (texts, rectangles).
    """
    logger = logging.getLogger("extract_text_and_rectangles")

    found_texts = []
    found_rectangles = []

    def collect_rectangle(op, args, cm_matrix, tm_matrix) -> None:
        debug_enabled = logger.isEnabledFor(logging.DEBUG)
        if debug_enabled:
            logger.debug(f"before: {op} at {cm_matrix}, {tm_matrix}")
        if op != b"re":
            return
        if debug_enabled:
            logger.debug(f"  add rectangle: {args}")
        # The operands are used as x, y, width and height, in that order.
        candidate = Rectangle(args[0], args[1], args[2], args[3])
        if rect_filter is None or rect_filter(candidate):
            found_rectangles.append(candidate)

    def collect_text(text, cm_matrix, tm_matrix, font_dict, font_size) -> None:
        if text.strip() == "":
            return
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f"at {cm_matrix}, {tm_matrix}, font size={font_size}")
        # Elements 4 and 5 of the text matrix are used as the text position.
        position_x, position_y = tm_matrix[4], tm_matrix[5]
        found_texts.append(
            PositionedText(text, position_x, position_y, font_dict, font_size)
        )

    page.extract_text(
        visitor_operand_before=collect_rectangle, visitor_text=collect_text
    )

    return found_texts, found_rectangles


def extract_table(
        texts: list[PositionedText], rectangles: list[Rectangle]
) -> list[list[list[PositionedText]]]:
    """
    Extracts a table containing text.

    It is expected that each cell is marked by a rectangle-object.
    It is expected that the page contains one table only.
    A rectangle only counts as a table cell if, after removing duplicates,
    at least three rectangles share its x-value and at least two rectangles
    share its y-value.

    Neither ``texts`` nor ``rectangles`` is modified.

    A list of rows is returned, ordered by descending y-value; the cells of
    a row are ordered by ascending x-value.
    Each row contains a list of cells.
    Each cell contains a list of PositionedText-elements, or an empty string
    if no text was found inside the cell's rectangle.
    """
    logger = logging.getLogger("extractTable")

    # Step 1: Count number of x- and y-coordinates of rectangles.
    # Remove duplicate rectangles. The new list is rectangles_filtered.
    col2count = {}
    row2count = {}
    seen_keys = set()
    rectangles_filtered = []
    for r in rectangles:
        # Coordinates may be inaccurate, we have to round.
        # cell: x=72.264, y=386.57, w=93.96, h=46.584
        # cell: x=72.271, y=386.56, w=93.96, h=46.59
        key = f"{round(r.x, 0)} {round(r.y, 0)} {round(r.w, 0)} {round(r.h, 0)}"
        if key in seen_keys:
            # Ignore duplicate rectangles
            continue
        seen_keys.add(key)
        col2count[r.x] = col2count.get(r.x, 0) + 1
        row2count[r.y] = row2count.get(r.y, 0) + 1
        rectangles_filtered.append(r)

    # Step 2: Look for texts in rectangles.
    # A text is assigned to the first rectangle containing it.
    rectangle2texts = {}
    for text in texts:
        for r in rectangles_filtered:
            if r.contains(text.x, text.y):
                rectangle2texts.setdefault(r, []).append(text)
                break

    # PDF: y = 0 is expected at the bottom of the page.
    # So the header-row is expected to have the highest y-value.
    # Sort the deduplicated list (the one iterated below) into a new list,
    # so the ordering takes effect and the caller's list stays untouched.
    ordered_rectangles = sorted(rectangles_filtered, key=lambda r: (-r.y, r.x))

    # Step 3: Build the list of rows containing list of cell-texts.
    rows = []
    curr_y = None
    curr_row = None
    for r in ordered_rectangles:
        if col2count[r.x] < 3 or row2count[r.y] < 2:
            # Too few rectangles share this x- or y-value: not a table cell.
            continue
        if curr_y is None or r.y != curr_y:
            # next row
            curr_y = r.y
            curr_row = []
            rows.append(curr_row)
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f"cell: x={r.x}, y={r.y}, w={r.w}, h={r.h}")
        if r not in rectangle2texts:
            curr_row.append("")
            continue
        curr_row.append(list(rectangle2texts[r]))

    return rows


def extract_cell_text(cell_texts: list[PositionedText]) -> str:
    """Concatenates the texts of a cell and strips surrounding whitespace."""
    fragments = [positioned.text for positioned in cell_texts]
    return "".join(fragments).strip()


def get_image_data(
        image: Image.Image, band: Union[int, None] = None
) -> Union[tuple[tuple[int, ...], ...], tuple[float, ...]]:
    """
    Return the pixel data of an image, optionally restricted to one band.

    ``image.get_flattened_data`` is used when the image offers it; otherwise
    the result of ``image.getdata`` is converted to a tuple.

    Only the attribute lookup is guarded, so an AttributeError raised inside
    ``get_flattened_data`` itself propagates instead of silently triggering
    the fallback.
    """
    try:
        get_flattened_data = image.get_flattened_data
    except AttributeError:
        # For Pillow < 12.1.0
        return tuple(image.getdata(band=band))
    return get_flattened_data(band=band)


class ReaderDummy:
    """Minimal stand-in for a reader object, offering only what tests need."""

    def __init__(self, strict=False) -> None:
        self.strict = strict

    def get_object(self, indirect_reference):
        """Return a new, empty DictionaryObject; the reference is ignored."""
        return DictionaryObject()

    def get_reference(self, obj):
        """Return an IndirectObject (idnum 1, generation 1) bound to this dummy; obj is ignored."""
        return IndirectObject(idnum=1, generation=1, pdf=self)