Repository: py-pdf/pdfly Branch: main Commit: 897420ec65b3 Files: 97 Total size: 263.1 KB Directory structure: gitextract_w5t8afxl/ ├── .all-contributorsrc ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ ├── config.yml │ │ ├── feature_request.md │ │ └── question.md │ ├── dependabot.yml │ ├── pull_request_template.md │ ├── scripts/ │ │ └── check_pr_title.py │ └── workflows/ │ ├── check-gitignored-files.yml │ ├── create-github-release.yaml │ ├── github-ci.yaml │ ├── publish-to-pypi.yaml │ ├── release.yaml │ └── title-check.yml ├── .gitignore ├── .gitmodules ├── .isort.cfg ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── .typos.toml ├── CHANGELOG.md ├── CONTRIBUTORS.md ├── LICENSE ├── Makefile ├── README.md ├── dependabot.yml ├── docs/ │ ├── Makefile │ ├── conf.py │ ├── dev/ │ │ ├── intro.md │ │ └── testing.md │ ├── index.rst │ ├── make.bat │ ├── meta/ │ │ └── project-governance.md │ └── user/ │ ├── installation.md │ ├── subcommand-2-up.md │ ├── subcommand-booklet.md │ ├── subcommand-cat.md │ ├── subcommand-check-sign.md │ ├── subcommand-compress.md │ ├── subcommand-extract-annotated-pages.md │ ├── subcommand-extract-images.md │ ├── subcommand-extract-text.md │ ├── subcommand-meta.md │ ├── subcommand-pagemeta.md │ ├── subcommand-rm.md │ ├── subcommand-rotate.md │ ├── subcommand-sign.md │ ├── subcommand-uncompress.md │ ├── subcommand-update-offsets.md │ └── subcommand-x2pdf.md ├── make_release.py ├── mypy.ini ├── pdfly/ │ ├── __init__.py │ ├── __main__.py │ ├── _utils.py │ ├── _version.py │ ├── booklet.py │ ├── cat.py │ ├── check_sign.py │ ├── cli.py │ ├── compress.py │ ├── extract_annotated_pages.py │ ├── extract_images.py │ ├── metadata.py │ ├── pagemeta.py │ ├── rm.py │ ├── rotate.py │ ├── sign.py │ ├── uncompress.py │ ├── up2.py │ ├── update_offsets.py │ └── x2pdf.py ├── pylock.toml ├── pyproject.toml ├── renovate.json ├── resources/ │ ├── demo2_ca.root.crt.pem │ ├── signing-certificate.crt │ └── signing-certificate.p12 ├── setup.cfg ├── setup.py 
└── tests/ ├── __init__.py ├── conftest.py ├── test_booklet.py ├── test_cat.py ├── test_check_sign.py ├── test_cli.py ├── test_compress.py ├── test_extract_annotated_pages.py ├── test_extract_images.py ├── test_pagemeta.py ├── test_rm.py ├── test_rotate.py ├── test_sign.py ├── test_uncompress.py ├── test_up2.py ├── test_update_offsets.py └── test_x2pdf.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .all-contributorsrc ================================================ { "projectName": "pdfly", "projectOwner": "py-pdf", "repoType": "github", "repoHost": "https://github.com", "files": [ "README.md" ], "imageSize": 100, "commit": true, "commitConvention": "eslint", "contributors": [ { "login": "MartinThoma", "name": "Martin Thoma", "avatar_url": "https://avatars.githubusercontent.com/u/1658117?v=4", "profile": "http://martin-thoma.com/", "contributions": [ "code", "doc", "ideas", "infra", "maintenance", "projectManagement", "tutorial" ] }, { "login": "Lucas-C", "name": "Lucas Cimon", "avatar_url": "https://avatars.githubusercontent.com/u/925560?v=4", "profile": "https://chezsoi.org/lucas/blog/", "contributions": [ "bug", "code", "doc", "maintenance" ] }, { "login": "pastor-robert", "name": "Rob Adams", "avatar_url": "https://avatars.githubusercontent.com/u/35646090?v=4", "profile": "https://github.com/pastor-robert", "contributions": [ "code" ] }, { "login": "Kaos599", "name": "Harsh ", "avatar_url": "https://avatars.githubusercontent.com/u/115716485?v=4", "profile": "https://github.com/Kaos599", "contributions": [ "code" ] }, { "login": "srogmann", "name": "Sascha Rogmann", "avatar_url": "https://avatars.githubusercontent.com/u/59577610?v=4", "profile": "https://github.com/srogmann", "contributions": [ "code" ] }, { "login": "ebotiab", "name": "Enrique Botía", "avatar_url": "https://avatars.githubusercontent.com/u/62219950?v=4", "profile": 
"https://github.com/ebotiab", "contributions": [ "code" ] }, { "login": "kommade", "name": "kommade", "avatar_url": "https://avatars.githubusercontent.com/u/99523586?v=4", "profile": "https://github.com/kommade", "contributions": [ "code" ] }, { "login": "Zingzy", "name": "Zingzy", "avatar_url": "https://avatars.githubusercontent.com/u/90309290?v=4", "profile": "https://spoo.me/", "contributions": [ "code" ] }, { "login": "wolfram77", "name": "Subhajit Sahu", "avatar_url": "https://avatars.githubusercontent.com/u/3179612?v=4", "profile": "https://wolfram77.github.io", "contributions": [ "code" ] }, { "login": "kianmeng", "name": "Kian-Meng Ang", "avatar_url": "https://avatars.githubusercontent.com/u/134518?v=4", "profile": "https://www.kianmeng.org", "contributions": [ "ideas" ] }, { "login": "hwine", "name": "Hal Wine", "avatar_url": "https://avatars.githubusercontent.com/u/132412?v=4", "profile": "https://github.com/hwine", "contributions": [ "bug", "code" ] }, { "login": "philippesamuel", "name": "philippesamuel", "avatar_url": "https://avatars.githubusercontent.com/u/32560769?v=4", "profile": "https://github.com/philippesamuel", "contributions": [ "doc" ] }, { "login": "marcobrb", "name": "marcobrb", "avatar_url": "https://avatars.githubusercontent.com/u/219329309?v=4", "profile": "https://github.com/marcobrb", "contributions": [ "doc" ] }, { "login": "moormaster", "name": "moormaster", "avatar_url": "https://avatars.githubusercontent.com/u/2452695?v=4", "profile": "https://github.com/moormaster", "contributions": [ "doc", "code" ] }, { "login": "geoffbeier", "name": "Geoff Beier", "avatar_url": "https://avatars.githubusercontent.com/u/133355?v=4", "profile": "https://geoff.tuxpup.com/", "contributions": [ "code" ] }, { "login": "georgthegreat", "name": "Yuriy Chernyshov", "avatar_url": "https://avatars.githubusercontent.com/u/1121500?v=4", "profile": "https://leftparagraphs.com", "contributions": [ "ideas", "code" ] }, { "login": "lkintact", "name": 
"lkintact", "avatar_url": "https://avatars.githubusercontent.com/u/24726299?v=4", "profile": "https://github.com/lkintact", "contributions": [ "bug" ] } ], "contributorsPerLine": 5, "skipCi": false, "commitType": "docs" } ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Report some unexpected behaviour to help us improve title: '' labels: bug assignees: '' --- Describe the bug **Error details** If an exception is raised, it is very important that you provide the full error message. Otherwise members of the `pdfly` community won't be able to help you with your problem. **Environment** Please provide the following information: * **Operating System**: Windows, Mac OSX, Linux flavour... * **Python version**: you can get this information with `python --version` * **`pdfly` version used**: if you installed it with `pip`, you can get this information in `pip freeze` output ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ # Ref: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#configuring-the-template-chooser blank_issues_enabled: false contact_links: - name: 💬 Start a discussion url: https://github.com/py-pdf/pdfly/discussions/new about: Informal discussion about the project organization, considerations that do not expect a definitive answer, etc. # - name: Security issue # url: security@... # about: Do not report security issues publicly. Email our security contact. ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: enhancement assignees: '' --- **Please explain your intent** Describe what you want to achieve. 
**Describe the solution you'd like** A clear and concise description of what you want to happen. Please also mention any alternative solutions or features you've considered. **Additional context** Add any other context, code snippet or screenshots about the feature request. You can also mention if you are willing to contribute a PR yourself to provide this feature. ================================================ FILE: .github/ISSUE_TEMPLATE/question.md ================================================ --- name: I have a question about: Anything that is not a bug report or a feature request title: '' labels: question assignees: '' --- ================================================ FILE: .github/dependabot.yml ================================================ # Set update schedule for GitHub Actions version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "daily" ================================================ FILE: .github/pull_request_template.md ================================================ e.g. Fixes #0 **Checklist**: - [ ] A unit test is covering the code added / modified by this PR - [ ] In case of a new feature, docstrings have been added, with also some documentation in the `docs/` folder - [ ] A mention of the change is present in `CHANGELOG.md` - [ ] This PR is ready to be merged By submitting this pull request, I confirm that my contribution is made under the terms of the [BSD 3-Clause license](https://github.com/py-pdf/pdfly/blob/main/LICENSE). 
"""Check that all PR titles follow the desired scheme."""

import os
import sys

# Every accepted title starts with exactly one of these prefixes
# (note the trailing space: "BUG:fix" is rejected, "BUG: fix" is accepted).
KNOWN_PREFIXES = (
    "SEC: ",
    "BUG: ",
    "ENH: ",
    "DEP: ",
    "PI: ",
    "ROB: ",
    "DOC: ",
    "Docs: ",  # MRs from Dependabot
    "TST: ",
    "DEV: ",
    "STY: ",
    "MAINT: ",
    "REL: ",
)
# Supplied by the "PR Title Check" workflow through the environment.
PR_TITLE = os.getenv("PR_TITLE", "")


def is_valid_title(title: str) -> bool:
    """
    Tell whether *title* follows the project's PR naming scheme.

    A valid title starts with one of ``KNOWN_PREFIXES`` and has a
    non-blank description after that prefix.

    Args:
        title: The pull request title to check.

    Returns:
        True if the title is acceptable, False otherwise.

    """
    if not title.startswith(KNOWN_PREFIXES):
        return False
    # Every prefix ends with ": ", so the text after the first ": " is the
    # description. Strip it so that a whitespace-only description such as
    # "BUG:    " is rejected instead of being treated as non-empty.
    _, _, description = title.partition(": ")
    return bool(description.strip())


def main(title: str = PR_TITLE) -> int:
    """
    Validate *title* and report the outcome.

    Args:
        title: The pull request title; defaults to the ``PR_TITLE``
            environment variable read at import time.

    Returns:
        The process exit code: 0 for a valid title, 1 otherwise.

    """
    if not is_valid_title(title):
        sys.stderr.write(
            f"The PR title '{title}' does not follow the projects naming scheme: "
            "https://pdfly.readthedocs.io/en/latest/dev/intro.html#commit-messages\n",
        )
        sys.stderr.write(
            "If you do not know which one to choose or if multiple apply, make a best guess. "
            "Nobody will complain if it does not quite fit :-)\n",
        )
        return 1
    sys.stdout.write(f"PR title '{title}' appears to be valid.\n")
    return 0


# Only run the check when executed as a script, so that importing this module
# (e.g. from a test) does not terminate the interpreter.
if __name__ == "__main__":
    sys.exit(main())
workflows: ["Create git tag"] types: - completed permissions: contents: write jobs: build_and_publish: if: ${{ github.event.workflow_run.conclusion == 'success' }} name: Create a GitHub release page runs-on: ubuntu-latest steps: - name: Checkout Repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Prepare variables id: prepare_variables run: | git fetch --tags --force latest_tag=$(git describe --tags --abbrev=0) echo "latest_tag=$(git describe --tags --abbrev=0)" >> "$GITHUB_ENV" echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV" EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64) echo "tag_body<<$EOF" >> "$GITHUB_ENV" git --no-pager tag -l "${latest_tag}" --format='%(contents:body)' >> "$GITHUB_ENV" echo "$EOF" >> "$GITHUB_ENV" - name: Create GitHub Release 🚀 uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3 with: tag_name: ${{ env.latest_tag }} name: Version ${{ env.latest_tag }}, ${{ env.date }} draft: false prerelease: false body: ${{ env.tag_body }} ================================================ FILE: .github/workflows/github-ci.yaml ================================================ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: CI on: push: branches: [ main ] pull_request: branches: [ main ] workflow_dispatch: jobs: tests: strategy: matrix: python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] platform: [ubuntu-latest, windows-latest, macos-latest] name: pytest on ${{ matrix.python-version }} / ${{ matrix.platform }} runs-on: ${{ matrix.platform }} steps: - name: Checkout Code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # 
v6.2.0 with: python-version: ${{ matrix.python-version }} - name: Upgrade pip run: python -m pip install --upgrade pip - name: Install requirements run: pip install . --group dev - name: Install pdfly if: matrix.python-version != '3.8' run: pip install . - name: Install pdfly using the minimal versions of the dependencies if: matrix.python-version == '3.8' run: | # We ensure that those minimal versions remain compatible: sed -i '/dependencies = \[/,/\]/s/>=/==/' pyproject.toml pip install . - name: Run tests run: pytest -vv codestyle: name: Check code with black, mypy, ruff & typos runs-on: ubuntu-latest steps: - name: Checkout Code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: submodules: 'recursive' - name: Cache Downloaded Files id: cache-downloaded-files uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: path: '**/tests/pdf_cache/*' key: cache-downloaded-files - name: Upgrade pip, install pdfly and its dev dependencies run: | python -m pip install --upgrade pip pip install . pip install . --group dev - name: Lint with black run: black --check --extend-exclude sample-files . - name: Lint with mypy run: mypy . --ignore-missing-imports --exclude build - name: Test with ruff run: ruff check pdfly/ - name: Spell Check Repo uses: crate-ci/typos@7c572958218557a3272c2d6719629443b5cc26fd # v1.45.2 package: name: Build & verify package runs-on: ubuntu-latest steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{env.PYTHON_LATEST}} - name: Build package run: | python -m pip install flit check-wheel-contents flit build ls -l dist check-wheel-contents dist/*.whl - name: Test installing package run: python -m pip install . 
- name: Test running installed package working-directory: /tmp run: python -c "import pdfly;print(pdfly.__version__)" ================================================ FILE: .github/workflows/publish-to-pypi.yaml ================================================ name: Publish Python Package to PyPI on: push: tags: - '*.*.*' workflow_dispatch: workflow_run: workflows: ["Create git tag"] types: - completed permissions: contents: write jobs: build_and_publish: if: ${{ github.event.workflow_run.conclusion == 'success' }} name: Publish a new version runs-on: ubuntu-latest steps: - name: Set up Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: 3.x - name: Install Flit run: | python -m pip install --upgrade pip pip install flit - name: Checkout Repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Publish Package to PyPI🚀 env: FLIT_USERNAME: '__token__' FLIT_PASSWORD: ${{ secrets.FLIT_PASSWORD }} run: | flit publish ================================================ FILE: .github/workflows/release.yaml ================================================ # This action assumes that there is a REL-commit which already has a # Markdown-formatted git tag. Hence the CHANGELOG is already adjusted # and it's decided what should be in the release. # This action only ensures the release is done with the proper contents # and that it's announced with a Github release. 
name: Create git tag on: push: branches: - main permissions: contents: write jobs: build_and_publish: name: Publish a new version runs-on: ubuntu-latest if: "${{ startsWith(github.event.head_commit.message, 'REL: ') }}" steps: - name: Checkout Repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Extract version from commit message id: extract_version run: | VERSION=$(echo "${{ github.event.head_commit.message }}" | grep -oP '(?<=REL: )\d+\.\d+\.\d+') echo "version=$VERSION" >> $GITHUB_OUTPUT - name: Extract tag message from commit message id: extract_message run: | VERSION="${{ steps.extract_version.outputs.version }}" delimiter="$(openssl rand -hex 8)" MESSAGE=$(echo "${{ github.event.head_commit.message }}" | sed "0,/REL: $VERSION/s///" ) echo "message<<${delimiter}" >> $GITHUB_OUTPUT echo "$MESSAGE" >> $GITHUB_OUTPUT echo "${delimiter}" >> $GITHUB_OUTPUT - name: Create Git Tag run: | VERSION="${{ steps.extract_version.outputs.version }}" MESSAGE="${{ steps.extract_message.outputs.message }}" git config user.name github-actions git config user.email github-actions@github.com git tag "$VERSION" -m "$MESSAGE" git push origin $VERSION ================================================ FILE: .github/workflows/title-check.yml ================================================ name: 'PR Title Check' on: pull_request: # check when PR # * is created, # * title is edited, and # * new commits are added (to ensure failing title blocks merging) types: [opened, reopened, edited, synchronize] jobs: title-check: name: Title check runs-on: ubuntu-latest steps: - name: Checkout Code uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Check PR title env: PR_TITLE: ${{ github.event.pull_request.title }} run: python .github/scripts/check_pr_title.py ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ 
*.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .envrc .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # IntelliJ .idea # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ *.pdf .envrc # Documentation files copied when building: docs/meta/CHANGELOG.md docs/meta/CONTRIBUTORS.md # 'make release' creates those files: RELEASE_COMMIT_MSG.md RELEASE_TAG_MSG.md ================================================ FILE: .gitmodules ================================================ [submodule "sample-files"] path = sample-files url = git@github.com:py-pdf/sample-files.git ================================================ FILE: .isort.cfg ================================================ [settings] line_length=79 indent=' ' multi_line_output=3 length_sort=0 include_trailing_comma=True skip=docs known_third_party = PIL,pypdf,pydantic,setuptools,typer ================================================ FILE: .pre-commit-config.yaml ================================================ # pre-commit run --all-files repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v6.0.0 hooks: - id: check-added-large-files args: ['--maxkb=1000'] - id: check-ast - id: check-case-conflict - id: check-docstring-first - id: check-yaml - id: debug-statements - id: end-of-file-fixer exclude: "resources/.*|docs/make.bat" - id: fix-byte-order-marker - id: mixed-line-ending args: ['--fix=lf'] exclude: "docs/make.bat" - id: trailing-whitespace - repo: https://github.com/psf/black rev: 26.3.1 hooks: - id: black args: [--target-version, py36] - repo: https://github.com/asottile/blacken-docs rev: 1.20.0 hooks: - id: blacken-docs additional_dependencies: [black==22.1.0] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.15.6 hooks: - id: ruff args: ['--fix'] exclude: 
"tests/" - repo: https://github.com/asottile/pyupgrade rev: v3.21.2 hooks: - id: pyupgrade args: [--py38-plus] - repo: https://github.com/pycqa/flake8 rev: 7.3.0 hooks: - id: flake8 args: ["--ignore", "E,W,F"] - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v1.19.1' hooks: - id: mypy files: ^pdfly/.* args: [--ignore-missing-imports] additional_dependencies: - "pydantic>=1.10.4" ================================================ FILE: .readthedocs.yaml ================================================ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details version: 2 build: os: ubuntu-22.04 tools: python: "3.12" # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py # If using Sphinx, optionally build your docs in additional formats such as PDF formats: all # Optionally declare the Python requirements required to build your docs python: install: - method: pip path: . extra_requirements: - docs ================================================ FILE: .typos.toml ================================================ [default] extend-ignore-identifiers-re = [ "certifi", "FlateDecode", # This typo appears in a .tex file in the sample-files git submodule: "exampe" ] ================================================ FILE: CHANGELOG.md ================================================ # CHANGELOG ## Version 0.6.0, not released yet ### Bug Fixes (BUG) - `2up` incorrectly handled documents with an odd number of pages - [issue #219](https://github.com/py-pdf/pdfly/issues/218) ### New Features (ENH) - `pagemeta` now displays the name of a known page format that is close to the page dimensions ## Version 0.5.1, 2025-10-13 ### New Features (ENH) - `extract-images`: output filenames are now formatted using four digit for page numbers, in order for output files to be ordered alphabetically - ensured support for Python 3.14 ### Bug Fixes (BUG) - `requests` is now a dependency, to prevent a `ModuleNotFoundError` when running 
with `uv` ## Version 0.5.0, 2025-10-13 ### New Features (ENH) - New `extract-annotated-pages` to filter out only the user annotated pages ([PR #98](https://github.com/py-pdf/pdfly/pull/98)) - New `rotate` sub-command to rotate specified pages ([PR #128](https://github.com/py-pdf/pdfly/pull/128)) - Added optional `--password` argument to `cat` to perform decryption ([PR #61](https://github.com/py-pdf/pdfly/pull/61)) - `pagemeta` now displays known page formats when it can detect it: A3, A4, A5, Letter, Legal - `pagemeta` now displays the rotation value. - New `sign` sub-command to create a signed pdf from an existing pdf ([PR #165](https://github.com/py-pdf/pdfly/pull/165)) - New `check-sign` sub-command to verify the signature of a signed pdf ([PR #166](https://github.com/py-pdf/pdfly/pull/166)) ### Bug Fixes (BUG) - `pypdf[full]` is now a dependency, instead of just `pypdf`, to avoid some cases of `DependencyError` ### Deprecations (DEP) * support for older Python3 versions has been dropped, `pdfly` now requires Python 3.10+ ## Version 0.4.0, 2024-12-08 ### New Features (ENH) - New `booklet` command to reorder and two-up PDF pages for booklet printing ([PR #77](https://github.com/py-pdf/pdfly/pull/77)) - New `uncompress` command ([PR #75](https://github.com/py-pdf/pdfly/pull/75)) - New `update-offsets` command to adjust offsets and lengths ([PR #15](https://github.com/py-pdf/pdfly/pull/15)) - New `rm` command ([PR #59](https://github.com/py-pdf/pdfly/pull/59)) - `metadata`: now also displaying CreationDate, Creator, Keywords & Subject ([PR #73](https://github.com/py-pdf/pdfly/pull/73)) - Add warning for out-of-bounds page range in pdfly `cat` command ([PR #58](https://github.com/py-pdf/pdfly/pull/58)) ### Bug Fixes (BUG) - `2-up` command, that only showed one page per sheet, on the left side, with blank space on the right ([PR #78](https://github.com/py-pdf/pdfly/pull/78)) [Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.3...0.4.0) ## Version 0.3.3, 2024-04-14 ### Developer 
Experience (DEV) - Chain workflows [Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.2...0.3.3) ## Version 0.3.2, 2024-04-14 ### Developer Experience (DEV) - Decouple git tag / PyPI release / Github release page (#49, #50) [Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.1...0.3.2) ## Version 0.3.1, 2024-03-29 ### Maintenance (MAINT) - Update pypdf usage (#48) ### Developer Experience (DEV) - Release via REL commit (#48) - Fix mypy issues - Add make_release.py [Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.0...0.3.1) ## Version 0.3.0, 2023-12-17 ### New Features (ENH) - Add x2pdf command (#25) ### Bug Fixes (BUG) - boxes are floats, not int - Add missing fpdf2 dependency (#29) ### Documentation (DOC) - cat command - More examples for the cat subcommand - Add cat subcommand - Link to readthedocs - Add project governance file - Move readthedocs config file to root - Add docs (#24) ### Developer Experience (DEV) - Checkout sample-files in CI (#30) - Let dependabot update Github Actions - Add action for automatic releases ### Maintenance (MAINT) - Update dependencies (#42) - In the cat subcommand, replace the usage of the deprecated PdfMerger by PdfWriter (#34) - Update .pre-commit-config.yaml - Adjust x2pdf syntax ### Testing (TST) - cat with two files (#41) - Test cat command with more parameters + validate result (#40) - Adding unit tests (#28) ### Other - : [{'msg': 'Bump actions/setup-python from 4 to 5 (#39)', 'author': 'dependabot[bot]'}, {'msg': 'test_extract_images_monochrome() is now passing', 'author': 'CimonLucas(LCM)'}, {'msg': 'Bump actions/setup-python from 3 to 4 (#27)', 'author': 'dependabot[bot]'}, {'msg': 'Bump actions/checkout from 3 to 4 (#26)', 'author': 'dependabot[bot]'}, {'msg': 'Ensure input PDF exists for cat subcommand', 'author': 'MartinThoma'}] [Full Changelog](https://github.com/py-pdf/pdfly/compare/0.2.14...0.3.0) ================================================ FILE: CONTRIBUTORS.md 
================================================ # List of contributors The list of contributors has been moved into the [README.md](https://github.com/py-pdf/pdfly/blob/main/README.md#contributors-). ================================================ FILE: LICENSE ================================================ BSD 3-Clause License Copyright (c) 2022, py-pdf All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: Makefile ================================================ maint: pre-commit autoupdate python -m pip install --upgrade . 
python -m pip lock --group dev --group docs . uv pip install -r pylock.toml git submodule update --remote release: python make_release.py git commit -eF RELEASE_COMMIT_MSG.md upload: make clean flit publish clean: python setup.py clean --all pyclean . rm -rf tests/__pycache__ pdfly/__pycache__ Image9.png htmlcov docs/_build dist dont_commit_merged.pdf dont_commit_writer.pdf pdfly.egg-info lint: mypy . --ignore-missing-imports --exclude build ruff check --fix --unsafe-fixes test: pytest tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=30 ================================================ FILE: README.md ================================================ [![Pypi latest version](https://img.shields.io/pypi/v/pdfly.svg)](https://pypi.org/pypi/pdfly#history) [![Python Support](https://img.shields.io/pypi/pyversions/pdfly.svg)](https://pypi.org/project/pdfly/) [![License: BSD 3 Clause](https://img.shields.io/badge/License-BSD%203%20Clause-blue.svg)](https://opensource.org/license/bsd-3-clause) [![Documentation Status](https://app.readthedocs.org/projects/pdfly/badge/?version=latest)](https://pdfly.readthedocs.io/en/latest/) [![build status](https://github.com/py-pdf/pdfly/workflows/CI/badge.svg)](https://github.com/py-pdf/pdfly/actions?query=branch%3Amain) [![GitHub last commit](https://img.shields.io/github/last-commit/py-pdf/pdfly)](https://github.com/py-pdf/pdfly/commits/main/) [![issues closed](https://img.shields.io/github/issues-closed/py-pdf/pdfly)](https://github.com/py-pdf/pdfly/issues) [![PRs closed](https://img.shields.io/github/issues-pr-closed/py-pdf/pdfly)](https://github.com/py-pdf/pdfly/pulls) [![linters: black, ruff, mypy](https://img.shields.io/badge/linters-black,ruff,mypy-green.svg)](https://github.com/py-pdf/pdfly/actions) [![Pull Requests Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat)](https://makeapullrequest.com) [![first-timers-only 
Friendly](https://img.shields.io/badge/first--timers--only-friendly-blue.svg)](https://www.firsttimersonly.com/) # pdfly pdfly (say: PDF-li) is a pure-python cli application for manipulating PDF files. pdfly logo ## Installation ```bash pip install -U pdfly ``` As `pdfly` is an application, you might want to install it with [`pipx`](https://pypi.org/project/pipx/) or [`uv tool`](https://docs.astral.sh/uv/concepts/tools/): `uvx pdfly --help` ## Usage ```console $ pdfly --help Usage: pdfly [OPTIONS] COMMAND [ARGS]... pdfly is a pure-python cli application for manipulating PDF files. ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────╮ │ --version │ │ --help Show this message and exit. │ ╰────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Commands ─────────────────────────────────────────────────────────────────────────────────────╮ │ 2-up Create a booklet-style PDF from a single input. │ │ booklet Reorder and two-up PDF pages for booklet printing. │ │ cat Extract and concatenate pages from PDF files into a single PDF file. │ │ check-sign Verifies the signature of a signed PDF. │ │ compress Compress a PDF. │ │ extract-annotated-pages Extract only the annotated pages from a PDF. │ │ extract-images Extract images from PDF without resampling or altering. │ │ extract-text Extract text from a PDF file. │ │ meta Show metadata of a PDF file │ │ pagemeta Give details about a single page. │ │ rm Remove pages from PDF files. │ │ rotate Rotate specified pages by the specified amount │ │ sign Creates a signed PDF from an existing PDF file. │ │ uncompress Module for uncompressing PDF content streams. │ │ update-offsets Updates offsets and lengths in a simple PDF file. │ │ x2pdf Convert one or more files to PDF. Each file is a page. 
│ ╰────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` You can see the help of every subcommand by typing `--help`: ```console $ pdfly 2-up --help Usage: pdfly 2-up [OPTIONS] PDF OUT Create a booklet-style PDF from a single input. Pairs of two pages will be put on one page (left and right) usage: python 2-up.py input_file output_file ╭─ Arguments ───────────────────────────────────────╮ │ * pdf PATH [default: None] [required] │ │ * out PATH [default: None] [required] │ ╰───────────────────────────────────────────────────╯ ╭─ Options ─────────────────────────────────────────╮ │ --help Show this message and exit. │ ╰───────────────────────────────────────────────────╯ ``` **Note:** `pdfly` has nothing to do with ``pdfly.net`` or ``gopdfly.com`` ## Contributors ✨ pdfly is a free software project without any company affiliation. We cannot pay contributors, but we do value their contributions 🤗
Martin Thoma
Martin Thoma

💻 📖 🤔 🚇 🚧 📆
Lucas Cimon
Lucas Cimon

🐛 💻 📖 🚧
Rob Adams
Rob Adams

💻
Harsh
Harsh

💻
Sascha Rogmann
Sascha Rogmann

💻
Enrique Botía
Enrique Botía

💻
kommade
kommade

💻
Zingzy
Zingzy

💻
Subhajit Sahu
Subhajit Sahu

💻
Kian-Meng Ang
Kian-Meng Ang

🤔
Hal Wine
Hal Wine

🐛 💻
philippesamuel
philippesamuel

📖
marcobrb
marcobrb

📖
moormaster
moormaster

📖 💻
Geoff Beier
Geoff Beier

💻
Yuriy Chernyshov
Yuriy Chernyshov

🤔 💻
lkintact
lkintact

🐛
This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification ([emoji key](https://allcontributors.org/docs/en/emoji-key)). Contributions of any kind welcome! The list might not be complete. You can find more contributors via the git history and [GitHubs 'Contributors' feature](https://github.com/py-pdf/pdfly/graphs/contributors). ================================================ FILE: dependabot.yml ================================================ # Doc: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file version: 2 updates: - package-ecosystem: "gitsubmodule" commit-message: prefix: "MAINT" - package-ecosystem: "github-actions" commit-message: prefix: "MAINT" - package-ecosystem: "pip" commit-message: prefix: "MAINT" ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/conf.py ================================================ """ Configuration file for the Sphinx documentation builder. This file only contains a selection of the most common options. 
For a full list see the documentation: https://www.sphinx-doc.org/en/master/usage/configuration.html """ # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. import os import shutil import sys import pdfly as py_pkg sys.path.insert(0, os.path.abspath(".")) # noqa sys.path.insert(0, os.path.abspath("../")) # noqa shutil.copyfile("../CHANGELOG.md", "meta/CHANGELOG.md") shutil.copyfile("../CONTRIBUTORS.md", "meta/CONTRIBUTORS.md") # -- Project information ----------------------------------------------------- project = py_pkg.__name__ copyright = "2023, pdfly contributors" author = "pdfly contributors" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = py_pkg.__version__ # The full version, including alpha/beta/rc tags. release = py_pkg.__version__ # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. needs_sphinx = "4.0.0" myst_all_links_external = True # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ "sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.autosummary", "sphinx.ext.coverage", "sphinx.ext.mathjax", "sphinx.ext.viewcode", "sphinx.ext.napoleon", # External "myst_parser", ] intersphinx_mapping = { "py-pdf organization": ("https://py-pdf.github.io/", None), } nitpick_ignore_regex = [ # For reasons unclear at this stage the io module prefixes everything with _io # and this confuses sphinx (r"py:class", r"_io.(FileIO|BytesIO|Buffered(Reader|Writer))"), ] autodoc_default_options = { "member-order": "bysource", "members": True, "show-inheritance": True, "undoc-members": True, } autodoc_inherit_docstrings = False autodoc_typehints_format = "short" python_use_unqualified_type_names = True # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. html_theme_options = { "canonical_url": "", "analytics_id": "", "logo_only": True, "display_version": True, "prev_next_buttons_location": "bottom", "style_external_links": False, # Toc options "collapse_navigation": True, "sticky_navigation": True, "navigation_depth": 4, "includehidden": True, "titles_only": False, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] html_logo = "pdfly-logo.png" # -- Options for Napoleon ----------------------------------------------------- napoleon_google_docstring = True napoleon_numpy_docstring = False # Explicitly prefer Google style docstring napoleon_use_param = True # for type hint support napoleon_use_rtype = ( False # False so the return type is inline with the description. ) ================================================ FILE: docs/dev/intro.md ================================================ # Developer Intro pdfly is an application and thus non-developers might also use it. ## Installing Requirements ``` pip install . --group dev ``` ## Running Tests See [testing pdfly with pytest](testing.md) ## Documentation To preview the HTML documentation, you can run this command: ``` sphinx-autobuild docs docs/_build/html ``` ## Tools: git and pre-commit Git is a command line application for version control. If you don't know it, you can [play ohmygit](https://ohmygit.org/) to learn it. GitHub is the service where the pdfly project is hosted. While git is free and open source, GitHub is a paid service by Microsoft, but free in a lot of cases. [pre-commit](https://pypi.org/project/pre-commit/) is a command line application that uses git hooks to automatically execute code. This allows you to avoid style issues and other code quality issues. After you entered `pre-commit install` once in your local copy of pdfly, it will automatically be executed when you `git commit`. ## Commit Messages Having a clean commit message helps people to quickly understand what the commit is about, without actually looking at the changes. The first line of the commit message is used to [auto-generate the CHANGELOG](https://github.com/py-pdf/pdfly/blob/main/make_release.py). 
For this reason, the format should be: ``` PREFIX: DESCRIPTION BODY ``` The `PREFIX` can be: * `SEC`: Security improvements. Typically an infinite loop that was possible. * `BUG`: A bug was fixed. Likely there is one or multiple issues. Then write in the `BODY`: `Closes #123` where 123 is the issue number on GitHub. It would be absolutely amazing if you could write a regression test in those cases. That is a test that would fail without the fix. A bug is always an issue for pdfly users - test code or CI that was fixed is not considered a bug here. * `ENH`: A new feature! Describe in the body what it can be used for. * `DEP`: A deprecation. Either marking something as "this is going to be removed" or actually removing it. * `PI`: A performance improvement. This could also be a reduction in the file size of PDF files generated by pdfly. * `ROB`: A robustness change. Dealing better with broken PDF files. * `DOC`: A documentation change. `Docs:` is also allowed for commits made by DependaBot. * `TST`: Adding or adjusting tests. * `DEV`: Developer experience improvements, e.g. pre-commit or setting up CI. * `MAINT`: Quite a lot of different stuff. Performance improvements are for sure the most interesting changes in here. Refactorings as well. * `STY`: A style change. Something that makes pdfly code more consistent. Typically a small change. It could also be better error messages for end users. The prefix is used to generate the CHANGELOG. Every PR must have exactly one - if you feel like several match, take the top one from this list that matches for your PR. ## Pull Requests Smaller Pull Requests (PRs) are preferred as it's typically easier to merge them. For example, if you have some typos, a few code-style changes, a new feature, and a bug-fix, that could be 3 or 4 PRs. A PR must be complete. That means if you introduce a new feature it must be finished within the PR and have a test for that feature. 
## Releases To perform a new release, there is the checklist to follow: 1. update `__version__` in `pdfly/_version.py` & `CHANGELOG.md` in order to specify the release date for the new version 2. perform a `REL`-prefixed commit, _e.g;_ `REL: X.Y.0"`, then make & merge a PR for it. The Github Actions pipeline should create a new `git` tag, and then publish a new version on Pypi: 3. edit the [GitHub release note](https://github.com/py-pdf/pdfly/releases), using the `CHANGELOG.md` content for the description ================================================ FILE: docs/dev/testing.md ================================================ # Testing pdfly uses [`pytest`](https://docs.pytest.org/en/latest/) for testing. To run the tests you need to install the CI (Continuous Integration) dependencies by running `pip install . --group dev`. ================================================ FILE: docs/index.rst ================================================ Welcome to pdfly ================ .. image:: https://img.shields.io/pypi/v/pdfly.svg :target: https://pypi.org/pypi/pdfly#history .. image:: https://img.shields.io/pypi/pyversions/pdfly.svg :target: https://pypi.org/project/pdfly/ .. image:: https://img.shields.io/badge/License-BSD%203%20Clause-blue.svg :target: https://opensource.org/license/bsd-3-clause .. image:: https://app.readthedocs.org/projects/pdfly/badge/?version=latest :target: https://pdfly.readthedocs.io/en/latest/ .. image:: https://github.com/py-pdf/pdfly/workflows/CI/badge.svg :target: https://github.com/py-pdf/pdfly/actions?query=branch%3Amain .. image:: https://img.shields.io/github/last-commit/py-pdf/pdfly :target: https://github.com/py-pdf/pdfly/commits/main/ .. image:: https://img.shields.io/github/issues-closed/py-pdf/pdfly :target: https://github.com/py-pdf/pdfly/issues .. image:: https://img.shields.io/github/issues-pr-closed/py-pdf/pdfly :target: https://github.com/py-pdf/pdfly/pulls .. 
image:: https://img.shields.io/badge/linters-black,ruff,mypi-green.svg :target: https://github.com/py-pdf/pdfly/actions .. image:: https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat :target: https://makeapullrequest.com .. image:: https://img.shields.io/badge/first--timers--only-friendly-blue.svg :target: https://www.firsttimersonly.com/ pdfly (say: PDF-li) is a pure-python cli application for manipulating PDF files. .. image:: ./pdfly-logo.png :scale: 25% Repository: `github.com/py-pdf/pdfly `__ Installation ------------ .. code-block:: pip install -U pdfly As ``pdfly`` is an application, you might want to install it with `pipx `__ or `uv tool `__: ``uvx pdfly --help`` Usage ----- .. code-block:: $ pdfly --help Usage: pdfly [OPTIONS] COMMAND [ARGS]... pdfly is a pure-python cli application for manipulating PDF files. ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────╮ │ --version │ │ --help Show this message and exit. │ ╰────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Commands ─────────────────────────────────────────────────────────────────────────────────────╮ │ 2-up Create a booklet-style PDF from a single input. │ │ booklet Reorder and two-up PDF pages for booklet printing. │ │ cat Extract and concatenate pages from PDF files into a single PDF file. │ │ check-sign Verifies the signature of a signed PDF. │ │ compress Compress a PDF. │ │ extract-annotated-pages Extract only the annotated pages from a PDF. │ │ extract-images Extract images from PDF without resampling or altering. │ │ extract-text Extract text from a PDF file. │ │ meta Show metadata of a PDF file │ │ pagemeta Give details about a single page. │ │ rm Remove pages from PDF files. │ │ rotate Rotate specified pages by the specified amount │ │ sign Creates a signed PDF from an existing PDF file. │ │ uncompress Module for uncompressing PDF content streams. 
│ │ update-offsets Updates offsets and lengths in a simple PDF file. │ │ x2pdf Convert one or more files to PDF. Each file is a page. │ ╰────────────────────────────────────────────────────────────────────────────────────────────────╯ You can see the help of every subcommand by typing ``--help``: .. code-block:: $ pdfly 2-up --help Usage: pdfly 2-up [OPTIONS] PDF OUT Create a booklet-style PDF from a single input. Pairs of two pages will be put on one page (left and right) usage: python 2-up.py input_file output_file ╭─ Arguments ───────────────────────────────────────╮ │ * pdf PATH [default: None] [required] │ │ * out PATH [default: None] [required] │ ╰───────────────────────────────────────────────────╯ ╭─ Options ─────────────────────────────────────────╮ │ --help Show this message and exit. │ ╰───────────────────────────────────────────────────╯ GitHub ⭐️ --------- .. image:: https://api.star-history.com/svg?repos=py-pdf/pdfly&type=date&legend=top-left :target: https://www.star-history.com/#py-pdf/pdfly&type=date&legend=top-left .. note:: ``pdfly`` has nothing to do with ``pdfly.net`` or ``gopdfly.com`` .. toctree:: :caption: User Guide :maxdepth: 1 user/installation user/subcommand-2-up user/subcommand-booklet user/subcommand-cat user/subcommand-check-sign user/subcommand-compress user/subcommand-extract-annotated-pages user/subcommand-extract-images user/subcommand-extract-text user/subcommand-meta user/subcommand-pagemeta user/subcommand-rm user/subcommand-rotate user/subcommand-sign user/subcommand-uncompress user/subcommand-update-offsets user/subcommand-x2pdf .. toctree:: :caption: Developer Guide :maxdepth: 1 dev/intro dev/testing .. 
toctree:: :caption: About pdfly :maxdepth: 1 meta/CHANGELOG meta/CONTRIBUTORS meta/project-governance Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: docs/make.bat ================================================ @ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd ================================================ FILE: docs/meta/project-governance.md ================================================ # Project Governance This document describes how the pdfly project is managed. It describes the different actors, their roles, and the responsibilities they have. `pdfly` is part of the `py-pdf` organization, and hence we try to follow some [maintainer guidelines](https://py-pdf.github.io/pages/maintainer-guidelines.html) & [rules](https://py-pdf.github.io/pages/py-pdf-owners.html). ## Terminology * The **project** is pdfly - a free and open-source pure-python PDF command line tool. It includes the [code, issues, and discussions on GitHub](https://github.com/py-pdf/pdfly), and [the documentation on ReadTheDocs](https://pdfly.readthedocs.io/en/latest/), [the package on PyPI](https://pypi.org/project/pdfly/). 
* A **maintainer** is a person who has technical permissions to change one or more parts of the project.
As pdfly is free software, parts of the community can split off (fork the code) and create a new community. This should limit the harm a bad benevolent dictator can do. ## Project Language The project language is (american) English. All documentation and issues must be written in English to ensure that the community can understand it. We appreciate the fact that large parts of the community don't have English as their mother tongue. We try our best to understand others - [automatic translators](https://translate.google.com/) might help. ## Expectations The community can expect the following: * The **benevolent dictator** tries their best to make decisions from which the overall community profits. The benevolent dictator is aware that his/her decisions can shape the overall community. Once the benevolent dictator notices that she/he doesn't have the time to advance pdfly, he/she looks for a new benevolent dictator. As it is expected that the benevolent dictator will step down at some point of their choice (hopefully before their death), it is NOT a benevolent dictator for life (BDFL). * Every **maintainer** (including the benevolent dictator) is aware of their permissions and the harm they could do. They value security and ensure that the project is not harmed. They give their technical permissions back if they don't need them any longer. Any long-time contributor can become a maintainer. Maintainers can - and should! - step down from their role when they realize that they can no longer commit that time. * Every **contributor** is aware that the time of maintainers and the benevolent dictator is limited. Short pull requests that briefly describe the solved issue and have a unit test have a higher chance to get merged soon - simply because it's easier for maintainers to see that the contribution will not harm the overall project. Their contributions are documented in the git history and in the public issues. * Every **community member** uses a respectful language. 
We are all human: we get upset about things we care about, and other things than what's visible on the internet go on in our lives.
================================================ FILE: docs/user/installation.md ================================================ # Installation There are several ways to install pdfly. The most common option is to use pip. ## pip pdfly requires Python 3.10+ to run. Typically Python comes with `pip`, a package installer. Using it you can install pdfly: ```bash pip install pdfly ``` If you are not a super-user (a system administrator / root), you can also just install pdfly for your current user: ```bash pip install --user pdfly ``` ## pipx We recommend to install pdfly via [pipx](https://pypi.org/project/pipx/): ```bash pipx install pdfly ``` pipx installs the pdfly application in an isolated environment. That guarantees that no other applications interferes with its defpendencies. ## uv pdfly can be run without persistent installation using [uv tool run](https://docs.astral.sh/uv/guides/tools/#running-tools): ```bash uv tool run pdfly ``` via the [uvx](https://docs.astral.sh/uv/guides/tools/#running-tools) alias: ```bash uvx pdfly ``` or it can be installed using [uv tool install](https://docs.astral.sh/uv/guides/tools/#installing-tools): ```bash uv tool install pdfly ``` ## Python Version Support If ✓ is given, it works. It is tested via CI. If ✖ is given, it is guaranteed not to work. If it's not filled, we don't guarantee support, but it might still work. | Python | 3.14 | 3.13 | 3.12 | 3.11 | 3.10 | 2.7 | | ---------------------- | ---- | ---- | ---- | ---- | ---- | --- | | pdfly | ✓ | ✓ | ✓ | ✓ | ✓ | ✖ | ## Development Version In case you want to use the current version under development: ```bash pip install git+https://github.com/py-pdf/pdfly.git ``` ================================================ FILE: docs/user/subcommand-2-up.md ================================================ # 2-up Create a booklet-style PDF from a single input. ## Usage ``` $ pdfly 2-up --help Usage: pdfly 2-up [OPTIONS] PDF OUT Create a booklet-style PDF from a single input. 
Pairs of two pages will be put on one page (left and right) usage: python 2-up.py input_file output_file ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ │ * pdf FILE [default: None] [required] │ │ * out PATH [default: None] [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --help Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples Convert `document.pdf` into a booklet and write the output in `booklet.pdf`. ``` pdfly 2-up document.pdf booklet.pdf ``` ================================================ FILE: docs/user/subcommand-booklet.md ================================================ # booklet Reorder and two-up PDF pages for booklet printing. ## Usage ``` $ pdfly booklet --help Usage: pdfly booklet [OPTIONS] FILENAME OUTPUT Reorder and two-up PDF pages for booklet printing. If the number of pages is not a multiple of four, pages are added until it is a multiple of four. This includes a centerfold in the middle of the booklet and a single page on the inside back cover. The content of those pages are from the centerfold-file and blank-page-file files, if specified, otherwise they are blank pages. Example: pdfly booklet input.pdf output.pdf ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ │ * filename FILE [default: None] [required] │ │ * output FILE [default: None] [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --blank-page-file -b FILE page added if input is odd number of pages │ │ [default: None] │ │ --centerfold-file -c FILE double-page added if input is missing >= 2 │ │ pages │ │ [default: None] │ │ --help Show this message and exit. 
│ ╰──────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples Convert `document.pdf` into a booklet and write the output in `booklet.pdf`. ``` pdfly booklet document.pdf booklet.pdf ``` ================================================ FILE: docs/user/subcommand-cat.md ================================================ # cat The cat command can split / extract pages from a PDF. It can also join/merge/combine multiple PDF documents into a single one. ## Usage ``` pdfly cat --help Usage: pdfly cat [OPTIONS] FILENAME FN_PGRGS... Extract and concatenate pages from PDF files into a single PDF file. Page ranges refer to the previously-named file. A file not followed by a page range means all the pages of the file. PAGE RANGES are like Python slices. Remember, page indices start with zero. When using page ranges that start with a negative value a two-hyphen symbol -- must be used to separate them from the command line options. Page range expression examples: : all pages. -1 last page. 22 just the 23rd page. :-1 all but the last page. 0:3 the first three pages. -2 second-to-last page. :3 the first three pages. -2: last two pages. 5: from the sixth page onward. -3:-1 third & second to last. The third, "stride" or "step" number is also recognized. ::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0. 1:10:2 1 3 5 7 9 2::-1 2 1 0. ::-1 all pages in reverse order. Examples pdfly cat -o output.pdf head.pdf -- content.pdf :6 7: tail.pdf -1 Concatenate all of head.pdf, all but page seven of content.pdf, and the last page of tail.pdf, producing output.pdf. pdfly cat chapter*.pdf >book.pdf You can specify the output file by redirection. pdfly cat chapter?.pdf chapter10.pdf >book.pdf In case you don't want chapter 10 before chapter 2. ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ │ * filename PATH [default: None] [required] │ │ * fn_pgrgs FN_PGRGS... 
filenames and/or page ranges [default: None] │ │ [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ * --output -o PATH [default: None] [required] │ │ --verbose --no-verbose show page ranges as they are being │ │ read │ │ [default: no-verbose] │ │ --help Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples ### Split a PDF Get the second, third, and fourth page of a PDF: ``` pdfly cat input.pdf 1:4 -o out.pdf ``` ### Extract a Page Get the sixt page of a PDF: ``` pdfly cat input.pdf 5 -o out.pdf ``` Note that it is `5`, because the page indices always start at 0. ### Specify a negative index Get the last page of a PDF: ``` pdfly cat -o out.pdf input.pdf -- -1 ``` `--` must be used to escape negative indices. ### Concatenate two PDFs Just combine two PDF files so that the pages come right after each other: ``` pdfly cat input1.pdf input2.pdf -o out.pdf ``` ### Decrypt a PDF document ``` pdfly cat --password=SECRET doc.pdf -o doc-decrypted.pdf ``` ================================================ FILE: docs/user/subcommand-check-sign.md ================================================ # check-sign Validate that a PDF document has a digital signature matching a given certificate. ## Usage ``` Usage: pdfly check-sign [OPTIONS] FILENAME Verifies the signature of a signed PDF. Examples pdfly verify input.pdf --pem certs.pem Verifies the input.pdf with a PEM certificate bundle. 
╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ * filename FILE [required] │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ * --pem FILE PEM certificate file [required] │ │ --verbose --no-verbose Show signature verification details. [default: no-verbose] │ │ --help Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples ### Verify PDF signature against a PEM certificate Verifies the input.pdf with a PEM certificate bundle. ``` pdfly check-sign input.pdf --pem certs.pem ``` ================================================ FILE: docs/user/subcommand-compress.md ================================================ # compress Compress a PDF using lossless FlateDecode compression. **Note:** If compression would result in a larger file, the original file is kept unchanged to avoid file size increase. ## Usage ``` $ pdfly compress --help Usage: pdfly compress [OPTIONS] PDF OUTPUT Compress a PDF. ╭─ Arguments ───────────────────────────────────────────╮ │ * pdf FILE [default: None] [required] │ │ * output PATH [default: None] [required] │ ╰───────────────────────────────────────────────────────╯ ╭─ Options ─────────────────────────────────────────────╮ │ --help Show this message and exit. 
│ ╰───────────────────────────────────────────────────────╯ ``` ## Examples Compress the file `document.pdf` and output `document_compressed.pdf` ``` pdfly compress document.pdf document_compressed.pdf ``` Example output when compression succeeds: ``` Original Size : 1,996,123 Final Size : 1,234,567 (Compressed (61.8% of original)) ``` Example output when compression would increase file size: ``` Original Size : 887 Final Size : 887 (No compression applied (would increase size)) ``` ================================================ FILE: docs/user/subcommand-extract-annotated-pages.md ================================================ # extract-annotated-pages Extract only the annotated pages from a PDF. This can help to review or rework pages from a large document iteratively. ## Usage ``` pdfly extract-annotated-pages --help Usage: pdfly extract-annotated-pages [OPTIONS] INPUT_PDF Extract only the annotated pages from a PDF. Q: Why does this help? A: https://github.com/py-pdf/pdfly/issues/97 ╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ * input_pdf FILE Input PDF file. [required] │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ --output -o PATH Output PDF file. Defaults to 'input_pdf_annotated'. │ │ --help Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples ### Input file Extracts only pages containing annotations from a file `input.pdf`. Pages are written into a new file `input_annotated.pdf`. 
``` pdfly extract-annotated-pages input.pdf ``` ### Input file with specific output file Extracts only pages containing annotations from a file `input.pdf` into the given output file `pages_to_rework.pdf`. ``` pdfly extract-annotated-pages input.pdf -o pages_to_rework.pdf ``` ================================================ FILE: docs/user/subcommand-extract-images.md ================================================ # extract-images Extract images from a PDF file. ## Usage ``` $ pdfly extract-images --help Usage: pdfly extract-images [OPTIONS] PDF Extract images from PDF without resampling or altering. Adapted from work by Sylvain Pelissier http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-res ampling-in-python ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ │ * pdf FILE [default: None] [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --help Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples Extract the 10th page of `document.pdf` and extract the images present in it. ``` pdfly cat document.pdf 9 -o page.pdf pdfly extract-images page.pdf Extracted 1 images: - 0-Im0.png ``` ================================================ FILE: docs/user/subcommand-extract-text.md ================================================ # extract-text Extract text from a PDF file. ## Usage ``` $ pdfly extract-text --help Usage: pdfly extract-text [OPTIONS] PDF Extract text from a PDF file. ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ │ * pdf FILE [default: None] [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --help Show this message and exit. 
│ ╰──────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples Extract the text from the 10th page of `document.pdf`, redirecting the output into `page.txt`. ``` pdfly cat document.pdf 9 -o page.pdf pdfly extract-text page.pdf ``` ================================================ FILE: docs/user/subcommand-meta.md ================================================ # meta Get metadata of a PDF file. ## Usage ``` pdfly meta --help Usage: pdfly meta [OPTIONS] PDF Show metadata of a PDF file ╭─ Arguments ───────────────────────────────────────────────────────────────────╮ │ * pdf FILE [default: None] [required] │ ╰───────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ─────────────────────────────────────────────────────────────────────╮ │ --output -o [json|text] output format [default: text] │ │ --help Show this message and exit. │ ╰───────────────────────────────────────────────────────────────────────────────╯ ``` ## Example ``` $pdfly meta Allianz-Versicherungsunterlagen.pdf Operating System Data ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Attribute ┃ Value ┃ ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ File Name │ /home/user/Documents/Allianz-Versicherungsunterlagen.pdf │ │ File Permissions │ -rw-rw-r-- │ │ File Size │ 874,781 bytes │ │ Creation Time │ 2023-09-02 10:00:51 │ │ Modification Time │ 2023-09-02 10:00:42 │ │ Access Time │ 2023-09-09 11:57:41 │ └───────────────────┴───────────────────────────────────────────────────────────┘ PDF Data ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Attribute ┃ Value ┃ ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ Title │ │ │ Producer │ itext-paulo-155 (itextpdf.sf.net-lowagie.com) │ │ Author │ │ │ Pages │ 34 │ │ Encrypted │ None │ │ PDF File Version │ %PDF-1.6 │ │ Page Layout │ │ │ Page Mode │ │ │ PDF 
ID │ ID1=b"'\xc5\x92\xc3\x92\xe2\x80\x93--/\xef\xac\x824\xc3… │ │ │ ID2=b'\xc3\x8b\xc3\xaa\xcb\x9b\r\xc3\xa2\r\xcb\x99T\xc3… │ │ │ \xc3\x96\xc3\x9fY2' │ │ Fonts (unembedded) │ /Helvetica │ │ Fonts (embedded) │ /ASPNQQ+TT22D6t00, /CBKSHX+Helvetica-Bold, │ │ │ /CXQKAY+Helvetica, /GOCSXU+AllianzNeo-Bold, │ │ │ /LKNHUL+Arial-BoldMT, /LMNFKX+ArialMT, /MWUNIP+Symbol, │ │ │ /ODNMDG+TT5B6t00, /PESMKN+AllianzNeo-CondensedBold, │ │ │ /PHDALA+Helvetica-Oblique, /PJEFXS+AllianzNeo-Light, │ │ │ /SNDABN+Helvetica, /SNDABN+Helvetica-Bold, │ │ │ /SNDABN+Times-Roman, /TXDAYK+Helvetica, │ │ │ /VORXLN+Helvetica-BoldOblique, /YTXZAH+Arial-ItalicMT │ │ Attachments │ [] │ │ Images │ 16 images (355,454 bytes) │ └────────────────────┴──────────────────────────────────────────────────────────┘ Use the 'pagemeta' subcommand to get details about a single page ``` ================================================ FILE: docs/user/subcommand-pagemeta.md ================================================ # pagemeta Give details about a PDF's single page. ## Usage ``` $ pdfly pagemeta --help Usage: pdfly pagemeta [OPTIONS] PDF PAGE_INDEX Give details about a single page. ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ │ * pdf FILE [default: None] [required] │ │ * page_index INTEGER [default: None] [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --output -o [json|text] output format [default: text] │ │ --help Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples Get the metadata of the 101st page of `document.pdf` in text format. 
``` pdfly pagemeta document.pdf 100 /home/user/.../document.pdf, page index 100 ┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Attribute ┃ Value ┃ ┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ mediabox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │ │ cropbox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │ │ artbox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │ │ bleedbox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │ │ annotations │ 8 │ └─────────────┴─────────────────────────────────────────────────────┘ All annotations: 1. /Link at [232.05524, 385.79007, 343.6091, 396.29007] 2. /Link at [157.63988, 209.99002, 243.69913, 220.49002] 3. /Link at [72, 178.19678, 249.65918, 188.69678] 4. /Link at [196.12769, 152.40353, 361.02328, 162.90353] 5. /Link at [360.97717, 139.80353, 432, 150.30353] 6. /Link at [72, 127.20352, 213.9915, 137.70352] 7. /Link at [179.64218, 448.3905, 220.08231, 458.8905] 8. /Link at [282.84, 347.99005, 340.83148, 358.49005] ``` Get the same metadata in `json` format. ``` pdfly pagemeta document.pdf 100 -o json {"mediabox":[0.0,0.0,504.0,661.5],"cropbox":[0.0,0.0,504.0,661.5],"artbox":[0.0,0.0,504.0,661.5],"bleedbox":[0.0,0.0,504.0,661.5],"annotations":19} ``` ================================================ FILE: docs/user/subcommand-rm.md ================================================ # rm Remove pages from PDF files. ## Usage ``` $ pdfly rm --help Usage: pdfly rm [OPTIONS] FILENAME FN_PGRGS... Remove pages from PDF files. Page ranges refer to the previously-named file. A file not followed by a page range means all the pages of the file. PAGE RANGES are like Python slices. Remember, page indices start with zero. When using page ranges that start with a negative value a two-hyphen symbol -- must be used to separate them from the command line options. Page range expression examples: : all pages. -1 last page. 22 just the 23rd page. :-1 all but the last page. 
0:3 the first three pages. -2 second-to-last page. :3 the first three pages. -2: last two pages. 5: from the sixth page onward. -3:-1 third & second to last. The third, "stride" or "step" number is also recognized. ::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0. 1:10:2 1 3 5 7 9 2::-1 2 1 0. ::-1 all pages in reverse order. Examples pdfly rm -o output.pdf document.pdf 2:5 Remove pages 2 to 4 from document.pdf, producing output.pdf. pdfly rm document.pdf :-1 Removes all pages except the last one from document.pdf, modifying the original file. pdfly rm report.pdf :6 7: Remove all pages except page seven from report.pdf, producing a single-page report.pdf. ╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────╮ │ * filename FILE [default: None] [required] │ │ * fn_pgrgs FN_PGRGS... filenames and/or page ranges [default: None] [required] │ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────╮ │ * --output -o PATH [default: None] [required] │ │ --verbose --no-verbose show page ranges as they are being read [default: no-verbose] │ │ --help Show this message and exit. │ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples Remove the 5th page of `document.pdf`, modifying the original file. ``` pdfly rm document.pdf 4 ``` Remove every page except the first and last of `document.pdf`, producing `output.pdf`. 
``` pdfly rm -o output.pdf document.pdf 1:-1 ``` ================================================ FILE: docs/user/subcommand-rotate.md ================================================ # rotate ## Usage ``` pdfly rotate --help Usage: pdfly rotate [OPTIONS] FILENAME DEGREES [PGRGS] Rotate specified pages by the specified amount Example: pdfly rotate --output output.pdf input.pdf 90 Rotate all pages by 90 degrees (clockwise) pdfly rotate --output output.pdf input.pdf 90 :3 Rotate first three pages by 90 degrees (clockwise) pdfly rotate --output output.pdf input.pdf 90 -- -1 Rotate last page by 90 degrees (clockwise) A file not followed by a page range (PGRGS) means all the pages of the file. PAGE RANGES are like Python slices. Remember, page indices start with zero. When using page ranges that start with a negative value a two-hyphen symbol -- must be used to separate them from the command line options. Page range expression examples: : all pages. -1 last page. 22 just the 23rd page. :-1 all but the last page. 0:3 the first three pages. -2 second-to-last page. :3 the first three pages. -2: last two pages. 5: from the sixth page onward. -3:-1 third & second to last. The third, "stride" or "step" number is also recognized. ::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0. 1:10:2 1 3 5 7 9 2::-1 2 1 0. ::-1 all pages in reverse order. 
╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ * filename FILE [required] │ │ * degrees INTEGER degrees to rotate [required] │ │ pgrgs [PGRGS] page range [default: :] │ ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ * --output -o PATH [required] │ │ --help Show this message and exit. │ ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples ### Rotate all pages by 90 degrees (clockwise) Rotate all pages from `input.pdf` by 90 degrees (clockwise) and write the resulting pdf to `output.pdf`. ``` pdfly rotate --output output.pdf input.pdf 90 ``` ### Rotate first three pages by 90 degrees (clockwise) Rotate first three pages from `input.pdf` by 90 degrees (clockwise) and write the resulting pdf to `output.pdf`. ``` pdfly rotate --output output.pdf input.pdf 90 :3 ``` ### Rotate last page by 90 degrees (clockwise) Rotate last page from `input.pdf` by 90 degrees (clockwise) and write the resulting pdf to `output.pdf`. ``` pdfly rotate --output output.pdf input.pdf 90 -- -1 ``` ================================================ FILE: docs/user/subcommand-sign.md ================================================ # sign Creates a digitally-signed PDF from an existing PDF file and a given certificate. ## Usage ``` Usage: pdfly sign [OPTIONS] FILENAME Creates a signed PDF. 
Examples pdfly sign input.pdf --p12 certs.p12 -o signed.pdf Signs the input.pdf with a PKCS12 certificate archive. Writes the resulting signed pdf into signed.pdf. pdfly sign document.pdf --p12 certs.p12 --in-place Signs the document.pdf with a PKCS12 certificate archive. Modifies the input file in-place. ╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ * filename FILE [required] │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ │ * --p12 FILE PKCS12 certificate container [required] │ │ --output -o PATH │ │ --in-place -i │ │ --p12-password -p TEXT The password to use to decrypt the PKCS12 file. │ │ --help Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples ### Sign a PDF with PKCS12 Signs the input.pdf with a PKCS12 certificate archive. Writes the resulting signed pdf into signed.pdf. ``` pdfly sign input.pdf --p12 certs.p12 -o signed.pdf ``` ### Sign a PDF in-place Signs the document.pdf with a PKCS12 certificate archive. Modifies the input file in-place. ``` pdfly sign document.pdf --p12 certs.p12 --in-place ``` ================================================ FILE: docs/user/subcommand-uncompress.md ================================================ # uncompress Module for uncompressing PDF content streams. ## Usage ``` $ pdfly uncompress --help Module for uncompressing PDF content streams. 
╭─ Arguments ───────────────────────────────────────────╮ │ * pdf FILE [default: None] [required] │ │ * output PATH [default: None] [required] │ ╰───────────────────────────────────────────────────────╯ ╭─ Options ─────────────────────────────────────────────╮ │ --help Show this message and exit. │ ╰───────────────────────────────────────────────────────╯ ``` ## Examples Uncompress `document_compressed.pdf` and output `document.pdf`. ``` pdfly uncompress document_compressed.pdf document.pdf ``` ================================================ FILE: docs/user/subcommand-update-offsets.md ================================================ # update-offsets Updates offsets and lengths in a simple PDF file. ## Usage ``` $ pdfly update-offsets --help Usage: pdfly update-offsets [OPTIONS] FILE_IN FILE_OUT Updates offsets and lengths in a simple PDF file. The PDF specification requires that the xref section at the end of a PDF file has the correct offsets of the PDF's objects. It further requires that the dictionary of a stream object contains a /Length-entry giving the length of the encoded stream. When editing a PDF file using a text-editor (e.g. vim) it is elaborate to compute or adjust these offsets and lengths. This command tries to compute /Length-entries of the stream dictionaries and the offsets in the xref-section automatically. It expects that the PDF file has ASCII encoding only. It may use ISO-8859-1 or UTF-8 in its comments. The current implementation incorrectly replaces CR (0x0d) by LF (0x0a) in binary data. It expects that there is one xref-section only. It expects that the /Length-entries have default values containing enough digits, e.g. /Length 000 when the stream consists of 576 bytes. 
Example: update-offsets --verbose --encoding ISO-8859-1 issue-297.pdf issue-297.out.pdf ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ │ * file_in FILE [default: None] [required] │ │ * file_out PATH [default: None] [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --encoding TEXT Encoding used to read and write the │ │ files, e.g. UTF-8. │ │ [default: ISO-8859-1] │ │ --verbose --no-verbose Show progress while processing. │ │ [default: no-verbose] │ │ --help Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples Update the offsets of `document.pdf` with UTF-8 encoding and write the output to `document.out.pdf`. ``` pdfly update-offsets document.pdf --verbose --encoding UTF-8 document.out.pdf ``` ================================================ FILE: docs/user/subcommand-x2pdf.md ================================================ # x2pdf Convert a file to PDF. Currently supported for "x": * PNG * JPG ## Usage ``` $ pdfly x2pdf --help Usage: pdfly x2pdf [OPTIONS] X... Convert one or more files to PDF. Each file is a page. ╭─ Arguments ─────────────────────────────────────────────────────────────────╮ │ * x X... [default: None] [required] │ ╰─────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ───────────────────────────────────────────────────────────────────╮ │ * --output -o PATH [default: None] [required] │ │ --help Show this message and exit. 
│ ╰─────────────────────────────────────────────────────────────────────────────╯ ``` ## Examples ### Single file ``` $ pdfly x2pdf image.jpg -o out.pdf $ ls -lh -rw-rw-r-- 1 user user 47K Sep 17 21:49 image.jpg -rw-rw-r-- 1 user user 49K Sep 17 22:48 out.pdf ``` ### Multiple files manually ``` $ pdfly x2pdf image1.jpg image2.jpg -o out.pdf $ ls -lh -rw-rw-r-- 1 user user 47K Sep 17 21:49 image1.jpg -rw-rw-r-- 1 user user 15K Sep 17 21:49 image2.jpg -rw-rw-r-- 1 user user 64K Sep 17 22:48 out.pdf ``` ### Multiple files via * ``` $ pdfly x2pdf *.jpg -o out.pdf $ ls -lh -rw-rw-r-- 1 user user 47K Sep 17 21:49 image1.jpg -rw-rw-r-- 1 user user 15K Sep 17 21:49 image2.jpg -rw-rw-r-- 1 user user 64K Sep 17 22:48 out.pdf ``` ================================================ FILE: make_release.py ================================================ """Internal tool to update the CHANGELOG.""" import json import subprocess import urllib.request from dataclasses import dataclass from datetime import datetime, timezone from typing import Any from rich.prompt import Prompt GH_ORG = "py-pdf" GH_PROJECT = "pdfly" VERSION_FILE_PATH = "pdfly/_version.py" CHANGELOG_FILE_PATH = "CHANGELOG.md" @dataclass(frozen=True) class Change: """Capture the data of a git commit.""" commit_hash: str prefix: str message: str author: str author_login: str def main(changelog_path: str) -> None: """ Create a changelog. 
Args: changelog_path: The location of the CHANGELOG file """ changelog = get_changelog(changelog_path) git_tag = get_most_recent_git_tag() changes, changes_with_author = get_formatted_changes(git_tag) if changes == "": print("No changes") return new_version = version_bump(git_tag) new_version = get_version_interactive(new_version, changes) adjust_version_py(new_version) today = datetime.now(tz=timezone.utc) header = f"## Version {new_version}, {today:%Y-%m-%d}\n" url = f"https://github.com/{GH_ORG}/{GH_PROJECT}/compare/{git_tag}...{new_version}" trailer = f"\n[Full Changelog]({url})\n\n" new_entry = header + changes + trailer print(new_entry) write_commit_msg_file(new_version, changes_with_author + trailer) write_release_msg_file(new_version, changes_with_author + trailer, today) # Make the script idempotent by checking if the new entry is already in the changelog if new_entry in changelog: print("Changelog is already up-to-date!") return new_changelog = "# CHANGELOG\n\n" + new_entry + strip_header(changelog) write_changelog(new_changelog, changelog_path) print_instructions(new_version) def print_instructions(new_version: str) -> None: """Print release instructions.""" print("=" * 80) print(f"☑ {VERSION_FILE_PATH} was adjusted to '{new_version}'") print(f"☑ {CHANGELOG_FILE_PATH} was adjusted") print() print("Now run:") print(" git commit -eF RELEASE_COMMIT_MSG.md") print(f" git tag -s {new_version} -eF RELEASE_TAG_MSG.md") print(" git push") print(" git push --tags") def adjust_version_py(version: str) -> None: """Adjust the __version__ string.""" with open(VERSION_FILE_PATH, "w") as fp: fp.write(f'__version__ = "{version}"\n') def get_version_interactive(new_version: str, changes: str) -> str: """Get the new __version__ interactively.""" print("The changes are:") print(changes) orig = new_version new_version = Prompt.ask("New semantic version", default=orig) while not is_semantic_version(new_version): new_version = Prompt.ask( "That was not a semantic version. 
Please enter a semantic version", default=orig, ) return new_version def is_semantic_version(version: str) -> bool: """Check if the given version is a semantic version.""" # This doesn't cover the edge-cases like pre-releases if version.count(".") != 2: return False try: return bool([int(part) for part in version.split(".")]) except Exception: return False def write_commit_msg_file(new_version: str, commit_changes: str) -> None: """ Write a file that can be used as a commit message. Like this: git commit -eF RELEASE_COMMIT_MSG.md && git push """ with open("RELEASE_COMMIT_MSG.md", "w") as fp: fp.write(f"REL: {new_version}\n\n") fp.write("## What's new\n") fp.write(commit_changes) def write_release_msg_file( new_version: str, commit_changes: str, today: datetime ) -> None: """ Write a file that can be used as a git tag message. Like this: git tag -eF RELEASE_TAG_MSG.md && git push """ with open("RELEASE_TAG_MSG.md", "w") as fp: fp.write(f"Version {new_version}, {today:%Y-%m-%d}\n\n") fp.write("## What's new\n") fp.write(commit_changes) def strip_header(md: str) -> str: """Remove the 'CHANGELOG' header.""" return md.lstrip("# CHANGELOG").lstrip() # noqa def version_bump(git_tag: str) -> str: """ Increase the patch version of the git tag by one. Args: git_tag: Old version tag Returns: The new version where the patch version is bumped. """ # just assume a patch version change major, minor, patch = git_tag.split(".") return f"{major}.{minor}.{int(patch) + 1}" def get_changelog(changelog_path: str) -> str: """ Read the changelog. Args: changelog_path: Path to the CHANGELOG file Returns: Data of the CHANGELOG """ with open(changelog_path) as fh: changelog = fh.read() return changelog def write_changelog(new_changelog: str, changelog_path: str) -> None: """ Write the changelog. 
Args: new_changelog: Contents of the new CHANGELOG changelog_path: Path where the CHANGELOG file is """ with open(changelog_path, "w") as fh: fh.write(new_changelog) def get_formatted_changes(git_tag: str) -> tuple[str, str]: """ Format the changes done since the last tag. Args: git_tag: the reference tag Returns: Changes done since git_tag """ commits = get_git_commits_since_tag(git_tag) # Group by prefix grouped: dict[str, list[dict[str, Any]]] = {} for commit in commits: if commit.prefix not in grouped: grouped[commit.prefix] = [] grouped[commit.prefix].append( {"msg": commit.message, "author": commit.author_login} ) # Order prefixes order = [ "SEC", "DEP", "ENH", "PI", "BUG", "ROB", "DOC", # We ignore MRs from Dependabot prefixed with: "Docs:" "DEV", "CI", "MAINT", "TST", "STY", ] abbrev2long = { "SEC": "Security", "DEP": "Deprecations", "ENH": "New Features", "BUG": "Bug Fixes", "ROB": "Robustness", "DOC": "Documentation", "DEV": "Developer Experience", "CI": "Continuous Integration", "MAINT": "Maintenance", "TST": "Testing", "STY": "Code Style", "PI": "Performance Improvements", } # Create output output = "" output_with_user = "" for prefix in order: if prefix not in grouped: continue tmp = f"\n### {abbrev2long[prefix]} ({prefix})\n" # header output += tmp output_with_user += tmp for commit_dict in grouped[prefix]: output += f"- {commit_dict['msg']}\n" output_with_user += ( f"- {commit_dict['msg']} by @{commit_dict['author']}\n" ) del grouped[prefix] if grouped: output += "\n### Other\n" output_with_user += "\n### Other\n" for prefix, commit_dicts in grouped.items(): for commit_dict in commit_dicts: output += f"- {prefix}: {commit_dict['msg']}\n" output_with_user += f"- {prefix}: {commit_dict['msg']} by @{commit_dict['author']}\n" return output, output_with_user def get_most_recent_git_tag() -> str: """ Get the git tag most recently created. Returns: Most recently created git tag. 
""" git_tag = str( subprocess.check_output( ["git", "describe", "--abbrev=0"], stderr=subprocess.STDOUT ) ).strip("'b\\n") return git_tag def get_author_mapping(line_count: int) -> dict[str, str]: """ Get the authors for each commit. Args: line_count: Number of lines from Git log output. Used for determining how many commits to fetch. Returns: A mapping of long commit hashes to author login handles. """ per_page = min(line_count, 100) page = 1 mapping: dict[str, str] = {} for _ in range(0, line_count, per_page): with urllib.request.urlopen( f"https://api.github.com/repos/{GH_ORG}/{GH_PROJECT}/commits?per_page={per_page}&page={page}" ) as response: commits = json.loads(response.read()) page += 1 for commit in commits: if commit["author"]: gh_handle = commit["author"]["login"] else: # This is not perfect, but better than the other option gh_handle = commit["commit"]["author"]["name"].replace(" ", "") mapping[commit["sha"]] = gh_handle return mapping def get_git_commits_since_tag(git_tag: str) -> list[Change]: """ Get all commits since the last tag. Args: git_tag: Reference tag from which the changes to the current commit are fetched. Returns: list of all changes since git_tag. """ commits = ( subprocess.check_output( [ "git", "--no-pager", "log", f"{git_tag}..HEAD", '--pretty=format:"%H:::%s:::%aN"', ], stderr=subprocess.STDOUT, ) .decode("UTF-8") .strip() ) lines = commits.splitlines() authors = get_author_mapping(len(lines)) return [parse_commit_line(line, authors) for line in lines if line != ""] def parse_commit_line(line: str, authors: dict[str, str]) -> Change: """ Parse the first line of a git commit message. Args: line: The first line of a git commit message. 
Returns: The parsed Change object Raises: ValueError: The commit line is not well-structured """ parts = line.split(":::") if len(parts) != 3: raise ValueError(f"Invalid commit line: '{line}'") commit_hash, rest, author = parts if ":" in rest: prefix, message = rest.split(": ", 1) else: prefix = "" message = rest # Standardize message.strip() commit_hash = commit_hash.strip('"') author = author.removesuffix('"') author_login = authors[commit_hash] prefix = prefix.strip() if prefix == "DOCS": prefix = "DOC" return Change( commit_hash=commit_hash, prefix=prefix, message=message, author=author, author_login=author_login, ) if __name__ == "__main__": main(CHANGELOG_FILE_PATH) ================================================ FILE: mypy.ini ================================================ [mypy] plugins = pydantic.mypy ================================================ FILE: pdfly/__init__.py ================================================ """pdfly is a command line utility for manipulating PDFs and getting information about them.""" from ._version import __version__ __all__ = [ "__version__", ] ================================================ FILE: pdfly/__main__.py ================================================ """Execute pdfly as a module.""" from pdfly.cli import entry_point if __name__ == "__main__": entry_point() ================================================ FILE: pdfly/_utils.py ================================================ from enum import Enum class OutputOptions(Enum): json = "json" text = "text" ================================================ FILE: pdfly/_version.py ================================================ __version__ = "0.5.1" ================================================ FILE: pdfly/booklet.py ================================================ """ Reorder and two-up PDF pages for booklet printing. If the number of pages is not a multiple of four, pages are added until it is a multiple of four. 
# ---- pdfly/booklet.py ----


def main(
    filename: Path,
    output: Path,
    inside_cover_file: Path | None,
    centerfold_file: Path | None,
) -> None:
    """
    Impose the pages of ``filename`` two-up in booklet order and save them.

    Args:
        filename: The PDF to turn into a booklet.
        output: Where the imposed PDF is written.
        inside_cover_file: Optional PDF whose first page fills the slot just
            before the last page when the input has an odd page count.
        centerfold_file: Optional PDF whose first page is merged onto the
            last sheet when two filler pages had to be added in the middle.

    Raises:
        RuntimeError: Wraps any exception raised while reading, imposing or
            writing the booklet.
    """
    try:
        source = PdfReader(filename)
        pages = list(source.pages)
        sheets = PdfWriter()

        # Filler pages share the size of the first input page.
        filler = PageObject.create_blank_page(
            width=pages[0].mediabox.width, height=pages[0].mediabox.height
        )

        # Odd page count: fill the slot right before the last page, with the
        # user's inside-cover page if one was given.
        if len(pages) % 2 == 1:
            inside_cover = (
                fetch_first_page(inside_cover_file)
                if inside_cover_file
                else filler
            )
            pages.insert(-1, inside_cover)

        # Two pages short of a multiple of four: add a blank double page in
        # the middle. The midpoint is the same for both insertions because
        # the count is even before the first one.
        requires_centerfold = len(pages) % 4 == 2
        if requires_centerfold:
            middle = len(pages) // 2
            pages.insert(middle, filler)
            pages.insert(middle, filler)

        # Put each pair side by side: the right page is merged onto the left
        # one, shifted by the left page's width.
        for left, right in page_iter(len(pages)):
            sheet = pages[left]
            sheet.merge_translated_page(
                page2=pages[right],
                tx=sheet.mediabox.width,
                ty=0,
                expand=True,
                over=True,
            )
            # Double the CropBox width so both halves stay visible.
            sheet.cropbox[2] = FloatObject(2 * sheet.cropbox[2])
            sheets.add_page(sheet)

        # The blank centerfold is the last sheet written; overlay the user's
        # centerfold page on it if one was supplied.
        if requires_centerfold and centerfold_file:
            centerfold = fetch_first_page(centerfold_file)
            innermost = sheets.pages[-1]
            if centerfold.rotation != 0:
                centerfold.transfer_rotation_to_content()
            if requires_rotate(centerfold.mediabox, innermost.mediabox):
                centerfold = centerfold.rotate(270)
            if centerfold.rotation != 0:
                centerfold.transfer_rotation_to_content()
            innermost.merge_page(centerfold)

        with open(output, "wb") as output_fh:
            sheets.write(output_fh)
    except Exception as error:
        raise RuntimeError(f"Error while processing {filename}") from error


def requires_rotate(a: RectangleObject, b: RectangleObject) -> bool:
    """
    Tell whether two rectangles have different orientations.

    Args:
        a (RectangleObject): The first rectangle.
        b (RectangleObject): The second rectangle.

    Returns:
        bool: True when exactly one of the rectangles is taller than wide.
    """
    return (a.height > a.width) != (b.height > b.width)


def fetch_first_page(filename: Path) -> PageObject:
    """
    Open a PDF file and hand back its first page.

    Args:
        filename (Path): The path to the PDF file.

    Returns:
        PageObject: The first page of the PDF file.
    """
    reader = PdfReader(filename)
    return reader.pages[0]


def page_iter(num_pages: int) -> Generator[tuple[int, int], None, None]:
    """
    Yield the index pairs that share one side of a booklet sheet.

    Pairs are produced from the outermost sheet inwards; for every sheet the
    outer side ``(last, first)`` comes before the inner side
    ``(second, second_to_last)``.

    Args:
        num_pages (int): Total number of pages. Must be divisible by 4.

    Yields:
        tuple[int, int]: Zero-based page indices printed side by side.

    Raises:
        ValueError: If the number of pages is not divisible by 4.
    """
    if num_pages % 4:
        raise ValueError("Number of pages must be divisible by 4")

    # ``low`` walks the front of the document two pages at a time while
    # ``high`` mirrors it from the back.
    for low in range(0, num_pages // 2, 2):
        high = num_pages - 1 - low
        yield high, low  # outside the fold
        yield low + 1, high - 1  # inside the fold
# ---- pdfly/cat.py ----


def main(
    filename: Path,
    fn_pgrgs: list[str] | None,
    output: Path,
    verbose: bool,
    inverted_page_selection: bool = False,
    password: str | None = None,
) -> None:
    """
    Copy the selected pages of one or more PDFs into a single output PDF.

    Args:
        filename: First input PDF.
        fn_pgrgs: Further file names and/or page ranges; a page range applies
            to the file named before it.
        output: Destination path. A falsy value writes the PDF to stdout.
        verbose: Echo each (file, page range) pair to stderr as it is read.
        inverted_page_selection: When True, keep the pages that are NOT
            selected. All ranges given for one file are combined first, so
            ``:6 7:`` keeps only page index 6, and the kept pages are
            written in ascending order, once per file.
        password: Password passed to ``reader.decrypt`` when not None.

    Raises:
        RuntimeError: Wraps any exception raised while reading or writing.
        SystemExit: Exit code 2 for an invalid path or page range, 1 for a
            rejected password.
    """
    console = Console()
    filename_page_ranges = parse_filepaths_and_pagerange_args(
        console, filename, fn_pgrgs
    )
    if output:
        output_fh = open(output, "wb")
    else:
        sys.stdout.flush()
        output_fh = os.fdopen(sys.stdout.fileno(), "wb")

    writer = PdfWriter()
    in_fs = {}
    # Inverted selection only: per input file (in order of first
    # appearance), the reader to copy from and every page index selected
    # for removal by any of its ranges.
    readers: dict[Path, PdfReader] = {}
    removed: dict[Path, set[int]] = {}
    try:
        for filepath, page_range in filename_page_ranges:  # type: ignore
            if verbose:
                print(filepath, page_range, file=sys.stderr)
            if filepath not in in_fs:
                in_fs[filepath] = open(filepath, "rb")

            reader = PdfReader(in_fs[filepath])
            if (
                password is not None
                and reader.decrypt(password) == PasswordType.NOT_DECRYPTED
            ):
                console.print(
                    "[red]Error: the decrypting password provided is invalid"
                )
                sys.exit(1)

            num_pages = len(reader.pages)
            start, end, step = page_range.indices(num_pages)
            if (
                start < 0
                or end < 0
                or start >= num_pages
                or end > num_pages
                or start > end
            ):
                print(
                    f"WARNING: Page range {page_range} is out of bounds",
                    file=sys.stderr,
                )

            if inverted_page_selection:
                # Only record what to drop; the complement is taken once per
                # file after all of its ranges are known.
                readers.setdefault(filepath, reader)
                removed.setdefault(filepath, set()).update(
                    range(start, end, step)
                )
            else:
                for page_num in range(start, end, step):
                    writer.add_page(reader.pages[page_num])

        for filepath, removed_page_nums in removed.items():
            reader = readers[filepath]
            for page_num in range(len(reader.pages)):
                if page_num not in removed_page_nums:
                    writer.add_page(reader.pages[page_num])

        writer.write(output_fh)
    except Exception as error:
        raise RuntimeError(f"Error while reading {filename}") from error
    finally:
        output_fh.close()
        # Input files have to stay open until the output is written; by the
        # time this runs the write has either finished or failed.
        for in_f in in_fs.values():
            in_f.close()
# ---- pdfly/cat.py (continued) ----


def parse_filepaths_and_pagerange_args(
    console: Console, filename: Path, fn_pgrgs: list[str] | None
) -> list[tuple[Path, PageRange]]:
    """
    Turn the command-line arguments into (file path, page range) pairs.

    Args:
        console: Console used to report invalid arguments.
        filename: First input file; placed in front of ``fn_pgrgs``.
        fn_pgrgs: Remaining file names and/or page ranges, may be None.

    Returns:
        The pairs produced by ``parse_filename_page_ranges`` whose path is an
        existing file.

    Raises:
        SystemExit: Exit code 2 if any produced path is not an existing file.
    """
    arguments = [str(filename), *(fn_pgrgs or [])]

    accepted: list[tuple[Path, PageRange]] = []
    rejected: list[str] = []
    for filepath, page_range in parse_filename_page_ranges(arguments):  # type: ignore
        candidate = Path(filepath)
        if candidate.is_file():
            accepted.append((candidate, page_range))
        else:
            rejected.append(str(filepath))

    if not rejected:
        return accepted

    console.print(
        f"[red]Error: invalid file path or page range provided: {' '.join(rejected)}"
    )
    sys.exit(2)


# ---- pdfly/check_sign.py ----


def main(filename: Path, pem: Path, verbose: bool | None) -> None:
    """
    Verify the signatures of ``filename`` against a PEM certificate file.

    Prints "Check succeeded" to stdout when every signature passes all three
    checks; otherwise prints "Check failed" to stderr and exits with code 1.
    Failed checks are always listed; passed checks only when ``verbose``.

    Args:
        filename: The signed PDF.
        pem: PEM file handed to ``pdf.verify`` as the trusted certificates.
        verbose: Also list the checks that passed.

    Raises:
        typer.BadParameter: ``pdf.verify`` returned no results.
        typer.Exit: At least one check failed (exit code 1).
    """
    trusted_certificates = [pem.read_bytes()]
    results = pdf.verify(filename.read_bytes(), trusted_certificates)
    if len(results) == 0:
        raise typer.BadParameter("Signature missing")

    details: list[str] = []
    any_failure = False
    for hash_ok, signature_ok, cert_ok in results:
        # Report in a fixed order: signature, content hash, certificate.
        checks = (
            ("Signature", signature_ok),
            ("Content hash", hash_ok),
            ("Certificate", cert_ok),
        )
        for label, passed in checks:
            if not passed:
                any_failure = True
                details.append(f"{label} not ok")
            elif verbose:
                details.append(f"{label} ok")

    details_str = " (" + ", ".join(details) + ")" if details else ""
    if any_failure:
        print(f"Check failed{details_str}.", file=sys.stderr)
        raise typer.Exit(code=1)
    print(f"Check succeeded{details_str}.")
# ---- pdfly/cli.py ----


def version_callback(value: bool) -> None:
    """Echo the pdfly and pypdf versions and exit when ``value`` is truthy."""
    import pypdf

    if value:
        typer.echo(f"pdfly {pdfly.__version__}")
        typer.echo(f" using pypdf=={pypdf.__version__}")
        raise typer.Exit


# The Typer application; every subcommand below registers itself on it.
entry_point = typer.Typer(
    add_completion=False,
    help=(
        "pdfly is a pure-python cli application for manipulating PDF files."
    ),
    rich_markup_mode="rich",  # Allows to pretty-print commands documentation
)


# Application-level callback: it only exists to declare the global
# `--version` option, whose work is done by `version_callback`.
@entry_point.callback()  # type: ignore[misc]
def common(
    ctx: typer.Context,
    version: bool = typer.Option(None, "--version", callback=version_callback),
) -> None:
    pass


# `2-up`: forwards the input and output paths to pdfly.up2.main.
@entry_point.command(name="2-up", help=pdfly.up2.__doc__)  # type: ignore[misc]
def up2(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    out: Path,
) -> None:
    pdfly.up2.main(pdf, out)


# `booklet`: forwards to pdfly.booklet.main. The `--blank-page-file` value
# is passed as that function's third argument and `--centerfold-file` as
# its fourth.
@entry_point.command(name="booklet", help=pdfly.booklet.__doc__)  # type: ignore[misc]
def booklet(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=False,
            resolve_path=False,
        ),
    ],
    blank_page: Annotated[
        Path | None,
        typer.Option(
            "-b",
            "--blank-page-file",
            help="page added if input is odd number of pages",
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ] = None,
    centerfold: Annotated[
        Path | None,
        typer.Option(
            "-c",
            "--centerfold-file",
            help="double-page added if input is missing >= 2 pages",
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ] = None,
) -> None:
    pdfly.booklet.main(filename, output, blank_page, centerfold)


# `cat`: forwards to pdfly.cat.main. `--output` is declared with `...` as
# its default, i.e. without a fallback value.
@entry_point.command(name="cat", help=pdfly.cat.__doc__)  # type: ignore[misc]
def cat(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    fn_pgrgs: list[str] | None = typer.Argument(  # noqa: B008
        None, allow_dash=True, help="filenames and/or page ranges"
    ),
    output: Path = typer.Option(..., "-o", "--output"),  # noqa
    password: str = typer.Option(
        None, help="Document's user or owner password."
    ),
    verbose: bool = typer.Option(
        False, help="show page ranges as they are being read"
    ),
) -> None:
    pdfly.cat.main(
        filename, fn_pgrgs, output=output, verbose=verbose, password=password
    )


# `check-sign`: forwards the PDF, the PEM file and the verbosity flag to
# pdfly.check_sign.main.
@entry_point.command(name="check-sign", help=pdfly.check_sign.__doc__)
def check_sign(
    filename: Annotated[
        Path,
        typer.Argument(dir_okay=False, exists=True, resolve_path=True),
    ],
    pem: Annotated[
        Path,
        typer.Option(
            ...,
            dir_okay=False,
            exists=True,
            resolve_path=True,
            help="PEM certificate file",
        ),
    ],
    verbose: bool = typer.Option(
        False, help="Show signature verification details."
    ),
) -> None:
    pdfly.check_sign.main(filename, pem, verbose)


# `compress`: forwards the input and output paths to pdfly.compress.main.
@entry_point.command(name="compress", help=pdfly.compress.__doc__)  # type: ignore[misc]
def compress(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Argument(
            writable=True,
        ),
    ],
) -> None:
    pdfly.compress.main(pdf, output)


# `extract-annotated-pages`: forwards to pdfly.extract_annotated_pages.main;
# the output path is optional and passed through as None when omitted.
@entry_point.command(name="extract-annotated-pages", help=pdfly.extract_annotated_pages.__doc__)  # type: ignore[misc]
def extract_annotated_pages(
    input_pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
            help="Input PDF file.",
        ),
    ],
    output_pdf: Annotated[
        Path | None,
        typer.Option(
            "--output",
            "-o",
            writable=True,
            help="Output PDF file. Defaults to 'input_pdf_annotated'.",
        ),
    ] = None,
) -> None:
    pdfly.extract_annotated_pages.main(input_pdf, output_pdf)


# `extract-images`: forwards the input path to pdfly.extract_images.main.
@entry_point.command(name="extract-images", help=pdfly.extract_images.__doc__)  # type: ignore[misc]
def extract_images(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
) -> None:
    pdfly.extract_images.main(pdf)


# `extract-text`: the only subcommand implemented inline; it has no `help=`
# argument, so the docstring below is left exactly as it is.
@entry_point.command(name="extract-text")  # type: ignore[misc]
def extract_text(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
) -> None:
    """Extract text from a PDF file."""
    from pypdf import PdfReader

    reader = PdfReader(str(pdf))
    # One echo per page, in document order.
    for page in reader.pages:
        typer.echo(page.extract_text())


# `meta`: forwards to pdfly.metadata.main; the output format defaults to
# the value of OutputOptions.text.
@entry_point.command(name="meta", help=pdfly.metadata.__doc__)  # type: ignore[misc]
def metadata(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: pdfly.metadata.OutputOptions = typer.Option(  # noqa
        pdfly.metadata.OutputOptions.text.value,
        "--output",
        "-o",
        help="output format",
        show_default=True,
    ),
) -> None:
    pdfly.metadata.main(pdf, output)


# `pagemeta`: forwards to pdfly.pagemeta.main; `page_index` is passed on
# unchanged and used there to index `reader.pages`.
@entry_point.command(name="pagemeta", help=pdfly.pagemeta.__doc__)  # type: ignore[misc]
def pagemeta(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    page_index: int,
    output: pdfly.metadata.OutputOptions = typer.Option(  # noqa
        pdfly.metadata.OutputOptions.text.value,
        "--output",
        "-o",
        help="output format",
        show_default=True,
    ),
) -> None:
    pdfly.pagemeta.main(
        pdf,
        page_index,
        output,
    )


# `rm`: forwards to pdfly.rm.main. Both `--output` and the page-range
# arguments are declared with `...` as their default.
@entry_point.command(name="rm", help=pdfly.rm.__doc__)
def rm(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Path = typer.Option(..., "-o", "--output"),  # noqa
    fn_pgrgs: list[str] = typer.Argument(  # noqa
        ..., help="filenames and/or page ranges"
    ),
    verbose: bool = typer.Option(
        False, help="show page ranges as they are being read"
    ),
) -> None:
    pdfly.rm.main(filename, fn_pgrgs, output, verbose)
# ---- pdfly/cli.py (continued) ----


# `rotate`: forwards to pdfly.rotate.main; the page range defaults to ":".
@entry_point.command(name="rotate", help=pdfly.rotate.__doc__)  # type: ignore[misc]
def rotate(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    degrees: Annotated[int, typer.Argument(..., help="degrees to rotate")],
    pgrgs: Annotated[str, typer.Argument(..., help="page range")] = ":",
    output: Path = typer.Option(..., "-o", "--output"),  # noqa
) -> None:
    pdfly.rotate.main(filename, output, degrees, pgrgs)


# `sign`: forwards to pdfly.sign.main, which checks that `--output` or
# `--in-place` was given.
@entry_point.command(name="sign", help=pdfly.sign.__doc__)
def sign(
    filename: Annotated[
        Path,
        typer.Argument(dir_okay=False, exists=True, resolve_path=True),
    ],
    p12: Annotated[
        Path,
        typer.Option(
            ...,
            dir_okay=False,
            exists=True,
            resolve_path=True,
            help="PKCS12 certificate container",
        ),
    ],
    output: Annotated[Path | None, typer.Option("--output", "-o")] = None,
    in_place: bool = typer.Option(False, "--in-place", "-i"),
    p12_password: Annotated[
        str | None,
        typer.Option(
            "--p12-password",
            "-p",
            help="The password to use to decrypt the PKCS12 file.",
        ),
    ] = None,
) -> None:
    pdfly.sign.main(filename, output, in_place, p12, p12_password)


# `uncompress`: forwards the input and output paths to pdfly.uncompress.main.
@entry_point.command(name="uncompress", help=pdfly.uncompress.__doc__)  # type: ignore[misc]
def uncompress(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Argument(
            writable=True,
        ),
    ],
) -> None:
    pdfly.uncompress.main(pdf, output)


# `update-offsets`: forwards to pdfly.update_offsets.main. `file_out` is
# annotated as Path but defaults to None (hence the type-ignore).
@entry_point.command(name="update-offsets", help=pdfly.update_offsets.__doc__)  # type: ignore[misc]
def update_offsets(
    file_in: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    file_out: Annotated[
        Path, typer.Option("-o", "--output")  # noqa
    ] = None,  # type: ignore[assignment]
    encoding: str = typer.Option(
        "ISO-8859-1",
        help="Encoding used to read and write the files, e.g. UTF-8.",
    ),
    verbose: bool = typer.Option(
        False, help="Show progress while processing."
    ),
) -> None:
    pdfly.update_offsets.main(file_in, file_out, encoding, verbose)


# `x2pdf`: forwards to pdfly.x2pdf.main and turns a truthy return value
# into the process exit code.
@entry_point.command(name="x2pdf", help=pdfly.x2pdf.__doc__)  # type: ignore[misc]
def x2pdf(
    x: list[
        Annotated[
            Path,
            typer.Argument(
                dir_okay=False,
                exists=True,
                resolve_path=True,
            ),
        ]
    ],
    output: Annotated[
        Path,
        typer.Option(
            "-o",
            "--output",
            writable=True,
        ),
    ],
) -> None:
    exit_code = pdfly.x2pdf.main(x, output)
    if exit_code:
        raise typer.Exit(code=exit_code)


# ---- pdfly/compress.py ----


def main(pdf: Path, output: Path) -> None:
    """
    Write a copy of ``pdf`` with compressed content streams to ``output``.

    The rewritten PDF is built in memory first. If it is not smaller than the
    input, the input file is copied to ``output`` instead, so the result is
    never larger than the original.

    Args:
        pdf: The PDF to compress.
        output: Destination path; may be the same file as ``pdf``.
    """
    reader = PdfReader(pdf)
    writer = PdfWriter()

    for page in reader.pages:
        writer.add_page(page)

    if reader.metadata:
        writer.add_metadata(reader.metadata)

    for page in writer.pages:
        page.compress_content_streams()

    # PDF to memory buffer first
    compressed_buffer = BytesIO()
    writer.write(compressed_buffer)
    compressed_data = compressed_buffer.getvalue()

    comp_size = len(compressed_data)
    orig_size = pdf.stat().st_size

    # If compressed size is larger than original, use original file
    if comp_size >= orig_size:
        print(
            f"Compression resulted in larger file ({comp_size:,} >= {orig_size:,} bytes)"
        )
        print("Keeping original file as compressed version would be larger")
        try:
            shutil.copy2(pdf, output)
        except shutil.SameFileError:
            # ``output`` is the input file itself: the original is already
            # in place, so there is nothing to copy.
            pass
        final_size = orig_size
        status = "No compression applied (would increase size)"
    else:
        with open(output, "wb") as fp:
            fp.write(compressed_data)
        final_size = comp_size
        ratio = (comp_size / orig_size) * 100
        status = f"Compressed ({ratio:.1f}% of original)"

    print(f"Original Size : {orig_size:,}")
    print(f"Final Size : {final_size:,} ({status})")
# ---- pdfly/extract_annotated_pages.py ----


def is_manipulable(annot: AnnotationDictionary) -> bool:
    """Return True unless the annotation's ``/Subtype`` is ``/Link``."""
    subtype = annot.get("/Subtype")
    return subtype != "/Link"


def main(input_pdf: Path, output_pdf: Path | None) -> None:
    """
    Copy the pages that carry at least one non-link annotation to a new PDF.

    Args:
        input_pdf: The PDF to scan.
        output_pdf: Destination path. When falsy, ``<stem>_annotated.pdf``
            next to the input file is used.
    """
    if not output_pdf:
        output_pdf = input_pdf.with_name(input_pdf.stem + "_annotated.pdf")

    reader = PdfReader(input_pdf)
    writer = PdfWriter()
    kept_pages = 0

    for page in reader.pages:
        # Pages without an /Annots entry have nothing to inspect.
        annotations = page["/Annots"] if "/Annots" in page else ()
        if any(is_manipulable(annot) for annot in annotations):
            writer.add_page(page)
            kept_pages += 1

    writer.write(output_pdf)
    print(f"Extracted {kept_pages} pages with annotations to {output_pdf}")
# ---- pdfly/extract_images.py ----


def main(pdf: Path) -> None:
    """
    Save every image of ``pdf`` as a file in the current working directory.

    Files are named ``<page index, 4 digits>-<image name>``; a summary of the
    written paths is printed afterwards.

    Args:
        pdf: The PDF to extract images from.
    """
    reader = PdfReader(str(pdf))
    extracted_images = []
    for page_index, page0 in enumerate(reader.pages):
        for image_file_object in page0.images:
            path = f"{page_index:04d}-{image_file_object.name}"
            with open(path, "wb") as fp:
                fp.write(image_file_object.data)
            extracted_images.append(path)
    if not extracted_images:
        print("No image found.")
    else:
        print(f"Extracted {len(extracted_images)} images:")
        for path in extracted_images:
            print(f"- {path}")


# ---- pdfly/metadata.py ----


class EncryptionData(BaseModel):
    """Encryption parameters copied from the reader's encryption object."""

    revision: int
    v_value: int


class MetaInfo(BaseModel):
    """Everything the ``meta`` subcommand reports about one PDF file."""

    encryption: EncryptionData | None = None
    pdf_file_version: str
    pages: int | None = None
    page_mode: str | None = None
    page_layout: str | None = None
    attachments: str = "unknown"
    id1: bytes | None = None
    id2: bytes | None = None
    images: list[int] = []

    # PDF /Info dictionary
    author: str | None = None
    creation_date: datetime | None = None
    creator: str | None = None
    keywords: str | None = None
    producer: str | None = None
    subject: str | None = None
    title: str | None = None

    # OS Information
    file_permissions: str
    file_size: int  # in bytes
    creation_time: datetime
    modification_time: datetime
    access_time: datetime


def main(pdf: Path, output: OutputOptions) -> None:
    """
    Collect metadata about ``pdf`` and print it as JSON or as rich tables.

    For an encrypted file only the encryption parameters, the file header
    and the file-system data are collected; the document is not decrypted.

    Args:
        pdf: The PDF to inspect.
        output: ``OutputOptions.json`` prints the model as JSON; any other
            value prints tables.
    """
    reader = PdfReader(str(pdf))
    pdf_stat = pdf.stat()
    if reader.is_encrypted:
        # The header sits at the very start of the file; rewind first, as
        # the unencrypted branch does, instead of reading from wherever the
        # stream position currently is.
        reader.stream.seek(0)
        meta = MetaInfo(
            encryption=(
                EncryptionData(
                    v_value=reader._encryption.V,
                    revision=reader._encryption.R,
                )
                if reader._encryption
                else None
            ),
            pdf_file_version=reader.stream.read(8).decode("utf-8"),
            # OS Info
            file_permissions=f"{stat.filemode(pdf_stat.st_mode)}",
            file_size=pdf_stat.st_size,
            creation_time=datetime.fromtimestamp(pdf_stat.st_ctime),
            modification_time=datetime.fromtimestamp(pdf_stat.st_mtime),
            access_time=datetime.fromtimestamp(pdf_stat.st_atime),
        )
    else:
        info = reader.metadata
        reader.stream.seek(0)
        pdf_file_version = reader.stream.read(8).decode("utf-8")
        pdf_id = reader.trailer.get("/ID")
        meta = MetaInfo(
            pages=len(reader.pages),
            page_mode=reader.page_mode,
            pdf_file_version=pdf_file_version,
            page_layout=reader.page_layout,
            attachments=str(list(reader.attachments.keys())),
            id1=pdf_id[0] if pdf_id is not None else None,
            id2=pdf_id[1] if pdf_id is not None and len(pdf_id) >= 2 else None,
            # OS Info
            file_permissions=f"{stat.filemode(pdf_stat.st_mode)}",
            file_size=pdf_stat.st_size,
            creation_time=datetime.fromtimestamp(pdf_stat.st_ctime),
            modification_time=datetime.fromtimestamp(pdf_stat.st_mtime),
            access_time=datetime.fromtimestamp(pdf_stat.st_atime),
            # Size in bytes of every image, page by page.
            images=[
                len(image.data)
                for page in reader.pages
                for image in page.images
            ],
        )
        if info is not None:
            meta.author = info.author
            meta.creation_date = info.creation_date
            meta.creator = info.creator
            # Pending https://github.com/py-pdf/pypdf/pull/2939 to be able to access .keywords:
            meta.keywords = info.get("/Keywords")
            meta.producer = info.producer
            meta.subject = info.subject
            meta.title = info.title

    if output == OutputOptions.json:
        print(meta.json())
    else:
        from rich.console import Console
        from rich.table import Table

        table = Table(title="PDF Data")
        table.add_column(
            "Attribute", justify="right", style="cyan", no_wrap=True
        )
        table.add_column("Value", style="white")
        # /Info entries are only listed when they have a value.
        if meta.title:
            table.add_row("Title", meta.title)
        if meta.author:
            table.add_row("Author", meta.author)
        if meta.creation_date:
            table.add_row("CreationDate", str(meta.creation_date))
        if meta.creator:
            table.add_row("Creator", meta.creator)
        if meta.producer:
            table.add_row("Producer", meta.producer)
        if meta.subject:
            table.add_row("Subject", meta.subject)
        if meta.keywords:
            table.add_row("Keywords", meta.keywords)
        table.add_row("Pages", f"{meta.pages:,}" if meta.pages else "unknown")
        table.add_row("Encrypted", f"{meta.encryption}")
        table.add_row("PDF File Version", meta.pdf_file_version)
        table.add_row("Page Layout", meta.page_layout)
        table.add_row("Page Mode", meta.page_mode)
        table.add_row("PDF ID", f"ID1={meta.id1!r} ID2={meta.id2!r}")

        # Fonts can only be listed when the pages are readable; for an
        # encrypted file both sets stay empty.
        embedded_fonts: set[str] = set()
        unembedded_fonts: set[str] = set()
        if not reader.is_encrypted:
            for page in reader.pages:
                emb, unemb = page._get_fonts()
                embedded_fonts = embedded_fonts.union(set(emb))
                unembedded_fonts = unembedded_fonts.union(set(unemb))
        table.add_row(
            "Fonts (unembedded)", ", ".join(sorted(unembedded_fonts))
        )
        table.add_row(
            "Fonts (embedded)", ", ".join(sorted(embedded_fonts))
        )
        table.add_row("Attachments", meta.attachments)
        table.add_row(
            "Images", f"{len(meta.images)} images ({sum(meta.images):,} bytes)"
        )

        enc_table = Table(title="Encryption information")
        enc_table.add_column(
            "Attribute", justify="right", style="cyan", no_wrap=True
        )
        enc_table.add_column("Value", style="white")
        if meta.encryption:
            enc_table.add_row(
                "Security Handler Revision Number",
                str(meta.encryption.revision),
            )
            enc_table.add_row("V value", str(meta.encryption.v_value))

        os_table = Table(title="Operating System Data")
        os_table.add_column(
            "Attribute", justify="right", style="cyan", no_wrap=True
        )
        os_table.add_column("Value", style="white")
        os_table.add_row("File Name", f"{pdf}")
        os_table.add_row("File Permissions", f"{meta.file_permissions}")
        os_table.add_row("File Size", f"{meta.file_size:,} bytes")
        os_table.add_row(
            "Creation Time", f"{meta.creation_time:%Y-%m-%d %H:%M:%S}"
        )
        os_table.add_row(
            "Modification Time", f"{meta.modification_time:%Y-%m-%d %H:%M:%S}"
        )
        os_table.add_row(
            "Access Time", f"{meta.access_time:%Y-%m-%d %H:%M:%S}"
        )

        console = Console()
        console.print(os_table)
        console.print(table)
        if meta.encryption:
            console.print(enc_table)

        console.print(
            "Use the 'pagemeta' subcommand to get details about a single page"
        )


# ---- pdfly/pagemeta.py ----

# Page sizes as (width, height) looked up by find_known_format; all
# entries are taller than wide.
KNOWN_PAGE_FORMATS = {
    (841.89, 1190.55): "A3",  # 297mm x 420mm
    (595.28, 841.89): "A4",  # 210mm x 297mm
    (420.94, 595.28): "A5",  # 148mm x 210mm
    (297.66, 420.94): "A6",  # 105mm x 148mm
    (612, 792): "Letter",
    (612, 1008): "Legal",
}


class PageMeta(BaseModel):
    """The page boxes, annotation count and rotation of a single page."""

    mediabox: tuple[float, float, float, float]
    cropbox: tuple[float, float, float, float]
    artbox: tuple[float, float, float, float]
    bleedbox: tuple[float, float, float, float]
    annotations: int
    rotation: int


def main(pdf: Path, page_index: int, output: OutputOptions) -> None:
    """
    Print the boxes, annotations and rotation of one page of ``pdf``.

    Args:
        pdf: The PDF to inspect.
        page_index: Index into ``reader.pages`` of the page to describe.
        output: ``OutputOptions.json`` prints the model as JSON; any other
            value prints a table followed by the list of annotations.
    """
    reader = PdfReader(pdf)
    page = reader.pages[page_index]
    meta = PageMeta(
        mediabox=page.mediabox,
        cropbox=page.cropbox,
        artbox=page.artbox,
        bleedbox=page.bleedbox,
        annotations=len(page.annotations) if page.annotations else 0,
        rotation=page.rotation,
    )

    if output == OutputOptions.json:
        print(meta.json())
    else:
        console = Console()

        table = Table(title=f"{pdf}, page index {page_index}")
        table.add_column(
            "Attribute", justify="right", style="cyan", no_wrap=True
        )
        table.add_column("Value", style="white")

        def add_box_attr(
            name: str, box: tuple[float, float, float, float]
        ) -> None:
            # A box is (x0, y0, x1, y1) as far as this code is concerned:
            # width and height are the differences of opposite entries.
            width = box[2] - box[0]
            height = box[3] - box[1]
            known_format = find_known_format(width, height)
            extra = f" ({known_format})" if known_format else ""
            table.add_row(
                name,
                f"({box[0]:.2f}, {box[1]:.2f}, {box[2]:.2f}, {box[3]:.2f}):"
                f" {width=:.2f} x {height=:.2f}{extra}",
            )

        add_box_attr("mediabox", meta.mediabox)
        add_box_attr("cropbox", meta.cropbox)
        add_box_attr("artbox", meta.artbox)
        add_box_attr("bleedbox", meta.bleedbox)

        # Zero values are left out of the table.
        if meta.annotations:
            table.add_row("annotations", str(meta.annotations))
        if meta.rotation:
            table.add_row("rotation", str(meta.rotation))

        console.print(table)

        if page.annotations:
            console.print(Markdown("**All annotations:**"))
            for i, annot in enumerate(page.annotations, start=1):
                obj = annot.get_object()
                console.print(f"{i}. {obj['/Subtype']} at {obj['/Rect']}")


def find_known_format(width: float, height: float) -> str:
    """
    Name the entry of KNOWN_PAGE_FORMATS that matches a page size.

    Args:
        width: Page width.
        height: Page height.

    Returns:
        The format name for an exact match, ``"close to format: <name>"``
        when the size is less than 2 units (Euclidean distance) away from an
        entry, and an empty string otherwise.
    """
    known_format = KNOWN_PAGE_FORMATS.get((width, height))
    if known_format:
        return known_format
    for (w, h), name in KNOWN_PAGE_FORMATS.items():
        # Squared distance below 4, i.e. distance below 2.
        if ((w - width) * (w - width) + (h - height) * (h - height)) < 4:
            return f"close to format: {name}"
    return ""
# ---- pdfly/rm.py ----


def main(
    filename: Path, fn_pgrgs: list[str], output: Path, verbose: bool
) -> None:
    """
    Write ``filename`` to ``output`` without the pages named in ``fn_pgrgs``.

    This is a thin wrapper: all work is delegated to ``pdfly.cat.main``
    (imported as ``cat_main``) with ``inverted_page_selection=True``.

    Args:
        filename: The input PDF.
        fn_pgrgs: File names and/or page ranges, passed through unchanged.
        output: Destination path, passed through unchanged.
        verbose: Passed through unchanged.
    """
    cat_main(filename, fn_pgrgs, output, verbose, inverted_page_selection=True)
""" from pathlib import Path from pypdf import ( PageRange, PdfReader, PdfWriter, ) from rich.console import Console def main( filename: Path, output: Path, degrees: int, page_range: str, ) -> None: try: # set up the streams reader = PdfReader(filename) pages = list(reader.pages) writer = PdfWriter() # Convert the page range into a set of page numbers pages_to_rotate = convert_range_to_pages(page_range, len(pages)) for page_index, page in enumerate(pages): if page_index in pages_to_rotate: page = page.rotate(degrees) writer.add_page(page) # Everything looks good! Write the output file. with open(output, "wb") as output_fh: writer.write(output_fh) except Exception as error: console = Console() console.print(f"Error while rotating {filename}") raise error def convert_range_to_pages(page_range: str, num_pages: int) -> set[int]: pages_to_rotate = {*range(*PageRange(page_range).indices(num_pages))} return pages_to_rotate ================================================ FILE: pdfly/sign.py ================================================ """ Creates a signed PDF from an existing PDF file. Examples pdfly sign input.pdf --p12 certs.p12 -o signed.pdf Signs the input.pdf with a PKCS12 certificate archive. Writes the resulting signed pdf into signed.pdf. pdfly sign document.pdf --p12 certs.p12 --in-place Signs the document.pdf with a PKCS12 certificate archive. Modifies the input file in-place. 
""" import io import tempfile from collections.abc import Generator from contextlib import contextmanager from pathlib import Path from typing import Union import fpdf.sign import typer from cryptography.hazmat.primitives.serialization import pkcs12 from endesive import signer from fpdf import FPDF, get_scale_factor from pypdf import PageObject, PdfReader, PdfWriter from pypdf.generic import DictionaryObject, PdfObject def main( filename: Path, output: Path | None, in_place: bool, p12: Path, p12_password: str | None, ) -> None: validate_output_args_or_raise(output, in_place) pdf_reader = PdfReader(filename) pdf_is_unsigned_or_raise(pdf_reader) output_file: Union[io.BufferedWriter, tempfile._TemporaryFileWrapper] if output: output_file = open(output, "wb") else: output_file = tempfile.NamedTemporaryFile( delete=False ) # will be deleted by output.unlink() later on output = Path(output_file.name) try: _sign_pdf_contents(pdf_reader, output_file, p12, p12_password) finally: output_file.close() if in_place: filename.write_bytes(output.read_bytes()) output.unlink() def pdf_is_unsigned_or_raise(pdf_reader: PdfReader) -> None: for page in pdf_reader.pages: if page.annotations is None: continue if any(is_signature(annotation) for annotation in page.annotations): raise typer.BadParameter("PDF is already signed.") def is_signature(annotation: PdfObject) -> bool: resolved_annotation_object = annotation.get_object() if resolved_annotation_object is None: return False if type(resolved_annotation_object) is not DictionaryObject: return False subtype = resolved_annotation_object["/Subtype"] if subtype != "/Widget": return False fieldtype = resolved_annotation_object["/FT"] return fieldtype == "/Sig" def _sign_pdf_contents( pdf_reader: PdfReader, output_file: Union[io.BufferedWriter, tempfile._TemporaryFileWrapper], p12: Path, p12_password: str | None, ) -> None: unsigned_output_buffer = io.BytesIO() with add_to_page(pdf_reader.pages[-1]) as pdf: with p12.open("rb") as pkcs_file: 
hashalgo = "sha256" sign_time = pdf.creation_date key, cert, extra_certs = pkcs12.load_key_and_certificates( pkcs_file.read(), (p12_password.encode() if p12_password is not None else None), ) pdf.sign( key=key, cert=cert, # type: ignore extra_certs=extra_certs, hashalgo=hashalgo, signing_time=sign_time, ) # defer actual signing until after the input pdfs contents are merged # _sign_key = None prevents FDPF.output() from calculating the signature hash too early pdf._sign_key = None writer = PdfWriter() writer.append_pages_from_reader(pdf_reader) writer.write(unsigned_output_buffer) # Now that output_buffer contains the contents to be signed # we can generate the cryptographic signature using fpdf2.sign.sign_content # patch placeholder values to match how fpdf.sign.sign_content() expects them content_to_sign = bytearray(unsigned_output_buffer.getbuffer()) content_to_sign = content_to_sign.replace( _SIGNATURE_BYTERANGE_PLACEHOLDER.encode(), fpdf.sign._SIGNATURE_BYTERANGE_PLACEHOLDER.encode(), ) content_to_sign = content_to_sign.replace( b"(" + _SIGNATURE_CONTENTS_PLACEHOLDER.encode() + b")", b"<" + fpdf.sign._SIGNATURE_CONTENTS_PLACEHOLDER.encode() + b">", ) signed_output_buffer = fpdf.sign.sign_content( signer, content_to_sign, key, cert, # type: ignore extra_certs, hashalgo, sign_time, ) output_file.write(signed_output_buffer) @contextmanager def add_to_page(reader_page: PageObject, unit: str = "mm") -> Generator[FPDF]: k = get_scale_factor(unit) format = (reader_page.mediabox[2] / k, reader_page.mediabox[3] / k) pdf = FPDF(format=format, unit=unit) pdf.add_page() yield pdf page_overlay = PdfReader(io.BytesIO(pdf.output())).pages[0] reader_page.merge_page(page2=page_overlay) def validate_output_args_or_raise(output: Path | None, in_place: bool) -> None: if not in_place and output is None: raise typer.BadParameter( "One of the options --output or --in-place is required." 
) # fpdf.sign placeholder values - in the form after PdfWriter serialized them _SIGNATURE_BYTERANGE_PLACEHOLDER = "[ 0 0 0 0 ]" _SIGNATURE_CONTENTS_PLACEHOLDER = "\\000" * 0x2000 ================================================ FILE: pdfly/uncompress.py ================================================ """Module for uncompressing PDF content streams.""" import zlib from pathlib import Path from pypdf import PdfReader, PdfWriter from pypdf.generic import IndirectObject, PdfObject def main(pdf: Path, output: Path) -> None: reader = PdfReader(pdf) writer = PdfWriter() for page in reader.pages: if "/Contents" in page: contents: PdfObject | None = page["/Contents"] if isinstance(contents, IndirectObject): contents = contents.get_object() if contents is not None: if isinstance(contents, list): for content in contents: if isinstance(content, IndirectObject): decompress_content_stream(content) elif isinstance(contents, IndirectObject): decompress_content_stream(contents) writer.add_page(page) with open(output, "wb") as fp: writer.write(fp) orig_size = pdf.stat().st_size uncomp_size = output.stat().st_size print(f"Original Size : {orig_size:,}") print( f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)" ) def decompress_content_stream(content: IndirectObject) -> None: """Decompress a content stream if it uses FlateDecode.""" if content.get("/Filter") == "/FlateDecode": try: compressed_data = content.get_data() uncompressed_data = zlib.decompress(compressed_data) content.set_data(uncompressed_data) del content["/Filter"] except zlib.error as error: print( f"Some content stream with /FlateDecode failed to be decompressed: {error}" ) ================================================ FILE: pdfly/up2.py ================================================ """ Create a booklet-style PDF from a single input. 
Pairs of two pages will be put on one page (left and right) usage: python 2-up.py input_file output_file """ from pathlib import Path from pypdf import PdfReader, PdfWriter from pypdf.generic import FloatObject def main(pdf: Path, output: Path) -> None: reader = PdfReader(str(pdf)) writer = PdfWriter() for i in range(0, len(reader.pages), 2): lhs = reader.pages[i] if i + 1 < len(reader.pages): rhs = reader.pages[i + 1] lhs.merge_translated_page( rhs, tx=float(lhs.mediabox.width), ty=0, expand=True ) else: # Double the MediaBox width: lhs.mediabox[2] = FloatObject(2 * lhs.mediabox[2]) # Double the CropBox width: lhs.cropbox[2] = FloatObject(2 * lhs.cropbox[2]) writer.add_page(lhs) with open(output, "wb") as fp: writer.write(fp) print(f"{output} was created") ================================================ FILE: pdfly/update_offsets.py ================================================ """ Updates offsets and lengths in a simple PDF file. The PDF specification requires that the xref section at the end of a PDF file has the correct offsets of the PDF's objects. It further requires that the dictionary of a stream object contains a /Length-entry giving the length of the encoded stream. When editing a PDF file using a text-editor (e.g. vim) it is elaborate to compute or adjust these offsets and lengths. This command tries to compute /Length-entries of the stream dictionaries and the offsets in the xref-section automatically. It expects that the PDF file has ASCII encoding only. It may use ISO-8859-1 or UTF-8 in its comments. The current implementation incorrectly replaces CR (0x0d) by LF (0x0a) in binary data. It expects that there is one xref-section only. It expects that the /Length-entries have default values containing enough digits, e.g. /Length 000 when the stream consists of 576 bytes. 
Example: update-offsets --verbose --encoding ISO-8859-1 issue-297.pdf issue-297.out.pdf """ import re from pathlib import Path from rich.console import Console # Here, only simple regular expressions are used. # Beyond a certain level of complexity, switching to a proper PDF dictionary parser would be better. RE_OBJ = re.compile(r"^([0-9]+) ([0-9]+) obj *") RE_CONTENT = re.compile(r"^([^\r\n]*)", re.DOTALL) RE_LENGTH_REF = re.compile(r"^(.*/Length )([0-9]+) ([0-9]+) R(.*)", re.DOTALL) RE_LENGTH = re.compile( r"^(.*/Length )([0-9]+)([ />\x00\t\f\r\n].*)", re.DOTALL ) def update_lines( lines_in: list[str], encoding: str, console: Console, verbose: bool ) -> list[str]: """ Iterates over the lines of a pdf-files and updates offsets. The input is expected to be a pdf without binary-sections. :param lines_in: A list over the lines including line-breaks. :param encoding: The encoding, e.g. "iso-8859-1" or "UTF-8". :param console: Console used to print messages. :param verbose: True to activate logging of info-messages. :return The output is a list of lines to be written in the given encoding. """ lines_out = [] # lines to be written map_line_offset = {} # map from line-number to offset map_obj_offset = {} # map from object-number to offset map_obj_line = {} # map from object-number to line-number line_no = 0 # current line-number (starting at 0) offset_out = 0 # current offset in output-file line_xref = None # line-number of xref-line (in xref-section only) line_startxref = None # line-number of startxref-line curr_obj = None # number of current object len_stream = None # length of stream (in stream only) offset_xref = None # offset of xref-section map_stream_len = {} # map from object-number to /Length of stream map_obj_length_line = {} # map from object-number to /Length-line map_obj_length_ref = ( {} ) # map from object-number to /Length-reference (e.g. 
"3") map_obj_length_line_no = {} # map from object-number to line_no of length # of /Length-line for idx, line in enumerate(lines_in): line_no = idx + 1 m_content = RE_CONTENT.match(line) if m_content is None: raise RuntimeError( f"Invalid PDF file: line {line_no} without line-break." ) content = m_content.group(1) map_line_offset[line_no] = offset_out m_obj = RE_OBJ.match(line) if m_obj is not None: curr_obj = m_obj.group(1) curr_gen = m_obj.group(2) if verbose: console.print(f"line {line_no}: object {curr_obj}") if curr_gen != "0": raise RuntimeError( f"Invalid PDF file: generation {curr_gen} of object {curr_obj} in line {line_no} is not supported." ) map_obj_offset[curr_obj] = int(offset_out) map_obj_line[curr_obj] = line_no len_stream = None if content == "xref": offset_xref = offset_out line_xref = line_no elif content == "startxref": line_startxref = line_no line_xref = None elif content == "stream": if verbose: console.print(f"line {line_no}: start stream") len_stream = 0 elif content == "endstream": if verbose: console.print(f"line {line_no}: end stream") if curr_obj is None: raise RuntimeError( f"Invalid PDF file: line {line_no}: endstream without object-start." ) if len_stream is None: raise RuntimeError( f"Invalid PDF file: line {line_no}: endstream without stream." 
) if len_stream > 0: # Ignore the last EOL len_stream = ( len_stream - 2 if lines_in[idx - 1][-2:] == "\r\n" else len_stream - 1 ) if verbose: console.print( f"line {line_no}: Computed /Length {len_stream} of obj {curr_obj}" ) map_stream_len[curr_obj] = len_stream elif content == "endobj": curr_obj = None elif curr_obj is not None and len_stream is None: m_length_ref = RE_LENGTH_REF.match(line) if m_length_ref is not None: len_obj = m_length_ref.group(2) len_obj_gen = m_length_ref.group(3) if verbose: console.print( f"line {line_no}, /Length-reference {len_obj} {len_obj_gen} R: {content}" ) map_obj_length_ref[curr_obj] = len_obj else: m_length = RE_LENGTH.match(line) if m_length is not None: if verbose: console.print(f"line {line_no}, /Length: {content}") map_obj_length_line[curr_obj] = line map_obj_length_line_no[curr_obj] = line_no elif curr_obj is not None and len_stream is not None: len_stream += len(line.encode(encoding)) elif line_xref is not None and line_no > line_xref + 2: object_number = line_no - line_xref - 2 if ( object_number <= len(map_obj_offset) and str(object_number) in map_obj_offset ): eol = line[-2:] xref_updated = ( "%010d" % map_obj_offset[str(object_number)] ) + " 00000 n" if verbose: console.print(f"{content} -> {xref_updated}") line = xref_updated + eol elif line_startxref is not None and line_no == line_startxref + 1: if offset_xref is None: raise NotImplementedError( "Unsupported file: startxref without preceding xref-section (probable cross-reference stream)" ) line = "%d\n" % offset_xref lines_out.append(line) offset_out += len(line.encode(encoding)) # Some checks if len(map_obj_offset) == 0: raise RuntimeError( "Invalid PDF file: the command didn't find any PDF objects." 
) if offset_xref is None: raise RuntimeError( "Invalid PDF file: the command didn't find a xref-section" ) if line_startxref is None: raise RuntimeError( "Invalid PDF file: the command didn't find a startxref-section" ) for curr_obj, stream_len in map_stream_len.items(): if curr_obj in map_obj_length_line: line = map_obj_length_line[curr_obj] m_length = RE_LENGTH.match(line) if m_length is None: raise RuntimeError( f"Invalid PDF file: line '{line}' does not contain a valid /Length." ) prev_length = m_length.group(2) len_digits = len(prev_length) len_format = "%%0%dd" % len_digits updated_length = len_format % stream_len if len(updated_length) > len_digits: raise RuntimeError( f"Not enough digits in /Length-entry {prev_length}" f" of object {curr_obj}:" f" too short to take /Length {updated_length}" ) line = m_length.group(1) + updated_length + m_length.group(3) lines_out[map_obj_length_line_no[curr_obj] - 1] = line elif curr_obj in map_obj_length_ref: len_obj = map_obj_length_ref[curr_obj] if len_obj not in map_obj_line: raise RuntimeError( f"obj {curr_obj} has unknown length-obj {len_obj}" ) len_obj_line = map_obj_line[len_obj] prev_length = lines_out[len_obj_line][:-1] len_digits = len(prev_length) len_format = "%%0%dd" % len_digits updated_length = len_format % stream_len if len(updated_length) > len_digits: raise RuntimeError( f"Not enough digits in /Length-ref-entry {prev_length}" f" of object {curr_obj} and len-object {len_obj}:" f" too short to take /Length {updated_length}" ) if prev_length != updated_length: if verbose: console.print( f"line {line_no}, ref-len {len_obj} of {curr_obj}: {prev_length} -> {updated_length}" ) lines_out[len_obj_line] = updated_length + "\n" else: raise RuntimeError( f"obj {curr_obj} with stream-len {stream_len} has no object-length-line: {map_obj_length_line}" ) return lines_out def read_binary_file(file_path: Path, encoding: str) -> list[str]: """ Reads a binary file line by line and returns these lines as a list of strings in 
the given encoding. Encoding utf-8 can't be used to read random binary data. :param file_path: file to be read line by line :param encoding: encoding to be used (e.g. "iso-8859-1") :return lines including line-breaks """ chunks: list[str] = [] with file_path.open("rb") as file: buffer = bytearray() while True: chunk = file.read(4096) # Read in chunks of 4096 bytes if not chunk: break # End of file buffer += chunk # Split buffer into chunks based on LF, CR, or CRLF while True: match = re.search(b"(\x0d\x0a|\x0a|\x0d)", buffer) if not match: break # No more line breaks found, process the remaining buffer end = match.end() chunk_str = buffer[:end].decode(encoding, errors="strict") buffer = buffer[end:] chunks.append(chunk_str) # Handle the last chunk if buffer: chunks.append(buffer.decode(encoding, errors="strict")) return chunks def main(file_in: Path, file_out: Path, encoding: str, verbose: bool) -> None: if not file_out: file_out = file_in console = Console() console.print(f"Read {file_in}") lines_in = read_binary_file(file_in, encoding) lines_out = update_lines(lines_in, encoding, console, verbose) with open(file_out, "wb") as f: f.writelines(line.encode(encoding) for line in lines_out) console.print(f"Wrote {file_out}", soft_wrap=True) ================================================ FILE: pdfly/x2pdf.py ================================================ """Convert one or more files to PDF. 
Each file is a page.""" from io import BytesIO from pathlib import Path from fpdf import FPDF from PIL import Image from pypdf import PdfReader, PdfWriter from rich.console import Console def px_to_mm(px: float) -> float: px_in_inch = 72 mm_in_inch = 25.4 inch = px / px_in_inch mm = inch * mm_in_inch return mm def image_to_pdf(filepath: Path) -> BytesIO: with Image.open(filepath) as cover: w, h = cover.size width, height = px_to_mm(w), px_to_mm(h) pdf = FPDF(unit="mm") pdf.add_page(format=(width, height)) # type: ignore pdf.image(filepath, x=0, y=0) return BytesIO(pdf.output()) def main(in_filepaths: list[Path], out_filepath: Path) -> int: console = Console() exit_code = 0 writer = PdfWriter() for filepath in in_filepaths: if filepath.name.endswith(".pdf"): for page in PdfReader(filepath).pages: writer.insert_page(page) continue try: pdf_bytes = image_to_pdf(filepath) new_page = PdfReader(pdf_bytes).pages[0] writer.insert_page(new_page) except Exception: console.print( f"[red]Error: Could not convert '{filepath}' to a PDF." 
) console.print_exception(extra_lines=1, max_frames=1) exit_code += 1 writer.write(out_filepath) return exit_code ================================================ FILE: pylock.toml ================================================ lock-version = "1.0" created-by = "pip" [[packages]] name = "alabaster" version = "1.0.0" [[packages.wheels]] name = "alabaster-1.0.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b" [[packages]] name = "annotated-doc" version = "0.0.4" [[packages.wheels]] name = "annotated_doc-0.0.4-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl" [packages.wheels.hashes] sha256 = "571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320" [[packages]] name = "annotated-types" version = "0.7.0" [[packages.wheels]] name = "annotated_types-0.7.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53" [[packages]] name = "anyio" version = "4.12.1" [[packages.wheels]] name = "anyio-4.12.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = "d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c" [[packages]] name = "asn1crypto" version = "1.5.1" [[packages.wheels]] name = "asn1crypto-1.5.1-py2.py3-none-any.whl" url = 
"https://files.pythonhosted.org/packages/c9/7f/09065fd9e27da0eda08b4d6897f1c13535066174cc023af248fc2a8d5e5a/asn1crypto-1.5.1-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67" [[packages]] name = "attrs" version = "25.4.0" [[packages.wheels]] name = "attrs-25.4.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373" [[packages]] name = "babel" version = "2.18.0" [[packages.wheels]] name = "babel-2.18.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/77/f5/21d2de20e8b8b0408f0681956ca2c69f1320a3848ac50e6e7f39c6159675/babel-2.18.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "e2b422b277c2b9a9630c1d7903c2a00d0830c409c59ac8cae9081c92f1aeba35" [[packages]] name = "bcrypt" version = "5.0.0" [[packages.wheels]] name = "bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl" url = "https://files.pythonhosted.org/packages/d4/8d/5e43d9584b3b3591a6f9b68f755a4da879a59712981ef5ad2a0ac1379f7a/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl" [packages.wheels.hashes] sha256 = "611f0a17aa4a25a69362dcc299fda5c8a3d4f160e2abb3831041feb77393a14a" [[packages]] name = "black" version = "26.3.1" [[packages.wheels]] name = "black-26.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" url = "https://files.pythonhosted.org/packages/7f/0a/8d17d1a9c06f88d3d030d0b1d4373c1551146e252afe4547ed601c0e697f/black-26.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" [packages.wheels.hashes] sha256 = "6c54a4a82e291a1fee5137371ab488866b7c86a3305af4026bdd4dc78642e1ac" [[packages]] name = "certifi" version = "2026.2.25" [[packages.wheels]] name = "certifi-2026.2.25-py3-none-any.whl" url = 
"https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl" [packages.wheels.hashes] sha256 = "027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa" [[packages]] name = "cffi" version = "2.0.0" [[packages.wheels]] name = "cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" url = "https://files.pythonhosted.org/packages/98/29/9b366e70e243eb3d14a5cb488dfd3a0b6b2f1fb001a203f653b93ccfac88/cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" [packages.wheels.hashes] sha256 = "fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453" [[packages]] name = "cfgv" version = "3.5.0" [[packages.wheels]] name = "cfgv-3.5.0-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0" [[packages]] name = "charset-normalizer" version = "3.4.6" [[packages.wheels]] name = "charset_normalizer-3.4.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" url = "https://files.pythonhosted.org/packages/fd/ce/865e4e09b041bad659d682bbd98b47fb490b8e124f9398c9448065f64fee/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" [packages.wheels.hashes] sha256 = "51fb3c322c81d20567019778cb5a4a6f2dc1c200b886bc0d636238e364848c89" [[packages]] name = "check-wheel-contents" version = "0.6.3" [[packages.wheels]] name = "check_wheel_contents-0.6.3-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/be/05/f39fde9f31ef80b285ef5822fad4ddabf73fec62a1f02c5beb4b2f328972/check_wheel_contents-0.6.3-py3-none-any.whl" [packages.wheels.hashes] sha256 = "5ae39c8c434b972f0740d04610759168590713175aab584b012b1b84f6771874" [[packages]] name = "click" version = "8.3.1" 
[[packages.wheels]] name = "click-8.3.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = "981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6" [[packages]] name = "colorama" version = "0.4.6" [[packages.wheels]] name = "colorama-0.4.6-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" [[packages]] name = "coverage" version = "7.13.4" [[packages.wheels]] name = "coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl" url = "https://files.pythonhosted.org/packages/f8/02/aa7ec01d1a5023c4b680ab7257f9bfde9defe8fdddfe40be096ac19e8177/coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl" [packages.wheels.hashes] sha256 = "8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f" [[packages]] name = "cryptography" version = "46.0.5" [[packages.wheels]] name = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl" url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl" [packages.wheels.hashes] sha256 = "a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c" [[packages]] name = "defusedxml" version = "0.7.1" [[packages.wheels]] name = "defusedxml-0.7.1-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61" [[packages]] name = "distlib" version = "0.4.0" 
[[packages.wheels]] name = "distlib-0.4.0-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16" [[packages]] name = "docutils" version = "0.21.2" [[packages.wheels]] name = "docutils-0.21.2-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl" [packages.wheels.hashes] sha256 = "dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2" [[packages]] name = "endesive" version = "2.19.3" [[packages.wheels]] name = "endesive-2.19.3-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/a0/c3/a0dcae019de40816352462371c473b22639cd8e68f33a5f23f07faf330fd/endesive-2.19.3-py3-none-any.whl" [packages.wheels.hashes] sha256 = "e5e09c1011b1977fbb9d563d672de7f17f5638304ce57a35bf7d00f3b7a3972e" [[packages]] name = "exceptiongroup" version = "1.3.1" [[packages.wheels]] name = "exceptiongroup-1.3.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = "a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598" [[packages]] name = "filelock" version = "3.25.2" [[packages.wheels]] name = "filelock-3.25.2-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl" [packages.wheels.hashes] sha256 = "ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70" [[packages]] name = "flake8" version = "7.3.0" [[packages.wheels]] name = "flake8-7.3.0-py2.py3-none-any.whl" url = 
"https://files.pythonhosted.org/packages/9f/56/13ab06b4f93ca7cac71078fbe37fcea175d3216f31f85c3168a6bbd0bb9a/flake8-7.3.0-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e" [[packages]] name = "flake8-bugbear" version = "25.11.29" [[packages.wheels]] name = "flake8_bugbear-25.11.29-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/0d/42/c18f199780d99a6f6a64c4a36f4ad28a445d9e11968a6025b21d0c8b6802/flake8_bugbear-25.11.29-py3-none-any.whl" [packages.wheels.hashes] sha256 = "9bf15e2970e736d2340da4c0a70493db964061c9c38f708cfe1f7b2d87392298" [[packages]] name = "flake8-comprehensions" version = "3.17.0" [[packages.wheels]] name = "flake8_comprehensions-3.17.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/39/bd/d6739d685fdd79349aa51c37bdedc0d8eab6ae9c6e6ed2ca935b3f88210d/flake8_comprehensions-3.17.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "3943a9c6f2593c3bc5cc64106c2f89d63c6ecd49c8343597f8257b8fcfc8b0a2" [[packages]] name = "flake8-isort" version = "7.0.0" [[packages.wheels]] name = "flake8_isort-7.0.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/17/7d/907ef4135f6ede5187930d9ddd1f36564e07c6cdcd15ae8fb9849c9517e0/flake8_isort-7.0.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "c301a0e55fc77582348e636194b84b1a0baf0dfdaa6eddf3b0eeea75f8be7f36" [[packages]] name = "flake8-simplify" version = "0.30.0" [[packages.wheels]] name = "flake8_simplify-0.30.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/9b/d5/18a89f40c1a145a44d1fad825553be8131bcb727f5f2783d3727a2f4b2d0/flake8_simplify-0.30.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "c9f54a50d24780832a3f2bb7a687ef465b91f10d7cb4ea0845dff4b65d9c91f4" [[packages]] name = "flit" version = "3.12.0" [[packages.wheels]] name = "flit-3.12.0-py3-none-any.whl" url = 
"https://files.pythonhosted.org/packages/f5/82/ce1d3bb380b227e26e517655d1de7b32a72aad61fa21ff9bd91a2e2db6ee/flit-3.12.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "2b4e7171dc22881fa6adc2dbf083e5ecc72520be3cd7587d2a803da94d6ef431" [[packages]] name = "flit-core" version = "3.12.0" [[packages.wheels]] name = "flit_core-3.12.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/f2/65/b6ba90634c984a4fcc02c7e3afe523fef500c4980fec67cc27536ee50acf/flit_core-3.12.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "e7a0304069ea895172e3c7bb703292e992c5d1555dd1233ab7b5621b5b69e62c" [[packages]] name = "fonttools" version = "4.62.1" [[packages.wheels]] name = "fonttools-4.62.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" url = "https://files.pythonhosted.org/packages/42/09/7dbe3d7023f57d9b580cfa832109d521988112fd59dddfda3fddda8218f9/fonttools-4.62.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl" [packages.wheels.hashes] sha256 = "7bca7a1c1faf235ffe25d4f2e555246b4750220b38de8261d94ebc5ce8a23c23" [[packages]] name = "fpdf2" version = "2.8.7" [[packages.wheels]] name = "fpdf2-2.8.7-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/66/0a/cf50ecffa1e3747ed9380a3adfc829259f1f86b3fdbd9e505af789003141/fpdf2-2.8.7-py3-none-any.whl" [packages.wheels.hashes] sha256 = "d391fc508a3ce02fc43a577c830cda4fe6f37646f2d143d489839940932fbc19" [[packages]] name = "h11" version = "0.16.0" [[packages.wheels]] name = "h11-0.16.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86" [[packages]] name = "identify" version = "2.6.18" [[packages.wheels]] name = "identify-2.6.18-py2.py3-none-any.whl" url = 
"https://files.pythonhosted.org/packages/46/33/92ef41c6fad0233e41d3d84ba8e8ad18d1780f1e5d99b3c683e6d7f98b63/identify-2.6.18-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "8db9d3c8ea9079db92cafb0ebf97abdc09d52e97f4dcf773a2e694048b7cd737" [[packages]] name = "idna" version = "3.11" [[packages.wheels]] name = "idna-3.11-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl" [packages.wheels.hashes] sha256 = "771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea" [[packages]] name = "imagesize" version = "2.0.0" [[packages.wheels]] name = "imagesize-2.0.0-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/5f/53/fb7122b71361a0d121b669dcf3d31244ef75badbbb724af388948de543e2/imagesize-2.0.0-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "5667c5bbb57ab3f1fa4bc366f4fbc971db3d5ed011fd2715fd8001f782718d96" [[packages]] name = "iniconfig" version = "2.3.0" [[packages.wheels]] name = "iniconfig-2.3.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12" [[packages]] name = "invoke" version = "2.2.1" [[packages.wheels]] name = "invoke-2.2.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/32/4b/b99e37f88336009971405cbb7630610322ed6fbfa31e1d7ab3fbf3049a2d/invoke-2.2.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = "2413bc441b376e5cd3f55bb5d364f973ad8bdd7bf87e53c79de3c11bf3feecc8" [[packages]] name = "isort" version = "8.0.1" [[packages.wheels]] name = "isort-8.0.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/3e/95/c7c34aa53c16353c56d0b802fba48d5f5caa2cdee7958acbcb795c830416/isort-8.0.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = 
"28b89bc70f751b559aeca209e6120393d43fbe2490de0559662be7a9787e3d75" [[packages]] name = "jinja2" version = "3.1.6" [[packages.wheels]] name = "jinja2-3.1.6-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl" [packages.wheels.hashes] sha256 = "85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67" [[packages]] name = "librt" version = "0.8.1" [[packages.wheels]] name = "librt-0.8.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" url = "https://files.pythonhosted.org/packages/01/99/f85130582f05dcf0c8902f3d629270231d2f4afdfc567f8305a952ac7f14/librt-0.8.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" [packages.wheels.hashes] sha256 = "97c2b54ff6717a7a563b72627990bec60d8029df17df423f0ed37d56a17a176b" [[packages]] name = "lxml" version = "6.0.2" [[packages.wheels]] name = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl" url = "https://files.pythonhosted.org/packages/20/cf/cab09478699b003857ed6ebfe95e9fb9fa3d3c25f1353b905c9b73cfb624/lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl" [packages.wheels.hashes] sha256 = "a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c" [[packages]] name = "markdown-it-py" version = "3.0.0" [[packages.wheels]] name = "markdown_it_py-3.0.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1" [[packages]] name = "markupsafe" version = "3.0.3" [[packages.wheels]] name = "markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" url = 
"https://files.pythonhosted.org/packages/af/cd/ce6e848bbf2c32314c9b237839119c5a564a59725b53157c856e90937b7a/markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" [packages.wheels.hashes] sha256 = "f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591" [[packages]] name = "mccabe" version = "0.7.0" [[packages.wheels]] name = "mccabe-0.7.0-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e" [[packages]] name = "mdit-py-plugins" version = "0.5.0" [[packages.wheels]] name = "mdit_py_plugins-0.5.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f" [[packages]] name = "mdurl" version = "0.1.2" [[packages.wheels]] name = "mdurl-0.1.2-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl" [packages.wheels.hashes] sha256 = "84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8" [[packages]] name = "mypy" version = "1.19.1" [[packages.wheels]] name = "mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" url = "https://files.pythonhosted.org/packages/2a/0d/93c2e4a287f74ef11a66fb6d49c7a9f05e47b0a4399040e6719b57f500d2/mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" [packages.wheels.hashes] sha256 = "de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74" [[packages]] name = "mypy-extensions" version = "1.1.0" [[packages.wheels]] name = "mypy_extensions-1.1.0-py3-none-any.whl" 
url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505" [[packages]] name = "myst-parser" version = "4.0.1" [[packages.wheels]] name = "myst_parser-4.0.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = "9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d" [[packages]] name = "nodeenv" version = "1.10.0" [[packages.wheels]] name = "nodeenv-1.10.0-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827" [[packages]] name = "packaging" version = "26.0" [[packages.wheels]] name = "packaging-26.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529" [[packages]] name = "paramiko" version = "4.0.0" [[packages.wheels]] name = "paramiko-4.0.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/a9/90/a744336f5af32c433bd09af7854599682a383b37cfd78f7de263de6ad6cb/paramiko-4.0.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "0e20e00ac666503bf0b4eda3b6d833465a2b7aff2e2b3d79a8bba5ef144ee3b9" [[packages]] name = "pathspec" version = "1.0.4" [[packages.wheels]] name = "pathspec-1.0.4-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl" [packages.wheels.hashes] sha256 = 
"fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723" [[packages]] name = "pdfly" [packages.directory] path = "." [[packages]] name = "pillow" version = "12.1.1" [[packages.wheels]] name = "pillow-12.1.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" url = "https://files.pythonhosted.org/packages/0c/7b/f9b09a7804ec7336effb96c26d37c29d27225783dc1501b7d62dcef6ae25/pillow-12.1.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" [packages.wheels.hashes] sha256 = "9f51079765661884a486727f0729d29054242f74b46186026582b4e4769918e4" [[packages]] name = "pip" version = "26.0.1" [[packages.wheels]] name = "pip-26.0.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/de/f0/c81e05b613866b76d2d1066490adf1a3dbc4ee9d9c839961c3fc8a6997af/pip-26.0.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = "bdb1b08f4274833d62c1aa29e20907365a2ceb950410df15fc9521bad440122b" [[packages]] name = "platformdirs" version = "4.9.4" [[packages.wheels]] name = "platformdirs-4.9.4-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl" [packages.wheels.hashes] sha256 = "68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868" [[packages]] name = "pluggy" version = "1.6.0" [[packages.wheels]] name = "pluggy-1.6.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746" [[packages]] name = "pre-commit" version = "4.5.1" [[packages.wheels]] name = "pre_commit-4.5.1-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = 
"3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77" [[packages]] name = "pycodestyle" version = "2.14.0" [[packages.wheels]] name = "pycodestyle-2.14.0-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/d7/27/a58ddaf8c588a3ef080db9d0b7e0b97215cee3a45df74f3a94dbbf5c893a/pycodestyle-2.14.0-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d" [[packages]] name = "pycparser" version = "3.0" [[packages.wheels]] name = "pycparser-3.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992" [[packages]] name = "pydantic" version = "2.12.5" [[packages.wheels]] name = "pydantic-2.12.5-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl" [packages.wheels.hashes] sha256 = "e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d" [[packages]] name = "pydantic-core" version = "2.41.5" [[packages.wheels]] name = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" [packages.wheels.hashes] sha256 = "100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a" [[packages]] name = "pyflakes" version = "3.4.0" [[packages.wheels]] name = "pyflakes-3.4.0-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/c2/2f/81d580a0fb83baeb066698975cb14a618bdbed7720678566f1b046a95fe8/pyflakes-3.4.0-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f" [[packages]] 
name = "pygments" version = "2.19.2" [[packages.wheels]] name = "pygments-2.19.2-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl" [packages.wheels.hashes] sha256 = "86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b" [[packages]] name = "pykcs11" version = "1.5.18" [packages.sdist] name = "pykcs11-1.5.18.tar.gz" url = "https://files.pythonhosted.org/packages/22/07/0c2215cb6ef70c213892571eb015e670f4d6adbecedc5eb2369f82c1c7f2/pykcs11-1.5.18.tar.gz" [packages.sdist.hashes] sha256 = "12fd878b369821d80c1be8a140c85e8a0fb1358fcaaba66ca66869213692f227" [[packages]] name = "pynacl" version = "1.6.2" [[packages.wheels]] name = "pynacl-1.6.2-cp38-abi3-manylinux_2_34_x86_64.whl" url = "https://files.pythonhosted.org/packages/c9/a8/b917096b1accc9acd878819a49d3d84875731a41eb665f6ebc826b1af99e/pynacl-1.6.2-cp38-abi3-manylinux_2_34_x86_64.whl" [packages.wheels.hashes] sha256 = "c8a231e36ec2cab018c4ad4358c386e36eede0319a0c41fed24f840b1dac59f6" [[packages]] name = "pypdf" version = "6.9.0" [[packages.wheels]] name = "pypdf-6.9.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/00/64/ac6159cfbeabab3cf54873bbf7314b29183c7ff547c9776596d63170d7c0/pypdf-6.9.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "85805ad7457ca878c4cfd1bc026c4b3dcae359b4a80f889fa7e8c5a1c1a83e51" [[packages]] name = "pytest" version = "9.0.2" [[packages.wheels]] name = "pytest-9.0.2-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl" [packages.wheels.hashes] sha256 = "711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b" [[packages]] name = "pytest-cov" version = "7.0.0" [[packages.wheels]] name = "pytest_cov-7.0.0-py3-none-any.whl" url = 
"https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861" [[packages]] name = "pytest-socket" version = "0.7.0" [[packages.wheels]] name = "pytest_socket-0.7.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/19/58/5d14cb5cb59409e491ebe816c47bf81423cd03098ea92281336320ae5681/pytest_socket-0.7.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "7e0f4642177d55d317bbd58fc68c6bd9048d6eadb2d46a89307fa9221336ce45" [[packages]] name = "pytest-timeout" version = "2.4.0" [[packages.wheels]] name = "pytest_timeout-2.4.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2" [[packages]] name = "python-discovery" version = "1.1.3" [[packages.wheels]] name = "python_discovery-1.1.3-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/e7/80/73211fc5bfbfc562369b4aa61dc1e4bf07dc7b34df7b317e4539316b809c/python_discovery-1.1.3-py3-none-any.whl" [packages.wheels.hashes] sha256 = "90e795f0121bc84572e737c9aa9966311b9fde44ffb88a5953b3ec9b31c6945e" [[packages]] name = "pytokens" version = "0.4.1" [[packages.wheels]] name = "pytokens-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" url = "https://files.pythonhosted.org/packages/69/66/f6fb1007a4c3d8b682d5d65b7c1fb33257587a5f782647091e3408abe0b8/pytokens-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" [packages.wheels.hashes] sha256 = "670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c" [[packages]] name = "pyyaml" version = "6.0.3" [[packages.wheels]] name = 
"pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" url = "https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl" [packages.wheels.hashes] sha256 = "9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b" [[packages]] name = "requests" version = "2.32.5" [[packages.wheels]] name = "requests-2.32.5-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl" [packages.wheels.hashes] sha256 = "2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6" [[packages]] name = "rich" version = "14.3.3" [[packages.wheels]] name = "rich-14.3.3-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl" [packages.wheels.hashes] sha256 = "793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d" [[packages]] name = "ruff" version = "0.15.6" [[packages.wheels]] name = "ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" url = "https://files.pythonhosted.org/packages/f1/9f/f85ef5fd01a52e0b472b26dc1b4bd228b8f6f0435975442ffa4741278703/ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" [packages.wheels.hashes] sha256 = "98893c4c0aadc8e448cfa315bd0cc343a5323d740fe5f28ef8a3f9e21b381f7e" [[packages]] name = "shellingham" version = "1.5.4" [[packages.wheels]] name = "shellingham-1.5.4-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686" [[packages]] name = "snowballstemmer" version = "3.0.1" [[packages.wheels]] name 
= "snowballstemmer-3.0.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = "6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064" [[packages]] name = "sphinx" version = "8.1.3" [[packages.wheels]] name = "sphinx-8.1.3-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/26/60/1ddff83a56d33aaf6f10ec8ce84b4c007d9368b21008876fceda7e7381ef/sphinx-8.1.3-py3-none-any.whl" [packages.wheels.hashes] sha256 = "09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2" [[packages]] name = "sphinx-autobuild" version = "2024.10.3" [[packages.wheels]] name = "sphinx_autobuild-2024.10.3-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/18/c0/eba125db38c84d3c74717008fd3cb5000b68cd7e2cbafd1349c6a38c3d3b/sphinx_autobuild-2024.10.3-py3-none-any.whl" [packages.wheels.hashes] sha256 = "158e16c36f9d633e613c9aaf81c19b0fc458ca78b112533b20dafcda430d60fa" [[packages]] name = "sphinx-rtd-theme" version = "3.1.0" [[packages.wheels]] name = "sphinx_rtd_theme-3.1.0-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/87/c7/b5c8015d823bfda1a346adb2c634a2101d50bb75d421eb6dcb31acd25ebc/sphinx_rtd_theme-3.1.0-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "1785824ae8e6632060490f67cf3a72d404a85d2d9fc26bce3619944de5682b89" [[packages]] name = "sphinxcontrib-applehelp" version = "2.0.0" [[packages.wheels]] name = "sphinxcontrib_applehelp-2.0.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5" [[packages]] name = "sphinxcontrib-devhelp" version = "2.0.0" [[packages.wheels]] name = "sphinxcontrib_devhelp-2.0.0-py3-none-any.whl" url = 
"https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2" [[packages]] name = "sphinxcontrib-htmlhelp" version = "2.1.0" [[packages.wheels]] name = "sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8" [[packages]] name = "sphinxcontrib-jquery" version = "4.1" [[packages.wheels]] name = "sphinxcontrib_jquery-4.1-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/76/85/749bd22d1a68db7291c89e2ebca53f4306c3f205853cf31e9de279034c3c/sphinxcontrib_jquery-4.1-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae" [[packages]] name = "sphinxcontrib-jsmath" version = "1.0.1" [[packages.wheels]] name = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl" url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl" [packages.wheels.hashes] sha256 = "2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178" [[packages]] name = "sphinxcontrib-qthelp" version = "2.0.0" [[packages.wheels]] name = "sphinxcontrib_qthelp-2.0.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb" [[packages]] name = "sphinxcontrib-serializinghtml" version = "2.0.0" [[packages.wheels]] name = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl" 
url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331" [[packages]] name = "starlette" version = "0.52.1" [[packages.wheels]] name = "starlette-0.52.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = "0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74" [[packages]] name = "tomli" version = "2.4.0" [[packages.wheels]] name = "tomli-2.4.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a" [[packages]] name = "tomli-w" version = "1.2.0" [[packages.wheels]] name = "tomli_w-1.2.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/c7/18/c86eb8e0202e32dd3df50d43d7ff9854f8e0603945ff398974c1d91ac1ef/tomli_w-1.2.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90" [[packages]] name = "typer" version = "0.24.1" [[packages.wheels]] name = "typer-0.24.1-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl" [packages.wheels.hashes] sha256 = "112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e" [[packages]] name = "typing-extensions" version = "4.15.0" [[packages.wheels]] name = "typing_extensions-4.15.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl" 
[packages.wheels.hashes] sha256 = "f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548" [[packages]] name = "typing-inspection" version = "0.4.2" [[packages.wheels]] name = "typing_inspection-0.4.2-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl" [packages.wheels.hashes] sha256 = "4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7" [[packages]] name = "urllib3" version = "2.6.3" [[packages.wheels]] name = "urllib3-2.6.3-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl" [packages.wheels.hashes] sha256 = "bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4" [[packages]] name = "uvicorn" version = "0.42.0" [[packages.wheels]] name = "uvicorn-0.42.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359" [[packages]] name = "virtualenv" version = "21.2.0" [[packages.wheels]] name = "virtualenv-21.2.0-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/c6/59/7d02447a55b2e55755011a647479041bc92a82e143f96a8195cb33bd0a1c/virtualenv-21.2.0-py3-none-any.whl" [packages.wheels.hashes] sha256 = "1bd755b504931164a5a496d217c014d098426cddc79363ad66ac78125f9d908f" [[packages]] name = "watchfiles" version = "1.1.1" [[packages.wheels]] name = "watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" url = "https://files.pythonhosted.org/packages/d5/dc/1a680b7458ffa3b14bb64878112aefc8f2e4f73c5af763cbf0bd43100658/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl" [packages.wheels.hashes] sha256 = 
"544364b2b51a9b0c7000a4b4b02f90e9423d97fbbf7e06689236443ebcad81ab" [[packages]] name = "websockets" version = "16.0" [[packages.wheels]] name = "websockets-16.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl" url = "https://files.pythonhosted.org/packages/9d/2f/4b3ca7e106bc608744b1cdae041e005e446124bebb037b18799c2d356864/websockets-16.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl" [packages.wheels.hashes] sha256 = "7d837379b647c0c4c2355c2499723f82f1635fd2c26510e1f587d89bc2199e72" [[packages]] name = "wheel-filename" version = "1.4.2" [[packages.wheels]] name = "wheel_filename-1.4.2-py3-none-any.whl" url = "https://files.pythonhosted.org/packages/b4/0f/6e97a3bc38cdde32e3ec49f8c0903fe3559ec9ec9db181782f0bb4417717/wheel_filename-1.4.2-py3-none-any.whl" [packages.wheels.hashes] sha256 = "3fa599046443d4ca830d06e3d180cd0a675d5871af0a68daa5623318bb4d17e3" ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" [project] name = "pdfly" authors = [ { name = "Martin Thoma", email = "info@martin-thoma.de" }, { name = "Lucas Cimon (@Lucas-C)" }, ] maintainers = [ { name = "Martin Thoma", email = "info@martin-thoma.de" }, { name = "Lucas Cimon (@Lucas-C)" }, ] description = "A pure-python CLI application to manipulate PDF files" readme = "README.md" dynamic = ["version"] license = "BSD-3-Clause" license-files = ["LICENSE"] requires-python = ">=3.10.0" keywords = ["pdf", "cli", "tools", "compression", "metadata", "signature", "booklet"] # https://pypi.org/pypi?%3Aaction=list_classifiers classifiers = [ "Development Status :: 1 - Planning", "Environment :: Console", "Intended Audience :: Developers", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", "Programming 
Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", ] dependencies = [ "pypdf[full]>=5.1.0", "typer>=0.12.4", "pillow", "pydantic", "rich", "fpdf2>=2.8.1", "asn1crypto", "cryptography", "endesive", "requests>=2.32.5", # required by endesive.signer ] [dependency-groups] dev = ["black", "check-wheel-contents", "flake8", "flake8-bugbear", "flake8-comprehensions", "flake8-isort", "flake8-simplify", "flit", "mypy", "pre-commit>=3.2.0", "pydantic", "pytest", "pytest-cov", "pytest-socket", "pytest-timeout", "rich", "ruff"] docs = ["attrs", "sphinx", "sphinx_rtd_theme", "sphinx-autobuild", "myst_parser"] # attrs is required for myst, but not automatically installed by myst [project.urls] Source = "https://github.com/py-pdf/pdfly" [project.scripts] pdfly = "pdfly.cli:entry_point" [tool.pytest.ini_options] addopts = "--disable-socket --doctest-modules --cov=. 
--cov-report html:tests/reports/coverage-html --cov-report term-missing --ignore=docs/ --durations=3 --timeout=30" doctest_encoding = "utf-8" testpaths = ["tests"] [tool.black] line-length = 79 [tool.isort] line_length = 79 indent = ' ' multi_line_output = 3 include_trailing_comma = true known_third_party = ["pytest", "setuptools"] [tool.ruff] line-length = 120 [tool.ruff.lint] select = ["ALL"] ignore = [ "D401", # First line of docstring should be in imperative mood - false positives "UP031", # Use format specifiers instead of percent format "D205", # 1 blank line required between summary line and description "D400", # First line should end with a period "D415", # First line should end with a period # Introduces bugs "RUF005", "DTZ001", # The use of `datetime.datetime()` without `tzinfo` is necessary # Personal preference "D212", # I want multiline-docstrings to start at the second line "D407", # google-style docstrings don't have dashes "BLE", # we want to capture Exception sometimes "COM812", # yes, they make the diff smaller "D100", # Missing docstring in public module "D105", # Missing docstring in magic method "D106", # Missing docstring in public nested class "D107", # Missing docstring in `__init__` "D203", # one-blank-line-before-class "EM", # exception messages "G004", # f-string in logging statement "RET", "S110", # `try`-`except`-`pass` detected, consider logging the exception "SIM105", # contextlib.suppress "SIM108", # don't enforce ternary operators "SIM300", # yoda conditions "TID252", # we want relative imports "TRY", # I don't know what this is about # As long as we are not on Python 3.11+ "UP006", "UP007", # for the moment, fix it later: "T201", # print "DTZ006", # datetime without timezone "SIM115", # context handler for opening files "A", # Variable is shadowing a built-in "B904", # Within an `except` clause, raise exceptions with "B905", # `zip()` without an explicit `strict=` parameter "C901", "D101", # Missing docstring in public class
"D102", # Missing docstring in public method "D103", # Missing docstring in public function "D417", # Missing argument descriptions in the docstring "FBT001", # Boolean positional arg in function definition "FBT002", # Boolean default value in function definition "FBT003", # Boolean positional value in function call "PLC0415", # `import` should be at the top-level of a file "PGH", # Use specific error messages "PLR0912", # Too many branches "PLR0913", # Too many arguments to function call "PLR0915", # Too many statements "PLR2004", # Magic value "PLW", # global variables "PTH110", # `os.path.exists()` should be replaced by `Path.exists()` "PTH123", # `open()` should be replaced by `Path.open()` "S101", # Use of `assert` detected "SLF001", # Private member accessed "INP001", # File `docs/conf.py` is part of an implicit namespace package. Add an `__init__.py`. ] [tool.ruff.lint.mccabe] max-complexity = 20 # Recommended: 10 [tool.ruff.lint.per-file-ignores] "sample-files/*" = ["D100", "INP001", "FA102", "I001"] "make_release.py" = ["T201", "S603", "S607"] ================================================ FILE: renovate.json ================================================ { "commitMessagePrefix": "MAINT:", "extends": ["config:best-practices"], "labels": ["dependencies"], "osvVulnerabilityAlerts": true, "vulnerabilityAlerts": {"enabled": true} } ================================================ FILE: resources/demo2_ca.root.crt.pem ================================================ -----BEGIN CERTIFICATE----- MIIDLTCCAhWgAwIBAgIUHeQXwdDU4jyXtdItkEjDOw/SigAwDQYJKoZIhvcNAQEL BQAwHTEbMBkGA1UEAwwSQUEgVHJpU29mdCBSb290IENBMCAXDTI1MDYxMTE4Mjgw MloYDzIwNjUwNjAxMTgyODAyWjAdMRswGQYDVQQDDBJBQSBUcmlTb2Z0IFJvb3Qg Q0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCGHskGb4Gd364QhbS6 i2NmHbJf4N5LhDJPwRjDACuRqRu42fEB+MwKvAIYoS2wVihYubf/dRZFc0/4yyCH 7I1Mkh1YoQRjl3q51pKWjUjm5Ua611NDLHvkDU8ecQWj2qjHcJtV39ay3L/TIyvS tesIR+o2oOkfxzaLjkhrH08DOy5L3gvETexV7GBbmSQTaI9jvNuD9oKZs6ba1S5O 
65pPEC/u3/udZgRBKd+lB/qlLk7HNuN0trwEfZLvdBC4pS9Fc0DbUcHnsNBwWFc9 VjrzzJDYHdWmZtYGg5rc7efx5+zVw26wm58caJv5ihi0An4J/I8i5I4TKoLMgcJP 2r7VAgMBAAGjYzBhMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUPkWmCmbq vZJeJaiLKy8j/la8iHEwHQYDVR0OBBYEFD5Fpgpm6r2SXiWoiysvI/5WvIhxMA4G A1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAQEAPc3cf1CHKSaF4BDM8UHT 4B5VMdj7uZSxsQ+IerrOi6QfMIUuesVc/h9oN9eBLoTCCQsFB7nrizwmyd2xIK9d jOuPQZexu9VhBIeJE8Fh86gG0U6IQxXw9NXW10yaW9w5RAYQqH3w+VPsaPDXnceX b0yjM1vtmV9WrMNoXWPil7vYuea0HAar80IyUKwrzEOZa8zqDz1HElC0rukVh0Yl 5PHkVptl11d81ukyKeXGP6PFt1JI31vgAEZHdykz8w7SjAu0g+QrM2LCZV915wLu OAS3ptxRmdNymk1zYHEyPt7CRdgUV1NWhE1N0RQMuf1CnXRPWZ6+Ls83xVzoO1i7 WA== -----END CERTIFICATE----- ================================================ FILE: resources/signing-certificate.crt ================================================ Bag Attributes friendlyName: fpdf2 localKeyID: C2 58 91 78 7F 3E 01 57 6E 39 AE AD CA 28 99 06 3B 55 2D F1 subject=CN = fpdf2, O = fpdf2, OU = signing testing issuer=CN = fpdf2, O = fpdf2, OU = signing testing -----BEGIN CERTIFICATE----- MIIEFzCCAv+gAwIBAgIBfzANBgkqhkiG9w0BAQsFADA6MQ4wDAYDVQQDDAVmcGRm MjEOMAwGA1UECgwFZnBkZjIxGDAWBgNVBAsMD3NpZ25pbmcgdGVzdGluZzAeFw0y NTA3MjMwNDI0NTBaFw0zNTA3MjEwNDI0NTBaMDoxDjAMBgNVBAMMBWZwZGYyMQ4w DAYDVQQKDAVmcGRmMjEYMBYGA1UECwwPc2lnbmluZyB0ZXN0aW5nMIIBIjANBgkq hkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAn1/C38InT9bJPE/R5yDhLSUS6KKR2xir PYQF8Blb9LYLf3jF/2Dupl9OG5FUFHQZL2Lw2PJvrIvXi4LKfi3wM93lumvNpVl8 BFuuQKZbvV3aGXsjfLL96i4rgRd9TrnOUvYHUiyhY1Q/1f3eW7+y4+6KUTUDgXf6 awKXC9qpmv/L0BlKNl3CaSnQcc3KRSTlxNkupOiuLC0gC+Xhf5qjUZDKPjkIQZ3R fUTaVsCIUYqwzKsRkfhiizcXj3L5b/XeBDTNT6qI1xz2XN7UQ2w8Z0PExxcth3Hb TeR6KZOPPo2dIeXPB3kljoraWAxJosxr9lDhFO2t4HP8Hbj1LwXk0wIDAQABo4IB JjCCASIwHQYDVR0OBBYEFFtMIYXyJ7jtFAz3bU7d4fCPlqkJMEwGA1UdIwRFMEOh PqQ8MDoxDjAMBgNVBAMMBWZwZGYyMQ4wDAYDVQQKDAVmcGRmMjEYMBYGA1UECwwP c2lnbmluZyB0ZXN0aW5nggF/MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgeAMB0GA1Ud JQQWMBQGCCsGAQUFBwMEBggrBgEFBQcDAjAdBgNVHREEFjAUgRJzaWduZXJAZnBk 
ZjIubG9jYWwwXQYIKwYBBQUHAQEEUTBPMCgGCCsGAQUFBzAChhxodHRwOi8vY2Eu ZXhhbXBsZS5jb20vY2EucGVtMCMGCCsGAQUFBzABhhdodHRwOi8vb2NzcC5leGFt cGxlLmNvbTANBgkqhkiG9w0BAQsFAAOCAQEAUFuZAJ7bzp1+drypANTk1QBS476n 2ggKfDzsxNPmF5DO8anyBS6k6rMT0Ziq7Y9TzuUe6xOtJSgXswupn7AAn81p3V/q slaHsIzaNo+1wg6b7EtP3/udtDKBOwQTdz3PwA3ihLdDC4IcnGLPmwPDfBX3H2tc R3Xw64gudbinRTdrwh8nHDxsNWZ0G56Gbwm2J+Pt6l6RS+mXrWrO/PcjvVJAigBe 7u9laSU7LLQSUoWn5Yv99DYdAvVZQqUG0BgUeKXxFDEiIqNWtHUNzv3Ce8KdASlG TxFCEB+Y1Ag2S1Y1AmpKsP3RUt9SOiGjmqhHfXBIgghz2b3hoLYEAbWxSw== -----END CERTIFICATE----- ================================================ FILE: setup.cfg ================================================ [mutmut] backup = False runner = ./mutmut-test.sh tests_dir = tests/ [mypy] ignore_missing_imports = true strict = true check_untyped_defs = true disallow_any_generics = true disallow_incomplete_defs = true disallow_untyped_defs = true no_implicit_optional = true warn_unused_ignores = false show_error_codes = true [mypy-testing.*] disallow_untyped_defs = false [mypy-tests.*] disallow_untyped_defs = false [flake8] ignore = E501, E203, W503, PT007, SIM115 exclude = build/* per-file-ignores = tests/*: ASS001 ================================================ FILE: setup.py ================================================ """Package pdfly with setuptools.""" import re from setuptools import find_packages, setup VERSIONFILE = "pdfly/_version.py" with open(VERSIONFILE) as fp: verstrline = fp.read() VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]" mo = re.search(VSRE, verstrline, re.MULTILINE) if mo: verstr = mo.group(1) else: raise RuntimeError("Unable to find version string in %s." 
% (VERSIONFILE)) setup( version=verstr, packages=find_packages(exclude=("tests",)), ) ================================================ FILE: tests/__init__.py ================================================ """Shared test code""" ================================================ FILE: tests/conftest.py ================================================ """Utilities and fixtures that are available automatically for all tests.""" import os from collections.abc import Iterator from pathlib import Path from typing import Union import pytest from fpdf import FPDF from pdfly.cli import entry_point try: from contextlib import chdir # type: ignore except ImportError: # Fallback when not available (< Python 3.11): from contextlib import contextmanager @contextmanager # type: ignore def chdir(dir_path: Union[str, Path]) -> Iterator[None]: """Non thread-safe context manager to change the current working directory.""" cwd = Path.cwd() os.chdir(dir_path) try: yield finally: os.chdir(cwd) TESTS_ROOT = Path(__file__).parent.resolve() PROJECT_ROOT = TESTS_ROOT.parent RESOURCES_ROOT = PROJECT_ROOT / "resources" def run_cli(args: list[str]) -> Union[None, int, str]: try: entry_point(args) return None except SystemExit as error: return error.code @pytest.fixture def two_pages_pdf_filepath(tmp_path: Path) -> Path: """A PDF with 2 pages, and a different image on each page""" # Note: prior to v2.7.9, fpdf2 produced incorrect /Resources dicts for each page (cf. fpdf2 PR #1133), # leading to an "abnormal" two_pages.pdf generated there, and for test_cat_subset_ensure_reduced_size() to fail. 
pdf = FPDF() pdf.add_page() pdf.image(RESOURCES_ROOT / "baleines.jpg") pdf.add_page() pdf.image(RESOURCES_ROOT / "pythonknight.png") pdf_filepath = tmp_path / "two_pages.pdf" pdf.output(pdf_filepath) return pdf_filepath @pytest.fixture def pdf_file_100(tmp_path: Path) -> Path: """A PDF with 100 pages; each has only the page index on it.""" pdf = FPDF() for i in range(100): pdf.add_page() pdf.set_font("helvetica", size=12) pdf.cell( 200, 10, text=f"{i}", new_x="LMARGIN", new_y="NEXT", align="C" ) pdf_filepath = tmp_path / "pdf_file_100.pdf" pdf.output(pdf_filepath) return pdf_filepath @pytest.fixture def pdf_file_abc(tmp_path: Path) -> Path: """A PDF with 100 pages; each has only the page index on it.""" pdf = FPDF() for char in [chr(i) for i in range(ord("a"), ord("z") + 1)]: pdf.add_page() pdf.set_font("helvetica", size=12) pdf.cell( 200, 10, text=f"{char}", new_x="LMARGIN", new_y="NEXT", align="C" ) pdf_filepath = tmp_path / "abc.pdf" pdf.output(pdf_filepath) return pdf_filepath ================================================ FILE: tests/test_booklet.py ================================================ from pathlib import Path import pytest from pypdf import PdfReader from .conftest import RESOURCES_ROOT, chdir, run_cli def test_booklet_fewer_args( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): exit_code = run_cli(["cat", str(RESOURCES_ROOT / "box.pdf")]) assert exit_code == 2 captured = capsys.readouterr() assert "Missing" in captured.err def test_booklet_extra_args( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): exit_code = run_cli( ["booklet", str(RESOURCES_ROOT / "box.pdf"), "a.pdf", "b.pdf"] ) assert exit_code == 2 captured = capsys.readouterr() assert "unexpected extra argument" in captured.err def test_booklet_page_size(tmp_path: Path) -> None: in_fname = str(RESOURCES_ROOT / "input8.pdf") with chdir(tmp_path): exit_code = run_cli( [ "booklet", in_fname, "output8.pdf", ] ) in_reader = 
PdfReader(in_fname) out_reader = PdfReader("output8.pdf") assert exit_code == 0 assert len(in_reader.pages) == 8 assert len(out_reader.pages) == 4 in_height = in_reader.pages[0].mediabox.height in_width = in_reader.pages[0].mediabox.width out_height = out_reader.pages[0].mediabox.height out_width = out_reader.pages[0].mediabox.width assert out_width == in_width * 2 assert in_height == out_height @pytest.mark.parametrize( ("page_count", "expected", "expected_bc"), [ ("8", "8 1\n2 7\n6 3\n4 5\n", "8 1\n2 7\n6 3\n4 5\n"), ("7", "7 1\n2\n6 3\n4 5\n", "7 1\n2 b\n6 3\n4 5\n"), ("6", "6 1\n2 5\n4 3\n\n", "6 1\n2 5\n4 3\nc\n"), ("5", "5 1\n2\n4 3\n\n", "5 1\n2 b\n4 3\nc\n"), ("4", "4 1\n2 3\n", "4 1\n2 3\n"), ("3", "3 1\n2\n", "3 1\n2 b\n"), ("2", "2 1\n\n", "2 1\nc\n"), ("1", "1\n\n", "1 b\nc\n"), ], ) def test_booklet_order( capsys: pytest.CaptureFixture, tmp_path: Path, page_count: str, expected: str, expected_bc: str, ) -> None: with chdir(tmp_path): exit_code = run_cli( [ "cat", "-o", f"input{page_count}.pdf", str(RESOURCES_ROOT / "input8.pdf"), f":{page_count}", ] ) assert exit_code == 0 exit_code = run_cli( [ "booklet", f"input{page_count}.pdf", f"output{page_count}.pdf", ] ) captured = capsys.readouterr() assert exit_code == 0, captured.err exit_code = run_cli( [ "extract-text", f"output{page_count}.pdf", ] ) captured = capsys.readouterr() assert exit_code == 0, captured.err assert captured.out == expected exit_code = run_cli( [ "booklet", "--centerfold-file", str(RESOURCES_ROOT / "c.pdf"), "--blank-page-file", str(RESOURCES_ROOT / "b.pdf"), f"input{page_count}.pdf", f"outputbc{page_count}.pdf", ] ) captured = capsys.readouterr() assert exit_code == 0, captured.err exit_code = run_cli( [ "extract-text", f"outputbc{page_count}.pdf", ] ) captured = capsys.readouterr() assert exit_code == 0, captured.err assert captured.out == expected_bc ================================================ FILE: tests/test_cat.py ================================================ from 
pathlib import Path from typing import Any import pytest from pypdf import PdfReader from .conftest import RESOURCES_ROOT, chdir, run_cli def extract_embedded_images(pdf_filepath: Path) -> list[Any]: reader = PdfReader(pdf_filepath) return [page.images for page in reader.pages] def extract_text_pages(pdf_filepath: Path) -> list[str]: reader = PdfReader(pdf_filepath) return [page.extract_text() for page in reader.pages] def test_cat_incorrect_number_of_args( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): exit_code = run_cli(["cat", str(RESOURCES_ROOT / "box.pdf")]) assert exit_code == 2 captured = capsys.readouterr() assert "Missing" in captured.err def test_cat_two_files_ok( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Act with chdir(tmp_path): exit_code = run_cli( [ "cat", str(RESOURCES_ROOT / "box.pdf"), str(RESOURCES_ROOT / "jpeg.pdf"), "--output", "./out.pdf", ] ) captured = capsys.readouterr() # Assert assert exit_code == 0, captured assert not captured.err reader = PdfReader(tmp_path / "out.pdf") assert len(reader.pages) == 2 def test_cat_subset_ok(capsys: pytest.CaptureFixture, tmp_path: Path) -> None: with chdir(tmp_path): exit_code = run_cli( [ "cat", str(RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"), "13:15", "--output", "./out.pdf", ] ) captured = capsys.readouterr() assert exit_code == 0, captured assert not captured.err reader = PdfReader(tmp_path / "out.pdf") assert len(reader.pages) == 2 @pytest.mark.parametrize( "page_range", ["a", "-", "1-", "1-1-1", "1:1:1:1"], ) def test_cat_subset_invalid_args( capsys: pytest.CaptureFixture, tmp_path: Path, page_range: str ) -> None: with chdir(tmp_path): exit_code = run_cli( [ "cat", str(RESOURCES_ROOT / "jpeg.pdf"), page_range, "--output", "./out.pdf", ] ) captured = capsys.readouterr() assert exit_code == 2, captured assert "Error: invalid file path or page range provided" in captured.out def test_cat_subset_warn_on_missing_pages( capsys: 
pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): exit_code = run_cli( [ "cat", str(RESOURCES_ROOT / "jpeg.pdf"), "2", "--output", "./out.pdf", ] ) captured = capsys.readouterr() assert exit_code == 0, captured assert "WARN" in captured.err def test_cat_subset_ensure_reduced_size( tmp_path: Path, two_pages_pdf_filepath: Path ) -> None: exit_code = run_cli( [ "cat", str(two_pages_pdf_filepath), "0", "--output", str(tmp_path / "page1.pdf"), ] ) assert exit_code == 0 # The extracted PDF should only contain ONE image: embedded_images = extract_embedded_images(tmp_path / "page1.pdf") assert len(embedded_images) == 1 exit_code = run_cli( [ "cat", str(two_pages_pdf_filepath), "1", "--output", str(tmp_path / "page2.pdf"), ] ) assert exit_code == 0 # The extracted PDF should only contain ONE image: embedded_images = extract_embedded_images(tmp_path / "page2.pdf") assert len(embedded_images) == 1 def test_cat_combine_files( pdf_file_100: Path, pdf_file_abc: Path, tmp_path: Path, capsys: pytest.CaptureFixture, ) -> None: with chdir(tmp_path): output_pdf_path = tmp_path / "out.pdf" # Run pdfly cat command exit_code = run_cli( [ "cat", str(pdf_file_100), "1:10:2", str(pdf_file_abc), "::2", str(pdf_file_abc), "1::2", "--output", str(output_pdf_path), ] ) captured = capsys.readouterr() # Check if the command was successful assert exit_code == 0, captured.out # Extract text from the original and modified PDFs extracted_pages = extract_text_pages(output_pdf_path) # Compare the extracted text assert extracted_pages == [ "1", "3", "5", "7", "9", "a", "c", "e", "g", "i", "k", "m", "o", "q", "s", "u", "w", "y", "b", "d", "f", "h", "j", "l", "n", "p", "r", "t", "v", "x", "z", ] @pytest.mark.parametrize( ("page_range", "expected"), [ ("22", ["22"]), ("0:3", ["0", "1", "2"]), (":3", ["0", "1", "2"]), (":", [str(el) for el in range(100)]), ("5:", [str(el) for el in list(range(100))[5:]]), ("::2", [str(el) for el in list(range(100))[::2]]), ("1:10:2", [str(el) for el in 
list(range(100))[1:10:2]]), ("::1", [str(el) for el in list(range(100))[::1]]), ("::-1", [str(el) for el in list(range(100))[::-1]]), ], ) def test_cat_commands( pdf_file_100: Path, tmp_path: Path, page_range: str, expected: list[str], ) -> None: with chdir(tmp_path): output_pdf_path = tmp_path / "out.pdf" # Run pdfly cat command exit_code = run_cli( [ "cat", str(pdf_file_100), page_range, "--output", str(output_pdf_path), ] ) # Check if the command was successful assert exit_code == 0 # Extract text from the original and modified PDFs extracted_pages = extract_text_pages(output_pdf_path) # Compare the extracted text assert extracted_pages == expected def test_cat_decrypt_with_password_ok( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: exit_code = run_cli( [ "cat", "--password=openpassword", "sample-files/005-libreoffice-writer-password/libreoffice-writer-password.pdf", "--output", str(tmp_path / "out.pdf"), ] ) captured = capsys.readouterr() assert exit_code == 0, captured assert not captured.err reader = PdfReader(tmp_path / "out.pdf") assert len(reader.pages) == 1 def test_cat_decrypt_with_password_ko( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: exit_code = run_cli( [ "cat", "--password=INCORRECT", "sample-files/005-libreoffice-writer-password/libreoffice-writer-password.pdf", "--output", str(tmp_path / "out.pdf"), ] ) captured = capsys.readouterr() assert exit_code == 1, captured assert "Error: the decrypting password provided is invalid" in captured.out ================================================ FILE: tests/test_check_sign.py ================================================ from pathlib import Path import pytest from fpdf import FPDF from .conftest import RESOURCES_ROOT, chdir, run_cli def test_check_sign_manipulated_content( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Arrange pdf = FPDF() pdf.add_page() pdf.set_font("helvetica", style="B", size=16) pdf.add_text_markup_annotation( "Underline", "Hello World!", [0, 0, 
0, 0, 0, 0, 0, 0] ) pdf.sign_pkcs12(str(RESOURCES_ROOT / "signing-certificate.p12"), b"fpdf2") input_pdf_bytes = pdf.output() # manipulate signed pdf - leaving length intact input_pdf_bytes = input_pdf_bytes.replace(b"Hello World!", b"aaaaa aaaaa!") input_pdf_manipulated = tmp_path / "signed_manipulated.pdf" input_pdf_manipulated.write_bytes(input_pdf_bytes) # Act with chdir(tmp_path): exit_code = run_cli( [ "check-sign", input_pdf_manipulated.name, "--pem", str(RESOURCES_ROOT / "signing-certificate.crt"), ] ) captured = capsys.readouterr() # Assert assert exit_code == 1 assert "Check failed" in captured.err assert "Content hash not ok" in captured.err def test_check_sign_missing_signature( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Act with chdir(tmp_path): exit_code = run_cli( [ "check-sign", str(RESOURCES_ROOT / "input8.pdf"), "--pem", str(RESOURCES_ROOT / "signing-certificate.crt"), ] ) captured = capsys.readouterr() # Assert assert exit_code == 2 assert "Signature missing" in captured.err def test_check_sign_signature_not_matching_to_certificate( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Act with chdir(tmp_path): exit_code = run_cli( [ "check-sign", str(RESOURCES_ROOT / "sign_pkcs12.pdf"), "--pem", str( RESOURCES_ROOT / "demo2_ca.root.crt.pem" ), # sign_pkcs12.pdf signature matched to signing-certificate.crt ] ) captured = capsys.readouterr() # Assert assert exit_code == 1 assert "Check failed" in captured.err assert "Certificate not ok" in captured.err def test_check_sign_pem(capsys: pytest.CaptureFixture, tmp_path: Path) -> None: # Act with chdir(tmp_path): exit_code = run_cli( [ "check-sign", str(RESOURCES_ROOT / "sign_pkcs12.pdf"), "--pem", str(RESOURCES_ROOT / "signing-certificate.crt"), ] ) captured = capsys.readouterr() # Assert assert exit_code == 0 assert not captured.err def test_check_sign_pdfly_signed_pdf( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Arrange with chdir(tmp_path): exit_code = run_cli( [ 
"sign", str(RESOURCES_ROOT / "input8.pdf"), "-o", str(tmp_path / "input8_signed.pdf"), "--p12", str(RESOURCES_ROOT / "signing-certificate.p12"), "--p12-password", "fpdf2", ] ) captured = capsys.readouterr() # Act with chdir(tmp_path): exit_code = run_cli( [ "check-sign", str(tmp_path / "input8_signed.pdf"), "--pem", str(RESOURCES_ROOT / "signing-certificate.crt"), ] ) captured = capsys.readouterr() # Assert assert exit_code == 0 assert not captured.err ================================================ FILE: tests/test_cli.py ================================================ import sys from subprocess import check_output import pytest from pypdf import __version__ as pypdf_version from .conftest import run_cli def test_pypdf_cli_can_be_invoked_as_a_module() -> None: stdout = check_output( # noqa: S603 [sys.executable, "-m", "pdfly", "--help"] ).decode() assert "pdfly [OPTIONS] COMMAND [ARGS]..." in stdout assert ( "pdfly is a pure-python cli application for manipulating PDF files." in stdout ) def test_pypdf_cli_version(capsys: pytest.CaptureFixture) -> None: exit_code = run_cli(["--version"]) captured = capsys.readouterr() assert not captured.err assert pypdf_version in captured.out assert exit_code == 0 ================================================ FILE: tests/test_compress.py ================================================ """Tests for the `compress` command.""" from pathlib import Path import pytest from typer.testing import CliRunner from pdfly.cli import entry_point runner = CliRunner() @pytest.mark.parametrize("input_pdf_filepath", Path("resources").glob("*.pdf")) def test_compress_sample_files( input_pdf_filepath: Path, tmp_path: Path ) -> None: """Test compression on all sample PDF files.""" output_pdf_filepath = tmp_path / "compressed_output.pdf" result = runner.invoke( entry_point, ["compress", str(input_pdf_filepath), str(output_pdf_filepath)], ) assert ( result.exit_code == 0 ), f"Compression failed for {input_pdf_filepath}: {result.output}" assert ( 
output_pdf_filepath.exists() ), f"Output PDF {output_pdf_filepath} does not exist." # Verify output file is a valid PDF with open(output_pdf_filepath, "rb") as f: content = f.read() assert content.startswith( b"%PDF-" ), f"Output is not a valid PDF file: {output_pdf_filepath}" assert "Original Size" in result.output assert "Final Size" in result.output def test_compress_no_compression_when_larger(tmp_path: Path) -> None: """Test that compression doesn't apply when result would be larger.""" # Create a small PDF that might not compress well from fpdf import FPDF pdf = FPDF() pdf.add_page() pdf.set_font("helvetica", size=12) pdf.cell( 200, 10, text="Short text", new_x="LMARGIN", new_y="NEXT", align="C" ) input_pdf = tmp_path / "small.pdf" pdf.output(input_pdf) output_pdf = tmp_path / "compressed.pdf" result = runner.invoke( entry_point, ["compress", str(input_pdf), str(output_pdf)], ) assert result.exit_code == 0 if "No compression applied" in result.output: # If compression would make file larger, ensure original is copied assert input_pdf.stat().st_size == output_pdf.stat().st_size assert "would increase size" in result.output else: # If compression worked, ensure it's actually smaller or same size assert output_pdf.stat().st_size <= input_pdf.stat().st_size def test_compress_file_integrity(tmp_path: Path) -> None: """Test that compressed files maintain PDF integrity.""" from fpdf import FPDF pdf = FPDF() pdf.add_page() pdf.set_font("helvetica", size=12) pdf.cell( 200, 10, text="Test PDF for compression", new_x="LMARGIN", new_y="NEXT", align="C", ) pdf.cell( 200, 10, text="This is a test document.", new_x="LMARGIN", new_y="NEXT", align="L", ) pdf.add_page() pdf.cell( 200, 10, text="Second page content", new_x="LMARGIN", new_y="NEXT", align="C", ) input_pdf = tmp_path / "test.pdf" pdf.output(input_pdf) output_pdf = tmp_path / "compressed.pdf" result = runner.invoke( entry_point, ["compress", str(input_pdf), str(output_pdf)], ) assert result.exit_code == 0 from pypdf 
import PdfReader reader = PdfReader(str(output_pdf)) assert len(reader.pages) == 2 page1_text = reader.pages[0].extract_text() page2_text = reader.pages[1].extract_text() assert "Test PDF for compression" in page1_text assert "Second page content" in page2_text def test_compress_output_metrics(tmp_path: Path) -> None: """Test that compression metrics are properly displayed.""" from fpdf import FPDF pdf = FPDF() for _i in range(10): pdf.add_page() pdf.set_font("helvetica", size=12) pdf.cell( 200, 10, text="This is repeated text on every page " * 5, new_x="LMARGIN", new_y="NEXT", align="L", ) input_pdf = tmp_path / "repeat.pdf" pdf.output(input_pdf) output_pdf = tmp_path / "compressed.pdf" result = runner.invoke( entry_point, ["compress", str(input_pdf), str(output_pdf)], ) assert result.exit_code == 0 output_lines = result.output.strip().split("\n") assert any("Original Size" in line for line in output_lines) assert any("Final Size" in line for line in output_lines) # Extract sizes from output orig_size_line = next( line for line in output_lines if "Original Size" in line ) final_size_line = next( line for line in output_lines if "Final Size" in line ) assert ":" in orig_size_line assert ":" in final_size_line def test_compress_same_input_output_not_allowed(tmp_path: Path) -> None: """Test that input and output files cannot be the same.""" input_pdf = tmp_path / "test.pdf" # Create a simple PDF from fpdf import FPDF pdf = FPDF() pdf.add_page() pdf.set_font("helvetica", size=12) pdf.cell(200, 10, text="Test", new_x="LMARGIN", new_y="NEXT", align="C") pdf.output(input_pdf) # Try to compress to the same file (should work but might not compress) result = runner.invoke( entry_point, ["compress", str(input_pdf), str(input_pdf)], ) assert result.exit_code in [0, 1] # 0 for success, 1 for error def test_compress_preserves_metadata(tmp_path: Path) -> None: """Test that compression preserves PDF metadata.""" from fpdf import FPDF pdf = FPDF() pdf.add_page() 
pdf.set_font("helvetica", size=12) pdf.cell( 200, 10, text="Test document", new_x="LMARGIN", new_y="NEXT", align="C" ) # Set some metadata pdf.set_title("Test Title") pdf.set_author("Test Author") pdf.set_subject("Test Subject") input_pdf = tmp_path / "metadata.pdf" pdf.output(input_pdf) output_pdf = tmp_path / "compressed.pdf" result = runner.invoke( entry_point, ["compress", str(input_pdf), str(output_pdf)], ) assert result.exit_code == 0 from pypdf import PdfReader reader = PdfReader(str(output_pdf)) metadata = reader.metadata assert metadata is not None assert metadata.get("/Title") == "Test Title" assert metadata.get("/Author") == "Test Author" assert metadata.get("/Subject") == "Test Subject" ================================================ FILE: tests/test_extract_annotated_pages.py ================================================ from pathlib import Path import pytest from .conftest import RESOURCES_ROOT, chdir, run_cli def test_extract_annotated_pages_input8( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): run_cli( [ "extract-annotated-pages", str(RESOURCES_ROOT / "input8.pdf"), ] ) captured = capsys.readouterr() assert not captured.err assert "Extracted 1 pages with annotations" in captured.out ================================================ FILE: tests/test_extract_images.py ================================================ from pathlib import Path import pytest from .conftest import RESOURCES_ROOT, chdir, run_cli def test_extract_images_jpg_png( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): run_cli( [ "extract-images", str(RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"), ] ) captured = capsys.readouterr() assert not captured.err assert "Extracted 3 images" in captured.out def test_extract_images_monochrome( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # There used to be a bug for this case: https://github.com/py-pdf/pypdf/issues/2176 with chdir(tmp_path): 
run_cli(["extract-images", str(RESOURCES_ROOT / "box.pdf")]) captured = capsys.readouterr() assert not captured.err assert "Extracted 1 images" in captured.out ================================================ FILE: tests/test_pagemeta.py ================================================ import json from pathlib import Path import pytest from .conftest import RESOURCES_ROOT, chdir, run_cli def test_pagemeta_json(capsys: pytest.CaptureFixture, tmp_path: Path) -> None: with chdir(tmp_path): run_cli( ["pagemeta", str(RESOURCES_ROOT / "box.pdf"), "0", "-o", "json"] ) captured = capsys.readouterr() assert not captured.err page_metadata = json.loads(captured.out) assert page_metadata["mediabox"] == [0.0, 0.0, 60.0, 60.0] assert page_metadata["cropbox"] == [0.0, 0.0, 60.0, 60.0] assert page_metadata["artbox"] == [0.0, 0.0, 60.0, 60.0] assert page_metadata["bleedbox"] == [0.0, 0.0, 60.0, 60.0] def test_pagemeta_text_with_known_format( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): run_cli(["pagemeta", str(RESOURCES_ROOT / "c.pdf"), "0"]) captured = capsys.readouterr() assert not captured.err assert "(Letter)" in captured.out def test_pagemeta_text_with_close_format( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): run_cli(["pagemeta", str(RESOURCES_ROOT / "jpeg.pdf"), "0"]) captured = capsys.readouterr() assert not captured.err assert "close to format: A4" in captured.out ================================================ FILE: tests/test_rm.py ================================================ """Tests for the `rm` command.""" from pathlib import Path import pytest from _pytest.capture import CaptureFixture from pypdf import PdfReader from .conftest import RESOURCES_ROOT, chdir, run_cli from .test_cat import extract_embedded_images def test_rm_incorrect_number_of_args( capsys: CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): exit_code = run_cli(["rm", str(RESOURCES_ROOT / "box.pdf")]) assert exit_code 
== 2 captured = capsys.readouterr() assert "Missing" in captured.err def test_rm_subset_ok(capsys: CaptureFixture, tmp_path: Path) -> None: with chdir(tmp_path): exit_code = run_cli( [ "rm", str(RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"), "13:15", "--output", "./out.pdf", ] ) captured = capsys.readouterr() assert exit_code == 0, captured assert not captured.err inp_reader = PdfReader( RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf" ) out_reader = PdfReader(tmp_path / "out.pdf") assert len(out_reader.pages) == len(inp_reader.pages) - 2 @pytest.mark.parametrize( "page_range", ["a", "-", "1-", "1-1-1", "1:1:1:1"], ) def test_rm_subset_invalid_args( capsys: CaptureFixture, tmp_path: Path, page_range: str ) -> None: with chdir(tmp_path): exit_code = run_cli( [ "rm", str(RESOURCES_ROOT / "jpeg.pdf"), page_range, "--output", "./out.pdf", ] ) captured = capsys.readouterr() assert exit_code == 2, captured assert "Error: invalid file path or page range provided" in captured.out def test_rm_subset_warn_on_missing_pages( capsys: CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): exit_code = run_cli( [ "rm", str(RESOURCES_ROOT / "jpeg.pdf"), "2", "--output", "./out.pdf", ] ) captured = capsys.readouterr() assert exit_code == 0, captured assert "WARN" in captured.err def test_rm_subset_ensure_reduced_size( tmp_path: Path, two_pages_pdf_filepath: Path ) -> None: exit_code = run_cli( [ "rm", str(two_pages_pdf_filepath), "0", "--output", str(tmp_path / "page1.pdf"), ] ) assert exit_code == 0 # The extracted PDF should only contain ONE image: embedded_images = extract_embedded_images(tmp_path / "page1.pdf") assert len(embedded_images) == 1 exit_code = run_cli( [ "rm", str(two_pages_pdf_filepath), "1", "--output", str(tmp_path / "page2.pdf"), ] ) assert exit_code == 0 # The extracted PDF should only contain ONE image: embedded_images = extract_embedded_images(tmp_path / "page2.pdf") assert len(embedded_images) == 1 def test_rm_combine_files( 
pdf_file_100: Path, pdf_file_abc: Path, tmp_path: Path, capsys: CaptureFixture, ) -> None: with chdir(tmp_path): output_pdf_path = tmp_path / "out.pdf" # Run pdfly rm command exit_code = run_cli( [ "rm", str(pdf_file_100), "1:10:2", str(pdf_file_abc), "::2", str(pdf_file_abc), "1::2", "--output", str(output_pdf_path), ] ) captured = capsys.readouterr() # Check if the command was successful assert exit_code == 0, captured.out # Extract text from the original and modified PDFs extracted_pages = [] reader = PdfReader(output_pdf_path) extracted_pages = [page.extract_text() for page in reader.pages] # Compare the extracted text l1 = [str(el) for el in list(range(0, 10, 2)) + list(range(10, 100))] assert extracted_pages == l1 + [ "b", "d", "f", "h", "j", "l", "n", "p", "r", "t", "v", "x", "z", "a", "c", "e", "g", "i", "k", "m", "o", "q", "s", "u", "w", "y", ] @pytest.mark.parametrize( ("page_range", "expected"), [ ("22", [str(el) for el in range(100) if el != 22]), ("0:3", [str(el) for el in range(3, 100)]), (":3", [str(el) for el in range(3, 100)]), (":", []), ("5:", ["0", "1", "2", "3", "4"]), ("::2", [str(el) for el in list(range(100))[1::2]]), ( "1:10:2", [str(el) for el in list(range(0, 10, 2)) + list(range(10, 100))], ), ("::1", []), ("::-1", []), ], ) def test_rm_commands( pdf_file_100: Path, tmp_path: Path, page_range: str, expected: list[str], ) -> None: with chdir(tmp_path): output_pdf_path = tmp_path / "out.pdf" # Run pdfly rm command exit_code = run_cli( [ "rm", str(pdf_file_100), page_range, "--output", str(output_pdf_path), ] ) # Check if the command was successful assert exit_code == 0 # Extract text from the original and modified PDFs extracted_pages = [] reader = PdfReader(output_pdf_path) extracted_pages = [page.extract_text() for page in reader.pages] # Compare the extracted text assert extracted_pages == expected ================================================ FILE: tests/test_rotate.py ================================================ from pathlib 
import Path import pytest from pypdf import PdfReader from .conftest import RESOURCES_ROOT, chdir, run_cli def test_rotate_fewer_args( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): exit_code = run_cli( [ "rotate", ] ) assert exit_code == 2 captured = capsys.readouterr() assert "Missing argument" in captured.err def test_rotate_extra_args( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: with chdir(tmp_path): exit_code = run_cli( [ "rotate", "-o", "/dev/null", str(RESOURCES_ROOT / "box.pdf"), "37", "extra 1", "extra 2", ] ) assert exit_code == 2 captured = capsys.readouterr() assert "unexpected extra argument" in captured.err def get_page_rotations(fname: str) -> list[int]: reader = PdfReader(fname) return [page.rotation for page in reader.pages] def diff_rotations( in_: list[int], out: list[int], degrees: int = 0 ) -> list[int]: diffs = [] for orig, rotated in zip(in_, out): diffs.append(rotated - (orig + degrees)) return diffs def test_rotate_default(tmp_path: Path) -> None: in_fname = str(RESOURCES_ROOT / "input8.pdf") out_fname = "output8.pdf" degrees = 90 with chdir(tmp_path): print(f"{tmp_path=}") exit_code = run_cli( [ "rotate", "-o", out_fname, in_fname, str(degrees), ] ) in_rotations = get_page_rotations(in_fname) out_rotations = get_page_rotations(out_fname) assert exit_code == 0 assert not any(diff_rotations(in_rotations, out_rotations, degrees)) @pytest.mark.parametrize( # NB "slice" can not be specified as the empty string ("degrees", "slice", "expected_diff"), [ (90, ":", [90, 90, 90, 90, 90, 90, 90, 90]), # every page (90, "::2", [90, 0, 90, 0, 90, 0, 90, 0]), # every other, even index (90, "1::2", [0, 90, 0, 90, 0, 90, 0, 90]), # every other, odd index (90, ":2", [90, 90, 0, 0, 0, 0, 0, 0]), # first 2 ( -90, ":", [-90, -90, -90, -90, -90, -90, -90, -90], ), # negative degrees works ( -720, ":", [-720, -720, -720, -720, -720, -720, -720, -720], ), # |degrees| > 360 is also supported ], ) def test_rotate_slices( 
capsys: pytest.CaptureFixture, tmp_path: Path, degrees: int, slice: str, expected_diff: list[int], ) -> None: in_fname = str(RESOURCES_ROOT / "input8.pdf") out_fname = "output.pdf" with chdir(tmp_path): args = [ "rotate", "-o", f"{out_fname}", f"{in_fname}", "--", # end options, so negative degree values work f"{degrees}", f"{slice}", ] exit_code = run_cli(args) captured = capsys.readouterr() assert exit_code == 0, captured.err in_rotations = get_page_rotations(in_fname) out_rotations = get_page_rotations(out_fname) actual_diff = diff_rotations(in_rotations, out_rotations) assert not any(diff_rotations(actual_diff, expected_diff)) ================================================ FILE: tests/test_sign.py ================================================ from pathlib import Path import pytest from endesive import pdf from .conftest import RESOURCES_ROOT, chdir, run_cli def test_sign_missing_certificate_key_option( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Act with chdir(tmp_path): exit_code = run_cli( ["sign", str(RESOURCES_ROOT / "input8.pdf"), "-o", "out.pdf"] ) captured = capsys.readouterr() # Assert assert exit_code == 2 assert "Missing option" in captured.err def test_sign_already_signed_pdf( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Act with chdir(tmp_path): exit_code = run_cli( [ "sign", str(RESOURCES_ROOT / "sign_pkcs12.pdf"), "-o", "out.pdf", "--p12", str(RESOURCES_ROOT / "signing-certificate.p12"), "--p12-password", "fpdf2", ] ) captured = capsys.readouterr() # Assert assert exit_code == 2 assert "already signed" in captured.err def test_sign_pkcs12(capsys: pytest.CaptureFixture, tmp_path: Path) -> None: # Act with chdir(tmp_path): exit_code = run_cli( [ "sign", str(RESOURCES_ROOT / "input8.pdf"), "-o", "out.pdf", "--p12", str(RESOURCES_ROOT / "signing-certificate.p12"), "--p12-password", "fpdf2", ] ) captured = capsys.readouterr() # Assert assert exit_code == 0 assert not captured.err outpdf = tmp_path / "out.pdf" 
certificate = RESOURCES_ROOT / "signing-certificate.crt" results = pdf.verify(outpdf.read_bytes(), [certificate.read_bytes()]) for hash_ok, signature_ok, cert_ok in results: assert signature_ok assert hash_ok assert cert_ok def test_sign_pkcs12_in_place( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Arrange input8pdf = RESOURCES_ROOT / "input8.pdf" outpdf = tmp_path / "out.pdf" outpdf.write_bytes(input8pdf.read_bytes()) # Act with chdir(tmp_path): exit_code = run_cli( [ "sign", "out.pdf", "--in-place", "--p12", str(RESOURCES_ROOT / "signing-certificate.p12"), "--p12-password", "fpdf2", ] ) captured = capsys.readouterr() # Assert assert exit_code == 0 assert not captured.err certificate = RESOURCES_ROOT / "signing-certificate.crt" results = pdf.verify(outpdf.read_bytes(), [certificate.read_bytes()]) for hash_ok, signature_ok, cert_ok in results: assert signature_ok assert hash_ok assert cert_ok ================================================ FILE: tests/test_uncompress.py ================================================ """Tests for the `uncompress` command.""" from pathlib import Path import pytest from pypdf import PdfReader from typer.testing import CliRunner from pdfly.cli import entry_point runner = CliRunner() @pytest.mark.parametrize( "input_pdf_filepath", Path("sample-files").glob("*.pdf") ) def test_uncompress_all_sample_files( input_pdf_filepath: Path, tmp_path: Path ) -> None: output_pdf_filepath = tmp_path / "uncompressed_output.pdf" result = runner.invoke( entry_point, ["uncompress", str(input_pdf_filepath), str(output_pdf_filepath)], ) assert ( result.exit_code == 0 ), f"Error in uncompressing {input_pdf_filepath}: {result.output}" assert ( output_pdf_filepath.exists() ), f"Output PDF {output_pdf_filepath} does not exist." 
reader = PdfReader(str(output_pdf_filepath)) for page in reader.pages: contents = page.get("/Contents") if contents: assert ( "/Filter" not in contents ), "Content stream is still compressed" ================================================ FILE: tests/test_up2.py ================================================ import os.path from pathlib import Path import pytest from pypdf import PdfReader from .conftest import RESOURCES_ROOT, chdir, run_cli def test_up2_fewer_args(capsys: pytest.CaptureFixture, tmp_path: Path) -> None: with chdir(tmp_path): exit_code = run_cli(["2-up", str(RESOURCES_ROOT / "box.pdf")]) assert exit_code == 2 captured = capsys.readouterr() assert "Missing argument" in captured.err def test_up2_extra_args(capsys: pytest.CaptureFixture, tmp_path: Path) -> None: with chdir(tmp_path): exit_code = run_cli( [ "2-up", str(RESOURCES_ROOT / "box.pdf"), "./out.pdf", "./out2.pdf", ] ) assert exit_code == 2 captured = capsys.readouterr() assert "unexpected extra argument" in captured.err with chdir(tmp_path): assert not os.path.exists("out.pdf"), "'out.pdf' should not exist." assert not os.path.exists("out2.pdf"), "'out2.pdf' should not exist." 
def test_up2_8page_file(capsys: pytest.CaptureFixture, tmp_path: Path) -> None: pdf_file = str(RESOURCES_ROOT / "input8.pdf") out_file_name = "out.pdf" in_reader = PdfReader(pdf_file) assert len(in_reader.pages) == 8 in_height = in_reader.pages[0].mediabox.height in_width = in_reader.pages[0].mediabox.width # Act with chdir(tmp_path): exit_code = run_cli( [ "2-up", pdf_file, out_file_name, ] ) captured = capsys.readouterr() # Assert assert exit_code == 0, captured assert not captured.err out_reader = PdfReader(tmp_path / out_file_name) assert len(out_reader.pages) == 4 out_width = out_reader.pages[0].mediabox.width out_height = out_reader.pages[0].mediabox.height assert out_width == 2 * in_width # PR #78 assert out_height == in_height # Fix issue #218 def test_up2_odd_page_number( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: pdf_file = "sample-files/026-latex-multicolumn/multicolumn.pdf" out_file_path = tmp_path / "out.pdf" # Ensure original page number is odd: in_reader = PdfReader(pdf_file) assert len(in_reader.pages) % 2 == 1 # Act exit_code = run_cli( [ "2-up", pdf_file, str(out_file_path), ] ) captured = capsys.readouterr() # Assert assert exit_code == 0, captured assert not captured.err out_reader = PdfReader(out_file_path) assert len(out_reader.pages) == (len(in_reader.pages) + 1) / 2 ================================================ FILE: tests/test_update_offsets.py ================================================ """ Every CLI command is called here with a typer CliRunner. Here should only be end-to-end tests. 
""" import re import sys from pathlib import Path import pytest from .conftest import RESOURCES_ROOT, run_cli @pytest.mark.skipif(sys.platform == "win32", reason="Does not run on windows") def test_update_offsets(capsys: pytest.CaptureFixture) -> None: # Arrange input = RESOURCES_ROOT / "file-with-invalid-offsets.pdf" file_expected = str(RESOURCES_ROOT / "file-with-fixed-offsets.pdf") # Act exit_code = run_cli( [ "update-offsets", str(input), ] ) # Assert captured = capsys.readouterr() assert exit_code == 0, captured assert not captured.err assert re.search(r"Wrote\s+" + re.escape(str(input)), captured.out) with open(file_expected, encoding="iso-8859-1") as file_exp: lines_exp = file_exp.readlines() with input.open(encoding="iso-8859-1") as file_act: lines_act = file_act.readlines() assert len(lines_exp) == len( lines_act ), f"lines_exp=f{lines_exp}, lines_act=f{lines_act}" for line_no, (line_exp, line_act) in enumerate( zip(lines_exp, lines_act), start=1 ): assert line_exp == line_act, f"Lines differ in line {line_no}" # The current implementation doesn't support valid PDF lines as "/Length 5470>> stream". 
@pytest.mark.parametrize( "input_pdf_filepath", [ "sample-files/002-trivial-libre-office-writer/002-trivial-libre-office-writer.pdf", "sample-files/005-libreoffice-writer-password/libreoffice-writer-password.pdf", "sample-files/007-imagemagick-images/imagemagick-ASCII85Decode.pdf", "sample-files/007-imagemagick-images/imagemagick-CCITTFaxDecode.pdf", "sample-files/007-imagemagick-images/imagemagick-images.pdf", "sample-files/007-imagemagick-images/imagemagick-lzw.pdf", "sample-files/008-reportlab-inline-image/inline-image.pdf", "sample-files/009-pdflatex-geotopo/GeoTopo-komprimiert.pdf", # "sample-files/011-google-doc-document/google-doc-document.pdf", # stream token in line after /Length "sample-files/012-libreoffice-form/libreoffice-form.pdf", "sample-files/013-reportlab-overlay/reportlab-overlay.pdf", "sample-files/015-arabic/habibi-oneline-cmap.pdf", "sample-files/015-arabic/habibi-rotated.pdf", "sample-files/015-arabic/habibi.pdf", "sample-files/016-libre-office-link/libre-office-link.pdf", # "sample-files/017-unreadable-meta-data/unreadablemetadata.pdf", # stream in line after object "sample-files/018-base64-image/base64image.pdf", # "sample-files/019-grayscale-image/grayscale-image.pdf", # stream in line after object "sample-files/020-xmp/output_with_metadata_pymupdf.pdf", # "sample-files/021-pdfa/crazyones-pdfa.pdf", # stream in line is after dictionary "sample-files/022-pdfkit/pdfkit.pdf", "sample-files/023-cmyk-image/cmyk-image.pdf", "sample-files/024-annotations/annotated_pdf.pdf", "sample-files/025-attachment/with-attachment.pdf", ], ) def test_update_offsets_on_all_reference_files( capsys: pytest.CaptureFixture, tmp_path: Path, input_pdf_filepath: Path ) -> None: # Arrange output_pdf_filepath = tmp_path / "out.pdf" # Act exit_code = run_cli( [ "update-offsets", "--encoding", "iso-8859-1", str(input_pdf_filepath), "-o", str(output_pdf_filepath), ] ) # Assert captured = capsys.readouterr() assert exit_code == 0, captured assert not captured.err assert 
f"Wrote {output_pdf_filepath}" in captured.out assert output_pdf_filepath.exists() ================================================ FILE: tests/test_x2pdf.py ================================================ """ Every CLI command is called here with a typer CliRunner. Here should only be end-to-end tests. """ from pathlib import Path import pytest from .conftest import run_cli def test_x2pdf_succeed_to_convert_jpg( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Arrange output = tmp_path / "out.pdf" # Act exit_code = run_cli( [ "x2pdf", "sample-files/003-pdflatex-image/page-0-Im1.jpg", "--output", str(output), ] ) # Assert captured = capsys.readouterr() assert exit_code == 0, captured assert captured.out == "" assert output.exists() def test_x2pdf_succeed_to_embed_pdfs( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Arrange output = tmp_path / "out.pdf" # Act exit_code = run_cli( [ "x2pdf", "sample-files/001-trivial/minimal-document.pdf", "sample-files/002-trivial-libre-office-writer/002-trivial-libre-office-writer.pdf", "--output", str(output), ] ) # Assert captured = capsys.readouterr() assert exit_code == 0, captured assert captured.out == "" assert output.exists() def test_x2pdf_fail_to_open_file( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Arrange & Act exit_code = run_cli( [ "x2pdf", "NonExistingFile", "--output", str(tmp_path / "out.pdf"), ] ) # Assert captured = capsys.readouterr() assert exit_code == 1, captured assert "No such file or directory" in captured.out def test_x2pdf_fail_to_convert( capsys: pytest.CaptureFixture, tmp_path: Path ) -> None: # Arrange & Act exit_code = run_cli( [ "x2pdf", "README.md", "--output", str(tmp_path / "out.pdf"), ] ) # Assert captured = capsys.readouterr() assert exit_code == 1, captured assert "Error: Could not convert 'README.md' to a PDF" in captured.out