Repository: py-pdf/pdfly
Branch: main
Commit: 897420ec65b3
Files: 97
Total size: 263.1 KB
Directory structure:
gitextract_w5t8afxl/
├── .all-contributorsrc
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ ├── feature_request.md
│ │ └── question.md
│ ├── dependabot.yml
│ ├── pull_request_template.md
│ ├── scripts/
│ │ └── check_pr_title.py
│ └── workflows/
│ ├── check-gitignored-files.yml
│ ├── create-github-release.yaml
│ ├── github-ci.yaml
│ ├── publish-to-pypi.yaml
│ ├── release.yaml
│ └── title-check.yml
├── .gitignore
├── .gitmodules
├── .isort.cfg
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── .typos.toml
├── CHANGELOG.md
├── CONTRIBUTORS.md
├── LICENSE
├── Makefile
├── README.md
├── dependabot.yml
├── docs/
│ ├── Makefile
│ ├── conf.py
│ ├── dev/
│ │ ├── intro.md
│ │ └── testing.md
│ ├── index.rst
│ ├── make.bat
│ ├── meta/
│ │ └── project-governance.md
│ └── user/
│ ├── installation.md
│ ├── subcommand-2-up.md
│ ├── subcommand-booklet.md
│ ├── subcommand-cat.md
│ ├── subcommand-check-sign.md
│ ├── subcommand-compress.md
│ ├── subcommand-extract-annotated-pages.md
│ ├── subcommand-extract-images.md
│ ├── subcommand-extract-text.md
│ ├── subcommand-meta.md
│ ├── subcommand-pagemeta.md
│ ├── subcommand-rm.md
│ ├── subcommand-rotate.md
│ ├── subcommand-sign.md
│ ├── subcommand-uncompress.md
│ ├── subcommand-update-offsets.md
│ └── subcommand-x2pdf.md
├── make_release.py
├── mypy.ini
├── pdfly/
│ ├── __init__.py
│ ├── __main__.py
│ ├── _utils.py
│ ├── _version.py
│ ├── booklet.py
│ ├── cat.py
│ ├── check_sign.py
│ ├── cli.py
│ ├── compress.py
│ ├── extract_annotated_pages.py
│ ├── extract_images.py
│ ├── metadata.py
│ ├── pagemeta.py
│ ├── rm.py
│ ├── rotate.py
│ ├── sign.py
│ ├── uncompress.py
│ ├── up2.py
│ ├── update_offsets.py
│ └── x2pdf.py
├── pylock.toml
├── pyproject.toml
├── renovate.json
├── resources/
│ ├── demo2_ca.root.crt.pem
│ ├── signing-certificate.crt
│ └── signing-certificate.p12
├── setup.cfg
├── setup.py
└── tests/
├── __init__.py
├── conftest.py
├── test_booklet.py
├── test_cat.py
├── test_check_sign.py
├── test_cli.py
├── test_compress.py
├── test_extract_annotated_pages.py
├── test_extract_images.py
├── test_pagemeta.py
├── test_rm.py
├── test_rotate.py
├── test_sign.py
├── test_uncompress.py
├── test_up2.py
├── test_update_offsets.py
└── test_x2pdf.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .all-contributorsrc
================================================
{
"projectName": "pdfly",
"projectOwner": "py-pdf",
"repoType": "github",
"repoHost": "https://github.com",
"files": [
"README.md"
],
"imageSize": 100,
"commit": true,
"commitConvention": "eslint",
"contributors": [
{
"login": "MartinThoma",
"name": "Martin Thoma",
"avatar_url": "https://avatars.githubusercontent.com/u/1658117?v=4",
"profile": "http://martin-thoma.com/",
"contributions": [
"code",
"doc",
"ideas",
"infra",
"maintenance",
"projectManagement",
"tutorial"
]
},
{
"login": "Lucas-C",
"name": "Lucas Cimon",
"avatar_url": "https://avatars.githubusercontent.com/u/925560?v=4",
"profile": "https://chezsoi.org/lucas/blog/",
"contributions": [
"bug",
"code",
"doc",
"maintenance"
]
},
{
"login": "pastor-robert",
"name": "Rob Adams",
"avatar_url": "https://avatars.githubusercontent.com/u/35646090?v=4",
"profile": "https://github.com/pastor-robert",
"contributions": [
"code"
]
},
{
"login": "Kaos599",
"name": "Harsh ",
"avatar_url": "https://avatars.githubusercontent.com/u/115716485?v=4",
"profile": "https://github.com/Kaos599",
"contributions": [
"code"
]
},
{
"login": "srogmann",
"name": "Sascha Rogmann",
"avatar_url": "https://avatars.githubusercontent.com/u/59577610?v=4",
"profile": "https://github.com/srogmann",
"contributions": [
"code"
]
},
{
"login": "ebotiab",
"name": "Enrique Botía",
"avatar_url": "https://avatars.githubusercontent.com/u/62219950?v=4",
"profile": "https://github.com/ebotiab",
"contributions": [
"code"
]
},
{
"login": "kommade",
"name": "kommade",
"avatar_url": "https://avatars.githubusercontent.com/u/99523586?v=4",
"profile": "https://github.com/kommade",
"contributions": [
"code"
]
},
{
"login": "Zingzy",
"name": "Zingzy",
"avatar_url": "https://avatars.githubusercontent.com/u/90309290?v=4",
"profile": "https://spoo.me/",
"contributions": [
"code"
]
},
{
"login": "wolfram77",
"name": "Subhajit Sahu",
"avatar_url": "https://avatars.githubusercontent.com/u/3179612?v=4",
"profile": "https://wolfram77.github.io",
"contributions": [
"code"
]
},
{
"login": "kianmeng",
"name": "Kian-Meng Ang",
"avatar_url": "https://avatars.githubusercontent.com/u/134518?v=4",
"profile": "https://www.kianmeng.org",
"contributions": [
"ideas"
]
},
{
"login": "hwine",
"name": "Hal Wine",
"avatar_url": "https://avatars.githubusercontent.com/u/132412?v=4",
"profile": "https://github.com/hwine",
"contributions": [
"bug",
"code"
]
},
{
"login": "philippesamuel",
"name": "philippesamuel",
"avatar_url": "https://avatars.githubusercontent.com/u/32560769?v=4",
"profile": "https://github.com/philippesamuel",
"contributions": [
"doc"
]
},
{
"login": "marcobrb",
"name": "marcobrb",
"avatar_url": "https://avatars.githubusercontent.com/u/219329309?v=4",
"profile": "https://github.com/marcobrb",
"contributions": [
"doc"
]
},
{
"login": "moormaster",
"name": "moormaster",
"avatar_url": "https://avatars.githubusercontent.com/u/2452695?v=4",
"profile": "https://github.com/moormaster",
"contributions": [
"doc",
"code"
]
},
{
"login": "geoffbeier",
"name": "Geoff Beier",
"avatar_url": "https://avatars.githubusercontent.com/u/133355?v=4",
"profile": "https://geoff.tuxpup.com/",
"contributions": [
"code"
]
},
{
"login": "georgthegreat",
"name": "Yuriy Chernyshov",
"avatar_url": "https://avatars.githubusercontent.com/u/1121500?v=4",
"profile": "https://leftparagraphs.com",
"contributions": [
"ideas",
"code"
]
},
{
"login": "lkintact",
"name": "lkintact",
"avatar_url": "https://avatars.githubusercontent.com/u/24726299?v=4",
"profile": "https://github.com/lkintact",
"contributions": [
"bug"
]
}
],
"contributorsPerLine": 5,
"skipCi": false,
"commitType": "docs"
}
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Report some unexpected behaviour to help us improve
title: ''
labels: bug
assignees: ''
---
Describe the bug
**Error details**
If an exception is raised, it is very important that you provide the full error message.
Otherwise members of the `pdfly` community won't be able to help you with your problem.
**Environment**
Please provide the following information:
* **Operating System**: Windows, macOS, Linux flavour...
* **Python version**: you can get this information with `python --version`
* **`pdfly` version used**: if you installed it with `pip`, you can get this information in `pip freeze` output
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
# Ref: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#configuring-the-template-chooser
blank_issues_enabled: false
contact_links:
- name: 💬 Start a discussion
url: https://github.com/py-pdf/pdfly/discussions/new
about: Informal discussion about the project organization, considerations that do not expect a definitive answer, etc.
# - name: Security issue
# url: security@...
# about: Do not report security issues publicly. Email our security contact.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''
---
**Please explain your intent**
Describe what you want to achieve.
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
Please also mention any alternative solutions or features you've considered.
**Additional context**
Add any other context, code snippet or screenshots about the feature request.
You can also mention if you are willing to contribute a PR yourself to provide this feature.
================================================
FILE: .github/ISSUE_TEMPLATE/question.md
================================================
---
name: I have a question
about: Anything that is not a bug report or a feature request
title: ''
labels: question
assignees: ''
---
================================================
FILE: .github/dependabot.yml
================================================
# Set update schedule for GitHub Actions
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
================================================
FILE: .github/pull_request_template.md
================================================
e.g. Fixes #0
**Checklist**:
- [ ] A unit test is covering the code added / modified by this PR
- [ ] In case of a new feature, docstrings have been added, along with some documentation in the `docs/` folder
- [ ] A mention of the change is present in `CHANGELOG.md`
- [ ] This PR is ready to be merged
By submitting this pull request, I confirm that my contribution is made under the terms of the [BSD 3-Clause license](https://github.com/py-pdf/pdfly/blob/main/LICENSE).
================================================
FILE: .github/scripts/check_pr_title.py
================================================
"""Check that all PR titles follow the desired scheme."""
import os
import sys
KNOWN_PREFIXES = (
"SEC: ",
"BUG: ",
"ENH: ",
"DEP: ",
"PI: ",
"ROB: ",
"DOC: ",
"Docs: ", # MRs from Dependabot
"TST: ",
"DEV: ",
"STY: ",
"MAINT: ",
"REL: ",
)
PR_TITLE = os.getenv("PR_TITLE", "")
if (
not PR_TITLE.startswith(KNOWN_PREFIXES)
or not PR_TITLE.split(": ", maxsplit=1)[1]
):
sys.stderr.write(
f"The PR title '{PR_TITLE}' does not follow the projects naming scheme: "
"https://pdfly.readthedocs.io/en/latest/dev/intro.html#commit-messages\n",
)
sys.stderr.write(
"If you do not know which one to choose or if multiple apply, make a best guess. "
"Nobody will complain if it does not quite fit :-)\n",
)
sys.exit(1)
else:
sys.stdout.write(f"PR title '{PR_TITLE}' appears to be valid.\n")
================================================
FILE: .github/workflows/check-gitignored-files.yml
================================================
name: Check for Gitignored Files
on:
push:
branches:
- '**' # Run on all branches
pull_request:
jobs:
check-gitignored-files:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Check for gitignored files in commit
  run: |
    # List every tracked file that matches an ignore rule.
    # A fresh checkout has nothing staged, so "git diff --cached" is always
    # empty, and "git check-ignore" skips tracked files by default: neither
    # of them can detect a committed file that is gitignored.
    git ls-files --cached --ignored --exclude-standard > ignored_files.txt
    # Fail if there are any ignored files
    if [[ -s ignored_files.txt ]]; then
      echo "The following files are gitignored but committed:"
      cat ignored_files.txt
      exit 1
    fi
================================================
FILE: .github/workflows/create-github-release.yaml
================================================
name: Create a GitHub release page
on:
push:
tags:
- '*.*.*'
workflow_dispatch:
workflow_run:
workflows: ["Create git tag"]
types:
- completed
permissions:
contents: write
jobs:
build_and_publish:
if: ${{ github.event.workflow_run.conclusion == 'success' }}
name: Create a GitHub release page
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Prepare variables
id: prepare_variables
run: |
git fetch --tags --force
latest_tag=$(git describe --tags --abbrev=0)
echo "latest_tag=$(git describe --tags --abbrev=0)" >> "$GITHUB_ENV"
echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
echo "tag_body<<$EOF" >> "$GITHUB_ENV"
git --no-pager tag -l "${latest_tag}" --format='%(contents:body)' >> "$GITHUB_ENV"
echo "$EOF" >> "$GITHUB_ENV"
- name: Create GitHub Release 🚀
uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3
with:
tag_name: ${{ env.latest_tag }}
name: Version ${{ env.latest_tag }}, ${{ env.date }}
draft: false
prerelease: false
body: ${{ env.tag_body }}
================================================
FILE: .github/workflows/github-ci.yaml
================================================
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: CI
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
workflow_dispatch:
jobs:
tests:
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
platform: [ubuntu-latest, windows-latest, macos-latest]
name: pytest on ${{ matrix.python-version }} / ${{ matrix.platform }}
runs-on: ${{ matrix.platform }}
steps:
- name: Checkout Code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: 'recursive'
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Upgrade pip
run: python -m pip install --upgrade pip
- name: Install requirements
run: pip install . --group dev
- name: Install pdfly
if: matrix.python-version != '3.8'
run: pip install .
- name: Install pdfly using the minimal versions of the dependencies
if: matrix.python-version == '3.8'
run: |
# We ensure that those minimal versions remain compatible:
sed -i '/dependencies = \[/,/\]/s/>=/==/' pyproject.toml
pip install .
- name: Run tests
run: pytest -vv
codestyle:
name: Check code with black, mypy, ruff & typos
runs-on: ubuntu-latest
steps:
- name: Checkout Code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
submodules: 'recursive'
- name: Cache Downloaded Files
id: cache-downloaded-files
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
with:
path: '**/tests/pdf_cache/*'
key: cache-downloaded-files
- name: Upgrade pip, install pdfly and its dev dependencies
run: |
python -m pip install --upgrade pip
pip install .
pip install . --group dev
- name: Lint with black
run: black --check --extend-exclude sample-files .
- name: Lint with mypy
run: mypy . --ignore-missing-imports --exclude build
- name: Test with ruff
run: ruff check pdfly/
- name: Spell Check Repo
uses: crate-ci/typos@7c572958218557a3272c2d6719629443b5cc26fd # v1.45.2
package:
  name: Build & verify package
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
      with:
        # "env.PYTHON_LATEST" is not defined in this workflow and would expand
        # to an empty string; request the latest Python 3 explicitly instead.
        python-version: "3.x"
    - name: Build package
      run: |
        python -m pip install flit check-wheel-contents
        flit build
        ls -l dist
        check-wheel-contents dist/*.whl
    - name: Test installing package
      run: python -m pip install .
    - name: Test running installed package
      # Run from another directory so that the installed package is imported,
      # not the source tree.
      working-directory: /tmp
      run: python -c "import pdfly;print(pdfly.__version__)"
================================================
FILE: .github/workflows/publish-to-pypi.yaml
================================================
name: Publish Python Package to PyPI
on:
push:
tags:
- '*.*.*'
workflow_dispatch:
workflow_run:
workflows: ["Create git tag"]
types:
- completed
permissions:
contents: write
jobs:
build_and_publish:
if: ${{ github.event.workflow_run.conclusion == 'success' }}
name: Publish a new version
runs-on: ubuntu-latest
steps:
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: 3.x
- name: Install Flit
run: |
python -m pip install --upgrade pip
pip install flit
- name: Checkout Repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Publish Package to PyPI🚀
env:
FLIT_USERNAME: '__token__'
FLIT_PASSWORD: ${{ secrets.FLIT_PASSWORD }}
run: |
flit publish
================================================
FILE: .github/workflows/release.yaml
================================================
# This action assumes that there is a REL-commit whose message starts with
# "REL: <version>", followed by the Markdown-formatted text of the git tag.
# Hence the CHANGELOG is already adjusted
# and it's decided what should be in the release.
# This action only creates and pushes the git tag with the proper contents;
# the release itself is then announced with a GitHub release.
name: Create git tag
on:
push:
branches:
- main
permissions:
contents: write
jobs:
build_and_publish:
name: Publish a new version
runs-on: ubuntu-latest
if: "${{ startsWith(github.event.head_commit.message, 'REL: ') }}"
steps:
- name: Checkout Repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Extract version from commit message
  id: extract_version
  env:
    # The commit message is untrusted text: hand it over through the
    # environment instead of expanding it inside the script, where quotes or
    # "$(...)" in the message would be interpreted by the shell.
    COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
  run: |
    VERSION=$(printf '%s\n' "$COMMIT_MESSAGE" | grep -oP '(?<=REL: )\d+\.\d+\.\d+')
    echo "version=$VERSION" >> "$GITHUB_OUTPUT"
- name: Extract tag message from commit message
  id: extract_message
  env:
    COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
    VERSION: ${{ steps.extract_version.outputs.version }}
  run: |
    # Random delimiter for the multi-line output, so that the message cannot
    # terminate the block early.
    delimiter="$(openssl rand -hex 8)"
    # Drop the first "REL: <version>" occurrence; the rest is the tag message.
    MESSAGE=$(printf '%s\n' "$COMMIT_MESSAGE" | sed "0,/REL: $VERSION/s///")
    {
      echo "message<<${delimiter}"
      echo "$MESSAGE"
      echo "${delimiter}"
    } >> "$GITHUB_OUTPUT"
- name: Create Git Tag
  env:
    VERSION: ${{ steps.extract_version.outputs.version }}
    MESSAGE: ${{ steps.extract_message.outputs.message }}
  run: |
    git config user.name github-actions
    git config user.email github-actions@github.com
    git tag "$VERSION" -m "$MESSAGE"
    git push origin "$VERSION"
================================================
FILE: .github/workflows/title-check.yml
================================================
name: 'PR Title Check'
on:
pull_request:
# check when PR
# * is created,
# * title is edited, and
# * new commits are added (to ensure failing title blocks merging)
types: [opened, reopened, edited, synchronize]
jobs:
title-check:
name: Title check
runs-on: ubuntu-latest
steps:
- name: Checkout Code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Check PR title
env:
PR_TITLE: ${{ github.event.pull_request.title }}
run: python .github/scripts/check_pr_title.py
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# IntelliJ
.idea
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
*.pdf
.envrc
# Documentation files copied when building:
docs/meta/CHANGELOG.md
docs/meta/CONTRIBUTORS.md
# 'make release' creates those files:
RELEASE_COMMIT_MSG.md
RELEASE_TAG_MSG.md
================================================
FILE: .gitmodules
================================================
[submodule "sample-files"]
path = sample-files
url = git@github.com:py-pdf/sample-files.git
================================================
FILE: .isort.cfg
================================================
[settings]
line_length=79
indent=' '
multi_line_output=3
length_sort=0
include_trailing_comma=True
skip=docs
known_third_party = PIL,pypdf,pydantic,setuptools,typer
================================================
FILE: .pre-commit-config.yaml
================================================
# pre-commit run --all-files
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-added-large-files
args: ['--maxkb=1000']
- id: check-ast
- id: check-case-conflict
- id: check-docstring-first
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
exclude: "resources/.*|docs/make.bat"
- id: fix-byte-order-marker
- id: mixed-line-ending
args: ['--fix=lf']
exclude: "docs/make.bat"
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 26.3.1
hooks:
- id: black
args: [--target-version, py36]
- repo: https://github.com/asottile/blacken-docs
rev: 1.20.0
hooks:
- id: blacken-docs
additional_dependencies: [black==22.1.0]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.6
hooks:
- id: ruff
args: ['--fix']
exclude: "tests/"
- repo: https://github.com/asottile/pyupgrade
rev: v3.21.2
hooks:
- id: pyupgrade
args: [--py38-plus]
- repo: https://github.com/pycqa/flake8
rev: 7.3.0
hooks:
- id: flake8
args: ["--ignore", "E,W,F"]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.19.1'
hooks:
- id: mypy
files: ^pdfly/.*
args: [--ignore-missing-imports]
additional_dependencies:
- "pydantic>=1.10.4"
================================================
FILE: .readthedocs.yaml
================================================
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2
build:
os: ubuntu-22.04
tools:
python: "3.12"
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
# If using Sphinx, optionally build your docs in additional formats such as PDF
formats: all
# Optionally declare the Python requirements required to build your docs
python:
install:
- method: pip
path: .
extra_requirements:
- docs
================================================
FILE: .typos.toml
================================================
[default]
extend-ignore-identifiers-re = [
"certifi",
"FlateDecode",
# This typo appears in a .tex file in the sample-files git submodule:
"exampe"
]
================================================
FILE: CHANGELOG.md
================================================
# CHANGELOG
## Version 0.6.0, not released yet
### Bug Fixes (BUG)
- `2up` incorrectly handled documents with an odd number of pages - [issue #219](https://github.com/py-pdf/pdfly/issues/218)
### New Features (ENH)
- `pagemeta` now displays the name of a known page format that is close to the page dimensions
## Version 0.5.1, 2025-10-13
### New Features (ENH)
- `extract-images`: output filenames are now formatted using four digits for page numbers, so that output files are ordered alphabetically
- ensured support for Python 3.14
### Bug Fixes (BUG)
- `requests` is now a dependency, to prevent a `ModuleNotFoundError` when running with `uv`
## Version 0.5.0, 2025-10-13
### New Features (ENH)
- New `extract-annotated-pages` to filter out only the user annotated pages ([PR #98](https://github.com/py-pdf/pdfly/pull/98))
- New `rotate` sub-command to rotate specified pages ([PR #128](https://github.com/py-pdf/pdfly/pull/128))
- Added optional `--password` argument to `cat` to perform decryption ([PR #61](https://github.com/py-pdf/pdfly/pull/61))
- `pagemeta` now displays known page formats when it can detect them: A3, A4, A5, Letter, Legal
- `pagemeta` now displays the rotation value.
- New `sign` sub-command to create a signed pdf from an existing pdf ([PR #165](https://github.com/py-pdf/pdfly/pull/165))
- New `check-sign` sub-command to verify the signature of a signed pdf ([PR #166](https://github.com/py-pdf/pdfly/pull/166))
### Bug Fixes (BUG)
- `pypdf[full]` is now a dependency, instead of just `pypdf`, to avoid some cases of `DependencyError`
### Deprecations (DEP)
* support for older Python3 versions has been dropped, `pdfly` now requires Python 3.10+
## Version 0.4.0, 2024-12-08
### New Features (ENH)
- New `booklet` command to reorder and two-up PDF pages for booklet printing ([PR #77](https://github.com/py-pdf/pdfly/pull/77))
- New `uncompress` command ([PR #75](https://github.com/py-pdf/pdfly/pull/75))
- New `update-offsets` command to adjust offsets and lengths ([PR #15](https://github.com/py-pdf/pdfly/pull/15))
- New `rm` command ([PR #59](https://github.com/py-pdf/pdfly/pull/59))
- `metadata`: now also displaying CreationDate, Creator, Keywords & Subject ([PR #73](https://github.com/py-pdf/pdfly/pull/73))
- Add warning for out-of-bounds page range in pdfly `cat` command ([PR #58](https://github.com/py-pdf/pdfly/pull/58))
### Bug Fixes (BUG)
- Fixed the `2-up` command, which only showed one page per sheet, on the left side, with blank space on the right ([PR #78](https://github.com/py-pdf/pdfly/pull/78))
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.3...0.4.0)
## Version 0.3.3, 2024-04-14
### Developer Experience (DEV)
- Chain workflows
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.2...0.3.3)
## Version 0.3.2, 2024-04-14
### Developer Experience (DEV)
- Decouple git tag / PyPI release / Github release page (#49, #50)
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.1...0.3.2)
## Version 0.3.1, 2024-03-29
### Maintenance (MAINT)
- Update pypdf usage (#48)
### Developer Experience (DEV)
- Release via REL commit (#48)
- Fix mypy issues
- Add make_release.py
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.0...0.3.1)
## Version 0.3.0, 2023-12-17
### New Features (ENH)
- Add x2pdf command (#25)
### Bug Fixes (BUG)
- boxes are floats, not int
- Add missing fpdf2 dependency (#29)
### Documentation (DOC)
- cat command
- More examples for the cat subcommand
- Add cat subcommand
- Link to readthedocs
- Add project governance file
- Move readthedocs config file to root
- Add docs (#24)
### Developer Experience (DEV)
- Checkout sample-files in CI (#30)
- Let dependabot update Github Actions
- Add action for automatic releases
### Maintenance (MAINT)
- Update dependencies (#42)
- In the cat subcommand, replace the usage of the deprecated PdfMerger by PdfWriter (#34)
- Update .pre-commit-config.yaml
- Adjust x2pdf syntax
### Testing (TST)
- cat with two files (#41)
- Test cat command with more parameters + validate result (#40)
- Adding unit tests (#28)
### Other
- Bump actions/setup-python from 4 to 5 (#39) (by dependabot[bot])
- test_extract_images_monochrome() is now passing (by CimonLucas(LCM))
- Bump actions/setup-python from 3 to 4 (#27) (by dependabot[bot])
- Bump actions/checkout from 3 to 4 (#26) (by dependabot[bot])
- Ensure input PDF exists for cat subcommand (by MartinThoma)
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.2.14...0.3.0)
================================================
FILE: CONTRIBUTORS.md
================================================
# List of contributors
The list of contributors has been moved into the [README.md](https://github.com/py-pdf/pdfly/blob/main/README.md#contributors-).
================================================
FILE: LICENSE
================================================
BSD 3-Clause License
Copyright (c) 2022, py-pdf
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: Makefile
================================================
maint:
pre-commit autoupdate
python -m pip install --upgrade .
python -m pip lock --group dev --group docs .
uv pip install -r pylock.toml
git submodule update --remote
release:
python make_release.py
git commit -eF RELEASE_COMMIT_MSG.md
upload:
make clean
flit publish
clean:
python setup.py clean --all
pyclean .
rm -rf tests/__pycache__ pdfly/__pycache__ Image9.png htmlcov docs/_build dist dont_commit_merged.pdf dont_commit_writer.pdf pdfly.egg-info
lint:
mypy . --ignore-missing-imports --exclude build
ruff check --fix --unsafe-fixes
test:
pytest tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=30
================================================
FILE: README.md
================================================
[](https://pypi.org/pypi/pdfly#history)
[](https://pypi.org/project/pdfly/)
[](https://opensource.org/license/bsd-3-clause)
[](https://pdfly.readthedocs.io/en/latest/)
[](https://github.com/py-pdf/pdfly/actions?query=branch%3Amain)
[](https://github.com/py-pdf/pdfly/commits/main/)
[](https://github.com/py-pdf/pdfly/issues)
[](https://github.com/py-pdf/pdfly/pulls)
[](https://github.com/py-pdf/pdfly/actions)
[](https://makeapullrequest.com)
[](https://www.firsttimersonly.com/)
# pdfly
pdfly (say: PDF-li) is a pure-python cli application for manipulating PDF files.
## Installation
```bash
pip install -U pdfly
```
As `pdfly` is an application, you might want to install it with [`pipx`](https://pypi.org/project/pipx/) or [`uv tool`](https://docs.astral.sh/uv/concepts/tools/): `uvx pdfly --help`
## Usage
```console
$ pdfly --help
Usage: pdfly [OPTIONS] COMMAND [ARGS]...
pdfly is a pure-python cli application for manipulating PDF files.
╭─ Options ──────────────────────────────────────────────────────────────────────────────────────╮
│ --version │
│ --help Show this message and exit. │
╰────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Commands ─────────────────────────────────────────────────────────────────────────────────────╮
│ 2-up Create a booklet-style PDF from a single input. │
│ booklet Reorder and two-up PDF pages for booklet printing. │
│ cat Extract and concatenate pages from PDF files into a single PDF file. │
│ check-sign Verifies the signature of a signed PDF. │
│ compress Compress a PDF. │
│ extract-annotated-pages Extract only the annotated pages from a PDF. │
│ extract-images Extract images from PDF without resampling or altering. │
│ extract-text Extract text from a PDF file. │
│ meta Show metadata of a PDF file │
│ pagemeta Give details about a single page. │
│ rm Remove pages from PDF files. │
│ rotate Rotate specified pages by the specified amount │
│ sign Creates a signed PDF from an existing PDF file. │
│ uncompress Module for uncompressing PDF content streams. │
│ update-offsets Updates offsets and lengths in a simple PDF file. │
│ x2pdf Convert one or more files to PDF. Each file is a page. │
╰────────────────────────────────────────────────────────────────────────────────────────────────╯
```
You can see the help of every subcommand by typing `--help`:
```console
$ pdfly 2-up --help
Usage: pdfly 2-up [OPTIONS] PDF OUT
Create a booklet-style PDF from a single input.
Pairs of two pages will be put on one page (left and right)
usage: python 2-up.py input_file output_file
╭─ Arguments ───────────────────────────────────────╮
│ * pdf PATH [default: None] [required] │
│ * out PATH [default: None] [required] │
╰───────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────╮
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────╯
```
**Note:** `pdfly` has nothing to do with ``pdfly.net`` or ``gopdfly.com``
## Contributors ✨
pdfly is a free software project without any company affiliation. We cannot pay
contributors, but we do value their contributions 🤗
This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification
([emoji key](https://allcontributors.org/docs/en/emoji-key)).
Contributions of any kind welcome!
The list might not be complete. You can find more contributors via the git
history and [GitHub's 'Contributors' feature](https://github.com/py-pdf/pdfly/graphs/contributors).
================================================
FILE: dependabot.yml
================================================
# Doc: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
- package-ecosystem: "gitsubmodule"
commit-message:
prefix: "MAINT"
- package-ecosystem: "github-actions"
commit-message:
prefix: "MAINT"
- package-ecosystem: "pip"
commit-message:
prefix: "MAINT"
================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
================================================
FILE: docs/conf.py
================================================
"""
Configuration file for the Sphinx documentation builder.
This file only contains a selection of the most common options.
For a full list see the documentation:
https://www.sphinx-doc.org/en/master/usage/configuration.html
"""
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import os
import shutil
import sys

# Put the docs directory and the repository root on sys.path *before* importing
# the package. Previously `pdfly` was imported first, so these entries had no
# effect on that import and the build only worked when pdfly was already
# installed in the environment.
# The paths are relative, so this relies on the working directory being docs/
# when this file is executed.
sys.path.insert(0, os.path.abspath("."))  # noqa
sys.path.insert(0, os.path.abspath("../"))  # noqa

import pdfly as py_pkg  # noqa: E402  (must come after the sys.path setup above)

# Copy the top-level CHANGELOG.md and CONTRIBUTORS.md into docs/meta/ so they
# can be included in the documentation (index.rst lists meta/CHANGELOG and
# meta/CONTRIBUTORS in its toctree).
shutil.copyfile("../CHANGELOG.md", "meta/CHANGELOG.md")
shutil.copyfile("../CONTRIBUTORS.md", "meta/CONTRIBUTORS.md")
# -- Project information -----------------------------------------------------
# `py_pkg` is the `pdfly` package imported at the top of this file; the project
# name and both version strings below are read from it instead of being
# hard-coded here.
project = py_pkg.__name__
# NOTE(review): the copyright year is a fixed string - confirm whether it
# should be kept in sync with the current year.
copyright = "2023, pdfly contributors"
author = "pdfly contributors"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
# (Set to the full `__version__` string here; no separate X.Y form is derived.)
version = py_pkg.__version__
# The full version, including alpha/beta/rc tags.
release = py_pkg.__version__
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
needs_sphinx = "4.0.0"
# Setting for the "myst_parser" extension enabled below; presumably makes every
# Markdown link be treated as an external URL - confirm against the MyST-Parser
# documentation.
myst_all_links_external = True
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.intersphinx",
"sphinx.ext.autosummary",
"sphinx.ext.coverage",
"sphinx.ext.mathjax",
"sphinx.ext.viewcode",
"sphinx.ext.napoleon",
# External
"myst_parser",
]
# Cross-project link targets for sphinx.ext.intersphinx, keyed by a label.
# NOTE(review): the `None` second element presumably means "use the default
# inventory location under this URL" - confirm against the intersphinx docs.
intersphinx_mapping = {
"py-pdf organization": ("https://py-pdf.github.io/", None),
}
nitpick_ignore_regex = [
# For reasons unclear at this stage the io module prefixes everything with _io
# and this confuses sphinx
(r"py:class", r"_io.(FileIO|BytesIO|Buffered(Reader|Writer))"),
]
# Defaults applied to sphinx.ext.autodoc directives.
autodoc_default_options = {
"member-order": "bysource",
"members": True,
"show-inheritance": True,
"undoc-members": True,
}
autodoc_inherit_docstrings = False
autodoc_typehints_format = "short"
python_use_unqualified_type_names = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# NOTE(review): "canonical_url" and "analytics_id" are set to empty strings;
# verify that every key here is still accepted by the sphinx_rtd_theme version
# used for the build.
html_theme_options = {
"canonical_url": "",
"analytics_id": "",
"logo_only": True,
"display_version": True,
"prev_next_buttons_location": "bottom",
"style_external_links": False,
# Toc options
"collapse_navigation": True,
"sticky_navigation": True,
"navigation_depth": 4,
"includehidden": True,
"titles_only": False,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
# The same image is referenced from docs/index.rst as ./pdfly-logo.png.
html_logo = "pdfly-logo.png"
# -- Options for Napoleon -----------------------------------------------------
napoleon_google_docstring = True
napoleon_numpy_docstring = False # Explicitly prefer Google style docstring
napoleon_use_param = True # for type hint support
napoleon_use_rtype = (
False # False so the return type is inline with the description.
)
================================================
FILE: docs/dev/intro.md
================================================
# Developer Intro
pdfly is an application and thus non-developers
might also use it.
## Installing Requirements
```
pip install . --group dev
```
## Running Tests
See [testing pdfly with pytest](testing.md)
## Documentation
To preview the HTML documentation, you can run this command:
```
sphinx-autobuild docs docs/_build/html
```
## Tools: git and pre-commit
Git is a command line application for version control. If you don't know it,
you can [play ohmygit](https://ohmygit.org/) to learn it.
GitHub is the service where the pdfly project is hosted. While git is free and
open source, GitHub is a paid service by Microsoft, but free in a lot of
cases.
[pre-commit](https://pypi.org/project/pre-commit/) is a command line application
that uses git hooks to automatically execute code. This allows you to avoid
style issues and other code quality issues. After you entered `pre-commit install`
once in your local copy of pdfly, it will automatically be executed when
you `git commit`.
## Commit Messages
Having a clean commit message helps people to quickly understand what the commit
is about, without actually looking at the changes. The first line of the
commit message is used to [auto-generate the CHANGELOG](https://github.com/py-pdf/pdfly/blob/main/make_release.py).
For this reason, the format should be:
```
PREFIX: DESCRIPTION
BODY
```
The `PREFIX` can be:
* `SEC`: Security improvements. Typically an infinite loop that was possible.
* `BUG`: A bug was fixed. Likely there is one or multiple issues. Then write in
the `BODY`: `Closes #123` where 123 is the issue number on GitHub.
It would be absolutely amazing if you could write a regression test in those
cases. That is a test that would fail without the fix.
A bug is always an issue for pdfly users - test code or CI that was fixed is
not considered a bug here.
* `ENH`: A new feature! Describe in the body what it can be used for.
* `DEP`: A deprecation. Either marking something as "this is going to be removed"
or actually removing it.
* `PI`: A performance improvement. This could also be a reduction in the
file size of PDF files generated by pdfly.
* `ROB`: A robustness change. Dealing better with broken PDF files.
* `DOC`: A documentation change. `Docs:` is also allowed for commits made by Dependabot.
* `TST`: Adding or adjusting tests.
* `DEV`: Developer experience improvements, e.g. pre-commit or setting up CI.
* `MAINT`: Quite a lot of different stuff. Performance improvements are for sure
the most interesting changes in here. Refactorings as well.
* `STY`: A style change. Something that makes pdfly code more consistent.
Typically a small change. It could also be better error messages for
end users.
The prefix is used to generate the CHANGELOG. Every PR must have exactly one -
if you feel like several match, take the top one from this list that matches for
your PR.
## Pull Requests
Smaller Pull Requests (PRs) are preferred as it's typically easier to merge
them. For example, if you have some typos, a few code-style changes, a new
feature, and a bug-fix, that could be 3 or 4 PRs.
A PR must be complete. That means if you introduce a new feature it must be
finished within the PR and have a test for that feature.
## Releases
To perform a new release, follow this checklist:
1. update `__version__` in `pdfly/_version.py` & `CHANGELOG.md` in order to specify the release date for the new version
2. perform a `REL`-prefixed commit, _e.g._ `REL: X.Y.0`, then make & merge a PR for it.
The GitHub Actions pipeline should then create a new `git` tag and publish a new version on PyPI.
3. edit the [GitHub release note](https://github.com/py-pdf/pdfly/releases), using the `CHANGELOG.md` content for the description
================================================
FILE: docs/dev/testing.md
================================================
# Testing
pdfly uses [`pytest`](https://docs.pytest.org/en/latest/) for testing.
To run the tests you need to install the CI (Continuous Integration) dependencies by running `pip install . --group dev`.
================================================
FILE: docs/index.rst
================================================
Welcome to pdfly
================
.. image:: https://img.shields.io/pypi/v/pdfly.svg
:target: https://pypi.org/pypi/pdfly#history
.. image:: https://img.shields.io/pypi/pyversions/pdfly.svg
:target: https://pypi.org/project/pdfly/
.. image:: https://img.shields.io/badge/License-BSD%203%20Clause-blue.svg
:target: https://opensource.org/license/bsd-3-clause
.. image:: https://app.readthedocs.org/projects/pdfly/badge/?version=latest
:target: https://pdfly.readthedocs.io/en/latest/
.. image:: https://github.com/py-pdf/pdfly/workflows/CI/badge.svg
:target: https://github.com/py-pdf/pdfly/actions?query=branch%3Amain
.. image:: https://img.shields.io/github/last-commit/py-pdf/pdfly
:target: https://github.com/py-pdf/pdfly/commits/main/
.. image:: https://img.shields.io/github/issues-closed/py-pdf/pdfly
:target: https://github.com/py-pdf/pdfly/issues
.. image:: https://img.shields.io/github/issues-pr-closed/py-pdf/pdfly
:target: https://github.com/py-pdf/pdfly/pulls
.. image:: https://img.shields.io/badge/linters-black,ruff,mypy-green.svg
:target: https://github.com/py-pdf/pdfly/actions
.. image:: https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat
:target: https://makeapullrequest.com
.. image:: https://img.shields.io/badge/first--timers--only-friendly-blue.svg
:target: https://www.firsttimersonly.com/
pdfly (say: PDF-li) is a pure-python cli application for manipulating PDF files.
.. image:: ./pdfly-logo.png
:scale: 25%
Repository: `github.com/py-pdf/pdfly <https://github.com/py-pdf/pdfly>`__
Installation
------------
.. code-block::
pip install -U pdfly
As ``pdfly`` is an application, you might want to install it with `pipx <https://pypi.org/project/pipx/>`__ or `uv tool <https://docs.astral.sh/uv/guides/tools/>`__: ``uvx pdfly --help``
Usage
-----
.. code-block::
$ pdfly --help
Usage: pdfly [OPTIONS] COMMAND [ARGS]...
pdfly is a pure-python cli application for manipulating PDF files.
╭─ Options ──────────────────────────────────────────────────────────────────────────────────────╮
│ --version │
│ --help Show this message and exit. │
╰────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Commands ─────────────────────────────────────────────────────────────────────────────────────╮
│ 2-up Create a booklet-style PDF from a single input. │
│ booklet Reorder and two-up PDF pages for booklet printing. │
│ cat Extract and concatenate pages from PDF files into a single PDF file. │
│ check-sign Verifies the signature of a signed PDF. │
│ compress Compress a PDF. │
│ extract-annotated-pages Extract only the annotated pages from a PDF. │
│ extract-images Extract images from PDF without resampling or altering. │
│ extract-text Extract text from a PDF file. │
│ meta Show metadata of a PDF file │
│ pagemeta Give details about a single page. │
│ rm Remove pages from PDF files. │
│ rotate Rotate specified pages by the specified amount │
│ sign Creates a signed PDF from an existing PDF file. │
│ uncompress Module for uncompressing PDF content streams. │
│ update-offsets Updates offsets and lengths in a simple PDF file. │
│ x2pdf Convert one or more files to PDF. Each file is a page. │
╰────────────────────────────────────────────────────────────────────────────────────────────────╯
You can see the help of every subcommand by typing ``--help``:
.. code-block::
$ pdfly 2-up --help
Usage: pdfly 2-up [OPTIONS] PDF OUT
Create a booklet-style PDF from a single input.
Pairs of two pages will be put on one page (left and right)
usage: python 2-up.py input_file output_file
╭─ Arguments ───────────────────────────────────────╮
│ * pdf PATH [default: None] [required] │
│ * out PATH [default: None] [required] │
╰───────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────╮
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────╯
GitHub ⭐️
---------
.. image:: https://api.star-history.com/svg?repos=py-pdf/pdfly&type=date&legend=top-left
:target: https://www.star-history.com/#py-pdf/pdfly&type=date&legend=top-left
.. note:: ``pdfly`` has nothing to do with ``pdfly.net`` or ``gopdfly.com``
.. toctree::
:caption: User Guide
:maxdepth: 1
user/installation
user/subcommand-2-up
user/subcommand-booklet
user/subcommand-cat
user/subcommand-check-sign
user/subcommand-compress
user/subcommand-extract-annotated-pages
user/subcommand-extract-images
user/subcommand-extract-text
user/subcommand-meta
user/subcommand-pagemeta
user/subcommand-rm
user/subcommand-rotate
user/subcommand-sign
user/subcommand-uncompress
user/subcommand-update-offsets
user/subcommand-x2pdf
.. toctree::
:caption: Developer Guide
:maxdepth: 1
dev/intro
dev/testing
.. toctree::
:caption: About pdfly
:maxdepth: 1
meta/CHANGELOG
meta/CONTRIBUTORS
meta/project-governance
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
================================================
FILE: docs/make.bat
================================================
@ECHO OFF
REM Switch to the directory containing this script so that the relative
REM SOURCEDIR and BUILDDIR below resolve against it; popd at :end undoes this.
pushd %~dp0
REM Command file for Sphinx documentation
REM Fall back to the sphinx-build found on PATH unless SPHINXBUILD is already set.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
REM Probe run with all output discarded; only the resulting errorlevel is used.
%SPHINXBUILD% >NUL 2>NUL
REM errorlevel 9009 is handled as command not found: print install hints, exit 1.
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
REM Without a target argument, show the Sphinx help instead.
if "%1" == "" goto help
REM Forward the requested target to sphinx-build in make mode, -M.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
================================================
FILE: docs/meta/project-governance.md
================================================
# Project Governance
This document describes how the pdfly project is managed. It describes the
different actors, their roles, and the responsibilities they have.
`pdfly` is part of the `py-pdf` organization,
and hence we try to follow some [maintainer guidelines](https://py-pdf.github.io/pages/maintainer-guidelines.html) & [rules](https://py-pdf.github.io/pages/py-pdf-owners.html).
## Terminology
* The **project** is pdfly - a free and open-source pure-python PDF command line
tool.
It includes the [code, issues, and discussions on GitHub](https://github.com/py-pdf/pdfly),
and [the documentation on ReadTheDocs](https://pdfly.readthedocs.io/en/latest/),
[the package on PyPI](https://pypi.org/project/pdfly/).
* A **maintainer** is a person who has technical permissions to change one or
more parts of the project. It is a person who is driven to keep the project running
and improving.
* A **contributor** is a person who contributes to the project. That could be
through writing code - in the best case through forking and creating a pull
request, but that is up to the maintainer. Other contributors describe issues,
help to ask questions on existing issues to make them easier to answer,
participate in discussions, and help to improve the documentation. Contributors
are similar to maintainers, but without technical permissions.
* A **user** is a person who runs the pdfly command line application. Users
are not necessarily developers and usually do not know the internals of pdfly.
They only use the public interface of pdfly - its command line. They will
likely have less knowledge about PDF than contributors.
* The **community** is all of that - the users, the contributors, and the maintainers.
## Governance, Leadership, and Steering pdfly forward
pdfly is a free and open source project.
As pdfly does not have any formal relationship with any company and no funding,
all the work done by the community consists of voluntary contributions. People don't
get paid, but choose to spend their free time to create software of which
many more are profiting. This has to be honored and respected.
pdfly has the **Benevolent Dictator**
governance model. The benevolent dictator is a maintainer with all technical permissions -
most importantly the permission to push new pdfly versions on PyPI.
Being benevolent, the benevolent dictator listens to the community when making decisions and tries
their best to make decisions from which the overall community profits - the
current one and the potential future one. Being a dictator, the benevolent dictator always has
the power and the right to make decisions on their own - also against some
members of the community.
As pdfly is free software, parts of the community can split off (fork the code)
and create a new community. This should limit the harm a bad benevolent dictator can do.
## Project Language
The project language is (American) English. All documentation and issues must
be written in English to ensure that the community can understand it.
We appreciate the fact that large parts of the community don't have English
as their mother tongue. We try our best to understand others -
[automatic translators](https://translate.google.com/) might help.
## Expectations
The community can expect the following:
* The **benevolent dictator** tries their best to make decisions from which the overall
community profits. The benevolent dictator is aware that his/her decisions can shape the
overall community. Once the benevolent dictator notices that she/he doesn't have the time
to advance pdfly, he/she looks for a new benevolent dictator. As it is expected
that the benevolent dictator will step down at some point of their choice
(hopefully before their death), it is NOT a benevolent dictator for life
(BDFL).
* Every **maintainer** (including the benevolent dictator) is aware of their permissions and
the harm they could do. They value security and ensure that the project is
not harmed. They give their technical permissions back if they don't need them
any longer. Any long-time contributor can become a maintainer. Maintainers
can - and should! - step down from their role when they realize that they
can no longer commit that time.
* Every **contributor** is aware that the time of maintainers and the benevolent dictator is
limited. Short pull requests that briefly describe the solved issue and have
a unit test have a higher chance to get merged soon - simply because it's
easier for maintainers to see that the contribution will not harm the overall
project. Their contributions are documented in the git history and in the
public issues.
* Every **community member** uses respectful language. We are all human, we
get upset about things we care about, and other things than what's visible on the
internet go on in our lives. pdfly does not pay its contributors - keep all
of that in mind when you interact with others. We are here because we want to
help others.
### Issues and Discussions
An issue is any technical description that aims at bringing pdfly forward:
* Bug tickets: Something went wrong because pdfly developers made a mistake.
* Feature requests: pdfly does not support all features of the PDF specifications.
There are certainly also convenience methods that would help users a lot.
* Robustness requests: There are many broken PDFs around. In some cases, we can
deal with that. It's kind of a mixture between a bug ticket and a feature
request.
* Performance tickets: pdfly could be faster - let us know about your specific
scenario.
Any comment that is in those technical descriptions which is not helping the
discussion can be deleted. This is especially true for "me too" comments on bugs
or "bump" comments for desired features. People can express this with 👍 / 👎
reactions.
[Discussions](https://github.com/py-pdf/pdfly/discussions) are open. No comments
will be deleted there - except if they are clearly unrelated spam or only
try to insult people (luckily, the community has been very respectful so far 🤞)
### Releases
The maintainers follow [semantic versioning](https://semver.org/). Most
importantly, that means that breaking changes will have a major version bump.
Be aware that unintentional breaking changes might still happen. The `pdfly`
maintainers do their best to fix that in a timely manner - please
[report such issues](https://github.com/py-pdf/pdfly/issues)!
## People
* Martin Thoma has been the benevolent dictator since April 2022.
* Maintainers:
* Matthew Stamy (mstamy2) was the benevolent dictator for a long time.
He still is around on GitHub once in a while and has permissions on PyPI and GitHub.
* Matthew Peveler (MasterOdin) is a maintainer on GitHub.
================================================
FILE: docs/user/installation.md
================================================
# Installation
There are several ways to install pdfly. The most common option is to use pip.
## pip
pdfly requires Python 3.10+ to run.
Typically Python comes with `pip`, a package installer. Using it you can
install pdfly:
```bash
pip install pdfly
```
If you are not a super-user (a system administrator / root), you can also just
install pdfly for your current user:
```bash
pip install --user pdfly
```
## pipx
We recommend installing pdfly via [pipx](https://pypi.org/project/pipx/):
```bash
pipx install pdfly
```
pipx installs the pdfly application in an isolated environment. That guarantees
that no other application interferes with its dependencies.
## uv
pdfly can be run without persistent installation using [uv tool run](https://docs.astral.sh/uv/guides/tools/#running-tools):
```bash
uv tool run pdfly
```
via the [uvx](https://docs.astral.sh/uv/guides/tools/#running-tools) alias:
```bash
uvx pdfly
```
or it can be installed using [uv tool install](https://docs.astral.sh/uv/guides/tools/#installing-tools):
```bash
uv tool install pdfly
```
## Python Version Support
If ✓ is given, it works. It is tested via CI.
If ✖ is given, it is guaranteed not to work.
If it's not filled, we don't guarantee support, but it might still work.
| Python | 3.14 | 3.13 | 3.12 | 3.11 | 3.10 | 2.7 |
| ---------------------- | ---- | ---- | ---- | ---- | ---- | --- |
| pdfly | ✓ | ✓ | ✓ | ✓ | ✓ | ✖ |
## Development Version
In case you want to use the current version under development:
```bash
pip install git+https://github.com/py-pdf/pdfly.git
```
================================================
FILE: docs/user/subcommand-2-up.md
================================================
# 2-up
Create a booklet-style PDF from a single input.
## Usage
```
$ pdfly 2-up --help
Usage: pdfly 2-up [OPTIONS] PDF OUT
Create a booklet-style PDF from a single input.
Pairs of two pages will be put on one page (left and right)
usage: python 2-up.py input_file output_file
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
│ * out PATH [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Convert `document.pdf` into a booklet and write the output in `booklet.pdf`.
```
pdfly 2-up document.pdf booklet.pdf
```
================================================
FILE: docs/user/subcommand-booklet.md
================================================
# booklet
Reorder and two-up PDF pages for booklet printing.
## Usage
```
$ pdfly booklet --help
Usage: pdfly booklet [OPTIONS] FILENAME OUTPUT
Reorder and two-up PDF pages for booklet printing.
If the number of pages is not a multiple of four, pages are
added until it is a multiple of four. This includes a centerfold
in the middle of the booklet and a single page on the inside
back cover. The content of those pages are from the
centerfold-file and blank-page-file files, if specified, otherwise
they are blank pages.
Example:
pdfly booklet input.pdf output.pdf
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * filename FILE [default: None] [required] │
│ * output FILE [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --blank-page-file -b FILE page added if input is odd number of pages │
│ [default: None] │
│ --centerfold-file -c FILE double-page added if input is missing >= 2 │
│ pages │
│ [default: None] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Convert `document.pdf` into a booklet and write the output in `booklet.pdf`.
```
pdfly booklet document.pdf booklet.pdf
```
================================================
FILE: docs/user/subcommand-cat.md
================================================
# cat
The cat command can split / extract pages from a PDF. It can also
join/merge/combine multiple PDF documents into a single one.
## Usage
```
pdfly cat --help
Usage: pdfly cat [OPTIONS] FILENAME FN_PGRGS...
Extract and concatenate pages from PDF files into a single PDF file.
Page ranges refer to the previously-named file. A file not followed by a page
range means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages.
-1 last page.
22 just the 23rd page.
:-1 all but the last page.
0:3 the first three pages.
-2 second-to-last page.
:3 the first three pages.
-2: last two pages.
5: from the sixth page onward.
-3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end.
3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9
2::-1 2 1 0.
::-1 all pages in reverse order.
Examples
pdfly cat -o output.pdf head.pdf -- content.pdf :6 7: tail.pdf -1
Concatenate all of head.pdf, all but page seven of content.pdf,
and the last page of tail.pdf, producing output.pdf.
pdfly cat chapter*.pdf >book.pdf
You can specify the output file by redirection.
pdfly cat chapter?.pdf chapter10.pdf >book.pdf
In case you don't want chapter 10 before chapter 2.
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * filename PATH [default: None] [required] │
│ * fn_pgrgs FN_PGRGS... filenames and/or page ranges [default: None] │
│ [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ * --output -o PATH [default: None] [required] │
│ --verbose --no-verbose show page ranges as they are being │
│ read │
│ [default: no-verbose] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Split a PDF
Get the second, third, and fourth page of a PDF:
```
pdfly cat input.pdf 1:4 -o out.pdf
```
### Extract a Page
Get the sixth page of a PDF:
```
pdfly cat input.pdf 5 -o out.pdf
```
Note that it is `5`, because the page indices always start at 0.
### Specify a negative index
Get the last page of a PDF:
```
pdfly cat -o out.pdf input.pdf -- -1
```
`--` must be used to escape negative indices.
### Concatenate two PDFs
Just combine two PDF files so that the pages come right after each other:
```
pdfly cat input1.pdf input2.pdf -o out.pdf
```
### Decrypt a PDF document
```
pdfly cat --password=SECRET doc.pdf -o doc-decrypted.pdf
```
================================================
FILE: docs/user/subcommand-check-sign.md
================================================
# check-sign
Validate that a PDF document has a digital signature matching a given certificate.
## Usage
```
Usage: pdfly check-sign [OPTIONS] FILENAME
Verifies the signature of a signed PDF.
Examples
pdfly verify input.pdf --pem certs.pem
Verifies the input.pdf with a PEM certificate bundle.
╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * filename FILE [required] │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * --pem FILE PEM certificate file [required] │
│ --verbose --no-verbose Show signature verification details. [default: no-verbose] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Verify PDF signature against a PEM certificate
Verifies the input.pdf with a PEM certificate bundle.
```
pdfly check-sign input.pdf --pem certs.pem
```
================================================
FILE: docs/user/subcommand-compress.md
================================================
# compress
Compress a PDF using lossless FlateDecode compression.
**Note:** If compression would result in a larger file, the original file is kept unchanged to avoid file size increase.
## Usage
```
$ pdfly compress --help
Usage: pdfly compress [OPTIONS] PDF OUTPUT
Compress a PDF.
╭─ Arguments ───────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
│ * output PATH [default: None] [required] │
╰───────────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────────╯
```
## Examples
Compress the file `document.pdf` and output `document_compressed.pdf`
```
pdfly compress document.pdf document_compressed.pdf
```
Example output when compression succeeds:
```
Original Size : 1,996,123
Final Size : 1,234,567 (Compressed (61.8% of original))
```
Example output when compression would increase file size:
```
Original Size : 887
Final Size : 887 (No compression applied (would increase size))
```
================================================
FILE: docs/user/subcommand-extract-annotated-pages.md
================================================
# extract-annotated-pages
Extract only the annotated pages from a PDF. This can help to review or rework pages from a large document iteratively.
## Usage
```
pdfly extract-annotated-pages --help
Usage: pdfly extract-annotated-pages [OPTIONS] INPUT_PDF
Extract only the annotated pages from a PDF.
Q: Why does this help?
A: https://github.com/py-pdf/pdfly/issues/97
╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * input_pdf FILE Input PDF file. [required] │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ --output -o PATH Output PDF file. Defaults to 'input_pdf_annotated'. │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Input file
Extracts only pages containing annotations from a file `input.pdf`. Pages are written into a new file `input_annotated.pdf`.
```
pdfly extract-annotated-pages input.pdf
```
### Input file with specific output file
Extracts only pages containing annotations from a file `input.pdf` into the given output file `pages_to_rework.pdf`.
```
pdfly extract-annotated-pages input.pdf -o pages_to_rework.pdf
```
================================================
FILE: docs/user/subcommand-extract-images.md
================================================
# extract-images
Extract images from a PDF file.
## Usage
```
$ pdfly extract-images --help
Usage: pdfly extract-images [OPTIONS] PDF
Extract images from PDF without resampling or altering.
Adapted from work by Sylvain Pelissier
http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-res
ampling-in-python
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Extract the 10th page of `document.pdf` into `page.pdf`, then extract the images present in it.
```
pdfly cat document.pdf 9 -o page.pdf
pdfly extract-images page.pdf
Extracted 1 images:
- 0-Im0.png
```
================================================
FILE: docs/user/subcommand-extract-text.md
================================================
# extract-text
Extract text from a PDF file.
## Usage
```
$ pdfly extract-text --help
Usage: pdfly extract-text [OPTIONS] PDF
Extract text from a PDF file.
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Extract the text from the 10th page of `document.pdf`, redirecting the output into `page.txt`.
```
pdfly cat document.pdf 9 -o page.pdf
pdfly extract-text page.pdf > page.txt
```
================================================
FILE: docs/user/subcommand-meta.md
================================================
# meta
Get metadata of a PDF file.
## Usage
```
pdfly meta --help
Usage: pdfly meta [OPTIONS] PDF
Show metadata of a PDF file
╭─ Arguments ───────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
╰───────────────────────────────────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────────────────────────────────╮
│ --output -o [json|text] output format [default: text] │
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────────────────────────────────╯
```
## Example
```
$ pdfly meta Allianz-Versicherungsunterlagen.pdf
Operating System Data
┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Attribute ┃ Value ┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ File Name │ /home/user/Documents/Allianz-Versicherungsunterlagen.pdf │
│ File Permissions │ -rw-rw-r-- │
│ File Size │ 874,781 bytes │
│ Creation Time │ 2023-09-02 10:00:51 │
│ Modification Time │ 2023-09-02 10:00:42 │
│ Access Time │ 2023-09-09 11:57:41 │
└───────────────────┴───────────────────────────────────────────────────────────┘
PDF Data
┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Attribute ┃ Value ┃
┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ Title │ │
│ Producer │ itext-paulo-155 (itextpdf.sf.net-lowagie.com) │
│ Author │ │
│ Pages │ 34 │
│ Encrypted │ None │
│ PDF File Version │ %PDF-1.6 │
│ Page Layout │ │
│ Page Mode │ │
│ PDF ID │ ID1=b"'\xc5\x92\xc3\x92\xe2\x80\x93--/\xef\xac\x824\xc3… │
│ │ ID2=b'\xc3\x8b\xc3\xaa\xcb\x9b\r\xc3\xa2\r\xcb\x99T\xc3… │
│ │ \xc3\x96\xc3\x9fY2' │
│ Fonts (unembedded) │ /Helvetica │
│ Fonts (embedded) │ /ASPNQQ+TT22D6t00, /CBKSHX+Helvetica-Bold, │
│ │ /CXQKAY+Helvetica, /GOCSXU+AllianzNeo-Bold, │
│ │ /LKNHUL+Arial-BoldMT, /LMNFKX+ArialMT, /MWUNIP+Symbol, │
│ │ /ODNMDG+TT5B6t00, /PESMKN+AllianzNeo-CondensedBold, │
│ │ /PHDALA+Helvetica-Oblique, /PJEFXS+AllianzNeo-Light, │
│ │ /SNDABN+Helvetica, /SNDABN+Helvetica-Bold, │
│ │ /SNDABN+Times-Roman, /TXDAYK+Helvetica, │
│ │ /VORXLN+Helvetica-BoldOblique, /YTXZAH+Arial-ItalicMT │
│ Attachments │ [] │
│ Images │ 16 images (355,454 bytes) │
└────────────────────┴──────────────────────────────────────────────────────────┘
Use the 'pagemeta' subcommand to get details about a single page
```
================================================
FILE: docs/user/subcommand-pagemeta.md
================================================
# pagemeta
Give details about a PDF's single page.
## Usage
```
$ pdfly pagemeta --help
Usage: pdfly pagemeta [OPTIONS] PDF PAGE_INDEX
Give details about a single page.
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
│ * page_index INTEGER [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --output -o [json|text] output format [default: text] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Get the metadata of the 101st page of `document.pdf` in text format.
```
pdfly pagemeta document.pdf 100
/home/user/.../document.pdf, page index 100
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Attribute ┃ Value ┃
┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ mediabox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │
│ cropbox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │
│ artbox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │
│ bleedbox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │
│ annotations │ 8 │
└─────────────┴─────────────────────────────────────────────────────┘
All annotations:
1. /Link at [232.05524, 385.79007, 343.6091, 396.29007]
2. /Link at [157.63988, 209.99002, 243.69913, 220.49002]
3. /Link at [72, 178.19678, 249.65918, 188.69678]
4. /Link at [196.12769, 152.40353, 361.02328, 162.90353]
5. /Link at [360.97717, 139.80353, 432, 150.30353]
6. /Link at [72, 127.20352, 213.9915, 137.70352]
7. /Link at [179.64218, 448.3905, 220.08231, 458.8905]
8. /Link at [282.84, 347.99005, 340.83148, 358.49005]
```
Get the same metadata in `json` format.
```
pdfly pagemeta document.pdf 100 -o json
{"mediabox":[0.0,0.0,504.0,661.5],"cropbox":[0.0,0.0,504.0,661.5],"artbox":[0.0,0.0,504.0,661.5],"bleedbox":[0.0,0.0,504.0,661.5],"annotations":19}
```
================================================
FILE: docs/user/subcommand-rm.md
================================================
# rm
Remove pages from PDF files.
## Usage
```
$ pdfly rm --help
Usage: pdfly rm [OPTIONS] FILENAME FN_PGRGS...
Remove pages from PDF files.
Page ranges refer to the previously-named file.
A file not followed by a page range means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
Examples
pdfly rm -o output.pdf document.pdf 2:5
Remove pages 2 to 4 from document.pdf, producing output.pdf.
pdfly rm document.pdf :-1
Removes all pages except the last one from document.pdf, modifying the original file.
pdfly rm report.pdf :6 7:
Remove all pages except page seven from report.pdf,
producing a single-page report.pdf.
╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────╮
│ * filename FILE [default: None] [required] │
│ * fn_pgrgs FN_PGRGS... filenames and/or page ranges [default: None] [required] │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────╮
│ * --output -o PATH [default: None] [required] │
│ --verbose --no-verbose show page ranges as they are being read [default: no-verbose] │
│ --help Show this message and exit. │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Remove the 5th page of `document.pdf`, modifying the original file.
```
pdfly rm document.pdf 4
```
Remove every page except the first and the last one from `document.pdf`, producing `output.pdf`.
```
pdfly rm -o output.pdf document.pdf 1:-1
```
================================================
FILE: docs/user/subcommand-rotate.md
================================================
# rotate
Rotate specified pages of a PDF file by the specified amount.
## Usage
```
pdfly rotate --help
Usage: pdfly rotate [OPTIONS] FILENAME DEGREES [PGRGS]
Rotate specified pages by the specified amount
Example:
pdfly rotate --output output.pdf input.pdf 90
Rotate all pages by 90 degrees (clockwise)
pdfly rotate --output output.pdf input.pdf 90 :3
Rotate first three pages by 90 degrees (clockwise)
pdfly rotate --output output.pdf input.pdf 90 -- -1
Rotate last page by 90 degrees (clockwise)
A file not followed by a page range (PGRGS) means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * filename FILE [required] │
│ * degrees INTEGER degrees to rotate [required] │
│ pgrgs [PGRGS] page range [default: :] │
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * --output -o PATH [required] │
│ --help Show this message and exit. │
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Rotate all pages by 90 degrees (clockwise)
Rotate all pages from `input.pdf` by 90 degrees (clockwise) and write the resulting pdf to `output.pdf`.
```
pdfly rotate --output output.pdf input.pdf 90
```
### Rotate first three pages by 90 degrees (clockwise)
Rotate first three pages from `input.pdf` by 90 degrees (clockwise) and write the resulting pdf to `output.pdf`.
```
pdfly rotate --output output.pdf input.pdf 90 :3
```
### Rotate last page by 90 degrees (clockwise)
Rotate last page from `input.pdf` by 90 degrees (clockwise) and write the resulting pdf to `output.pdf`.
```
pdfly rotate --output output.pdf input.pdf 90 -- -1
```
================================================
FILE: docs/user/subcommand-sign.md
================================================
# sign
Creates a digitally-signed PDF from an existing PDF file and a given certificate.
## Usage
```
Usage: pdfly sign [OPTIONS] FILENAME
Creates a signed PDF.
Examples
pdfly sign input.pdf --p12 certs.p12 -o signed.pdf
Signs the input.pdf with a PKCS12 certificate archive. Writes the resulting signed pdf into signed.pdf.
pdfly sign document.pdf --p12 certs.p12 --in-place
Signs the document.pdf with a PKCS12 certificate archive. Modifies the input file in-place.
╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * filename FILE [required] │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * --p12 FILE PKCS12 certificate container [required] │
│ --output -o PATH │
│ --in-place -i │
│ --p12-password -p TEXT The password to use to decrypt the PKCS12 file. │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Sign a PDF with PKCS12
Signs the input.pdf with a PKCS12 certificate archive. Writes the resulting signed pdf into signed.pdf.
```
pdfly sign input.pdf --p12 certs.p12 -o signed.pdf
```
### Sign a PDF in-place
Signs the document.pdf with a PKCS12 certificate archive. Modifies the input file in-place.
```
pdfly sign document.pdf --p12 certs.p12 --in-place
```
================================================
FILE: docs/user/subcommand-uncompress.md
================================================
# uncompress
Module for uncompressing PDF content streams.
## Usage
```
$ pdfly uncompress --help
Module for uncompressing PDF content streams.
╭─ Arguments ───────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
│ * output PATH [default: None] [required] │
╰───────────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────────╯
```
## Examples
Uncompress `document_compressed.pdf` and output `document.pdf`.
```
pdfly uncompress document_compressed.pdf document.pdf
```
================================================
FILE: docs/user/subcommand-update-offsets.md
================================================
# update-offsets
Updates offsets and lengths in a simple PDF file.
## Usage
```
$ pdfly update-offsets --help
Usage: pdfly update-offsets [OPTIONS] FILE_IN FILE_OUT
Updates offsets and lengths in a simple PDF file.
The PDF specification requires that the xref section at the end
of a PDF file has the correct offsets of the PDF's objects.
It further requires that the dictionary of a stream object
contains a /Length-entry giving the length of the encoded stream.
When editing a PDF file using a text-editor (e.g. vim) it is
elaborate to compute or adjust these offsets and lengths.
This command tries to compute /Length-entries of the stream dictionaries
and the offsets in the xref-section automatically.
It expects that the PDF file has ASCII encoding only. It may
use ISO-8859-1 or UTF-8 in its comments.
The current implementation incorrectly replaces CR (0x0d) by LF (0x0a) in
binary data.
It expects that there is one xref-section only.
It expects that the /Length-entries have default values containing
enough digits, e.g. /Length 000 when the stream consists of 576 bytes.
Example:
update-offsets --verbose --encoding ISO-8859-1 issue-297.pdf
issue-297.out.pdf
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * file_in FILE [default: None] [required] │
│ * file_out PATH [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --encoding TEXT Encoding used to read and write the │
│ files, e.g. UTF-8. │
│ [default: ISO-8859-1] │
│ --verbose --no-verbose Show progress while processing. │
│ [default: no-verbose] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Update the offsets of `document.pdf` with UTF-8 encoding and write the output to `document.out.pdf`.
```
pdfly update-offsets document.pdf --verbose --encoding UTF-8 document.out.pdf
```
================================================
FILE: docs/user/subcommand-x2pdf.md
================================================
# x2pdf
Convert a file to PDF.
Currently supported for "x":
* PNG
* JPG
## Usage
```
$ pdfly x2pdf --help
Usage: pdfly x2pdf [OPTIONS] X...
Convert one or more files to PDF. Each file is a page.
╭─ Arguments ─────────────────────────────────────────────────────────────────╮
│ * x X... [default: None] [required] │
╰─────────────────────────────────────────────────────────────────────────────╯
╭─ Options ───────────────────────────────────────────────────────────────────╮
│ * --output -o PATH [default: None] [required] │
│ --help Show this message and exit. │
╰─────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Single file
```
$ pdfly x2pdf image.jpg -o out.pdf
$ ls -lh
-rw-rw-r-- 1 user user 47K Sep 17 21:49 image.jpg
-rw-rw-r-- 1 user user 49K Sep 17 22:48 out.pdf
```
### Multiple files manually
```
$ pdfly x2pdf image1.jpg image2.jpg -o out.pdf
$ ls -lh
-rw-rw-r-- 1 user user 47K Sep 17 21:49 image1.jpg
-rw-rw-r-- 1 user user 15K Sep 17 21:49 image2.jpg
-rw-rw-r-- 1 user user 64K Sep 17 22:48 out.pdf
```
### Multiple files via *
```
$ pdfly x2pdf *.jpg -o out.pdf
$ ls -lh
-rw-rw-r-- 1 user user 47K Sep 17 21:49 image1.jpg
-rw-rw-r-- 1 user user 15K Sep 17 21:49 image2.jpg
-rw-rw-r-- 1 user user 64K Sep 17 22:48 out.pdf
```
================================================
FILE: make_release.py
================================================
"""Internal tool to update the CHANGELOG."""
import json
import subprocess
import urllib.request
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any
from rich.prompt import Prompt
# GitHub organisation and repository name; interpolated into the GitHub
# compare URL and the GitHub API URL built elsewhere in this script.
GH_ORG = "py-pdf"
GH_PROJECT = "pdfly"
# File that receives the new ``__version__`` assignment.
VERSION_FILE_PATH = "pdfly/_version.py"
# Changelog file that is passed to ``main`` when run as a script.
CHANGELOG_FILE_PATH = "CHANGELOG.md"
@dataclass(frozen=True)
class Change:
    """
    Capture the data of a git commit.

    Instances are immutable because the dataclass is declared with
    ``frozen=True``.
    """

    # Commit hash as printed by ``git log`` (``%H``).
    commit_hash: str
    # Prefix of the commit subject, e.g. "ENH" or "BUG"; empty if there is none.
    prefix: str
    # Commit subject without the prefix.
    message: str
    # Author name as printed by ``git log`` (``%aN``).
    author: str
    # GitHub login handle looked up for this commit hash.
    author_login: str
def main(changelog_path: str) -> None:
    """
    Create a changelog entry for the next release.

    Reads the current changelog, collects the changes since the most recent
    git tag, asks for the new version, adjusts the version file, writes the
    commit/tag message files and finally prepends the new entry to the
    changelog (unless that exact entry is already present).

    Args:
        changelog_path: The location of the CHANGELOG file

    """
    existing_changelog = get_changelog(changelog_path)
    previous_tag = get_most_recent_git_tag()
    plain_changes, attributed_changes = get_formatted_changes(previous_tag)
    if not plain_changes:
        print("No changes")
        return

    suggested_version = version_bump(previous_tag)
    release_version = get_version_interactive(suggested_version, plain_changes)
    adjust_version_py(release_version)

    release_date = datetime.now(tz=timezone.utc)
    compare_url = f"https://github.com/{GH_ORG}/{GH_PROJECT}/compare/{previous_tag}...{release_version}"
    footer = f"\n[Full Changelog]({compare_url})\n\n"
    entry = "".join(
        (
            f"## Version {release_version}, {release_date:%Y-%m-%d}\n",
            plain_changes,
            footer,
        )
    )
    print(entry)

    # The commit and tag messages credit the authors; the changelog does not.
    message_body = attributed_changes + footer
    write_commit_msg_file(release_version, message_body)
    write_release_msg_file(release_version, message_body, release_date)

    # Keep the script idempotent: never insert the same entry twice.
    if entry in existing_changelog:
        print("Changelog is already up-to-date!")
        return

    write_changelog(
        "# CHANGELOG\n\n" + entry + strip_header(existing_changelog),
        changelog_path,
    )
    print_instructions(release_version)
def print_instructions(new_version: str) -> None:
    """
    Print the remaining manual release steps.

    Args:
        new_version: The version that was just written to the version file.

    """
    report = [
        "=" * 80,
        f"☑ {VERSION_FILE_PATH} was adjusted to '{new_version}'",
        f"☑ {CHANGELOG_FILE_PATH} was adjusted",
        "",
        "Now run:",
        " git commit -eF RELEASE_COMMIT_MSG.md",
        f" git tag -s {new_version} -eF RELEASE_TAG_MSG.md",
        " git push",
        " git push --tags",
    ]
    for line in report:
        print(line)
def adjust_version_py(version: str) -> None:
    """
    Overwrite the version file with a single ``__version__`` assignment.

    Args:
        version: The version string to store.

    """
    content = f'__version__ = "{version}"\n'
    with open(VERSION_FILE_PATH, "w") as version_file:
        version_file.write(content)
def get_version_interactive(new_version: str, changes: str) -> str:
    """
    Ask the user for the new ``__version__`` until a semantic version is given.

    Args:
        new_version: The suggested version, offered as default on every prompt.
        changes: The formatted changes, printed before prompting.

    Returns:
        The first answer that passes ``is_semantic_version``.

    """
    print("The changes are:")
    print(changes)

    suggestion = new_version
    question = "New semantic version"
    while True:
        answer = Prompt.ask(question, default=suggestion)
        if is_semantic_version(answer):
            return answer
        # Every retry uses the more explicit wording.
        question = (
            "That was not a semantic version. Please enter a semantic version"
        )
def is_semantic_version(version: str) -> bool:
    """
    Check if the given version is a semantic version.

    Only the plain ``MAJOR.MINOR.PATCH`` form is recognized; edge-cases like
    pre-releases are not covered.

    Args:
        version: The version string to check.

    Returns:
        True if the string consists of exactly three dot-separated groups of
        ASCII digits, False otherwise.

    """
    parts = version.split(".")
    if len(parts) != 3:
        return False
    # ``int()`` would also accept "-1", " 1", "+1" and "1_0", none of which
    # belong in a version number, so check the characters explicitly.
    return all(part.isascii() and part.isdigit() for part in parts)
def write_commit_msg_file(new_version: str, commit_changes: str) -> None:
    """
    Write a file that can be used as a commit message.

    Like this:

        git commit -eF RELEASE_COMMIT_MSG.md && git push

    The file ``RELEASE_COMMIT_MSG.md`` is created in the current working
    directory and written as UTF-8, so non-ASCII commit subjects or author
    names do not depend on the locale's default encoding.

    Args:
        new_version: The version that is being released.
        commit_changes: The formatted list of changes.

    """
    with open("RELEASE_COMMIT_MSG.md", "w", encoding="utf-8") as fp:
        fp.write(f"REL: {new_version}\n\n")
        fp.write("## What's new\n")
        fp.write(commit_changes)
def write_release_msg_file(
    new_version: str, commit_changes: str, today: datetime
) -> None:
    """
    Write a file that can be used as a git tag message.

    Like this:

        git tag -eF RELEASE_TAG_MSG.md && git push

    The file ``RELEASE_TAG_MSG.md`` is created in the current working
    directory and written as UTF-8, so non-ASCII commit subjects or author
    names do not depend on the locale's default encoding.

    Args:
        new_version: The version that is being released.
        commit_changes: The formatted list of changes.
        today: The release date; only year, month and day are written.

    """
    with open("RELEASE_TAG_MSG.md", "w", encoding="utf-8") as fp:
        fp.write(f"Version {new_version}, {today:%Y-%m-%d}\n\n")
        fp.write("## What's new\n")
        fp.write(commit_changes)
def strip_header(md: str) -> str:
    """
    Remove the 'CHANGELOG' header.

    Only a literal leading ``# CHANGELOG`` is removed. ``str.lstrip`` with an
    argument strips a *set of characters*, which would also eat the leading
    ``## `` of a changelog that has no header, so ``removeprefix`` is used.

    Args:
        md: The contents of the changelog.

    Returns:
        The changelog without the header and without leading whitespace.

    """
    return md.lstrip().removeprefix("# CHANGELOG").lstrip()
def version_bump(git_tag: str) -> str:
    """
    Suggest the next version by incrementing the patch number of a tag.

    Args:
        git_tag: Old version tag in the form ``MAJOR.MINOR.PATCH``

    Returns:
        The version with the same major and minor part and the patch part
        increased by one.

    """
    # A patch release is only the default suggestion.
    major, minor, patch = git_tag.split(".")
    next_patch = int(patch) + 1
    return ".".join((major, minor, str(next_patch)))
def get_changelog(changelog_path: str) -> str:
    """
    Read the changelog.

    The file is decoded as UTF-8 instead of the locale's default encoding so
    that non-ASCII entries are read identically on every platform.

    Args:
        changelog_path: Path to the CHANGELOG file

    Returns:
        Data of the CHANGELOG

    """
    with open(changelog_path, encoding="utf-8") as fh:
        return fh.read()
def write_changelog(new_changelog: str, changelog_path: str) -> None:
    """
    Write the changelog.

    The file is encoded as UTF-8 instead of the locale's default encoding so
    that non-ASCII entries can always be written.

    Args:
        new_changelog: Contents of the new CHANGELOG
        changelog_path: Path where the CHANGELOG file is

    """
    with open(changelog_path, "w", encoding="utf-8") as fh:
        fh.write(new_changelog)
def get_formatted_changes(git_tag: str) -> tuple[str, str]:
    """
    Render the commits made since a tag as Markdown, grouped by prefix.

    Args:
        git_tag: the reference tag

    Returns:
        Two Markdown strings with the changes done since git_tag: the first
        one lists the commit messages only, the second one additionally
        credits the author of every commit.

    """
    # Bucket the commits by prefix, keeping the order of first appearance.
    by_prefix: dict[str, list[dict[str, Any]]] = {}
    for change in get_git_commits_since_tag(git_tag):
        by_prefix.setdefault(change.prefix, []).append(
            {"msg": change.message, "author": change.author_login}
        )

    # Known prefixes with their section titles, in rendering order.
    sections = {
        "SEC": "Security",
        "DEP": "Deprecations",
        "ENH": "New Features",
        "PI": "Performance Improvements",
        "BUG": "Bug Fixes",
        "ROB": "Robustness",
        # We ignore MRs from Dependabot prefixed with: "Docs:"
        "DOC": "Documentation",
        "DEV": "Developer Experience",
        "CI": "Continuous Integration",
        "MAINT": "Maintenance",
        "TST": "Testing",
        "STY": "Code Style",
    }

    plain: list[str] = []
    credited: list[str] = []
    for prefix, title in sections.items():
        entries = by_prefix.pop(prefix, None)
        if entries is None:
            continue
        heading = f"\n### {title} ({prefix})\n"
        plain.append(heading)
        credited.append(heading)
        for entry in entries:
            plain.append(f"- {entry['msg']}\n")
            credited.append(f"- {entry['msg']} by @{entry['author']}\n")

    # Whatever is left carries a prefix without a dedicated section.
    if by_prefix:
        plain.append("\n### Other\n")
        credited.append("\n### Other\n")
        for prefix, entries in by_prefix.items():
            for entry in entries:
                plain.append(f"- {prefix}: {entry['msg']}\n")
                credited.append(
                    f"- {prefix}: {entry['msg']} by @{entry['author']}\n"
                )

    return "".join(plain), "".join(credited)
def get_most_recent_git_tag() -> str:
    """
    Get the most recent git tag as reported by ``git describe --abbrev=0``.

    The output of git is decoded and stripped of surrounding whitespace.
    Stripping the characters ``'``, ``b``, ``\\`` and ``n`` from the ``repr``
    of the bytes (as done previously) would also remove those characters
    from the tag itself, e.g. turn ``1.0.0b`` into ``1.0.0``.

    Returns:
        The tag name printed by git.

    Raises:
        subprocess.CalledProcessError: git exited with a non-zero status.

    """
    raw_output = subprocess.check_output(
        ["git", "describe", "--abbrev=0"], stderr=subprocess.STDOUT
    )
    return raw_output.decode("utf-8").strip()
def get_author_mapping(line_count: int) -> dict[str, str]:
    """
    Get the authors for each commit.

    The commits are fetched page by page from the GitHub API, with at most
    100 commits per request.

    Args:
        line_count: Number of lines from Git log output. Used for determining how
            many commits to fetch.

    Returns:
        A mapping of long commit hashes to author login handles. Empty if
        ``line_count`` is not positive.

    """
    # Without commits there is nothing to look up. This also avoids
    # ``range(0, 0, 0)``, which raises ValueError because of the zero step.
    if line_count <= 0:
        return {}

    per_page = min(line_count, 100)
    page_count = -(-line_count // per_page)  # ceiling division
    mapping: dict[str, str] = {}
    for page in range(1, page_count + 1):
        with urllib.request.urlopen(
            f"https://api.github.com/repos/{GH_ORG}/{GH_PROJECT}/commits?per_page={per_page}&page={page}"
        ) as response:
            commits = json.loads(response.read())
        for commit in commits:
            if commit["author"]:
                gh_handle = commit["author"]["login"]
            else:
                # This is not perfect, but better than the other option
                gh_handle = commit["commit"]["author"]["name"].replace(" ", "")
            mapping[commit["sha"]] = gh_handle
    return mapping
def get_git_commits_since_tag(git_tag: str) -> list[Change]:
    """
    Collect every commit between a tag and HEAD.

    Args:
        git_tag: Reference tag from which the changes to the current commit are
            fetched.

    Returns:
        list of all changes since git_tag.

    """
    command = [
        "git",
        "--no-pager",
        "log",
        f"{git_tag}..HEAD",
        '--pretty=format:"%H:::%s:::%aN"',
    ]
    raw_log = subprocess.check_output(command, stderr=subprocess.STDOUT)
    log_lines = raw_log.decode("UTF-8").strip().splitlines()
    login_by_hash = get_author_mapping(len(log_lines))
    # ``filter(None, ...)`` drops the empty lines.
    return [
        parse_commit_line(log_line, login_by_hash)
        for log_line in filter(None, log_lines)
    ]
def parse_commit_line(line: str, authors: dict[str, str]) -> Change:
    """
    Parse one line of the git log output.

    The line is expected in the form ``"<hash>:::<subject>:::<author>"``,
    including the surrounding double quotes.

    Args:
        line: One line of the git log output.
        authors: Mapping of commit hashes to author login handles.

    Returns:
        The parsed Change object

    Raises:
        ValueError: The commit line is not well-structured

    """
    parts = line.split(":::")
    if len(parts) != 3:
        raise ValueError(f"Invalid commit line: '{line}'")
    commit_hash, rest, author = parts

    # Only "PREFIX: message" counts as prefixed. A colon that is not followed
    # by a space (e.g. inside a URL) leaves the whole subject as the message
    # instead of failing on tuple unpacking.
    prefix, separator, message = rest.partition(": ")
    if not separator:
        prefix, message = "", rest

    # Standardize
    message = message.strip()
    commit_hash = commit_hash.strip('"')
    author = author.removesuffix('"')
    prefix = prefix.strip()
    if prefix == "DOCS":
        prefix = "DOC"

    # Commits missing from the mapping fall back to the git author name
    # without spaces instead of raising KeyError.
    author_login = authors.get(commit_hash, author.replace(" ", ""))

    return Change(
        commit_hash=commit_hash,
        prefix=prefix,
        message=message,
        author=author,
        author_login=author_login,
    )
# Run the release workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main(CHANGELOG_FILE_PATH)
================================================
FILE: mypy.ini
================================================
[mypy]
plugins = pydantic.mypy
================================================
FILE: pdfly/__init__.py
================================================
"""pdfly is a command line utility for manipulating PDFs and getting information about them."""
from ._version import __version__
__all__ = [
"__version__",
]
================================================
FILE: pdfly/__main__.py
================================================
"""Execute pdfly as a module."""
from pdfly.cli import entry_point
if __name__ == "__main__":
entry_point()
================================================
FILE: pdfly/_utils.py
================================================
from enum import Enum
class OutputOptions(Enum):
    """
    Names of the available output formats: ``json`` and ``text``.

    NOTE(review): presumably backs the ``--output [json|text]`` option of
    subcommands such as ``meta`` and ``pagemeta`` -- confirm in ``cli.py``.
    """

    json = "json"
    text = "text"
================================================
FILE: pdfly/_version.py
================================================
__version__ = "0.5.1"
================================================
FILE: pdfly/booklet.py
================================================
"""
Reorder and two-up PDF pages for booklet printing.
If the number of pages is not a multiple of four, pages are
added until it is a multiple of four. This includes a centerfold
in the middle of the booklet and a single page on the inside
back cover. The content of those pages are from the
centerfold-file and blank-page-file files, if specified, otherwise
they are blank pages.
Example:
pdfly booklet input.pdf output.pdf
"""
# Copyright (c) 2014, Steve Witham .
# All rights reserved. This software is available under a BSD license;
# see https://github.com/py-pdf/pypdf/LICENSE
from collections.abc import Generator
from pathlib import Path
from pypdf import (
PageObject,
PdfReader,
PdfWriter,
)
from pypdf.generic import FloatObject, RectangleObject
def main(
    filename: Path,
    output: Path,
    inside_cover_file: Path | None,
    centerfold_file: Path | None,
) -> None:
    """
    Reorder the pages of a PDF and place them two-up for booklet printing.

    The pages are padded to a multiple of four, paired with ``page_iter``,
    merged side by side and written to ``output``.

    Args:
        filename: The PDF file to read.
        output: The PDF file to write.
        inside_cover_file: Optional PDF whose first page is inserted before
            the last page when the page count is odd; a blank page is used
            otherwise.
        centerfold_file: Optional PDF whose first page is merged onto the
            last written sheet when a centerfold had to be added.

    Raises:
        RuntimeError: Any exception raised while processing; the original
            exception is chained as the cause.

    """
    try:
        # Set up the streams
        reader = PdfReader(filename)
        pages = list(reader.pages)
        writer = PdfWriter()

        # Add blank pages to make the number of pages a multiple of 4
        # If the user specified an inside-back-cover file, use it.
        blank_page = PageObject.create_blank_page(
            width=pages[0].mediabox.width, height=pages[0].mediabox.height
        )
        if len(pages) % 2 == 1:
            # insert(-1, ...) places the new page directly before the last page.
            if inside_cover_file:
                ic_reader_page = fetch_first_page(inside_cover_file)
                pages.insert(-1, ic_reader_page)
            else:
                pages.insert(-1, blank_page)
        if len(pages) % 4 == 2:
            # Two more pages in the middle: the centerfold.
            pages.insert(len(pages) // 2, blank_page)
            pages.insert(len(pages) // 2, blank_page)
            requires_centerfold = True
        else:
            requires_centerfold = False

        # Reorder the pages and place two pages side by side (2-up) on each sheet
        for lhs, rhs in page_iter(len(pages)):
            # The right-hand page is shifted by the width of the left-hand page.
            pages[lhs].merge_translated_page(
                page2=pages[rhs],
                tx=pages[lhs].mediabox.width,
                ty=0,
                expand=True,
                over=True,
            )
            # Double the CropBox width:
            pages[lhs].cropbox[2] = FloatObject(2 * pages[lhs].cropbox[2])
            writer.add_page(pages[lhs])

        # If a centerfold was required, it is already
        # present as a pair of blank pages. If the user
        # specified a centerfold file, use it instead.
        if requires_centerfold and centerfold_file:
            centerfold_page = fetch_first_page(centerfold_file)
            # page_iter yields the innermost pair last, so the last written
            # sheet is the centerfold.
            last_page = writer.pages[-1]
            if centerfold_page.rotation != 0:
                centerfold_page.transfer_rotation_to_content()
            # Rotate the centerfold if its orientation differs from the sheet.
            if requires_rotate(centerfold_page.mediabox, last_page.mediabox):
                centerfold_page = centerfold_page.rotate(270)
            if centerfold_page.rotation != 0:
                centerfold_page.transfer_rotation_to_content()
            last_page.merge_page(centerfold_page)

        # Everything looks good! Write the output file.
        with open(output, "wb") as output_fh:
            writer.write(output_fh)
    except Exception as error:
        raise RuntimeError(f"Error while processing {filename}") from error
def requires_rotate(a: RectangleObject, b: RectangleObject) -> bool:
    """
    Tell whether two rectangles have different orientations.

    A rectangle counts as portrait if its height is greater than its width.

    Args:
        a (RectangleObject): The first rectangle.
        b (RectangleObject): The second rectangle.

    Returns:
        bool: True if exactly one of the two rectangles is portrait.

    """
    portrait_flags = [rect.height > rect.width for rect in (a, b)]
    return portrait_flags[0] != portrait_flags[1]
def fetch_first_page(filename: Path) -> PageObject:
    """
    Open a PDF file and return its first page.

    Args:
        filename (Path): The path to the PDF file.

    Returns:
        PageObject: The page at index 0 of the PDF file.

    """
    reader = PdfReader(filename)
    return reader.pages[0]
# This function written with inspiration, assistance, and code
# from claude.ai & Github Copilot
def page_iter(num_pages: int) -> Generator[tuple[int, int], None, None]:
    """
    Yield the page-index pairs that share one side of a booklet sheet.

    Every physical sheet contributes two pairs: first the outside of the
    fold, then the inside. Indices are zero-based.

    Args:
        num_pages (int): The total number of pages in the document. Must be divisible by 4.

    Yields:
        tuple[int, int]: The two page indices placed side by side on one
            side of a sheet.

    Raises:
        ValueError: If the number of pages is not divisible by 4. As this is
            a generator, the error surfaces when iteration starts.

    """
    if num_pages % 4:
        raise ValueError("Number of pages must be divisible by 4")

    # Two cursors walk towards the middle of the document, two pages per sheet.
    front, back = 0, num_pages - 1
    for _ in range(num_pages // 4):
        # Outside the fold
        yield back, front
        # Inside the fold
        yield front + 1, back - 1
        front += 2
        back -= 2
================================================
FILE: pdfly/cat.py
================================================
"""
Concatenate pages from PDF files into a single PDF file.
Page ranges refer to the previously-named file.
A file not followed by a page range means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
Examples
pdfly cat -o output.pdf head.pdf -- content.pdf :6 7: tail.pdf -1
Concatenate all of head.pdf, all but page seven of content.pdf,
and the last page of tail.pdf, producing output.pdf.
pdfly cat chapter*.pdf >book.pdf
You can specify the output file by redirection.
pdfly cat chapter?.pdf chapter10.pdf >book.pdf
In case you don't want chapter 10 before chapter 2.
"""
# Copyright (c) 2014, Steve Witham .
# All rights reserved. This software is available under a BSD license;
# see https://github.com/py-pdf/pypdf/LICENSE
import os
import sys
from pathlib import Path
from pypdf import (
PageRange,
PasswordType,
PdfReader,
PdfWriter,
parse_filename_page_ranges,
)
from rich.console import Console
def main(
    filename: Path,
    fn_pgrgs: list[str] | None,
    output: Path,
    verbose: bool,
    inverted_page_selection: bool = False,
    password: str | None = None,
) -> None:
    """
    Concatenate (or, when inverted, drop) page ranges into a single PDF.

    Args:
        filename: First input PDF.
        fn_pgrgs: Further file names and/or page range expressions.
        output: Destination file. When falsy, the PDF is written to stdout.
        verbose: Echo every (file, page range) pair to stderr while reading.
        inverted_page_selection: Keep the pages that are NOT matched by each
            range instead of the ones that are.
        password: Password tried on every input file when given.

    Raises:
        RuntimeError: Wraps any exception raised while reading or writing.
        SystemExit: With code 1 when the provided password is rejected
            (and with code 2 from argument parsing on invalid paths).

    """
    console = Console()
    filename_page_ranges = parse_filepaths_and_pagerange_args(
        console, filename, fn_pgrgs
    )
    if output:
        output_fh = open(output, "wb")
    else:
        sys.stdout.flush()
        output_fh = os.fdopen(sys.stdout.fileno(), "wb")

    writer = PdfWriter()
    in_fs = {}
    try:
        for filepath, page_range in filename_page_ranges:  # type: ignore
            if verbose:
                print(filepath, page_range, file=sys.stderr)
            if filepath not in in_fs:
                in_fs[filepath] = open(filepath, "rb")

            reader = PdfReader(in_fs[filepath])
            if (
                password is not None
                and reader.decrypt(password) == PasswordType.NOT_DECRYPTED
            ):
                console.print(
                    "[red]Error: the decrypting password provided is invalid"
                )
                sys.exit(1)

            num_pages = len(reader.pages)
            start, end, step = page_range.indices(num_pages)
            if (
                start < 0
                or end < 0
                or start >= num_pages
                or end > num_pages
                or start > end
            ):
                print(
                    f"WARNING: Page range {page_range} is out of bounds",
                    file=sys.stderr,
                )

            selected = range(start, end, step)
            if inverted_page_selection:
                # Walk the document in order rather than iterating a set
                # difference: set iteration order is unspecified and could
                # shuffle the remaining pages.
                excluded = set(selected)
                page_nums = [
                    page_num
                    for page_num in range(num_pages)
                    if page_num not in excluded
                ]
            else:
                page_nums = list(selected)
            for page_num in page_nums:
                writer.add_page(reader.pages[page_num])
        writer.write(output_fh)
    except Exception as error:
        raise RuntimeError(f"Error while reading {filename}") from error
    finally:
        output_fh.close()
        # Input files must stay open until the output is written; at this
        # point writing is over (or has failed), so release the handles.
        for in_fh in in_fs.values():
            in_fh.close()
def parse_filepaths_and_pagerange_args(
    console: Console, filename: Path, fn_pgrgs: list[str] | None
) -> list[tuple[Path, PageRange]]:
    """
    Turn the raw CLI arguments into (file path, page range) pairs.

    ``filename`` is placed in front of ``fn_pgrgs`` and the combined list is
    handed to ``parse_filename_page_ranges``. Every resulting path must be an
    existing regular file; otherwise all offending entries are reported on
    ``console`` and the process exits with status 2.
    """
    cli_args = [str(filename), *(fn_pgrgs or [])]
    valid_pairs: list[tuple[Path, PageRange]] = []
    rejected: list[str] = []
    for filepath, page_range in parse_filename_page_ranges(cli_args):  # type: ignore
        candidate = Path(filepath)
        if not candidate.is_file():
            rejected.append(str(filepath))
            continue
        valid_pairs.append((candidate, page_range))

    if rejected:
        console.print(
            f"[red]Error: invalid file path or page range provided: {' '.join(rejected)}"
        )
        sys.exit(2)
    return valid_pairs
================================================
FILE: pdfly/check_sign.py
================================================
"""
Verifies the signature of a signed PDF.
Examples
pdfly check-sign input.pdf --pem certs.pem
Verifies the input.pdf with a PEM certificate bundle.
"""
import sys
from pathlib import Path
import typer
from endesive import pdf
def main(filename: Path, pem: Path, verbose: bool | None) -> None:
    """
    Verify the signature(s) of ``filename`` against the ``pem`` certificate.

    One line is printed: "Check succeeded" on stdout, or "Check failed" on
    stderr followed by exit code 1. Failed checks are always listed in
    parentheses; passed checks are listed too when ``verbose`` is truthy.

    Raises:
        typer.BadParameter: If verification returns no result at all.
        typer.Exit: With code 1 when any individual check failed.

    """
    trusted_certs = [pem.read_bytes()]
    results = pdf.verify(filename.read_bytes(), trusted_certs)
    if len(results) == 0:
        raise typer.BadParameter("Signature missing")

    details: list[str] = []
    everything_ok = True
    for hash_ok, signature_ok, cert_ok in results:
        checks = (
            (signature_ok, "Signature not ok", "Signature ok"),
            (hash_ok, "Content hash not ok", "Content hash ok"),
            (cert_ok, "Certificate not ok", "Certificate ok"),
        )
        for passed, failure_text, success_text in checks:
            if not passed:
                details.append(failure_text)
                everything_ok = False
            elif verbose:
                details.append(success_text)

    details_str = " (" + ", ".join(details) + ")" if details else ""
    if not everything_ok:
        print(f"Check failed{details_str}.", file=sys.stderr)
        raise typer.Exit(code=1)
    print(f"Check succeeded{details_str}.")
================================================
FILE: pdfly/cli.py
================================================
"""
Define how the CLI should behave.
Subcommands are added here.
"""
from pathlib import Path
from typing import Annotated
import typer
import pdfly.booklet
import pdfly.cat
import pdfly.check_sign
import pdfly.compress
import pdfly.extract_annotated_pages
import pdfly.extract_images
import pdfly.metadata
import pdfly.pagemeta
import pdfly.rm
import pdfly.rotate
import pdfly.sign
import pdfly.uncompress
import pdfly.up2
import pdfly.update_offsets
import pdfly.x2pdf
def version_callback(value: bool) -> None:
    # Callback of the global ``--version`` option: when the flag is set, print
    # the pdfly and pypdf versions and stop via typer.Exit; otherwise do nothing.
    import pypdf

    if not value:
        return
    version_lines = (
        f"pdfly {pdfly.__version__}",
        f" using pypdf=={pypdf.__version__}",
    )
    for line in version_lines:
        typer.echo(line)
    raise typer.Exit
# Root Typer application object. Every subcommand defined below registers
# itself on it through the @entry_point.command(...) decorator.
entry_point = typer.Typer(
    add_completion=False,
    help=(
        "pdfly is a pure-python cli application for manipulating PDF files."
    ),
    rich_markup_mode="rich",  # Allows to pretty-print commands documentation
)
@entry_point.callback()  # type: ignore[misc]
def common(
    ctx: typer.Context,
    version: bool = typer.Option(None, "--version", callback=version_callback),
) -> None:
    # Application-level callback. The body is intentionally empty: it only
    # exists to declare the global ``--version`` option, whose work is done
    # by version_callback.
    pass
# ``2-up`` subcommand; its help text is the pdfly.up2 module docstring.
@entry_point.command(name="2-up", help=pdfly.up2.__doc__)  # type: ignore[misc]
def up2(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    out: Path,
) -> None:
    # Thin wrapper: forwards the input path and the output path unchanged.
    pdfly.up2.main(pdf, out)
# ``booklet`` subcommand; its help text is the pdfly.booklet module docstring.
@entry_point.command(name="booklet", help=pdfly.booklet.__doc__)  # type: ignore[misc]
def booklet(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=False,
            resolve_path=False,
        ),
    ],
    blank_page: Annotated[
        Path | None,
        typer.Option(
            "-b",
            "--blank-page-file",
            help="page added if input is odd number of pages",
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ] = None,
    centerfold: Annotated[
        Path | None,
        typer.Option(
            "-c",
            "--centerfold-file",
            help="double-page added if input is missing >= 2 pages",
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ] = None,
) -> None:
    # Both optional files default to None and are passed through as-is.
    pdfly.booklet.main(filename, output, blank_page, centerfold)
# ``cat`` subcommand; its help text is the pdfly.cat module docstring.
# NOTE(review): ``--output`` is declared as required here (``...``), although
# pdfly.cat.main falls back to stdout when ``output`` is falsy and the module
# docstring shows redirection examples -- confirm which behavior is intended.
@entry_point.command(name="cat", help=pdfly.cat.__doc__)  # type: ignore[misc]
def cat(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    fn_pgrgs: list[str] | None = typer.Argument(  # noqa: B008
        None, allow_dash=True, help="filenames and/or page ranges"
    ),
    output: Path = typer.Option(..., "-o", "--output"),  # noqa
    password: str = typer.Option(
        None, help="Document's user or owner password."
    ),
    verbose: bool = typer.Option(
        False, help="show page ranges as they are being read"
    ),
) -> None:
    # inverted_page_selection is not passed, so main() keeps its default
    # (False): the pages inside the given ranges are the ones kept.
    pdfly.cat.main(
        filename, fn_pgrgs, output=output, verbose=verbose, password=password
    )
# ``check-sign`` subcommand; its help text is the pdfly.check_sign module docstring.
@entry_point.command(name="check-sign", help=pdfly.check_sign.__doc__)
def check_sign(
    filename: Annotated[
        Path,
        typer.Argument(dir_okay=False, exists=True, resolve_path=True),
    ],
    pem: Annotated[
        Path,
        typer.Option(
            ...,
            dir_okay=False,
            exists=True,
            resolve_path=True,
            help="PEM certificate file",
        ),
    ],
    verbose: bool = typer.Option(
        False, help="Show signature verification details."
    ),
) -> None:
    # Thin wrapper: the signed PDF, the PEM file and the verbosity flag are
    # forwarded positionally.
    pdfly.check_sign.main(filename, pem, verbose)
# ``compress`` subcommand; its help text is the pdfly.compress module docstring.
@entry_point.command(name="compress", help=pdfly.compress.__doc__)  # type: ignore[misc]
def compress(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Argument(
            writable=True,
        ),
    ],
) -> None:
    # Thin wrapper: forwards the input and output paths unchanged.
    pdfly.compress.main(pdf, output)
# ``extract-annotated-pages`` subcommand; its help text is the
# pdfly.extract_annotated_pages module docstring.
@entry_point.command(name="extract-annotated-pages", help=pdfly.extract_annotated_pages.__doc__)  # type: ignore[misc]
def extract_annotated_pages(
    input_pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
            help="Input PDF file.",
        ),
    ],
    output_pdf: Annotated[
        Path | None,
        typer.Option(
            "--output",
            "-o",
            writable=True,
            help="Output PDF file. Defaults to 'input_pdf_annotated'.",
        ),
    ] = None,
) -> None:
    # When no output is given, None is forwarded and main() derives the
    # output name from the input file.
    pdfly.extract_annotated_pages.main(input_pdf, output_pdf)
# ``extract-images`` subcommand; its help text is the pdfly.extract_images
# module docstring.
@entry_point.command(name="extract-images", help=pdfly.extract_images.__doc__)  # type: ignore[misc]
def extract_images(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
) -> None:
    # Thin wrapper: there is no output option, only the input PDF is forwarded.
    pdfly.extract_images.main(pdf)
# ``extract-text`` subcommand. Unlike its siblings it is implemented inline
# and passes no ``help=`` argument to the decorator.
@entry_point.command(name="extract-text")  # type: ignore[misc]
def extract_text(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
) -> None:
    """Extract text from a PDF file."""
    # pypdf is imported locally, only when this subcommand actually runs.
    from pypdf import PdfReader

    reader = PdfReader(str(pdf))
    # Echo the extracted text page by page, in document order.
    for page in reader.pages:
        typer.echo(page.extract_text())
# ``meta`` subcommand; its help text is the pdfly.metadata module docstring.
@entry_point.command(name="meta", help=pdfly.metadata.__doc__)  # type: ignore[misc]
def metadata(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: pdfly.metadata.OutputOptions = typer.Option(  # noqa
        pdfly.metadata.OutputOptions.text.value,
        "--output",
        "-o",
        help="output format",
        show_default=True,
    ),
) -> None:
    # Here ``--output`` selects the output FORMAT (default: text), not a file.
    pdfly.metadata.main(pdf, output)
# ``pagemeta`` subcommand; its help text is the pdfly.pagemeta module docstring.
@entry_point.command(name="pagemeta", help=pdfly.pagemeta.__doc__)  # type: ignore[misc]
def pagemeta(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    page_index: int,
    output: pdfly.metadata.OutputOptions = typer.Option(  # noqa
        pdfly.metadata.OutputOptions.text.value,
        "--output",
        "-o",
        help="output format",
        show_default=True,
    ),
) -> None:
    # The output-format enum is reused from pdfly.metadata; ``page_index`` is
    # forwarded as given (main() uses it to index reader.pages).
    pdfly.pagemeta.main(
        pdf,
        page_index,
        output,
    )
# ``rm`` subcommand; its help text is the pdfly.rm module docstring.
# NOTE(review): ``--output`` and ``fn_pgrgs`` are both required here (``...``),
# while the rm module docstring shows examples without ``-o`` that modify the
# original file -- confirm which behavior is intended.
@entry_point.command(name="rm", help=pdfly.rm.__doc__)
def rm(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Path = typer.Option(..., "-o", "--output"),  # noqa
    fn_pgrgs: list[str] = typer.Argument(  # noqa
        ..., help="filenames and/or page ranges"
    ),
    verbose: bool = typer.Option(
        False, help="show page ranges as they are being read"
    ),
) -> None:
    # Thin wrapper: all four values are forwarded positionally.
    pdfly.rm.main(filename, fn_pgrgs, output, verbose)
# ``rotate`` subcommand; its help text is the pdfly.rotate module docstring.
@entry_point.command(name="rotate", help=pdfly.rotate.__doc__)  # type: ignore[misc]
def rotate(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    degrees: Annotated[int, typer.Argument(..., help="degrees to rotate")],
    pgrgs: Annotated[str, typer.Argument(..., help="page range")] = ":",
    output: Path = typer.Option(..., "-o", "--output"),  # noqa
) -> None:
    # ``pgrgs`` defaults to ":" (all pages). Note the argument order expected
    # by main(): filename, output, degrees, page range.
    pdfly.rotate.main(filename, output, degrees, pgrgs)
# ``sign`` subcommand; its help text is the pdfly.sign module docstring.
@entry_point.command(name="sign", help=pdfly.sign.__doc__)
def sign(
    filename: Annotated[
        Path,
        typer.Argument(dir_okay=False, exists=True, resolve_path=True),
    ],
    p12: Annotated[
        Path,
        typer.Option(
            ...,
            dir_okay=False,
            exists=True,
            resolve_path=True,
            help="PKCS12 certificate container",
        ),
    ],
    output: Annotated[Path | None, typer.Option("--output", "-o")] = None,
    in_place: bool = typer.Option(False, "--in-place", "-i"),
    p12_password: Annotated[
        str | None,
        typer.Option(
            "--p12-password",
            "-p",
            help="The password to use to decrypt the PKCS12 file.",
        ),
    ] = None,
) -> None:
    # ``--output`` and ``--in-place`` are both optional at this level; their
    # combination is checked inside pdfly.sign.main
    # (validate_output_args_or_raise).
    pdfly.sign.main(filename, output, in_place, p12, p12_password)
# ``uncompress`` subcommand; its help text is the pdfly.uncompress module docstring.
@entry_point.command(name="uncompress", help=pdfly.uncompress.__doc__)  # type: ignore[misc]
def uncompress(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Argument(
            writable=True,
        ),
    ],
) -> None:
    # Thin wrapper: forwards the input and output paths unchanged.
    pdfly.uncompress.main(pdf, output)
# ``update-offsets`` subcommand; its help text is the pdfly.update_offsets
# module docstring.
@entry_point.command(name="update-offsets", help=pdfly.update_offsets.__doc__)  # type: ignore[misc]
def update_offsets(
    file_in: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    file_out: Annotated[
        Path, typer.Option("-o", "--output")  # noqa
    ] = None,  # type: ignore[assignment]
    encoding: str = typer.Option(
        "ISO-8859-1",
        help="Encoding used to read and write the files, e.g. UTF-8.",
    ),
    verbose: bool = typer.Option(
        False, help="Show progress while processing."
    ),
) -> None:
    # ``file_out`` is annotated as Path but defaults to None, so main() can
    # receive None when ``-o`` is omitted.
    pdfly.update_offsets.main(file_in, file_out, encoding, verbose)
# ``x2pdf`` subcommand; its help text is the pdfly.x2pdf module docstring.
@entry_point.command(name="x2pdf", help=pdfly.x2pdf.__doc__)  # type: ignore[misc]
def x2pdf(
    x: list[
        Annotated[
            Path,
            typer.Argument(
                dir_okay=False,
                exists=True,
                resolve_path=True,
            ),
        ]
    ],
    output: Annotated[
        Path,
        typer.Option(
            "-o",
            "--output",
            writable=True,
        ),
    ],
) -> None:
    # main() returns an exit code; any truthy value becomes the process
    # exit status through typer.Exit.
    exit_code = pdfly.x2pdf.main(x, output)
    if exit_code:
        raise typer.Exit(code=exit_code)
================================================
FILE: pdfly/compress.py
================================================
"""Compress a PDF."""
import shutil
from io import BytesIO
from pathlib import Path
from pypdf import PdfReader, PdfWriter
def main(pdf: Path, output: Path) -> None:
    """
    Write a compressed copy of ``pdf`` to ``output`` and report the sizes.

    All pages (and the metadata, when present) are copied into a new writer
    and each page's content streams are compressed. The result is first
    rendered in memory; if it is not strictly smaller than the input file,
    the original is copied to ``output`` instead.
    """
    source = PdfReader(pdf)
    target = PdfWriter()
    for source_page in source.pages:
        target.add_page(source_page)
    if source.metadata:
        target.add_metadata(source.metadata)
    for copied_page in target.pages:
        copied_page.compress_content_streams()

    # Render to memory first so the size can be checked before touching disk.
    in_memory = BytesIO()
    target.write(in_memory)
    compressed_data = in_memory.getvalue()
    comp_size = len(compressed_data)
    orig_size = pdf.stat().st_size

    if comp_size < orig_size:
        output.write_bytes(compressed_data)
        final_size = comp_size
        ratio = (comp_size / orig_size) * 100
        status = f"Compressed ({ratio:.1f}% of original)"
    else:
        # Compression did not help: ship the untouched original instead.
        print(
            f"Compression resulted in larger file ({comp_size:,} >= {orig_size:,} bytes)"
        )
        print("Keeping original file as compressed version would be larger")
        shutil.copy2(pdf, output)
        final_size = orig_size
        status = "No compression applied (would increase size)"

    print(f"Original Size : {orig_size:,}")
    print(f"Final Size : {final_size:,} ({status})")
================================================
FILE: pdfly/extract_annotated_pages.py
================================================
"""
Extract only the annotated pages from a PDF.
Q: Why does this help?
A: https://github.com/py-pdf/pdfly/issues/97
"""
from pathlib import Path
from typing import TYPE_CHECKING
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import AnnotationDictionary
if TYPE_CHECKING:
from pypdf.generic import ArrayObject
def is_manipulable(annot: AnnotationDictionary) -> bool:
    """Return True unless the annotation's ``/Subtype`` is ``/Link``."""
    subtype = annot.get("/Subtype")
    return subtype != "/Link"
def main(input_pdf: Path, output_pdf: Path | None) -> None:
    """
    Copy the pages carrying at least one non-link annotation to a new PDF.

    When ``output_pdf`` is not given, the result is written next to the
    input as ``<input stem>_annotated.pdf``.
    """
    if not output_pdf:
        output_pdf = input_pdf.with_name(input_pdf.stem + "_annotated.pdf")

    reader = PdfReader(input_pdf)
    writer = PdfWriter()
    kept_pages = 0
    for page in reader.pages:
        # Pages without an /Annots entry have nothing to inspect.
        page_annots: ArrayObject = page["/Annots"] if "/Annots" in page else ()  # type: ignore[assignment]
        if any(is_manipulable(annot) for annot in page_annots):
            writer.add_page(page)
            kept_pages += 1

    # Save the output PDF
    writer.write(output_pdf)
    print(f"Extracted {kept_pages} pages with annotations to {output_pdf}")
================================================
FILE: pdfly/extract_images.py
================================================
"""
Extract images from PDF without resampling or altering.
Adapted from work by Sylvain Pelissier
http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
"""
from pathlib import Path
from pypdf import PdfReader
def main(pdf: Path) -> None:
    """
    Dump every image of ``pdf`` into the current working directory.

    Files are named ``<4-digit page index>-<image name>``. A summary of the
    written files (or "No image found.") is printed afterwards.
    """
    reader = PdfReader(str(pdf))
    written: list[str] = []
    for page_index, page in enumerate(reader.pages):
        for image in page.images:
            target = f"{page_index:04d}-{image.name}"
            Path(target).write_bytes(image.data)
            written.append(target)

    if not written:
        print("No image found.")
        return
    print(f"Extracted {len(written)} images:")
    for target in written:
        print(f"- {target}")
================================================
FILE: pdfly/metadata.py
================================================
"""Show metadata of a PDF file"""
import stat
from datetime import datetime
from pathlib import Path
from pydantic import BaseModel
from pypdf import PdfReader
from ._utils import OutputOptions
# Encryption parameters of a PDF, as shown by the ``meta`` subcommand.
# main() fills them from the reader's encryption object (its R and V values).
class EncryptionData(BaseModel):
    revision: int  # security handler revision number (R)
    v_value: int  # the "V value" of the encryption dictionary
# Everything the ``meta`` subcommand reports about one PDF file: document
# structure, the /Info dictionary and operating-system file information.
# Fields with a default stay at that default when main() cannot fill them
# (for an encrypted file only the encryption and OS fields are set).
class MetaInfo(BaseModel):
    encryption: EncryptionData | None = None  # None when not encrypted
    pdf_file_version: str  # first 8 bytes of the file, decoded as UTF-8
    pages: int | None = None  # number of pages
    page_mode: str | None = None
    page_layout: str | None = None
    attachments: str = "unknown"  # str() of the list of attachment names
    id1: bytes | None = None  # first entry of the trailer /ID array
    id2: bytes | None = None  # second entry of the trailer /ID array
    images: list[int] = []  # size in bytes of each image, over all pages

    # PDF /Info dictionary
    author: str | None = None
    creation_date: datetime | None = None
    creator: str | None = None
    keywords: str | None = None
    producer: str | None = None
    subject: str | None = None
    title: str | None = None

    # OS Information
    file_permissions: str  # stat.filemode() rendering of st_mode
    file_size: int  # in bytes
    creation_time: datetime  # derived from st_ctime
    modification_time: datetime  # derived from st_mtime
    access_time: datetime  # derived from st_atime
def main(pdf: Path, output: OutputOptions) -> None:
    """
    Print document-level and file-system metadata of a PDF file.

    For an encrypted document only the encryption parameters, the file
    header and the operating-system information are collected. Otherwise
    page count, layout, attachments, IDs, image sizes and the /Info
    dictionary are gathered as well.

    Args:
        pdf: Path of the PDF file to inspect.
        output: ``OutputOptions.json`` prints the model as JSON; any other
            value renders rich tables on the console.

    """
    reader = PdfReader(str(pdf))

    # Parsing leaves the stream at an arbitrary offset, so always rewind
    # before reading the 8-byte header -- for encrypted files as well.
    reader.stream.seek(0)
    pdf_file_version = reader.stream.read(8).decode("utf-8")
    pdf_stat = pdf.stat()

    if reader.is_encrypted:
        meta = MetaInfo(
            encryption=(
                EncryptionData(
                    v_value=reader._encryption.V,
                    revision=reader._encryption.R,
                )
                if reader._encryption
                else None
            ),
            pdf_file_version=pdf_file_version,
            # OS Info
            file_permissions=f"{stat.filemode(pdf_stat.st_mode)}",
            file_size=pdf_stat.st_size,
            creation_time=datetime.fromtimestamp(pdf_stat.st_ctime),
            modification_time=datetime.fromtimestamp(pdf_stat.st_mtime),
            access_time=datetime.fromtimestamp(pdf_stat.st_atime),
        )
    else:
        info = reader.metadata
        pdf_id = reader.trailer.get("/ID")
        meta = MetaInfo(
            pages=len(reader.pages),
            page_mode=reader.page_mode,
            pdf_file_version=pdf_file_version,
            page_layout=reader.page_layout,
            attachments=str(list(reader.attachments.keys())),
            id1=pdf_id[0] if pdf_id is not None else None,
            id2=pdf_id[1] if pdf_id is not None and len(pdf_id) >= 2 else None,
            # OS Info
            file_permissions=f"{stat.filemode(pdf_stat.st_mode)}",
            file_size=pdf_stat.st_size,
            creation_time=datetime.fromtimestamp(pdf_stat.st_ctime),
            modification_time=datetime.fromtimestamp(pdf_stat.st_mtime),
            access_time=datetime.fromtimestamp(pdf_stat.st_atime),
            images=[
                len(image.data)
                for page in reader.pages
                for image in page.images
            ],
        )
        if info is not None:
            meta.author = info.author
            meta.creation_date = info.creation_date
            meta.creator = info.creator
            # Pending https://github.com/py-pdf/pypdf/pull/2939 to be able to access .keywords:
            meta.keywords = info.get("/Keywords")
            meta.producer = info.producer
            meta.subject = info.subject
            meta.title = info.title

    if output == OutputOptions.json:
        print(meta.json())
    else:
        from rich.console import Console
        from rich.table import Table

        def new_table(title: str) -> Table:
            # All three tables share the same two-column layout.
            created = Table(title=title)
            created.add_column(
                "Attribute", justify="right", style="cyan", no_wrap=True
            )
            created.add_column("Value", style="white")
            return created

        table = new_table("PDF Data")
        # /Info entries are only listed when they carry a value.
        if meta.title:
            table.add_row("Title", meta.title)
        if meta.author:
            table.add_row("Author", meta.author)
        if meta.creation_date:
            table.add_row("CreationDate", str(meta.creation_date))
        if meta.creator:
            table.add_row("Creator", meta.creator)
        if meta.producer:
            table.add_row("Producer", meta.producer)
        if meta.subject:
            table.add_row("Subject", meta.subject)
        if meta.keywords:
            table.add_row("Keywords", meta.keywords)
        table.add_row("Pages", f"{meta.pages:,}" if meta.pages else "unknown")
        table.add_row("Encrypted", f"{meta.encryption}")
        table.add_row("PDF File Version", meta.pdf_file_version)
        table.add_row("Page Layout", meta.page_layout)
        table.add_row("Page Mode", meta.page_mode)
        table.add_row("PDF ID", f"ID1={meta.id1!r} ID2={meta.id2!r}")

        embedded_fonts: set[str] = set()
        unembedded_fonts: set[str] = set()
        # Pages are only walked for unencrypted documents.
        if not reader.is_encrypted:
            for page in reader.pages:
                emb, unemb = page._get_fonts()
                embedded_fonts |= set(emb)
                unembedded_fonts |= set(unemb)
        table.add_row(
            "Fonts (unembedded)", ", ".join(sorted(unembedded_fonts))
        )
        table.add_row(
            "Fonts (embedded)", ", ".join(sorted(embedded_fonts))
        )
        table.add_row("Attachments", meta.attachments)
        table.add_row(
            "Images", f"{len(meta.images)} images ({sum(meta.images):,} bytes)"
        )

        enc_table = new_table("Encryption information")
        if meta.encryption:
            enc_table.add_row(
                "Security Handler Revision Number",
                str(meta.encryption.revision),
            )
            enc_table.add_row("V value", str(meta.encryption.v_value))

        os_table = new_table("Operating System Data")
        os_table.add_row("File Name", f"{pdf}")
        os_table.add_row("File Permissions", f"{meta.file_permissions}")
        os_table.add_row("File Size", f"{meta.file_size:,} bytes")
        os_table.add_row(
            "Creation Time", f"{meta.creation_time:%Y-%m-%d %H:%M:%S}"
        )
        os_table.add_row(
            "Modification Time", f"{meta.modification_time:%Y-%m-%d %H:%M:%S}"
        )
        os_table.add_row(
            "Access Time", f"{meta.access_time:%Y-%m-%d %H:%M:%S}"
        )

        console = Console()
        console.print(os_table)
        console.print(table)
        # The encryption table is only shown when there is something in it.
        if meta.encryption:
            console.print(enc_table)
        console.print(
            "Use the 'pagemeta' subcommand to get details about a single page"
        )
================================================
FILE: pdfly/pagemeta.py
================================================
"""Give details about a single page."""
from pathlib import Path
from pydantic import BaseModel
from pypdf import PdfReader
from rich.console import Console
from rich.markdown import Markdown
from rich.table import Table
from ._utils import OutputOptions
# Maps a (width, height) page size to the name of a well-known paper format.
# The millimetre comments imply the sizes are expressed in points (1/72 inch).
# Only one orientation (width < height) is listed for each format.
KNOWN_PAGE_FORMATS = {
    (841.89, 1190.55): "A3",  # 297mm x 420mm
    (595.28, 841.89): "A4",  # 210mm x 297mm
    (420.94, 595.28): "A5",  # 148mm x 210mm
    (297.66, 420.94): "A6",  # 105mm x 148mm
    (612, 792): "Letter",
    (612, 1008): "Legal",
}
# Per-page information reported by the ``pagemeta`` subcommand.
# Each box is a 4-tuple; main() derives width as box[2] - box[0] and height
# as box[3] - box[1].
class PageMeta(BaseModel):
    mediabox: tuple[float, float, float, float]
    cropbox: tuple[float, float, float, float]
    artbox: tuple[float, float, float, float]
    bleedbox: tuple[float, float, float, float]
    annotations: int  # number of annotations on the page (0 when none)
    rotation: int  # the page's rotation value
def main(pdf: Path, page_index: int, output: OutputOptions) -> None:
    """
    Show the boxes, annotation count and rotation of a single page.

    Args:
        pdf: Path of the PDF file.
        page_index: Index used to pick the page from ``reader.pages``.
        output: ``OutputOptions.json`` prints the model as JSON; any other
            value renders a rich table, followed by the list of annotations
            when the page has some.

    """
    reader = PdfReader(pdf)
    page = reader.pages[page_index]
    meta = PageMeta(
        mediabox=page.mediabox,
        cropbox=page.cropbox,
        artbox=page.artbox,
        bleedbox=page.bleedbox,
        annotations=len(page.annotations) if page.annotations else 0,
        rotation=page.rotation,
    )

    if output == OutputOptions.json:
        print(meta.json())
        return

    console = Console()
    table = Table(title=f"{pdf}, page index {page_index}")
    table.add_column(
        "Attribute", justify="right", style="cyan", no_wrap=True
    )
    table.add_column("Value", style="white")

    def add_box_attr(
        name: str, box: tuple[float, float, float, float]
    ) -> None:
        left, bottom, right, top = box
        # NOTE: these two names are printed verbatim by the ``{name=}``
        # f-string fields below, so they must not be renamed.
        width = right - left
        height = top - bottom
        known_format = find_known_format(width, height)
        extra = f" ({known_format})" if known_format else ""
        table.add_row(
            name,
            f"({left:.2f}, {bottom:.2f}, {right:.2f}, {top:.2f}):"
            f" {width=:.2f} x {height=:.2f}{extra}",
        )

    for box_name in ("mediabox", "cropbox", "artbox", "bleedbox"):
        add_box_attr(box_name, getattr(meta, box_name))

    # Annotation count and rotation are only listed when non-zero.
    if meta.annotations:
        table.add_row("annotations", str(meta.annotations))
    if meta.rotation:
        table.add_row("rotation", str(meta.rotation))
    console.print(table)

    if page.annotations:
        console.print(Markdown("**All annotations:**"))
        for position, annot in enumerate(page.annotations, start=1):
            resolved = annot.get_object()
            console.print(
                f"{position}. {resolved['/Subtype']} at {resolved['/Rect']}"
            )
def find_known_format(width: float, height: float) -> str:
    """
    Name the known page format matching the given size, if any.

    Returns the format name on an exact match, ``"close to format: <name>"``
    for the first known format whose squared distance to (width, height) is
    below 4, and an empty string otherwise.
    """
    exact_match = KNOWN_PAGE_FORMATS.get((width, height))
    if exact_match:
        return exact_match

    def squared_distance(size: tuple[float, float]) -> float:
        delta_w = size[0] - width
        delta_h = size[1] - height
        return delta_w * delta_w + delta_h * delta_h

    nearby = next(
        (
            name
            for size, name in KNOWN_PAGE_FORMATS.items()
            if squared_distance(size) < 4
        ),
        None,
    )
    if nearby is None:
        return ""
    return f"close to format: {nearby}"
================================================
FILE: pdfly/rm.py
================================================
"""
Remove pages from PDF files.
Page ranges refer to the previously-named file.
A file not followed by a page range means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
Examples
pdfly rm -o output.pdf document.pdf 2:5
Remove pages 2 to 4 from document.pdf, producing output.pdf.
pdfly rm document.pdf -- -1
Removes the last page from document.pdf, modifying the original file.
pdfly rm document.pdf :-1
Removes all pages except the last one from document.pdf, modifying the original file.
pdfly rm report.pdf :6 7:
Remove all pages except page seven from report.pdf,
producing a single-page report.pdf.
"""
from pathlib import Path
from pdfly.cat import main as cat_main
def main(
    filename: Path, fn_pgrgs: list[str], output: Path, verbose: bool
) -> None:
    """
    Remove the given page ranges from the input file(s).

    Implemented by delegating to the ``cat`` implementation with the page
    selection inverted, so every page NOT matched by a range is kept.
    """
    cat_main(
        filename=filename,
        fn_pgrgs=fn_pgrgs,
        output=output,
        verbose=verbose,
        inverted_page_selection=True,
    )
================================================
FILE: pdfly/rotate.py
================================================
"""
Rotate specified pages by the specified amount
Example:
pdfly rotate --output output.pdf input.pdf 90
Rotate all pages by 90 degrees (clockwise)
pdfly rotate --output output.pdf input.pdf 90 :3
Rotate first three pages by 90 degrees (clockwise)
pdfly rotate --output output.pdf input.pdf 90 -- -1
Rotate last page by 90 degrees (clockwise)
A file not followed by a page range (PGRGS) means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
"""
from pathlib import Path
from pypdf import (
PageRange,
PdfReader,
PdfWriter,
)
from rich.console import Console
def main(
    filename: Path,
    output: Path,
    degrees: int,
    page_range: str,
) -> None:
    """
    Rotate the pages selected by ``page_range`` and write all pages out.

    Pages outside the range are copied unchanged. On any error a short
    message is printed on the console and the exception is re-raised.
    """
    try:
        # set up the streams
        reader = PdfReader(filename)
        source_pages = list(reader.pages)
        writer = PdfWriter()

        # Convert the page range into a set of page numbers
        selected = convert_range_to_pages(page_range, len(source_pages))
        for index, source_page in enumerate(source_pages):
            writer.add_page(
                source_page.rotate(degrees)
                if index in selected
                else source_page
            )

        # Everything looks good! Write the output file.
        with open(output, "wb") as output_fh:
            writer.write(output_fh)
    except Exception as error:
        Console().print(f"Error while rotating {filename}")
        raise error
def convert_range_to_pages(page_range: str, num_pages: int) -> set[int]:
    """Expand a page range expression into the set of page indices it covers."""
    start, stop, step = PageRange(page_range).indices(num_pages)
    return set(range(start, stop, step))
================================================
FILE: pdfly/sign.py
================================================
"""
Creates a signed PDF from an existing PDF file.
Examples
pdfly sign input.pdf --p12 certs.p12 -o signed.pdf
Signs the input.pdf with a PKCS12 certificate archive. Writes the resulting signed pdf into signed.pdf.
pdfly sign document.pdf --p12 certs.p12 --in-place
Signs the document.pdf with a PKCS12 certificate archive. Modifies the input file in-place.
"""
import io
import tempfile
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
from typing import Union
import fpdf.sign
import typer
from cryptography.hazmat.primitives.serialization import pkcs12
from endesive import signer
from fpdf import FPDF, get_scale_factor
from pypdf import PageObject, PdfReader, PdfWriter
from pypdf.generic import DictionaryObject, PdfObject
def main(
    filename: Path,
    output: Path | None,
    in_place: bool,
    p12: Path,
    p12_password: str | None,
) -> None:
    """
    Sign ``filename`` with the key and certificate stored in ``p12``.

    The signed PDF goes to ``output`` when given; otherwise it is written to
    a temporary file. With ``in_place`` set, the signed bytes then replace
    the content of ``filename`` and the intermediate file is removed.
    """
    validate_output_args_or_raise(output, in_place)
    pdf_reader = PdfReader(filename)
    pdf_is_unsigned_or_raise(pdf_reader)

    output_file: Union[io.BufferedWriter, tempfile._TemporaryFileWrapper]
    if output is None:
        output_file = tempfile.NamedTemporaryFile(
            delete=False
        )  # will be deleted by output.unlink() later on
        output = Path(output_file.name)
    else:
        output_file = open(output, "wb")

    try:
        _sign_pdf_contents(pdf_reader, output_file, p12, p12_password)
    finally:
        output_file.close()

    if not in_place:
        return
    filename.write_bytes(output.read_bytes())
    output.unlink()
def pdf_is_unsigned_or_raise(pdf_reader: PdfReader) -> None:
    """
    Ensure no page of the document carries a signature annotation.

    Raises:
        typer.BadParameter: As soon as a signature annotation is found.

    """
    for page in pdf_reader.pages:
        page_annotations = page.annotations
        if page_annotations is None:
            continue
        for annotation in page_annotations:
            if is_signature(annotation):
                raise typer.BadParameter("PDF is already signed.")
def is_signature(annotation: PdfObject) -> bool:
    """
    Tell whether an annotation is a signature form-field widget.

    The annotation is resolved with ``get_object()`` and counts as a
    signature when it is a dictionary whose ``/Subtype`` is ``/Widget`` and
    whose ``/FT`` is ``/Sig``.

    Missing keys are treated as "not a signature" instead of raising
    ``KeyError``: annotations are not guaranteed to carry ``/Subtype`` or
    ``/FT`` directly.
    NOTE(review): an ``/FT`` stored on a parent field dictionary is not
    looked up here.

    Args:
        annotation: The (possibly indirect) annotation object of a page.

    Returns:
        bool: True only for signature widgets.

    """
    resolved = annotation.get_object()
    # Covers both "could not be resolved" (None) and non-dictionary objects.
    if not isinstance(resolved, DictionaryObject):
        return False
    if resolved.get("/Subtype") != "/Widget":
        return False
    return resolved.get("/FT") == "/Sig"
def _sign_pdf_contents(
    pdf_reader: PdfReader,
    output_file: Union[io.BufferedWriter, tempfile._TemporaryFileWrapper],
    p12: Path,
    p12_password: str | None,
) -> None:
    """
    Write a signed copy of the PDF behind ``pdf_reader`` to ``output_file``.

    The signature is declared on an FPDF overlay for the last page, the
    pages are serialized with PdfWriter, and the serialized bytes are then
    signed with ``fpdf.sign.sign_content``.

    :param pdf_reader: reader of the unsigned input PDF
    :param output_file: writable binary file receiving the signed PDF
    :param p12: path of the PKCS12 archive holding key and certificate(s)
    :param p12_password: password of the PKCS12 archive, or None
    """
    unsigned_output_buffer = io.BytesIO()
    # The overlay is attached to the last page of the input document.
    with add_to_page(pdf_reader.pages[-1]) as pdf:
        with p12.open("rb") as pkcs_file:
            hashalgo = "sha256"
            sign_time = pdf.creation_date
            key, cert, extra_certs = pkcs12.load_key_and_certificates(
                pkcs_file.read(),
                (p12_password.encode() if p12_password is not None else None),
            )
            pdf.sign(
                key=key,
                cert=cert,  # type: ignore
                extra_certs=extra_certs,
                hashalgo=hashalgo,
                signing_time=sign_time,
            )
            # defer actual signing until after the input pdfs contents are merged
            # _sign_key = None prevents FPDF.output() from calculating the signature hash too early
            pdf._sign_key = None
    writer = PdfWriter()
    writer.append_pages_from_reader(pdf_reader)
    writer.write(unsigned_output_buffer)
    # Now that output_buffer contains the contents to be signed
    # we can generate the cryptographic signature using fpdf2.sign.sign_content
    # patch placeholder values to match how fpdf.sign.sign_content() expects them
    content_to_sign = bytearray(unsigned_output_buffer.getbuffer())
    content_to_sign = content_to_sign.replace(
        _SIGNATURE_BYTERANGE_PLACEHOLDER.encode(),
        fpdf.sign._SIGNATURE_BYTERANGE_PLACEHOLDER.encode(),
    )
    # The contents placeholder is found as a literal string "(...)" and is
    # replaced by the "<...>" form built from fpdf.sign's own placeholder.
    content_to_sign = content_to_sign.replace(
        b"(" + _SIGNATURE_CONTENTS_PLACEHOLDER.encode() + b")",
        b"<" + fpdf.sign._SIGNATURE_CONTENTS_PLACEHOLDER.encode() + b">",
    )
    signed_output_buffer = fpdf.sign.sign_content(
        signer,
        content_to_sign,
        key,
        cert,  # type: ignore
        extra_certs,
        hashalgo,
        sign_time,
    )
    output_file.write(signed_output_buffer)
@contextmanager
def add_to_page(reader_page: PageObject, unit: str = "mm") -> Generator[FPDF]:
    """
    Context manager yielding an FPDF document to draw an overlay for a page.

    The FPDF document has one page whose format is derived from the third
    and fourth mediabox entries of ``reader_page``. When the ``with`` block
    ends normally, the FPDF output is merged into ``reader_page``.

    :param reader_page: the page that receives the overlay
    :param unit: the unit passed to FPDF (default: "mm")
    """
    scale = get_scale_factor(unit)
    upper_right_x = reader_page.mediabox[2]
    upper_right_y = reader_page.mediabox[3]
    pdf = FPDF(format=(upper_right_x / scale, upper_right_y / scale), unit=unit)
    pdf.add_page()

    yield pdf

    # Only reached if the body of the with-statement did not raise.
    overlay_stream = io.BytesIO(pdf.output())
    reader_page.merge_page(page2=PdfReader(overlay_stream).pages[0])
def validate_output_args_or_raise(output: Path | None, in_place: bool) -> None:
if not in_place and output is None:
raise typer.BadParameter(
"One of the options --output or --in-place is required."
)
# fpdf.sign placeholder values - in the form after PdfWriter serialized them.
# _sign_pdf_contents() searches the serialized PDF for these byte sequences
# and swaps them for the corresponding fpdf.sign placeholders before signing.
_SIGNATURE_BYTERANGE_PLACEHOLDER = "[ 0 0 0 0 ]"
# The four characters backslash-0-0-0, repeated 0x2000 (8192) times.
_SIGNATURE_CONTENTS_PLACEHOLDER = "\\000" * 0x2000
================================================
FILE: pdfly/uncompress.py
================================================
"""Module for uncompressing PDF content streams."""
import zlib
from pathlib import Path
from pypdf import PdfReader, PdfWriter
from pypdf.generic import IndirectObject, PdfObject
def main(pdf: Path, output: Path) -> None:
    """
    Copy a PDF page by page, decompressing its content streams on the way.

    Afterwards the sizes of the input and the output file are printed.

    :param pdf: the PDF file to read
    :param output: the PDF file to write
    """
    reader = PdfReader(pdf)
    writer = PdfWriter()

    for page in reader.pages:
        contents: PdfObject | None = (
            page["/Contents"] if "/Contents" in page else None
        )
        if isinstance(contents, IndirectObject):
            contents = contents.get_object()

        if isinstance(contents, list):
            # Only the indirect entries of a content array are processed.
            streams = [
                entry for entry in contents if isinstance(entry, IndirectObject)
            ]
        elif isinstance(contents, IndirectObject):
            # NOTE(review): contents was resolved via get_object() above, so
            # this branch looks unreachable unless get_object() can itself
            # return an IndirectObject - confirm whether pages with a single
            # content stream are meant to be handled here.
            streams = [contents]
        else:
            streams = []

        for stream in streams:
            decompress_content_stream(stream)
        writer.add_page(page)

    with open(output, "wb") as fp:
        writer.write(fp)

    orig_size = pdf.stat().st_size
    uncomp_size = output.stat().st_size
    print(f"Original Size : {orig_size:,}")
    print(
        f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)"
    )
def decompress_content_stream(content: IndirectObject) -> None:
    """
    Decompress a content stream if its /Filter is /FlateDecode.

    On success the stream data is replaced by the decompressed data and the
    /Filter entry is removed. A zlib failure is reported on stdout and
    otherwise ignored.

    :param content: the content stream to be decompressed in place
    """
    uses_flate = content.get("/Filter") == "/FlateDecode"
    if not uses_flate:
        return

    try:
        content.set_data(zlib.decompress(content.get_data()))
        del content["/Filter"]
    except zlib.error as error:
        print(
            f"Some content stream with /FlateDecode failed to be decompressed: {error}"
        )
================================================
FILE: pdfly/up2.py
================================================
"""
Create a booklet-style PDF from a single input.
Pairs of two pages will be put on one page (left and right)
usage: python 2-up.py input_file output_file
"""
from pathlib import Path
from pypdf import PdfReader, PdfWriter
from pypdf.generic import FloatObject
def main(pdf: Path, output: Path) -> None:
    """
    Put each pair of consecutive pages side by side on one output page.

    The second page of a pair is merged to the right of the first one.
    If the page count is odd, the last page stays alone and the third
    entry of its MediaBox and CropBox is doubled instead.

    :param pdf: the PDF file to read
    :param output: the PDF file to write
    """
    reader = PdfReader(str(pdf))
    writer = PdfWriter()

    for left_index in range(0, len(reader.pages), 2):
        left_page = reader.pages[left_index]
        right_index = left_index + 1
        has_partner = right_index < len(reader.pages)

        if not has_partner:
            # Double the MediaBox width:
            left_page.mediabox[2] = FloatObject(2 * left_page.mediabox[2])
            # Double the CropBox width:
            left_page.cropbox[2] = FloatObject(2 * left_page.cropbox[2])
        else:
            right_page = reader.pages[right_index]
            shift_x = float(left_page.mediabox.width)
            left_page.merge_translated_page(
                right_page, tx=shift_x, ty=0, expand=True
            )

        writer.add_page(left_page)

    with open(output, "wb") as fp:
        writer.write(fp)
    print(f"{output} was created")
================================================
FILE: pdfly/update_offsets.py
================================================
"""
Updates offsets and lengths in a simple PDF file.
The PDF specification requires that the xref section at the end
of a PDF file has the correct offsets of the PDF's objects.
It further requires that the dictionary of a stream object
contains a /Length-entry giving the length of the encoded stream.
When editing a PDF file using a text-editor (e.g. vim) it is
elaborate to compute or adjust these offsets and lengths.
This command tries to compute /Length-entries of the stream dictionaries
and the offsets in the xref-section automatically.
It expects that the PDF file has ASCII encoding only. It may
use ISO-8859-1 or UTF-8 in its comments.
The current implementation incorrectly replaces CR (0x0d) by LF (0x0a) in binary data.
It expects that there is one xref-section only.
It expects that the /Length-entries have default values containing
enough digits, e.g. /Length 000 when the stream consists of 576 bytes.
Example:
update-offsets --verbose --encoding ISO-8859-1 issue-297.pdf issue-297.out.pdf
"""
import re
from pathlib import Path
from rich.console import Console
# Here, only simple regular expressions are used.
# Beyond a certain level of complexity, switching to a proper PDF dictionary parser would be better.
# Object header at the start of a line: "<object number> <generation> obj".
RE_OBJ = re.compile(r"^([0-9]+) ([0-9]+) obj *")
# The part of a line before its first CR or LF (group 1 may be empty).
RE_CONTENT = re.compile(r"^([^\r\n]*)", re.DOTALL)
# Indirect length "/Length <object number> <generation> R";
# groups: prefix, object number, generation, remainder.
RE_LENGTH_REF = re.compile(r"^(.*/Length )([0-9]+) ([0-9]+) R(.*)", re.DOTALL)
# Direct length "/Length <digits>" followed by a delimiter character;
# groups: prefix, digits, remainder (starting with the delimiter).
RE_LENGTH = re.compile(
    r"^(.*/Length )([0-9]+)([ />\x00\t\f\r\n].*)", re.DOTALL
)
def update_lines(
    lines_in: list[str], encoding: str, console: Console, verbose: bool
) -> list[str]:
    """
    Iterate over the lines of a PDF file and update offsets and lengths.

    The input is expected to be a PDF without binary sections.
    A first pass copies the lines, rewrites the entries of the xref section
    and the offset after ``startxref`` and measures the streams. A second
    pass patches the /Length entries (direct or via a length object).

    :param lines_in: A list of the lines including line-breaks.
    :param encoding: The encoding, e.g. "iso-8859-1" or "UTF-8".
    :param console: Console used to print messages.
    :param verbose: True to activate logging of info-messages.
    :return: The list of lines to be written in the given encoding.
    :raises RuntimeError: if the file structure is not supported or invalid
        (e.g. generation != 0, endstream without stream, missing objects,
        missing xref/startxref, /Length entry with too few digits).
    :raises NotImplementedError: if startxref occurs without a preceding
        xref section.
    """
    lines_out = []  # lines to be written
    map_line_offset = {}  # map from line-number to offset
    map_obj_offset = {}  # map from object-number to offset
    map_obj_line = {}  # map from object-number to line-number
    line_no = 0  # current line-number (starting at 0)
    offset_out = 0  # current offset in output-file
    line_xref = None  # line-number of xref-line (in xref-section only)
    line_startxref = None  # line-number of startxref-line
    curr_obj = None  # number of current object
    len_stream = None  # length of stream (in stream only)
    offset_xref = None  # offset of xref-section
    map_stream_len = {}  # map from object-number to /Length of stream
    map_obj_length_line = {}  # map from object-number to /Length-line
    map_obj_length_ref = (
        {}
    )  # map from object-number to /Length-reference (e.g. "3")
    map_obj_length_line_no = {}  # map from object-number to line_no of length
    # of /Length-line

    # First pass: copy the lines, track object offsets, measure streams and
    # rewrite the xref entries and the startxref offset.
    for idx, line in enumerate(lines_in):
        line_no = idx + 1  # line numbers are 1-based
        m_content = RE_CONTENT.match(line)
        if m_content is None:
            raise RuntimeError(
                f"Invalid PDF file: line {line_no} without line-break."
            )
        content = m_content.group(1)  # the line without its line-break
        map_line_offset[line_no] = offset_out
        m_obj = RE_OBJ.match(line)
        if m_obj is not None:
            # Start of an object: remember its offset and its line.
            curr_obj = m_obj.group(1)
            curr_gen = m_obj.group(2)
            if verbose:
                console.print(f"line {line_no}: object {curr_obj}")
            if curr_gen != "0":
                raise RuntimeError(
                    f"Invalid PDF file: generation {curr_gen} of object {curr_obj} in line {line_no} is not supported."
                )
            map_obj_offset[curr_obj] = int(offset_out)
            map_obj_line[curr_obj] = line_no
            len_stream = None

        # The order of the following branches matters: keyword lines are
        # recognized first, then lines inside an object (before/inside a
        # stream), then lines of the xref section and the startxref value.
        if content == "xref":
            offset_xref = offset_out
            line_xref = line_no
        elif content == "startxref":
            line_startxref = line_no
            line_xref = None  # the xref section has ended
        elif content == "stream":
            if verbose:
                console.print(f"line {line_no}: start stream")
            len_stream = 0
        elif content == "endstream":
            if verbose:
                console.print(f"line {line_no}: end stream")
            if curr_obj is None:
                raise RuntimeError(
                    f"Invalid PDF file: line {line_no}: endstream without object-start."
                )
            if len_stream is None:
                raise RuntimeError(
                    f"Invalid PDF file: line {line_no}: endstream without stream."
                )
            if len_stream > 0:
                # Ignore the last EOL
                len_stream = (
                    len_stream - 2
                    if lines_in[idx - 1][-2:] == "\r\n"
                    else len_stream - 1
                )
            if verbose:
                console.print(
                    f"line {line_no}: Computed /Length {len_stream} of obj {curr_obj}"
                )
            map_stream_len[curr_obj] = len_stream
        elif content == "endobj":
            curr_obj = None
        elif curr_obj is not None and len_stream is None:
            # Inside an object, but not inside a stream: look for /Length.
            m_length_ref = RE_LENGTH_REF.match(line)
            if m_length_ref is not None:
                len_obj = m_length_ref.group(2)
                len_obj_gen = m_length_ref.group(3)
                if verbose:
                    console.print(
                        f"line {line_no}, /Length-reference {len_obj} {len_obj_gen} R: {content}"
                    )
                map_obj_length_ref[curr_obj] = len_obj
            else:
                m_length = RE_LENGTH.match(line)
                if m_length is not None:
                    if verbose:
                        console.print(f"line {line_no}, /Length: {content}")
                    map_obj_length_line[curr_obj] = line
                    map_obj_length_line_no[curr_obj] = line_no
        elif curr_obj is not None and len_stream is not None:
            # Inside a stream: add the encoded size of the line.
            len_stream += len(line.encode(encoding))
        elif line_xref is not None and line_no > line_xref + 2:
            # Entry of the xref section: the object number is derived from
            # the distance to the xref line.
            object_number = line_no - line_xref - 2
            if (
                object_number <= len(map_obj_offset)
                and str(object_number) in map_obj_offset
            ):
                eol = line[-2:]  # the last two characters are kept as they are
                xref_updated = (
                    "%010d" % map_obj_offset[str(object_number)]
                ) + " 00000 n"
                if verbose:
                    console.print(f"{content} -> {xref_updated}")
                line = xref_updated + eol
        elif line_startxref is not None and line_no == line_startxref + 1:
            # The line after startxref holds the offset of the xref section.
            if offset_xref is None:
                raise NotImplementedError(
                    "Unsupported file: startxref without preceding xref-section (probable cross-reference stream)"
                )
            line = "%d\n" % offset_xref
        lines_out.append(line)
        offset_out += len(line.encode(encoding))

    # Some checks
    if len(map_obj_offset) == 0:
        raise RuntimeError(
            "Invalid PDF file: the command didn't find any PDF objects."
        )
    if offset_xref is None:
        raise RuntimeError(
            "Invalid PDF file: the command didn't find a xref-section"
        )
    if line_startxref is None:
        raise RuntimeError(
            "Invalid PDF file: the command didn't find a startxref-section"
        )

    # Second pass: write the computed stream lengths into lines_out.
    for curr_obj, stream_len in map_stream_len.items():
        if curr_obj in map_obj_length_line:
            # Direct /Length entry inside the stream dictionary.
            line = map_obj_length_line[curr_obj]
            m_length = RE_LENGTH.match(line)
            if m_length is None:
                raise RuntimeError(
                    f"Invalid PDF file: line '{line}' does not contain a valid /Length."
                )
            prev_length = m_length.group(2)
            len_digits = len(prev_length)
            # Zero-pad to the previous number of digits so that the line
            # keeps its size.
            len_format = "%%0%dd" % len_digits
            updated_length = len_format % stream_len
            if len(updated_length) > len_digits:
                raise RuntimeError(
                    f"Not enough digits in /Length-entry {prev_length}"
                    f" of object {curr_obj}:"
                    f" too short to take /Length {updated_length}"
                )
            line = m_length.group(1) + updated_length + m_length.group(3)
            lines_out[map_obj_length_line_no[curr_obj] - 1] = line
        elif curr_obj in map_obj_length_ref:
            # /Length refers to another object holding the value.
            len_obj = map_obj_length_ref[curr_obj]
            if len_obj not in map_obj_line:
                raise RuntimeError(
                    f"obj {curr_obj} has unknown length-obj {len_obj}"
                )
            len_obj_line = map_obj_line[len_obj]
            # map_obj_line holds 1-based line numbers, so this 0-based index
            # addresses the line following the "obj" line of the length object.
            prev_length = lines_out[len_obj_line][:-1]
            len_digits = len(prev_length)
            len_format = "%%0%dd" % len_digits
            updated_length = len_format % stream_len
            if len(updated_length) > len_digits:
                raise RuntimeError(
                    f"Not enough digits in /Length-ref-entry {prev_length}"
                    f" of object {curr_obj} and len-object {len_obj}:"
                    f" too short to take /Length {updated_length}"
                )
            if prev_length != updated_length:
                if verbose:
                    # NOTE(review): line_no still holds the number of the last
                    # input line here, not the line of the length object.
                    console.print(
                        f"line {line_no}, ref-len {len_obj} of {curr_obj}: {prev_length} -> {updated_length}"
                    )
                lines_out[len_obj_line] = updated_length + "\n"
        else:
            raise RuntimeError(
                f"obj {curr_obj} with stream-len {stream_len} has no object-length-line: {map_obj_length_line}"
            )
    return lines_out
def read_binary_file(file_path: Path, encoding: str) -> list[str]:
    """
    Read a binary file and return its lines as strings in the given encoding.

    Lines end at LF, CR or CRLF; the line break stays part of the line.
    Encoding utf-8 can't be used to read random binary data.

    :param file_path: file to be read line by line
    :param encoding: encoding to be used (e.g. "iso-8859-1")
    :return: lines including line-breaks
    :raises UnicodeDecodeError: if a line can't be decoded with the encoding
    """
    # bytes.splitlines() breaks at LF, CR and CRLF only. Splitting the whole
    # content at once guarantees that a CRLF is never torn apart, which could
    # happen when it straddled the boundary of two separately read chunks.
    raw_lines = file_path.read_bytes().splitlines(keepends=True)
    return [raw_line.decode(encoding, errors="strict") for raw_line in raw_lines]
def main(file_in: Path, file_out: Path, encoding: str, verbose: bool) -> None:
    """
    Read a PDF file, update its offsets and lengths and write the result.

    :param file_in: the PDF file to read
    :param file_out: the file to write; if falsy, file_in is overwritten
    :param encoding: encoding used to decode and encode the lines
    :param verbose: True to activate logging of info-messages
    """
    target = file_out if file_out else file_in
    console = Console()

    console.print(f"Read {file_in}")
    updated_lines = update_lines(
        read_binary_file(file_in, encoding), encoding, console, verbose
    )

    with open(target, "wb") as out_stream:
        for updated_line in updated_lines:
            out_stream.write(updated_line.encode(encoding))
    console.print(f"Wrote {target}", soft_wrap=True)
================================================
FILE: pdfly/x2pdf.py
================================================
"""Convert one or more files to PDF. Each file is a page."""
from io import BytesIO
from pathlib import Path
from fpdf import FPDF
from PIL import Image
from pypdf import PdfReader, PdfWriter
from rich.console import Console
def px_to_mm(px: float) -> float:
    """
    Convert a length in pixels to millimetres, at 72 pixels per inch.

    :param px: length in pixels
    :return: the same length in millimetres
    """
    pixels_per_inch = 72
    millimetres_per_inch = 25.4
    return px / pixels_per_inch * millimetres_per_inch
def image_to_pdf(filepath: Path) -> BytesIO:
    """
    Create a one-page PDF showing the given image.

    The page format is the image size converted with px_to_mm(); the image
    is placed at the origin of the page.

    :param filepath: the image file to convert
    :return: the bytes of the PDF, wrapped in a BytesIO
    """
    with Image.open(filepath) as cover:
        width_px, height_px = cover.size
        page_format = (px_to_mm(width_px), px_to_mm(height_px))

    document = FPDF(unit="mm")
    document.add_page(format=page_format)  # type: ignore
    document.image(filepath, x=0, y=0)
    rendered = document.output()
    return BytesIO(rendered)
def main(in_filepaths: list[Path], out_filepath: Path) -> int:
    """
    Convert the given files to a single PDF.

    PDF inputs contribute all of their pages; every other input is treated
    as an image and becomes one page. Pages appear in the output in the
    order of the inputs. An input that can't be converted is reported on
    the console and skipped.

    :param in_filepaths: the files to convert, in output order
    :param out_filepath: the PDF file to write
    :return: the number of inputs that could not be converted
        (0 means success)
    """
    console = Console()
    exit_code = 0
    writer = PdfWriter()
    for filepath in in_filepaths:
        # Compare case-insensitively so that e.g. "SCAN.PDF" is read as a
        # PDF instead of being handed to the image conversion.
        if filepath.name.lower().endswith(".pdf"):
            for page in PdfReader(filepath).pages:
                # add_page() appends; insert_page() without an index would
                # put every page at the front and reverse the page order.
                writer.add_page(page)
            continue
        try:
            pdf_bytes = image_to_pdf(filepath)
            new_page = PdfReader(pdf_bytes).pages[0]
            writer.add_page(new_page)
        except Exception:
            # Best effort: report the failing input and go on with the
            # remaining ones; the failure is reflected in the return value.
            console.print(
                f"[red]Error: Could not convert '{filepath}' to a PDF."
            )
            console.print_exception(extra_lines=1, max_frames=1)
            exit_code += 1
    writer.write(out_filepath)
    return exit_code
================================================
FILE: pylock.toml
================================================
lock-version = "1.0"
created-by = "pip"
[[packages]]
name = "alabaster"
version = "1.0.0"
[[packages.wheels]]
name = "alabaster-1.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b"
[[packages]]
name = "annotated-doc"
version = "0.0.4"
[[packages.wheels]]
name = "annotated_doc-0.0.4-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320"
[[packages]]
name = "annotated-types"
version = "0.7.0"
[[packages.wheels]]
name = "annotated_types-0.7.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"
[[packages]]
name = "anyio"
version = "4.12.1"
[[packages.wheels]]
name = "anyio-4.12.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c"
[[packages]]
name = "asn1crypto"
version = "1.5.1"
[[packages.wheels]]
name = "asn1crypto-1.5.1-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/c9/7f/09065fd9e27da0eda08b4d6897f1c13535066174cc023af248fc2a8d5e5a/asn1crypto-1.5.1-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67"
[[packages]]
name = "attrs"
version = "25.4.0"
[[packages.wheels]]
name = "attrs-25.4.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"
[[packages]]
name = "babel"
version = "2.18.0"
[[packages.wheels]]
name = "babel-2.18.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/77/f5/21d2de20e8b8b0408f0681956ca2c69f1320a3848ac50e6e7f39c6159675/babel-2.18.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "e2b422b277c2b9a9630c1d7903c2a00d0830c409c59ac8cae9081c92f1aeba35"
[[packages]]
name = "bcrypt"
version = "5.0.0"
[[packages.wheels]]
name = "bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl"
url = "https://files.pythonhosted.org/packages/d4/8d/5e43d9584b3b3591a6f9b68f755a4da879a59712981ef5ad2a0ac1379f7a/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl"
[packages.wheels.hashes]
sha256 = "611f0a17aa4a25a69362dcc299fda5c8a3d4f160e2abb3831041feb77393a14a"
[[packages]]
name = "black"
version = "26.3.1"
[[packages.wheels]]
name = "black-26.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/7f/0a/8d17d1a9c06f88d3d030d0b1d4373c1551146e252afe4547ed601c0e697f/black-26.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "6c54a4a82e291a1fee5137371ab488866b7c86a3305af4026bdd4dc78642e1ac"
[[packages]]
name = "certifi"
version = "2026.2.25"
[[packages.wheels]]
name = "certifi-2026.2.25-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"
[[packages]]
name = "cffi"
version = "2.0.0"
[[packages.wheels]]
name = "cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl"
url = "https://files.pythonhosted.org/packages/98/29/9b366e70e243eb3d14a5cb488dfd3a0b6b2f1fb001a203f653b93ccfac88/cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl"
[packages.wheels.hashes]
sha256 = "fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453"
[[packages]]
name = "cfgv"
version = "3.5.0"
[[packages.wheels]]
name = "cfgv-3.5.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0"
[[packages]]
name = "charset-normalizer"
version = "3.4.6"
[[packages.wheels]]
name = "charset_normalizer-3.4.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/fd/ce/865e4e09b041bad659d682bbd98b47fb490b8e124f9398c9448065f64fee/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "51fb3c322c81d20567019778cb5a4a6f2dc1c200b886bc0d636238e364848c89"
[[packages]]
name = "check-wheel-contents"
version = "0.6.3"
[[packages.wheels]]
name = "check_wheel_contents-0.6.3-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/be/05/f39fde9f31ef80b285ef5822fad4ddabf73fec62a1f02c5beb4b2f328972/check_wheel_contents-0.6.3-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "5ae39c8c434b972f0740d04610759168590713175aab584b012b1b84f6771874"
[[packages]]
name = "click"
version = "8.3.1"
[[packages.wheels]]
name = "click-8.3.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6"
[[packages]]
name = "colorama"
version = "0.4.6"
[[packages.wheels]]
name = "colorama-0.4.6-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"
[[packages]]
name = "coverage"
version = "7.13.4"
[[packages.wheels]]
name = "coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl"
url = "https://files.pythonhosted.org/packages/f8/02/aa7ec01d1a5023c4b680ab7257f9bfde9defe8fdddfe40be096ac19e8177/coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl"
[packages.wheels.hashes]
sha256 = "8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f"
[[packages]]
name = "cryptography"
version = "46.0.5"
[[packages.wheels]]
name = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl"
url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl"
[packages.wheels.hashes]
sha256 = "a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c"
[[packages]]
name = "defusedxml"
version = "0.7.1"
[[packages.wheels]]
name = "defusedxml-0.7.1-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"
[[packages]]
name = "distlib"
version = "0.4.0"
[[packages.wheels]]
name = "distlib-0.4.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"
[[packages]]
name = "docutils"
version = "0.21.2"
[[packages.wheels]]
name = "docutils-0.21.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"
[[packages]]
name = "endesive"
version = "2.19.3"
[[packages.wheels]]
name = "endesive-2.19.3-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/a0/c3/a0dcae019de40816352462371c473b22639cd8e68f33a5f23f07faf330fd/endesive-2.19.3-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "e5e09c1011b1977fbb9d563d672de7f17f5638304ce57a35bf7d00f3b7a3972e"
[[packages]]
name = "exceptiongroup"
version = "1.3.1"
[[packages.wheels]]
name = "exceptiongroup-1.3.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598"
[[packages]]
name = "filelock"
version = "3.25.2"
[[packages.wheels]]
name = "filelock-3.25.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70"
[[packages]]
name = "flake8"
version = "7.3.0"
[[packages.wheels]]
name = "flake8-7.3.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/9f/56/13ab06b4f93ca7cac71078fbe37fcea175d3216f31f85c3168a6bbd0bb9a/flake8-7.3.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e"
[[packages]]
name = "flake8-bugbear"
version = "25.11.29"
[[packages.wheels]]
name = "flake8_bugbear-25.11.29-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/0d/42/c18f199780d99a6f6a64c4a36f4ad28a445d9e11968a6025b21d0c8b6802/flake8_bugbear-25.11.29-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "9bf15e2970e736d2340da4c0a70493db964061c9c38f708cfe1f7b2d87392298"
[[packages]]
name = "flake8-comprehensions"
version = "3.17.0"
[[packages.wheels]]
name = "flake8_comprehensions-3.17.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/39/bd/d6739d685fdd79349aa51c37bdedc0d8eab6ae9c6e6ed2ca935b3f88210d/flake8_comprehensions-3.17.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "3943a9c6f2593c3bc5cc64106c2f89d63c6ecd49c8343597f8257b8fcfc8b0a2"
[[packages]]
name = "flake8-isort"
version = "7.0.0"
[[packages.wheels]]
name = "flake8_isort-7.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/17/7d/907ef4135f6ede5187930d9ddd1f36564e07c6cdcd15ae8fb9849c9517e0/flake8_isort-7.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "c301a0e55fc77582348e636194b84b1a0baf0dfdaa6eddf3b0eeea75f8be7f36"
[[packages]]
name = "flake8-simplify"
version = "0.30.0"
[[packages.wheels]]
name = "flake8_simplify-0.30.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/9b/d5/18a89f40c1a145a44d1fad825553be8131bcb727f5f2783d3727a2f4b2d0/flake8_simplify-0.30.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "c9f54a50d24780832a3f2bb7a687ef465b91f10d7cb4ea0845dff4b65d9c91f4"
[[packages]]
name = "flit"
version = "3.12.0"
[[packages.wheels]]
name = "flit-3.12.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/f5/82/ce1d3bb380b227e26e517655d1de7b32a72aad61fa21ff9bd91a2e2db6ee/flit-3.12.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "2b4e7171dc22881fa6adc2dbf083e5ecc72520be3cd7587d2a803da94d6ef431"
[[packages]]
name = "flit-core"
version = "3.12.0"
[[packages.wheels]]
name = "flit_core-3.12.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/f2/65/b6ba90634c984a4fcc02c7e3afe523fef500c4980fec67cc27536ee50acf/flit_core-3.12.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "e7a0304069ea895172e3c7bb703292e992c5d1555dd1233ab7b5621b5b69e62c"
[[packages]]
name = "fonttools"
version = "4.62.1"
[[packages.wheels]]
name = "fonttools-4.62.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl"
url = "https://files.pythonhosted.org/packages/42/09/7dbe3d7023f57d9b580cfa832109d521988112fd59dddfda3fddda8218f9/fonttools-4.62.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl"
[packages.wheels.hashes]
sha256 = "7bca7a1c1faf235ffe25d4f2e555246b4750220b38de8261d94ebc5ce8a23c23"
[[packages]]
name = "fpdf2"
version = "2.8.7"
[[packages.wheels]]
name = "fpdf2-2.8.7-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/66/0a/cf50ecffa1e3747ed9380a3adfc829259f1f86b3fdbd9e505af789003141/fpdf2-2.8.7-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "d391fc508a3ce02fc43a577c830cda4fe6f37646f2d143d489839940932fbc19"
[[packages]]
name = "h11"
version = "0.16.0"
[[packages.wheels]]
name = "h11-0.16.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"
[[packages]]
name = "identify"
version = "2.6.18"
[[packages.wheels]]
name = "identify-2.6.18-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/46/33/92ef41c6fad0233e41d3d84ba8e8ad18d1780f1e5d99b3c683e6d7f98b63/identify-2.6.18-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "8db9d3c8ea9079db92cafb0ebf97abdc09d52e97f4dcf773a2e694048b7cd737"
[[packages]]
name = "idna"
version = "3.11"
[[packages.wheels]]
name = "idna-3.11-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"
[[packages]]
name = "imagesize"
version = "2.0.0"
[[packages.wheels]]
name = "imagesize-2.0.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/5f/53/fb7122b71361a0d121b669dcf3d31244ef75badbbb724af388948de543e2/imagesize-2.0.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "5667c5bbb57ab3f1fa4bc366f4fbc971db3d5ed011fd2715fd8001f782718d96"
[[packages]]
name = "iniconfig"
version = "2.3.0"
[[packages.wheels]]
name = "iniconfig-2.3.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"
[[packages]]
name = "invoke"
version = "2.2.1"
[[packages.wheels]]
name = "invoke-2.2.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/32/4b/b99e37f88336009971405cbb7630610322ed6fbfa31e1d7ab3fbf3049a2d/invoke-2.2.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "2413bc441b376e5cd3f55bb5d364f973ad8bdd7bf87e53c79de3c11bf3feecc8"
[[packages]]
name = "isort"
version = "8.0.1"
[[packages.wheels]]
name = "isort-8.0.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/3e/95/c7c34aa53c16353c56d0b802fba48d5f5caa2cdee7958acbcb795c830416/isort-8.0.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "28b89bc70f751b559aeca209e6120393d43fbe2490de0559662be7a9787e3d75"
[[packages]]
name = "jinja2"
version = "3.1.6"
[[packages.wheels]]
name = "jinja2-3.1.6-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"
[[packages]]
name = "librt"
version = "0.8.1"
[[packages.wheels]]
name = "librt-0.8.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/01/99/f85130582f05dcf0c8902f3d629270231d2f4afdfc567f8305a952ac7f14/librt-0.8.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "97c2b54ff6717a7a563b72627990bec60d8029df17df423f0ed37d56a17a176b"
[[packages]]
name = "lxml"
version = "6.0.2"
[[packages.wheels]]
name = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/20/cf/cab09478699b003857ed6ebfe95e9fb9fa3d3c25f1353b905c9b73cfb624/lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c"
[[packages]]
name = "markdown-it-py"
version = "3.0.0"
[[packages.wheels]]
name = "markdown_it_py-3.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"
[[packages]]
name = "markupsafe"
version = "3.0.3"
[[packages.wheels]]
name = "markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/af/cd/ce6e848bbf2c32314c9b237839119c5a564a59725b53157c856e90937b7a/markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591"
[[packages]]
name = "mccabe"
version = "0.7.0"
[[packages.wheels]]
name = "mccabe-0.7.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"
[[packages]]
name = "mdit-py-plugins"
version = "0.5.0"
[[packages.wheels]]
name = "mdit_py_plugins-0.5.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f"
[[packages]]
name = "mdurl"
version = "0.1.2"
[[packages.wheels]]
name = "mdurl-0.1.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"
[[packages]]
name = "mypy"
version = "1.19.1"
[[packages.wheels]]
name = "mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/2a/0d/93c2e4a287f74ef11a66fb6d49c7a9f05e47b0a4399040e6719b57f500d2/mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74"
[[packages]]
name = "mypy-extensions"
version = "1.1.0"
[[packages.wheels]]
name = "mypy_extensions-1.1.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"
[[packages]]
name = "myst-parser"
version = "4.0.1"
[[packages.wheels]]
name = "myst_parser-4.0.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d"
[[packages]]
name = "nodeenv"
version = "1.10.0"
[[packages.wheels]]
name = "nodeenv-1.10.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827"
[[packages]]
name = "packaging"
version = "26.0"
[[packages.wheels]]
name = "packaging-26.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529"
[[packages]]
name = "paramiko"
version = "4.0.0"
[[packages.wheels]]
name = "paramiko-4.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/a9/90/a744336f5af32c433bd09af7854599682a383b37cfd78f7de263de6ad6cb/paramiko-4.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "0e20e00ac666503bf0b4eda3b6d833465a2b7aff2e2b3d79a8bba5ef144ee3b9"
[[packages]]
name = "pathspec"
version = "1.0.4"
[[packages.wheels]]
name = "pathspec-1.0.4-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/ef/3c/2c197d226f9ea224a9ab8d197933f9da0ae0aac5b6e0f884e2b8d9c8e9f7/pathspec-1.0.4-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723"
[[packages]]
name = "pdfly"
[packages.directory]
path = "."
[[packages]]
name = "pillow"
version = "12.1.1"
[[packages.wheels]]
name = "pillow-12.1.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/0c/7b/f9b09a7804ec7336effb96c26d37c29d27225783dc1501b7d62dcef6ae25/pillow-12.1.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "9f51079765661884a486727f0729d29054242f74b46186026582b4e4769918e4"
[[packages]]
name = "pip"
version = "26.0.1"
[[packages.wheels]]
name = "pip-26.0.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/de/f0/c81e05b613866b76d2d1066490adf1a3dbc4ee9d9c839961c3fc8a6997af/pip-26.0.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "bdb1b08f4274833d62c1aa29e20907365a2ceb950410df15fc9521bad440122b"
[[packages]]
name = "platformdirs"
version = "4.9.4"
[[packages.wheels]]
name = "platformdirs-4.9.4-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/63/d7/97f7e3a6abb67d8080dd406fd4df842c2be0efaf712d1c899c32a075027c/platformdirs-4.9.4-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "68a9a4619a666ea6439f2ff250c12a853cd1cbd5158d258bd824a7df6be2f868"
[[packages]]
name = "pluggy"
version = "1.6.0"
[[packages.wheels]]
name = "pluggy-1.6.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"
[[packages]]
name = "pre-commit"
version = "4.5.1"
[[packages.wheels]]
name = "pre_commit-4.5.1-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/5d/19/fd3ef348460c80af7bb4669ea7926651d1f95c23ff2df18b9d24bab4f3fa/pre_commit-4.5.1-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "3b3afd891e97337708c1674210f8eba659b52a38ea5f822ff142d10786221f77"
[[packages]]
name = "pycodestyle"
version = "2.14.0"
[[packages.wheels]]
name = "pycodestyle-2.14.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/d7/27/a58ddaf8c588a3ef080db9d0b7e0b97215cee3a45df74f3a94dbbf5c893a/pycodestyle-2.14.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "dd6bf7cb4ee77f8e016f9c8e74a35ddd9f67e1d5fd4184d86c3b98e07099f42d"
[[packages]]
name = "pycparser"
version = "3.0"
[[packages.wheels]]
name = "pycparser-3.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992"
[[packages]]
name = "pydantic"
version = "2.12.5"
[[packages.wheels]]
name = "pydantic-2.12.5-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d"
[[packages]]
name = "pydantic-core"
version = "2.41.5"
[[packages.wheels]]
name = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
[packages.wheels.hashes]
sha256 = "100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a"
[[packages]]
name = "pyflakes"
version = "3.4.0"
[[packages.wheels]]
name = "pyflakes-3.4.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/c2/2f/81d580a0fb83baeb066698975cb14a618bdbed7720678566f1b046a95fe8/pyflakes-3.4.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "f742a7dbd0d9cb9ea41e9a24a918996e8170c799fa528688d40dd582c8265f4f"
[[packages]]
name = "pygments"
version = "2.19.2"
[[packages.wheels]]
name = "pygments-2.19.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"
[[packages]]
name = "pykcs11"
version = "1.5.18"
[packages.sdist]
name = "pykcs11-1.5.18.tar.gz"
url = "https://files.pythonhosted.org/packages/22/07/0c2215cb6ef70c213892571eb015e670f4d6adbecedc5eb2369f82c1c7f2/pykcs11-1.5.18.tar.gz"
[packages.sdist.hashes]
sha256 = "12fd878b369821d80c1be8a140c85e8a0fb1358fcaaba66ca66869213692f227"
[[packages]]
name = "pynacl"
version = "1.6.2"
[[packages.wheels]]
name = "pynacl-1.6.2-cp38-abi3-manylinux_2_34_x86_64.whl"
url = "https://files.pythonhosted.org/packages/c9/a8/b917096b1accc9acd878819a49d3d84875731a41eb665f6ebc826b1af99e/pynacl-1.6.2-cp38-abi3-manylinux_2_34_x86_64.whl"
[packages.wheels.hashes]
sha256 = "c8a231e36ec2cab018c4ad4358c386e36eede0319a0c41fed24f840b1dac59f6"
[[packages]]
name = "pypdf"
version = "6.9.0"
[[packages.wheels]]
name = "pypdf-6.9.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/00/64/ac6159cfbeabab3cf54873bbf7314b29183c7ff547c9776596d63170d7c0/pypdf-6.9.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "85805ad7457ca878c4cfd1bc026c4b3dcae359b4a80f889fa7e8c5a1c1a83e51"
[[packages]]
name = "pytest"
version = "9.0.2"
[[packages.wheels]]
name = "pytest-9.0.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b"
[[packages]]
name = "pytest-cov"
version = "7.0.0"
[[packages.wheels]]
name = "pytest_cov-7.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861"
[[packages]]
name = "pytest-socket"
version = "0.7.0"
[[packages.wheels]]
name = "pytest_socket-0.7.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/19/58/5d14cb5cb59409e491ebe816c47bf81423cd03098ea92281336320ae5681/pytest_socket-0.7.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "7e0f4642177d55d317bbd58fc68c6bd9048d6eadb2d46a89307fa9221336ce45"
[[packages]]
name = "pytest-timeout"
version = "2.4.0"
[[packages.wheels]]
name = "pytest_timeout-2.4.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2"
[[packages]]
name = "python-discovery"
version = "1.1.3"
[[packages.wheels]]
name = "python_discovery-1.1.3-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/e7/80/73211fc5bfbfc562369b4aa61dc1e4bf07dc7b34df7b317e4539316b809c/python_discovery-1.1.3-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "90e795f0121bc84572e737c9aa9966311b9fde44ffb88a5953b3ec9b31c6945e"
[[packages]]
name = "pytokens"
version = "0.4.1"
[[packages.wheels]]
name = "pytokens-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/69/66/f6fb1007a4c3d8b682d5d65b7c1fb33257587a5f782647091e3408abe0b8/pytokens-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c"
[[packages]]
name = "pyyaml"
version = "6.0.3"
[[packages.wheels]]
name = "pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b"
[[packages]]
name = "requests"
version = "2.32.5"
[[packages.wheels]]
name = "requests-2.32.5-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"
[[packages]]
name = "rich"
version = "14.3.3"
[[packages.wheels]]
name = "rich-14.3.3-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d"
[[packages]]
name = "ruff"
version = "0.15.6"
[[packages.wheels]]
name = "ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
url = "https://files.pythonhosted.org/packages/f1/9f/f85ef5fd01a52e0b472b26dc1b4bd228b8f6f0435975442ffa4741278703/ruff-0.15.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
[packages.wheels.hashes]
sha256 = "98893c4c0aadc8e448cfa315bd0cc343a5323d740fe5f28ef8a3f9e21b381f7e"
[[packages]]
name = "shellingham"
version = "1.5.4"
[[packages.wheels]]
name = "shellingham-1.5.4-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"
[[packages]]
name = "snowballstemmer"
version = "3.0.1"
[[packages.wheels]]
name = "snowballstemmer-3.0.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064"
[[packages]]
name = "sphinx"
version = "8.1.3"
[[packages.wheels]]
name = "sphinx-8.1.3-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/26/60/1ddff83a56d33aaf6f10ec8ce84b4c007d9368b21008876fceda7e7381ef/sphinx-8.1.3-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "09719015511837b76bf6e03e42eb7595ac8c2e41eeb9c29c5b755c6b677992a2"
[[packages]]
name = "sphinx-autobuild"
version = "2024.10.3"
[[packages.wheels]]
name = "sphinx_autobuild-2024.10.3-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/18/c0/eba125db38c84d3c74717008fd3cb5000b68cd7e2cbafd1349c6a38c3d3b/sphinx_autobuild-2024.10.3-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "158e16c36f9d633e613c9aaf81c19b0fc458ca78b112533b20dafcda430d60fa"
[[packages]]
name = "sphinx-rtd-theme"
version = "3.1.0"
[[packages.wheels]]
name = "sphinx_rtd_theme-3.1.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/87/c7/b5c8015d823bfda1a346adb2c634a2101d50bb75d421eb6dcb31acd25ebc/sphinx_rtd_theme-3.1.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "1785824ae8e6632060490f67cf3a72d404a85d2d9fc26bce3619944de5682b89"
[[packages]]
name = "sphinxcontrib-applehelp"
version = "2.0.0"
[[packages.wheels]]
name = "sphinxcontrib_applehelp-2.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5"
[[packages]]
name = "sphinxcontrib-devhelp"
version = "2.0.0"
[[packages.wheels]]
name = "sphinxcontrib_devhelp-2.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2"
[[packages]]
name = "sphinxcontrib-htmlhelp"
version = "2.1.0"
[[packages.wheels]]
name = "sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8"
[[packages]]
name = "sphinxcontrib-jquery"
version = "4.1"
[[packages.wheels]]
name = "sphinxcontrib_jquery-4.1-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/76/85/749bd22d1a68db7291c89e2ebca53f4306c3f205853cf31e9de279034c3c/sphinxcontrib_jquery-4.1-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "f936030d7d0147dd026a4f2b5a57343d233f1fc7b363f68b3d4f1cb0993878ae"
[[packages]]
name = "sphinxcontrib-jsmath"
version = "1.0.1"
[[packages.wheels]]
name = "sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178"
[[packages]]
name = "sphinxcontrib-qthelp"
version = "2.0.0"
[[packages.wheels]]
name = "sphinxcontrib_qthelp-2.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb"
[[packages]]
name = "sphinxcontrib-serializinghtml"
version = "2.0.0"
[[packages.wheels]]
name = "sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331"
[[packages]]
name = "starlette"
version = "0.52.1"
[[packages.wheels]]
name = "starlette-0.52.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74"
[[packages]]
name = "tomli"
version = "2.4.0"
[[packages.wheels]]
name = "tomli-2.4.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/23/d1/136eb2cb77520a31e1f64cbae9d33ec6df0d78bdf4160398e86eec8a8754/tomli-2.4.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a"
[[packages]]
name = "tomli-w"
version = "1.2.0"
[[packages.wheels]]
name = "tomli_w-1.2.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/c7/18/c86eb8e0202e32dd3df50d43d7ff9854f8e0603945ff398974c1d91ac1ef/tomli_w-1.2.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90"
[[packages]]
name = "typer"
version = "0.24.1"
[[packages.wheels]]
name = "typer-0.24.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e"
[[packages]]
name = "typing-extensions"
version = "4.15.0"
[[packages.wheels]]
name = "typing_extensions-4.15.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"
[[packages]]
name = "typing-inspection"
version = "0.4.2"
[[packages.wheels]]
name = "typing_inspection-0.4.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"
[[packages]]
name = "urllib3"
version = "2.6.3"
[[packages.wheels]]
name = "urllib3-2.6.3-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"
[[packages]]
name = "uvicorn"
version = "0.42.0"
[[packages.wheels]]
name = "uvicorn-0.42.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/0a/89/f8827ccff89c1586027a105e5630ff6139a64da2515e24dafe860bd9ae4d/uvicorn-0.42.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "96c30f5c7abe6f74ae8900a70e92b85ad6613b745d4879eb9b16ccad15645359"
[[packages]]
name = "virtualenv"
version = "21.2.0"
[[packages.wheels]]
name = "virtualenv-21.2.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/c6/59/7d02447a55b2e55755011a647479041bc92a82e143f96a8195cb33bd0a1c/virtualenv-21.2.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "1bd755b504931164a5a496d217c014d098426cddc79363ad66ac78125f9d908f"
[[packages]]
name = "watchfiles"
version = "1.1.1"
[[packages.wheels]]
name = "watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
url = "https://files.pythonhosted.org/packages/d5/dc/1a680b7458ffa3b14bb64878112aefc8f2e4f73c5af763cbf0bd43100658/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
[packages.wheels.hashes]
sha256 = "544364b2b51a9b0c7000a4b4b02f90e9423d97fbbf7e06689236443ebcad81ab"
[[packages]]
name = "websockets"
version = "16.0"
[[packages.wheels]]
name = "websockets-16.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl"
url = "https://files.pythonhosted.org/packages/9d/2f/4b3ca7e106bc608744b1cdae041e005e446124bebb037b18799c2d356864/websockets-16.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl"
[packages.wheels.hashes]
sha256 = "7d837379b647c0c4c2355c2499723f82f1635fd2c26510e1f587d89bc2199e72"
[[packages]]
name = "wheel-filename"
version = "1.4.2"
[[packages.wheels]]
name = "wheel_filename-1.4.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/b4/0f/6e97a3bc38cdde32e3ec49f8c0903fe3559ec9ec9db181782f0bb4417717/wheel_filename-1.4.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "3fa599046443d4ca830d06e3d180cd0a675d5871af0a68daa5623318bb4d17e3"
================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["flit_core >=3.2,<4"]
build-backend = "flit_core.buildapi"
[project]
name = "pdfly"
authors = [
{ name = "Martin Thoma", email = "info@martin-thoma.de" },
{ name = "Lucas Cimon (@Lucas-C)" },
]
maintainers = [
{ name = "Martin Thoma", email = "info@martin-thoma.de" },
{ name = "Lucas Cimon (@Lucas-C)" },
]
description = "A pure-python CLI application to manipulate PDF files"
readme = "README.md"
dynamic = ["version"]
license = "BSD-3-Clause"
license-files = ["LICENSE"]
requires-python = ">=3.10.0"
keywords = ["pdf", "cli", "tools", "compression", "metadata", "signature", "booklet"]
# https://pypi.org/pypi?%3Aaction=list_classifiers
classifiers = [
"Development Status :: 1 - Planning",
"Environment :: Console",
"Intended Audience :: Developers",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
]
dependencies = [
"pypdf[full]>=5.1.0",
"typer>=0.12.4",
"pillow",
"pydantic",
"rich",
"fpdf2>=2.8.1",
"asn1crypto",
"cryptography",
"endesive",
"requests>=2.32.5", # required by endesive.signer
]
[dependency-groups]
dev = ["black", "check-wheel-contents", "flake8", "flake8-bugbear", "flake8-comprehensions", "flake8-isort", "flake8-simplify", "flit", "mypy", "pre-commit>=3.2.0", "pydantic", "pytest", "pytest-cov", "pytest-socket", "pytest-timeout", "rich", "ruff"]
docs = ["attrs", "sphinx", "sphinx_rtd_theme", "sphinx-autobuild", "myst_parser"] # attrs is required for myst, but not automatically installed by myst
[project.urls]
Source = "https://github.com/py-pdf/pdfly"
[project.scripts]
pdfly = "pdfly.cli:entry_point"
[tool.pytest.ini_options]
addopts = "--disable-socket --doctest-modules --cov=. --cov-report html:tests/reports/coverage-html --cov-report term-missing --ignore=docs/ --durations=3 --timeout=30"
doctest_encoding = "utf-8"
testpaths = ["tests"]
[tool.black]
line-length = 79
[tool.isort]
line_length = 79
indent = ' '
multi_line_output = 3
include_trailing_comma = true
known_third_party = ["pytest", "setuptools"]
[tool.ruff]
line-length = 120
[tool.ruff.lint]
select = ["ALL"]
ignore = [
"D401", # First line of docstring should be in imperative mood - false positives
"UP031", # Use format specifiers instead of percent format
"D205", # 1 blank line required between summary line and description
"D400", # First line should end with a period
"D415", # First line should end with a period, question mark, or exclamation point
# Introduces bugs
"RUF005",
"DTZ001", # The use of `datetime.datetime()` without `tzinfo` is necessary
# Personal preference
"D212", # I want multiline-docstrings to start at the second line
"D407", # google-style docstrings don't have dashes
"BLE", # we want to capture Exception sometimes
"COM812", # yes, they make the diff smaller
"D100", # Missing docstring in public module
"D105", # Missing docstring in magic method
"D106", # Missing docstring in public nested class
"D107", # Missing docstring in `__init__`
"D203", # one-blank-line-before-class
"EM", # exception messages
"G004", # f-string in logging statement
"RET",
"S110", # `try`-`except`-`pass` detected, consider logging the exception
"SIM105", # contextlib.suppress
"SIM108", # don't enforce ternary operators
"SIM300", # yoda conditions
"TID252", # we want relative imports
"TRY", # I don't know what this is about
# As long as we are not on Python 3.11+
"UP006", "UP007",
# for the moment, fix it later:
"T201", # print
"DTZ006", # datetime without timezone
"SIM115", # context handler for opening files
"A", # Variable is shadowing a built-in
"B904", # Within an `except` clause, raise exceptions with `raise ... from err`
"B905", # `zip()` without an explicit `strict=` parameter
"C901",
"D101", # Missing docstring in public class
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D417", # Missing argument descriptions in the docstring
"FBT001", # Boolean positional arg in function definition
"FBT002", # Boolean default value in function definition
"FBT003", # Boolean positional value in function call
"PLC0415", # `import` should be at the top-level of a file
"PGH", # Use specific error messages
"PLR0912", # Too many branches
"PLR0913", # Too many arguments to function call
"PLR0915", # Too many statements
"PLR2004", # Magic value
"PLW", # global variables
"PTH110", # `os.path.exists()` should be replaced by `Path.exists()`
"PTH123", # `open()` should be replaced by `Path.open()`
"S101", # Use of `assert` detected
"SLF001", # Private member accessed
"INP001", # File `docs/conf.py` is part of an implicit namespace package. Add an `__init__.py`.
]
[tool.ruff.lint.mccabe]
max-complexity = 20 # Recommended: 10
[tool.ruff.lint.per-file-ignores]
"sample-files/*" = ["D100", "INP001", "FA102", "I001"]
"make_release.py" = ["T201", "S603", "S607"]
================================================
FILE: renovate.json
================================================
{
"commitMessagePrefix": "MAINT:",
"extends": ["config:best-practices"],
"labels": ["dependencies"],
"osvVulnerabilityAlerts": true,
"vulnerabilityAlerts": {"enabled": true}
}
================================================
FILE: resources/demo2_ca.root.crt.pem
================================================
-----BEGIN CERTIFICATE-----
MIIDLTCCAhWgAwIBAgIUHeQXwdDU4jyXtdItkEjDOw/SigAwDQYJKoZIhvcNAQEL
BQAwHTEbMBkGA1UEAwwSQUEgVHJpU29mdCBSb290IENBMCAXDTI1MDYxMTE4Mjgw
MloYDzIwNjUwNjAxMTgyODAyWjAdMRswGQYDVQQDDBJBQSBUcmlTb2Z0IFJvb3Qg
Q0EwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCGHskGb4Gd364QhbS6
i2NmHbJf4N5LhDJPwRjDACuRqRu42fEB+MwKvAIYoS2wVihYubf/dRZFc0/4yyCH
7I1Mkh1YoQRjl3q51pKWjUjm5Ua611NDLHvkDU8ecQWj2qjHcJtV39ay3L/TIyvS
tesIR+o2oOkfxzaLjkhrH08DOy5L3gvETexV7GBbmSQTaI9jvNuD9oKZs6ba1S5O
65pPEC/u3/udZgRBKd+lB/qlLk7HNuN0trwEfZLvdBC4pS9Fc0DbUcHnsNBwWFc9
VjrzzJDYHdWmZtYGg5rc7efx5+zVw26wm58caJv5ihi0An4J/I8i5I4TKoLMgcJP
2r7VAgMBAAGjYzBhMA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUPkWmCmbq
vZJeJaiLKy8j/la8iHEwHQYDVR0OBBYEFD5Fpgpm6r2SXiWoiysvI/5WvIhxMA4G
A1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAQEAPc3cf1CHKSaF4BDM8UHT
4B5VMdj7uZSxsQ+IerrOi6QfMIUuesVc/h9oN9eBLoTCCQsFB7nrizwmyd2xIK9d
jOuPQZexu9VhBIeJE8Fh86gG0U6IQxXw9NXW10yaW9w5RAYQqH3w+VPsaPDXnceX
b0yjM1vtmV9WrMNoXWPil7vYuea0HAar80IyUKwrzEOZa8zqDz1HElC0rukVh0Yl
5PHkVptl11d81ukyKeXGP6PFt1JI31vgAEZHdykz8w7SjAu0g+QrM2LCZV915wLu
OAS3ptxRmdNymk1zYHEyPt7CRdgUV1NWhE1N0RQMuf1CnXRPWZ6+Ls83xVzoO1i7
WA==
-----END CERTIFICATE-----
================================================
FILE: resources/signing-certificate.crt
================================================
Bag Attributes
friendlyName: fpdf2
localKeyID: C2 58 91 78 7F 3E 01 57 6E 39 AE AD CA 28 99 06 3B 55 2D F1
subject=CN = fpdf2, O = fpdf2, OU = signing testing
issuer=CN = fpdf2, O = fpdf2, OU = signing testing
-----BEGIN CERTIFICATE-----
MIIEFzCCAv+gAwIBAgIBfzANBgkqhkiG9w0BAQsFADA6MQ4wDAYDVQQDDAVmcGRm
MjEOMAwGA1UECgwFZnBkZjIxGDAWBgNVBAsMD3NpZ25pbmcgdGVzdGluZzAeFw0y
NTA3MjMwNDI0NTBaFw0zNTA3MjEwNDI0NTBaMDoxDjAMBgNVBAMMBWZwZGYyMQ4w
DAYDVQQKDAVmcGRmMjEYMBYGA1UECwwPc2lnbmluZyB0ZXN0aW5nMIIBIjANBgkq
hkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAn1/C38InT9bJPE/R5yDhLSUS6KKR2xir
PYQF8Blb9LYLf3jF/2Dupl9OG5FUFHQZL2Lw2PJvrIvXi4LKfi3wM93lumvNpVl8
BFuuQKZbvV3aGXsjfLL96i4rgRd9TrnOUvYHUiyhY1Q/1f3eW7+y4+6KUTUDgXf6
awKXC9qpmv/L0BlKNl3CaSnQcc3KRSTlxNkupOiuLC0gC+Xhf5qjUZDKPjkIQZ3R
fUTaVsCIUYqwzKsRkfhiizcXj3L5b/XeBDTNT6qI1xz2XN7UQ2w8Z0PExxcth3Hb
TeR6KZOPPo2dIeXPB3kljoraWAxJosxr9lDhFO2t4HP8Hbj1LwXk0wIDAQABo4IB
JjCCASIwHQYDVR0OBBYEFFtMIYXyJ7jtFAz3bU7d4fCPlqkJMEwGA1UdIwRFMEOh
PqQ8MDoxDjAMBgNVBAMMBWZwZGYyMQ4wDAYDVQQKDAVmcGRmMjEYMBYGA1UECwwP
c2lnbmluZyB0ZXN0aW5nggF/MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgeAMB0GA1Ud
JQQWMBQGCCsGAQUFBwMEBggrBgEFBQcDAjAdBgNVHREEFjAUgRJzaWduZXJAZnBk
ZjIubG9jYWwwXQYIKwYBBQUHAQEEUTBPMCgGCCsGAQUFBzAChhxodHRwOi8vY2Eu
ZXhhbXBsZS5jb20vY2EucGVtMCMGCCsGAQUFBzABhhdodHRwOi8vb2NzcC5leGFt
cGxlLmNvbTANBgkqhkiG9w0BAQsFAAOCAQEAUFuZAJ7bzp1+drypANTk1QBS476n
2ggKfDzsxNPmF5DO8anyBS6k6rMT0Ziq7Y9TzuUe6xOtJSgXswupn7AAn81p3V/q
slaHsIzaNo+1wg6b7EtP3/udtDKBOwQTdz3PwA3ihLdDC4IcnGLPmwPDfBX3H2tc
R3Xw64gudbinRTdrwh8nHDxsNWZ0G56Gbwm2J+Pt6l6RS+mXrWrO/PcjvVJAigBe
7u9laSU7LLQSUoWn5Yv99DYdAvVZQqUG0BgUeKXxFDEiIqNWtHUNzv3Ce8KdASlG
TxFCEB+Y1Ag2S1Y1AmpKsP3RUt9SOiGjmqhHfXBIgghz2b3hoLYEAbWxSw==
-----END CERTIFICATE-----
================================================
FILE: setup.cfg
================================================
[mutmut]
backup = False
runner = ./mutmut-test.sh
tests_dir = tests/
[mypy]
ignore_missing_imports = true
strict = true
check_untyped_defs = true
disallow_any_generics = true
disallow_incomplete_defs = true
disallow_untyped_defs = true
no_implicit_optional = true
warn_unused_ignores = false
show_error_codes = true
[mypy-testing.*]
disallow_untyped_defs = false
[mypy-tests.*]
disallow_untyped_defs = false
[flake8]
ignore = E501, E203, W503, PT007, SIM115
exclude = build/*
per-file-ignores =
tests/*: ASS001
================================================
FILE: setup.py
================================================
"""Package pdfly with setuptools."""
import re
from setuptools import find_packages, setup
VERSIONFILE = "pdfly/_version.py"
# Matches a `__version__ = "<value>"` assignment at the start of a line.
VSRE = r"^__version__ = ['\"]([^'\"]*)['\"]"

with open(VERSIONFILE) as fp:
    verstrline = fp.read()

mo = re.search(VSRE, verstrline, re.MULTILINE)
if not mo:
    # Without a version string the package cannot be built.
    raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE))
verstr = mo.group(1)

setup(
    version=verstr,
    packages=find_packages(exclude=("tests",)),
)
================================================
FILE: tests/__init__.py
================================================
"""Shared test code"""
================================================
FILE: tests/conftest.py
================================================
"""Utilities and fixtures that are available automatically for all tests."""
import os
from collections.abc import Iterator
from pathlib import Path
from typing import Union
import pytest
from fpdf import FPDF
from pdfly.cli import entry_point
try:
    from contextlib import chdir  # type: ignore
except ImportError:  # Fallback when not available (< Python 3.11):
    from contextlib import contextmanager

    @contextmanager  # type: ignore
    def chdir(dir_path: Union[str, Path]) -> Iterator[None]:
        """
        Temporarily switch the current working directory to `dir_path`.

        The previous working directory is restored on exit, even if the body raises.
        Not thread-safe: the working directory is process-wide.
        """
        previous_dir = Path.cwd()
        os.chdir(dir_path)
        try:
            yield
        finally:
            os.chdir(previous_dir)
# Resolved path of the directory that contains this conftest.py file.
TESTS_ROOT = Path(__file__).parent.resolve()
# Parent directory of TESTS_ROOT.
PROJECT_ROOT = TESTS_ROOT.parent
# Directory holding the resource files (PDFs, images, certificates) used by tests.
RESOURCES_ROOT = PROJECT_ROOT / "resources"
def run_cli(args: list[str]) -> Union[None, int, str]:
    """
    Call the pdfly CLI entry point with `args` and report how it ended.

    Returns the code carried by `SystemExit` if the entry point exits that way,
    or `None` if it returns normally.
    """
    try:
        entry_point(args)
    except SystemExit as error:
        return error.code
    else:
        return None
@pytest.fixture
def two_pages_pdf_filepath(tmp_path: Path) -> Path:
    """Generate a 2-page PDF in `tmp_path`, each page holding a different image."""
    # Note: prior to v2.7.9, fpdf2 produced incorrect /Resources dicts for each page (cf. fpdf2 PR #1133),
    # leading to an "abnormal" two_pages.pdf generated there, and for test_cat_subset_ensure_reduced_size() to fail.
    document = FPDF()
    for image_name in ("baleines.jpg", "pythonknight.png"):
        document.add_page()
        document.image(RESOURCES_ROOT / image_name)
    destination = tmp_path / "two_pages.pdf"
    document.output(destination)
    return destination
@pytest.fixture
def pdf_file_100(tmp_path: Path) -> Path:
    """Generate a 100-page PDF in `tmp_path`; page N only shows the number N (0-based)."""
    document = FPDF()
    for page_index in range(100):
        document.add_page()
        document.set_font("helvetica", size=12)
        document.cell(
            200,
            10,
            text=f"{page_index}",
            new_x="LMARGIN",
            new_y="NEXT",
            align="C",
        )
    destination = tmp_path / "pdf_file_100.pdf"
    document.output(destination)
    return destination
@pytest.fixture
def pdf_file_abc(tmp_path: Path) -> Path:
    """A PDF with 26 pages; each has only one lowercase letter on it, from "a" to "z"."""
    pdf = FPDF()
    # One page per letter, in alphabetical order
    for char in map(chr, range(ord("a"), ord("z") + 1)):
        pdf.add_page()
        pdf.set_font("helvetica", size=12)
        pdf.cell(
            200, 10, text=f"{char}", new_x="LMARGIN", new_y="NEXT", align="C"
        )
    pdf_filepath = tmp_path / "abc.pdf"
    pdf.output(pdf_filepath)
    return pdf_filepath
================================================
FILE: tests/test_booklet.py
================================================
from pathlib import Path
import pytest
from pypdf import PdfReader
from .conftest import RESOURCES_ROOT, chdir, run_cli
def test_booklet_fewer_args(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`booklet` called with only an input file must fail with exit code 2."""
    with chdir(tmp_path):
        # The sub-command under test is "booklet" (this test previously invoked "cat")
        exit_code = run_cli(["booklet", str(RESOURCES_ROOT / "box.pdf")])
    assert exit_code == 2
    captured = capsys.readouterr()
    assert "Missing" in captured.err
def test_booklet_extra_args(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`booklet` given a third positional argument must fail with exit code 2."""
    cli_args = ["booklet", str(RESOURCES_ROOT / "box.pdf"), "a.pdf", "b.pdf"]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    assert exit_code == 2
    stderr = capsys.readouterr().err
    assert "unexpected extra argument" in stderr
def test_booklet_page_size(tmp_path: Path) -> None:
    """The booklet of the 8-page input has 4 pages, twice as wide and equally tall."""
    in_fname = str(RESOURCES_ROOT / "input8.pdf")
    with chdir(tmp_path):
        exit_code = run_cli(["booklet", in_fname, "output8.pdf"])
        # The output path is relative, so it has to be read while still in tmp_path
        in_reader = PdfReader(in_fname)
        out_reader = PdfReader("output8.pdf")
        assert exit_code == 0
        assert len(in_reader.pages) == 8
        assert len(out_reader.pages) == 4
        source_box = in_reader.pages[0].mediabox
        booklet_box = out_reader.pages[0].mediabox
        assert booklet_box.width == source_box.width * 2
        assert source_box.height == booklet_box.height
@pytest.mark.parametrize(
    ("page_count", "expected", "expected_bc"),
    [
        ("8", "8 1\n2 7\n6 3\n4 5\n", "8 1\n2 7\n6 3\n4 5\n"),
        ("7", "7 1\n2\n6 3\n4 5\n", "7 1\n2 b\n6 3\n4 5\n"),
        ("6", "6 1\n2 5\n4 3\n\n", "6 1\n2 5\n4 3\nc\n"),
        ("5", "5 1\n2\n4 3\n\n", "5 1\n2 b\n4 3\nc\n"),
        ("4", "4 1\n2 3\n", "4 1\n2 3\n"),
        ("3", "3 1\n2\n", "3 1\n2 b\n"),
        ("2", "2 1\n\n", "2 1\nc\n"),
        ("1", "1\n\n", "1 b\nc\n"),
    ],
)
def test_booklet_order(
    capsys: pytest.CaptureFixture,
    tmp_path: Path,
    page_count: str,
    expected: str,
    expected_bc: str,
) -> None:
    """
    Check the text extracted from booklets built from 1 to 8 pages.

    `expected` is compared with a plain booklet, `expected_bc` with a booklet
    built with the --centerfold-file and --blank-page-file options.
    """
    source_pdf = f"input{page_count}.pdf"
    plain_booklet = f"output{page_count}.pdf"
    filled_booklet = f"outputbc{page_count}.pdf"
    with chdir(tmp_path):
        # Build the input: the first `page_count` pages of the 8-page resource
        exit_code = run_cli(
            [
                "cat",
                "-o",
                source_pdf,
                str(RESOURCES_ROOT / "input8.pdf"),
                f":{page_count}",
            ]
        )
        assert exit_code == 0

        # Plain booklet
        exit_code = run_cli(["booklet", source_pdf, plain_booklet])
        captured = capsys.readouterr()
        assert exit_code == 0, captured.err
        exit_code = run_cli(["extract-text", plain_booklet])
        captured = capsys.readouterr()
        assert exit_code == 0, captured.err
        assert captured.out == expected

        # Booklet with dedicated centerfold & blank page files
        exit_code = run_cli(
            [
                "booklet",
                "--centerfold-file",
                str(RESOURCES_ROOT / "c.pdf"),
                "--blank-page-file",
                str(RESOURCES_ROOT / "b.pdf"),
                source_pdf,
                filled_booklet,
            ]
        )
        captured = capsys.readouterr()
        assert exit_code == 0, captured.err
        exit_code = run_cli(["extract-text", filled_booklet])
        captured = capsys.readouterr()
        assert exit_code == 0, captured.err
        assert captured.out == expected_bc
================================================
FILE: tests/test_cat.py
================================================
from pathlib import Path
from typing import Any
import pytest
from pypdf import PdfReader
from .conftest import RESOURCES_ROOT, chdir, run_cli
def extract_embedded_images(pdf_filepath: Path) -> list[Any]:
    """Return the `images` attribute of every page of the PDF, in page order."""
    per_page_images = []
    for page in PdfReader(pdf_filepath).pages:
        per_page_images.append(page.images)
    return per_page_images
def extract_text_pages(pdf_filepath: Path) -> list[str]:
    """Return the text extracted from every page of the PDF, in page order."""
    page_texts = []
    for page in PdfReader(pdf_filepath).pages:
        page_texts.append(page.extract_text())
    return page_texts
def test_cat_incorrect_number_of_args(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`cat` called with only an input file must fail with exit code 2."""
    cli_args = ["cat", str(RESOURCES_ROOT / "box.pdf")]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    assert exit_code == 2
    stderr = capsys.readouterr().err
    assert "Missing" in stderr
def test_cat_two_files_ok(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """Concatenating box.pdf and jpeg.pdf produces a 2-page PDF without error output."""
    cli_args = [
        "cat",
        str(RESOURCES_ROOT / "box.pdf"),
        str(RESOURCES_ROOT / "jpeg.pdf"),
        "--output",
        "./out.pdf",
    ]
    # Act
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    # Assert
    assert exit_code == 0, streams
    assert not streams.err
    merged = PdfReader(tmp_path / "out.pdf")
    assert len(merged.pages) == 2
def test_cat_subset_ok(capsys: pytest.CaptureFixture, tmp_path: Path) -> None:
    """Selecting the page range `13:15` produces a 2-page PDF without error output."""
    cli_args = [
        "cat",
        str(RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"),
        "13:15",
        "--output",
        "./out.pdf",
    ]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    assert exit_code == 0, streams
    assert not streams.err
    subset = PdfReader(tmp_path / "out.pdf")
    assert len(subset.pages) == 2
@pytest.mark.parametrize(
    "page_range",
    ["a", "-", "1-", "1-1-1", "1:1:1:1"],
)
def test_cat_subset_invalid_args(
    capsys: pytest.CaptureFixture, tmp_path: Path, page_range: str
) -> None:
    """Malformed page ranges make `cat` exit with code 2 and print an error on stdout."""
    cli_args = [
        "cat",
        str(RESOURCES_ROOT / "jpeg.pdf"),
        page_range,
        "--output",
        "./out.pdf",
    ]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    assert exit_code == 2, streams
    assert "Error: invalid file path or page range provided" in streams.out
def test_cat_subset_warn_on_missing_pages(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """Requesting page index 2 of jpeg.pdf succeeds but emits a warning on stderr."""
    cli_args = [
        "cat",
        str(RESOURCES_ROOT / "jpeg.pdf"),
        "2",
        "--output",
        "./out.pdf",
    ]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    assert exit_code == 0, streams
    assert "WARN" in streams.err
def test_cat_subset_ensure_reduced_size(
    tmp_path: Path, two_pages_pdf_filepath: Path
) -> None:
    """Extracting either page of the 2-page fixture yields a single entry of embedded images."""
    for page_index, output_name in (("0", "page1.pdf"), ("1", "page2.pdf")):
        output_path = tmp_path / output_name
        exit_code = run_cli(
            [
                "cat",
                str(two_pages_pdf_filepath),
                page_index,
                "--output",
                str(output_path),
            ]
        )
        assert exit_code == 0
        # extract_embedded_images() returns one entry per page of the extracted PDF:
        embedded_images = extract_embedded_images(output_path)
        assert len(embedded_images) == 1
def test_cat_combine_files(
    pdf_file_100: Path,
    pdf_file_abc: Path,
    tmp_path: Path,
    capsys: pytest.CaptureFixture,
) -> None:
    """`cat` concatenates the page selections of several inputs, in argument order."""
    output_pdf_path = tmp_path / "out.pdf"
    cli_args = [
        "cat",
        str(pdf_file_100),
        "1:10:2",
        str(pdf_file_abc),
        "::2",
        str(pdf_file_abc),
        "1::2",
        "--output",
        str(output_pdf_path),
    ]
    with chdir(tmp_path):
        # Run pdfly cat command
        exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    # Check if the command was successful
    assert exit_code == 0, streams.out
    # Extract the text of every page of the generated PDF
    extracted_pages = extract_text_pages(output_pdf_path)
    # Expected: odd numbers below 10, then letters at even indices (a, c, ..., y),
    # then letters at odd indices (b, d, ..., z)
    alphabet = [chr(code) for code in range(ord("a"), ord("z") + 1)]
    odd_numbers = [str(number) for number in range(1, 10, 2)]
    assert extracted_pages == odd_numbers + alphabet[::2] + alphabet[1::2]
@pytest.mark.parametrize(
    ("page_range", "expected"),
    [
        ("22", ["22"]),
        ("0:3", ["0", "1", "2"]),
        (":3", ["0", "1", "2"]),
        (":", [str(el) for el in range(100)]),
        ("5:", [str(el) for el in range(100)[5:]]),
        ("::2", [str(el) for el in range(100)[::2]]),
        ("1:10:2", [str(el) for el in range(100)[1:10:2]]),
        ("::1", [str(el) for el in range(100)[::1]]),
        ("::-1", [str(el) for el in range(100)[::-1]]),
    ],
)
def test_cat_commands(
    pdf_file_100: Path,
    tmp_path: Path,
    page_range: str,
    expected: list[str],
) -> None:
    """The pages kept by `cat` for a page range match the equivalent Python slice."""
    output_pdf_path = tmp_path / "out.pdf"
    cli_args = [
        "cat",
        str(pdf_file_100),
        page_range,
        "--output",
        str(output_pdf_path),
    ]
    with chdir(tmp_path):
        # Run pdfly cat command
        exit_code = run_cli(cli_args)
    # Check if the command was successful
    assert exit_code == 0
    # Each page of the fixture only shows its index, so the text identifies the pages kept
    extracted_pages = extract_text_pages(output_pdf_path)
    assert extracted_pages == expected
def test_cat_decrypt_with_password_ok(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`cat` with the right --password reads the encrypted sample and writes a 1-page PDF."""
    output_path = tmp_path / "out.pdf"
    cli_args = [
        "cat",
        "--password=openpassword",
        "sample-files/005-libreoffice-writer-password/libreoffice-writer-password.pdf",
        "--output",
        str(output_path),
    ]
    exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    assert exit_code == 0, streams
    assert not streams.err
    decrypted = PdfReader(output_path)
    assert len(decrypted.pages) == 1
def test_cat_decrypt_with_password_ko(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`cat` with a wrong --password exits with code 1 and prints an error on stdout."""
    cli_args = [
        "cat",
        "--password=INCORRECT",
        "sample-files/005-libreoffice-writer-password/libreoffice-writer-password.pdf",
        "--output",
        str(tmp_path / "out.pdf"),
    ]
    exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    assert exit_code == 1, streams
    assert "Error: the decrypting password provided is invalid" in streams.out
================================================
FILE: tests/test_check_sign.py
================================================
from pathlib import Path
import pytest
from fpdf import FPDF
from .conftest import RESOURCES_ROOT, chdir, run_cli
def test_check_sign_manipulated_content(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`check-sign` exits with code 1 when bytes of a signed PDF were altered."""
    # Arrange: sign a one-page PDF holding a text markup annotation
    document = FPDF()
    document.add_page()
    document.set_font("helvetica", style="B", size=16)
    document.add_text_markup_annotation(
        "Underline", "Hello World!", [0, 0, 0, 0, 0, 0, 0, 0]
    )
    document.sign_pkcs12(
        str(RESOURCES_ROOT / "signing-certificate.p12"), b"fpdf2"
    )
    # manipulate signed pdf - leaving length intact
    tampered_bytes = document.output().replace(
        b"Hello World!", b"aaaaa aaaaa!"
    )
    tampered_pdf = tmp_path / "signed_manipulated.pdf"
    tampered_pdf.write_bytes(tampered_bytes)
    # Act
    with chdir(tmp_path):
        exit_code = run_cli(
            [
                "check-sign",
                tampered_pdf.name,
                "--pem",
                str(RESOURCES_ROOT / "signing-certificate.crt"),
            ]
        )
    stderr = capsys.readouterr().err
    # Assert
    assert exit_code == 1
    assert "Check failed" in stderr
    assert "Content hash not ok" in stderr
def test_check_sign_missing_signature(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`check-sign` on input8.pdf exits with code 2 and reports a missing signature."""
    cli_args = [
        "check-sign",
        str(RESOURCES_ROOT / "input8.pdf"),
        "--pem",
        str(RESOURCES_ROOT / "signing-certificate.crt"),
    ]
    # Act
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    stderr = capsys.readouterr().err
    # Assert
    assert exit_code == 2
    assert "Signature missing" in stderr
def test_check_sign_signature_not_matching_to_certificate(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`check-sign` exits with code 1 when given a certificate other than the signing one."""
    # sign_pkcs12.pdf signature matched to signing-certificate.crt
    unrelated_certificate = RESOURCES_ROOT / "demo2_ca.root.crt.pem"
    cli_args = [
        "check-sign",
        str(RESOURCES_ROOT / "sign_pkcs12.pdf"),
        "--pem",
        str(unrelated_certificate),
    ]
    # Act
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    stderr = capsys.readouterr().err
    # Assert
    assert exit_code == 1
    assert "Check failed" in stderr
    assert "Certificate not ok" in stderr
def test_check_sign_pem(capsys: pytest.CaptureFixture, tmp_path: Path) -> None:
    """`check-sign` succeeds silently on sign_pkcs12.pdf with signing-certificate.crt."""
    cli_args = [
        "check-sign",
        str(RESOURCES_ROOT / "sign_pkcs12.pdf"),
        "--pem",
        str(RESOURCES_ROOT / "signing-certificate.crt"),
    ]
    # Act
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    stderr = capsys.readouterr().err
    # Assert
    assert exit_code == 0
    assert not stderr
def test_check_sign_pdfly_signed_pdf(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """A PDF signed with `pdfly sign` must be accepted by `pdfly check-sign`."""
    signed_pdf = tmp_path / "input8_signed.pdf"
    # Arrange
    with chdir(tmp_path):
        exit_code = run_cli(
            [
                "sign",
                str(RESOURCES_ROOT / "input8.pdf"),
                "-o",
                str(signed_pdf),
                "--p12",
                str(RESOURCES_ROOT / "signing-certificate.p12"),
                "--p12-password",
                "fpdf2",
            ]
        )
    captured = capsys.readouterr()
    # Fail early if signing itself broke, instead of reporting a misleading check-sign failure
    assert exit_code == 0, captured.err
    # Act
    with chdir(tmp_path):
        exit_code = run_cli(
            [
                "check-sign",
                str(signed_pdf),
                "--pem",
                str(RESOURCES_ROOT / "signing-certificate.crt"),
            ]
        )
    captured = capsys.readouterr()
    # Assert
    assert exit_code == 0
    assert not captured.err
================================================
FILE: tests/test_cli.py
================================================
import sys
from subprocess import check_output
import pytest
from pypdf import __version__ as pypdf_version
from .conftest import run_cli
def test_pypdf_cli_can_be_invoked_as_a_module() -> None:
    """`python -m pdfly --help` prints the usage line and the application description."""
    command = [sys.executable, "-m", "pdfly", "--help"]
    stdout = check_output(command).decode()  # noqa: S603
    assert "pdfly [OPTIONS] COMMAND [ARGS]..." in stdout
    description = (
        "pdfly is a pure-python cli application for manipulating PDF files."
    )
    assert description in stdout
def test_pypdf_cli_version(capsys: pytest.CaptureFixture) -> None:
    """`--version` exits with code 0 and its output includes the pypdf version."""
    exit_code = run_cli(["--version"])
    streams = capsys.readouterr()
    assert not streams.err
    assert pypdf_version in streams.out
    assert exit_code == 0
================================================
FILE: tests/test_compress.py
================================================
"""Tests for the `compress` command."""
from pathlib import Path
import pytest
from typer.testing import CliRunner
from pdfly.cli import entry_point
# Shared Typer test runner: the tests below call `runner.invoke(entry_point, [...])`.
runner = CliRunner()
@pytest.mark.parametrize("input_pdf_filepath", Path("resources").glob("*.pdf"))
def test_compress_sample_files(
    input_pdf_filepath: Path, tmp_path: Path
) -> None:
    """Run `compress` on every PDF matched by the `resources/*.pdf` glob."""
    output_pdf_filepath = tmp_path / "compressed_output.pdf"
    cli_args = ["compress", str(input_pdf_filepath), str(output_pdf_filepath)]
    result = runner.invoke(entry_point, cli_args)
    assert (
        result.exit_code == 0
    ), f"Compression failed for {input_pdf_filepath}: {result.output}"
    assert (
        output_pdf_filepath.exists()
    ), f"Output PDF {output_pdf_filepath} does not exist."
    # The output must start with the PDF header marker
    content = output_pdf_filepath.read_bytes()
    assert content.startswith(
        b"%PDF-"
    ), f"Output is not a valid PDF file: {output_pdf_filepath}"
    for label in ("Original Size", "Final Size"):
        assert label in result.output
def test_compress_no_compression_when_larger(tmp_path: Path) -> None:
    """Whatever `compress` decides for a tiny PDF, the output is never larger than the input."""
    from fpdf import FPDF

    # Create a small PDF that might not compress well
    document = FPDF()
    document.add_page()
    document.set_font("helvetica", size=12)
    document.cell(
        200, 10, text="Short text", new_x="LMARGIN", new_y="NEXT", align="C"
    )
    input_pdf = tmp_path / "small.pdf"
    output_pdf = tmp_path / "compressed.pdf"
    document.output(input_pdf)

    result = runner.invoke(
        entry_point, ["compress", str(input_pdf), str(output_pdf)]
    )
    assert result.exit_code == 0

    input_size = input_pdf.stat().st_size
    output_size = output_pdf.stat().st_size
    if "No compression applied" not in result.output:
        # If compression worked, ensure it's actually smaller or same size
        assert output_size <= input_size
    else:
        # If compression would make file larger, ensure original is copied
        assert input_size == output_size
        assert "would increase size" in result.output
def test_compress_file_integrity(tmp_path: Path) -> None:
    """A compressed 2-page PDF keeps both pages and their extractable text."""
    from fpdf import FPDF

    document = FPDF()
    document.add_page()
    document.set_font("helvetica", size=12)
    # (text, alignment) of each line written on the first page
    for line_text, alignment in (
        ("Test PDF for compression", "C"),
        ("This is a test document.", "L"),
    ):
        document.cell(
            200,
            10,
            text=line_text,
            new_x="LMARGIN",
            new_y="NEXT",
            align=alignment,
        )
    document.add_page()
    document.cell(
        200,
        10,
        text="Second page content",
        new_x="LMARGIN",
        new_y="NEXT",
        align="C",
    )
    input_pdf = tmp_path / "test.pdf"
    output_pdf = tmp_path / "compressed.pdf"
    document.output(input_pdf)

    result = runner.invoke(
        entry_point, ["compress", str(input_pdf), str(output_pdf)]
    )
    assert result.exit_code == 0

    from pypdf import PdfReader

    compressed = PdfReader(str(output_pdf))
    assert len(compressed.pages) == 2
    first_page_text = compressed.pages[0].extract_text()
    second_page_text = compressed.pages[1].extract_text()
    assert "Test PDF for compression" in first_page_text
    assert "Second page content" in second_page_text
def test_compress_output_metrics(tmp_path: Path) -> None:
    """`compress` prints an "Original Size" line and a "Final Size" line, each containing ":"."""
    from fpdf import FPDF

    document = FPDF()
    for _page in range(10):
        document.add_page()
        document.set_font("helvetica", size=12)
        document.cell(
            200,
            10,
            text="This is repeated text on every page " * 5,
            new_x="LMARGIN",
            new_y="NEXT",
            align="L",
        )
    input_pdf = tmp_path / "repeat.pdf"
    output_pdf = tmp_path / "compressed.pdf"
    document.output(input_pdf)

    result = runner.invoke(
        entry_point, ["compress", str(input_pdf), str(output_pdf)]
    )
    assert result.exit_code == 0

    output_lines = result.output.strip().split("\n")
    original_size_lines = [
        line for line in output_lines if "Original Size" in line
    ]
    final_size_lines = [line for line in output_lines if "Final Size" in line]
    assert original_size_lines
    assert final_size_lines
    # Only the first matching line of each kind is inspected
    assert ":" in original_size_lines[0]
    assert ":" in final_size_lines[0]
def test_compress_same_input_output_not_allowed(tmp_path: Path) -> None:
    """`compress` given the same path as input and output exits with code 0 or 1."""
    from fpdf import FPDF

    target_pdf = tmp_path / "test.pdf"
    # Create a simple PDF
    document = FPDF()
    document.add_page()
    document.set_font("helvetica", size=12)
    document.cell(
        200, 10, text="Test", new_x="LMARGIN", new_y="NEXT", align="C"
    )
    document.output(target_pdf)
    # Try to compress to the same file (should work but might not compress)
    cli_args = ["compress", str(target_pdf), str(target_pdf)]
    result = runner.invoke(entry_point, cli_args)
    assert result.exit_code in [0, 1]  # 0 for success, 1 for error
def test_compress_preserves_metadata(tmp_path: Path) -> None:
    """Title, author and subject set on the input are still present after `compress`."""
    from fpdf import FPDF

    document = FPDF()
    document.add_page()
    document.set_font("helvetica", size=12)
    document.cell(
        200, 10, text="Test document", new_x="LMARGIN", new_y="NEXT", align="C"
    )
    # Set some metadata
    document.set_title("Test Title")
    document.set_author("Test Author")
    document.set_subject("Test Subject")
    input_pdf = tmp_path / "metadata.pdf"
    output_pdf = tmp_path / "compressed.pdf"
    document.output(input_pdf)

    result = runner.invoke(
        entry_point, ["compress", str(input_pdf), str(output_pdf)]
    )
    assert result.exit_code == 0

    from pypdf import PdfReader

    metadata = PdfReader(str(output_pdf)).metadata
    assert metadata is not None
    assert metadata.get("/Title") == "Test Title"
    assert metadata.get("/Author") == "Test Author"
    assert metadata.get("/Subject") == "Test Subject"
================================================
FILE: tests/test_extract_annotated_pages.py
================================================
from pathlib import Path
import pytest
from .conftest import RESOURCES_ROOT, chdir, run_cli
def test_extract_annotated_pages_input8(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`extract-annotated-pages` on input8.pdf reports 1 extracted page, with no error output."""
    cli_args = ["extract-annotated-pages", str(RESOURCES_ROOT / "input8.pdf")]
    with chdir(tmp_path):
        run_cli(cli_args)
    streams = capsys.readouterr()
    assert not streams.err
    assert "Extracted 1 pages with annotations" in streams.out
================================================
FILE: tests/test_extract_images.py
================================================
from pathlib import Path
import pytest
from .conftest import RESOURCES_ROOT, chdir, run_cli
def test_extract_images_jpg_png(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`extract-images` on the GeoBase sample reports 3 extracted images, with no error output."""
    cli_args = [
        "extract-images",
        str(RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"),
    ]
    with chdir(tmp_path):
        run_cli(cli_args)
    streams = capsys.readouterr()
    assert not streams.err
    assert "Extracted 3 images" in streams.out
def test_extract_images_monochrome(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`extract-images` on box.pdf reports 1 extracted image, with no error output."""
    # There used to be a bug for this case: https://github.com/py-pdf/pypdf/issues/2176
    cli_args = ["extract-images", str(RESOURCES_ROOT / "box.pdf")]
    with chdir(tmp_path):
        run_cli(cli_args)
    streams = capsys.readouterr()
    assert not streams.err
    assert "Extracted 1 images" in streams.out
================================================
FILE: tests/test_pagemeta.py
================================================
import json
from pathlib import Path
import pytest
from .conftest import RESOURCES_ROOT, chdir, run_cli
def test_pagemeta_json(capsys: pytest.CaptureFixture, tmp_path: Path) -> None:
    """`pagemeta -o json` prints JSON in which the four page boxes of box.pdf are 60x60."""
    cli_args = ["pagemeta", str(RESOURCES_ROOT / "box.pdf"), "0", "-o", "json"]
    with chdir(tmp_path):
        run_cli(cli_args)
    streams = capsys.readouterr()
    assert not streams.err
    page_metadata = json.loads(streams.out)
    expected_box = [0.0, 0.0, 60.0, 60.0]
    for box_name in ("mediabox", "cropbox", "artbox", "bleedbox"):
        assert page_metadata[box_name] == expected_box
def test_pagemeta_text_with_known_format(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """The text output of `pagemeta` for c.pdf mentions "(Letter)"."""
    cli_args = ["pagemeta", str(RESOURCES_ROOT / "c.pdf"), "0"]
    with chdir(tmp_path):
        run_cli(cli_args)
    streams = capsys.readouterr()
    assert not streams.err
    assert "(Letter)" in streams.out
def test_pagemeta_text_with_close_format(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """The text output of `pagemeta` for jpeg.pdf mentions "close to format: A4"."""
    cli_args = ["pagemeta", str(RESOURCES_ROOT / "jpeg.pdf"), "0"]
    with chdir(tmp_path):
        run_cli(cli_args)
    streams = capsys.readouterr()
    assert not streams.err
    assert "close to format: A4" in streams.out
================================================
FILE: tests/test_rm.py
================================================
"""Tests for the `rm` command."""
from pathlib import Path
import pytest
from _pytest.capture import CaptureFixture
from pypdf import PdfReader
from .conftest import RESOURCES_ROOT, chdir, run_cli
from .test_cat import extract_embedded_images
def test_rm_incorrect_number_of_args(
    capsys: CaptureFixture, tmp_path: Path
) -> None:
    """`rm` called with only an input file must fail with exit code 2."""
    cli_args = ["rm", str(RESOURCES_ROOT / "box.pdf")]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    assert exit_code == 2
    stderr = capsys.readouterr().err
    assert "Missing" in stderr
def test_rm_subset_ok(capsys: CaptureFixture, tmp_path: Path) -> None:
    """Removing the page range `13:15` leaves a PDF with 2 pages fewer than the input."""
    source_pdf = RESOURCES_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf"
    cli_args = ["rm", str(source_pdf), "13:15", "--output", "./out.pdf"]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    assert exit_code == 0, streams
    assert not streams.err
    input_page_count = len(PdfReader(source_pdf).pages)
    output_page_count = len(PdfReader(tmp_path / "out.pdf").pages)
    assert output_page_count == input_page_count - 2
@pytest.mark.parametrize(
    "page_range",
    ["a", "-", "1-", "1-1-1", "1:1:1:1"],
)
def test_rm_subset_invalid_args(
    capsys: CaptureFixture, tmp_path: Path, page_range: str
) -> None:
    """Malformed page ranges make `rm` exit with code 2 and print an error on stdout."""
    cli_args = [
        "rm",
        str(RESOURCES_ROOT / "jpeg.pdf"),
        page_range,
        "--output",
        "./out.pdf",
    ]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    assert exit_code == 2, streams
    assert "Error: invalid file path or page range provided" in streams.out
def test_rm_subset_warn_on_missing_pages(
    capsys: CaptureFixture, tmp_path: Path
) -> None:
    """Removing page index 2 of jpeg.pdf succeeds but emits a warning on stderr."""
    cli_args = [
        "rm",
        str(RESOURCES_ROOT / "jpeg.pdf"),
        "2",
        "--output",
        "./out.pdf",
    ]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    assert exit_code == 0, streams
    assert "WARN" in streams.err
def test_rm_subset_ensure_reduced_size(
    tmp_path: Path, two_pages_pdf_filepath: Path
) -> None:
    """Removing either page of the 2-page fixture leaves a single entry of embedded images."""
    for page_index, output_name in (("0", "page1.pdf"), ("1", "page2.pdf")):
        output_path = tmp_path / output_name
        exit_code = run_cli(
            [
                "rm",
                str(two_pages_pdf_filepath),
                page_index,
                "--output",
                str(output_path),
            ]
        )
        assert exit_code == 0
        # A single entry is expected in the list returned for the resulting PDF:
        embedded_images = extract_embedded_images(output_path)
        assert len(embedded_images) == 1
def test_rm_combine_files(
    pdf_file_100: Path,
    pdf_file_abc: Path,
    tmp_path: Path,
    capsys: CaptureFixture,
) -> None:
    """`rm` drops the selected pages of each input and concatenates what remains."""
    output_pdf_path = tmp_path / "out.pdf"
    cli_args = [
        "rm",
        str(pdf_file_100),
        "1:10:2",
        str(pdf_file_abc),
        "::2",
        str(pdf_file_abc),
        "1::2",
        "--output",
        str(output_pdf_path),
    ]
    with chdir(tmp_path):
        # Run pdfly rm command
        exit_code = run_cli(cli_args)
    streams = capsys.readouterr()
    # Check if the command was successful
    assert exit_code == 0, streams.out
    # Extract the text of every page of the generated PDF
    extracted_pages = [
        page.extract_text() for page in PdfReader(output_pdf_path).pages
    ]
    # Expected: even numbers below 10 followed by 10..99,
    # then letters at odd indices (b, d, ..., z), then letters at even indices (a, c, ..., y)
    remaining_numbers = [
        str(el) for el in list(range(0, 10, 2)) + list(range(10, 100))
    ]
    alphabet = [chr(code) for code in range(ord("a"), ord("z") + 1)]
    assert (
        extracted_pages == remaining_numbers + alphabet[1::2] + alphabet[::2]
    )
@pytest.mark.parametrize(
    ("page_range", "expected"),
    [
        ("22", [str(el) for el in range(100) if el != 22]),
        ("0:3", [str(el) for el in range(3, 100)]),
        (":3", [str(el) for el in range(3, 100)]),
        (":", []),
        ("5:", ["0", "1", "2", "3", "4"]),
        ("::2", [str(el) for el in range(100)[1::2]]),
        (
            "1:10:2",
            [str(el) for el in list(range(0, 10, 2)) + list(range(10, 100))],
        ),
        ("::1", []),
        ("::-1", []),
    ],
)
def test_rm_commands(
    pdf_file_100: Path,
    tmp_path: Path,
    page_range: str,
    expected: list[str],
) -> None:
    """The pages left by `rm` are those not selected by the page range."""
    output_pdf_path = tmp_path / "out.pdf"
    cli_args = [
        "rm",
        str(pdf_file_100),
        page_range,
        "--output",
        str(output_pdf_path),
    ]
    with chdir(tmp_path):
        # Run pdfly rm command
        exit_code = run_cli(cli_args)
    # Check if the command was successful
    assert exit_code == 0
    # Each page of the fixture only shows its index, so the text identifies the pages left
    extracted_pages = [
        page.extract_text() for page in PdfReader(output_pdf_path).pages
    ]
    assert extracted_pages == expected
================================================
FILE: tests/test_rotate.py
================================================
from pathlib import Path
import pytest
from pypdf import PdfReader
from .conftest import RESOURCES_ROOT, chdir, run_cli
def test_rotate_fewer_args(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`rotate` called without any argument must fail with exit code 2."""
    with chdir(tmp_path):
        exit_code = run_cli(["rotate"])
    assert exit_code == 2
    stderr = capsys.readouterr().err
    assert "Missing argument" in stderr
def test_rotate_extra_args(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`rotate` given surplus positional arguments must fail with exit code 2."""
    cli_args = [
        "rotate",
        "-o",
        "/dev/null",
        str(RESOURCES_ROOT / "box.pdf"),
        "37",
        "extra 1",
        "extra 2",
    ]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    assert exit_code == 2
    stderr = capsys.readouterr().err
    assert "unexpected extra argument" in stderr
def get_page_rotations(fname: str) -> list[int]:
    """Return the `rotation` attribute of every page of the PDF, in page order."""
    rotations = []
    for page in PdfReader(fname).pages:
        rotations.append(page.rotation)
    return rotations
def diff_rotations(
    in_: list[int], out: list[int], degrees: int = 0
) -> list[int]:
    """
    Return, pair by pair, `out[i] - (in_[i] + degrees)`.

    Pairs are built with zip(), so the result is as long as the shorter list.
    """
    return [rotated - (orig + degrees) for orig, rotated in zip(in_, out)]
def test_rotate_default(tmp_path: Path) -> None:
    """Without a page range, `rotate` adds the given degrees to every page rotation."""
    in_fname = str(RESOURCES_ROOT / "input8.pdf")
    out_fname = "output8.pdf"
    degrees = 90
    cli_args = ["rotate", "-o", out_fname, in_fname, str(degrees)]
    with chdir(tmp_path):
        print(f"{tmp_path=}")
        exit_code = run_cli(cli_args)
        # out_fname is relative, so it has to be read while still in tmp_path
        in_rotations = get_page_rotations(in_fname)
        out_rotations = get_page_rotations(out_fname)
        assert exit_code == 0
        leftover = diff_rotations(in_rotations, out_rotations, degrees)
        assert not any(leftover)
@pytest.mark.parametrize(
    # NB "slice" can not be specified as the empty string
    ("degrees", "slice", "expected_diff"),
    [
        (90, ":", [90, 90, 90, 90, 90, 90, 90, 90]),  # every page
        (90, "::2", [90, 0, 90, 0, 90, 0, 90, 0]),  # every other, even index
        (90, "1::2", [0, 90, 0, 90, 0, 90, 0, 90]),  # every other, odd index
        (90, ":2", [90, 90, 0, 0, 0, 0, 0, 0]),  # first 2
        (
            -90,
            ":",
            [-90, -90, -90, -90, -90, -90, -90, -90],
        ),  # negative degrees works
        (
            -720,
            ":",
            [-720, -720, -720, -720, -720, -720, -720, -720],
        ),  # |degrees| > 360 is also supported
    ],
)
def test_rotate_slices(
    capsys: pytest.CaptureFixture,
    tmp_path: Path,
    degrees: int,
    slice: str,
    expected_diff: list[int],
) -> None:
    """Only the pages selected by the page range get their rotation changed by `degrees`."""
    in_fname = str(RESOURCES_ROOT / "input8.pdf")
    out_fname = "output.pdf"
    cli_args = [
        "rotate",
        "-o",
        out_fname,
        in_fname,
        "--",  # end options, so negative degree values work
        str(degrees),
        slice,
    ]
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
        streams = capsys.readouterr()
        assert exit_code == 0, streams.err
        # out_fname is relative, so it has to be read while still in tmp_path
        in_rotations = get_page_rotations(in_fname)
        out_rotations = get_page_rotations(out_fname)
        actual_diff = diff_rotations(in_rotations, out_rotations)
        mismatches = diff_rotations(actual_diff, expected_diff)
        assert not any(mismatches)
================================================
FILE: tests/test_sign.py
================================================
from pathlib import Path
import pytest
from endesive import pdf
from .conftest import RESOURCES_ROOT, chdir, run_cli
def test_sign_missing_certificate_key_option(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`sign` called without the --p12 option must fail with exit code 2."""
    cli_args = ["sign", str(RESOURCES_ROOT / "input8.pdf"), "-o", "out.pdf"]
    # Act
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    stderr = capsys.readouterr().err
    # Assert
    assert exit_code == 2
    assert "Missing option" in stderr
def test_sign_already_signed_pdf(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """`sign` on sign_pkcs12.pdf exits with code 2 and reports "already signed"."""
    cli_args = [
        "sign",
        str(RESOURCES_ROOT / "sign_pkcs12.pdf"),
        "-o",
        "out.pdf",
        "--p12",
        str(RESOURCES_ROOT / "signing-certificate.p12"),
        "--p12-password",
        "fpdf2",
    ]
    # Act
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    stderr = capsys.readouterr().err
    # Assert
    assert exit_code == 2
    assert "already signed" in stderr
def test_sign_pkcs12(capsys: pytest.CaptureFixture, tmp_path: Path) -> None:
    """Signing input8.pdf with the PKCS#12 file yields a PDF that endesive verifies."""
    cli_args = [
        "sign",
        str(RESOURCES_ROOT / "input8.pdf"),
        "-o",
        "out.pdf",
        "--p12",
        str(RESOURCES_ROOT / "signing-certificate.p12"),
        "--p12-password",
        "fpdf2",
    ]
    # Act
    with chdir(tmp_path):
        exit_code = run_cli(cli_args)
    stderr = capsys.readouterr().err
    # Assert
    assert exit_code == 0
    assert not stderr
    signed_bytes = (tmp_path / "out.pdf").read_bytes()
    certificate_bytes = (
        RESOURCES_ROOT / "signing-certificate.crt"
    ).read_bytes()
    # Every tuple returned by endesive must report all three checks as passing
    for hash_ok, signature_ok, cert_ok in pdf.verify(
        signed_bytes, [certificate_bytes]
    ):
        assert signature_ok
        assert hash_ok
        assert cert_ok
def test_sign_pkcs12_in_place(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """Sign a copy of `input8.pdf` with `--in-place` and verify that file.

    The input is first copied to `tmp_path / "out.pdf"` so the resource file
    itself is never handed to the command. After a successful run (exit code
    0, empty stderr) that same file is checked with `endesive.pdf.verify`:
    at least one signature must be reported, and each one must have a valid
    hash, signature and certificate.
    """
    # Arrange
    input8pdf = RESOURCES_ROOT / "input8.pdf"
    outpdf = tmp_path / "out.pdf"
    outpdf.write_bytes(input8pdf.read_bytes())

    # Act
    with chdir(tmp_path):
        exit_code = run_cli(
            [
                "sign",
                "out.pdf",
                "--in-place",
                "--p12",
                str(RESOURCES_ROOT / "signing-certificate.p12"),
                "--p12-password",
                "fpdf2",
            ]
        )
    captured = capsys.readouterr()

    # Assert
    assert exit_code == 0
    assert not captured.err
    certificate = RESOURCES_ROOT / "signing-certificate.crt"
    results = pdf.verify(outpdf.read_bytes(), [certificate.read_bytes()])
    # Guard against a vacuous pass: if the file was left unsigned, no result
    # is reported and the loop below would not check anything.
    # (Assumes pdf.verify returns a sequence of result tuples.)
    assert results, "No signature found in the PDF signed in place"
    for hash_ok, signature_ok, cert_ok in results:
        assert signature_ok
        assert hash_ok
        assert cert_ok
================================================
FILE: tests/test_uncompress.py
================================================
"""Tests for the `uncompress` command."""
from pathlib import Path
import pytest
from pypdf import PdfReader
from typer.testing import CliRunner
from pdfly.cli import entry_point
# Module-level Typer test runner, used below to invoke `entry_point`.
runner = CliRunner()
# NOTE(review): `glob("*.pdf")` only matches PDFs located directly inside
# "sample-files"; other tests reference sample PDFs in subdirectories, so
# confirm this parametrization is not empty (a recursive glob may be intended).
@pytest.mark.parametrize(
    "input_pdf_filepath", Path("sample-files").glob("*.pdf")
)
def test_uncompress_all_sample_files(
    input_pdf_filepath: Path, tmp_path: Path
) -> None:
    """Run `uncompress` on a sample PDF and inspect the written file.

    The command must exit with code 0 and create the output file. Every page
    of the output that has a truthy "/Contents" entry must not contain a
    "/Filter" key in it.
    """
    destination = tmp_path / "uncompressed_output.pdf"
    cli_args = ["uncompress", str(input_pdf_filepath), str(destination)]

    outcome = runner.invoke(entry_point, cli_args)

    failure_hint = (
        f"Error in uncompressing {input_pdf_filepath}: {outcome.output}"
    )
    assert outcome.exit_code == 0, failure_hint
    assert destination.exists(), f"Output PDF {destination} does not exist."

    for page in PdfReader(str(destination)).pages:
        contents = page.get("/Contents")
        if not contents:
            # Nothing to inspect for pages without a content entry.
            continue
        assert "/Filter" not in contents, "Content stream is still compressed"
================================================
FILE: tests/test_up2.py
================================================
import os.path
from pathlib import Path
import pytest
from pypdf import PdfReader
from .conftest import RESOURCES_ROOT, chdir, run_cli
def test_up2_fewer_args(capsys: pytest.CaptureFixture, tmp_path: Path) -> None:
    """Run `2-up` with a single positional argument; expect exit code 2.

    The CLI is expected to report "Missing argument" on stderr.
    """
    cli_args = ["2-up", str(RESOURCES_ROOT / "box.pdf")]
    with chdir(tmp_path):
        status = run_cli(cli_args)
    assert status == 2
    stderr_text = capsys.readouterr().err
    assert "Missing argument" in stderr_text
def test_up2_extra_args(capsys: pytest.CaptureFixture, tmp_path: Path) -> None:
    """Run `2-up` with three positional arguments; expect exit code 2.

    The CLI is expected to report "unexpected extra argument" on stderr and
    neither "out.pdf" nor "out2.pdf" may exist in tmp_path afterwards.
    """
    cli_args = [
        "2-up",
        str(RESOURCES_ROOT / "box.pdf"),
        "./out.pdf",
        "./out2.pdf",
    ]
    with chdir(tmp_path):
        status = run_cli(cli_args)
    assert status == 2
    stderr_text = capsys.readouterr().err
    assert "unexpected extra argument" in stderr_text

    # The relative names are checked from within tmp_path, as during the run.
    with chdir(tmp_path):
        for leftover in ("out.pdf", "out2.pdf"):
            assert not os.path.exists(
                leftover
            ), f"'{leftover}' should not exist."
def test_up2_8page_file(capsys: pytest.CaptureFixture, tmp_path: Path) -> None:
    """Run `2-up` on the 8-page `input8.pdf` and check the output geometry.

    The output must have 4 pages; its first page must be twice as wide as
    the first input page and exactly as high.
    """
    # Arrange
    pdf_file = str(RESOURCES_ROOT / "input8.pdf")
    out_file_name = "out.pdf"
    source = PdfReader(pdf_file)
    assert len(source.pages) == 8
    source_box = source.pages[0].mediabox
    in_width, in_height = source_box.width, source_box.height

    # Act
    with chdir(tmp_path):
        exit_code = run_cli(["2-up", pdf_file, out_file_name])
    captured = capsys.readouterr()

    # Assert
    assert exit_code == 0, captured
    assert not captured.err
    result = PdfReader(tmp_path / out_file_name)
    assert len(result.pages) == 4
    result_box = result.pages[0].mediabox
    assert result_box.width == 2 * in_width  # PR #78
    assert result_box.height == in_height
# Regression test for issue #218
def test_up2_odd_page_number(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """Run `2-up` on a PDF with an odd page count.

    The command must succeed with empty stderr, and the output must contain
    (input pages + 1) / 2 pages.
    """
    # Arrange
    pdf_file = "sample-files/026-latex-multicolumn/multicolumn.pdf"
    out_file_path = tmp_path / "out.pdf"
    page_count_in = len(PdfReader(pdf_file).pages)
    # Precondition: the original page number must be odd.
    assert page_count_in % 2 == 1

    # Act
    exit_code = run_cli(["2-up", pdf_file, str(out_file_path)])
    captured = capsys.readouterr()

    # Assert
    assert exit_code == 0, captured
    assert not captured.err
    page_count_out = len(PdfReader(out_file_path).pages)
    # page_count_in is odd (asserted above), so the division is exact.
    assert page_count_out == (page_count_in + 1) / 2
================================================
FILE: tests/test_update_offsets.py
================================================
"""
Every CLI command is called here with a typer CliRunner.
Only end-to-end tests belong here.
"""
import re
import sys
from pathlib import Path
import pytest
from .conftest import RESOURCES_ROOT, run_cli
@pytest.mark.skipif(sys.platform == "win32", reason="Does not run on windows")
def test_update_offsets(capsys: pytest.CaptureFixture) -> None:
    """Run `update-offsets` on a resource PDF and compare with the reference.

    The command is invoked with the input path only, and the result is read
    back from that same path, then compared line by line with
    `file-with-fixed-offsets.pdf`.

    NOTE(review): because the result is read from the input path, this test
    appears to rewrite `file-with-invalid-offsets.pdf` inside the resources
    directory. Consider running it on a copy in a temporary directory.
    """
    # Arrange
    file_in = RESOURCES_ROOT / "file-with-invalid-offsets.pdf"
    file_expected = RESOURCES_ROOT / "file-with-fixed-offsets.pdf"

    # Act
    exit_code = run_cli(
        [
            "update-offsets",
            str(file_in),
        ]
    )

    # Assert
    captured = capsys.readouterr()
    assert exit_code == 0, captured
    assert not captured.err
    # `\s+` tolerates any whitespace (including a line break) between
    # "Wrote" and the path in the command output.
    assert re.search(r"Wrote\s+" + re.escape(str(file_in)), captured.out)
    with file_expected.open(encoding="iso-8859-1") as file_exp:
        lines_exp = file_exp.readlines()
    with file_in.open(encoding="iso-8859-1") as file_act:
        lines_act = file_act.readlines()
    assert len(lines_exp) == len(
        lines_act
    ), f"lines_exp={lines_exp}, lines_act={lines_act}"
    for line_no, (line_exp, line_act) in enumerate(
        zip(lines_exp, lines_act), start=1
    ):
        assert line_exp == line_act, f"Lines differ in line {line_no}"
# The current implementation does not support every valid PDF layout, e.g.
# lines such as "/Length 5470>> stream"; the affected sample files are
# commented out below together with the reason.
@pytest.mark.parametrize(
    "input_pdf_filepath",
    [
        "sample-files/002-trivial-libre-office-writer/002-trivial-libre-office-writer.pdf",
        "sample-files/005-libreoffice-writer-password/libreoffice-writer-password.pdf",
        "sample-files/007-imagemagick-images/imagemagick-ASCII85Decode.pdf",
        "sample-files/007-imagemagick-images/imagemagick-CCITTFaxDecode.pdf",
        "sample-files/007-imagemagick-images/imagemagick-images.pdf",
        "sample-files/007-imagemagick-images/imagemagick-lzw.pdf",
        "sample-files/008-reportlab-inline-image/inline-image.pdf",
        "sample-files/009-pdflatex-geotopo/GeoTopo-komprimiert.pdf",
        # "sample-files/011-google-doc-document/google-doc-document.pdf",  # stream token in line after /Length
        "sample-files/012-libreoffice-form/libreoffice-form.pdf",
        "sample-files/013-reportlab-overlay/reportlab-overlay.pdf",
        "sample-files/015-arabic/habibi-oneline-cmap.pdf",
        "sample-files/015-arabic/habibi-rotated.pdf",
        "sample-files/015-arabic/habibi.pdf",
        "sample-files/016-libre-office-link/libre-office-link.pdf",
        # "sample-files/017-unreadable-meta-data/unreadablemetadata.pdf",  # stream in line after object
        "sample-files/018-base64-image/base64image.pdf",
        # "sample-files/019-grayscale-image/grayscale-image.pdf",  # stream in line after object
        "sample-files/020-xmp/output_with_metadata_pymupdf.pdf",
        # "sample-files/021-pdfa/crazyones-pdfa.pdf",  # stream in line is after dictionary
        "sample-files/022-pdfkit/pdfkit.pdf",
        "sample-files/023-cmyk-image/cmyk-image.pdf",
        "sample-files/024-annotations/annotated_pdf.pdf",
        "sample-files/025-attachment/with-attachment.pdf",
    ],
)
def test_update_offsets_on_all_reference_files(
    capsys: pytest.CaptureFixture, tmp_path: Path, input_pdf_filepath: str
) -> None:
    """Run `update-offsets` on one sample PDF, writing to a temporary file.

    `input_pdf_filepath` is one of the relative path strings listed in the
    parametrization. The command must exit with code 0, print nothing on
    stderr, report "Wrote <output path>" on stdout and create the output.
    """
    # Arrange
    output_pdf_filepath = tmp_path / "out.pdf"

    # Act
    exit_code = run_cli(
        [
            "update-offsets",
            "--encoding",
            "iso-8859-1",
            str(input_pdf_filepath),
            "-o",
            str(output_pdf_filepath),
        ]
    )

    # Assert
    captured = capsys.readouterr()
    assert exit_code == 0, captured
    assert not captured.err
    assert f"Wrote {output_pdf_filepath}" in captured.out
    assert output_pdf_filepath.exists()
================================================
FILE: tests/test_x2pdf.py
================================================
"""
Every CLI command is called here with a typer CliRunner.
Only end-to-end tests belong here.
"""
from pathlib import Path
import pytest
from .conftest import run_cli
def test_x2pdf_succeed_to_convert_jpg(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """Run `x2pdf` on a sample JPEG and expect the output PDF to be created.

    The command must exit with code 0 and print nothing on stdout.
    """
    # Arrange
    target_pdf = tmp_path / "out.pdf"
    cli_args = [
        "x2pdf",
        "sample-files/003-pdflatex-image/page-0-Im1.jpg",
        "--output",
        str(target_pdf),
    ]

    # Act
    status = run_cli(cli_args)

    # Assert
    streams = capsys.readouterr()
    assert status == 0, streams
    assert streams.out == ""
    assert target_pdf.exists()
def test_x2pdf_succeed_to_embed_pdfs(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """Run `x2pdf` with two sample PDFs as inputs and expect one output PDF.

    The command must exit with code 0 and print nothing on stdout.
    """
    # Arrange
    target_pdf = tmp_path / "out.pdf"
    source_pdfs = [
        "sample-files/001-trivial/minimal-document.pdf",
        "sample-files/002-trivial-libre-office-writer/002-trivial-libre-office-writer.pdf",
    ]

    # Act
    status = run_cli(["x2pdf", *source_pdfs, "--output", str(target_pdf)])

    # Assert
    streams = capsys.readouterr()
    assert status == 0, streams
    assert streams.out == ""
    assert target_pdf.exists()
def test_x2pdf_fail_to_open_file(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """Run `x2pdf` on the path "NonExistingFile"; expect exit code 1.

    stdout is expected to contain "No such file or directory".
    """
    # Arrange & Act
    target_pdf = tmp_path / "out.pdf"
    status = run_cli(["x2pdf", "NonExistingFile", "--output", str(target_pdf)])

    # Assert
    streams = capsys.readouterr()
    assert status == 1, streams
    assert "No such file or directory" in streams.out
def test_x2pdf_fail_to_convert(
    capsys: pytest.CaptureFixture, tmp_path: Path
) -> None:
    """Run `x2pdf` on "README.md"; expect exit code 1.

    stdout is expected to contain the "Could not convert" error message
    naming the input file.
    """
    # Arrange & Act
    target_pdf = tmp_path / "out.pdf"
    status = run_cli(["x2pdf", "README.md", "--output", str(target_pdf)])

    # Assert
    streams = capsys.readouterr()
    assert status == 1, streams
    expected_message = "Error: Could not convert 'README.md' to a PDF"
    assert expected_message in streams.out