Showing preview only (301K chars total). Download the full file or copy to clipboard to get everything.
Repository: py-pdf/pdfly
Branch: main
Commit: 897420ec65b3
Files: 97
Total size: 263.1 KB
Directory structure:
gitextract_w5t8afxl/
├── .all-contributorsrc
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ ├── feature_request.md
│ │ └── question.md
│ ├── dependabot.yml
│ ├── pull_request_template.md
│ ├── scripts/
│ │ └── check_pr_title.py
│ └── workflows/
│ ├── check-gitignored-files.yml
│ ├── create-github-release.yaml
│ ├── github-ci.yaml
│ ├── publish-to-pypi.yaml
│ ├── release.yaml
│ └── title-check.yml
├── .gitignore
├── .gitmodules
├── .isort.cfg
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── .typos.toml
├── CHANGELOG.md
├── CONTRIBUTORS.md
├── LICENSE
├── Makefile
├── README.md
├── dependabot.yml
├── docs/
│ ├── Makefile
│ ├── conf.py
│ ├── dev/
│ │ ├── intro.md
│ │ └── testing.md
│ ├── index.rst
│ ├── make.bat
│ ├── meta/
│ │ └── project-governance.md
│ └── user/
│ ├── installation.md
│ ├── subcommand-2-up.md
│ ├── subcommand-booklet.md
│ ├── subcommand-cat.md
│ ├── subcommand-check-sign.md
│ ├── subcommand-compress.md
│ ├── subcommand-extract-annotated-pages.md
│ ├── subcommand-extract-images.md
│ ├── subcommand-extract-text.md
│ ├── subcommand-meta.md
│ ├── subcommand-pagemeta.md
│ ├── subcommand-rm.md
│ ├── subcommand-rotate.md
│ ├── subcommand-sign.md
│ ├── subcommand-uncompress.md
│ ├── subcommand-update-offsets.md
│ └── subcommand-x2pdf.md
├── make_release.py
├── mypy.ini
├── pdfly/
│ ├── __init__.py
│ ├── __main__.py
│ ├── _utils.py
│ ├── _version.py
│ ├── booklet.py
│ ├── cat.py
│ ├── check_sign.py
│ ├── cli.py
│ ├── compress.py
│ ├── extract_annotated_pages.py
│ ├── extract_images.py
│ ├── metadata.py
│ ├── pagemeta.py
│ ├── rm.py
│ ├── rotate.py
│ ├── sign.py
│ ├── uncompress.py
│ ├── up2.py
│ ├── update_offsets.py
│ └── x2pdf.py
├── pylock.toml
├── pyproject.toml
├── renovate.json
├── resources/
│ ├── demo2_ca.root.crt.pem
│ ├── signing-certificate.crt
│ └── signing-certificate.p12
├── setup.cfg
├── setup.py
└── tests/
├── __init__.py
├── conftest.py
├── test_booklet.py
├── test_cat.py
├── test_check_sign.py
├── test_cli.py
├── test_compress.py
├── test_extract_annotated_pages.py
├── test_extract_images.py
├── test_pagemeta.py
├── test_rm.py
├── test_rotate.py
├── test_sign.py
├── test_uncompress.py
├── test_up2.py
├── test_update_offsets.py
└── test_x2pdf.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .all-contributorsrc
================================================
{
"projectName": "pdfly",
"projectOwner": "py-pdf",
"repoType": "github",
"repoHost": "https://github.com",
"files": [
"README.md"
],
"imageSize": 100,
"commit": true,
"commitConvention": "eslint",
"contributors": [
{
"login": "MartinThoma",
"name": "Martin Thoma",
"avatar_url": "https://avatars.githubusercontent.com/u/1658117?v=4",
"profile": "http://martin-thoma.com/",
"contributions": [
"code",
"doc",
"ideas",
"infra",
"maintenance",
"projectManagement",
"tutorial"
]
},
{
"login": "Lucas-C",
"name": "Lucas Cimon",
"avatar_url": "https://avatars.githubusercontent.com/u/925560?v=4",
"profile": "https://chezsoi.org/lucas/blog/",
"contributions": [
"bug",
"code",
"doc",
"maintenance"
]
},
{
"login": "pastor-robert",
"name": "Rob Adams",
"avatar_url": "https://avatars.githubusercontent.com/u/35646090?v=4",
"profile": "https://github.com/pastor-robert",
"contributions": [
"code"
]
},
{
"login": "Kaos599",
"name": "Harsh ",
"avatar_url": "https://avatars.githubusercontent.com/u/115716485?v=4",
"profile": "https://github.com/Kaos599",
"contributions": [
"code"
]
},
{
"login": "srogmann",
"name": "Sascha Rogmann",
"avatar_url": "https://avatars.githubusercontent.com/u/59577610?v=4",
"profile": "https://github.com/srogmann",
"contributions": [
"code"
]
},
{
"login": "ebotiab",
"name": "Enrique Botía",
"avatar_url": "https://avatars.githubusercontent.com/u/62219950?v=4",
"profile": "https://github.com/ebotiab",
"contributions": [
"code"
]
},
{
"login": "kommade",
"name": "kommade",
"avatar_url": "https://avatars.githubusercontent.com/u/99523586?v=4",
"profile": "https://github.com/kommade",
"contributions": [
"code"
]
},
{
"login": "Zingzy",
"name": "Zingzy",
"avatar_url": "https://avatars.githubusercontent.com/u/90309290?v=4",
"profile": "https://spoo.me/",
"contributions": [
"code"
]
},
{
"login": "wolfram77",
"name": "Subhajit Sahu",
"avatar_url": "https://avatars.githubusercontent.com/u/3179612?v=4",
"profile": "https://wolfram77.github.io",
"contributions": [
"code"
]
},
{
"login": "kianmeng",
"name": "Kian-Meng Ang",
"avatar_url": "https://avatars.githubusercontent.com/u/134518?v=4",
"profile": "https://www.kianmeng.org",
"contributions": [
"ideas"
]
},
{
"login": "hwine",
"name": "Hal Wine",
"avatar_url": "https://avatars.githubusercontent.com/u/132412?v=4",
"profile": "https://github.com/hwine",
"contributions": [
"bug",
"code"
]
},
{
"login": "philippesamuel",
"name": "philippesamuel",
"avatar_url": "https://avatars.githubusercontent.com/u/32560769?v=4",
"profile": "https://github.com/philippesamuel",
"contributions": [
"doc"
]
},
{
"login": "marcobrb",
"name": "marcobrb",
"avatar_url": "https://avatars.githubusercontent.com/u/219329309?v=4",
"profile": "https://github.com/marcobrb",
"contributions": [
"doc"
]
},
{
"login": "moormaster",
"name": "moormaster",
"avatar_url": "https://avatars.githubusercontent.com/u/2452695?v=4",
"profile": "https://github.com/moormaster",
"contributions": [
"doc",
"code"
]
},
{
"login": "geoffbeier",
"name": "Geoff Beier",
"avatar_url": "https://avatars.githubusercontent.com/u/133355?v=4",
"profile": "https://geoff.tuxpup.com/",
"contributions": [
"code"
]
},
{
"login": "georgthegreat",
"name": "Yuriy Chernyshov",
"avatar_url": "https://avatars.githubusercontent.com/u/1121500?v=4",
"profile": "https://leftparagraphs.com",
"contributions": [
"ideas",
"code"
]
},
{
"login": "lkintact",
"name": "lkintact",
"avatar_url": "https://avatars.githubusercontent.com/u/24726299?v=4",
"profile": "https://github.com/lkintact",
"contributions": [
"bug"
]
}
],
"contributorsPerLine": 5,
"skipCi": false,
"commitType": "docs"
}
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Report some unexpected behaviour to help us improve
title: ''
labels: bug
assignees: ''
---
<!--
Hi there! Thank you for wanting to make pdfly better 😉.
Please perform a quick search first, in order to check if your problem has already been reported:
https://github.com/py-pdf/pdfly/issues
-->
Describe the bug
**Error details**
If an exception is raised, it is very important that you provide the full error message.
Otherwise members of the `pdfly` community won't be able to help you with your problem.
**Environment**
Please provide the following information:
* **Operating System**: Windows, Mac OSX, Linux flavour...
* **Python version**: you can get this information with `python --version`
* **`pdfly` version used**: if you installed it with `pip`, you can get this information in `pip freeze` output
<!-- Bonus / recommended:
Often, there are bugfixes & other changes on the `main` branch of the pdfly git repo
that have not been released yet. They are listed in the ChangeLog:
https://github.com/py-pdf/pdfly/blob/main/CHANGELOG.md
Hence, please check that your bug is still present using the latest version of pdfly from the git repository, by installing it this way:
pip install git+https://github.com/py-pdf/pdfly.git@main
-->
================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
# Ref: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#configuring-the-template-chooser
blank_issues_enabled: false
contact_links:
- name: 💬 Start a discussion
url: https://github.com/py-pdf/pdfly/discussions/new
about: Informal discussion about the project organization, considerations that do not expect a definitive answer, etc.
# - name: Security issue
# url: security@...
# about: Do not report security issues publicly. Email our security contact.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''
---
<!--
Hi there! Thank you for wanting to make pdfly better 😉.
Before you submit this, make sure that this feature has not already been requested,
and that it is not already implemented in the main branch but not released yet:
https://github.com/py-pdf/pdfly/blob/main/CHANGELOG.md
-->
**Please explain your intent**
Describe what you want to achieve.
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
Please also mention any alternative solutions or features you've considered.
**Additional context**
Add any other context, code snippet or screenshots about the feature request.
You can also mention if you are willing to contribute a PR yourself to provide this feature.
================================================
FILE: .github/ISSUE_TEMPLATE/question.md
================================================
---
name: I have a question
about: Anything that is not a bug report or a feature request
title: ''
labels: question
assignees: ''
---
<!--
Hi there! Thank you for reaching out and joining the pdfly user community 😉.
Before submitting your question, please check:
* that it is not covered by the documentation: https://pdfly.readthedocs.io/en/latest/
* that it has not already been asked: https://github.com/py-pdf/pdfly/issues
-->
================================================
FILE: .github/dependabot.yml
================================================
# Set update schedule for GitHub Actions
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
================================================
FILE: .github/pull_request_template.md
================================================
<!--
Thanks for your interest in the project.
Bugs filed and PRs submitted are appreciated!
Some guidelines are provided like this, in HTML comments, to expedite the code review before merging your contribution.
First, please make sure that you have read the documentation page on pdfly development:
https://pdfly.readthedocs.io/en/latest/dev/intro.html
If you're new to contributing to open source projects,
you might find this free video course helpful: http://kcd.im/pull-request
-->
<!-- What changes are being made? (What feature/bug is being fixed here?) -->
e.g. Fixes #0 <!-- This will automatically close issue #0 once the PR is merged: https://help.github.com/en/articles/closing-issues-using-keywords -->
<!-- Have you done all of these things? -->
**Checklist**:
<!-- To check an item, place an "x" in the box like so: "- [x] Item description"
Add "N/A" to the end of each line that's irrelevant to your changes -->
- [ ] A unit test is covering the code added / modified by this PR
- [ ] In case of a new feature, docstrings have been added, with also some documentation in the `docs/` folder
- [ ] A mention of the change is present in `CHANGELOG.md`
- [ ] This PR is ready to be merged <!-- In your opinion, can this be merged as soon as it's reviewed? Else, this can be turned into a Draft PR -->
<!-- Feel free to add additional comments, and to ask questions if some of those guidelines are unclear to you! -->
<!--
Once a PR is merged, maintainers will add your name to the contributors table in README.md.
If they forget, or you do not wish this to happen, please mention it.
-->
By submitting this pull request, I confirm that my contribution is made under the terms of the [BSD 3-Clause license](https://github.com/py-pdf/pdfly/blob/main/LICENSE).
================================================
FILE: .github/scripts/check_pr_title.py
================================================
"""Check that all PR titles follow the desired scheme."""
import os
import sys
# Prefixes a pull-request title is allowed to start with.
KNOWN_PREFIXES = (
    "SEC: ",
    "BUG: ",
    "ENH: ",
    "DEP: ",
    "PI: ",
    "ROB: ",
    "DOC: ",
    "Docs: ",  # PRs opened by Dependabot
    "TST: ",
    "DEV: ",
    "STY: ",
    "MAINT: ",
    "REL: ",
)

# Title under test; an unset variable yields an empty title, which is rejected below.
PR_TITLE = os.getenv("PR_TITLE", "")

# Every known prefix contains ": ", so the split is only performed once the
# prefix is known to match: index 1 is then guaranteed to exist.
_has_known_prefix = PR_TITLE.startswith(KNOWN_PREFIXES)
_summary = PR_TITLE.split(": ", maxsplit=1)[1] if _has_known_prefix else ""

if _has_known_prefix and _summary:
    sys.stdout.write(f"PR title '{PR_TITLE}' appears to be valid.\n")
else:
    # Explain the failure on stderr, then exit non-zero so the CI job fails.
    for _message in (
        f"The PR title '{PR_TITLE}' does not follow the projects naming scheme: "
        "https://pdfly.readthedocs.io/en/latest/dev/intro.html#commit-messages\n",
        "If you do not know which one to choose or if multiple apply, make a best guess. "
        "Nobody will complain if it does not quite fit :-)\n",
    ):
        sys.stderr.write(_message)
    sys.exit(1)
================================================
FILE: .github/workflows/check-gitignored-files.yml
================================================
name: Check for Gitignored Files
on:
  push:
    branches:
      - '**' # Run on all branches
  pull_request:
jobs:
  check-gitignored-files:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Check for gitignored files in commit
        run: |
          # List every tracked file that matches an ignore rule.
          # Right after a checkout the index equals HEAD, so "git diff --cached"
          # is always empty, and "git check-ignore" skips tracked files unless
          # --no-index is given: asking "git ls-files" directly avoids both pitfalls.
          git ls-files --cached --ignored --exclude-standard > ignored_files.txt
          # Fail if there are any ignored files
          if [[ -s ignored_files.txt ]]; then
            echo "The following files are gitignored but committed:"
            cat ignored_files.txt
            exit 1
          fi
================================================
FILE: .github/workflows/create-github-release.yaml
================================================
name: Create a GitHub release page
on:
push:
tags:
- '*.*.*'
workflow_dispatch:
workflow_run:
workflows: ["Create git tag"]
types:
- completed
permissions:
contents: write
jobs:
build_and_publish:
if: ${{ github.event.workflow_run.conclusion == 'success' }}
name: Create a GitHub release page
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Prepare variables
id: prepare_variables
run: |
git fetch --tags --force
latest_tag=$(git describe --tags --abbrev=0)
echo "latest_tag=$(git describe --tags --abbrev=0)" >> "$GITHUB_ENV"
echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
echo "tag_body<<$EOF" >> "$GITHUB_ENV"
git --no-pager tag -l "${latest_tag}" --format='%(contents:body)' >> "$GITHUB_ENV"
echo "$EOF" >> "$GITHUB_ENV"
- name: Create GitHub Release 🚀
uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3
with:
tag_name: ${{ env.latest_tag }}
name: Version ${{ env.latest_tag }}, ${{ env.date }}
draft: false
prerelease: false
body: ${{ env.tag_body }}
================================================
FILE: .github/workflows/github-ci.yaml
================================================
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: CI
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
env:
  # Interpreter used by the "package" job below.
  # Keep in sync with the newest entry of the test matrix.
  PYTHON_LATEST: "3.14"
jobs:
  tests:
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
        platform: [ubuntu-latest, windows-latest, macos-latest]
    name: pytest on ${{ matrix.python-version }} / ${{ matrix.platform }}
    runs-on: ${{ matrix.platform }}
    steps:
      - name: Checkout Code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: 'recursive'
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Install requirements
        run: pip install . --group dev
      # The minimal-versions check runs on the lowest Python version of the matrix.
      # It is restricted to Linux because it relies on the GNU "sed -i" syntax.
      - name: Install pdfly
        if: matrix.python-version != '3.10' || matrix.platform != 'ubuntu-latest'
        run: pip install .
      - name: Install pdfly using the minimal versions of the dependencies
        if: matrix.python-version == '3.10' && matrix.platform == 'ubuntu-latest'
        run: |
          # We ensure that those minimal versions remain compatible:
          sed -i '/dependencies = \[/,/\]/s/>=/==/' pyproject.toml
          pip install .
      - name: Run tests
        run: pytest -vv
  codestyle:
    name: Check code with black, mypy, ruff & typos
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: 'recursive'
      - name: Cache Downloaded Files
        id: cache-downloaded-files
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
        with:
          path: '**/tests/pdf_cache/*'
          key: cache-downloaded-files
      - name: Upgrade pip, install pdfly and its dev dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .
          pip install . --group dev
      - name: Lint with black
        run: black --check --extend-exclude sample-files .
      - name: Lint with mypy
        run: mypy . --ignore-missing-imports --exclude build
      - name: Test with ruff
        run: ruff check pdfly/
      - name: Spell Check Repo
        uses: crate-ci/typos@7c572958218557a3272c2d6719629443b5cc26fd # v1.45.2
  package:
    name: Build & verify package
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          # Defined in the workflow-level "env" block above
          python-version: ${{ env.PYTHON_LATEST }}
      - name: Build package
        run: |
          python -m pip install flit check-wheel-contents
          flit build
          ls -l dist
          check-wheel-contents dist/*.whl
      - name: Test installing package
        run: python -m pip install .
      - name: Test running installed package
        # Run from outside the checkout so that the installed package is imported
        working-directory: /tmp
        run: python -c "import pdfly;print(pdfly.__version__)"
================================================
FILE: .github/workflows/publish-to-pypi.yaml
================================================
name: Publish Python Package to PyPI
on:
push:
tags:
- '*.*.*'
workflow_dispatch:
workflow_run:
workflows: ["Create git tag"]
types:
- completed
permissions:
contents: write
jobs:
build_and_publish:
if: ${{ github.event.workflow_run.conclusion == 'success' }}
name: Publish a new version
runs-on: ubuntu-latest
steps:
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: 3.x
- name: Install Flit
run: |
python -m pip install --upgrade pip
pip install flit
- name: Checkout Repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Publish Package to PyPI🚀
env:
FLIT_USERNAME: '__token__'
FLIT_PASSWORD: ${{ secrets.FLIT_PASSWORD }}
run: |
flit publish
================================================
FILE: .github/workflows/release.yaml
================================================
# This action assumes that there is a REL-commit which already has a
# Markdown-formatted git tag. Hence the CHANGELOG is already adjusted
# and it's decided what should be in the release.
# This action only ensures the release is done with the proper contents
# and that it's announced with a Github release.
name: Create git tag
on:
  push:
    branches:
      - main
permissions:
  contents: write
jobs:
  build_and_publish:
    name: Publish a new version
    runs-on: ubuntu-latest
    if: "${{ startsWith(github.event.head_commit.message, 'REL: ') }}"
    steps:
      - name: Checkout Repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      # The commit message and the step outputs are handed to the scripts through
      # environment variables instead of being expanded into the shell source:
      # this prevents script injection and keeps messages containing quotes,
      # "$" or backticks from breaking the commands.
      - name: Extract version from commit message
        id: extract_version
        env:
          COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
        run: |
          VERSION=$(printf '%s\n' "$COMMIT_MESSAGE" | grep -oP '(?<=REL: )\d+\.\d+\.\d+')
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
      - name: Extract tag message from commit message
        id: extract_message
        env:
          COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
          VERSION: ${{ steps.extract_version.outputs.version }}
        run: |
          # Random delimiter for the multi-line output value
          delimiter="$(openssl rand -hex 8)"
          # Drop the first "REL: <version>" occurrence, keep the rest as the tag message
          MESSAGE=$(printf '%s\n' "$COMMIT_MESSAGE" | sed "0,/REL: $VERSION/s///" )
          echo "message<<${delimiter}" >> "$GITHUB_OUTPUT"
          echo "$MESSAGE" >> "$GITHUB_OUTPUT"
          echo "${delimiter}" >> "$GITHUB_OUTPUT"
      - name: Create Git Tag
        env:
          VERSION: ${{ steps.extract_version.outputs.version }}
          MESSAGE: ${{ steps.extract_message.outputs.message }}
        run: |
          git config user.name github-actions
          git config user.email github-actions@github.com
          git tag "$VERSION" -m "$MESSAGE"
          git push origin "$VERSION"
================================================
FILE: .github/workflows/title-check.yml
================================================
name: 'PR Title Check'
on:
pull_request:
# check when PR
# * is created,
# * title is edited, and
# * new commits are added (to ensure failing title blocks merging)
types: [opened, reopened, edited, synchronize]
jobs:
title-check:
name: Title check
runs-on: ubuntu-latest
steps:
- name: Checkout Code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Check PR title
env:
PR_TITLE: ${{ github.event.pull_request.title }}
run: python .github/scripts/check_pr_title.py
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# IntelliJ
.idea
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
*.pdf
.envrc
# Documentation files copied when building:
docs/meta/CHANGELOG.md
docs/meta/CONTRIBUTORS.md
# 'make release' creates those files:
RELEASE_COMMIT_MSG.md
RELEASE_TAG_MSG.md
================================================
FILE: .gitmodules
================================================
[submodule "sample-files"]
path = sample-files
url = git@github.com:py-pdf/sample-files.git
================================================
FILE: .isort.cfg
================================================
[settings]
line_length=79
indent=' '
multi_line_output=3
length_sort=0
include_trailing_comma=True
skip=docs
known_third_party = PIL,pypdf,pydantic,setuptools,typer
================================================
FILE: .pre-commit-config.yaml
================================================
# pre-commit run --all-files
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: check-added-large-files
args: ['--maxkb=1000']
- id: check-ast
- id: check-case-conflict
- id: check-docstring-first
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
exclude: "resources/.*|docs/make.bat"
- id: fix-byte-order-marker
- id: mixed-line-ending
args: ['--fix=lf']
exclude: "docs/make.bat"
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 26.3.1
hooks:
- id: black
args: [--target-version, py36]
- repo: https://github.com/asottile/blacken-docs
rev: 1.20.0
hooks:
- id: blacken-docs
additional_dependencies: [black==22.1.0]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.15.6
hooks:
- id: ruff
args: ['--fix']
exclude: "tests/"
- repo: https://github.com/asottile/pyupgrade
rev: v3.21.2
hooks:
- id: pyupgrade
args: [--py38-plus]
- repo: https://github.com/pycqa/flake8
rev: 7.3.0
hooks:
- id: flake8
args: ["--ignore", "E,W,F"]
- repo: https://github.com/pre-commit/mirrors-mypy
rev: 'v1.19.1'
hooks:
- id: mypy
files: ^pdfly/.*
args: [--ignore-missing-imports]
additional_dependencies:
- "pydantic>=1.10.4"
================================================
FILE: .readthedocs.yaml
================================================
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
version: 2
build:
os: ubuntu-22.04
tools:
python: "3.12"
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
# If using Sphinx, optionally build your docs in additional formats such as PDF
formats: all
# Optionally declare the Python requirements required to build your docs
python:
install:
- method: pip
path: .
extra_requirements:
- docs
================================================
FILE: .typos.toml
================================================
[default]
extend-ignore-identifiers-re = [
"certifi",
"FlateDecode",
# This typo appears in a .tex file in the sample-files git submodule:
"exampe"
]
================================================
FILE: CHANGELOG.md
================================================
# CHANGELOG
## Version 0.6.0, not released yet
### Bug Fixes (BUG)
- `2up` incorrectly handled documents with an odd number of pages - [issue #219](https://github.com/py-pdf/pdfly/issues/218)
### New Features (ENH)
- `pagemeta` now displays the name of a known page format that is close to the page dimensions
## Version 0.5.1, 2025-10-13
### New Features (ENH)
- `extract-images`: output filenames are now formatted using four digits for page numbers, so that output files are ordered alphabetically
- ensured support for Python 3.14
### Bug Fixes (BUG)
- `requests` is now a dependency, to prevent a `ModuleNotFoundError` when running with `uv`
## Version 0.5.0, 2025-10-13
### New Features (ENH)
- New `extract-annotated-pages` to filter out only the user annotated pages ([PR #98](https://github.com/py-pdf/pdfly/pull/98))
- New `rotate` sub-command to rotate specified pages ([PR #128](https://github.com/py-pdf/pdfly/pull/128))
- Added optional `--password` argument to `cat` to perform decryption ([PR #61](https://github.com/py-pdf/pdfly/pull/61))
- `pagemeta` now displays known page formats when it can detect it: A3, A4, A5, Letter, Legal
- `pagemeta` now displays the rotation value.
- New `sign` sub-command to create a signed pdf from an existing pdf ([PR #165](https://github.com/py-pdf/pdfly/pull/165))
- New `check-sign` sub-command to verify the signature of a signed pdf ([PR #166](https://github.com/py-pdf/pdfly/pull/166))
### Bug Fixes (BUG)
- `pypdf[full]` is now a dependency, instead of just `pypdf`, to avoid some cases of `DependencyError`
### Deprecations (DEP)
* support for older Python3 versions has been dropped, `pdfly` now requires Python 3.10+
## Version 0.4.0, 2024-12-08
### New Features (ENH)
- New `booklet` command to arrange pages for booklet printing ([PR #77](https://github.com/py-pdf/pdfly/pull/77))
- New `uncompress` command ([PR #75](https://github.com/py-pdf/pdfly/pull/75))
- New `update-offsets` command to adjust offsets and lengths ([PR #15](https://github.com/py-pdf/pdfly/pull/15))
- New `rm` command ([PR #59](https://github.com/py-pdf/pdfly/pull/59))
- `metadata`: now also displaying CreationDate, Creator, Keywords & Subject ([PR #73](https://github.com/py-pdf/pdfly/pull/73))
- Add warning for out-of-bounds page range in pdfly `cat` command ([PR #58](https://github.com/py-pdf/pdfly/pull/58))
### Bug Fixes (BUG)
- `2-up` command, that only showed one page per sheet, on the left side, with blank space on the right ([PR #78](https://github.com/py-pdf/pdfly/pull/78))
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.3...0.4.0)
## Version 0.3.3, 2024-04-14
### Developer Experience (DEV)
- Chain workflows
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.2...0.3.3)
## Version 0.3.2, 2024-04-14
### Developer Experience (DEV)
- Decouple git tag / PyPI release / Github release page (#49, #50)
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.1...0.3.2)
## Version 0.3.1, 2024-03-29
### Maintenance (MAINT)
- Update pypdf usage (#48)
### Developer Experience (DEV)
- Release via REL commit (#48)
- Fix mypy issues
- Add make_release.py
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.3.0...0.3.1)
## Version 0.3.0, 2023-12-17
### New Features (ENH)
- Add x2pdf command (#25)
### Bug Fixes (BUG)
- boxes are floats, not int
- Add missing fpdf2 dependency (#29)
### Documentation (DOC)
- cat command
- More examples for the cat subcommand
- Add cat subcommand
- Link to readthedocs
- Add project governance file
- Move readthedocs config file to root
- Add docs (#24)
### Developer Experience (DEV)
- Checkout sample-files in CI (#30)
- Let dependabot update Github Actions
- Add action for automatic releases
### Maintenance (MAINT)
- Update dependencies (#42)
- In the cat subcommand, replace the usage of the deprecated PdfMerger by PdfWriter (#34)
- Update .pre-commit-config.yaml
- Adjust x2pdf syntax
### Testing (TST)
- cat with two files (#41)
- Test cat command with more parameters + validate result (#40)
- Adding unit tests (#28)
### Other
- Bump actions/setup-python from 4 to 5 (#39), by dependabot[bot]
- test_extract_images_monochrome() is now passing, by CimonLucas(LCM)
- Bump actions/setup-python from 3 to 4 (#27), by dependabot[bot]
- Bump actions/checkout from 3 to 4 (#26), by dependabot[bot]
- Ensure input PDF exists for cat subcommand, by MartinThoma
[Full Changelog](https://github.com/py-pdf/pdfly/compare/0.2.14...0.3.0)
================================================
FILE: CONTRIBUTORS.md
================================================
# List of contributors
The list of contributors has been moved into the [README.md](https://github.com/py-pdf/pdfly/blob/main/README.md#contributors-).
================================================
FILE: LICENSE
================================================
BSD 3-Clause License
Copyright (c) 2022, py-pdf
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
================================================
FILE: Makefile
================================================
# Developer convenience targets for pdfly.
#
# None of these targets produces a file named after itself, so they are all
# declared phony: they always run when requested, even if a file or directory
# called e.g. "clean" or "test" ever appears in the working tree.
.PHONY: maint release upload clean lint test

# Refresh tooling and dependencies: update pre-commit hooks, reinstall the
# package, regenerate the lock file for the dev and docs groups, install from
# pylock.toml and pull the latest commits of the git submodules.
maint:
	pre-commit autoupdate
	python -m pip install --upgrade .
	python -m pip lock --group dev --group docs .
	uv pip install -r pylock.toml
	git submodule update --remote

# Run the release script, then commit using RELEASE_COMMIT_MSG.md as the
# initial (editable) commit message.
release:
	python make_release.py
	git commit -eF RELEASE_COMMIT_MSG.md

# Clean the tree, then publish with flit.
# $(MAKE) is used instead of a literal "make" so the sub-make is the same
# program that was invoked and inherits its command-line flags.
upload:
	$(MAKE) clean
	flit publish

# Remove build artefacts, caches and files left behind by the test suite.
clean:
	python setup.py clean --all
	pyclean .
	rm -rf tests/__pycache__ pdfly/__pycache__ Image9.png htmlcov docs/_build dist dont_commit_merged.pdf dont_commit_writer.pdf pdfly.egg-info

# Type-check with mypy, then let ruff apply fixes (including unsafe ones).
lint:
	mypy . --ignore-missing-imports --exclude build
	ruff check --fix --unsafe-fixes

# Run the test suite with coverage (terminal + HTML reports), the 3 slowest
# test durations and a 30 second timeout.
test:
	pytest tests --cov --cov-report term-missing -vv --cov-report html --durations=3 --timeout=30
================================================
FILE: README.md
================================================
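[![PyPI version](https://img.shields.io/pypi/v/pdfly.svg)](https://pypi.org/pypi/pdfly#history)
[![Python Support](https://img.shields.io/pypi/pyversions/pdfly.svg)](https://pypi.org/project/pdfly/)
[![License](https://img.shields.io/badge/License-BSD%203%20Clause-blue.svg)](https://opensource.org/license/bsd-3-clause)
[![Documentation Status](https://app.readthedocs.org/projects/pdfly/badge/?version=latest)](https://pdfly.readthedocs.io/en/latest/)
[![Build Status](https://github.com/py-pdf/pdfly/workflows/CI/badge.svg)](https://github.com/py-pdf/pdfly/actions?query=branch%3Amain)
[![GitHub last commit](https://img.shields.io/github/last-commit/py-pdf/pdfly)](https://github.com/py-pdf/pdfly/commits/main/)
[![Issues closed](https://img.shields.io/github/issues-closed/py-pdf/pdfly)](https://github.com/py-pdf/pdfly/issues)
[![Pull requests closed](https://img.shields.io/github/issues-pr-closed/py-pdf/pdfly)](https://github.com/py-pdf/pdfly/pulls)
[![Linters](https://img.shields.io/badge/linters-black,ruff,mypy-green.svg)](https://github.com/py-pdf/pdfly/actions)
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat)](https://makeapullrequest.com)
[![first-timers-only](https://img.shields.io/badge/first--timers--only-friendly-blue.svg)](https://www.firsttimersonly.com/)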
# pdfly
pdfly (say: PDF-li) is a pure-python cli application for manipulating PDF files.
<img src="docs/pdfly-logo.png" alt="pdfly logo" width="25%">
## Installation
```bash
pip install -U pdfly
```
As `pdfly` is an application, you might want to install it with [`pipx`](https://pypi.org/project/pipx/) or [`uv tool`](https://docs.astral.sh/uv/concepts/tools/): `uvx pdfly --help`
## Usage
```console
$ pdfly --help
Usage: pdfly [OPTIONS] COMMAND [ARGS]...
pdfly is a pure-python cli application for manipulating PDF files.
╭─ Options ──────────────────────────────────────────────────────────────────────────────────────╮
│ --version │
│ --help Show this message and exit. │
╰────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Commands ─────────────────────────────────────────────────────────────────────────────────────╮
│ 2-up Create a booklet-style PDF from a single input. │
│ booklet Reorder and two-up PDF pages for booklet printing. │
│ cat Extract and concatenate pages from PDF files into a single PDF file. │
│ check-sign Verifies the signature of a signed PDF. │
│ compress Compress a PDF. │
│ extract-annotated-pages Extract only the annotated pages from a PDF. │
│ extract-images Extract images from PDF without resampling or altering. │
│ extract-text Extract text from a PDF file. │
│ meta Show metadata of a PDF file │
│ pagemeta Give details about a single page. │
│ rm Remove pages from PDF files. │
│ rotate Rotate specified pages by the specified amount │
│ sign Creates a signed PDF from an existing PDF file. │
│ uncompress Module for uncompressing PDF content streams. │
│ update-offsets Updates offsets and lengths in a simple PDF file. │
│ x2pdf Convert one or more files to PDF. Each file is a page. │
╰────────────────────────────────────────────────────────────────────────────────────────────────╯
```
You can see the help of every subcommand by typing `--help`:
```console
$ pdfly 2-up --help
Usage: pdfly 2-up [OPTIONS] PDF OUT
Create a booklet-style PDF from a single input.
Pairs of two pages will be put on one page (left and right)
usage: python 2-up.py input_file output_file
╭─ Arguments ───────────────────────────────────────╮
│ * pdf PATH [default: None] [required] │
│ * out PATH [default: None] [required] │
╰───────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────╮
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────╯
```
**Note:** `pdfly` has nothing to do with ``pdfly.net`` or ``gopdfly.com``
## Contributors ✨
pdfly is a free software project without any company affiliation. We cannot pay
contributors, but we do value their contributions 🤗
<!-- ALL-CONTRIBUTORS-LIST:START - Do not remove or modify this section -->
<!-- prettier-ignore-start -->
<!-- markdownlint-disable -->
<table>
<tbody>
<tr>
<td align="center" valign="top" width="20%"><a href="http://martin-thoma.com/"><img src="https://avatars.githubusercontent.com/u/1658117?v=4?s=100" width="100px;" alt="Martin Thoma"/><br /><sub><b>Martin Thoma</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=MartinThoma" title="Code">💻</a> <a href="https://github.com/py-pdf/pdfly/commits?author=MartinThoma" title="Documentation">📖</a> <a href="#ideas-MartinThoma" title="Ideas, Planning, & Feedback">🤔</a> <a href="#infra-MartinThoma" title="Infrastructure (Hosting, Build-Tools, etc)">🚇</a> <a href="#maintenance-MartinThoma" title="Maintenance">🚧</a> <a href="#projectManagement-MartinThoma" title="Project Management">📆</a> <a href="#tutorial-MartinThoma" title="Tutorials">✅</a></td>
<td align="center" valign="top" width="20%"><a href="https://chezsoi.org/lucas/blog/"><img src="https://avatars.githubusercontent.com/u/925560?v=4?s=100" width="100px;" alt="Lucas Cimon"/><br /><sub><b>Lucas Cimon</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/issues?q=author%3ALucas-C" title="Bug reports">🐛</a> <a href="https://github.com/py-pdf/pdfly/commits?author=Lucas-C" title="Code">💻</a> <a href="https://github.com/py-pdf/pdfly/commits?author=Lucas-C" title="Documentation">📖</a> <a href="#maintenance-Lucas-C" title="Maintenance">🚧</a></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/pastor-robert"><img src="https://avatars.githubusercontent.com/u/35646090?v=4?s=100" width="100px;" alt="Rob Adams"/><br /><sub><b>Rob Adams</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=pastor-robert" title="Code">💻</a></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/Kaos599"><img src="https://avatars.githubusercontent.com/u/115716485?v=4?s=100" width="100px;" alt="Harsh "/><br /><sub><b>Harsh </b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=Kaos599" title="Code">💻</a></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/srogmann"><img src="https://avatars.githubusercontent.com/u/59577610?v=4?s=100" width="100px;" alt="Sascha Rogmann"/><br /><sub><b>Sascha Rogmann</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=srogmann" title="Code">💻</a></td>
</tr>
<tr>
<td align="center" valign="top" width="20%"><a href="https://github.com/ebotiab"><img src="https://avatars.githubusercontent.com/u/62219950?v=4?s=100" width="100px;" alt="Enrique Botía"/><br /><sub><b>Enrique Botía</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=ebotiab" title="Code">💻</a></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/kommade"><img src="https://avatars.githubusercontent.com/u/99523586?v=4?s=100" width="100px;" alt="kommade"/><br /><sub><b>kommade</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=kommade" title="Code">💻</a></td>
<td align="center" valign="top" width="20%"><a href="https://spoo.me/"><img src="https://avatars.githubusercontent.com/u/90309290?v=4?s=100" width="100px;" alt="Zingzy"/><br /><sub><b>Zingzy</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=Zingzy" title="Code">💻</a></td>
<td align="center" valign="top" width="20%"><a href="https://wolfram77.github.io"><img src="https://avatars.githubusercontent.com/u/3179612?v=4?s=100" width="100px;" alt="Subhajit Sahu"/><br /><sub><b>Subhajit Sahu</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=wolfram77" title="Code">💻</a></td>
<td align="center" valign="top" width="20%"><a href="https://www.kianmeng.org"><img src="https://avatars.githubusercontent.com/u/134518?v=4?s=100" width="100px;" alt="Kian-Meng Ang"/><br /><sub><b>Kian-Meng Ang</b></sub></a><br /><a href="#ideas-kianmeng" title="Ideas, Planning, & Feedback">🤔</a></td>
</tr>
<tr>
<td align="center" valign="top" width="20%"><a href="https://github.com/hwine"><img src="https://avatars.githubusercontent.com/u/132412?v=4?s=100" width="100px;" alt="Hal Wine"/><br /><sub><b>Hal Wine</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/issues?q=author%3Ahwine" title="Bug reports">🐛</a> <a href="https://github.com/py-pdf/pdfly/commits?author=hwine" title="Code">💻</a></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/philippesamuel"><img src="https://avatars.githubusercontent.com/u/32560769?v=4?s=100" width="100px;" alt="philippesamuel"/><br /><sub><b>philippesamuel</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=philippesamuel" title="Documentation">📖</a></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/marcobrb"><img src="https://avatars.githubusercontent.com/u/219329309?v=4?s=100" width="100px;" alt="marcobrb"/><br /><sub><b>marcobrb</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=marcobrb" title="Documentation">📖</a></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/moormaster"><img src="https://avatars.githubusercontent.com/u/2452695?v=4?s=100" width="100px;" alt="moormaster"/><br /><sub><b>moormaster</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=moormaster" title="Documentation">📖</a> <a href="https://github.com/py-pdf/pdfly/commits?author=moormaster" title="Code">💻</a></td>
<td align="center" valign="top" width="20%"><a href="https://geoff.tuxpup.com/"><img src="https://avatars.githubusercontent.com/u/133355?v=4?s=100" width="100px;" alt="Geoff Beier"/><br /><sub><b>Geoff Beier</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/commits?author=geoffbeier" title="Code">💻</a></td>
</tr>
<tr>
<td align="center" valign="top" width="20%"><a href="https://leftparagraphs.com"><img src="https://avatars.githubusercontent.com/u/1121500?v=4?s=100" width="100px;" alt="Yuriy Chernyshov"/><br /><sub><b>Yuriy Chernyshov</b></sub></a><br /><a href="#ideas-georgthegreat" title="Ideas, Planning, & Feedback">🤔</a> <a href="https://github.com/py-pdf/pdfly/commits?author=georgthegreat" title="Code">💻</a></td>
<td align="center" valign="top" width="20%"><a href="https://github.com/lkintact"><img src="https://avatars.githubusercontent.com/u/24726299?v=4?s=100" width="100px;" alt="lkintact"/><br /><sub><b>lkintact</b></sub></a><br /><a href="https://github.com/py-pdf/pdfly/issues?q=author%3Alkintact" title="Bug reports">🐛</a></td>
</tr>
</tbody>
</table>
<!-- markdownlint-restore -->
<!-- prettier-ignore-end -->
<!-- ALL-CONTRIBUTORS-LIST:END -->
This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification
([emoji key](https://allcontributors.org/docs/en/emoji-key)).
Contributions of any kind welcome!
The list might not be complete. You can find more contributors via the git
history and [GitHub's 'Contributors' feature](https://github.com/py-pdf/pdfly/graphs/contributors).
================================================
FILE: dependabot.yml
================================================
# Doc: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
# Dependabot version-update configuration: one entry per package ecosystem
# (git submodules, GitHub Actions, pip), each prefixing its commit messages
# with "MAINT".
# NOTE(review): no `directory` or `schedule` keys are visible for these
# entries, and this file sits at the repository root while a
# .github/dependabot.yml also exists - confirm which file is actually used
# and whether this one is still needed.
version: 2
updates:
- package-ecosystem: "gitsubmodule"
commit-message:
prefix: "MAINT"
- package-ecosystem: "github-actions"
commit-message:
prefix: "MAINT"
- package-ecosystem: "pip"
commit-message:
prefix: "MAINT"
================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
# SPHINXOPTS and SPHINXBUILD use "?=", so values already set by the caller win.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# "Makefile" is declared phony as well because the catch-all rule below lists
# it as a prerequisite.
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ is the requested target name and is handed to sphinx-build -M unchanged.
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
================================================
FILE: docs/conf.py
================================================
"""
Configuration file for the Sphinx documentation builder.
This file only contains a selection of the most common options.
For a full list see the documentation:
https://www.sphinx-doc.org/en/master/usage/configuration.html
"""
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import os
import shutil
import sys
# NOTE(review): pdfly is imported before the sys.path inserts below, so those
# inserts cannot influence this import - presumably the package is expected to
# be installed in the build environment; confirm.
import pdfly as py_pkg
sys.path.insert(0, os.path.abspath(".")) # noqa
sys.path.insert(0, os.path.abspath("../")) # noqa
# Copy the top-level CHANGELOG and CONTRIBUTORS files into meta/ at import
# time. Both paths are relative, so they resolve against the current working
# directory of the process that loads this file.
shutil.copyfile("../CHANGELOG.md", "meta/CHANGELOG.md")
shutil.copyfile("../CONTRIBUTORS.md", "meta/CONTRIBUTORS.md")
# -- Project information -----------------------------------------------------
# The project name and version are read from the imported package rather than
# being hard-coded here.
project = py_pkg.__name__
copyright = "2023, pdfly contributors"
author = "pdfly contributors"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
# (Set to the same value as the full release string below.)
version = py_pkg.__version__
# The full version, including alpha/beta/rc tags.
release = py_pkg.__version__
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
needs_sphinx = "4.0.0"
myst_all_links_external = True
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
"sphinx.ext.autodoc",
"sphinx.ext.intersphinx",
"sphinx.ext.autosummary",
"sphinx.ext.coverage",
"sphinx.ext.mathjax",
"sphinx.ext.viewcode",
"sphinx.ext.napoleon",
# External
"myst_parser",
]
# Single intersphinx target: the py-pdf organization site, using its default
# inventory location (None).
intersphinx_mapping = {
"py-pdf organization": ("https://py-pdf.github.io/", None),
}
nitpick_ignore_regex = [
# For reasons unclear at this stage the io module prefixes everything with _io
# and this confuses sphinx
(r"py:class", r"_io.(FileIO|BytesIO|Buffered(Reader|Writer))"),
]
# Defaults applied to every autodoc directive.
autodoc_default_options = {
"member-order": "bysource",
"members": True,
"show-inheritance": True,
"undoc-members": True,
}
autodoc_inherit_docstrings = False
autodoc_typehints_format = "short"
python_use_unqualified_type_names = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
# NOTE(review): which of these keys the installed sphinx_rtd_theme version
# still accepts cannot be told from this file - verify against the theme docs.
html_theme_options = {
"canonical_url": "",
"analytics_id": "",
"logo_only": True,
"display_version": True,
"prev_next_buttons_location": "bottom",
"style_external_links": False,
# Toc options
"collapse_navigation": True,
"sticky_navigation": True,
"navigation_depth": 4,
"includehidden": True,
"titles_only": False,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
html_logo = "pdfly-logo.png"
# -- Options for Napoleon -----------------------------------------------------
napoleon_google_docstring = True
napoleon_numpy_docstring = False # Explicitly prefer Google style docstring
napoleon_use_param = True # for type hint support
napoleon_use_rtype = (
False # False so the return type is inline with the description.
)
================================================
FILE: docs/dev/intro.md
================================================
# Developer Intro
pdfly is an application and thus non-developers
might also use it.
## Installing Requirements
```
pip install . --group dev
```
## Running Tests
See [testing pdfly with pytest](testing.md)
## Documentation
To preview the HTML documentation, you can run this command:
```
sphinx-autobuild docs docs/_build/html
```
## Tools: git and pre-commit
Git is a command line application for version control. If you don't know it,
you can [play ohmygit](https://ohmygit.org/) to learn it.
GitHub is the service where the pdfly project is hosted. While git is free and
open source, GitHub is a paid service by Microsoft, but free in a lot of
cases.
[pre-commit](https://pypi.org/project/pre-commit/) is a command line application
that uses git hooks to automatically execute code. This allows you to avoid
style issues and other code quality issues. After you entered `pre-commit install`
once in your local copy of pdfly, it will automatically be executed when
you `git commit`.
## Commit Messages
Having a clean commit message helps people to quickly understand what the commit
is about, without actually looking at the changes. The first line of the
commit message is used to [auto-generate the CHANGELOG](https://github.com/py-pdf/pdfly/blob/main/make_release.py).
For this reason, the format should be:
```
PREFIX: DESCRIPTION
BODY
```
The `PREFIX` can be:
* `SEC`: Security improvements. Typically an infinite loop that was possible.
* `BUG`: A bug was fixed. Likely there is one or multiple issues. Then write in
the `BODY`: `Closes #123` where 123 is the issue number on GitHub.
It would be absolutely amazing if you could write a regression test in those
cases. That is a test that would fail without the fix.
A bug is always an issue for pdfly users - test code or CI that was fixed is
not considered a bug here.
* `ENH`: A new feature! Describe in the body what it can be used for.
* `DEP`: A deprecation. Either marking something as "this is going to be removed"
or actually removing it.
* `PI`: A performance improvement. This could also be a reduction in the
file size of PDF files generated by pdfly.
* `ROB`: A robustness change. Dealing better with broken PDF files.
* `DOC`: A documentation change. `Docs:` is also allowed for commits made by Dependabot.
* `TST`: Adding or adjusting tests.
* `DEV`: Developer experience improvements, e.g. pre-commit or setting up CI.
* `MAINT`: Quite a lot of different stuff. Performance improvements are for sure
the most interesting changes in here. Refactorings as well.
* `STY`: A style change. Something that makes pdfly code more consistent.
Typically a small change. It could also be better error messages for
end users.
The prefix is used to generate the CHANGELOG. Every PR must have exactly one -
if you feel like several match, take the top one from this list that matches for
your PR.
## Pull Requests
Smaller Pull Requests (PRs) are preferred as it's typically easier to merge
them. For example, if you have some typos, a few code-style changes, a new
feature, and a bug-fix, that could be 3 or 4 PRs.
A PR must be complete. That means if you introduce a new feature it must be
finished within the PR and have a test for that feature.
## Releases
To perform a new release, follow this checklist:
1. update `__version__` in `pdfly/_version.py` & `CHANGELOG.md` in order to specify the release date for the new version
2. perform a `REL`-prefixed commit, _e.g._ `REL: X.Y.0`, then make & merge a PR for it.
The GitHub Actions pipeline should create a new `git` tag, and then publish a new version on PyPI: <https://pypi.org/project/pdfly/#history>
3. edit the [GitHub release note](https://github.com/py-pdf/pdfly/releases), using the `CHANGELOG.md` content for the description
================================================
FILE: docs/dev/testing.md
================================================
# Testing
pdfly uses [`pytest`](https://docs.pytest.org/en/latest/) for testing.
To run the tests you need to install the CI (Continuous Integration) dependencies by running `pip install . --group dev`.
================================================
FILE: docs/index.rst
================================================
Welcome to pdfly
================
.. image:: https://img.shields.io/pypi/v/pdfly.svg
:target: https://pypi.org/pypi/pdfly#history
.. image:: https://img.shields.io/pypi/pyversions/pdfly.svg
:target: https://pypi.org/project/pdfly/
.. image:: https://img.shields.io/badge/License-BSD%203%20Clause-blue.svg
:target: https://opensource.org/license/bsd-3-clause
.. image:: https://app.readthedocs.org/projects/pdfly/badge/?version=latest
:target: https://pdfly.readthedocs.io/en/latest/
.. image:: https://github.com/py-pdf/pdfly/workflows/CI/badge.svg
:target: https://github.com/py-pdf/pdfly/actions?query=branch%3Amain
.. image:: https://img.shields.io/github/last-commit/py-pdf/pdfly
:target: https://github.com/py-pdf/pdfly/commits/main/
.. image:: https://img.shields.io/github/issues-closed/py-pdf/pdfly
:target: https://github.com/py-pdf/pdfly/issues
.. image:: https://img.shields.io/github/issues-pr-closed/py-pdf/pdfly
:target: https://github.com/py-pdf/pdfly/pulls
.. image:: https://img.shields.io/badge/linters-black,ruff,mypy-green.svg
:target: https://github.com/py-pdf/pdfly/actions
.. image:: https://img.shields.io/badge/PRs-welcome-brightgreen.svg?style=flat
:target: https://makeapullrequest.com
.. image:: https://img.shields.io/badge/first--timers--only-friendly-blue.svg
:target: https://www.firsttimersonly.com/
pdfly (say: PDF-li) is a pure-python cli application for manipulating PDF files.
.. image:: ./pdfly-logo.png
:scale: 25%
Repository: `github.com/py-pdf/pdfly <https://github.com/py-pdf/pdfly>`__
Installation
------------
.. code-block::
pip install -U pdfly
As ``pdfly`` is an application, you might want to install it with `pipx <https://pypi.org/project/pipx/>`__ or `uv tool <https://docs.astral.sh/uv/concepts/tools/>`__: ``uvx pdfly --help``
Usage
-----
.. code-block::
$ pdfly --help
Usage: pdfly [OPTIONS] COMMAND [ARGS]...
pdfly is a pure-python cli application for manipulating PDF files.
╭─ Options ──────────────────────────────────────────────────────────────────────────────────────╮
│ --version │
│ --help Show this message and exit. │
╰────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Commands ─────────────────────────────────────────────────────────────────────────────────────╮
│ 2-up Create a booklet-style PDF from a single input. │
│ booklet Reorder and two-up PDF pages for booklet printing. │
│ cat Extract and concatenate pages from PDF files into a single PDF file. │
│ check-sign Verifies the signature of a signed PDF. │
│ compress Compress a PDF. │
│ extract-annotated-pages Extract only the annotated pages from a PDF. │
│ extract-images Extract images from PDF without resampling or altering. │
│ extract-text Extract text from a PDF file. │
│ meta Show metadata of a PDF file │
│ pagemeta Give details about a single page. │
│ rm Remove pages from PDF files. │
│ rotate Rotate specified pages by the specified amount │
│ sign Creates a signed PDF from an existing PDF file. │
│ uncompress Module for uncompressing PDF content streams. │
│ update-offsets Updates offsets and lengths in a simple PDF file. │
│ x2pdf Convert one or more files to PDF. Each file is a page. │
╰────────────────────────────────────────────────────────────────────────────────────────────────╯
You can see the help of every subcommand by typing ``--help``:
.. code-block::
$ pdfly 2-up --help
Usage: pdfly 2-up [OPTIONS] PDF OUT
Create a booklet-style PDF from a single input.
Pairs of two pages will be put on one page (left and right)
usage: python 2-up.py input_file output_file
╭─ Arguments ───────────────────────────────────────╮
│ * pdf PATH [default: None] [required] │
│ * out PATH [default: None] [required] │
╰───────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────╮
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────╯
GitHub ⭐️
---------
.. image:: https://api.star-history.com/svg?repos=py-pdf/pdfly&type=date&legend=top-left
:target: https://www.star-history.com/#py-pdf/pdfly&type=date&legend=top-left
.. note:: ``pdfly`` has nothing to do with ``pdfly.net`` or ``gopdfly.com``
.. toctree::
:caption: User Guide
:maxdepth: 1
user/installation
user/subcommand-2-up
user/subcommand-booklet
user/subcommand-cat
user/subcommand-check-sign
user/subcommand-compress
user/subcommand-extract-annotated-pages
user/subcommand-extract-images
user/subcommand-extract-text
user/subcommand-meta
user/subcommand-pagemeta
user/subcommand-rm
user/subcommand-rotate
user/subcommand-sign
user/subcommand-uncompress
user/subcommand-update-offsets
user/subcommand-x2pdf
.. toctree::
:caption: Developer Guide
:maxdepth: 1
dev/intro
dev/testing
.. toctree::
:caption: About pdfly
:maxdepth: 1
meta/CHANGELOG
meta/CONTRIBUTORS
meta/project-governance
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
================================================
FILE: docs/make.bat
================================================
@ECHO OFF
REM Switch to the directory containing this script; popd at :end restores it.
pushd %~dp0
REM Command file for Sphinx documentation
REM Use "sphinx-build" unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
REM Probe the Sphinx executable with all output discarded. Errorlevel 9009 is
REM treated below as "command not found": print install hints and exit with 1.
REM NOTE(review): that error path exits before reaching popd at :end.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
REM Without an argument show the Sphinx help; otherwise pass the first
REM argument to sphinx-build -M together with SPHINXOPTS and O.
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
================================================
FILE: docs/meta/project-governance.md
================================================
# Project Governance
This document describes how the pdfly project is managed. It describes the
different actors, their roles, and the responsibilities they have.
`pdfly` is part of the `py-pdf` organization,
and hence we try to follow some [maintainer guidelines](https://py-pdf.github.io/pages/maintainer-guidelines.html) & [rules](https://py-pdf.github.io/pages/py-pdf-owners.html).
## Terminology
* The **project** is pdfly - a free and open-source pure-python PDF command line
tool.
It includes the [code, issues, and discussions on GitHub](https://github.com/py-pdf/pdfly),
and [the documentation on ReadTheDocs](https://pdfly.readthedocs.io/en/latest/),
[the package on PyPI](https://pypi.org/project/pdfly/).
* A **maintainer** is a person who has technical permissions to change one or
more parts of the project. It is a person who is driven to keep the project running
and improving.
* A **contributor** is a person who contributes to the project. That could be
through writing code - in the best case through forking and creating a pull
request, but that is up to the maintainer. Other contributors describe issues,
help to ask questions on existing issues to make them easier to answer,
participate in discussions, and help to improve the documentation. Contributors
are similar to maintainers, but without technical permissions.
* A **user** is a person who runs the pdfly command line application or imports
pdfly into their code. Users are not expected to know the internals of pdfly; they
only use the public interface of pdfly. They will likely have less knowledge about
PDF than contributors.
* The **community** is all of that - the users, the contributors, and the maintainers.
## Governance, Leadership, and Steering pdfly forward
pdfly is a free and open source project.
As pdfly has no formal relationship with any company and no funding,
all the work done by the community is a voluntary contribution. People don't
get paid, but choose to spend their free time creating software from which
many more people profit. This has to be honored and respected.
pdfly has the **Benevolent Dictator**
governance model. The benevolent dictator is a maintainer with all technical permissions -
most importantly the permission to push new pdfly versions on PyPI.
Being benevolent, the benevolent dictator listens to the community when making decisions and tries
their best to make decisions from which the overall community profits - the
current one and the potential future one. Being a dictator, the benevolent dictator always has
the power and the right to make decisions on their own - also against some
members of the community.
As pdfly is free software, parts of the community can split off (fork the code)
and create a new community. This should limit the harm a bad benevolent dictator can do.
## Project Language
The project language is (American) English. All documentation and issues must
be written in English to ensure that the community can understand it.
We appreciate the fact that large parts of the community don't have English
as their mother tongue. We try our best to understand others -
[automatic translators](https://translate.google.com/) might help.
## Expectations
The community can expect the following:
* The **benevolent dictator** tries their best to make decisions from which the overall
community profits. The benevolent dictator is aware that his/her decisions can shape the
overall community. Once the benevolent dictator notices that she/he doesn't have the time
to advance pdfly, he/she looks for a new benevolent dictator. As it is expected
that the benevolent dictator will step down at some point of their choice
(hopefully before their death), it is NOT a benevolent dictator for life
(BDFL).
* Every **maintainer** (including the benevolent dictator) is aware of their permissions and
the harm they could do. They value security and ensure that the project is
not harmed. They give their technical permissions back if they don't need them
any longer. Any long-time contributor can become a maintainer. Maintainers
can - and should! - step down from their role when they realize that they
can no longer commit that time.
* Every **contributor** is aware that the time of maintainers and the benevolent dictator is
limited. Short pull requests that briefly describe the solved issue and have
a unit test have a higher chance to get merged soon - simply because it's
easier for maintainers to see that the contribution will not harm the overall
project. Their contributions are documented in the git history and in the
public issues.
* Every **community member** uses respectful language. We are all human, we
get upset about things we care about, and other things than what's visible on the
internet go on in our lives. pdfly does not pay its contributors - keep all
of that in mind when you interact with others. We are here because we want to
help others.
### Issues and Discussions
An issue is any technical description that aims at bringing pdfly forward:
* Bug tickets: Something went wrong because pdfly developers made a mistake.
* Feature requests: pdfly does not support all features of the PDF specifications.
There are certainly also convenience methods that would help users a lot.
* Robustness requests: There are many broken PDFs around. In some cases, we can
deal with that. It's kind of a mixture between a bug ticket and a feature
request.
* Performance tickets: pdfly could be faster - let us know about your specific
scenario.
Any comment that is in those technical descriptions which is not helping the
discussion can be deleted. This is especially true for "me too" comments on bugs
or "bump" comments for desired features. People can express this with 👍 / 👎
reactions.
[Discussions](https://github.com/py-pdf/pdfly/discussions) are open. No comments
will be deleted there - except if they are clearly unrelated spam or only
try to insult people (luckily, the community was very respectful so far 🤞)
### Releases
The maintainers follow [semantic versioning](https://semver.org/). Most
importantly, that means that breaking changes will have a major version bump.
Be aware that unintentional breaking changes might still happen. The `pdfly`
maintainers do their best to fix that in a timely manner - please
[report such issues](https://github.com/py-pdf/pdfly/issues)!
## People
* Martin Thoma has been the benevolent dictator since April 2022.
* Maintainers:
* Matthew Stamy (mstamy2) was the benevolent dictator for a long time.
He still is around on GitHub once in a while and has permissions on PyPI and GitHub.
* Matthew Peveler (MasterOdin) is a maintainer on GitHub.
================================================
FILE: docs/user/installation.md
================================================
# Installation
There are several ways to install pdfly. The most common option is to use pip.
## pip
pdfly requires Python 3.10+ to run.
Typically Python comes with `pip`, a package installer. Using it you can
install pdfly:
```bash
pip install pdfly
```
If you are not a super-user (a system administrator / root), you can also just
install pdfly for your current user:
```bash
pip install --user pdfly
```
## pipx
We recommend installing pdfly via [pipx](https://pypi.org/project/pipx/):
```bash
pipx install pdfly
```
pipx installs the pdfly application in an isolated environment. That guarantees
that no other application interferes with its dependencies.
## uv
pdfly can be run without persistent installation using [uv tool run](https://docs.astral.sh/uv/guides/tools/#running-tools):
```bash
uv tool run pdfly
```
via the [uvx](https://docs.astral.sh/uv/guides/tools/#running-tools) alias:
```bash
uvx pdfly
```
or it can be installed using [uv tool install](https://docs.astral.sh/uv/guides/tools/#installing-tools):
```bash
uv tool install pdfly
```
## Python Version Support
If ✓ is given, it works. It is tested via CI.
If ✖ is given, it is guaranteed not to work.
If it's not filled, we don't guarantee support, but it might still work.
| Python | 3.14 | 3.13 | 3.12 | 3.11 | 3.10 | 2.7 |
| ---------------------- | ---- | ---- | ---- | ---- | ---- | --- |
| pdfly | ✓ | ✓ | ✓ | ✓ | ✓ | ✖ |
## Development Version
In case you want to use the current version under development:
```bash
pip install git+https://github.com/py-pdf/pdfly.git
```
================================================
FILE: docs/user/subcommand-2-up.md
================================================
# 2-up
Create a booklet-style PDF from a single input.
## Usage
```
$ pdfly 2-up --help
Usage: pdfly 2-up [OPTIONS] PDF OUT
Create a booklet-style PDF from a single input.
Pairs of two pages will be put on one page (left and right)
usage: python 2-up.py input_file output_file
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
│ * out PATH [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Convert `document.pdf` into a booklet and write the output in `booklet.pdf`.
```
pdfly 2-up document.pdf booklet.pdf
```
================================================
FILE: docs/user/subcommand-booklet.md
================================================
# booklet
Reorder and two-up PDF pages for booklet printing.
## Usage
```
$ pdfly booklet --help
Usage: pdfly booklet [OPTIONS] FILENAME OUTPUT
Reorder and two-up PDF pages for booklet printing.
If the number of pages is not a multiple of four, pages are
added until it is a multiple of four. This includes a centerfold
in the middle of the booklet and a single page on the inside
back cover. The content of those pages are from the
centerfold-file and blank-page-file files, if specified, otherwise
they are blank pages.
Example:
pdfly booklet input.pdf output.pdf
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * filename FILE [default: None] [required] │
│ * output FILE [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --blank-page-file -b FILE page added if input is odd number of pages │
│ [default: None] │
│ --centerfold-file -c FILE double-page added if input is missing >= 2 │
│ pages │
│ [default: None] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Convert `document.pdf` into a booklet and write the output in `booklet.pdf`.
```
pdfly booklet document.pdf booklet.pdf
```
================================================
FILE: docs/user/subcommand-cat.md
================================================
# cat
The cat command can split / extract pages from a PDF. It can also
join/merge/combine multiple PDF documents into a single one.
## Usage
```
pdfly cat --help
Usage: pdfly cat [OPTIONS] FILENAME FN_PGRGS...
Extract and concatenate pages from PDF files into a single PDF file.
Page ranges refer to the previously-named file. A file not followed by a page
range means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages.
-1 last page.
22 just the 23rd page.
:-1 all but the last page.
0:3 the first three pages.
-2 second-to-last page.
:3 the first three pages.
-2: last two pages.
5: from the sixth page onward.
-3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end.
3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9
2::-1 2 1 0.
::-1 all pages in reverse order.
Examples
pdfly cat -o output.pdf head.pdf -- content.pdf :6 7: tail.pdf -1
Concatenate all of head.pdf, all but page seven of content.pdf,
and the last page of tail.pdf, producing output.pdf.
pdfly cat chapter*.pdf >book.pdf
You can specify the output file by redirection.
pdfly cat chapter?.pdf chapter10.pdf >book.pdf
In case you don't want chapter 10 before chapter 2.
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * filename PATH [default: None] [required] │
│ * fn_pgrgs FN_PGRGS... filenames and/or page ranges [default: None] │
│ [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ * --output -o PATH [default: None] [required] │
│ --verbose --no-verbose show page ranges as they are being │
│ read │
│ [default: no-verbose] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Split a PDF
Get the second, third, and fourth page of a PDF:
```
pdfly cat input.pdf 1:4 -o out.pdf
```
### Extract a Page
Get the sixth page of a PDF:
```
pdfly cat input.pdf 5 -o out.pdf
```
Note that it is `5`, because the page indices always start at 0.
### Specify a negative index
Get the last page of a PDF:
```
pdfly cat -o out.pdf input.pdf -- -1
```
`--` must be used to escape negative indices.
### Concatenate two PDFs
Just combine two PDF files so that the pages come right after each other:
```
pdfly cat input1.pdf input2.pdf -o out.pdf
```
### Decrypt a PDF document
```
pdfly cat --password=SECRET doc.pdf -o doc-decrypted.pdf
```
================================================
FILE: docs/user/subcommand-check-sign.md
================================================
# check-sign
Validate that a PDF document has a digital signature matching a given certificate.
## Usage
```
Usage: pdfly check-sign [OPTIONS] FILENAME
Verifies the signature of a signed PDF.
Examples
pdfly verify input.pdf --pem certs.pem
Verifies the input.pdf with a PEM certificate bundle.
╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * filename FILE [required] │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * --pem FILE PEM certificate file [required] │
│ --verbose --no-verbose Show signature verification details. [default: no-verbose] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Verify PDF signature against a PEM certificate
Verifies the input.pdf with a PEM certificate bundle.
```
pdfly check-sign input.pdf --pem certs.pem
```
================================================
FILE: docs/user/subcommand-compress.md
================================================
# compress
Compress a PDF using lossless FlateDecode compression.
**Note:** If compression would result in a larger file, the original file is kept unchanged to avoid file size increase.
## Usage
```
$ pdfly compress --help
Usage: pdfly compress [OPTIONS] PDF OUTPUT
Compress a PDF.
╭─ Arguments ───────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
│ * output PATH [default: None] [required] │
╰───────────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────────╯
```
## Examples
Compress the file `document.pdf` and output `document_compressed.pdf`
```
pdfly compress document.pdf document_compressed.pdf
```
Example output when compression succeeds:
```
Original Size : 1,996,123
Final Size : 1,234,567 (Compressed (61.8% of original))
```
Example output when compression would increase file size:
```
Original Size : 887
Final Size : 887 (No compression applied (would increase size))
```
================================================
FILE: docs/user/subcommand-extract-annotated-pages.md
================================================
# extract-annotated-pages
Extract only the annotated pages from a PDF. This can help to review or rework pages from a large document iteratively.
## Usage
```
pdfly extract-annotated-pages --help
Usage: pdfly extract-annotated-pages [OPTIONS] INPUT_PDF
Extract only the annotated pages from a PDF.
Q: Why does this help?
A: https://github.com/py-pdf/pdfly/issues/97
╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * input_pdf FILE Input PDF file. [required] │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ --output -o PATH Output PDF file. Defaults to 'input_pdf_annotated'. │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Input file
Extracts only pages containing annotations from a file `input.pdf`. Pages are written into a new file `input_annotated.pdf`.
```
pdfly extract-annotated-pages input.pdf
```
### Input file with specific output file
Extracts only pages containing annotations from a file `input.pdf` into the given output file `pages_to_rework.pdf`.
```
pdfly extract-annotated-pages input.pdf -o pages_to_rework.pdf
```
================================================
FILE: docs/user/subcommand-extract-images.md
================================================
# extract-images
Extract images from a PDF file.
## Usage
```
$ pdfly extract-images --help
Usage: pdfly extract-images [OPTIONS] PDF
Extract images from PDF without resampling or altering.
Adapted from work by Sylvain Pelissier
http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-res
ampling-in-python
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Extract the 10th page of `document.pdf` and then extract the images present in it.
```
pdfly cat document.pdf 9 -o page.pdf
pdfly extract-images page.pdf
Extracted 1 images:
- 0-Im0.png
```
================================================
FILE: docs/user/subcommand-extract-text.md
================================================
# extract-text
Extract text from a PDF file.
## Usage
```
$ pdfly extract-text --help
Usage: pdfly extract-text [OPTIONS] PDF
Extract text from a PDF file.
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Extract the text from the 10th page of `document.pdf`, redirecting the output into `page.txt`.
```
pdfly cat document.pdf 9 -o page.pdf
pdfly extract-text page.pdf > page.txt
```
================================================
FILE: docs/user/subcommand-meta.md
================================================
# meta
Get metadata of a PDF file.
## Usage
```
pdfly meta --help
Usage: pdfly meta [OPTIONS] PDF
Show metadata of a PDF file
╭─ Arguments ───────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
╰───────────────────────────────────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────────────────────────────────╮
│ --output -o [json|text] output format [default: text] │
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────────────────────────────────╯
```
## Example
```
$ pdfly meta Allianz-Versicherungsunterlagen.pdf
Operating System Data
┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Attribute ┃ Value ┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ File Name │ /home/user/Documents/Allianz-Versicherungsunterlagen.pdf │
│ File Permissions │ -rw-rw-r-- │
│ File Size │ 874,781 bytes │
│ Creation Time │ 2023-09-02 10:00:51 │
│ Modification Time │ 2023-09-02 10:00:42 │
│ Access Time │ 2023-09-09 11:57:41 │
└───────────────────┴───────────────────────────────────────────────────────────┘
PDF Data
┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Attribute ┃ Value ┃
┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ Title │ │
│ Producer │ itext-paulo-155 (itextpdf.sf.net-lowagie.com) │
│ Author │ │
│ Pages │ 34 │
│ Encrypted │ None │
│ PDF File Version │ %PDF-1.6 │
│ Page Layout │ │
│ Page Mode │ │
│ PDF ID │ ID1=b"'\xc5\x92\xc3\x92\xe2\x80\x93--/\xef\xac\x824\xc3… │
│ │ ID2=b'\xc3\x8b\xc3\xaa\xcb\x9b\r\xc3\xa2\r\xcb\x99T\xc3… │
│ │ \xc3\x96\xc3\x9fY2' │
│ Fonts (unembedded) │ /Helvetica │
│ Fonts (embedded) │ /ASPNQQ+TT22D6t00, /CBKSHX+Helvetica-Bold, │
│ │ /CXQKAY+Helvetica, /GOCSXU+AllianzNeo-Bold, │
│ │ /LKNHUL+Arial-BoldMT, /LMNFKX+ArialMT, /MWUNIP+Symbol, │
│ │ /ODNMDG+TT5B6t00, /PESMKN+AllianzNeo-CondensedBold, │
│ │ /PHDALA+Helvetica-Oblique, /PJEFXS+AllianzNeo-Light, │
│ │ /SNDABN+Helvetica, /SNDABN+Helvetica-Bold, │
│ │ /SNDABN+Times-Roman, /TXDAYK+Helvetica, │
│ │ /VORXLN+Helvetica-BoldOblique, /YTXZAH+Arial-ItalicMT │
│ Attachments │ [] │
│ Images │ 16 images (355,454 bytes) │
└────────────────────┴──────────────────────────────────────────────────────────┘
Use the 'pagemeta' subcommand to get details about a single page
```
================================================
FILE: docs/user/subcommand-pagemeta.md
================================================
# pagemeta
Give details about a PDF's single page.
## Usage
```
$ pdfly pagemeta --help
Usage: pdfly pagemeta [OPTIONS] PDF PAGE_INDEX
Give details about a single page.
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
│ * page_index INTEGER [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --output -o [json|text] output format [default: text] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Get the metadata of the 101st page of `document.pdf` in text format.
```
pdfly pagemeta document.pdf 100
/home/user/.../document.pdf, page index 100
┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Attribute ┃ Value ┃
┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ mediabox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │
│ cropbox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │
│ artbox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │
│ bleedbox │ (0.0, 0.0, 504.0, 661.5): with=504.0 x height=661.5 │
│ annotations │ 8 │
└─────────────┴─────────────────────────────────────────────────────┘
All annotations:
1. /Link at [232.05524, 385.79007, 343.6091, 396.29007]
2. /Link at [157.63988, 209.99002, 243.69913, 220.49002]
3. /Link at [72, 178.19678, 249.65918, 188.69678]
4. /Link at [196.12769, 152.40353, 361.02328, 162.90353]
5. /Link at [360.97717, 139.80353, 432, 150.30353]
6. /Link at [72, 127.20352, 213.9915, 137.70352]
7. /Link at [179.64218, 448.3905, 220.08231, 458.8905]
8. /Link at [282.84, 347.99005, 340.83148, 358.49005]
```
Get the same metadata in `json` format.
```
pdfly pagemeta document.pdf 100 -o json
{"mediabox":[0.0,0.0,504.0,661.5],"cropbox":[0.0,0.0,504.0,661.5],"artbox":[0.0,0.0,504.0,661.5],"bleedbox":[0.0,0.0,504.0,661.5],"annotations":19}
```
================================================
FILE: docs/user/subcommand-rm.md
================================================
# rm
Remove pages from PDF files.
## Usage
```
$ pdfly rm --help
Usage: pdfly rm [OPTIONS] FILENAME FN_PGRGS...
Remove pages from PDF files.
Page ranges refer to the previously-named file.
A file not followed by a page range means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
Examples
pdfly rm -o output.pdf document.pdf 2:5
Remove pages 2 to 4 from document.pdf, producing output.pdf.
pdfly rm document.pdf :-1
Removes all pages except the last one from document.pdf, modifying the original file.
pdfly rm report.pdf :6 7:
Remove all pages except page seven from report.pdf,
producing a single-page report.pdf.
╭─ Arguments ─────────────────────────────────────────────────────────────────────────────────────────────╮
│ * filename FILE [default: None] [required] │
│ * fn_pgrgs FN_PGRGS... filenames and/or page ranges [default: None] [required] │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────╮
│ * --output -o PATH [default: None] [required] │
│ --verbose --no-verbose show page ranges as they are being read [default: no-verbose] │
│ --help Show this message and exit. │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Remove the 5th page of `document.pdf`, modifying the original file.
```
pdfly rm document.pdf 4
```
Remove all pages except the first and last page of `document.pdf`, producing `output.pdf`.
```
pdfly rm -o output.pdf document.pdf 1:-1
```
================================================
FILE: docs/user/subcommand-rotate.md
================================================
# rotate
Rotate specified pages of a PDF by the specified amount.
## Usage
```
pdfly rotate --help
Usage: pdfly rotate [OPTIONS] FILENAME DEGREES [PGRGS]
Rotate specified pages by the specified amount
Example:
pdfly rotate --output output.pdf input.pdf 90
Rotate all pages by 90 degrees (clockwise)
pdfly rotate --output output.pdf input.pdf 90 :3
Rotate first three pages by 90 degrees (clockwise)
pdfly rotate --output output.pdf input.pdf 90 -- -1
Rotate last page by 90 degrees (clockwise)
A file not followed by a page range (PGRGS) means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
╭─ Arguments ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * filename FILE [required] │
│ * degrees INTEGER degrees to rotate [required] │
│ pgrgs [PGRGS] page range [default: :] │
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * --output -o PATH [required] │
│ --help Show this message and exit. │
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Rotate all pages by 90 degrees (clockwise)
Rotate all pages from `input.pdf` by 90 degrees (clockwise) and write the resulting pdf to `output.pdf`.
```
pdfly rotate --output output.pdf input.pdf 90
```
### Rotate first three pages by 90 degrees (clockwise)
Rotate first three pages from `input.pdf` by 90 degrees (clockwise) and write the resulting pdf to `output.pdf`.
```
pdfly rotate --output output.pdf input.pdf 90 :3
```
### Rotate last page by 90 degrees (clockwise)
Rotate last page from `input.pdf` by 90 degrees (clockwise) and write the resulting pdf to `output.pdf`.
```
pdfly rotate --output output.pdf input.pdf 90 -- -1
```
================================================
FILE: docs/user/subcommand-sign.md
================================================
# sign
Creates a digitally-signed PDF from an existing PDF file and a given certificate.
## Usage
```
Usage: pdfly sign [OPTIONS] FILENAME
Creates a signed PDF.
Examples
pdfly sign input.pdf --p12 certs.p12 -o signed.pdf
Signs the input.pdf with a PKCS12 certificate archive. Writes the resulting signed pdf into signed.pdf.
pdfly sign document.pdf --p12 certs.p12 --in-place
Signs the document.pdf with a PKCS12 certificate archive. Modifies the input file in-place.
╭─ Arguments ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * filename FILE [required] │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * --p12 FILE PKCS12 certificate container [required] │
│ --output -o PATH │
│ --in-place -i │
│ --p12-password -p TEXT The password to use to decrypt the PKCS12 file. │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Sign a PDF with PKCS12
Signs the input.pdf with a PKCS12 certificate archive. Writes the resulting signed pdf into signed.pdf.
```
pdfly sign input.pdf --p12 certs.p12 -o signed.pdf
```
### Sign a PDF in-place
Signs the document.pdf with a PKCS12 certificate archive. Modifies the input file in-place.
```
pdfly sign document.pdf --p12 certs.p12 --in-place
```
================================================
FILE: docs/user/subcommand-uncompress.md
================================================
# uncompress
Module for uncompressing PDF content streams.
## Usage
```
$ pdfly uncompress --help
Module for uncompressing PDF content streams.
╭─ Arguments ───────────────────────────────────────────╮
│ * pdf FILE [default: None] [required] │
│ * output PATH [default: None] [required] │
╰───────────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────────╮
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────────╯
```
## Examples
Uncompress `document_compressed.pdf` and output `document.pdf`.
```
pdfly uncompress document_compressed.pdf document.pdf
```
================================================
FILE: docs/user/subcommand-update-offsets.md
================================================
# update-offsets
Updates offsets and lengths in a simple PDF file.
## Usage
```
$ pdfly update-offsets --help
Usage: pdfly update-offsets [OPTIONS] FILE_IN FILE_OUT
Updates offsets and lengths in a simple PDF file.
The PDF specification requires that the xref section at the end
of a PDF file has the correct offsets of the PDF's objects.
It further requires that the dictionary of a stream object
contains a /Length-entry giving the length of the encoded stream.
When editing a PDF file using a text-editor (e.g. vim) it is
elaborate to compute or adjust these offsets and lengths.
This command tries to compute /Length-entries of the stream dictionaries
and the offsets in the xref-section automatically.
It expects that the PDF file has ASCII encoding only. It may
use ISO-8859-1 or UTF-8 in its comments.
The current implementation incorrectly replaces CR (0x0d) by LF (0x0a) in
binary data.
It expects that there is one xref-section only.
It expects that the /Length-entries have default values containing
enough digits, e.g. /Length 000 when the stream consists of 576 bytes.
Example:
update-offsets --verbose --encoding ISO-8859-1 issue-297.pdf
issue-297.out.pdf
╭─ Arguments ──────────────────────────────────────────────────────────────────╮
│ * file_in FILE [default: None] [required] │
│ * file_out PATH [default: None] [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --encoding TEXT Encoding used to read and write the │
│ files, e.g. UTF-8. │
│ [default: ISO-8859-1] │
│ --verbose --no-verbose Show progress while processing. │
│ [default: no-verbose] │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
```
## Examples
Update the offsets of `document.pdf` with UTF-8 encoding and write the output to `document.out.pdf`.
```
pdfly update-offsets document.pdf --verbose --encoding UTF-8 document.out.pdf
```
================================================
FILE: docs/user/subcommand-x2pdf.md
================================================
# x2pdf
Convert a file to PDF.
Currently supported for "x":
* PNG
* JPG
## Usage
```
$ pdfly x2pdf --help
Usage: pdfly x2pdf [OPTIONS] X...
Convert one or more files to PDF. Each file is a page.
╭─ Arguments ─────────────────────────────────────────────────────────────────╮
│ * x X... [default: None] [required] │
╰─────────────────────────────────────────────────────────────────────────────╯
╭─ Options ───────────────────────────────────────────────────────────────────╮
│ * --output -o PATH [default: None] [required] │
│ --help Show this message and exit. │
╰─────────────────────────────────────────────────────────────────────────────╯
```
## Examples
### Single file
```
$ pdfly x2pdf image.jpg -o out.pdf
$ ls -lh
-rw-rw-r-- 1 user user 47K Sep 17 21:49 image.jpg
-rw-rw-r-- 1 user user 49K Sep 17 22:48 out.pdf
```
### Multiple files manually
```
$ pdfly x2pdf image1.jpg image2.jpg -o out.pdf
$ ls -lh
-rw-rw-r-- 1 user user 47K Sep 17 21:49 image1.jpg
-rw-rw-r-- 1 user user 15K Sep 17 21:49 image2.jpg
-rw-rw-r-- 1 user user 64K Sep 17 22:48 out.pdf
```
### Multiple files via *
```
$ pdfly x2pdf *.jpg -o out.pdf
$ ls -lh
-rw-rw-r-- 1 user user 47K Sep 17 21:49 image1.jpg
-rw-rw-r-- 1 user user 15K Sep 17 21:49 image2.jpg
-rw-rw-r-- 1 user user 64K Sep 17 22:48 out.pdf
```
================================================
FILE: make_release.py
================================================
"""Internal tool to update the CHANGELOG."""
import json
import subprocess
import urllib.request
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any
from rich.prompt import Prompt
# GitHub organisation and project name; main() combines them into the
# "Full Changelog" compare URL.
GH_ORG = "py-pdf"
GH_PROJECT = "pdfly"
# File that adjust_version_py() overwrites with the new __version__ line.
VERSION_FILE_PATH = "pdfly/_version.py"
# Changelog file name shown in the instructions printed after a release run.
CHANGELOG_FILE_PATH = "CHANGELOG.md"
@dataclass(frozen=True)
class Change:
    """
    Capture the data of a git commit.

    The dataclass is frozen, so instances cannot be modified after creation.
    """

    # NOTE(review): the field descriptions below are inferred from the field
    # names only; the code that fills them is outside this excerpt - confirm.
    commit_hash: str  # presumably the hash identifying the commit
    prefix: str  # presumably the category prefix of the commit subject
    message: str  # presumably the commit subject without the prefix
    author: str  # presumably the author's display name
    author_login: str  # presumably the author's GitHub login
def main(changelog_path: str) -> None:
    """
    Prepare a release: bump the version and prepend a CHANGELOG entry.

    Besides the CHANGELOG, the version file and the commit/tag message
    files are written. Nothing is written when there are no changes since
    the most recent tag.

    Args:
        changelog_path: The location of the CHANGELOG file

    """
    previous_changelog = get_changelog(changelog_path)
    last_tag = get_most_recent_git_tag()
    plain_changes, attributed_changes = get_formatted_changes(last_tag)
    if plain_changes == "":
        print("No changes")
        return

    # Suggest a patch bump, but let the maintainer override it.
    release_version = get_version_interactive(
        version_bump(last_tag), plain_changes
    )
    adjust_version_py(release_version)

    release_day = datetime.now(tz=timezone.utc)
    compare_url = f"https://github.com/{GH_ORG}/{GH_PROJECT}/compare/{last_tag}...{release_version}"
    footer = f"\n[Full Changelog]({compare_url})\n\n"
    entry = (
        f"## Version {release_version}, {release_day:%Y-%m-%d}\n"
        + plain_changes
        + footer
    )
    print(entry)

    message_body = attributed_changes + footer
    write_commit_msg_file(release_version, message_body)
    write_release_msg_file(release_version, message_body, release_day)

    # Make the script idempotent by checking if the new entry is already in the changelog
    if entry in previous_changelog:
        print("Changelog is already up-to-date!")
        return

    write_changelog(
        "# CHANGELOG\n\n" + entry + strip_header(previous_changelog),
        changelog_path,
    )
    print_instructions(release_version)
def print_instructions(new_version: str) -> None:
    """Tell the maintainer which git commands finish the release."""
    report = [
        "=" * 80,
        f"☑ {VERSION_FILE_PATH} was adjusted to '{new_version}'",
        f"☑ {CHANGELOG_FILE_PATH} was adjusted",
        "",
        "Now run:",
        " git commit -eF RELEASE_COMMIT_MSG.md",
        f" git tag -s {new_version} -eF RELEASE_TAG_MSG.md",
        " git push",
        " git push --tags",
    ]
    for text in report:
        print(text)
def adjust_version_py(version: str) -> None:
    """Overwrite the version file with a single __version__ assignment."""
    content = f'__version__ = "{version}"\n'
    with open(VERSION_FILE_PATH, "w") as version_file:
        version_file.write(content)
def get_version_interactive(new_version: str, changes: str) -> str:
    """
    Show the changes and ask for the version to release.

    The prompt repeats until the answer is a semantic version;
    ``new_version`` is offered as the default each time.
    """
    print("The changes are:")
    print(changes)
    suggestion = new_version
    answer = Prompt.ask("New semantic version", default=suggestion)
    while True:
        if is_semantic_version(answer):
            return answer
        answer = Prompt.ask(
            "That was not a semantic version. Please enter a semantic version",
            default=suggestion,
        )
def is_semantic_version(version: str) -> bool:
    """
    Check if the given version has the form MAJOR.MINOR.PATCH.

    Each part must consist of ASCII digits only. Signs, surrounding
    whitespace and digit-group underscores are rejected even though
    ``int()`` would parse them.

    Args:
        version: The candidate version string.

    Returns:
        True if the string is three dot-separated non-negative integers.

    """
    # This doesn't cover the edge-cases like pre-releases
    parts = version.split(".")
    if len(parts) != 3:
        return False
    return all(part.isascii() and part.isdigit() for part in parts)
def write_commit_msg_file(new_version: str, commit_changes: str) -> None:
    """
    Write a file that can be used as a commit message.

    Like this:

        git commit -eF RELEASE_COMMIT_MSG.md && git push

    Args:
        new_version: The version being released.
        commit_changes: The formatted change list, written verbatim.

    """
    # Explicit UTF-8: the change list contains author names, and the
    # platform default encoding may be unable to represent them.
    with open("RELEASE_COMMIT_MSG.md", "w", encoding="utf-8") as fp:
        fp.write(f"REL: {new_version}\n\n")
        fp.write("## What's new\n")
        fp.write(commit_changes)
def write_release_msg_file(
    new_version: str, commit_changes: str, today: datetime
) -> None:
    """
    Write a file that can be used as a git tag message.

    Like this:

        git tag -eF RELEASE_TAG_MSG.md && git push

    Args:
        new_version: The version being released.
        commit_changes: The formatted change list, written verbatim.
        today: The release date; only year, month and day are used.

    """
    # Explicit UTF-8: the change list contains author names, and the
    # platform default encoding may be unable to represent them.
    with open("RELEASE_TAG_MSG.md", "w", encoding="utf-8") as fp:
        fp.write(f"Version {new_version}, {today:%Y-%m-%d}\n\n")
        fp.write("## What's new\n")
        fp.write(commit_changes)
def strip_header(md: str) -> str:
    """
    Remove the 'CHANGELOG' header.

    Only the literal "# CHANGELOG" prefix is removed. A changelog that does
    not start with it is returned unchanged apart from leading whitespace.

    Args:
        md: The full CHANGELOG text.

    Returns:
        The text without the header and without leading whitespace.

    """
    # str.lstrip(chars) removes any of the given characters, not a prefix,
    # and would eat into a following "## Version ..." heading.
    return md.lstrip().removeprefix("# CHANGELOG").lstrip()
def version_bump(git_tag: str) -> str:
    """
    Propose the next version by incrementing the patch number.

    Args:
        git_tag: Old version tag

    Returns:
        The new version where the patch version is bumped.

    """
    # just assume a patch version change
    components = git_tag.split(".")
    major, minor, patch = components
    next_patch = int(patch) + 1
    return ".".join((major, minor, str(next_patch)))
def get_changelog(changelog_path: str) -> str:
    """
    Read the changelog.

    Args:
        changelog_path: Path to the CHANGELOG file

    Returns:
        Data of the CHANGELOG

    """
    # Explicit UTF-8 so the result does not depend on the platform default.
    with open(changelog_path, encoding="utf-8") as fh:
        return fh.read()
def write_changelog(new_changelog: str, changelog_path: str) -> None:
    """
    Write the changelog.

    Args:
        new_changelog: Contents of the new CHANGELOG
        changelog_path: Path where the CHANGELOG file is

    """
    # Explicit UTF-8 so non-ASCII entries can be written on any platform.
    with open(changelog_path, "w", encoding="utf-8") as fh:
        fh.write(new_changelog)
def get_formatted_changes(git_tag: str) -> tuple[str, str]:
    """
    Format the changes done since the last tag.

    Commits are grouped by subject prefix. Known prefixes get their own
    section in a fixed order; everything else is listed under "Other".

    Args:
        git_tag: the reference tag

    Returns:
        Two renderings of the changes done since git_tag: the plain one and
        one where every entry is suffixed with " by @<author login>".

    """
    commits = get_git_commits_since_tag(git_tag)

    # Group by prefix
    grouped: dict[str, list[dict[str, Any]]] = {}
    for commit in commits:
        grouped.setdefault(commit.prefix, []).append(
            {"msg": commit.message, "author": commit.author_login}
        )

    # Known prefixes and their section titles, in output order.
    section_titles = {
        "SEC": "Security",
        "DEP": "Deprecations",
        "ENH": "New Features",
        "PI": "Performance Improvements",
        "BUG": "Bug Fixes",
        "ROB": "Robustness",
        "DOC": "Documentation",  # We ignore MRs from Dependabot prefixed with: "Docs:"
        "DEV": "Developer Experience",
        "CI": "Continuous Integration",
        "MAINT": "Maintenance",
        "TST": "Testing",
        "STY": "Code Style",
    }

    # Create output
    output = ""
    output_with_user = ""
    for prefix, title in section_titles.items():
        if prefix not in grouped:
            continue
        header = f"\n### {title} ({prefix})\n"
        output += header
        output_with_user += header
        # pop() so that only unknown prefixes remain for the "Other" section
        for commit_dict in grouped.pop(prefix):
            output += f"- {commit_dict['msg']}\n"
            output_with_user += (
                f"- {commit_dict['msg']} by @{commit_dict['author']}\n"
            )

    if grouped:
        output += "\n### Other\n"
        output_with_user += "\n### Other\n"
        for prefix, commit_dicts in grouped.items():
            # Commits without any prefix would otherwise render as "- : msg".
            label = f"{prefix}: " if prefix else ""
            for commit_dict in commit_dicts:
                output += f"- {label}{commit_dict['msg']}\n"
                output_with_user += f"- {label}{commit_dict['msg']} by @{commit_dict['author']}\n"

    return output, output_with_user
def get_most_recent_git_tag() -> str:
    """
    Get the tag reported by ``git describe --abbrev=0``.

    Returns:
        The tag name, decoded and without surrounding whitespace.

    """
    raw = subprocess.check_output(
        ["git", "describe", "--abbrev=0"], stderr=subprocess.STDOUT
    )
    # Decode the bytes instead of stripping characters from their repr():
    # that also removed a legitimate leading/trailing "b" or "n" of the tag.
    return raw.decode("UTF-8").strip()
def get_author_mapping(line_count: int) -> dict[str, str]:
    """
    Get the authors for each commit.

    The commits are fetched page by page from the GitHub REST API, with at
    most 100 commits per request.

    Args:
        line_count: Number of lines from Git log output. Used for determining how
            many commits to fetch.

    Returns:
        A mapping of long commit hashes to author login handles.

    """
    # Nothing to look up. This also avoids range(0, 0, 0), which raises
    # ValueError because the step would be zero.
    if line_count <= 0:
        return {}

    per_page = min(line_count, 100)
    mapping: dict[str, str] = {}
    # One request per started block of per_page commits; pages are 1-based.
    for page, _ in enumerate(range(0, line_count, per_page), start=1):
        with urllib.request.urlopen(
            f"https://api.github.com/repos/{GH_ORG}/{GH_PROJECT}/commits?per_page={per_page}&page={page}"
        ) as response:
            commits = json.loads(response.read())
        for commit in commits:
            if commit["author"]:
                gh_handle = commit["author"]["login"]
            else:
                # This is not perfect, but better than the other option
                gh_handle = commit["commit"]["author"]["name"].replace(" ", "")
            mapping[commit["sha"]] = gh_handle
    return mapping
def get_git_commits_since_tag(git_tag: str) -> list[Change]:
    """
    Get all commits since the last tag.

    Args:
        git_tag: Reference tag from which the changes to the current commit are
            fetched.

    Returns:
        list of all changes since git_tag.

    """
    raw_log = (
        subprocess.check_output(
            [
                "git",
                "--no-pager",
                "log",
                f"{git_tag}..HEAD",
                '--pretty=format:"%H:::%s:::%aN"',
            ],
            stderr=subprocess.STDOUT,
        )
        .decode("UTF-8")
        .strip()
    )
    # Drop blank lines first so only real commits are counted.
    lines = [line for line in raw_log.splitlines() if line != ""]
    if not lines:
        # No commits since the tag: skip the author lookup entirely.
        return []
    authors = get_author_mapping(len(lines))
    return [parse_commit_line(line, authors) for line in lines]
def parse_commit_line(line: str, authors: dict[str, str]) -> Change:
    """
    Parse one line of the git log output.

    The line has the form ``"<hash>:::<subject>:::<author name>"``,
    including the surrounding double quotes.

    Args:
        line: The git log line to parse.
        authors: Mapping of commit hashes to author login handles.

    Returns:
        The parsed Change object

    Raises:
        ValueError: The commit line is not well-structured

    """
    parts = line.split(":::")
    if len(parts) != 3:
        raise ValueError(f"Invalid commit line: '{line}'")
    commit_hash, rest, author = parts
    # Split only on ": ". A bare ":" (e.g. "ratio 1:2") is not a prefix
    # separator, and splitting on it used to fail the unpacking below.
    if ": " in rest:
        prefix, message = rest.split(": ", 1)
    else:
        prefix = ""
        message = rest

    # Standardize
    message = message.strip()
    commit_hash = commit_hash.strip('"')
    author = author.removesuffix('"')
    # Hashes missing from the mapping fall back to the author name without
    # spaces instead of raising KeyError.
    author_login = authors.get(commit_hash, author.replace(" ", ""))

    prefix = prefix.strip()
    if prefix == "DOCS":
        prefix = "DOC"

    return Change(
        commit_hash=commit_hash,
        prefix=prefix,
        message=message,
        author=author,
        author_login=author_login,
    )
if __name__ == "__main__":
main(CHANGELOG_FILE_PATH)
================================================
FILE: mypy.ini
================================================
[mypy]
plugins = pydantic.mypy
================================================
FILE: pdfly/__init__.py
================================================
"""pdfly is a command line utility for manipulating PDFs and getting information about them."""
from ._version import __version__
__all__ = [
"__version__",
]
================================================
FILE: pdfly/__main__.py
================================================
"""Execute pdfly as a module."""
from pdfly.cli import entry_point
if __name__ == "__main__":
entry_point()
================================================
FILE: pdfly/_utils.py
================================================
from enum import Enum
# Output formats a command can print in; each member's value equals its name.
class OutputOptions(Enum):
    json = "json"
    text = "text"
================================================
FILE: pdfly/_version.py
================================================
__version__ = "0.5.1"
================================================
FILE: pdfly/booklet.py
================================================
"""
Reorder and two-up PDF pages for booklet printing.
If the number of pages is not a multiple of four, pages are
added until it is a multiple of four. This includes a centerfold
in the middle of the booklet and a single page on the inside
back cover. The content of those pages is taken from the
centerfold-file and blank-page-file files, if specified; otherwise
they are blank pages.
Example:
pdfly booklet input.pdf output.pdf
"""
# Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
# All rights reserved. This software is available under a BSD license;
# see https://github.com/py-pdf/pypdf/LICENSE
from collections.abc import Generator
from pathlib import Path
from pypdf import (
PageObject,
PdfReader,
PdfWriter,
)
from pypdf.generic import FloatObject, RectangleObject
def main(
    filename: Path,
    output: Path,
    inside_cover_file: Path | None,
    centerfold_file: Path | None,
) -> None:
    """
    Write a 2-up booklet version of ``filename`` to ``output``.

    Args:
        filename: The PDF to turn into a booklet.
        output: Where the booklet PDF is written.
        inside_cover_file: Optional PDF whose first page fills the slot added
            before the last page when the page count is odd.
        centerfold_file: Optional PDF whose first page is merged onto the last
            sheet when a centerfold had to be added.

    Raises:
        RuntimeError: Any exception raised while processing, chained to the
            original error.

    """
    try:
        # Set up the streams
        source = PdfReader(filename)
        pages = list(source.pages)
        writer = PdfWriter()

        # Filler page with the dimensions of the first page.
        first_box = pages[0].mediabox
        blank_page = PageObject.create_blank_page(
            width=first_box.width, height=first_box.height
        )

        # Odd page count: add one page just before the last page.
        if len(pages) % 2 == 1:
            if inside_cover_file:
                filler = fetch_first_page(inside_cover_file)
            else:
                filler = blank_page
            pages.insert(-1, filler)

        # Two pages short of a multiple of four: add a blank pair in the middle.
        requires_centerfold = len(pages) % 4 == 2
        if requires_centerfold:
            for _ in range(2):
                pages.insert(len(pages) // 2, blank_page)

        # Reorder the pages and place two pages side by side (2-up) on each sheet
        for lhs, rhs in page_iter(len(pages)):
            left = pages[lhs]
            left.merge_translated_page(
                page2=pages[rhs],
                tx=left.mediabox.width,
                ty=0,
                expand=True,
                over=True,
            )
            # Double the CropBox width:
            left.cropbox[2] = FloatObject(2 * left.cropbox[2])
            writer.add_page(left)

        # A required centerfold is already present as a pair of blank pages;
        # overlay the user's centerfold page on it if one was given.
        if requires_centerfold and centerfold_file:
            centerfold_page = fetch_first_page(centerfold_file)
            last_page = writer.pages[-1]
            if centerfold_page.rotation != 0:
                centerfold_page.transfer_rotation_to_content()
            if requires_rotate(centerfold_page.mediabox, last_page.mediabox):
                centerfold_page = centerfold_page.rotate(270)
            if centerfold_page.rotation != 0:
                centerfold_page.transfer_rotation_to_content()
            last_page.merge_page(centerfold_page)

        # Everything looks good! Write the output file.
        with open(output, "wb") as output_fh:
            writer.write(output_fh)
    except Exception as error:
        raise RuntimeError(f"Error while processing {filename}") from error
def requires_rotate(a: RectangleObject, b: RectangleObject) -> bool:
    """
    Tell whether two rectangles differ in orientation.

    A rectangle counts as portrait when its height is greater than its width.

    Args:
        a (RectangleObject): The first rectangle.
        b (RectangleObject): The second rectangle.

    """
    orientations = [rect.height > rect.width for rect in (a, b)]
    return orientations[0] != orientations[1]
def fetch_first_page(filename: Path) -> PageObject:
    """
    Open a PDF file and return its first page.

    Args:
        filename (Path): The path to the PDF file.

    Returns:
        PageObject: The first page of the PDF file.

    """
    reader = PdfReader(filename)
    return reader.pages[0]
# This function written with inspiration, assistance, and code
# from claude.ai & Github Copilot
def page_iter(num_pages: int) -> Generator[tuple[int, int], None, None]:
    """
    Yield the page-index pairs that share one side of a booklet sheet.

    Each sheet contributes two pairs: the outside of the fold
    (a page from the back, then one from the front) and the inside
    (the next page from the front, then the next from the back).

    Args:
        num_pages (int): The total number of pages in the document. Must be divisible by 4.

    Yields:
        Generator[tuple[int, int], None, None]: tuples containing pairs of page numbers.
            Each tuple represents the page numbers to be printed on one side of a sheet.

    Raises:
        ValueError: If the number of pages is not divisible by 4.

    """
    if num_pages % 4 != 0:
        raise ValueError("Number of pages must be divisible by 4")

    # "front" walks forward two pages per sheet, "back" mirrors it from the end.
    for front in range(0, num_pages // 2, 2):
        back = num_pages - 1 - front
        # Outside the fold
        yield back, front
        # Inside the fold
        yield front + 1, back - 1
================================================
FILE: pdfly/cat.py
================================================
"""
Concatenate pages from PDF files into a single PDF file.
Page ranges refer to the previously-named file.
A file not followed by a page range means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
Examples
pdfly cat -o output.pdf head.pdf -- content.pdf :6 7: tail.pdf -1
Concatenate all of head.pdf, all but page seven of content.pdf,
and the last page of tail.pdf, producing output.pdf.
pdfly cat chapter*.pdf >book.pdf
You can specify the output file by redirection.
pdfly cat chapter?.pdf chapter10.pdf >book.pdf
In case you don't want chapter 10 before chapter 2.
"""
# Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
# All rights reserved. This software is available under a BSD license;
# see https://github.com/py-pdf/pypdf/LICENSE
import os
import sys
from pathlib import Path
from pypdf import (
PageRange,
PasswordType,
PdfReader,
PdfWriter,
parse_filename_page_ranges,
)
from rich.console import Console
def main(
    filename: Path,
    fn_pgrgs: list[str] | None,
    output: Path,
    verbose: bool,
    inverted_page_selection: bool = False,
    password: str | None = None,
) -> None:
    """
    Concatenate the selected pages of one or more PDFs into one output.

    Args:
        filename: The first input file.
        fn_pgrgs: Further file names and/or page ranges.
        output: Output path; when falsy the PDF is written to stdout.
        verbose: Print each file and page range to stderr as it is read.
        inverted_page_selection: Write the pages NOT matched by each page
            range, in ascending page order.
        password: Password used to decrypt the input files.

    Raises:
        RuntimeError: Any exception raised while reading or writing,
            chained to the original error.

    """
    console = Console()
    filename_page_ranges = parse_filepaths_and_pagerange_args(
        console, filename, fn_pgrgs
    )
    if output:
        output_fh = open(output, "wb")
    else:
        sys.stdout.flush()
        output_fh = os.fdopen(sys.stdout.fileno(), "wb")

    writer = PdfWriter()
    in_fs = {}
    try:
        for filepath, page_range in filename_page_ranges:  # type: ignore
            if verbose:
                print(filepath, page_range, file=sys.stderr)
            if filepath not in in_fs:
                in_fs[filepath] = open(filepath, "rb")

            reader = PdfReader(in_fs[filepath])
            if (
                password is not None
                and reader.decrypt(password) == PasswordType.NOT_DECRYPTED
            ):
                console.print(
                    "[red]Error: the decrypting password provided is invalid"
                )
                sys.exit(1)
            num_pages = len(reader.pages)
            start, end, _step = page_range.indices(num_pages)
            # NOTE(review): reversed ranges such as "::-1" resolve to a
            # negative end and therefore also trigger this warning - confirm
            # whether that is intended.
            if (
                start < 0
                or end < 0
                or start >= num_pages
                or end > num_pages
                or start > end
            ):
                print(
                    f"WARNING: Page range {page_range} is out of bounds",
                    file=sys.stderr,
                )

            selected = range(*page_range.indices(num_pages))
            if inverted_page_selection:
                # Walk the pages in document order. Iterating a set
                # difference gives no ordering guarantee, so the kept pages
                # could come out shuffled.
                excluded = set(selected)
                page_nums = [
                    page_num
                    for page_num in range(num_pages)
                    if page_num not in excluded
                ]
            else:
                page_nums = selected
            for page_num in page_nums:
                writer.add_page(reader.pages[page_num])
        writer.write(output_fh)
    except Exception as error:
        raise RuntimeError(f"Error while reading {filename}") from error
    finally:
        output_fh.close()
    # In 3.0, input files must stay open until output is written.
    # Not closing the in_fs because this script exits now.
def parse_filepaths_and_pagerange_args(
    console: Console, filename: Path, fn_pgrgs: list[str] | None
) -> list[tuple[Path, PageRange]]:
    """
    Turn the command-line arguments into (file path, page range) pairs.

    ``filename`` is put in front of ``fn_pgrgs`` before parsing. If any
    parsed file path is not an existing file, an error listing all of them
    is printed and the process exits with status 2.
    """
    arguments = [str(filename)]
    if fn_pgrgs:
        arguments.extend(fn_pgrgs)

    valid_pairs = []
    missing = []
    for filepath, page_range in parse_filename_page_ranges(arguments):  # type: ignore
        candidate = Path(filepath)
        if not candidate.is_file():
            missing.append(str(filepath))
            continue
        valid_pairs.append((candidate, page_range))

    if missing:
        console.print(
            f"[red]Error: invalid file path or page range provided: {' '.join(missing)}"
        )
        sys.exit(2)
    return valid_pairs
================================================
FILE: pdfly/check_sign.py
================================================
"""
Verifies the signature of a signed PDF.
Examples
pdfly check-sign input.pdf --pem certs.pem
Verifies the input.pdf with a PEM certificate bundle.
"""
import sys
from pathlib import Path
import typer
from endesive import pdf
def main(filename: Path, pem: Path, verbose: bool | None) -> None:
    """
    Verify the signatures of a PDF and print the outcome.

    Args:
        filename: The signed PDF.
        pem: File whose bytes are passed as the only trusted certificate.
        verbose: Also list the checks that passed, not only the failures.

    Raises:
        typer.BadParameter: The verification returned no results.
        typer.Exit: With code 1 when any check of any result failed.

    """
    x509_certificates = [pem.read_bytes()]
    results = pdf.verify(filename.read_bytes(), x509_certificates)
    if len(results) == 0:
        raise typer.BadParameter("Signature missing")

    details: list[str] = []
    for hash_ok, signature_ok, cert_ok in results:
        # (outcome, text when passed, text when failed), in reporting order
        checks = (
            (signature_ok, "Signature ok", "Signature not ok"),
            (hash_ok, "Content hash ok", "Content hash not ok"),
            (cert_ok, "Certificate ok", "Certificate not ok"),
        )
        for passed, ok_text, failed_text in checks:
            if not passed:
                details.append(failed_text)
            elif verbose:
                details.append(ok_text)

    if len(details) == 0:
        details_str = ""
    else:
        details_str = " (" + ", ".join(details) + ")"

    everything_ok = all(
        signature_ok and hash_ok and cert_ok
        for hash_ok, signature_ok, cert_ok in results
    )
    if not everything_ok:
        print(f"Check failed{details_str}.", file=sys.stderr)
        raise typer.Exit(code=1)
    print(f"Check succeeded{details_str}.")
================================================
FILE: pdfly/cli.py
================================================
"""
Define how the CLI should behave.
Subcommands are added here.
"""
from pathlib import Path
from typing import Annotated
import typer
import pdfly.booklet
import pdfly.cat
import pdfly.check_sign
import pdfly.compress
import pdfly.extract_annotated_pages
import pdfly.extract_images
import pdfly.metadata
import pdfly.pagemeta
import pdfly.rm
import pdfly.rotate
import pdfly.sign
import pdfly.uncompress
import pdfly.up2
import pdfly.update_offsets
import pdfly.x2pdf
def version_callback(value: bool) -> None:
    """Print the pdfly and pypdf versions and exit when ``value`` is set."""
    import pypdf

    if not value:
        return
    typer.echo(f"pdfly {pdfly.__version__}")
    typer.echo(f" using pypdf=={pypdf.__version__}")
    raise typer.Exit
# Root Typer application; the subcommands in this module are registered on
# it through the @entry_point.command(...) decorators.
entry_point = typer.Typer(
    add_completion=False,
    help=(
        "pdfly is a pure-python cli application for manipulating PDF files."
    ),
    rich_markup_mode="rich",  # Allows to pretty-print commands documentation
)
# Application-level callback. It does nothing itself; it only exists to
# declare the --version option, which is handled by version_callback.
@entry_point.callback()  # type: ignore[misc]
def common(
    ctx: typer.Context,
    version: bool = typer.Option(None, "--version", callback=version_callback),
) -> None:
    pass
# "2-up" subcommand: forwards the input PDF and the output path to
# pdfly.up2.main. The input must be an existing file (not a directory).
@entry_point.command(name="2-up", help=pdfly.up2.__doc__)  # type: ignore[misc]
def up2(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    out: Path,
) -> None:
    pdfly.up2.main(pdf, out)
# "booklet" subcommand: forwards its arguments to pdfly.booklet.main.
# The -b/--blank-page-file value is passed in the position of that
# function's inside_cover_file parameter, and -c/--centerfold-file in the
# position of centerfold_file.
@entry_point.command(name="booklet", help=pdfly.booklet.__doc__)  # type: ignore[misc]
def booklet(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=False,
            resolve_path=False,
        ),
    ],
    blank_page: Annotated[
        Path | None,
        typer.Option(
            "-b",
            "--blank-page-file",
            help="page added if input is odd number of pages",
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ] = None,
    centerfold: Annotated[
        Path | None,
        typer.Option(
            "-c",
            "--centerfold-file",
            help="double-page added if input is missing >= 2 pages",
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ] = None,
) -> None:
    pdfly.booklet.main(filename, output, blank_page, centerfold)
# "cat" subcommand: forwards its arguments to pdfly.cat.main.
# -o/--output is declared with "..." as its default, i.e. it is required.
# inverted_page_selection is not passed, so pdfly.cat.main uses its default.
@entry_point.command(name="cat", help=pdfly.cat.__doc__)  # type: ignore[misc]
def cat(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    fn_pgrgs: list[str] | None = typer.Argument(  # noqa: B008
        None, allow_dash=True, help="filenames and/or page ranges"
    ),
    output: Path = typer.Option(..., "-o", "--output"),  # noqa
    password: str = typer.Option(
        None, help="Document's user or owner password."
    ),
    verbose: bool = typer.Option(
        False, help="show page ranges as they are being read"
    ),
) -> None:
    pdfly.cat.main(
        filename, fn_pgrgs, output=output, verbose=verbose, password=password
    )
# "check-sign" subcommand: forwards the PDF, the PEM certificate file and
# the verbose flag to pdfly.check_sign.main. Both paths must be existing
# files.
@entry_point.command(name="check-sign", help=pdfly.check_sign.__doc__)
def check_sign(
    filename: Annotated[
        Path,
        typer.Argument(dir_okay=False, exists=True, resolve_path=True),
    ],
    pem: Annotated[
        Path,
        typer.Option(
            ...,
            dir_okay=False,
            exists=True,
            resolve_path=True,
            help="PEM certificate file",
        ),
    ],
    verbose: bool = typer.Option(
        False, help="Show signature verification details."
    ),
) -> None:
    pdfly.check_sign.main(filename, pem, verbose)
# "compress" subcommand: forwards the input PDF and the output path to
# pdfly.compress.main.
@entry_point.command(name="compress", help=pdfly.compress.__doc__)  # type: ignore[misc]
def compress(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Argument(
            writable=True,
        ),
    ],
) -> None:
    pdfly.compress.main(pdf, output)
# "extract-annotated-pages" subcommand: forwards the input PDF and the
# optional output path to pdfly.extract_annotated_pages.main. When
# -o/--output is omitted, None is passed on and the callee picks the
# output name.
@entry_point.command(name="extract-annotated-pages", help=pdfly.extract_annotated_pages.__doc__)  # type: ignore[misc]
def extract_annotated_pages(
    input_pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
            help="Input PDF file.",
        ),
    ],
    output_pdf: Annotated[
        Path | None,
        typer.Option(
            "--output",
            "-o",
            writable=True,
            help="Output PDF file. Defaults to 'input_pdf_annotated'.",
        ),
    ] = None,
) -> None:
    pdfly.extract_annotated_pages.main(input_pdf, output_pdf)
# "extract-images" subcommand: forwards the input PDF to
# pdfly.extract_images.main. There is no output option.
@entry_point.command(name="extract-images", help=pdfly.extract_images.__doc__)  # type: ignore[misc]
def extract_images(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
) -> None:
    pdfly.extract_images.main(pdf)
# "extract-text" subcommand, implemented inline rather than in its own
# module: echoes the extracted text of every page, in page order.
@entry_point.command(name="extract-text")  # type: ignore[misc]
def extract_text(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
) -> None:
    """Extract text from a PDF file."""
    # pypdf is imported here, inside the command, not at module level.
    from pypdf import PdfReader

    reader = PdfReader(str(pdf))
    for page in reader.pages:
        typer.echo(page.extract_text())
# "meta" subcommand: forwards the input PDF and the chosen output format
# to pdfly.metadata.main. -o/--output selects the format here, not a file;
# the default is the "text" format.
@entry_point.command(name="meta", help=pdfly.metadata.__doc__)  # type: ignore[misc]
def metadata(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: pdfly.metadata.OutputOptions = typer.Option(  # noqa
        pdfly.metadata.OutputOptions.text.value,
        "--output",
        "-o",
        help="output format",
        show_default=True,
    ),
) -> None:
    pdfly.metadata.main(pdf, output)
# "pagemeta" subcommand: forwards the input PDF, the page index and the
# chosen output format to pdfly.pagemeta.main. -o/--output selects the
# format here, not a file; the default is the "text" format.
@entry_point.command(name="pagemeta", help=pdfly.pagemeta.__doc__)  # type: ignore[misc]
def pagemeta(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    page_index: int,
    output: pdfly.metadata.OutputOptions = typer.Option(  # noqa
        pdfly.metadata.OutputOptions.text.value,
        "--output",
        "-o",
        help="output format",
        show_default=True,
    ),
) -> None:
    pdfly.pagemeta.main(
        pdf,
        page_index,
        output,
    )
# "rm" subcommand: forwards its arguments to pdfly.rm.main.
# Both -o/--output and fn_pgrgs are declared with "..." as their default,
# i.e. they are required.
@entry_point.command(name="rm", help=pdfly.rm.__doc__)
def rm(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Path = typer.Option(..., "-o", "--output"),  # noqa
    fn_pgrgs: list[str] = typer.Argument(  # noqa
        ..., help="filenames and/or page ranges"
    ),
    verbose: bool = typer.Option(
        False, help="show page ranges as they are being read"
    ),
) -> None:
    pdfly.rm.main(filename, fn_pgrgs, output, verbose)
# "rotate" subcommand: forwards its arguments to pdfly.rotate.main.
# Note the argument order of the callee: (filename, output, degrees, pgrgs).
# The page range defaults to ":" (presumably "all pages" - confirm in
# pdfly.rotate).
@entry_point.command(name="rotate", help=pdfly.rotate.__doc__)  # type: ignore[misc]
def rotate(
    filename: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    degrees: Annotated[int, typer.Argument(..., help="degrees to rotate")],
    pgrgs: Annotated[str, typer.Argument(..., help="page range")] = ":",
    output: Path = typer.Option(..., "-o", "--output"),  # noqa
) -> None:
    pdfly.rotate.main(filename, output, degrees, pgrgs)
# "sign" subcommand: forwards its arguments to pdfly.sign.main.
# Note the argument order of the callee:
# (filename, output, in_place, p12, p12_password).
# --output and --in-place are both optional here; how they interact is
# decided by pdfly.sign.main.
@entry_point.command(name="sign", help=pdfly.sign.__doc__)
def sign(
    filename: Annotated[
        Path,
        typer.Argument(dir_okay=False, exists=True, resolve_path=True),
    ],
    p12: Annotated[
        Path,
        typer.Option(
            ...,
            dir_okay=False,
            exists=True,
            resolve_path=True,
            help="PKCS12 certificate container",
        ),
    ],
    output: Annotated[Path | None, typer.Option("--output", "-o")] = None,
    in_place: bool = typer.Option(False, "--in-place", "-i"),
    p12_password: Annotated[
        str | None,
        typer.Option(
            "--p12-password",
            "-p",
            help="The password to use to decrypt the PKCS12 file.",
        ),
    ] = None,
) -> None:
    pdfly.sign.main(filename, output, in_place, p12, p12_password)
# "uncompress" subcommand: forwards the input PDF and the output path to
# pdfly.uncompress.main.
@entry_point.command(name="uncompress", help=pdfly.uncompress.__doc__)  # type: ignore[misc]
def uncompress(
    pdf: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    output: Annotated[
        Path,
        typer.Argument(
            writable=True,
        ),
    ],
) -> None:
    pdfly.uncompress.main(pdf, output)
# "update-offsets" subcommand: forwards its arguments to
# pdfly.update_offsets.main.
# file_out is annotated as Path but defaults to None (hence the
# type-ignore), so the callee receives None when -o/--output is omitted.
@entry_point.command(name="update-offsets", help=pdfly.update_offsets.__doc__)  # type: ignore[misc]
def update_offsets(
    file_in: Annotated[
        Path,
        typer.Argument(
            dir_okay=False,
            exists=True,
            resolve_path=True,
        ),
    ],
    file_out: Annotated[
        Path, typer.Option("-o", "--output")  # noqa
    ] = None,  # type: ignore[assignment]
    encoding: str = typer.Option(
        "ISO-8859-1",
        help="Encoding used to read and write the files, e.g. UTF-8.",
    ),
    verbose: bool = typer.Option(
        False, help="Show progress while processing."
    ),
) -> None:
    pdfly.update_offsets.main(file_in, file_out, encoding, verbose)
# "x2pdf" subcommand: forwards the input files and the output path to
# pdfly.x2pdf.main and turns a truthy return value into the process exit
# code.
# NOTE(review): the typer.Argument metadata is attached to the element type
# inside list[...]; Typer's documented form is
# Annotated[list[Path], typer.Argument(...)] - confirm that exists/dir_okay
# are actually enforced with this spelling.
@entry_point.command(name="x2pdf", help=pdfly.x2pdf.__doc__)  # type: ignore[misc]
def x2pdf(
    x: list[
        Annotated[
            Path,
            typer.Argument(
                dir_okay=False,
                exists=True,
                resolve_path=True,
            ),
        ]
    ],
    output: Annotated[
        Path,
        typer.Option(
            "-o",
            "--output",
            writable=True,
        ),
    ],
) -> None:
    exit_code = pdfly.x2pdf.main(x, output)
    if exit_code:
        raise typer.Exit(code=exit_code)
================================================
FILE: pdfly/compress.py
================================================
"""Compress a PDF."""
import shutil
from io import BytesIO
from pathlib import Path
from pypdf import PdfReader, PdfWriter
def main(pdf: Path, output: Path) -> None:
    """
    Compress the content streams of a PDF and report the size change.

    The compressed document is built in memory first. It is only written
    when it is smaller than the input; otherwise the input file is copied
    to ``output`` unchanged.

    Args:
        pdf: The PDF to compress.
        output: Where the result is written. May be the same file as ``pdf``.

    """
    reader = PdfReader(pdf)
    writer = PdfWriter()

    for page in reader.pages:
        writer.add_page(page)

    if reader.metadata:
        writer.add_metadata(reader.metadata)

    for page in writer.pages:
        page.compress_content_streams()

    # PDF to memory buffer first
    compressed_buffer = BytesIO()
    writer.write(compressed_buffer)
    compressed_data = compressed_buffer.getvalue()

    comp_size = len(compressed_data)
    orig_size = pdf.stat().st_size

    # If compressed size is larger than original, use original file
    if comp_size >= orig_size:
        print(
            f"Compression resulted in larger file ({comp_size:,} >= {orig_size:,} bytes)"
        )
        print("Keeping original file as compressed version would be larger")
        try:
            shutil.copy2(pdf, output)
        except shutil.SameFileError:
            # Compressing in place: the original is already at ``output``.
            pass
        final_size = orig_size
        status = "No compression applied (would increase size)"
    else:
        with open(output, "wb") as fp:
            fp.write(compressed_data)
        final_size = comp_size
        ratio = (comp_size / orig_size) * 100
        status = f"Compressed ({ratio:.1f}% of original)"

    print(f"Original Size : {orig_size:,}")
    print(f"Final Size : {final_size:,} ({status})")
================================================
FILE: pdfly/extract_annotated_pages.py
================================================
"""
Extract only the annotated pages from a PDF.
Q: Why does this help?
A: https://github.com/py-pdf/pdfly/issues/97
"""
from pathlib import Path
from typing import TYPE_CHECKING
from pypdf import PdfReader, PdfWriter
from pypdf.annotations import AnnotationDictionary
if TYPE_CHECKING:
from pypdf.generic import ArrayObject
# Check if an annotation is manipulable.
def is_manipulable(annot: AnnotationDictionary) -> bool:
    """Return True unless the annotation's "/Subtype" is "/Link"."""
    subtype = annot.get("/Subtype")
    return subtype != "/Link"
# Main function.
def main(input_pdf: Path, output_pdf: Path | None) -> None:
    """
    Copy the pages that carry a non-link annotation into a new PDF.

    Args:
        input_pdf: The PDF to read.
        output_pdf: Where to write the result. When not given, the file is
            placed next to the input as "<input stem>_annotated.pdf".

    """
    if not output_pdf:
        output_pdf = input_pdf.with_name(input_pdf.stem + "_annotated.pdf")

    reader = PdfReader(input_pdf)
    writer = PdfWriter()
    kept = 0

    # Copy only the pages with annotations
    for page in reader.pages:
        if "/Annots" in page:
            annotations: ArrayObject = page["/Annots"]  # type: ignore[assignment]
            if any(is_manipulable(annot) for annot in annotations):
                writer.add_page(page)
                kept += 1

    # Save the output PDF
    writer.write(output_pdf)
    print(f"Extracted {kept} pages with annotations to {output_pdf}")
================================================
FILE: pdfly/extract_images.py
================================================
"""
Extract images from PDF without resampling or altering.
Adapted from work by Sylvain Pelissier
http://stackoverflow.com/questions/2693820/extract-images-from-pdf-without-resampling-in-python
"""
from pathlib import Path
from pypdf import PdfReader
def main(pdf: Path) -> None:
    """
    Write every image of the PDF to the current working directory.

    Files are named "<zero-padded page index>-<image name>". A summary of
    the written files is printed at the end.
    """
    reader = PdfReader(str(pdf))

    written = []
    for page_index, page in enumerate(reader.pages):
        for image in page.images:
            target = f"{page_index:04d}-{image.name}"
            with open(target, "wb") as fp:
                fp.write(image.data)
            written.append(target)

    if len(written) == 0:
        print("No image found.")
        return
    print(f"Extracted {len(written)} images:")
    for target in written:
        print(f"- {target}")
================================================
FILE: pdfly/metadata.py
================================================
"""Show metadata of a PDF file"""
import stat
from datetime import datetime
from pathlib import Path
from pydantic import BaseModel
from pypdf import PdfReader
from ._utils import OutputOptions
# Encryption parameters of a PDF, filled from the reader's encryption
# object (its R and V attributes).
class EncryptionData(BaseModel):
    revision: int
    v_value: int
# Everything the "meta" command reports about one PDF file. Optional fields
# keep their defaults when the information is not collected.
class MetaInfo(BaseModel):
    encryption: EncryptionData | None = None
    # First 8 bytes of the file, decoded as UTF-8
    pdf_file_version: str
    pages: int | None = None
    page_mode: str | None = None
    page_layout: str | None = None
    # String form of the list of attachment names; "unknown" if not collected
    attachments: str = "unknown"
    # First and second entry of the trailer's /ID, when present
    id1: bytes | None = None
    id2: bytes | None = None
    # One entry per image: the length of its data
    images: list[int] = []

    # PDF /Info dictionary
    author: str | None = None
    creation_date: datetime | None = None
    creator: str | None = None
    keywords: str | None = None
    producer: str | None = None
    subject: str | None = None
    title: str | None = None

    # OS Information
    file_permissions: str
    file_size: int  # in bytes
    creation_time: datetime
    modification_time: datetime
    access_time: datetime
def main(pdf: Path, output: OutputOptions) -> None:
    """
    Print metadata of a PDF file, either as JSON or as rich tables.

    For an encrypted file only the encryption parameters, the PDF header
    and the file-system data are collected. For any other file the page
    count, layout, attachments, trailer IDs, image sizes and the /Info
    dictionary are collected as well.

    :param pdf: Path of the PDF file to inspect.
    :param output: ``OutputOptions.json`` prints the collected data as JSON;
        any other value renders tables on the console.
    """
    reader = PdfReader(str(pdf))
    pdf_stat = pdf.stat()

    # The header is the first 8 bytes of the file. The reader has already
    # consumed the stream, so rewind before reading it. The unencrypted path
    # always did this; the encrypted path used to read from wherever parsing
    # had left the stream position.
    reader.stream.seek(0)
    pdf_file_version = reader.stream.read(8).decode("utf-8")

    file_permissions = f"{stat.filemode(pdf_stat.st_mode)}"
    creation_time = datetime.fromtimestamp(pdf_stat.st_ctime)
    modification_time = datetime.fromtimestamp(pdf_stat.st_mtime)
    access_time = datetime.fromtimestamp(pdf_stat.st_atime)

    if reader.is_encrypted:
        meta = MetaInfo(
            encryption=(
                EncryptionData(
                    v_value=reader._encryption.V,
                    revision=reader._encryption.R,
                )
                if reader._encryption
                else None
            ),
            pdf_file_version=pdf_file_version,
            # OS Info
            file_permissions=file_permissions,
            file_size=pdf_stat.st_size,
            creation_time=creation_time,
            modification_time=modification_time,
            access_time=access_time,
        )
    else:
        info = reader.metadata
        pdf_id = reader.trailer.get("/ID")
        # Guard both entries by length so an empty /ID array yields None
        # instead of raising IndexError.
        id_count = len(pdf_id) if pdf_id is not None else 0
        meta = MetaInfo(
            pages=len(reader.pages),
            page_mode=reader.page_mode,
            pdf_file_version=pdf_file_version,
            page_layout=reader.page_layout,
            attachments=str(list(reader.attachments.keys())),
            id1=pdf_id[0] if id_count >= 1 else None,
            id2=pdf_id[1] if id_count >= 2 else None,
            # OS Info
            file_permissions=file_permissions,
            file_size=pdf_stat.st_size,
            creation_time=creation_time,
            modification_time=modification_time,
            access_time=access_time,
            images=[
                len(image.data)
                for page in reader.pages
                for image in page.images
            ],
        )
        if info is not None:
            meta.author = info.author
            meta.creation_date = info.creation_date
            meta.creator = info.creator
            # Pending https://github.com/py-pdf/pypdf/pull/2939 to be able to access .keywords:
            meta.keywords = info.get("/Keywords")
            meta.producer = info.producer
            meta.subject = info.subject
            meta.title = info.title

    if output == OutputOptions.json:
        print(meta.json())
    else:
        # rich is only needed for the table rendering
        from rich.console import Console
        from rich.table import Table

        table = Table(title="PDF Data")
        table.add_column(
            "Attribute", justify="right", style="cyan", no_wrap=True
        )
        table.add_column("Value", style="white")
        # /Info entries are listed only when they carry a value
        if meta.title:
            table.add_row("Title", meta.title)
        if meta.author:
            table.add_row("Author", meta.author)
        if meta.creation_date:
            table.add_row("CreationDate", str(meta.creation_date))
        if meta.creator:
            table.add_row("Creator", meta.creator)
        if meta.producer:
            table.add_row("Producer", meta.producer)
        if meta.subject:
            table.add_row("Subject", meta.subject)
        if meta.keywords:
            table.add_row("Keywords", meta.keywords)
        table.add_row("Pages", f"{meta.pages:,}" if meta.pages else "unknown")
        table.add_row("Encrypted", f"{meta.encryption}")
        table.add_row("PDF File Version", meta.pdf_file_version)
        table.add_row("Page Layout", meta.page_layout)
        table.add_row("Page Mode", meta.page_mode)
        table.add_row("PDF ID", f"ID1={meta.id1!r} ID2={meta.id2!r}")

        # Fonts are collected over all pages; pages of an encrypted file are
        # not visited, so both sets stay empty in that case.
        embedded_fonts: set[str] = set()
        unembedded_fonts: set[str] = set()
        if not reader.is_encrypted:
            for page in reader.pages:
                emb, unemb = page._get_fonts()
                embedded_fonts.update(emb)
                unembedded_fonts.update(unemb)
        table.add_row(
            "Fonts (unembedded)", ", ".join(sorted(unembedded_fonts))
        )
        table.add_row(
            "Fonts (embedded)", ", ".join(sorted(embedded_fonts))
        )
        table.add_row("Attachments", meta.attachments)
        table.add_row(
            "Images", f"{len(meta.images)} images ({sum(meta.images):,} bytes)"
        )

        enc_table = Table(title="Encryption information")
        enc_table.add_column(
            "Attribute", justify="right", style="cyan", no_wrap=True
        )
        enc_table.add_column("Value", style="white")
        if meta.encryption:
            enc_table.add_row(
                "Security Handler Revision Number",
                str(meta.encryption.revision),
            )
            enc_table.add_row("V value", str(meta.encryption.v_value))

        os_table = Table(title="Operating System Data")
        os_table.add_column(
            "Attribute", justify="right", style="cyan", no_wrap=True
        )
        os_table.add_column("Value", style="white")
        os_table.add_row("File Name", f"{pdf}")
        os_table.add_row("File Permissions", f"{meta.file_permissions}")
        os_table.add_row("File Size", f"{meta.file_size:,} bytes")
        os_table.add_row(
            "Creation Time", f"{meta.creation_time:%Y-%m-%d %H:%M:%S}"
        )
        os_table.add_row(
            "Modification Time", f"{meta.modification_time:%Y-%m-%d %H:%M:%S}"
        )
        os_table.add_row(
            "Access Time", f"{meta.access_time:%Y-%m-%d %H:%M:%S}"
        )

        console = Console()
        console.print(os_table)
        console.print(table)
        if meta.encryption:
            console.print(enc_table)

        console.print(
            "Use the 'pagemeta' subcommand to get details about a single page"
        )
================================================
FILE: pdfly/pagemeta.py
================================================
"""Give details about a single page."""
from pathlib import Path
from pydantic import BaseModel
from pypdf import PdfReader
from rich.console import Console
from rich.markdown import Markdown
from rich.table import Table
from ._utils import OutputOptions
# Maps a (width, height) pair to the name of a page format.
# find_known_format() looks sizes up here, first exactly and then by distance.
# NOTE(review): the numbers look like PDF points (1/72 inch), e.g.
# 210 mm / 25.4 * 72 = 595.28 - confirm against the boxes pypdf returns.
KNOWN_PAGE_FORMATS = {
    (841.89, 1190.55): "A3",  # 297mm x 420mm
    (595.28, 841.89): "A4",  # 210mm x 297mm
    (420.94, 595.28): "A5",  # 148mm x 210mm
    (297.66, 420.94): "A6",  # 105mm x 148mm
    (612, 792): "Letter",
    (612, 1008): "Legal",
}
class PageMeta(BaseModel):
    """Details about a single page, as reported by the ``pagemeta`` subcommand."""

    # The four boxes are copied from the page attributes of the same name.
    # main() reads each one as indices 0..3 and derives the width as
    # box[2] - box[0] and the height as box[3] - box[1].
    mediabox: tuple[float, float, float, float]
    cropbox: tuple[float, float, float, float]
    artbox: tuple[float, float, float, float]
    bleedbox: tuple[float, float, float, float]
    # Number of annotations on the page, 0 when the page has none
    annotations: int
    # Copied from page.rotation
    rotation: int
def main(pdf: Path, page_index: int, output: OutputOptions) -> None:
    """
    Print details about one page of a PDF file.

    :param pdf: Path of the PDF file to read.
    :param page_index: Index of the page within ``reader.pages``.
    :param output: ``OutputOptions.json`` prints the page data as JSON;
        any other value renders a table and, if present, the list of
        annotations.
    """
    reader = PdfReader(pdf)
    page = reader.pages[page_index]
    meta = PageMeta(
        mediabox=page.mediabox,
        cropbox=page.cropbox,
        artbox=page.artbox,
        bleedbox=page.bleedbox,
        annotations=len(page.annotations) if page.annotations else 0,
        rotation=page.rotation,
    )

    if output == OutputOptions.json:
        print(meta.json())
        return

    console = Console()
    table = Table(title=f"{pdf}, page index {page_index}")
    table.add_column("Attribute", justify="right", style="cyan", no_wrap=True)
    table.add_column("Value", style="white")

    def describe_box(box: tuple[float, float, float, float]) -> str:
        # The names ``width`` and ``height`` are part of the output: the
        # ``{width=...}`` form prints the variable name in front of the value.
        width = box[2] - box[0]
        height = box[3] - box[1]
        known_format = find_known_format(width, height)
        extra = f" ({known_format})" if known_format else ""
        return (
            f"({box[0]:.2f}, {box[1]:.2f}, {box[2]:.2f}, {box[3]:.2f}):"
            f" {width=:.2f} x {height=:.2f}{extra}"
        )

    for box_name in ("mediabox", "cropbox", "artbox", "bleedbox"):
        table.add_row(box_name, describe_box(getattr(meta, box_name)))

    # Zero-valued counters are left out of the table
    if meta.annotations:
        table.add_row("annotations", str(meta.annotations))
    if meta.rotation:
        table.add_row("rotation", str(meta.rotation))
    console.print(table)

    if page.annotations:
        console.print(Markdown("**All annotations:**"))
        for position, annotation in enumerate(page.annotations, start=1):
            resolved = annotation.get_object()
            console.print(
                f"{position}. {resolved['/Subtype']} at {resolved['/Rect']}"
            )
def find_known_format(width: float, height: float) -> str:
    """
    Name the page format that matches the given size.

    :param width: Page width, in the unit used by ``KNOWN_PAGE_FORMATS``.
    :param height: Page height, in the same unit.
    :return: The format name on an exact match, ``"close to format: <name>"``
        for the first format whose squared distance to the size is below 4,
        or an empty string when nothing matches.
    """
    exact_match = KNOWN_PAGE_FORMATS.get((width, height))
    if exact_match:
        return exact_match

    def squared_distance(ref_width: float, ref_height: float) -> float:
        delta_w = ref_width - width
        delta_h = ref_height - height
        return delta_w * delta_w + delta_h * delta_h

    return next(
        (
            f"close to format: {label}"
            for (ref_width, ref_height), label in KNOWN_PAGE_FORMATS.items()
            if squared_distance(ref_width, ref_height) < 4
        ),
        "",
    )
================================================
FILE: pdfly/rm.py
================================================
"""
Remove pages from PDF files.
Page ranges refer to the previously-named file.
A file not followed by a page range means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
Examples
pdfly rm -o output.pdf document.pdf 2:5
Remove pages 2 to 4 from document.pdf, producing output.pdf.
pdfly rm document.pdf -- -1
Removes the last page from document.pdf, modifying the original file.
pdfly rm document.pdf :-1
Removes all pages except the last one from document.pdf, modifying the original file.
pdfly rm report.pdf :6 7:
Remove all pages except page seven from report.pdf,
producing a single-page report.pdf.
"""
from pathlib import Path
from pdfly.cat import main as cat_main
def main(
    filename: Path, fn_pgrgs: list[str], output: Path, verbose: bool
) -> None:
    """
    Remove pages from a PDF file by delegating to the ``cat`` implementation.

    All four arguments are forwarded unchanged and in the same order to
    ``pdfly.cat.main``.
    """
    # NOTE(review): inverted_page_selection=True presumably makes cat keep
    # every page that is NOT selected by the page ranges - confirm in cat.py.
    cat_main(filename, fn_pgrgs, output, verbose, inverted_page_selection=True)
================================================
FILE: pdfly/rotate.py
================================================
"""
Rotate specified pages by the specified amount
Example:
pdfly rotate --output output.pdf input.pdf 90
Rotate all pages by 90 degrees (clockwise)
pdfly rotate --output output.pdf input.pdf 90 :3
Rotate first three pages by 90 degrees (clockwise)
pdfly rotate --output output.pdf input.pdf 90 -- -1
Rotate last page by 90 degrees (clockwise)
A file not followed by a page range (PGRGS) means all the pages of the file.
PAGE RANGES are like Python slices.
Remember, page indices start with zero.
When using page ranges that start with a negative value a
two-hyphen symbol -- must be used to separate them from
the command line options.
Page range expression examples:
: all pages. -1 last page.
22 just the 23rd page. :-1 all but the last page.
0:3 the first three pages. -2 second-to-last page.
:3 the first three pages. -2: last two pages.
5: from the sixth page onward. -3:-1 third & second to last.
The third, "stride" or "step" number is also recognized.
::2 0 2 4 ... to the end. 3:0:-1 3 2 1 but not 0.
1:10:2 1 3 5 7 9 2::-1 2 1 0.
::-1 all pages in reverse order.
"""
from pathlib import Path
from pypdf import (
PageRange,
PdfReader,
PdfWriter,
)
from rich.console import Console
def main(
    filename: Path,
    output: Path,
    degrees: int,
    page_range: str,
) -> None:
    """
    Copy a PDF file to ``output``, rotating the pages selected by ``page_range``.

    :param filename: Path of the PDF file to read.
    :param output: Path of the PDF file to write.
    :param degrees: Rotation passed to ``page.rotate()`` for each selected page.
    :param page_range: Page range expression, see the module docstring.
    :raises Exception: Any error is announced on the console and re-raised.
    """
    try:
        reader = PdfReader(filename)
        source_pages = list(reader.pages)
        writer = PdfWriter()

        selected = convert_range_to_pages(page_range, len(source_pages))
        for index, page in enumerate(source_pages):
            # Pages outside the selection are copied as they are
            writer.add_page(
                page.rotate(degrees) if index in selected else page
            )

        # Only reached when every page was processed without an error
        with open(output, "wb") as handle:
            writer.write(handle)
    except Exception as error:
        Console().print(f"Error while rotating {filename}")
        raise error
def convert_range_to_pages(page_range: str, num_pages: int) -> set[int]:
    """
    Expand a page range expression into the set of page indices it selects.

    :param page_range: Page range expression understood by ``PageRange``.
    :param num_pages: Number of pages of the document the range applies to.
    :return: The selected zero-based page indices.
    """
    start, stop, step = PageRange(page_range).indices(num_pages)
    return set(range(start, stop, step))
================================================
FILE: pdfly/sign.py
================================================
"""
Creates a signed PDF from an existing PDF file.
Examples
pdfly sign input.pdf --p12 certs.p12 -o signed.pdf
Signs the input.pdf with a PKCS12 certificate archive. Writes the resulting signed pdf into signed.pdf.
pdfly sign document.pdf --p12 certs.p12 --in-place
Signs the document.pdf with a PKCS12 certificate archive. Modifies the input file in-place.
"""
import io
import tempfile
from collections.abc import Generator
from contextlib import contextmanager
from pathlib import Path
from typing import Union
import fpdf.sign
import typer
from cryptography.hazmat.primitives.serialization import pkcs12
from endesive import signer
from fpdf import FPDF, get_scale_factor
from pypdf import PageObject, PdfReader, PdfWriter
from pypdf.generic import DictionaryObject, PdfObject
def main(
    filename: Path,
    output: Path | None,
    in_place: bool,
    p12: Path,
    p12_password: str | None,
) -> None:
    """
    Sign a PDF file with the key and certificate of a PKCS12 archive.

    The signed document is written to ``output``. Without ``output`` it is
    written to a temporary file, which is always removed again. With
    ``in_place`` the signed bytes additionally replace ``filename``.

    :param filename: Path of the PDF file to sign.
    :param output: Path of the signed PDF file, or ``None``.
    :param in_place: Overwrite ``filename`` with the signed document.
    :param p12: Path of the PKCS12 archive.
    :param p12_password: Password of the archive, or ``None``.
    :raises typer.BadParameter: If neither ``output`` nor ``in_place`` is
        given, or if the PDF already contains a signature.
    """
    validate_output_args_or_raise(output, in_place)
    pdf_reader = PdfReader(filename)
    pdf_is_unsigned_or_raise(pdf_reader)

    output_file: Union[io.BufferedWriter, tempfile._TemporaryFileWrapper]
    temporary_output: Path | None = None
    if output:
        output_file = open(output, "wb")
    else:
        # delete=False: the file has to survive close() so that it can be
        # read back below; it is removed in the outer ``finally``.
        output_file = tempfile.NamedTemporaryFile(delete=False)
        output = temporary_output = Path(output_file.name)

    try:
        try:
            _sign_pdf_contents(pdf_reader, output_file, p12, p12_password)
        finally:
            output_file.close()

        if in_place:
            filename.write_bytes(output.read_bytes())
    finally:
        # Remove only the file created here, also when signing failed.
        # A path the caller asked for with --output is left in place.
        if temporary_output is not None:
            temporary_output.unlink(missing_ok=True)
def pdf_is_unsigned_or_raise(pdf_reader: PdfReader) -> None:
    """
    Reject a PDF that already carries a signature annotation.

    :param pdf_reader: Reader of the PDF file to check.
    :raises typer.BadParameter: If any annotation on any page is a signature.
    """
    for page in pdf_reader.pages:
        annotations = page.annotations
        if annotations is None:
            # Page without annotations
            continue
        for annotation in annotations:
            if is_signature(annotation):
                raise typer.BadParameter("PDF is already signed.")
def is_signature(annotation: PdfObject) -> bool:
    """
    Tell whether an annotation is a signature field widget.

    An annotation counts as a signature when it resolves to a plain
    ``DictionaryObject`` whose ``/Subtype`` is ``/Widget`` and whose ``/FT``
    is ``/Sig``. A missing ``/Subtype`` or ``/FT`` entry means "not a
    signature" instead of raising ``KeyError``.

    :param annotation: The annotation, possibly an indirect reference.
    :return: ``True`` for a signature widget, ``False`` otherwise.
    """
    resolved = annotation.get_object()
    if resolved is None:
        return False
    if type(resolved) is not DictionaryObject:
        return False
    # Check for the key first: a widget is not required to carry /FT itself.
    # NOTE(review): an /FT that is only present on a parent field is not
    # looked up here - confirm whether such signatures need to be detected.
    if "/Subtype" not in resolved or resolved["/Subtype"] != "/Widget":
        return False
    return "/FT" in resolved and resolved["/FT"] == "/Sig"
def _sign_pdf_contents(
    pdf_reader: PdfReader,
    output_file: Union[io.BufferedWriter, tempfile._TemporaryFileWrapper],
    p12: Path,
    p12_password: str | None,
) -> None:
    """
    Write a signed copy of the document behind ``pdf_reader`` to ``output_file``.

    The signature is prepared on an FPDF overlay that is merged onto the last
    page, the merged document is serialized with ``PdfWriter``, and the
    serialized bytes are then signed with ``fpdf.sign.sign_content``.

    :param pdf_reader: Reader of the PDF file to sign.
    :param output_file: Open binary file that receives the signed bytes.
    :param p12: Path of the PKCS12 archive holding key and certificates.
    :param p12_password: Password of the archive, or ``None``.
    """
    unsigned_output_buffer = io.BytesIO()
    # Leaving this block renders the overlay and merges it onto the last page
    with add_to_page(pdf_reader.pages[-1]) as pdf:
        with p12.open("rb") as pkcs_file:
            hashalgo = "sha256"
            sign_time = pdf.creation_date
            key, cert, extra_certs = pkcs12.load_key_and_certificates(
                pkcs_file.read(),
                (p12_password.encode() if p12_password is not None else None),
            )
            pdf.sign(
                key=key,
                cert=cert,  # type: ignore
                extra_certs=extra_certs,
                hashalgo=hashalgo,
                signing_time=sign_time,
            )
            # defer actual signing until after the input pdfs contents are merged
            # _sign_key = None prevents FPDF.output() from calculating the signature hash too early
            pdf._sign_key = None

    writer = PdfWriter()
    writer.append_pages_from_reader(pdf_reader)
    writer.write(unsigned_output_buffer)

    # Now that output_buffer contains the contents to be signed
    # we can generate the cryptographic signature using fpdf2.sign.sign_content
    # patch placeholder values to match how fpdf.sign.sign_content() expects them
    content_to_sign = bytearray(unsigned_output_buffer.getbuffer())
    content_to_sign = content_to_sign.replace(
        _SIGNATURE_BYTERANGE_PLACEHOLDER.encode(),
        fpdf.sign._SIGNATURE_BYTERANGE_PLACEHOLDER.encode(),
    )
    # The contents placeholder is found in "(...)" form and is replaced by
    # the fpdf.sign placeholder in "<...>" form
    content_to_sign = content_to_sign.replace(
        b"(" + _SIGNATURE_CONTENTS_PLACEHOLDER.encode() + b")",
        b"<" + fpdf.sign._SIGNATURE_CONTENTS_PLACEHOLDER.encode() + b">",
    )
    # Same key, certificates, hash algorithm and time as passed to pdf.sign()
    signed_output_buffer = fpdf.sign.sign_content(
        signer,
        content_to_sign,
        key,
        cert,  # type: ignore
        extra_certs,
        hashalgo,
        sign_time,
    )
    output_file.write(signed_output_buffer)
@contextmanager
def add_to_page(reader_page: PageObject, unit: str = "mm") -> Generator[FPDF]:
    """
    Provide an FPDF document whose single page is merged onto ``reader_page``.

    The FPDF page is sized from entries 2 and 3 of the page's mediabox,
    divided by the scale factor of ``unit``. The merge happens when the
    ``with`` block is left without an exception.

    :param reader_page: The page that receives the overlay.
    :param unit: Unit of the FPDF document.
    """
    scale = get_scale_factor(unit)
    box = reader_page.mediabox
    overlay = FPDF(format=(box[2] / scale, box[3] / scale), unit=unit)
    overlay.add_page()
    yield overlay
    # Render what the caller drew and stamp it onto the original page
    rendered = PdfReader(io.BytesIO(overlay.output()))
    reader_page.merge_page(page2=rendered.pages[0])
def validate_output_args_or_raise(output: Path | None, in_place: bool) -> None:
    """
    Make sure the caller said where the signed document should go.

    :param output: Value of the ``--output`` option, or ``None``.
    :param in_place: Value of the ``--in-place`` option.
    :raises typer.BadParameter: If neither option is given.
    """
    if in_place or output is not None:
        return
    raise typer.BadParameter(
        "One of the options --output or --in-place is required."
    )
# fpdf.sign placeholder values - in the form after PdfWriter serialized them.
# _sign_pdf_contents() searches the serialized document for these sequences
# and replaces them with the placeholders defined by fpdf.sign.
_SIGNATURE_BYTERANGE_PLACEHOLDER = "[ 0 0 0 0 ]"
# The four characters \000 (a backslash followed by three zeros), 0x2000 times
_SIGNATURE_CONTENTS_PLACEHOLDER = "\\000" * 0x2000
================================================
FILE: pdfly/uncompress.py
================================================
"""Module for uncompressing PDF content streams."""
import zlib
from pathlib import Path
from pypdf import PdfReader, PdfWriter
from pypdf.generic import IndirectObject, PdfObject
def main(pdf: Path, output: Path) -> None:
    """
    Write a copy of ``pdf`` to ``output`` after processing its content streams.

    Every content stream of every page is handed to
    ``decompress_content_stream``: both the entries of a ``/Contents`` array
    and a ``/Contents`` entry that references a single stream. The sizes of
    the input and output file are printed afterwards.

    :param pdf: Path of the PDF file to read.
    :param output: Path of the PDF file to write.
    """
    reader = PdfReader(pdf)
    writer = PdfWriter()

    for page in reader.pages:
        if "/Contents" in page:
            # page["/Contents"] hands back the referenced object, never the
            # reference itself, so a test for IndirectObject on that value
            # can not succeed. raw_get() returns the entry as it is stored.
            contents_ref = page.raw_get("/Contents")
            contents: PdfObject | None = (
                contents_ref.get_object()
                if isinstance(contents_ref, IndirectObject)
                else contents_ref
            )
            if isinstance(contents, list):
                for content in contents:
                    if isinstance(content, IndirectObject):
                        decompress_content_stream(content)
            elif contents is not None and isinstance(
                contents_ref, IndirectObject
            ):
                # Single stream: pass the reference, like the array entries
                decompress_content_stream(contents_ref)

        writer.add_page(page)

    with open(output, "wb") as fp:
        writer.write(fp)

    orig_size = pdf.stat().st_size
    uncomp_size = output.stat().st_size

    print(f"Original Size  : {orig_size:,}")
    print(
        f"Uncompressed Size: {uncomp_size:,} ({(uncomp_size / orig_size) * 100:.1f}% of original)"
    )
def decompress_content_stream(content: IndirectObject) -> None:
    """
    Replace the data of a /FlateDecode content stream by its inflated form.

    Streams with any other /Filter value are left untouched. A failure of
    zlib is reported on stdout and otherwise ignored.

    :param content: Reference to the content stream.
    """
    if content.get("/Filter") != "/FlateDecode":
        return
    try:
        # NOTE(review): this assumes get_data() returns the still-compressed
        # bytes - confirm against the pypdf version in use.
        inflated = zlib.decompress(content.get_data())
        content.set_data(inflated)
        del content["/Filter"]
    except zlib.error as error:
        print(
            f"Some content stream with /FlateDecode failed to be decompressed: {error}"
        )
================================================
FILE: pdfly/up2.py
================================================
"""
Create a booklet-style PDF from a single input.
Pairs of two pages will be put on one page (left and right)
usage: pdfly 2-up input_file output_file
"""
from pathlib import Path
from pypdf import PdfReader, PdfWriter
from pypdf.generic import FloatObject
def main(pdf: Path, output: Path) -> None:
    """
    Put each pair of consecutive pages side by side on one output page.

    The second page of a pair is merged onto the first one, shifted to the
    right by the width of the first page's mediabox. A trailing page without
    a partner gets entry 2 of its mediabox and cropbox doubled instead.

    :param pdf: Path of the PDF file to read.
    :param output: Path of the PDF file to write.
    """
    reader = PdfReader(str(pdf))
    writer = PdfWriter()
    for left_index in range(0, len(reader.pages), 2):
        left = reader.pages[left_index]
        right_index = left_index + 1
        if right_index >= len(reader.pages):
            # Double the MediaBox width:
            left.mediabox[2] = FloatObject(2 * left.mediabox[2])
            # Double the CropBox width:
            left.cropbox[2] = FloatObject(2 * left.cropbox[2])
        else:
            left.merge_translated_page(
                reader.pages[right_index],
                tx=float(left.mediabox.width),
                ty=0,
                expand=True,
            )
        writer.add_page(left)
    with open(output, "wb") as handle:
        writer.write(handle)
    print(f"{output} was created")
================================================
FILE: pdfly/update_offsets.py
================================================
"""
Updates offsets and lengths in a simple PDF file.
The PDF specification requires that the xref section at the end
of a PDF file has the correct offsets of the PDF's objects.
It further requires that the dictionary of a stream object
contains a /Length-entry giving the length of the encoded stream.
When editing a PDF file using a text-editor (e.g. vim) it is
tedious to compute or adjust these offsets and lengths by hand.
This command tries to compute /Length-entries of the stream dictionaries
and the offsets in the xref-section automatically.
It expects that the PDF file has ASCII encoding only. It may
use ISO-8859-1 or UTF-8 in its comments.
The current implementation incorrectly replaces CR (0x0d) by LF (0x0a) in binary data.
It expects that there is one xref-section only.
It expects that the /Length-entries have default values containing
enough digits, e.g. /Length 000 when the stream consists of 576 bytes.
Example:
update-offsets --verbose --encoding ISO-8859-1 issue-297.pdf issue-297.out.pdf
"""
import re
from pathlib import Path
from rich.console import Console
# Here, only simple regular expressions are used.
# Beyond a certain level of complexity, switching to a proper PDF dictionary parser would be better.
# Start of an object: "<number> <generation> obj"
RE_OBJ = re.compile(r"^([0-9]+) ([0-9]+) obj *")
# The line without its line-break
RE_CONTENT = re.compile(r"^([^\r\n]*)", re.DOTALL)
# "/Length <number> <generation> R": the length is stored in another object
RE_LENGTH_REF = re.compile(r"^(.*/Length )([0-9]+) ([0-9]+) R(.*)", re.DOTALL)
# "/Length <digits>": the length is stored in the dictionary itself
RE_LENGTH = re.compile(
    r"^(.*/Length )([0-9]+)([ />\x00\t\f\r\n].*)", re.DOTALL
)


def update_lines(
    lines_in: list[str], encoding: str, console: Console, verbose: bool
) -> list[str]:
    """
    Iterates over the lines of a pdf-files and updates offsets.

    The input is expected to be a pdf without binary-sections.

    The first pass copies the lines, records the offset of every object,
    measures every stream and rewrites the xref-entries and the
    startxref-offset. The second pass writes the measured stream lengths
    into the /Length-entries. A length is only ever replaced by a string of
    the same size, so the offsets of the first pass stay valid.

    :param lines_in: A list over the lines including line-breaks.
    :param encoding: The encoding, e.g. "iso-8859-1" or "UTF-8".
    :param console: Console used to print messages.
    :param verbose: True to activate logging of info-messages.
    :return The output is a list of lines to be written
            in the given encoding.
    :raises RuntimeError: If the file has no objects, no xref-section or no
            startxref-section, or if a /Length-entry can't be updated.
    :raises NotImplementedError: If startxref has no preceding xref-section.
    """
    lines_out = []  # lines to be written
    map_obj_offset = {}  # map from object-number to offset
    map_obj_line = {}  # map from object-number to line-number
    offset_out = 0  # current offset in output-file
    line_xref = None  # line-number of xref-line (in xref-section only)
    line_startxref = None  # line-number of startxref-line
    curr_obj = None  # number of current object
    len_stream = None  # length of stream (in stream only)
    offset_xref = None  # offset of xref-section
    map_stream_len = {}  # map from object-number to /Length of stream
    map_obj_length_line = {}  # map from object-number to /Length-line
    map_obj_length_ref = (
        {}
    )  # map from object-number to /Length-reference (e.g. "3")
    map_obj_length_line_no = {}  # map from object-number to line_no
    # of /Length-line

    for idx, line in enumerate(lines_in):
        line_no = idx + 1  # line-numbers start at 1
        m_content = RE_CONTENT.match(line)
        if m_content is None:
            raise RuntimeError(
                f"Invalid PDF file: line {line_no} without line-break."
            )
        content = m_content.group(1)
        m_obj = RE_OBJ.match(line)
        if m_obj is not None:
            curr_obj = m_obj.group(1)
            curr_gen = m_obj.group(2)
            if verbose:
                console.print(f"line {line_no}: object {curr_obj}")
            if curr_gen != "0":
                raise RuntimeError(
                    f"Invalid PDF file: generation {curr_gen} of object {curr_obj} in line {line_no} is not supported."
                )
            map_obj_offset[curr_obj] = int(offset_out)
            map_obj_line[curr_obj] = line_no
            len_stream = None

        if content == "xref":
            offset_xref = offset_out
            line_xref = line_no
        elif content == "startxref":
            line_startxref = line_no
            line_xref = None
        elif content == "stream":
            if verbose:
                console.print(f"line {line_no}: start stream")
            len_stream = 0
        elif content == "endstream":
            if verbose:
                console.print(f"line {line_no}: end stream")
            if curr_obj is None:
                raise RuntimeError(
                    f"Invalid PDF file: line {line_no}: endstream without object-start."
                )
            if len_stream is None:
                raise RuntimeError(
                    f"Invalid PDF file: line {line_no}: endstream without stream."
                )
            if len_stream > 0:
                # Ignore the last EOL
                len_stream = (
                    len_stream - 2
                    if lines_in[idx - 1][-2:] == "\r\n"
                    else len_stream - 1
                )
            if verbose:
                console.print(
                    f"line {line_no}: Computed /Length {len_stream} of obj {curr_obj}"
                )
            map_stream_len[curr_obj] = len_stream
        elif content == "endobj":
            curr_obj = None
        elif curr_obj is not None and len_stream is None:
            # Inside an object, before its stream: look for /Length
            m_length_ref = RE_LENGTH_REF.match(line)
            if m_length_ref is not None:
                len_obj = m_length_ref.group(2)
                len_obj_gen = m_length_ref.group(3)
                if verbose:
                    console.print(
                        f"line {line_no}, /Length-reference {len_obj} {len_obj_gen} R: {content}"
                    )
                map_obj_length_ref[curr_obj] = len_obj
            else:
                m_length = RE_LENGTH.match(line)
                if m_length is not None:
                    if verbose:
                        console.print(f"line {line_no}, /Length: {content}")
                    map_obj_length_line[curr_obj] = line
                    map_obj_length_line_no[curr_obj] = line_no
        elif curr_obj is not None and len_stream is not None:
            # Inside a stream: count the bytes of the line
            len_stream += len(line.encode(encoding))
        elif line_xref is not None and line_no > line_xref + 2:
            # The two lines after "xref" are skipped: the subsection header
            # and the entry of object 0. Entry n belongs to object n.
            object_number = line_no - line_xref - 2
            if (
                object_number <= len(map_obj_offset)
                and str(object_number) in map_obj_offset
            ):
                eol = line[-2:]
                xref_updated = (
                    "%010d" % map_obj_offset[str(object_number)]
                ) + " 00000 n"
                if verbose:
                    console.print(f"{content} -> {xref_updated}")
                line = xref_updated + eol
        elif line_startxref is not None and line_no == line_startxref + 1:
            if offset_xref is None:
                raise NotImplementedError(
                    "Unsupported file: startxref without preceding xref-section (probable cross-reference stream)"
                )
            line = "%d\n" % offset_xref
        lines_out.append(line)

        offset_out += len(line.encode(encoding))

    # Some checks
    if len(map_obj_offset) == 0:
        raise RuntimeError(
            "Invalid PDF file: the command didn't find any PDF objects."
        )
    if offset_xref is None:
        raise RuntimeError(
            "Invalid PDF file: the command didn't find a xref-section"
        )
    if line_startxref is None:
        raise RuntimeError(
            "Invalid PDF file: the command didn't find a startxref-section"
        )

    for curr_obj, stream_len in map_stream_len.items():
        if curr_obj in map_obj_length_line:
            line = map_obj_length_line[curr_obj]
            m_length = RE_LENGTH.match(line)
            if m_length is None:
                raise RuntimeError(
                    f"Invalid PDF file: line '{line}' does not contain a valid /Length."
                )
            prev_length = m_length.group(2)
            len_digits = len(prev_length)
            len_format = "%%0%dd" % len_digits
            updated_length = len_format % stream_len
            if len(updated_length) > len_digits:
                raise RuntimeError(
                    f"Not enough digits in /Length-entry {prev_length}"
                    f" of object {curr_obj}:"
                    f" too short to take /Length {updated_length}"
                )
            line = m_length.group(1) + updated_length + m_length.group(3)
            lines_out[map_obj_length_line_no[curr_obj] - 1] = line
        elif curr_obj in map_obj_length_ref:
            len_obj = map_obj_length_ref[curr_obj]
            if len_obj not in map_obj_line:
                raise RuntimeError(
                    f"obj {curr_obj} has unknown length-obj {len_obj}"
                )
            # The 1-based line-number of "<len_obj> 0 obj" is at the same
            # time the 0-based index of the line after it, which is expected
            # to hold the length.
            len_obj_line = map_obj_line[len_obj]
            if len_obj_line >= len(lines_out):
                raise RuntimeError(
                    f"Invalid PDF file: length-obj {len_obj} of obj {curr_obj} has no content."
                )
            length_line = lines_out[len_obj_line]
            # Separate the digits from the line-break, whatever its kind, so
            # that the line-break is neither counted as a digit nor replaced.
            prev_length = length_line.rstrip("\r\n")
            eol = length_line[len(prev_length):]
            len_digits = len(prev_length)
            len_format = "%%0%dd" % len_digits
            updated_length = len_format % stream_len
            if len(updated_length) > len_digits:
                raise RuntimeError(
                    f"Not enough digits in /Length-ref-entry {prev_length}"
                    f" of object {curr_obj} and len-object {len_obj}:"
                    f" too short to take /Length {updated_length}"
                )
            if prev_length != updated_length:
                if verbose:
                    # Report the line that is changed, not the last line read
                    console.print(
                        f"line {len_obj_line + 1}, ref-len {len_obj} of {curr_obj}: {prev_length} -> {updated_length}"
                    )
                lines_out[len_obj_line] = updated_length + eol
        else:
            raise RuntimeError(
                f"obj {curr_obj} with stream-len {stream_len} has no object-length-line: {map_obj_length_line}"
            )

    return lines_out
def read_binary_file(file_path: Path, encoding: str) -> list[str]:
    """
    Reads a binary file line by line and returns these lines as a list of strings in the given encoding.
    Encoding utf-8 can't be used to read random binary data.

    The bytes are split at LF, CR and CRLF before they are decoded. A CRLF
    is always kept together as one line-break, wherever it is located in
    the file.

    :param file_path: file to be read line by line
    :param encoding: encoding to be used (e.g. "iso-8859-1")
    :return lines including line-breaks
    :raises UnicodeDecodeError: if a line can't be decoded in the encoding
    """
    data = file_path.read_bytes()
    # bytes.splitlines() breaks at \n, \r and \r\n only, which are exactly
    # the line-breaks of a PDF file. Splitting the whole content at once
    # avoids cutting a \r\n in two at the border of a read-buffer.
    return [
        raw_line.decode(encoding, errors="strict")
        for raw_line in data.splitlines(keepends=True)
    ]
def main(file_in: Path, file_out: Path, encoding: str, verbose: bool) -> None:
    """
    Update the offsets and lengths of a PDF file and write the result.

    :param file_in: Path of the PDF file to read.
    :param file_out: Path of the file to write; a false value means that
        ``file_in`` is overwritten.
    :param encoding: Encoding used to decode and encode the lines.
    :param verbose: True to activate logging of info-messages.
    """
    target = file_out if file_out else file_in
    console = Console()
    console.print(f"Read {file_in}")

    updated = update_lines(
        read_binary_file(file_in, encoding), encoding, console, verbose
    )

    with open(target, "wb") as handle:
        for text_line in updated:
            handle.write(text_line.encode(encoding))

    console.print(f"Wrote {target}", soft_wrap=True)
================================================
FILE: pdfly/x2pdf.py
================================================
"""Convert one or more files to PDF. Each file is a page."""
from io import BytesIO
from pathlib import Path
from fpdf import FPDF
from PIL import Image
from pypdf import PdfReader, PdfWriter
from rich.console import Console
def px_to_mm(px: float) -> float:
    """
    Convert a length in pixels to millimetres, at 72 pixels per inch.

    :param px: Length in pixels.
    :return: The same length in millimetres (25.4 mm per inch).
    """
    return px / 72 * 25.4
def image_to_pdf(filepath: Path) -> BytesIO:
    """
    Render an image file as a single-page PDF document.

    The page gets the size of the image, converted from pixels to
    millimetres, and the image is placed at its top-left corner.

    :param filepath: Path of the image file.
    :return: The PDF document as an in-memory byte stream.
    """
    with Image.open(filepath) as picture:
        pixel_width, pixel_height = picture.size
        page_size = (px_to_mm(pixel_width), px_to_mm(pixel_height))
        document = FPDF(unit="mm")
        document.add_page(format=page_size)  # type: ignore
        document.image(filepath, x=0, y=0)
        rendered = document.output()
        return BytesIO(rendered)
def main(in_filepaths: list[Path], out_filepath: Path) -> int:
    """
    Combine the given input files into one PDF written to ``out_filepath``.

    Files whose name ends with ".pdf" (case-insensitive) contribute all of
    their pages. Every other file is converted with image_to_pdf and
    contributes the single page of the generated PDF. Pages appear in the
    output in the order the inputs were given.

    :param in_filepaths: input files (PDFs or images), in output order
    :param out_filepath: destination of the combined PDF
    :return: number of input files that could not be converted (0 if all succeeded)
    """
    console = Console()
    exit_code = 0
    writer = PdfWriter()
    for filepath in in_filepaths:
        # Case-insensitive so that e.g. "SCAN.PDF" is not mistaken for an image.
        if filepath.name.lower().endswith(".pdf"):
            for page in PdfReader(filepath).pages:
                # add_page() appends. insert_page() defaults to index=0 in
                # pypdf, which would put each new page first and reverse the order.
                writer.add_page(page)
            continue
        try:
            pdf_bytes = image_to_pdf(filepath)
            new_page = PdfReader(pdf_bytes).pages[0]
            writer.add_page(new_page)
        except Exception:
            # Best effort: report the failing file and keep going with the rest.
            console.print(
                f"[red]Error: Could not convert '{filepath}' to a PDF."
            )
            console.print_exception(extra_lines=1, max_frames=1)
            exit_code += 1
    writer.write(out_filepath)
    return exit_code
================================================
FILE: pylock.toml
================================================
lock-version = "1.0"
created-by = "pip"
[[packages]]
name = "alabaster"
version = "1.0.0"
[[packages.wheels]]
name = "alabaster-1.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b"
[[packages]]
name = "annotated-doc"
version = "0.0.4"
[[packages.wheels]]
name = "annotated_doc-0.0.4-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320"
[[packages]]
name = "annotated-types"
version = "0.7.0"
[[packages.wheels]]
name = "annotated_types-0.7.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"
[[packages]]
name = "anyio"
version = "4.12.1"
[[packages.wheels]]
name = "anyio-4.12.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c"
[[packages]]
name = "asn1crypto"
version = "1.5.1"
[[packages.wheels]]
name = "asn1crypto-1.5.1-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/c9/7f/09065fd9e27da0eda08b4d6897f1c13535066174cc023af248fc2a8d5e5a/asn1crypto-1.5.1-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "db4e40728b728508912cbb3d44f19ce188f218e9eba635821bb4b68564f8fd67"
[[packages]]
name = "attrs"
version = "25.4.0"
[[packages.wheels]]
name = "attrs-25.4.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373"
[[packages]]
name = "babel"
version = "2.18.0"
[[packages.wheels]]
name = "babel-2.18.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/77/f5/21d2de20e8b8b0408f0681956ca2c69f1320a3848ac50e6e7f39c6159675/babel-2.18.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "e2b422b277c2b9a9630c1d7903c2a00d0830c409c59ac8cae9081c92f1aeba35"
[[packages]]
name = "bcrypt"
version = "5.0.0"
[[packages.wheels]]
name = "bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl"
url = "https://files.pythonhosted.org/packages/d4/8d/5e43d9584b3b3591a6f9b68f755a4da879a59712981ef5ad2a0ac1379f7a/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl"
[packages.wheels.hashes]
sha256 = "611f0a17aa4a25a69362dcc299fda5c8a3d4f160e2abb3831041feb77393a14a"
[[packages]]
name = "black"
version = "26.3.1"
[[packages.wheels]]
name = "black-26.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/7f/0a/8d17d1a9c06f88d3d030d0b1d4373c1551146e252afe4547ed601c0e697f/black-26.3.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "6c54a4a82e291a1fee5137371ab488866b7c86a3305af4026bdd4dc78642e1ac"
[[packages]]
name = "certifi"
version = "2026.2.25"
[[packages.wheels]]
name = "certifi-2026.2.25-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa"
[[packages]]
name = "cffi"
version = "2.0.0"
[[packages.wheels]]
name = "cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl"
url = "https://files.pythonhosted.org/packages/98/29/9b366e70e243eb3d14a5cb488dfd3a0b6b2f1fb001a203f653b93ccfac88/cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl"
[packages.wheels.hashes]
sha256 = "fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453"
[[packages]]
name = "cfgv"
version = "3.5.0"
[[packages.wheels]]
name = "cfgv-3.5.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0"
[[packages]]
name = "charset-normalizer"
version = "3.4.6"
[[packages.wheels]]
name = "charset_normalizer-3.4.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/fd/ce/865e4e09b041bad659d682bbd98b47fb490b8e124f9398c9448065f64fee/charset_normalizer-3.4.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "51fb3c322c81d20567019778cb5a4a6f2dc1c200b886bc0d636238e364848c89"
[[packages]]
name = "check-wheel-contents"
version = "0.6.3"
[[packages.wheels]]
name = "check_wheel_contents-0.6.3-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/be/05/f39fde9f31ef80b285ef5822fad4ddabf73fec62a1f02c5beb4b2f328972/check_wheel_contents-0.6.3-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "5ae39c8c434b972f0740d04610759168590713175aab584b012b1b84f6771874"
[[packages]]
name = "click"
version = "8.3.1"
[[packages.wheels]]
name = "click-8.3.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6"
[[packages]]
name = "colorama"
version = "0.4.6"
[[packages.wheels]]
name = "colorama-0.4.6-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"
[[packages]]
name = "coverage"
version = "7.13.4"
[[packages.wheels]]
name = "coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl"
url = "https://files.pythonhosted.org/packages/f8/02/aa7ec01d1a5023c4b680ab7257f9bfde9defe8fdddfe40be096ac19e8177/coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl"
[packages.wheels.hashes]
sha256 = "8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f"
[[packages]]
name = "cryptography"
version = "46.0.5"
[[packages.wheels]]
name = "cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl"
url = "https://files.pythonhosted.org/packages/99/0f/a3076874e9c88ecb2ecc31382f6e7c21b428ede6f55aafa1aa272613e3cd/cryptography-46.0.5-cp38-abi3-manylinux_2_34_x86_64.whl"
[packages.wheels.hashes]
sha256 = "a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c"
[[packages]]
name = "defusedxml"
version = "0.7.1"
[[packages.wheels]]
name = "defusedxml-0.7.1-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"
[[packages]]
name = "distlib"
version = "0.4.0"
[[packages.wheels]]
name = "distlib-0.4.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"
[[packages]]
name = "docutils"
version = "0.21.2"
[[packages.wheels]]
name = "docutils-0.21.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2"
[[packages]]
name = "endesive"
version = "2.19.3"
[[packages.wheels]]
name = "endesive-2.19.3-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/a0/c3/a0dcae019de40816352462371c473b22639cd8e68f33a5f23f07faf330fd/endesive-2.19.3-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "e5e09c1011b1977fbb9d563d672de7f17f5638304ce57a35bf7d00f3b7a3972e"
[[packages]]
name = "exceptiongroup"
version = "1.3.1"
[[packages.wheels]]
name = "exceptiongroup-1.3.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598"
[[packages]]
name = "filelock"
version = "3.25.2"
[[packages.wheels]]
name = "filelock-3.25.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/a4/a5/842ae8f0c08b61d6484b52f99a03510a3a72d23141942d216ebe81fefbce/filelock-3.25.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "ca8afb0da15f229774c9ad1b455ed96e85a81373065fb10446672f64444ddf70"
[[packages]]
name = "flake8"
version = "7.3.0"
[[packages.wheels]]
name = "flake8-7.3.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/9f/56/13ab06b4f93ca7cac71078fbe37fcea175d3216f31f85c3168a6bbd0bb9a/flake8-7.3.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "b9696257b9ce8beb888cdbe31cf885c90d31928fe202be0889a7cdafad32f01e"
[[packages]]
name = "flake8-bugbear"
version = "25.11.29"
[[packages.wheels]]
name = "flake8_bugbear-25.11.29-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/0d/42/c18f199780d99a6f6a64c4a36f4ad28a445d9e11968a6025b21d0c8b6802/flake8_bugbear-25.11.29-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "9bf15e2970e736d2340da4c0a70493db964061c9c38f708cfe1f7b2d87392298"
[[packages]]
name = "flake8-comprehensions"
version = "3.17.0"
[[packages.wheels]]
name = "flake8_comprehensions-3.17.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/39/bd/d6739d685fdd79349aa51c37bdedc0d8eab6ae9c6e6ed2ca935b3f88210d/flake8_comprehensions-3.17.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "3943a9c6f2593c3bc5cc64106c2f89d63c6ecd49c8343597f8257b8fcfc8b0a2"
[[packages]]
name = "flake8-isort"
version = "7.0.0"
[[packages.wheels]]
name = "flake8_isort-7.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/17/7d/907ef4135f6ede5187930d9ddd1f36564e07c6cdcd15ae8fb9849c9517e0/flake8_isort-7.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "c301a0e55fc77582348e636194b84b1a0baf0dfdaa6eddf3b0eeea75f8be7f36"
[[packages]]
name = "flake8-simplify"
version = "0.30.0"
[[packages.wheels]]
name = "flake8_simplify-0.30.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/9b/d5/18a89f40c1a145a44d1fad825553be8131bcb727f5f2783d3727a2f4b2d0/flake8_simplify-0.30.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "c9f54a50d24780832a3f2bb7a687ef465b91f10d7cb4ea0845dff4b65d9c91f4"
[[packages]]
name = "flit"
version = "3.12.0"
[[packages.wheels]]
name = "flit-3.12.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/f5/82/ce1d3bb380b227e26e517655d1de7b32a72aad61fa21ff9bd91a2e2db6ee/flit-3.12.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "2b4e7171dc22881fa6adc2dbf083e5ecc72520be3cd7587d2a803da94d6ef431"
[[packages]]
name = "flit-core"
version = "3.12.0"
[[packages.wheels]]
name = "flit_core-3.12.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/f2/65/b6ba90634c984a4fcc02c7e3afe523fef500c4980fec67cc27536ee50acf/flit_core-3.12.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "e7a0304069ea895172e3c7bb703292e992c5d1555dd1233ab7b5621b5b69e62c"
[[packages]]
name = "fonttools"
version = "4.62.1"
[[packages.wheels]]
name = "fonttools-4.62.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl"
url = "https://files.pythonhosted.org/packages/42/09/7dbe3d7023f57d9b580cfa832109d521988112fd59dddfda3fddda8218f9/fonttools-4.62.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl"
[packages.wheels.hashes]
sha256 = "7bca7a1c1faf235ffe25d4f2e555246b4750220b38de8261d94ebc5ce8a23c23"
[[packages]]
name = "fpdf2"
version = "2.8.7"
[[packages.wheels]]
name = "fpdf2-2.8.7-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/66/0a/cf50ecffa1e3747ed9380a3adfc829259f1f86b3fdbd9e505af789003141/fpdf2-2.8.7-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "d391fc508a3ce02fc43a577c830cda4fe6f37646f2d143d489839940932fbc19"
[[packages]]
name = "h11"
version = "0.16.0"
[[packages.wheels]]
name = "h11-0.16.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86"
[[packages]]
name = "identify"
version = "2.6.18"
[[packages.wheels]]
name = "identify-2.6.18-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/46/33/92ef41c6fad0233e41d3d84ba8e8ad18d1780f1e5d99b3c683e6d7f98b63/identify-2.6.18-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "8db9d3c8ea9079db92cafb0ebf97abdc09d52e97f4dcf773a2e694048b7cd737"
[[packages]]
name = "idna"
version = "3.11"
[[packages.wheels]]
name = "idna-3.11-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea"
[[packages]]
name = "imagesize"
version = "2.0.0"
[[packages.wheels]]
name = "imagesize-2.0.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/5f/53/fb7122b71361a0d121b669dcf3d31244ef75badbbb724af388948de543e2/imagesize-2.0.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "5667c5bbb57ab3f1fa4bc366f4fbc971db3d5ed011fd2715fd8001f782718d96"
[[packages]]
name = "iniconfig"
version = "2.3.0"
[[packages.wheels]]
name = "iniconfig-2.3.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12"
[[packages]]
name = "invoke"
version = "2.2.1"
[[packages.wheels]]
name = "invoke-2.2.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/32/4b/b99e37f88336009971405cbb7630610322ed6fbfa31e1d7ab3fbf3049a2d/invoke-2.2.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "2413bc441b376e5cd3f55bb5d364f973ad8bdd7bf87e53c79de3c11bf3feecc8"
[[packages]]
name = "isort"
version = "8.0.1"
[[packages.wheels]]
name = "isort-8.0.1-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/3e/95/c7c34aa53c16353c56d0b802fba48d5f5caa2cdee7958acbcb795c830416/isort-8.0.1-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "28b89bc70f751b559aeca209e6120393d43fbe2490de0559662be7a9787e3d75"
[[packages]]
name = "jinja2"
version = "3.1.6"
[[packages.wheels]]
name = "jinja2-3.1.6-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"
[[packages]]
name = "librt"
version = "0.8.1"
[[packages.wheels]]
name = "librt-0.8.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/01/99/f85130582f05dcf0c8902f3d629270231d2f4afdfc567f8305a952ac7f14/librt-0.8.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "97c2b54ff6717a7a563b72627990bec60d8029df17df423f0ed37d56a17a176b"
[[packages]]
name = "lxml"
version = "6.0.2"
[[packages.wheels]]
name = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/20/cf/cab09478699b003857ed6ebfe95e9fb9fa3d3c25f1353b905c9b73cfb624/lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c"
[[packages]]
name = "markdown-it-py"
version = "3.0.0"
[[packages.wheels]]
name = "markdown_it_py-3.0.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"
[[packages]]
name = "markupsafe"
version = "3.0.3"
[[packages.wheels]]
name = "markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/af/cd/ce6e848bbf2c32314c9b237839119c5a564a59725b53157c856e90937b7a/markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591"
[[packages]]
name = "mccabe"
version = "0.7.0"
[[packages.wheels]]
name = "mccabe-0.7.0-py2.py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"
[[packages]]
name = "mdit-py-plugins"
version = "0.5.0"
[[packages.wheels]]
name = "mdit_py_plugins-0.5.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f"
[[packages]]
name = "mdurl"
version = "0.1.2"
[[packages.wheels]]
name = "mdurl-0.1.2-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"
[[packages]]
name = "mypy"
version = "1.19.1"
[[packages.wheels]]
name = "mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
url = "https://files.pythonhosted.org/packages/2a/0d/93c2e4a287f74ef11a66fb6d49c7a9f05e47b0a4399040e6719b57f500d2/mypy-1.19.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl"
[packages.wheels.hashes]
sha256 = "de759aafbae8763283b2ee5869c7255391fbc4de3ff171f8f030b5ec48381b74"
[[packages]]
name = "mypy-extensions"
version = "1.1.0"
[[packages.wheels]]
name = "mypy_extensions-1.1.0-py3-none-any.whl"
url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl"
[packages.wheels.hashes]
sha256 = "1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"
[[packages]]
name = "myst-parser"
version = "4.0.1"
[[packages.wheels]]
name = "myst_parser-4.0.1-py3-none-any.whl"
url = "https://files.pythonh
gitextract_w5t8afxl/
├── .all-contributorsrc
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ ├── config.yml
│ │ ├── feature_request.md
│ │ └── question.md
│ ├── dependabot.yml
│ ├── pull_request_template.md
│ ├── scripts/
│ │ └── check_pr_title.py
│ └── workflows/
│ ├── check-gitignored-files.yml
│ ├── create-github-release.yaml
│ ├── github-ci.yaml
│ ├── publish-to-pypi.yaml
│ ├── release.yaml
│ └── title-check.yml
├── .gitignore
├── .gitmodules
├── .isort.cfg
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── .typos.toml
├── CHANGELOG.md
├── CONTRIBUTORS.md
├── LICENSE
├── Makefile
├── README.md
├── dependabot.yml
├── docs/
│ ├── Makefile
│ ├── conf.py
│ ├── dev/
│ │ ├── intro.md
│ │ └── testing.md
│ ├── index.rst
│ ├── make.bat
│ ├── meta/
│ │ └── project-governance.md
│ └── user/
│ ├── installation.md
│ ├── subcommand-2-up.md
│ ├── subcommand-booklet.md
│ ├── subcommand-cat.md
│ ├── subcommand-check-sign.md
│ ├── subcommand-compress.md
│ ├── subcommand-extract-annotated-pages.md
│ ├── subcommand-extract-images.md
│ ├── subcommand-extract-text.md
│ ├── subcommand-meta.md
│ ├── subcommand-pagemeta.md
│ ├── subcommand-rm.md
│ ├── subcommand-rotate.md
│ ├── subcommand-sign.md
│ ├── subcommand-uncompress.md
│ ├── subcommand-update-offsets.md
│ └── subcommand-x2pdf.md
├── make_release.py
├── mypy.ini
├── pdfly/
│ ├── __init__.py
│ ├── __main__.py
│ ├── _utils.py
│ ├── _version.py
│ ├── booklet.py
│ ├── cat.py
│ ├── check_sign.py
│ ├── cli.py
│ ├── compress.py
│ ├── extract_annotated_pages.py
│ ├── extract_images.py
│ ├── metadata.py
│ ├── pagemeta.py
│ ├── rm.py
│ ├── rotate.py
│ ├── sign.py
│ ├── uncompress.py
│ ├── up2.py
│ ├── update_offsets.py
│ └── x2pdf.py
├── pylock.toml
├── pyproject.toml
├── renovate.json
├── resources/
│ ├── demo2_ca.root.crt.pem
│ ├── signing-certificate.crt
│ └── signing-certificate.p12
├── setup.cfg
├── setup.py
└── tests/
├── __init__.py
├── conftest.py
├── test_booklet.py
├── test_cat.py
├── test_check_sign.py
├── test_cli.py
├── test_compress.py
├── test_extract_annotated_pages.py
├── test_extract_images.py
├── test_pagemeta.py
├── test_rm.py
├── test_rotate.py
├── test_sign.py
├── test_uncompress.py
├── test_up2.py
├── test_update_offsets.py
└── test_x2pdf.py
SYMBOL INDEX (139 symbols across 34 files) FILE: make_release.py class Change (line 19) | class Change: function main (line 29) | def main(changelog_path: str) -> None: function print_instructions (line 67) | def print_instructions(new_version: str) -> None: function adjust_version_py (line 80) | def adjust_version_py(version: str) -> None: function get_version_interactive (line 86) | def get_version_interactive(new_version: str, changes: str) -> str: function is_semantic_version (line 100) | def is_semantic_version(version: str) -> bool: function write_commit_msg_file (line 111) | def write_commit_msg_file(new_version: str, commit_changes: str) -> None: function write_release_msg_file (line 125) | def write_release_msg_file( function strip_header (line 141) | def strip_header(md: str) -> str: function version_bump (line 146) | def version_bump(git_tag: str) -> str: function get_changelog (line 162) | def get_changelog(changelog_path: str) -> str: function write_changelog (line 178) | def write_changelog(new_changelog: str, changelog_path: str) -> None: function get_formatted_changes (line 191) | def get_formatted_changes(git_tag: str) -> tuple[str, str]: function get_most_recent_git_tag (line 270) | def get_most_recent_git_tag() -> str: function get_author_mapping (line 286) | def get_author_mapping(line_count: int) -> dict[str, str]: function get_git_commits_since_tag (line 317) | def get_git_commits_since_tag(git_tag: str) -> list[Change]: function parse_commit_line (line 348) | def parse_commit_line(line: str, authors: dict[str, str]) -> Change: FILE: pdfly/_utils.py class OutputOptions (line 4) | class OutputOptions(Enum): FILE: pdfly/booklet.py function main (line 31) | def main( function requires_rotate (line 96) | def requires_rotate(a: RectangleObject, b: RectangleObject) -> bool: function fetch_first_page (line 110) | def fetch_first_page(filename: Path) -> PageObject: function page_iter (line 126) | def page_iter(num_pages: int) -> Generator[tuple[int, 
int], None, None]: FILE: pdfly/cat.py function main (line 63) | def main( function parse_filepaths_and_pagerange_args (line 130) | def parse_filepaths_and_pagerange_args( FILE: pdfly/check_sign.py function main (line 19) | def main(filename: Path, pem: Path, verbose: bool | None) -> None: FILE: pdfly/cli.py function version_callback (line 29) | def version_callback(value: bool) -> None: function common (line 48) | def common( function up2 (line 56) | def up2( function booklet (line 71) | def booklet( function cat (line 115) | def cat( function check_sign (line 141) | def check_sign( function compress (line 164) | def compress( function extract_annotated_pages (line 184) | def extract_annotated_pages( function extract_images (line 208) | def extract_images( function extract_text (line 222) | def extract_text( function metadata (line 241) | def metadata( function pagemeta (line 262) | def pagemeta( function rm (line 288) | def rm( function rotate (line 309) | def rotate( function sign (line 326) | def sign( function uncompress (line 356) | def uncompress( function update_offsets (line 376) | def update_offsets( function x2pdf (line 400) | def x2pdf( FILE: pdfly/compress.py function main (line 10) | def main(pdf: Path, output: Path) -> None: FILE: pdfly/extract_annotated_pages.py function is_manipulable (line 19) | def is_manipulable(annot: AnnotationDictionary) -> bool: function main (line 24) | def main(input_pdf: Path, output_pdf: Path | None) -> None: FILE: pdfly/extract_images.py function main (line 13) | def main(pdf: Path) -> None: FILE: pdfly/metadata.py class EncryptionData (line 13) | class EncryptionData(BaseModel): class MetaInfo (line 18) | class MetaInfo(BaseModel): function main (line 46) | def main(pdf: Path, output: OutputOptions) -> None: FILE: pdfly/pagemeta.py class PageMeta (line 23) | class PageMeta(BaseModel): function main (line 32) | def main(pdf: Path, page_index: int, output: OutputOptions) -> None: function find_known_format (line 86) | def 
find_known_format(width: float, height: float) -> str: FILE: pdfly/rm.py function main (line 54) | def main( FILE: pdfly/rotate.py function main (line 51) | def main( function convert_range_to_pages (line 81) | def convert_range_to_pages(page_range: str, num_pages: int) -> set[int]: FILE: pdfly/sign.py function main (line 31) | def main( function pdf_is_unsigned_or_raise (line 62) | def pdf_is_unsigned_or_raise(pdf_reader: PdfReader) -> None: function is_signature (line 71) | def is_signature(annotation: PdfObject) -> bool: function _sign_pdf_contents (line 87) | def _sign_pdf_contents( function add_to_page (line 147) | def add_to_page(reader_page: PageObject, unit: str = "mm") -> Generator[... function validate_output_args_or_raise (line 157) | def validate_output_args_or_raise(output: Path | None, in_place: bool) -... FILE: pdfly/uncompress.py function main (line 10) | def main(pdf: Path, output: Path) -> None: function decompress_content_stream (line 40) | def decompress_content_stream(content: IndirectObject) -> None: FILE: pdfly/up2.py function main (line 15) | def main(pdf: Path, output: Path) -> None: FILE: pdfly/update_offsets.py function update_lines (line 42) | def update_lines( function read_binary_file (line 240) | def read_binary_file(file_path: Path, encoding: str) -> list[str]: function main (line 278) | def main(file_in: Path, file_out: Path, encoding: str, verbose: bool) ->... 
FILE: pdfly/x2pdf.py function px_to_mm (line 12) | def px_to_mm(px: float) -> float: function image_to_pdf (line 20) | def image_to_pdf(filepath: Path) -> BytesIO: function main (line 30) | def main(in_filepaths: list[Path], out_filepath: Path) -> int: FILE: tests/conftest.py function chdir (line 19) | def chdir(dir_path: Union[str, Path]) -> Iterator[None]: function run_cli (line 34) | def run_cli(args: list[str]) -> Union[None, int, str]: function two_pages_pdf_filepath (line 43) | def two_pages_pdf_filepath(tmp_path: Path) -> Path: function pdf_file_100 (line 58) | def pdf_file_100(tmp_path: Path) -> Path: function pdf_file_abc (line 75) | def pdf_file_abc(tmp_path: Path) -> Path: FILE: tests/test_booklet.py function test_booklet_fewer_args (line 9) | def test_booklet_fewer_args( function test_booklet_extra_args (line 19) | def test_booklet_extra_args( function test_booklet_page_size (line 31) | def test_booklet_page_size(tmp_path: Path) -> None: function test_booklet_order (line 72) | def test_booklet_order( FILE: tests/test_cat.py function extract_embedded_images (line 10) | def extract_embedded_images(pdf_filepath: Path) -> list[Any]: function extract_text_pages (line 15) | def extract_text_pages(pdf_filepath: Path) -> list[str]: function test_cat_incorrect_number_of_args (line 20) | def test_cat_incorrect_number_of_args( function test_cat_two_files_ok (line 30) | def test_cat_two_files_ok( function test_cat_subset_ok (line 53) | def test_cat_subset_ok(capsys: pytest.CaptureFixture, tmp_path: Path) ->... 
function test_cat_subset_invalid_args (line 75) | def test_cat_subset_invalid_args( function test_cat_subset_warn_on_missing_pages (line 93) | def test_cat_subset_warn_on_missing_pages( function test_cat_subset_ensure_reduced_size (line 111) | def test_cat_subset_ensure_reduced_size( function test_cat_combine_files (line 143) | def test_cat_combine_files( function test_cat_commands (line 224) | def test_cat_commands( function test_cat_decrypt_with_password_ok (line 254) | def test_cat_decrypt_with_password_ok( function test_cat_decrypt_with_password_ko (line 273) | def test_cat_decrypt_with_password_ko( FILE: tests/test_check_sign.py function test_check_sign_manipulated_content (line 9) | def test_check_sign_manipulated_content( function test_check_sign_missing_signature (line 47) | def test_check_sign_missing_signature( function test_check_sign_signature_not_matching_to_certificate (line 67) | def test_check_sign_signature_not_matching_to_certificate( function test_check_sign_pem (line 90) | def test_check_sign_pem(capsys: pytest.CaptureFixture, tmp_path: Path) -... 
function test_check_sign_pdfly_signed_pdf (line 108) | def test_check_sign_pdfly_signed_pdf( FILE: tests/test_cli.py function test_pypdf_cli_can_be_invoked_as_a_module (line 10) | def test_pypdf_cli_can_be_invoked_as_a_module() -> None: function test_pypdf_cli_version (line 21) | def test_pypdf_cli_version(capsys: pytest.CaptureFixture) -> None: FILE: tests/test_compress.py function test_compress_sample_files (line 14) | def test_compress_sample_files( function test_compress_no_compression_when_larger (line 44) | def test_compress_no_compression_when_larger(tmp_path: Path) -> None: function test_compress_file_integrity (line 77) | def test_compress_file_integrity(tmp_path: Path) -> None: function test_compress_output_metrics (line 134) | def test_compress_output_metrics(tmp_path: Path) -> None: function test_compress_same_input_output_not_allowed (line 179) | def test_compress_same_input_output_not_allowed(tmp_path: Path) -> None: function test_compress_preserves_metadata (line 201) | def test_compress_preserves_metadata(tmp_path: Path) -> None: FILE: tests/test_extract_annotated_pages.py function test_extract_annotated_pages_input8 (line 8) | def test_extract_annotated_pages_input8( FILE: tests/test_extract_images.py function test_extract_images_jpg_png (line 8) | def test_extract_images_jpg_png( function test_extract_images_monochrome (line 23) | def test_extract_images_monochrome( FILE: tests/test_pagemeta.py function test_pagemeta_json (line 9) | def test_pagemeta_json(capsys: pytest.CaptureFixture, tmp_path: Path) ->... 
function test_pagemeta_text_with_known_format (line 23) | def test_pagemeta_text_with_known_format( function test_pagemeta_text_with_close_format (line 33) | def test_pagemeta_text_with_close_format( FILE: tests/test_rm.py function test_rm_incorrect_number_of_args (line 13) | def test_rm_incorrect_number_of_args( function test_rm_subset_ok (line 23) | def test_rm_subset_ok(capsys: CaptureFixture, tmp_path: Path) -> None: function test_rm_subset_invalid_args (line 48) | def test_rm_subset_invalid_args( function test_rm_subset_warn_on_missing_pages (line 66) | def test_rm_subset_warn_on_missing_pages( function test_rm_subset_ensure_reduced_size (line 84) | def test_rm_subset_ensure_reduced_size( function test_rm_combine_files (line 116) | def test_rm_combine_files( function test_rm_commands (line 198) | def test_rm_commands( FILE: tests/test_rotate.py function test_rotate_fewer_args (line 9) | def test_rotate_fewer_args( function test_rotate_extra_args (line 23) | def test_rotate_extra_args( function get_page_rotations (line 43) | def get_page_rotations(fname: str) -> list[int]: function diff_rotations (line 48) | def diff_rotations( function test_rotate_default (line 57) | def test_rotate_default(tmp_path: Path) -> None: function test_rotate_slices (line 101) | def test_rotate_slices( FILE: tests/test_sign.py function test_sign_missing_certificate_key_option (line 9) | def test_sign_missing_certificate_key_option( function test_sign_already_signed_pdf (line 24) | def test_sign_already_signed_pdf( function test_sign_pkcs12 (line 48) | def test_sign_pkcs12(capsys: pytest.CaptureFixture, tmp_path: Path) -> N... function test_sign_pkcs12_in_place (line 78) | def test_sign_pkcs12_in_place( FILE: tests/test_uncompress.py function test_uncompress_all_sample_files (line 17) | def test_uncompress_all_sample_files( FILE: tests/test_up2.py function test_up2_fewer_args (line 10) | def test_up2_fewer_args(capsys: pytest.CaptureFixture, tmp_path: Path) -... 
function test_up2_extra_args (line 18) | def test_up2_extra_args(capsys: pytest.CaptureFixture, tmp_path: Path) -... function test_up2_8page_file (line 38) | def test_up2_8page_file(capsys: pytest.CaptureFixture, tmp_path: Path) -... function test_up2_odd_page_number (line 73) | def test_up2_odd_page_number( FILE: tests/test_update_offsets.py function test_update_offsets (line 17) | def test_update_offsets(capsys: pytest.CaptureFixture) -> None: function test_update_offsets_on_all_reference_files (line 80) | def test_update_offsets_on_all_reference_files( FILE: tests/test_x2pdf.py function test_x2pdf_succeed_to_convert_jpg (line 14) | def test_x2pdf_succeed_to_convert_jpg( function test_x2pdf_succeed_to_embed_pdfs (line 37) | def test_x2pdf_succeed_to_embed_pdfs( function test_x2pdf_fail_to_open_file (line 61) | def test_x2pdf_fail_to_open_file( function test_x2pdf_fail_to_convert (line 80) | def test_x2pdf_fail_to_convert(
Condensed preview — 97 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (304K chars).
[
{
"path": ".all-contributorsrc",
"chars": 4620,
"preview": "{\n \"projectName\": \"pdfly\",\n \"projectOwner\": \"py-pdf\",\n \"repoType\": \"github\",\n \"repoHost\": \"https://github.com\",\n \"f"
},
{
"path": ".github/ISSUE_TEMPLATE/bug_report.md",
"chars": 1277,
"preview": "---\nname: Bug report\nabout: Report some unexpected behaviour to help us improve\ntitle: ''\nlabels: bug\nassignees: ''\n---\n"
},
{
"path": ".github/ISSUE_TEMPLATE/config.yml",
"chars": 533,
"preview": "# Ref: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository#con"
},
{
"path": ".github/ISSUE_TEMPLATE/feature_request.md",
"chars": 831,
"preview": "---\nname: Feature request\nabout: Suggest an idea for this project\ntitle: ''\nlabels: enhancement\nassignees: ''\n---\n<!--\nH"
},
{
"path": ".github/ISSUE_TEMPLATE/question.md",
"chars": 434,
"preview": "---\nname: I have a question\nabout: Anything that is not a bug report or a feature request\ntitle: ''\nlabels: question\nass"
},
{
"path": ".github/dependabot.yml",
"chars": 160,
"preview": "# Set update schedule for GitHub Actions\n\nversion: 2\nupdates:\n\n - package-ecosystem: \"github-actions\"\n directory: \"/"
},
{
"path": ".github/pull_request_template.md",
"chars": 1792,
"preview": "<!--\nThanks for your interest in the project.\nBugs filed and PRs submitted are appreciated!\n\nSome guidelines are provide"
},
{
"path": ".github/scripts/check_pr_title.py",
"chars": 900,
"preview": "\"\"\"Check that all PR titles follow the desired scheme.\"\"\"\n\nimport os\nimport sys\n\nKNOWN_PREFIXES = (\n \"SEC: \",\n \"BU"
},
{
"path": ".github/workflows/check-gitignored-files.yml",
"chars": 852,
"preview": "name: Check for Gitignored Files\n\non:\n push:\n branches:\n - '**' # Run on all branches\n pull_request:\n\njobs:\n "
},
{
"path": ".github/workflows/create-github-release.yaml",
"chars": 1382,
"preview": "name: Create a GitHub release page\n\non:\n push:\n tags:\n - '*.*.*'\n workflow_dispatch:\n workflow_run:\n workf"
},
{
"path": ".github/workflows/github-ci.yaml",
"chars": 3241,
"preview": "# This workflow will install Python dependencies, run tests and lint with a variety of Python versions\n# For more inform"
},
{
"path": ".github/workflows/publish-to-pypi.yaml",
"chars": 942,
"preview": "name: Publish Python Package to PyPI\n\non:\n push:\n tags:\n - '*.*.*'\n workflow_dispatch:\n workflow_run:\n wor"
},
{
"path": ".github/workflows/release.yaml",
"chars": 1750,
"preview": "# This action assumes that there is a REL-commit which already has a\n# Markdown-formatted git tag. Hence the CHANGELOG i"
},
{
"path": ".github/workflows/title-check.yml",
"chars": 580,
"preview": "name: 'PR Title Check'\non:\n pull_request:\n # check when PR\n # * is created,\n # * title is edited, and\n # * "
},
{
"path": ".gitignore",
"chars": 2012,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": ".gitmodules",
"chars": 94,
"preview": "[submodule \"sample-files\"]\n\tpath = sample-files\n\turl = git@github.com:py-pdf/sample-files.git\n"
},
{
"path": ".isort.cfg",
"chars": 168,
"preview": "[settings]\nline_length=79\nindent=' '\nmulti_line_output=3\nlength_sort=0\ninclude_trailing_comma=True\nskip=docs\nknown_th"
},
{
"path": ".pre-commit-config.yaml",
"chars": 1462,
"preview": "# pre-commit run --all-files\nrepos:\n- repo: https://github.com/pre-commit/pre-commit-hooks\n rev: v6.0.0\n hooks:\n"
},
{
"path": ".readthedocs.yaml",
"chars": 500,
"preview": "# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details\nversion: 2\n\n\nbuild:\n os: ubuntu-22.04\n too"
},
{
"path": ".typos.toml",
"chars": 166,
"preview": "[default]\nextend-ignore-identifiers-re = [\n \"certifi\",\n \"FlateDecode\",\n # This typo appears in a .tex file in t"
},
{
"path": "CHANGELOG.md",
"chars": 4618,
"preview": "# CHANGELOG\n\n## Version 0.6.0, not released yet\n\n### Bug Fixes (BUG)\n- `2up` incorrectly handled documents with an odd n"
},
{
"path": "CONTRIBUTORS.md",
"chars": 153,
"preview": "# List of contributors\n\nThe list of contributors has been moved into the [README.md](https://github.com/py-pdf/pdfly/blo"
},
{
"path": "LICENSE",
"chars": 1514,
"preview": "BSD 3-Clause License\n\nCopyright (c) 2022, py-pdf\nAll rights reserved.\n\nRedistribution and use in source and binary forms"
},
{
"path": "Makefile",
"chars": 661,
"preview": "maint:\n\tpre-commit autoupdate\n\tpython -m pip install --upgrade .\n\tpython -m pip lock --group dev --group docs .\n\tuv pip "
},
{
"path": "README.md",
"chars": 12222,
"preview": "[](https://pypi.org/pypi/pdfly#history)\n[ for testing.\n\nTo run the tests you need to install "
},
{
"path": "docs/index.rst",
"chars": 6058,
"preview": "Welcome to pdfly\n================\n\n.. image:: https://img.shields.io/pypi/v/pdfly.svg\n :target: https://pypi.org/pypi/"
},
{
"path": "docs/make.bat",
"chars": 800,
"preview": "@ECHO OFF\r\n\r\npushd %~dp0\r\n\r\nREM Command file for Sphinx documentation\r\n\r\nif \"%SPHINXBUILD%\" == \"\" (\r\n\tset SPHINXBUILD=sp"
},
{
"path": "docs/meta/project-governance.md",
"chars": 6739,
"preview": "# Project Governance\n\nThis document describes how the pdfly project is managed. It describes the\ndifferent actors, their"
},
{
"path": "docs/user/installation.md",
"chars": 1625,
"preview": "# Installation\nThere are several ways to install pdfly. The most common option is to use pip.\n\n## pip\npdfly requires Pyt"
},
{
"path": "docs/user/subcommand-2-up.md",
"chars": 995,
"preview": "# 2-up\n\nCreate a booklet-style PDF from a single input.\n\n## Usage\n\n```\n$ pdfly 2-up --help\n Usage: pdfly 2-up [OPTIONS] "
},
{
"path": "docs/user/subcommand-booklet.md",
"chars": 1703,
"preview": "# booklet\n\nReorder and two-up PDF pages for booklet printing.\n\n## Usage\n\n```\n$ pdfly booklet --help\n Usage: pdfly bookle"
},
{
"path": "docs/user/subcommand-cat.md",
"chars": 3263,
"preview": "# cat\n\nThe cat command can split / extract pages from a PDF. It can also\njoin/merge/combine multiple PDF documents into "
},
{
"path": "docs/user/subcommand-check-sign.md",
"chars": 1546,
"preview": "# check-sign\n\nValidate that a PDF document has a digital signature matching a given certificate.\n\n## Usage\n\n```\n Usage: "
},
{
"path": "docs/user/subcommand-compress.md",
"chars": 1139,
"preview": "# compress\n\nCompress a PDF using lossless FlateDecode compression.\n\n**Note:** If compression would result in a larger fi"
},
{
"path": "docs/user/subcommand-extract-annotated-pages.md",
"chars": 1750,
"preview": "# extract-annotated-pages\n\nExtract only the annotated pages from a PDF. This can help to review or rework pages from a l"
},
{
"path": "docs/user/subcommand-extract-images.md",
"chars": 1028,
"preview": "# extract-images\n\nExtract text from a PDF file.\n## Usage\n\n```\n$ pdfly extract-images --help\n Usage: pdfly extract-images"
},
{
"path": "docs/user/subcommand-extract-text.md",
"chars": 839,
"preview": "# extract-text\n\nExtract text from a PDF file.\n## Usage\n\n```\n$ pdfly extract-text --help\n Usage: pdfly extract-text [OPTI"
},
{
"path": "docs/user/subcommand-meta.md",
"chars": 3897,
"preview": "# meta\n\nGet metadata of a PDF file.\n\n## Usage\n\n```\npdfly meta --help\n\n Usage: pdfly meta [OPTIONS] PDF\n\n Show metadata o"
},
{
"path": "docs/user/subcommand-pagemeta.md",
"chars": 2392,
"preview": "# pagemeta\n\nGive details about a PDF's single page.\n\n## Usage\n\n```\n$ pdfly pagemeta --help\n Usage: pdfly pagemeta [OPTIO"
},
{
"path": "docs/user/subcommand-rm.md",
"chars": 2802,
"preview": "# rm\n\nRemove pages from PDF files.\n\n## Usage\n\n```\n$ pdfly rm --help\nUsage: pdfly rm [OPTIONS] FILENAME FN_PGRGS...\n\n Rem"
},
{
"path": "docs/user/subcommand-rotate.md",
"chars": 3880,
"preview": "# rotate\n\n## Usage\n\n```\npdfly rotate --help\n\n Usage: pdfly rotate [OPTIONS] FILENAME DEGREES [PGRGS]\n\n Rotate specified "
},
{
"path": "docs/user/subcommand-sign.md",
"chars": 2203,
"preview": "# sign\n\nCreates a digitally-signed PDF from an existing PDF file and a given certificate.\n\n## Usage\n\n```\nUsage: pdfly si"
},
{
"path": "docs/user/subcommand-uncompress.md",
"chars": 705,
"preview": "# uncompress\n\nModule for uncompressing PDF content streams.\n## Usage\n\n```\n$ pdfly ucompress --help\n Module for uncompres"
},
{
"path": "docs/user/subcommand-update-offsets.md",
"chars": 2381,
"preview": "# update-offsets\n\nUpdates offsets and lengths in a simple PDF file.\n\n## Usage\n\n```\n$ pdfly update-offsets --help\n Usage:"
},
{
"path": "docs/user/subcommand-x2pdf.md",
"chars": 1420,
"preview": "# x2pdf\n\nConvert a file to PDF.\n\nCurrently supported for \"x\":\n\n* PNG\n* JPG\n\n\n## Usage\n\n```\n$ pdfly x2pdf --help\n\n Usage:"
},
{
"path": "make_release.py",
"chars": 10516,
"preview": "\"\"\"Internal tool to update the CHANGELOG.\"\"\"\n\nimport json\nimport subprocess\nimport urllib.request\nfrom dataclasses impor"
},
{
"path": "mypy.ini",
"chars": 31,
"preview": "[mypy]\nplugins = pydantic.mypy\n"
},
{
"path": "pdfly/__init__.py",
"chars": 165,
"preview": "\"\"\"pdfly is a command line utility for manipulating PDFs and getting information about them.\"\"\"\n\nfrom ._version import _"
},
{
"path": "pdfly/__main__.py",
"chars": 114,
"preview": "\"\"\"Execute pdfly as a module.\"\"\"\n\nfrom pdfly.cli import entry_point\n\nif __name__ == \"__main__\":\n entry_point()\n"
},
{
"path": "pdfly/_utils.py",
"chars": 87,
"preview": "from enum import Enum\n\n\nclass OutputOptions(Enum):\n json = \"json\"\n text = \"text\"\n"
},
{
"path": "pdfly/_version.py",
"chars": 22,
"preview": "__version__ = \"0.5.1\"\n"
},
{
"path": "pdfly/booklet.py",
"chars": 5217,
"preview": "\"\"\"\nReorder and two-up PDF pages for booklet printing.\n\nIf the number of pages is not a multiple of four, pages are\nadde"
},
{
"path": "pdfly/cat.py",
"chars": 5029,
"preview": "\"\"\"\nConcatenate pages from PDF files into a single PDF file.\n\nPage ranges refer to the previously-named file.\nA file not"
},
{
"path": "pdfly/check_sign.py",
"chars": 1343,
"preview": "\"\"\"\nVerifies the signature of a signed PDF.\n\nExamples\n pdfly verify input.pdf --pem certs.pem\n\n Verifies the i"
},
{
"path": "pdfly/cli.py",
"chars": 10467,
"preview": "\"\"\"\nDefine how the CLI should behave.\n\nSubcommands are added here.\n\"\"\"\n\nfrom pathlib import Path\nfrom typing import Anno"
},
{
"path": "pdfly/compress.py",
"chars": 1420,
"preview": "\"\"\"Compress a PDF.\"\"\"\n\nimport shutil\nfrom io import BytesIO\nfrom pathlib import Path\n\nfrom pypdf import PdfReader, PdfWr"
},
{
"path": "pdfly/extract_annotated_pages.py",
"chars": 1218,
"preview": "\"\"\"\nExtract only the annotated pages from a PDF.\n\nQ: Why does this help?\nA: https://github.com/py-pdf/pdfly/issues/97\n\"\""
},
{
"path": "pdfly/extract_images.py",
"chars": 850,
"preview": "\"\"\"\nExtract images from PDF without resampling or altering.\n\nAdapted from work by Sylvain Pelissier\nhttp://stackoverflow"
},
{
"path": "pdfly/metadata.py",
"chars": 6833,
"preview": "\"\"\"Show metadata of a PDF file\"\"\"\n\nimport stat\nfrom datetime import datetime\nfrom pathlib import Path\n\nfrom pydantic imp"
},
{
"path": "pdfly/pagemeta.py",
"chars": 3005,
"preview": "\"\"\"Give details about a single page.\"\"\"\n\nfrom pathlib import Path\n\nfrom pydantic import BaseModel\nfrom pypdf import PdfR"
},
{
"path": "pdfly/rm.py",
"chars": 1800,
"preview": "\"\"\"\nRemove pages from PDF files.\n\nPage ranges refer to the previously-named file.\nA file not followed by a page range me"
},
{
"path": "pdfly/rotate.py",
"chars": 2503,
"preview": "\"\"\"\nRotate specified pages by the specified amount\n\nExample:\n pdfly rotate --output output.pdf input.pdf 90\n R"
},
{
"path": "pdfly/sign.py",
"chars": 5145,
"preview": "\"\"\"\nCreates a signed PDF from an existing PDF file.\n\nExamples\n pdfly sign input.pdf --p12 certs.p12 -o signed.pdf\n\n "
},
{
"path": "pdfly/uncompress.py",
"chars": 1752,
"preview": "\"\"\"Module for uncompressing PDF content streams.\"\"\"\n\nimport zlib\nfrom pathlib import Path\n\nfrom pypdf import PdfReader, "
},
{
"path": "pdfly/up2.py",
"chars": 978,
"preview": "\"\"\"\nCreate a booklet-style PDF from a single input.\n\nPairs of two pages will be put on one page (left and right)\n\nusage:"
},
{
"path": "pdfly/update_offsets.py",
"chars": 11726,
"preview": "\"\"\"\nUpdates offsets and lengths in a simple PDF file.\n\nThe PDF specification requires that the xref section at the end\no"
},
{
"path": "pdfly/x2pdf.py",
"chars": 1440,
"preview": "\"\"\"Convert one or more files to PDF. Each file is a page.\"\"\"\n\nfrom io import BytesIO\nfrom pathlib import Path\n\nfrom fpdf"
},
{
"path": "pylock.toml",
"chars": 39926,
"preview": "lock-version = \"1.0\"\ncreated-by = \"pip\"\n\n[[packages]]\nname = \"alabaster\"\nversion = \"1.0.0\"\n\n[[packages.wheels]]\nname = \""
},
{
"path": "pyproject.toml",
"chars": 5393,
"preview": "[build-system]\nrequires = [\"flit_core >=3.2,<4\"]\nbuild-backend = \"flit_core.buildapi\"\n\n[project]\nname = \"pdfly\"\nauthors "
},
{
"path": "renovate.json",
"chars": 186,
"preview": "{\n \"commitMessagePrefix\": \"MAINT:\",\n \"extends\": [\"config:best-practices\"],\n \"labels\": [\"dependencies\"],\n \"osvVulnera"
},
{
"path": "resources/demo2_ca.root.crt.pem",
"chars": 1164,
"preview": "-----BEGIN CERTIFICATE-----\nMIIDLTCCAhWgAwIBAgIUHeQXwdDU4jyXtdItkEjDOw/SigAwDQYJKoZIhvcNAQEL\nBQAwHTEbMBkGA1UEAwwSQUEgVHJ"
},
{
"path": "resources/signing-certificate.crt",
"chars": 1697,
"preview": "Bag Attributes\n friendlyName: fpdf2\n localKeyID: C2 58 91 78 7F 3E 01 57 6E 39 AE AD CA 28 99 06 3B 55 2D F1\nsubje"
},
{
"path": "setup.cfg",
"chars": 519,
"preview": "[mutmut]\nbackup = False\nrunner = ./mutmut-test.sh\ntests_dir = tests/\n\n[mypy]\nignore_missing_imports = true\nstrict = true"
},
{
"path": "setup.py",
"chars": 472,
"preview": "\"\"\"Package pdfly with setuptools.\"\"\"\n\nimport re\n\nfrom setuptools import find_packages, setup\n\nVERSIONFILE = \"pdfly/_vers"
},
{
"path": "tests/__init__.py",
"chars": 23,
"preview": "\"\"\"Shared test code\"\"\"\n"
},
{
"path": "tests/conftest.py",
"chars": 2548,
"preview": "\"\"\"Utilities and fixtures that are available automatically for all tests.\"\"\"\n\nimport os\nfrom collections.abc import Iter"
},
{
"path": "tests/test_booklet.py",
"chars": 3672,
"preview": "from pathlib import Path\n\nimport pytest\nfrom pypdf import PdfReader\n\nfrom .conftest import RESOURCES_ROOT, chdir, run_cl"
},
{
"path": "tests/test_cat.py",
"chars": 7519,
"preview": "from pathlib import Path\nfrom typing import Any\n\nimport pytest\nfrom pypdf import PdfReader\n\nfrom .conftest import RESOUR"
},
{
"path": "tests/test_check_sign.py",
"chars": 3780,
"preview": "from pathlib import Path\n\nimport pytest\nfrom fpdf import FPDF\n\nfrom .conftest import RESOURCES_ROOT, chdir, run_cli\n\n\nde"
},
{
"path": "tests/test_cli.py",
"chars": 714,
"preview": "import sys\nfrom subprocess import check_output\n\nimport pytest\nfrom pypdf import __version__ as pypdf_version\n\nfrom .conf"
},
{
"path": "tests/test_compress.py",
"chars": 6309,
"preview": "\"\"\"Tests for the `compress` command.\"\"\"\n\nfrom pathlib import Path\n\nimport pytest\nfrom typer.testing import CliRunner\n\nfr"
},
{
"path": "tests/test_extract_annotated_pages.py",
"chars": 501,
"preview": "from pathlib import Path\n\nimport pytest\n\nfrom .conftest import RESOURCES_ROOT, chdir, run_cli\n\n\ndef test_extract_annotat"
},
{
"path": "tests/test_extract_images.py",
"chars": 886,
"preview": "from pathlib import Path\n\nimport pytest\n\nfrom .conftest import RESOURCES_ROOT, chdir, run_cli\n\n\ndef test_extract_images_"
},
{
"path": "tests/test_pagemeta.py",
"chars": 1284,
"preview": "import json\nfrom pathlib import Path\n\nimport pytest\n\nfrom .conftest import RESOURCES_ROOT, chdir, run_cli\n\n\ndef test_pag"
},
{
"path": "tests/test_rm.py",
"chars": 5942,
"preview": "\"\"\"Tests for the `rm` command.\"\"\"\n\nfrom pathlib import Path\n\nimport pytest\nfrom _pytest.capture import CaptureFixture\nfr"
},
{
"path": "tests/test_rotate.py",
"chars": 3462,
"preview": "from pathlib import Path\n\nimport pytest\nfrom pypdf import PdfReader\n\nfrom .conftest import RESOURCES_ROOT, chdir, run_cl"
},
{
"path": "tests/test_sign.py",
"chars": 2882,
"preview": "from pathlib import Path\n\nimport pytest\nfrom endesive import pdf\n\nfrom .conftest import RESOURCES_ROOT, chdir, run_cli\n\n"
},
{
"path": "tests/test_uncompress.py",
"chars": 1064,
"preview": "\"\"\"Tests for the `uncompress` command.\"\"\"\n\nfrom pathlib import Path\n\nimport pytest\nfrom pypdf import PdfReader\nfrom type"
},
{
"path": "tests/test_up2.py",
"chars": 2640,
"preview": "import os.path\nfrom pathlib import Path\n\nimport pytest\nfrom pypdf import PdfReader\n\nfrom .conftest import RESOURCES_ROOT"
},
{
"path": "tests/test_update_offsets.py",
"chars": 3806,
"preview": "\"\"\"\nEvery CLI command is called here with a typer CliRunner.\n\nHere should only be end-to-end tests.\n\"\"\"\n\nimport re\nimpor"
},
{
"path": "tests/test_x2pdf.py",
"chars": 2100,
"preview": "\"\"\"\nEvery CLI command is called here with a typer CliRunner.\n\nHere should only be end-to-end tests.\n\"\"\"\n\nfrom pathlib im"
}
]
// ... and 1 more files (download for full content)
About this extraction
This page contains the full source code of the py-pdf/pdfly GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 97 files (263.1 KB), approximately 77.8k tokens, and a symbol index with 139 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.