Repository: signcl/docusaurus-prince-pdf
Branch: master
Commit: bdb7b104ac1d
Files: 13
Total size: 22.6 KB

Directory structure:
gitextract_w5clawzc/
├── .github/
│   └── workflows/
│       └── test.yml
├── .gitignore
├── .vscode/
│   └── settings.json
├── Dockerfile
├── LICENSE
├── README.md
├── biome.jsonc
├── build.sh
├── docker-bake.hcl
├── index.ts
├── package.json
├── print.css
└── tsconfig.json

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/test.yml
================================================
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

# This workflow installs Node.js, Bun and Prince, renders the Docusaurus docs
# to PDF with `bun run test`, uploads the PDF as an artifact, and then builds
# the multi-platform Docker image with Buildx Bake. The image is pushed to the
# Docker Hub and GHCR slugs below on every event except pull requests.

name: CI Test

on:
  push:
    branches:
      - '**'
    tags:
      - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10
  pull_request:
    branches:
      - '**'

env:
  # Quoted so the version is always a string: an unquoted value such as 15.10
  # would be parsed as the float 15.1 and produce a wrong download URL.
  PRINCE_VER: '15.3'
  DOCKERHUB_SLUG: openbayes/docusaurus-prince-pdf
  GHCR_SLUG: ghcr.io/signcl/docusaurus-prince-pdf

jobs:
  build-n-deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Install Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 20

      - name: Install bun
        uses: oven-sh/setup-bun@v2

      - name: Install dependencies
        run: bun install

      - name: Install Prince
        run: |
          curl https://www.princexml.com/download/prince-${{ env.PRINCE_VER }}-linux-generic-x86_64.tar.gz -O
          tar zxf prince-${{ env.PRINCE_VER }}-linux-generic-x86_64.tar.gz
          cd prince-${{ env.PRINCE_VER }}-linux-generic-x86_64
          yes "" | sudo ./install.sh

      - name: Build PDF
        run: bun run test

      - name: Upload results
        uses: actions/upload-artifact@v4
        with:
          name: result
          path: pdf/docusaurus.io-docs.pdf
          if-no-files-found: error

      # Start build Docker image
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v3

      - name: Available platforms
        run: echo ${{ steps.buildx.outputs.platforms }}

      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USER }}
          password: ${{ secrets.DOCKER_PASS }}

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            ${{ env.DOCKERHUB_SLUG }}
            ${{ env.GHCR_SLUG }}
          tags: |
            type=edge
            type=schedule
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=semver,pattern={{major}}
            type=sha

      - name: Build and push
        uses: docker/bake-action@v6
        with:
          files: |
            ./docker-bake.hcl
            ${{ steps.meta.outputs.bake-file }}
          source: .
          targets: build-all
          # Pull requests only build the image; every other event also pushes it.
          push: ${{ github.event_name != 'pull_request' }}
          set: |
            *.cache-from=type=gha
            *.cache-to=type=gha,mode=max

================================================
FILE: .gitignore
================================================
pdf/
node_modules/
.DS_Store
build/

================================================
FILE: .vscode/settings.json
================================================
{
  "editor.formatOnSave": true,
  // https://biomejs.dev/reference/vscode/
  "editor.codeActionsOnSave": {
    "source.fixAll.biome": "explicit",
    "source.organizeImports.biome": "explicit"
  }
}

================================================
FILE: Dockerfile
================================================
# https://bun.sh/guides/ecosystem/docker
# use the official Bun image
# see all versions at https://hub.docker.com/r/oven/bun/tags
FROM oven/bun:1-alpine as base
WORKDIR /app

# install dependencies into temp directory
# this will cache them and speed up future builds
FROM base AS install
RUN mkdir -p /temp/dev
COPY package.json bun.lock /temp/dev/
RUN cd /temp/dev && bun install --frozen-lockfile

# install with --production (exclude devDependencies)
RUN mkdir -p /temp/prod
COPY package.json bun.lock /temp/prod/
RUN cd /temp/prod && bun install --frozen-lockfile --production

# copy node_modules from temp directory
# then copy all (non-ignored) project files into the image
FROM base AS prerelease
COPY --from=install /temp/dev/node_modules node_modules
COPY . .

# [optional] tests & build
ENV NODE_ENV=production
# RUN bun test
# RUN bun run build

# copy production dependencies and source code into final image
FROM base AS release
COPY --from=install /temp/prod/node_modules node_modules
COPY --from=prerelease /app/index.ts .
COPY --from=prerelease /app/print.css .
COPY --from=prerelease /app/package.json .
# install prince VOLUME /app ARG TARGETARCH # https://www.princexml.com/latest/ # https://www.princexml.com/download/prince-14.2-alpine3.13-x86_64.tar.gz -o prince.tar.gz ARG PRINCE_VER=15.1 ARG DISTRO=linux-generic RUN echo "Building for $TARGETARCH" RUN apk add --no-cache curl RUN prince_arch=$([ "$TARGETARCH" == "arm64" ] && echo "aarch64-musl" || echo "x86_64") \ && curl https://www.princexml.com/download/prince-${PRINCE_VER}-${DISTRO}-${prince_arch}.tar.gz -o prince.tar.gz \ && mkdir prince \ && tar -zxvf prince.tar.gz -C prince --strip-components=1 \ && rm prince.tar.gz \ && cd prince \ && yes "" | ./install.sh RUN apk add --no-cache \ terminus-font \ ttf-inconsolata \ ttf-dejavu \ font-croscore \ font-noto \ font-noto-extra \ --repository=https://dl-cdn.alpinelinux.org/alpine/edge/community/ # Install fonts RUN apk add --no-cache msttcorefonts-installer fontconfig && \ update-ms-fonts && \ fc-cache -f && rm -rf /var/cache/* # run the app USER bun EXPOSE 8080/tcp ENTRYPOINT [ "bun", "run", "index.ts" ] ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2023 OpenBayes Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Docusaurus Prince PDF Generator Extract rendered data from Docusaurus/Fumadocs/etc and generate PDF, the hard way ## Demo/Examples Prince PDF for Docusaurus Documentation You can download it from the artifacts section of [GitHub Actions](https://github.com/signcl/docusaurus-prince-pdf/actions/workflows/test.yml) to see the result. This project uses method 1 (see below) to generate the PDF. You must have [Prince](https://www.princexml.com/) installed on your local machine. ## Usage Install [Prince](https://www.princexml.com/download/) first. Run the following commands to generate a PDF: ```bash # Generate PDF from specific site under `docs` scope npx docusaurus-prince-pdf -u https://docusaurus.io/docs # Change generating scope to `/docs/cli/` npx docusaurus-prince-pdf -u https://docusaurus.io/docs/cli # Custom working (output) directory npx docusaurus-prince-pdf -u https://openbayes.com/docs --dest ./pdf-output # Custom output file name npx docusaurus-prince-pdf -u https://openbayes.com/docs --output docs.pdf ``` To generate a PDF from a local Docusaurus instance,
you need to first build the site locally: ```bash # Build the site (npm|bun|yarn|pnpm) build # Serve built site locally (npm|bun|yarn|pnpm) serve # Generate PDF from local Docusaurus instance npx docusaurus-prince-pdf -u http://localhost:4000/docs # Change port to your serving port ``` ## Docker - [Docker Hub](https://hub.docker.com/r/openbayes/docusaurus-prince-pdf) - [ghcr.io](https://github.com/orgs/signcl/packages/container/package/docusaurus-prince-pdf) You can run this program with the Docker image: ```bash docker run --rm -it --init \ -v $(pwd)/pdf:/app/pdf \ openbayes/docusaurus-prince-pdf \ -u https://docusaurus.io/docs/ ``` If you need support for Asian languages such as Chinese and Japanese, you can mount your custom fonts directory into the Docker container: ```bash docker run --rm -it --init \ -v $(pwd)/pdf:/app/pdf \ -v $(pwd)/fonts:/root/.fonts \ openbayes/docusaurus-prince-pdf \ -u https://docusaurus.io/docs/ ``` ## GitHub Actions You can also run this program inside GitHub Actions: ```yaml jobs: build: # prerequisites... - name: Install Prince run: | curl https://www.princexml.com/download/prince-14.2-linux-generic-x86_64.tar.gz -O tar zxf prince-14.2-linux-generic-x86_64.tar.gz cd prince-14.2-linux-generic-x86_64 yes "" | sudo ./install.sh - name: Build PDF run: npx docusaurus-prince-pdf -u https://docusaurus.io/docs/ - name: Upload results uses: actions/upload-artifact@v3 with: name: result # The output filename can be specified with --output option path: pdf/docusaurus.io-docs.pdf if-no-files-found: error # ...other steps ``` You can also run this program with the prebuilt Docker image: ```yaml jobs: build: # prerequisites... - name: Build PDF run: docker run --rm -it -v $(pwd)/pdf:/app/pdf openbayes/docusaurus-prince-pdf -u https://docusaurus.io/docs/ # ...other steps ``` ## Development You need to have [Bun](https://bun.sh/) installed first. This also lets you run the latest code on your local machine. 
```bash bun run index.ts -u http://localhost:4000/docs ``` ## Options - `--url` (`-u`): Base URL, should be the `baseUrl` of the Docusaurus instance (e.g. https://docusaurus.io/docs/) - `--selector` (`-s`): CSS selector to find the link of the next page - `--dest` (`-d`): Working directory. Defaults to `./pdf` - `--file` (`-f`): Change default list output filename - `--output` (`-o`): Change PDF output filename - `--include-index`: Include passed URL in generated PDF - `--prepend`: Prepend additional pages, separated by commas - `--append`: Append additional pages, separated by commas - `--prince-args`: Additional options for Prince, e.g. `--prince-args="--page-size='210mm 297mm'"` or `--prince-args "\\-\\-page\\-size='210mm 297mm'"` - `--prince-docker`: Use external Prince docker image to generate PDF. See https://github.com/sparanoid/docker-prince for more info - `--list-only`: Fetch list without generating PDF - `--pdf-only`: Generate PDF without fetching list. Ensure list exists - `--cookie`: Specify the cookie with the domain part, e.g. `--cookie="token=123456; domain=example.com;"` ## How it works Like [mr-pdf](https://github.com/kohheepeace/mr-pdf), this package follows the "next page" pagination links on the generated Docusaurus site, collects them in a list, and then passes the list to Prince to generate the PDF. You can specify the CSS selector if you're using a custom Docusaurus theme: ```bash npx docusaurus-prince-pdf -u https://openbayes.com/ --selector 'nav.custom-pagination-item--next > a' ``` ## Does it work with Fumadocs or other static site/docs generators? It should work with any static site/docs generator that has consistent pagination links. For Fumadocs, you can use the same method to generate the PDF using a custom selector like this: ```bash npx docusaurus-prince-pdf -u https://fumadocs.vercel.app/docs/ui --selector '#nd-page > article > div.grid.grid-cols-2.gap-4.pb-6 a:last-child' ``` The Tailwind-styled selector is not elegant but it works. ## Why this package? 
I made a comparison list for the two methods of generating PDFs from Docusaurus. ### Method 1: Prince The good: - Best font subsetting support - Text can be selected and copied/pasted correctly - Fancy Table of Contents The bad: - The watermark on the first page of the generated PDF makes it hard to handle in CI/CD environments - Doesn't work with some CSS syntax (e.g. `mask-image`) - Doesn't work with some HTML features (e.g. `srcset`) - Commercial license is expensive ([$3,800](https://www.princexml.com/purchase/)) The ugly: - None ### Method 2: [mr-pdf](https://github.com/kohheepeace/mr-pdf) (not used in this project) The good: - Free and open-source - Works with Docusaurus sites - CI/CD friendly - Being based on Puppeteer, it works with most modern CSS syntax (e.g. `mask-image`) The bad: - Doesn't work well with system Dark Mode. You will get a dark background in the generated PDF when you have `respectPrefersColorScheme` enabled in your Docusaurus instance. But it's not an issue in CI/CD environments - No Table of Contents The ugly: - Being based on Puppeteer, the text cannot be copied or searched correctly - Link anchors (links starting with `#`) are not handled well Usage: ```bash npx mr-pdf --initialDocURLs="https://openbayes.com/docs/" --paginationSelector=".pagination-nav__item--next > a" --contentSelector="article" ``` ## License MIT ================================================ FILE: biome.jsonc ================================================ { "$schema": "https://biomejs.dev/schemas/2.2.0/schema.json", "vcs": { "enabled": true, "clientKind": "git", "useIgnoreFile": true }, "files": { "ignoreUnknown": true, "includes": ["**", "!node_modules", "!dist", "!build", "!output", "!out", "!references"] }, "formatter": { "enabled": true, "indentStyle": "space", "indentWidth": 2, "lineWidth": 120 }, "linter": { "enabled": true, "rules": { "recommended": true, "complexity": { "noImportantStyles": "off" } } }, "javascript": { "formatter": { "quoteStyle": "single", "jsxQuoteStyle": "single", 
"semicolons": "asNeeded", "trailingCommas": "es5", "arrowParentheses": "asNeeded" } } } ================================================ FILE: build.sh ================================================ #!/bin/bash # Build the project using bun bun build --compile --minify --sourcemap --target=bun-linux-x64 ./index.ts --outfile build/dpdf-linux-x64 bun build --compile --minify --sourcemap --target=bun-linux-arm64 ./index.ts --outfile build/dpdf-linux-arm64 bun build --compile --minify --sourcemap --target=bun-windows-x64 ./index.ts --outfile build/dpdf-windows-x64 bun build --compile --minify --sourcemap --target=bun-darwin-arm64 ./index.ts --outfile build/dpdf-darwin-arm64 bun build --compile --minify --sourcemap --target=bun-darwin-x64 ./index.ts --outfile build/dpdf-darwin-x64 ================================================ FILE: docker-bake.hcl ================================================ variable "DEFAULT_TAG" { default = ["openbayes/docusaurus-prince-pdf:local"] } # Special target: https://github.com/docker/metadata-action#bake-definition target "docker-metadata-action" {} # Default target if none specified group "default" { targets = ["build-local"] } target "build" { inherits = ["docker-metadata-action"] } target "build-local" { inherits = ["build"] tags = "${DEFAULT_TAG}" output = ["type=docker"] } target "build-all" { inherits = ["build"] platforms = [ "linux/amd64", "linux/arm64", ] } ================================================ FILE: index.ts ================================================ import { exec } from 'node:child_process' import { parseArgs } from 'node:util' import jsdom from 'jsdom' // const browser = new Browser() const { JSDOM } = jsdom const buffer = new Set() const baseDir = import.meta.dir const { values } = parseArgs({ args: Bun.argv, options: { /** Base URL, should be the `baseUrl` of the Docusaurus instance (e.g. 
https://docusaurus.io/docs/) */ url: { type: 'string', short: 'u', }, /** CSS selector to find the link of the next page */ selector: { type: 'string', short: 's', }, /** Working directory. Default to `./pdf` */ dest: { type: 'string', short: 'd', default: './pdf', }, /** Change default list output filename */ file: { type: 'string', short: 'f', }, /** Change PDF output filename */ output: { type: 'string', short: 'o', }, /** Include passed URL in generated PDF */ 'include-index': { type: 'boolean', }, /** Prepend additional pages, split with comma */ prepend: { type: 'string', }, /** Append additional pages, split with comma */ append: { type: 'string', }, /** Additional options for Prince. ie. `--prince-args="--page-size='210mm 297mm'"` or `--prince-args "\\-\\-page\\-size='210mm 297mm'"` */ 'prince-args': { type: 'string', }, /** Use external Prince docker image to generate PDF. See https://github.com/sparanoid/docker-prince for more info */ 'prince-docker': { type: 'boolean', }, /** Fetch list without generating PDF */ 'list-only': { type: 'boolean', }, /** Generate PDF without fetching list. Ensure list exists */ 'pdf-only': { type: 'boolean', }, /** Specify the cookie with the domain part, e.g. `--cookie="token=123456; domain=example.com;"` */ cookie: { type: 'string', }, }, strict: true, allowPositionals: true, }) const url = values.url ? values.url.replace(/\/$/, '') : 'https://dev.openbayes.com/docs' const parsedUrl = new URL(url) const baseUrl = parsedUrl.origin const scope = parsedUrl.pathname const scopeName = scope !== '/' ? 
`-${scope.replace(/\/$/g, '').replace(/^\//g, '').replace(/\//g, '-')}` : ''

// Output locations: both default to `<dest>/<hostname><scopeName>.<ext>` unless
// overridden with --file / --output.
const dest = values.dest
const listFile = values.file || `${dest}/${parsedUrl.hostname}${scopeName}.txt`
const pdfFile = values.output || `${dest}/${parsedUrl.hostname}${scopeName}.pdf`

const fetchOptions = {}

/**
 * Run a shell command through `child_process.exec`.
 *
 * @param cmd Full command line to execute.
 * @returns Resolves with the trimmed stdout/stderr of the command; rejects with
 *   the error reported by `exec` when the command fails.
 */
function execute(cmd: string): Promise<{
  stdout: string
  stderr: string
}> {
  const s = (b: string) => String(b).trim()

  return new Promise((resolve, reject) => {
    exec(cmd, (error, stdout, stderr) => {
      if (error) return reject(error)
      resolve({ stdout: s(stdout), stderr: s(stderr) })
    })
  })
}

/**
 * Build the Prince command line and run it to turn a URL list into a PDF.
 *
 * @param list Path of the text file holding one URL per line.
 * @param filename Path of the PDF to write.
 * @param cookie Optional cookie string forwarded to Prince via `--cookie`.
 * @throws Error wrapping the `exec` failure when Prince exits unsuccessfully.
 */
async function generatePdf(list: string, filename: string, cookie?: string) {
  console.log(`Generating PDF ${filename}`)

  const args = values['prince-args'] || ''
  const cookieArg = cookie ? `--cookie "${cookie}"` : ''

  // `--prince-docker` decides whether Prince runs from the Docker image or from
  // the local install. `--prince-args` only appends extra options and must not
  // influence that choice (it previously did, leaving `--prince-docker` inert).
  const princeCmd = values['prince-docker']
    ? `docker run --rm -i -v ${baseDir}/:/config sparanoid/prince --no-warn-css --style=/config/print.css ${cookieArg} --input-list=/config/${list} -o /config/${filename} ${args}`
    : `prince --no-warn-css --style=${baseDir}/print.css ${cookieArg} --input-list=${list} -o ${filename} ${args}`

  console.log(`Executing command: ${princeCmd}`)

  // TODO: https://github.com/oven-sh/bun/issues/9747
  // await $`${princeCmd}`
  await execute(princeCmd)
    .then(resp => {
      console.log(resp.stdout)
      console.log(`Done`)
    })
    .catch(err => {
      throw new Error(err)
    })
}

/**
 * Fetch one page, look for its "next page" link and recurse into it. When no
 * further link exists (or a link repeats), append the `--append` pages, write
 * the collected list to `listFile` and, unless `--list-only` is set, start the
 * PDF generation.
 *
 * @param url Absolute URL of the page to fetch.
 */
async function requestPage(url: string) {
  try {
    const resp = await fetch(url, fetchOptions)
    const body = await resp.text()

    const dom = new JSDOM(body).window
    const nextLinkEl = dom.document.body.querySelector(values.selector || '.pagination-nav__link--next')

    // TODO: jsdom does not have built-in DOM types.
    const nextLink = nextLinkEl && 'href' in nextLinkEl && `${baseUrl}${nextLinkEl.href}`
    const cycle = buffer.has(nextLink)

    if (!cycle && nextLink) {
      // Reuse the guarded value above instead of rebuilding it from
      // `nextLinkEl.href` outside the `'href' in` check.
      console.log(`Got link: ${nextLink}`)
      buffer.add(nextLink)
      requestPage(nextLink)
    } else {
      if (cycle) {
        console.log(`Pagination cycle detected on ${url}`)
      } else {
        console.log('No next link found!')
      }

      if (values.append) {
        values.append.split(',').forEach(item => {
          const url = item.match(/^https?:\/\//) ? item : `${baseUrl}${scope}${item}`
          buffer.add(url)
          console.log(`Got link: ${url} [append]`)
        })
      }

      if (buffer.size > 0) {
        console.log(`Writing buffer (${buffer.size} links) to ${listFile}`)
        await Bun.write(listFile, [...buffer].join('\n'))

        if (!values['list-only']) {
          // Not awaited: a Prince failure is not logged by the catch below as
          // a fetch error.
          generatePdf(listFile, pdfFile, values.cookie)
        }
      } else {
        console.log('No buffer to write!')
      }
    }
  } catch (err) {
    console.error('Error fetching page:', err)
  }
}

// Entry point: either reuse an existing list (--pdf-only) or seed the buffer
// with the --prepend / --include-index pages and start crawling at the base URL.
if (values['pdf-only']) {
  generatePdf(listFile, pdfFile, values.cookie)
} else {
  if (values.prepend) {
    values.prepend.split(',').forEach(item => {
      const url = item.match(/^https?:\/\//) ? item : `${baseUrl}${scope}${item}`
      buffer.add(url)
      console.log(`Got link: ${url} [prepend]`)
    })
  }

  if (values['include-index']) {
    console.log(`Got link: ${baseUrl}${scope} [index]`)
    buffer.add(`${baseUrl}${scope}`)
  }

  requestPage(`${baseUrl}${scope}`)
}

================================================
FILE: package.json
================================================
{
  "name": "docusaurus-prince-pdf",
  "version": "1.2.2",
  "description": "Extract rendered data from Docusaurus and generate PDF, the hard way",
  "main": "index.js",
  "bin": "index.js",
  "type": "module",
  "repository": "https://github.com/signcl/docusaurus-prince-pdf",
  "author": "OpenBayes",
  "license": "MIT",
  "scripts": {
    "test": "bun run index.ts -u https://docusaurus.io/docs/",
    "build": "bash build.sh",
    "release": "bunx release-it",
    "lint": "biome check",
    "format": "biome format --write"
  },
  "publishConfig": {
    "registry": "https://registry.npmjs.org/"
  },
  "release-it": {
    "github": {
      "release": true
    }
  },
  "dependencies": {
    "jsdom": "^26.1.0"
  },
  "packageManager": "bun@1.1.7",
  "devDependencies": {
    "@biomejs/biome": "^2.2.0",
    "@types/bun": "latest",
    "@types/jsdom": "^21.1.7"
  },
  "peerDependencies": {
    "typescript": "^5.9.2"
  }
}

================================================
FILE: print.css
================================================
@media print {
  /* Fix pages cut off bug by Prince */
  /* Ref https://www.princexml.com/forum/topic/4608 */
  .row {
    display: block !important;
  }

  .markdown header h1 {
    /* biome-ignore lint/correctness/noUnknownFunction: vendor-specific */
    string-set: doctitle content();
  }

  @page {
    /* biome-ignore lint/correctness/noUnknownProperty: vendor-specific */
    prince-shrink-to-fit: auto;
  }

  /* Elements should be removed in PDF */
  .navbar,
  .pagination-nav,
  .theme-doc-breadcrumbs,
  a.hash-link,
  div[class*="docItemContainer"] article footer,
  aside[class*="docSidebarContainer"],
  a[class*="skipToContent"],
  div[class*="lastUpdated"],
  div[class*="tocCollapsible"],
  div[class*="tableOfContents"],
.footer { display: none !important; } } ================================================ FILE: tsconfig.json ================================================ { "compilerOptions": { // Enable latest features "lib": ["ESNext"], "types": ["@types/bun"], "target": "ESNext", "module": "ESNext", "moduleDetection": "force", "jsx": "react-jsx", "allowJs": true, // Bundler mode "moduleResolution": "bundler", "allowImportingTsExtensions": true, "verbatimModuleSyntax": true, "noEmit": true, // Best practices "strict": true, "skipLibCheck": true, "noFallthroughCasesInSwitch": true, // Some stricter flags (disabled by default) "noUnusedLocals": false, "noUnusedParameters": false, "noPropertyAccessFromIndexSignature": false } }