Repository: alvarcarto/url-to-pdf-api
Branch: master
Commit: 2fa83fd9f886
Files: 32
Total size: 6.0 MB
Directory structure:
gitextract_altncbd2/
├── .eslintrc
├── .gitignore
├── .travis.yml
├── .vscode/
│ └── launch.json
├── CHANGELOG.md
├── LICENSE
├── Procfile
├── README.md
├── app.json
├── docs/
│ ├── heroku.xml
│ ├── local-examples.md
│ └── logo.sketch
├── package.json
├── src/
│ ├── app.js
│ ├── config.js
│ ├── core/
│ │ └── render-core.js
│ ├── http/
│ │ └── render-http.js
│ ├── index.js
│ ├── middleware/
│ │ ├── error-logger.js
│ │ ├── error-responder.js
│ │ └── require-https.js
│ ├── router.js
│ └── util/
│ ├── express.js
│ ├── logger.js
│ ├── require-envs.js
│ └── validation.js
└── test/
├── resources/
│ ├── large-linked.html
│ ├── large.html
│ ├── postmark-receipt.html
│ └── special-chars.html
├── test-all.js
└── util/
└── index.js
================================================
FILE CONTENTS
================================================
================================================
FILE: .eslintrc
================================================
{
"env": {
"browser": true,
"amd": true,
"node": true,
"es6": true
},
"extends": "airbnb-base",
"rules": {
"no-implicit-coercion": "error",
"no-process-env": "error",
"no-path-concat": "error",
"import/no-extraneous-dependencies": ["error", {"devDependencies": true}],
"no-use-before-define": ["error", { "functions": false }],
"no-underscore-dangle": "off",
"no-console": "off",
"comma-dangle": ["error", {
"arrays": "always-multiline",
"objects": "always-multiline",
"imports": "always-multiline",
"exports": "always-multiline",
"functions": "ignore"
}],
"function-paren-newline": "off"
}
}
================================================
FILE: .gitignore
================================================
.env
.DS_Store
.idea
# Logs
logs
*.log
npm-debug.log*
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
# nyc test coverage
.nyc_output
# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# node-waf configuration
.lock-wscript
# Compiled binary addons (http://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules
jspm_packages
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
================================================
FILE: .travis.yml
================================================
language: node_js
node_js:
- "10"
env:
- ALLOW_HTTP=true
================================================
FILE: .vscode/launch.json
================================================
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch Program",
"program": "${workspaceFolder}/src/index.js",
"env": {
"NODE_ENV": "development",
"PORT": "9000",
"ALLOW_HTTP": "true",
}
}
]
}
================================================
FILE: CHANGELOG.md
================================================
# CHANGELOG
* change the `:html` output to return `document.documentElement.innerHTML` instead of previously used `document.body.innerHTML`
## 1.0.0
* initial version
================================================
FILE: LICENSE
================================================
Copyright (c) 2017 Kimmo Brunfeldt
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: Procfile
================================================
web: NODE_ENV=production node src/index.js
================================================
FILE: README.md
================================================
[](https://heroku.com/deploy?template=https://github.com/alvarcarto/url-to-pdf-api)
[](https://travis-ci.org/alvarcarto/url-to-pdf-api)
# URL to PDF Microservice
> Web page PDF rendering done right. Microservice for rendering receipts, invoices, or any content. Packaged to an easy API.

**⚠️ WARNING ⚠️** *Don't serve this API publicly to the internet unless you are aware of the
risks. It allows API users to run any JavaScript code inside a Chrome session on the server.
It's fairly easy to expose the contents of files on the server. You have been warned! See https://github.com/alvarcarto/url-to-pdf-api/issues/12 for background.*
**⭐️ Features:**
* Converts any URL or HTML content to a PDF file or an image (PNG/JPEG)
* Rendered with Headless Chrome, using [Puppeteer](https://github.com/GoogleChrome/puppeteer). The PDFs should match to the ones generated with a desktop Chrome.
* Sensible defaults but everything is configurable.
* Single-page app (SPA) support. Waits until all network requests are finished before rendering.
* Easy deployment to Heroku. We love Lambda but...Deploy to Heroku button.
* Renders lazy loaded elements. *(scrollPage option)*
* Supports optional `x-api-key` authentication. *(`API_TOKENS` env var)*
Usage is as simple as https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com. There's also a `POST /api/render` if you prefer to send options in the body.
**🔍 Why?**
This microservice is useful when you need to automatically produce PDF files
for whatever reason. The files could be receipts, weekly reports, invoices,
or any content.
PDFs can be generated in many ways, but one of them is to convert HTML+CSS
content to a PDF. This API does just that.
**🚀 Shortcuts:**
* [Examples](#examples)
* [API](#api)
* [I want to run this myself](#development)
## How it works

Local setup is identical except Express API is running on your machine
and requests are direct connections to it.
### Good to know
* **By default, page's `@media print` CSS rules are ignored**. We set Chrome to emulate `@media screen` to make the default PDFs look more like actual sites. To get results closer to desktop Chrome, add `&emulateScreenMedia=false` query parameter. See more at [Puppeteer API docs](https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md#pagepdfoptions).
* Chrome is launched with `--no-sandbox --disable-setuid-sandbox` flags to fix usage in Heroku. See [this issue](https://github.com/GoogleChrome/puppeteer/issues/290).
* Heavy pages may cause Chrome to crash if the server doesn't have enough RAM.
* Docker image for this can be found here: https://github.com/restorecommerce/pdf-rendering-srv
## Examples
**⚠️ Restrictions ⚠️:**
* For security reasons the urls have been restricted and HTML rendering is disabled. For full demo, run this app locally or deploy to Heroku.
* The demo Heroku app runs on a free dyno which sleeps when idle. A request to a sleeping dyno may take as long as 30 seconds.
**The most minimal example, render google.com**
https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com
**The most minimal example, render google.com as PNG image**
https://url-to-pdf-api.herokuapp.com/api/render?output=screenshot&url=http://google.com
**Use the default @media print instead of @media screen.**
https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&emulateScreenMedia=false
**Use scrollPage=true which tries to reveal all lazy loaded elements. Not perfect but better than without.**
https://url-to-pdf-api.herokuapp.com/api/render?url=http://www.andreaverlicchi.eu/lazyload/demos/lazily_load_lazyLoad.html&scrollPage=true
**Render only the first page.**
https://url-to-pdf-api.herokuapp.com/api/render?url=https://en.wikipedia.org/wiki/Portable_Document_Format&pdf.pageRanges=1
**Render A5-sized PDF in landscape.**
https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&pdf.format=A5&pdf.landscape=true
**Add 2cm margins to the PDF.**
https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&pdf.margin.top=2cm&pdf.margin.right=2cm&pdf.margin.bottom=2cm&pdf.margin.left=2cm
**Wait for extra 1000ms before render.**
https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&waitFor=1000
**Download the PDF with a given attachment name**
https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&attachmentName=google.pdf
**Wait until an element matching the selector `input` appears.**
https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com&waitFor=input
**Render HTML sent in JSON body**
*NOTE: Demo app has disabled html rendering for security reasons.*
```bash
curl -o html.pdf -XPOST -d'{"html": "test"}' -H"content-type: application/json" http://localhost:9000/api/render
```
**Render HTML sent as text body**
*NOTE: Demo app has disabled html rendering for security reasons.*
```bash
curl -o html.pdf -XPOST -d@test/resources/large.html -H"content-type: text/html" http://localhost:9000/api/render
```
## API
To understand the API options, it's useful to know how [Puppeteer](https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md)
is internally used by this API. The [render code](https://github.com/alvarcarto/url-to-pdf-api/blob/master/src/core/render-core.js)
is quite simple, check it out. Render flow:
1. **`page.setViewport(options)`** where options matches `viewport.*`.
2. *Possibly* **`page.emulateMedia('screen')`** if `emulateScreenMedia=true` is set.
3. Render url **or** html.
If `url` is defined, **`page.goto(url, options)`** is called and options match `goto.*`.
Otherwise **`page.setContent(html, options)`** is called where html is taken from request body, and options match `goto.*`.
4. *Possibly* **`page.waitFor(numOrStr)`** if e.g. `waitFor=1000` is set.
5. *Possibly* **Scroll the whole page** to the end before rendering if e.g. `scrollPage=true` is set.
Useful if you want to render a page which lazy loads elements.
6. Render the output
* If output is `pdf` rendering is done with **`page.pdf(options)`** where options matches `pdf.*`.
* Else if output is `screenshot` rendering is done with **`page.screenshot(options)`** where options matches `screenshot.*`.
### GET /api/render
All options are passed as query parameters.
Parameter names match [Puppeteer options](https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md).
These options are exactly the same as its `POST` counterpart, but options are
expressed with the dot notation. E.g. `?pdf.scale=2` instead of `{ pdf: { scale: 2 }}`.
The only required parameter is `url`.
Parameter | Type | Default | Description
----------|------|---------|------------
url | string | - | URL to render as PDF. (required)
output | string | pdf | Specify the output format. Possible values: `pdf` , `screenshot` or `html`.
emulateScreenMedia | boolean | `true` | Emulates `@media screen` when rendering the PDF.
enableGPU | boolean | `false` | When set, enables chrome GPU. For windows user, this will always return false. See https://developers.google.com/web/updates/2017/04/headless-chrome
ignoreHttpsErrors | boolean | `false` | Ignores possible HTTPS errors when navigating to a page.
scrollPage | boolean | `false` | Scroll page down before rendering to trigger lazy loading elements.
waitFor | number or string | - | Number in ms to wait before render or selector element to wait before render.
attachmentName | string | - | When set, the `content-disposition` headers are set and browser will download the PDF instead of showing inline. The given string will be used as the name for the file.
viewport.width | number | `1600` | Viewport width.
viewport.height | number | `1200` | Viewport height.
viewport.deviceScaleFactor | number | `1` | Device scale factor (could be thought of as dpr).
viewport.isMobile | boolean | `false` | Whether the meta viewport tag is taken into account.
viewport.hasTouch | boolean | `false` | Specifies if viewport supports touch events.
viewport.isLandscape | boolean | `false` | Specifies if viewport is in landscape mode.
cookies[0][name] | string | - | Cookie name (required)
cookies[0][value] | string | - | Cookie value (required)
cookies[0][url] | string | - | Cookie url
cookies[0][domain] | string | - | Cookie domain
cookies[0][path] | string | - | Cookie path
cookies[0][expires] | number | - | Cookie expiry in unix time
cookies[0][httpOnly] | boolean | - | Cookie httpOnly
cookies[0][secure] | boolean | - | Cookie secure
cookies[0][sameSite] | string | - | `Strict` or `Lax`
goto.timeout | number | `30000` | Maximum navigation time in milliseconds, defaults to 30 seconds, pass 0 to disable timeout.
goto.waitUntil | string | `networkidle0` | When to consider navigation succeeded. Options: `load`, `domcontentloaded`, `networkidle0`, `networkidle2`. `load` - consider navigation to be finished when the load event is fired. `domcontentloaded` - consider navigation to be finished when the `DOMContentLoaded` event is fired. `networkidle0` - consider navigation to be finished when there are no more than 0 network connections for at least `500` ms. `networkidle2` - consider navigation to be finished when there are no more than 2 network connections for at least `500` ms.
pdf.scale | number | `1` | Scale of the webpage rendering.
pdf.printBackground | boolean | `false`| Print background graphics.
pdf.displayHeaderFooter | boolean | `false` | Display header and footer.
pdf.headerTemplate | string | - | HTML template to use as the header of each page in the PDF. **Currently Puppeteer basically only supports a single line of text and you must use pdf.margins+CSS to make the header appear!** See https://github.com/alvarcarto/url-to-pdf-api/issues/77.
pdf.footerTemplate | string | - | HTML template to use as the footer of each page in the PDF. **Currently Puppeteer basically only supports a single line of text and you must use pdf.margins+CSS to make the footer appear!** See https://github.com/alvarcarto/url-to-pdf-api/issues/77.
pdf.landscape | boolean | `false` | Paper orientation.
pdf.pageRanges | string | - | Paper ranges to print, e.g., '1-5, 8, 11-13'. Defaults to the empty string, which means print all pages.
pdf.format | string | `A4` | Paper format. If set, takes priority over width or height options.
pdf.width | string | - | Paper width, accepts values labeled with units.
pdf.height | string | - | Paper height, accepts values labeled with units.
pdf.fullPage | boolean | - | Create PDF in a single page
pdf.margin.top | string | - | Top margin, accepts values labeled with units.
pdf.margin.right | string | - | Right margin, accepts values labeled with units.
pdf.margin.bottom | string | - | Bottom margin, accepts values labeled with units.
pdf.margin.left | string | - | Left margin, accepts values labeled with units.
screenshot.fullPage | boolean | `true` | When true, takes a screenshot of the full scrollable page.
screenshot.type | string | `png` | Screenshot image type. Possible values: `png`, `jpeg`
screenshot.quality | number | - | The quality of the JPEG image, between 0-100. Only applies when `screenshot.type` is `jpeg`.
screenshot.omitBackground | boolean | `false` | Hides default white background and allows capturing screenshots with transparency.
screenshot.clip.x | number | - | Specifies x-coordinate of top-left corner of clipping region of the page.
screenshot.clip.y | number | - | Specifies y-coordinate of top-left corner of clipping region of the page.
screenshot.clip.width | number | - | Specifies width of clipping region of the page.
screenshot.clip.height | number | - | Specifies height of clipping region of the page.
screenshot.selector | string | - | Specifies css selector to clip the screenshot to.
**Example:**
```bash
curl -o google.pdf https://url-to-pdf-api.herokuapp.com/api/render?url=http://google.com
```
### POST /api/render - (JSON)
All options are passed in a JSON body object.
Parameter names match [Puppeteer options](https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md).
These options are exactly the same as its `GET` counterpart.
**Body**
Either `url` or `html` is required.
```js
{
// Url to render. Either url or html is required
url: "https://google.com",
// One of "pdf", "screenshot" or "html"
output: "pdf",
// HTML content to render. Either url or html is required
html: "Your content",
// If we should emulate @media screen instead of print
emulateScreenMedia: true,
// If we should ignore HTTPS errors
ignoreHttpsErrors: false,
// If true, page is scrolled to the end before rendering
// Note: this makes rendering a bit slower
scrollPage: false,
// Passed to Puppeteer page.waitFor()
waitFor: null,
// Passed to Puppeteer page.setCookies()
cookies: [{ ... }],
// Passed to Puppeteer page.setViewport()
viewport: { ... },
// Passed to Puppeteer page.goto() as the second argument after url
goto: { ... },
// Passed to Puppeteer page.pdf()
pdf: { ... },
// Passed to Puppeteer page.screenshot()
screenshot: { ... },
}
```
**Example:**
```bash
curl -o google.pdf -XPOST -d'{"url": "http://google.com"}' -H"content-type: application/json" http://localhost:9000/api/render
```
```bash
curl -o html.pdf -XPOST -d'{"html": "test"}' -H"content-type: application/json" http://localhost:9000/api/render
```
### POST /api/render - (HTML)
HTML to render is sent in body. All options are passed in query parameters.
Supports exactly the same query parameters as `GET /api/render`, except `url`
parameter.
*Remember that relative links do not work.*
**Example:**
```bash
curl -o receipt.html https://rawgit.com/wildbit/postmark-templates/master/templates_inlined/receipt.html
curl -o html.pdf -XPOST -d@receipt.html -H"content-type: text/html" http://localhost:9000/api/render?pdf.scale=1
```
### GET /healthcheck
Health check endpoint used for monitoring if the service is still up and running.
```bash
curl -XGET http://localhost:9000/healthcheck
```
## Development
To get this thing running, you have two options: run it in Heroku, or locally.
The code requires Node 8+ (async, await).
#### 1. Heroku deployment
Scroll this readme up to the Deploy to Heroku -button. Click it and follow
instructions.
**WARNING:** *Heroku dynos have a very low amount of RAM. Rendering heavy pages
may cause Chrome instance to crash inside Heroku dyno. 512MB should be
enough for most real-life use cases such as receipts. Some news sites may need
even 2GB of RAM.*
#### 2. Local development
First, clone the repository and cd into it.
* `cp .env.sample .env`
* Fill in the blanks in `.env`
* `npm install`
* `npm start` Start express server locally
* Server runs at http://localhost:9000 or what `$PORT` env defines
### Techstack
* Node 8+ (async, await), written in ES7
* [Express.js](https://expressjs.com/) app with a nice internal architecture, based on [these conventions](https://github.com/kimmobrunfeldt/express-example).
* Hapi-style Joi validation with [express-validation](https://github.com/andrewkeig/express-validation)
* Heroku + [Puppeteer buildpack](https://github.com/jontewks/puppeteer-heroku-buildpack)
* [Puppeteer](https://github.com/GoogleChrome/puppeteer) to control Chrome
================================================
FILE: app.json
================================================
{
"name": "url-to-pdf-api",
"description": "Web page PDF rendering done right. Packaged to an easy API.",
"keywords": [
"pdf",
"html",
"html to pdf",
"html 2 pdf",
"render"
],
"website": "https://github.com/alvarcarto/url-to-pdf-api",
"repository": "https://github.com/alvarcarto/url-to-pdf-api",
"env": {
"ALLOW_HTTP": {
"description": "When set to \"true\", unsecure requests are allowed.",
"value": "false"
},
"API_TOKENS": {
"description": "Comma-separated list of accepted keys in x-api-key header.",
"required": false
}
},
"success_url": "/api/render?url=https://github.com/alvarcarto/url-to-pdf-api/blob/master/README.md",
"buildpacks": [
{
"url": "https://github.com/jontewks/puppeteer-heroku-buildpack"
},
{
"url": "http://github.com/heroku/heroku-buildpack-nodejs.git"
},
{
"url": "https://github.com/debitoor/heroku-buildpack-converter-fonts"
}
]
}
================================================
FILE: docs/heroku.xml
================================================
7VhJk+I2FP41HIeSLRb7CHSTSVVPhZo+JHMUtsBKC8uRZZb8+ujJ8obcDEx7Dqkac8D69LS9971FHuHV4fybJFnyRcSUj3wUn0f4aeT7Hp7O9R8glxIJJmEJ7CWLrVADvLJ/qQWRRQsW07wjqITgimVdMBJpSiPVwYiU4tQV2wneXTUje+oArxHhLvoni1ViTzFFDf6Zsn1Srewh27Ml0dteiiK16418vDNP2X0g1VxWPk9ILE4tCD+P8EoKocq3w3lFOei2Uls5bv1Ob71vSVN1z4BpOeBIeEGrHZt9qUulC3MaCvJohJenhCn6mpEIek/a+hpL1IHrlqdfcyXFG10JLqQZjScrhMKw7qm0iTWyY5xXkqlIKUAiVZYNHrbt1mQz82g8JnlitgRrZlSyA1VUwrZYutdwaGU2RGk8NYI+AmF7XioVPb+rM6+2hGY4FXpyedEidoA/t1qz5PZCa8xTQxWMLZa0aOJPLEgsPff13I2J9Iu10jsmdi1EY01e27SKjAp5rBXUsg89M/UXmHI8ta1vVU+qN9LqguY3a/RcEakW4FfNCgZbM9inkaFpXElEnOQ5i0rQisASf1OlLta8pFBCQ0KqROxFSviLENm7JHqCn0Mi7wHGlEoDTd22ulasKGRkpWwY00fd00oK9ZNDUk4UO3an/4ih545rPp8zSfNcg4vN7wP7aa2oHhW3/BTi2RR+D6v+Qy6HA9RxuTp+tlzO83tcbjaExyHHEqtE6nk0tiIpsbO1baHDegav7GASTVvzoAemM82Csz0EJgW0r9EXsqV8I3KmmIDerVBKHLQAh45lnV3a9ijzC16axRZ5ViZEIAGpGjt2BmYs7X6eEqUgky5AB/46ilNvzHQu3THNIDmO9Ir+OiaK6D/ANeXWXLupumQU3jEoLzI6+OT5wTiDoLtsCNjLMGSe7zLswUwwALkmXjeeYzxzyDXt4dZ0CG7hngQ84wryFzvq170yhyyhrbxGwE6uXA9U8GuEswp5IUUaJcBmS+t67kbGHfVHRtOqlPJXoC7ClCm2pAmH/xQ0V7mp3ABlKcuTO6f+SoGHpWp9tHla3xynQfd014q5clDNFnUrIlYsdIlJrN9yulM97nxgcQyL9Abgroe0KY5uUNzI2a37Q/B94nWD6Txwg+ls+pMI71acn6nWfAHr0625M6TiJ9gr0trRnBrMYp5rMV30Icdi3iAVZ3BVcU5ci6G+inMAi9V10a+C8w4OrM3zQMH5gUrSDt0Ilqq2e6OxN9dZDc9D7CHcTW6hP8aTSaDvMMHMR7Nw3p2+LH7tjO174/UiOBw3swThfNYlaHWPrOYtK2hnXsO9+uj3BZBZT8b8v5df6LvlV0yPLKL5pwkobwqXwPUXEm2FeOutwND1Jbn/NuwQv3x+5Lre4wt1BrvKmUPkMHR1IXDvA0FPPAyGiIfI4duveDhcPOxewL2eG/j8g2Hz7lLFvYHXpcpXUShToRoXRi+CxObrHydpBPhwd/Pa2rf8sf2h7V6DOIXRIIUK7uYB3PdpDLuOiR93TN1svpOWiaT5GI2f/wM=
================================================
FILE: docs/local-examples.md
================================================
# Local examples
curl -o html.pdf -XPOST -d@test/resources/large-linked.html -H"content-type: text/html" https://url-to-pdf-api.herokuapp.com/api/render
================================================
FILE: package.json
================================================
{
"name": "url-to-pdf-api",
"version": "1.0.0",
"description": "Web page PDF rendering done right. Packaged to an easy API.",
"main": "src/index.js",
"engines": {
"node": "10.x.x"
},
"scripts": {
"start": "env-cmd nodemon --watch ./src -e js src/index.js",
"test": "mocha --timeout 10000 && npm run lint",
"lint": "eslint ."
},
"repository": {
"type": "git",
"url": "git+https://github.com/alvarcarto/url-to-pdf-api.git"
},
"author": "Kimmo Brunfeldt",
"license": "MIT",
"bugs": {
"url": "https://github.com/alvarcarto/url-to-pdf-api/issues"
},
"homepage": "https://github.com/alvarcarto/url-to-pdf-api#readme",
"dependencies": {
"bluebird": "^3.5.0",
"body-parser": "^1.18.2",
"compression": "^1.7.1",
"cors": "^2.8.4",
"express": "^4.15.5",
"express-validation": "^1.0.2",
"joi": "^11.1.1",
"lodash": "^4.17.15",
"morgan": "^1.9.1",
"normalize-url": "^5.0.0",
"pdf-parse": "^1.1.1",
"puppeteer": "^2.0.0",
"server-destroy": "^1.0.1",
"winston": "^2.3.1"
},
"devDependencies": {
"chai": "^4.1.2",
"env-cmd": "^9.0.1",
"eslint": "^4.8.0",
"eslint-config-airbnb-base": "^12.0.2",
"eslint-plugin-import": "^2.7.0",
"mocha": "^4.0.1",
"nodemon": "^1.12.1",
"supertest": "^3.0.0"
}
}
================================================
FILE: src/app.js
================================================
const express = require('express');
const morgan = require('morgan');
const bodyParser = require('body-parser');
const compression = require('compression');
const cors = require('cors');
const logger = require('./util/logger')(__filename);
const errorResponder = require('./middleware/error-responder');
const errorLogger = require('./middleware/error-logger');
const requireHttps = require('./middleware/require-https');
const createRouter = require('./router');
const config = require('./config');
/**
 * Builds the Express application: proxy/HTTPS settings, request logging,
 * CORS, body parsing, compression, the API router and the error middleware.
 *
 * @returns {object} The configured Express app (not yet listening).
 */
function createApp() {
  const app = express();

  // App is served behind Heroku's router.
  // This is needed to be able to use req.ip or req.secure.
  // `app.enable()` only accepts a setting name and stores `true`, so the hop
  // count must be passed through `app.set()` for it to take effect.
  app.set('trust proxy', 1);
  app.disable('x-powered-by');

  if (config.NODE_ENV !== 'production') {
    app.use(morgan('dev'));
  }

  if (!config.ALLOW_HTTP) {
    logger.info('All requests require HTTPS.');
    app.use(requireHttps());
  } else {
    logger.info('ALLOW_HTTP=true, unsafe requests are allowed. Don\'t use this in production.');
  }

  // ALLOW_URLS is an array (empty when the env var is unset), so check its
  // length; a plain truthiness check would log this line on every start.
  if (config.ALLOW_URLS && config.ALLOW_URLS.length > 0) {
    logger.info(`ALLOW_URLS set! Allowed urls patterns are: ${config.ALLOW_URLS.join(' ')}`);
  }

  if (config.DISABLE_HTML_INPUT) {
    logger.info('DISABLE_HTML_INPUT=true! Input HTML is disabled!');
  }

  const corsOpts = {
    origin: config.CORS_ORIGIN,
    methods: ['GET', 'POST', 'PUT', 'DELETE', 'OPTIONS', 'HEAD', 'PATCH'],
  };

  logger.info('Using CORS options:', corsOpts);
  app.use(cors(corsOpts));

  // Limit to 10mb if HTML has e.g. inline images
  app.use(bodyParser.text({ limit: '10mb', type: 'text/html' }));
  app.use(bodyParser.json({ limit: '10mb' }));

  app.use(compression({
    // Compress everything over 10 bytes
    threshold: 10,
  }));

  // Initialize routes
  const router = createRouter();
  app.use('/', router);

  // Error middleware goes last so it sees errors from every route above.
  app.use(errorLogger());
  app.use(errorResponder());

  return app;
}
module.exports = createApp;
================================================
FILE: src/config.js
================================================
/* eslint-disable no-process-env */
// Env vars should be cast to the correct types
// Shorthand for the process environment all settings are read from.
const env = process.env;

// Only the exact string 'true' switches a boolean flag on.
const isEnabled = value => value === 'true';

// Splits a comma-separated env value; an unset or empty value gives [].
const toList = value => (value ? value.split(',') : []);

// Application settings derived from environment variables.
const config = {
  PORT: Number(env.PORT) || 9000,
  NODE_ENV: env.NODE_ENV,
  LOG_LEVEL: env.LOG_LEVEL,
  ALLOW_HTTP: isEnabled(env.ALLOW_HTTP),
  DEBUG_MODE: isEnabled(env.DEBUG_MODE),
  DISABLE_HTML_INPUT: isEnabled(env.DISABLE_HTML_INPUT),
  CORS_ORIGIN: env.CORS_ORIGIN || '*',
  BROWSER_WS_ENDPOINT: env.BROWSER_WS_ENDPOINT,
  BROWSER_EXECUTABLE_PATH: env.BROWSER_EXECUTABLE_PATH,
  API_TOKENS: toList(env.API_TOKENS),
  ALLOW_URLS: toList(env.ALLOW_URLS),
};
module.exports = config;
================================================
FILE: src/core/render-core.js
================================================
const puppeteer = require('puppeteer');
const _ = require('lodash');
const config = require('../config');
const logger = require('../util/logger')(__filename);
/**
 * Creates the Puppeteer browser used for a render.
 *
 * Connects to an already running browser when config.BROWSER_WS_ENDPOINT is
 * set; otherwise launches a new one, optionally from
 * config.BROWSER_EXECUTABLE_PATH.
 *
 * @param {object} opts Render options.
 * @param {boolean} [opts.ignoreHttpsErrors] Forwarded as `ignoreHTTPSErrors`.
 * @param {boolean} [opts.enableGPU] When falsy, `--disable-gpu` is passed.
 * @returns {Promise<object>} Result of puppeteer.connect() or puppeteer.launch().
 */
async function createBrowser(opts) {
  const browserOpts = {
    ignoreHTTPSErrors: opts.ignoreHttpsErrors,
    // Puppeteer's option is spelled `slowMo`; the previous `sloMo` key was
    // silently ignored, so debug mode never slowed anything down.
    slowMo: config.DEBUG_MODE ? 250 : undefined,
  };

  if (config.BROWSER_WS_ENDPOINT) {
    browserOpts.browserWSEndpoint = config.BROWSER_WS_ENDPOINT;
    return puppeteer.connect(browserOpts);
  }

  if (config.BROWSER_EXECUTABLE_PATH) {
    browserOpts.executablePath = config.BROWSER_EXECUTABLE_PATH;
  }

  browserOpts.headless = !config.DEBUG_MODE;
  browserOpts.args = ['--no-sandbox', '--disable-setuid-sandbox'];

  // This code runs in Node, where the browser global `navigator` cannot be
  // relied upon; detect Windows through process.platform instead.
  const isWindows = process.platform === 'win32';
  if (!opts.enableGPU || isWindows) {
    browserOpts.args.push('--disable-gpu');
  }

  return puppeteer.launch(browserOpts);
}
/**
 * Evaluates, inside the page, the largest of the body's and the document
 * element's scroll/offset/client heights.
 *
 * @param {object} page Object exposing `evaluate(fn)`.
 * @returns {Promise<number>} The largest of the measured heights.
 */
async function getFullPageHeight(page) {
  // The callback is executed through page.evaluate, so it only touches
  // `document` and must not close over anything from this module.
  const measureTallest = () => {
    const pageBody = document.body;
    const root = document.documentElement;
    const candidates = [
      pageBody.scrollHeight,
      pageBody.offsetHeight,
      root.clientHeight,
      root.scrollHeight,
      root.offsetHeight,
    ];
    return Math.max(...candidates);
  };

  return page.evaluate(measureTallest);
}
/**
 * Renders a url or an HTML string with a browser page and returns the output.
 *
 * Flow: merge defaults with the given options, create a browser and page,
 * apply viewport / media emulation / cookies, load the content, optionally
 * wait and scroll, then produce the requested output. The browser is closed
 * afterwards unless config.DEBUG_MODE is set.
 *
 * @param {object} [_opts] Render options, merged over the defaults below.
 *   `output` is 'pdf', 'html' or anything else for a screenshot;
 *   `failEarly` may be 'all' or 'page' to turn failed requests into errors.
 * @returns {Promise<*>} Result of page.pdf(), the documentElement innerHTML,
 *   or the screenshot result. Undefined when `screenshot.selector` matches
 *   no element.
 * @throws {Error} With `status = 412` when a `failEarly` condition is hit;
 *   any error raised while rendering is logged and rethrown.
 */
async function render(_opts = {}) {
  const opts = _.merge({
    cookies: [],
    scrollPage: false,
    emulateScreenMedia: true,
    ignoreHttpsErrors: false,
    html: null,
    viewport: {
      width: 1600,
      height: 1200,
    },
    goto: {
      waitUntil: 'networkidle0',
    },
    output: 'pdf',
    pdf: {
      format: 'A4',
      printBackground: true,
    },
    screenshot: {
      type: 'png',
      fullPage: true,
    },
    failEarly: false,
  }, _opts);

  if ((_.get(_opts, 'pdf.width') && _.get(_opts, 'pdf.height')) || _.get(opts, 'pdf.fullPage')) {
    // pdf.format always overrides width and height, so we must delete it
    // when user explicitly wants to set width and height
    opts.pdf.format = undefined;
  }

  logOpts(opts);

  const browser = await createBrowser(opts);
  const page = await browser.newPage();

  page.on('console', (...args) => logger.info('PAGE LOG:', ...args));

  page.on('error', (err) => {
    logger.error(`Error event emitted: ${err}`);
    logger.error(err.stack);
    // Closing can itself fail; catch it so it does not surface as an
    // unhandled promise rejection.
    browser.close().catch((closeErr) => {
      logger.error(`Failed to close browser after page error: ${closeErr}`);
    });
  });

  // Request bookkeeping is kept in locals. Storing it on `this` put it on an
  // object shared by all calls, so concurrent renders mixed up their results.
  const failedResponses = [];
  let mainUrlResponse;

  // `status` and `url` are methods on Puppeteer's request/response objects;
  // _.result invokes them (and tolerates plain properties or a missing object).
  page.on('requestfailed', (request) => {
    failedResponses.push(request);

    if (_.result(request, 'url') === opts.url) {
      mainUrlResponse = request;
    }
  });

  page.on('response', (response) => {
    if (_.result(response, 'status') >= 400) {
      failedResponses.push(response);
    }

    if (_.result(response, 'url') === opts.url) {
      mainUrlResponse = response;
    }
  });

  let data;
  try {
    logger.info('Set browser viewport..');
    await page.setViewport(opts.viewport);

    if (opts.emulateScreenMedia) {
      logger.info('Emulate @media screen..');
      await page.emulateMedia('screen');
    }

    if (opts.cookies && opts.cookies.length > 0) {
      logger.info('Setting cookies..');

      const client = await page.target().createCDPSession();

      await client.send('Network.enable');
      await client.send('Network.setCookies', { cookies: opts.cookies });
    }

    if (_.isString(opts.html)) {
      logger.info('Set HTML ..');
      await page.setContent(opts.html, opts.goto);
    } else {
      logger.info(`Goto url ${opts.url} ..`);
      await page.goto(opts.url, opts.goto);
    }

    if (_.isNumber(opts.waitFor) || _.isString(opts.waitFor)) {
      logger.info(`Wait for ${opts.waitFor} ..`);
      await page.waitFor(opts.waitFor);
    }

    if (opts.scrollPage) {
      logger.info('Scroll page ..');
      await scrollPage(page);
    }

    if (failedResponses.length) {
      logger.warn(`Number of failed requests: ${failedResponses.length}`);
      failedResponses.forEach((response) => {
        // Failed requests carry no status, hence the 'FAILED' placeholder.
        logger.warn(`${_.result(response, 'status', 'FAILED')} ${_.result(response, 'url')}`);
      });

      if (opts.failEarly === 'all') {
        const err = new Error(`${failedResponses.length} requests have failed. See server log for more details.`);
        err.status = 412;
        throw err;
      }
    }

    if (opts.failEarly === 'page') {
      // Undefined when no request/response for opts.url was observed, which
      // is reported as a failure instead of crashing with a TypeError.
      const mainStatus = _.result(mainUrlResponse, 'status');
      if (mainStatus !== 200) {
        const msg = `Request for ${opts.url} did not directly succeed and returned status ${mainStatus}`;
        const err = new Error(msg);
        err.status = 412;
        throw err;
      }
    }

    logger.info('Rendering ..');
    if (config.DEBUG_MODE) {
      const msg = `\n\n---------------------------------\n
Chrome does not support rendering in "headed" mode.
See this issue: https://github.com/GoogleChrome/puppeteer/issues/576
\n---------------------------------\n\n
`;
      throw new Error(msg);
    }

    if (opts.output === 'pdf') {
      if (opts.pdf.fullPage) {
        const height = await getFullPageHeight(page);
        opts.pdf.height = height;
      }
      data = await page.pdf(opts.pdf);
    } else if (opts.output === 'html') {
      data = await page.evaluate(() => document.documentElement.innerHTML);
    } else {
      // This is done because puppeteer throws an error if fullPage and clip is used at the same
      // time even though clip is just empty object {}
      const screenshotOpts = _.cloneDeep(_.omit(opts.screenshot, ['clip']));
      const clipContainsSomething = _.some(opts.screenshot.clip, val => !_.isUndefined(val));
      if (clipContainsSomething) {
        screenshotOpts.clip = opts.screenshot.clip;
      }

      if (_.isNil(opts.screenshot.selector)) {
        data = await page.screenshot(screenshotOpts);
      } else {
        const selElement = await page.$(opts.screenshot.selector);
        const selectorScreenOpts = _.cloneDeep(_.omit(screenshotOpts, ['selector', 'fullPage']));
        // NOTE(review): when the selector matches nothing, `data` stays
        // undefined and is returned as such - confirm callers expect this.
        if (!_.isNull(selElement)) {
          data = await selElement.screenshot(selectorScreenOpts);
        }
      }
    }
  } catch (err) {
    logger.error(`Error when rendering page: ${err}`);
    logger.error(err.stack);
    throw err;
  } finally {
    logger.info('Closing browser..');
    // In debug mode the browser is left open.
    if (!config.DEBUG_MODE) {
      await browser.close();
    }
  }

  return data;
}
/**
 * Scrolls the page down step by step until the bottom is (nearly) reached,
 * then scrolls back to the top. Used to trigger lazy loading elements.
 *
 * @param {object} page - Object exposing `evaluate(fn)`, which runs `fn` in the page.
 * @returns {Promise<void>} Resolves 500 ms after the bottom has been reached.
 * @throws {Error} Rejects with an Error if the bottom is not reached in 30 seconds.
 */
async function scrollPage(page) {
  // The callback is executed through page.evaluate, so it must be
  // self-contained and must not reference anything from this module's scope.
  await page.evaluate(() => {
    const scrollInterval = 100;
    const scrollStep = Math.floor(window.innerHeight / 2);
    const bottomThreshold = 400;
    const maxScrollTime = 30000;

    function bottomPos() {
      return window.pageYOffset + window.innerHeight;
    }

    return new Promise((resolve, reject) => {
      let timedOut = false;

      // Reject with a real Error (not `undefined`) so the failure is readable in logs
      const timeoutId = setTimeout(() => {
        timedOut = true;
        reject(new Error(`Scrolling the page did not finish in ${maxScrollTime} ms`));
      }, maxScrollTime);

      function scrollDown() {
        if (timedOut) {
          // Stop scrolling once the caller has already been told we gave up
          return;
        }

        window.scrollBy(0, scrollStep);

        if (document.body.scrollHeight - bottomPos() < bottomThreshold) {
          // Bottom reached: the timeout guard is no longer needed
          clearTimeout(timeoutId);
          window.scrollTo(0, 0);
          setTimeout(resolve, 500);
          return;
        }

        setTimeout(scrollDown, scrollInterval);
      }

      scrollDown();
    });
  });
}
/**
 * Logs the render options as pretty-printed JSON. When `opts.html` is set,
 * its content is replaced with '...' in the logged copy; `opts` is not mutated.
 *
 * @param {object} opts - Render options.
 */
function logOpts(opts) {
  const printable = _.cloneDeep(opts);

  if (opts.html) {
    printable.html = '...';
  }

  const serialized = JSON.stringify(printable, null, 2);
  logger.info(`Rendering with opts: ${serialized}`);
}
// Expose render as this module's only export.
module.exports = {
render,
};
================================================
FILE: src/http/render-http.js
================================================
const { URL } = require('url');
const _ = require('lodash');
const normalizeUrl = require('normalize-url');
const ex = require('../util/express');
const renderCore = require('../core/render-core');
const logger = require('../util/logger')(__filename);
const config = require('../config');
/**
 * Resolves the response content type for the requested output.
 *
 * @param {object} opts - Render options; reads `output` and `screenshot.type`.
 * @returns {string} MIME type of the rendered data.
 * @throws {Error} When the output is a screenshot of an unknown type.
 */
function getMimeType(opts) {
  switch (opts.output) {
    case 'pdf':
      return 'application/pdf';
    case 'html':
      return 'text/html';
    default:
      break;
  }

  // Any other output is a screenshot, whose mime type depends on the image type
  const type = _.get(opts, 'screenshot.type');
  if (type === 'png') {
    return 'image/png';
  }
  if (type === 'jpeg') {
    return 'image/jpeg';
  }

  throw new Error(`Unknown screenshot type: ${type}`);
}
// GET handler: builds render options from the query string, checks that they
// are allowed, renders, and sends the result with the matching content type.
const getRender = ex.createRoute((req, res) => {
  const opts = getOptsFromQuery(req.query);
  assertOptionsAllowed(opts);

  const respond = (data) => {
    if (opts.attachmentName) {
      res.attachment(opts.attachmentName);
    }

    res.set('content-type', getMimeType(opts));
    res.send(data);
  };

  return renderCore.render(opts).then(respond);
});
// POST handler. Accepts either a JSON body (which must contain `url` or
// `html`) or a non-JSON body that is used as the HTML to render, in which
// case the remaining options come from the query string.
// Responds with the rendered data, or throws a 400/403 status error.
const postRender = ex.createRoute((req, res) => {
  // The content-type header may be missing entirely. Treat that as a non-JSON
  // body instead of crashing with a TypeError, which would surface as a 500.
  const contentType = req.headers['content-type'] || '';
  const isBodyJson = contentType.includes('application/json');

  if (isBodyJson) {
    const hasContent = _.isString(_.get(req.body, 'url')) || _.isString(_.get(req.body, 'html'));
    if (!hasContent) {
      ex.throwStatus(400, 'Body must contain url or html');
    }
  } else if (_.isString(req.query.url)) {
    ex.throwStatus(400, 'url query parameter is not allowed when body is HTML');
  }

  let opts;
  if (isBodyJson) {
    // Same defaults as getOptsFromQuery applies for output and screenshot type
    opts = _.merge({
      output: 'pdf',
      screenshot: {
        type: 'png',
      },
    }, req.body);
  } else {
    opts = getOptsFromQuery(req.query);
    opts.html = req.body;
  }

  assertOptionsAllowed(opts);

  return renderCore.render(opts)
    .then((data) => {
      if (opts.attachmentName) {
        res.attachment(opts.attachmentName);
      }

      res.set('content-type', getMimeType(opts));
      res.send(data);
    });
});
/**
 * Compares two hosts case-insensitively.
 *
 * @param {string} host1 - First host.
 * @param {string} host2 - Second host.
 * @returns {{match: boolean, type: string, part1: string, part2: string}}
 *   Match result with the lowercased hosts that were compared.
 */
function isHostMatch(host1, host2) {
  const part1 = host1.toLowerCase();
  const part2 = host2.toLowerCase();

  return {
    match: part1 === part2,
    type: 'host',
    part1,
    part2,
  };
}
/**
 * Tests a url against a regular expression given as a string.
 *
 * @param {string} urlPattern - Regular expression source.
 * @param {string} inputUrl - Url to test.
 * @returns {{match: boolean, type: string, part1: string, part2: string}}
 *   Match result; part1 is the url and part2 the pattern.
 */
function isRegexMatch(urlPattern, inputUrl) {
  const matcher = new RegExp(`${urlPattern}`);
  const match = matcher.test(inputUrl);

  return {
    match,
    type: 'regex',
    part1: inputUrl,
    part2: urlPattern,
  };
}
/**
 * Compares two urls after passing both through normalizeUrl.
 *
 * @param {string} url1 - First url.
 * @param {string} url2 - Second url.
 * @returns {{match: boolean, type: string, part1: string, part2: string}}
 *   Match result with the original (not normalized) urls.
 */
function isNormalizedMatch(url1, url2) {
  const normalized1 = normalizeUrl(url1);
  const normalized2 = normalizeUrl(url2);

  return {
    match: normalized1 === normalized2,
    type: 'normalized url',
    part1: url1,
    part2: url2,
  };
}
/**
 * Checks a url against the patterns in config.ALLOW_URLS. A pattern is one of:
 *   - `host:<host>`    compared with the url's host (includes a non-default port)
 *   - `regex:<source>` regular expression tested against the whole url
 *   - anything else    compared with the url after normalization
 *
 * @param {string} inputUrl - Url requested for rendering.
 * @returns {boolean} True when at least one pattern matches.
 * @throws {TypeError} When inputUrl cannot be parsed by the URL constructor.
 */
function isUrlAllowed(inputUrl) {
  const HOST_PREFIX = 'host:';
  const REGEX_PREFIX = 'regex:';
  const urlParts = new URL(inputUrl);

  const matchInfos = _.map(config.ALLOW_URLS, (urlPattern) => {
    // Strip only the leading prefix. Splitting on ':' would truncate values
    // that contain a colon themselves, e.g. `regex:^https://a\.com/` would
    // become `^https` (matching every https url) and `host:a.com:8080`
    // would lose its port.
    if (_.startsWith(urlPattern, HOST_PREFIX)) {
      return isHostMatch(urlPattern.slice(HOST_PREFIX.length), urlParts.host);
    }
    if (_.startsWith(urlPattern, REGEX_PREFIX)) {
      return isRegexMatch(urlPattern.slice(REGEX_PREFIX.length), inputUrl);
    }

    return isNormalizedMatch(urlPattern, inputUrl);
  });

  const isAllowed = _.some(matchInfos, info => info.match);
  if (!isAllowed) {
    logger.info('The url was not allowed because:');
    _.forEach(matchInfos, (info) => {
      logger.info(`${info.part1} !== ${info.part2} (with ${info.type} matching)`);
    });
  }

  return isAllowed;
}
/**
 * Rejects render options that the server configuration forbids.
 *
 * @param {object} opts - Render options; only `url` is inspected.
 * @throws {Error} 403 when there is no url and HTML input is disabled, or
 *   when ALLOW_URLS is non-empty and the url does not match any entry.
 */
function assertOptionsAllowed(opts) {
  const hasUrl = _.isString(opts.url);

  // Without a url the request can only be an HTML render
  if (!hasUrl && config.DISABLE_HTML_INPUT) {
    ex.throwStatus(403, 'Rendering HTML input is disabled.');
  }

  if (!hasUrl) {
    return;
  }

  const isRestricted = config.ALLOW_URLS.length > 0;
  if (isRestricted && !isUrlAllowed(opts.url)) {
    ex.throwStatus(403, 'Url not allowed.');
  }
}
/**
 * Converts flat query parameters (with dotted keys such as 'pdf.margin.top')
 * into the nested render options object. Missing parameters stay `undefined`,
 * except `output` (defaults to 'pdf') and `screenshot.type` (defaults to 'png').
 *
 * @param {object} query - Parsed query string.
 * @returns {object} Nested render options.
 */
function getOptsFromQuery(query) {
  // Reads query[`${prefix}.${key}`] for each key into a new object, in order
  const readGroup = (prefix, keys) => {
    const group = {};
    keys.forEach((key) => {
      group[key] = query[`${prefix}.${key}`];
    });
    return group;
  };

  const opts = {};
  [
    'url',
    'attachmentName',
    'scrollPage',
    'emulateScreenMedia',
    'enableGPU',
    'ignoreHttpsErrors',
    'waitFor',
  ].forEach((key) => {
    opts[key] = query[key];
  });
  opts.output = query.output || 'pdf';

  opts.viewport = readGroup('viewport', [
    'width',
    'height',
    'deviceScaleFactor',
    'isMobile',
    'hasTouch',
    'isLandscape',
  ]);

  opts.goto = readGroup('goto', ['timeout', 'waitUntil']);

  const pdfOpts = readGroup('pdf', [
    'fullPage',
    'scale',
    'displayHeaderFooter',
    'footerTemplate',
    'headerTemplate',
    'landscape',
    'pageRanges',
    'format',
    'width',
    'height',
  ]);
  pdfOpts.margin = readGroup('pdf.margin', ['top', 'right', 'bottom', 'left']);
  pdfOpts.printBackground = query['pdf.printBackground'];
  opts.pdf = pdfOpts;

  const screenshotOpts = readGroup('screenshot', ['fullPage', 'quality']);
  screenshotOpts.type = query['screenshot.type'] || 'png';
  screenshotOpts.clip = readGroup('screenshot.clip', ['x', 'y', 'width', 'height']);
  screenshotOpts.selector = query['screenshot.selector'];
  screenshotOpts.omitBackground = query['screenshot.omitBackground'];
  opts.screenshot = screenshotOpts;

  return opts;
}
// Route handlers for GET and POST rendering requests.
module.exports = {
getRender,
postRender,
};
================================================
FILE: src/index.js
================================================
const createApp = require('./app');
const enableDestroy = require('server-destroy');
const BPromise = require('bluebird');
const logger = require('./util/logger')(__filename);
const config = require('./config');
// Configure bluebird: warnings are enabled outside production,
// long stack traces are always enabled.
BPromise.config({
warnings: config.NODE_ENV !== 'production',
longStackTraces: true,
});
// Create the app and start listening on the configured port.
const app = createApp();
const server = app.listen(config.PORT, () => {
logger.info(
'Express server listening on http://localhost:%d/ in %s mode',
config.PORT,
app.get('env')
);
});
// server-destroy is applied to the server here; closeServer later calls
// server.destroy() on shutdown signals.
enableDestroy(server);
/**
 * Logs the received signal and destroys the http server.
 *
 * @param {string} signal - Name of the signal that triggered the shutdown.
 */
function closeServer(signal) {
  const messages = [
    `${signal} received`,
    'Closing http.Server ..',
  ];
  messages.forEach((message) => {
    logger.info(message);
  });

  server.destroy();
}
// Handle signals gracefully. Heroku will send SIGTERM before idle.
// Both signals go through closeServer, with the signal name pre-bound for logging.
process.on('SIGTERM', closeServer.bind(this, 'SIGTERM'));
process.on('SIGINT', closeServer.bind(this, 'SIGINT(Ctrl-C)'));
// Once the server has closed, emit a 'cleanup' event on process and exit
// shortly afterwards.
server.on('close', () => {
logger.info('Server closed');
process.emit('cleanup');
logger.info('Giving 100ms time to cleanup..');
// Give a small time frame to clean up
setTimeout(process.exit, 100);
});
================================================
FILE: src/middleware/error-logger.js
================================================
const _ = require('lodash');
const logger = require('../util/logger')(__filename);
// Strings longer than this many characters are truncated by deepSupressLongStrings.
const SLICE_THRESHOLD = 1000;
/**
 * Creates an Express error middleware that logs the error and then passes it
 * on to the next error handler.
 *
 * @param {object} [_opts] - Overrides for the default predicates.
 * @param {function(number): boolean} [_opts.logRequest] - Whether request
 *   details are logged for a status. Default: >= 400 except 404 and 503.
 * @param {function(number): boolean} [_opts.logStackTrace] - Whether the stack
 *   trace is logged for a status. Default: >= 500 except 503.
 * @returns {function} Express error-handling middleware.
 */
function createErrorLogger(_opts) {
  const defaults = {
    logRequest: status => status >= 400 && status !== 404 && status !== 503,
    logStackTrace: status => status >= 500 && status !== 503,
  };
  const opts = _.merge(defaults, _opts);

  return function errorHandler(err, req, res, next) {
    // Errors without a status are treated as internal server errors
    const status = err.status || 500;
    const logLevel = getLogLevel(status);
    const log = logger[logLevel];

    if (opts.logRequest(status)) {
      logRequestDetails(logLevel, req, status);
    }

    const withStackTrace = opts.logStackTrace(status);
    if (withStackTrace) {
      log(err, err.stack);
    } else {
      log(err.toString());
    }

    next(err);
  };
}
/**
 * Picks the log level for a response status.
 *
 * @param {number} status - HTTP status code.
 * @returns {string} 'error' for statuses >= 500, otherwise 'warn'.
 */
function getLogLevel(status) {
  if (status >= 500) {
    return 'error';
  }

  return 'warn';
}
/**
 * Logs the headers, params and body of a request at the given level.
 * Headers and params go through deepSupressLongStrings; the body is logged as is.
 *
 * @param {string} logLevel - Name of the logger method to use.
 * @param {object} req - Request whose headers, params and body are read.
 */
function logRequestDetails(logLevel, req) {
  const emit = (label, payload) => logger[logLevel](label, payload);

  emit('Request headers:', deepSupressLongStrings(req.headers));
  emit('Request parameters:', deepSupressLongStrings(req.params));
  emit('Request body:', req.body);
}
/**
 * Returns a copy of `obj` in which every string longer than SLICE_THRESHOLD
 * is cut to SLICE_THRESHOLD characters and marked as sliced. Nested plain
 * objects are processed recursively; all other values are copied as is.
 *
 * @param {object} obj - Object to copy; it is not mutated.
 * @returns {object} New object with long strings truncated.
 */
function deepSupressLongStrings(obj) {
  const newObj = {};

  _.each(obj, (val, key) => {
    if (_.isString(val) && val.length > SLICE_THRESHOLD) {
      newObj[key] = `${val.slice(0, SLICE_THRESHOLD)} ... [CONTENT SLICED]`;
    } else if (_.isPlainObject(val)) {
      // Keep the processed nested object; discarding the recursive result
      // would silently drop the whole key from the output.
      newObj[key] = deepSupressLongStrings(val);
    } else {
      newObj[key] = val;
    }
  });

  return newObj;
}
// The module exports the error logger factory itself.
module.exports = createErrorLogger;
================================================
FILE: src/middleware/error-responder.js
================================================
const http = require('http');
const _ = require('lodash');
// This responder assumes that every error with a status below 500 is safe to
// be answered with its .message attribute. Other errors are answered with the
// generic HTTP status text instead.
// DO NOT write sensitive data into error messages.
function createErrorResponder(_opts) {
  const defaults = {
    isErrorSafeToRespond: status => status < 500,
  };
  const opts = _.merge(defaults, _opts);

  // 4 params needed for Express to know it's a error handler middleware
  // eslint-disable-next-line
  return function errorResponder(err, req, res, next) {
    const status = err.status || 500;
    const httpMessage = http.STATUS_CODES[status];
    const message = opts.isErrorSafeToRespond(status) ? err.message : httpMessage;

    // Errors carrying an `errors` attribute are sent serialized as they are
    let body;
    if (_.has(err, 'errors')) {
      body = JSON.stringify(err);
    } else {
      body = { status, statusText: httpMessage, messages: [message] };
    }

    res.status(status);
    res.send(body);
  };
}
// The module exports the error responder factory itself.
module.exports = createErrorResponder;
================================================
FILE: src/middleware/require-https.js
================================================
// Creates a middleware that lets a request through only when req.secure is
// truthy; every other request is passed on as a 403 error.
const createRequireHttps = () => function RequireHttps(req, res, next) {
  if (!req.secure) {
    const err = new Error('Only HTTPS allowed.');
    err.status = 403;
    next(err);
    return undefined;
  }

  // Allow requests only over https
  return next();
};
// The module exports the middleware factory itself.
module.exports = createRequireHttps;
================================================
FILE: src/router.js
================================================
const _ = require('lodash');
const validate = require('express-validation');
const express = require('express');
const render = require('./http/render-http');
const config = require('./config');
const logger = require('./util/logger')(__filename);
const { renderQuerySchema, renderBodySchema, sharedQuerySchema } = require('./util/validation');
/**
 * Builds the API router: optional x-api-key authentication (only when
 * config.API_TOKENS is not empty), the validated GET and POST /api/render
 * routes and a /healthcheck route.
 *
 * @returns {object} Configured Express router.
 */
function createRouter() {
  const router = express.Router();
  const isAuthRequired = !_.isEmpty(config.API_TOKENS);

  if (isAuthRequired) {
    logger.info('x-api-key authentication required');

    router.use('/*', (req, res, next) => {
      const userToken = req.headers['x-api-key'];
      if (_.includes(config.API_TOKENS, userToken)) {
        return next();
      }

      const err = new Error('Invalid API token in x-api-key header.');
      err.status = 401;
      return next(err);
    });
  } else {
    logger.warn('Warning: no authentication required to use the API');
  }

  router.get(
    '/api/render',
    validate({
      query: renderQuerySchema,
      options: {
        allowUnknownBody: false,
        allowUnknownQuery: false,
      },
    }),
    render.getRender
  );

  router.post(
    '/api/render',
    validate({
      body: renderBodySchema,
      query: sharedQuerySchema,
      options: {
        allowUnknownBody: false,
        allowUnknownQuery: false,
        // Without this option, text body causes an error
        // https://github.com/AndrewKeig/express-validation/issues/36
        contextRequest: true,
      },
    }),
    render.postRender
  );

  router.get('/healthcheck', (req, res) => {
    const response = res.status(200);
    return response.send('OK');
  });

  return router;
}
// The module exports the router factory itself.
module.exports = createRouter;
================================================
FILE: src/util/express.js
================================================
const _ = require('lodash');
const BPromise = require('bluebird');
// Creates a route whose `func` result (the value its Promise resolves with)
// is sent to the client as a json response.
function createJsonRoute(func) {
  const sendAsJson = (data, req, res) => {
    res.json(data);
  };

  return createRoute(func, sendAsJson);
}
// Generic route creator
// Factory function to create a new route to reduce boilerplate in controllers
// and make it easier to interact with promises.
// `func` is called with (req, res) when `responseHandler` is given, otherwise
// with (req, res, next). Its return value may be a promise or a plain value.
// `responseHandler` receives the data from asynchronous `func` as the first
// parameter, followed by req, res and next.
// When using this function directly instead of `createJsonRoute`, you must
// send a response to express' `res` object.
// Synchronous throws and promise rejections are both forwarded to `next`.
function createRoute(func, responseHandler) {
  return function route(req, res, next) {
    try {
      const hasResponseHandler = _.isFunction(responseHandler);
      const args = hasResponseHandler ? [req, res] : [req, res, next];

      let pending = func.apply(this, args);
      const isThenable = _.isFunction(_.get(pending, 'then'));
      if (!isThenable) {
        // It was a not a Promise, so wrap it as a Promise
        pending = BPromise.resolve(pending);
      }

      const handled = hasResponseHandler
        ? pending.then(data => responseHandler(data, req, res, next))
        : pending;
      handled.catch(next);
    } catch (err) {
      next(err);
    }
  };
}
/**
 * Throws an Error that carries an HTTP status.
 *
 * @param {number} status - Value for the error's `status` attribute.
 * @param {string} message - Error message.
 * @throws {Error} Always.
 */
function throwStatus(status, message) {
  throw Object.assign(new Error(message), { status });
}
// Route factories and the status error helper.
module.exports = {
createRoute,
createJsonRoute,
throwStatus,
};
================================================
FILE: src/util/logger.js
================================================
const path = require('path');
const winston = require('winston');
const _ = require('lodash');
const config = require('../config');
// Passed as the console transport's `colorize` option; true only in development.
const COLORIZE = config.NODE_ENV === 'development';
/**
 * Creates a console logger labeled with the base name of the given file.
 * The level comes from config.LOG_LEVEL and falls back to 'info'.
 *
 * @param {string} filePath - Path of the file that will use the logger.
 * @returns {object} winston logger instance.
 */
function createLogger(filePath) {
  const consoleTransport = new winston.transports.Console({
    colorize: COLORIZE,
    label: path.basename(filePath),
    timestamp: true,
  });
  const logger = new winston.Logger({ transports: [consoleTransport] });

  const level = config.LOG_LEVEL || 'info';
  _setLevelForTransports(logger, level);

  return logger;
}
/**
 * Sets the same log level on every transport of the logger.
 *
 * @param {object} logger - Logger whose `transports` are updated in place.
 * @param {string} level - Log level to assign.
 */
function _setLevelForTransports(logger, level) {
  const applyLevel = (transport) => {
    Object.assign(transport, { level });
  };

  _.forEach(logger.transports, applyLevel);
}
// The module exports the logger factory itself; callers pass their file path.
module.exports = createLogger;
================================================
FILE: src/util/require-envs.js
================================================
/* eslint-disable no-process-env */
const _ = require('lodash');
/**
 * Verifies that every listed environment variable is set to a non-empty value.
 *
 * @param {string[]} arr - Names of the required environment variables.
 * @throws {Error} For the first variable that is missing or empty.
 */
function requireEnvs(arr) {
  const assertIsSet = (varName) => {
    if (process.env[varName]) {
      return;
    }

    throw new Error(`Environment variable not set: ${varName}`);
  };

  _.forEach(arr, varName => assertIsSet(varName));
}
// The module exports the check function itself.
module.exports = requireEnvs;
================================================
FILE: src/util/validation.js
================================================
const Joi = require('joi');
// Schema for a url to render: a URI string restricted to the http and https schemes.
const urlSchema = Joi.string().uri({
scheme: [
'http',
'https',
],
});
// Schema for one cookie object. Only name and value are required;
// sameSite accepts exactly 'Strict' or 'Lax'.
const cookieSchema = Joi.object({
name: Joi.string().required(),
value: Joi.string().required(),
url: Joi.string(),
domain: Joi.string(),
path: Joi.string(),
expires: Joi.number().min(1),
httpOnly: Joi.boolean(),
secure: Joi.boolean(),
sameSite: Joi.string().regex(/^(Strict|Lax)$/),
});
// Query parameters shared by the render query schemas: exported on its own and
// extended with a required url in renderQuerySchema.
// Nested options are expressed as flat dotted keys, e.g. 'pdf.margin.top'.
const sharedQuerySchema = Joi.object({
attachmentName: Joi.string(),
scrollPage: Joi.boolean(),
emulateScreenMedia: Joi.boolean(),
enableGPU: Joi.boolean(),
ignoreHttpsErrors: Joi.boolean(),
// Either a number between 1 and 60000 or a string of 1-2000 characters
waitFor: Joi.alternatives([
Joi.number().min(1).max(60000),
Joi.string().min(1).max(2000),
]),
cookies: Joi.array().items(cookieSchema),
output: Joi.string().valid(['pdf', 'screenshot', 'html']),
'viewport.width': Joi.number().min(1).max(30000),
'viewport.height': Joi.number().min(1).max(30000),
'viewport.deviceScaleFactor': Joi.number().min(0).max(100),
'viewport.isMobile': Joi.boolean(),
'viewport.hasTouch': Joi.boolean(),
'viewport.isLandscape': Joi.boolean(),
'goto.timeout': Joi.number().min(0).max(60000),
'goto.waitUntil': Joi.string().min(1).max(2000),
'pdf.scale': Joi.number().min(0).max(1000),
'pdf.displayHeaderFooter': Joi.boolean(),
'pdf.landscape': Joi.boolean(),
'pdf.pageRanges': Joi.string().min(1).max(2000),
'pdf.format': Joi.string().min(1).max(2000),
'pdf.width': Joi.string().min(1).max(2000),
'pdf.height': Joi.string().min(1).max(2000),
'pdf.fullPage': Joi.boolean(),
'pdf.footerTemplate': Joi.string(),
'pdf.headerTemplate': Joi.string(),
'pdf.margin.top': Joi.string().min(1).max(2000),
'pdf.margin.right': Joi.string().min(1).max(2000),
'pdf.margin.bottom': Joi.string().min(1).max(2000),
'pdf.margin.left': Joi.string().min(1).max(2000),
'pdf.printBackground': Joi.boolean(),
'screenshot.fullPage': Joi.boolean(),
'screenshot.quality': Joi.number().integer().min(0).max(100),
'screenshot.type': Joi.string().valid(['png', 'jpeg']),
'screenshot.clip.x': Joi.number(),
'screenshot.clip.y': Joi.number(),
'screenshot.clip.width': Joi.number(),
'screenshot.clip.height': Joi.number(),
// NOTE(review): the pattern is not anchored, so any string containing
// '#' or '.' passes - confirm this is intended.
'screenshot.selector': Joi.string().regex(/(#|\.).*/),
'screenshot.omitBackground': Joi.boolean(),
});
// Query schema with a required url, combined with everything in sharedQuerySchema.
const renderQuerySchema = Joi.object({
url: urlSchema.required(),
}).concat(sharedQuerySchema);
// Schema for an object request body: url and/or html plus render options,
// with viewport, goto, pdf and screenshot given as nested objects.
// Neither url nor html is marked as required here.
const renderBodyObject = Joi.object({
url: urlSchema,
html: Joi.string(),
attachmentName: Joi.string(),
scrollPage: Joi.boolean(),
ignoreHttpsErrors: Joi.boolean(),
emulateScreenMedia: Joi.boolean(),
cookies: Joi.array().items(cookieSchema),
output: Joi.string().valid(['pdf', 'screenshot', 'html']),
viewport: Joi.object({
width: Joi.number().min(1).max(30000),
height: Joi.number().min(1).max(30000),
deviceScaleFactor: Joi.number().min(0).max(100),
isMobile: Joi.boolean(),
hasTouch: Joi.boolean(),
isLandscape: Joi.boolean(),
}),
// Either a number between 1 and 60000 or a string of 1-2000 characters
waitFor: Joi.alternatives([
Joi.number().min(1).max(60000),
Joi.string().min(1).max(2000),
]),
goto: Joi.object({
timeout: Joi.number().min(0).max(60000),
waitUntil: Joi.string().min(1).max(2000),
}),
pdf: Joi.object({
scale: Joi.number().min(0).max(1000),
displayHeaderFooter: Joi.boolean(),
landscape: Joi.boolean(),
pageRanges: Joi.string().min(1).max(2000),
format: Joi.string().min(1).max(2000),
width: Joi.string().min(1).max(2000),
height: Joi.string().min(1).max(2000),
fullPage: Joi.boolean(),
footerTemplate: Joi.string(),
headerTemplate: Joi.string(),
margin: Joi.object({
top: Joi.string().min(1).max(2000),
right: Joi.string().min(1).max(2000),
bottom: Joi.string().min(1).max(2000),
left: Joi.string().min(1).max(2000),
}),
printBackground: Joi.boolean(),
}),
screenshot: Joi.object({
fullPage: Joi.boolean(),
quality: Joi.number().integer().min(0).max(100),
type: Joi.string().valid(['png', 'jpeg']),
// Unlike the sibling groups, clip is a plain object literal, not Joi.object()
clip: {
x: Joi.number(),
y: Joi.number(),
width: Joi.number(),
height: Joi.number(),
},
// NOTE(review): the pattern is not anchored, so any string containing
// '#' or '.' passes - confirm this is intended.
selector: Joi.string().regex(/(#|\.).*/),
omitBackground: Joi.boolean(),
}),
failEarly: Joi.string(),
});
// A request body is either a plain string or an object matching renderBodyObject.
const renderBodySchema = Joi.alternatives([
Joi.string(),
renderBodyObject,
]);
// Only the composed schemas are exported; urlSchema, cookieSchema and
// renderBodyObject stay internal to this module.
module.exports = {
renderQuerySchema,
renderBodySchema,
sharedQuerySchema,
};
================================================
FILE: test/resources/large-linked.html
================================================
Test Page
Page
================================================
FILE: test/resources/large.html
================================================
Test Page
Page
================================================
FILE: test/resources/postmark-receipt.html
================================================
Receipt for [Product Name]
|
[Product Name]
|
Hi {{name}},
Thanks for using [Product Name]. This email is the receipt for your purchase. No payment is due.
This purchase will appear as “[Credit Card Statement Name]” on your credit card statement for your {{credit_card_brand}} ending in {{credit_card_last_four}}. Need to update your payment information?
10% off your next purchase!
Thanks for your support! Here's a coupon for 10% off your next purchase if used by {{expiration_date}}.
|
{{receipt_id}} |
{{date}} |
|
Description
|
Amount
|
{{#each receipt_details}}
| {{description}} |
{{amount}} |
{{/each}}
|
If you have any questions about this receipt, simply reply to this email or reach out to our support team for help.
Cheers,
The [Product Name] Team
|
Need a printable copy for your records? You can download a PDF version.
Moved recently? Have a new credit card? You can easily update your billing information.
|
|
|
|
|
|
================================================
FILE: test/resources/special-chars.html
================================================
special characters: ä ö ü
================================================
FILE: test/test-all.js
================================================
/* eslint-env mocha */
const chai = require('chai');
const fs = require('fs');
const request = require('supertest');
const BPromise = require('bluebird');
const { getResource } = require('./util');
const pdf = require('pdf-parse');
const createApp = require('../src/app');
// When true, the tests below print response details and write the received
// data to files in the working directory.
const DEBUG = false;
BPromise.config({
longStackTraces: true,
});
// One app instance is shared by all tests in this file.
const app = createApp();
/**
 * Collapses every run of non-alphanumeric characters (underscore included)
 * into a single hyphen. This evens out character encoding differences between
 * strings extracted from the PDF and strings defined in the test environment.
 *
 * @param {string} text - Text to normalise.
 * @returns {string} Normalised text.
 */
function normalisePdfText(text) {
  const separatorRun = /[\W_]+/;
  const words = text.split(separatorRun);

  return words.join('-');
}
/**
 * Extracts the text content of a PDF.
 *
 * @param {Buffer} buffer - PDF data.
 * @param {object} [opts] - Options.
 * @param {boolean} [opts.raw] - When truthy, the text is returned as extracted;
 *   otherwise it is passed through normalisePdfText.
 * @returns {Promise<string>} Text content of the PDF.
 */
function getPdfTextContent(buffer, opts = {}) {
  const pickText = data => (opts.raw ? data.text : normalisePdfText(data.text));

  return pdf(buffer).then(pickText);
}
// GET endpoint: a missing url is rejected, and certificate errors fail the
// render unless ignoreHttpsErrors is set.
// NOTE: the certificate tests request an external host (self-signed.badssl.com).
describe('GET /api/render', () => {
it('request must have "url" query parameter', () =>
request(app).get('/api/render').expect(400)
);
it('invalid cert should cause an error', () =>
request(app)
.get('/api/render')
.query({
url: 'https://self-signed.badssl.com/',
})
.expect(500)
);
it('invalid cert should not cause an error when ignoreHttpsErrors=true', () =>
request(app)
.get('/api/render')
.query({
url: 'https://self-signed.badssl.com/',
ignoreHttpsErrors: true,
})
.expect(200)
);
});
// POST endpoint: covers JSON bodies (url or html) and raw text/html bodies.
// Most tests only check the status, the content type and a minimum
// content-length; the last two also parse the returned PDF and check its text.
// NOTE: some tests request external hosts (github.com, www.html-kit.com).
describe('POST /api/render', () => {
it('body must have "url" attribute', () =>
request(app)
.post('/api/render')
.send({
pdf: { scale: 2 },
})
.set('content-type', 'application/json')
.expect(400)
);
it('render github.com should succeed', () =>
request(app)
.post('/api/render')
.send({ url: 'https://github.com' })
.set('content-type', 'application/json')
.set('Connection', 'keep-alive')
.expect(200)
.expect('content-type', 'application/pdf')
.then((response) => {
const length = Number(response.headers['content-length']);
chai.expect(length).to.be.above(1024 * 40);
})
);
it('html in json body should succeed', () =>
request(app)
.post('/api/render')
.send({ html: getResource('postmark-receipt.html') })
.set('Connection', 'keep-alive')
.set('content-type', 'application/json')
.expect(200)
.expect('content-type', 'application/pdf')
.then((response) => {
const length = Number(response.headers['content-length']);
chai.expect(length).to.be.above(1024 * 40);
})
);
it('html as text body should succeed', () =>
request(app)
.post('/api/render')
.send(getResource('postmark-receipt.html'))
.set('Connection', 'keep-alive')
.set('content-type', 'text/html')
.expect(200)
.expect('content-type', 'application/pdf')
.then((response) => {
const length = Number(response.headers['content-length']);
chai.expect(length).to.be.above(1024 * 40);
})
);
it('rendering large html should succeed', () =>
request(app)
.post('/api/render')
.send(getResource('large.html'))
.set('content-type', 'text/html')
.expect(200)
.expect('content-type', 'application/pdf')
.then((response) => {
const length = Number(response.headers['content-length']);
chai.expect(length).to.be.above(1024 * 1024 * 1);
})
);
it('rendering html with large linked images should succeed', () =>
request(app)
.post('/api/render')
.send(getResource('large-linked.html'))
.set('content-type', 'text/html')
.expect(200)
.expect('content-type', 'application/pdf')
.then((response) => {
if (DEBUG) {
console.log(response.headers);
console.log(response.body);
fs.writeFileSync('out.pdf', response.body, { encoding: null });
}
const length = Number(response.headers['content-length']);
chai.expect(length).to.be.above(30 * 1024 * 1);
})
);
// Renders a cookie tester page and checks that both cookies sent in the
// request show up in the text of the resulting PDF.
it('cookies should exist on the page', () =>
request(app)
.post('/api/render')
.send({
url: 'http://www.html-kit.com/tools/cookietester/',
cookies:
[{
name: 'url-to-pdf-test',
value: 'test successful',
domain: 'www.html-kit.com',
}, {
name: 'url-to-pdf-test-2',
value: 'test successful 2',
domain: 'www.html-kit.com',
}],
})
.set('Connection', 'keep-alive')
.set('content-type', 'application/json')
.expect(200)
.expect('content-type', 'application/pdf')
.then((response) => {
if (DEBUG) {
console.log(response.headers);
console.log(response.body);
fs.writeFileSync('cookies-pdf.pdf', response.body, { encoding: null });
}
return getPdfTextContent(response.body);
})
.then((text) => {
if (DEBUG) {
fs.writeFileSync('./cookies-content.txt', text);
}
// The expected strings are hyphenated because the text was normalised
chai.expect(text).to.have.string('Number-of-cookies-received-2');
chai.expect(text).to.have.string('Cookie-named-url-to-pdf-test');
chai.expect(text).to.have.string('Cookie-named-url-to-pdf-test-2');
})
);
it('special characters should be rendered correctly', () =>
request(app)
.post('/api/render')
.send({ html: getResource('special-chars.html') })
.set('Connection', 'keep-alive')
.set('content-type', 'application/json')
.expect(200)
.expect('content-type', 'application/pdf')
.then((response) => {
if (DEBUG) {
console.log(response.headers);
console.log(response.body);
fs.writeFileSync('special-chars.pdf', response.body, { encoding: null });
}
// Raw text is needed here: normalising would replace the special characters
return getPdfTextContent(response.body, { raw: true });
})
.then((text) => {
if (DEBUG) {
fs.writeFileSync('./special-chars-content.txt', text);
}
chai.expect(text).to.have.string('special characters: ä ö ü');
})
);
});
// The healthcheck endpoint must answer with status 200.
describe('GET /healthcheck', () => {
it('should return ok', () => request(app).get('/healthcheck').expect(200));
});
================================================
FILE: test/util/index.js
================================================
const path = require('path');
const fs = require('fs');
/**
 * Reads a file from the `resources` directory that sits next to this
 * file's directory.
 *
 * @param {string} name - File name inside the resources directory.
 * @returns {string} File content decoded as utf-8.
 */
function getResource(name) {
  const resourcePath = path.join(__dirname, '..', 'resources', name);

  return fs.readFileSync(resourcePath, 'utf-8');
}
// Helpers shared by the tests.
module.exports = {
getResource,
};