Repository: berstend/puppeteer-extra Branch: master Commit: 39248f1f5dee Files: 267 Total size: 703.5 KB Directory structure: gitextract_u4zlbb78/ ├── .editorconfig ├── .eslintrc ├── .gitattributes ├── .github/ │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE/ │ │ ├── bug.md │ │ ├── config.yml │ │ └── feature_request.md │ ├── labeler.yml │ └── workflows/ │ ├── extract-stealth.yml │ ├── label.yml │ └── test.yml ├── .gitignore ├── .prettierrc.js ├── .travis.yml ├── LICENSE ├── README.md ├── lerna.json ├── package.json └── packages/ ├── extract-stealth-evasions/ │ ├── .gitignore │ ├── index.js │ ├── package.json │ └── readme.md ├── playwright-extra/ │ ├── .prettierrc.js │ ├── package.json │ ├── readme.md │ ├── rollup.config.ts │ ├── src/ │ │ ├── extra.ts │ │ ├── helper/ │ │ │ └── loader.ts │ │ ├── index.ts │ │ ├── plugins.ts │ │ ├── puppeteer-compatiblity-shim/ │ │ │ ├── index.ts │ │ │ └── playwright-shim.d.ts │ │ └── types/ │ │ └── index.ts │ ├── test/ │ │ ├── exports.spec.ts │ │ ├── fixtures/ │ │ │ ├── dummyplugin.ts │ │ │ └── extra.ts │ │ ├── playwright.config.ts │ │ ├── plugin-events.spec.ts │ │ └── puppeteer-plugins/ │ │ ├── anonymize-ua.spec.ts │ │ ├── recaptcha.spec.ts │ │ └── stealth.spec.ts │ └── tsconfig.json ├── plugin-proxy-router/ │ ├── package.json │ ├── readme.md │ ├── rollup.config.ts │ ├── src/ │ │ ├── index.ts │ │ ├── plugin.ts │ │ ├── router.ts │ │ ├── stats.ts │ │ └── utils/ │ │ └── port.ts │ ├── tsconfig.json │ └── tslint.json ├── puppeteer-extra/ │ ├── ava.config-ts.js │ ├── ava.config.js │ ├── package.json │ ├── readme.md │ ├── rollup.config.ts │ ├── src/ │ │ ├── ambient.d.ts │ │ ├── index.ts │ │ └── puppeteer-legacy.d.ts │ ├── test/ │ │ ├── addExtra.ts │ │ ├── basic.ts │ │ ├── connect.js │ │ ├── events.js │ │ ├── options.js │ │ └── plugin-support.js │ ├── tsconfig.json │ └── tslint.json ├── puppeteer-extra-plugin/ │ ├── ava.config-ts.js │ ├── ava.config.js │ ├── package.json │ ├── readme.md │ ├── rollup.config.ts │ ├── src/ │ │ ├── ambient.d.ts │ │ ├── 
index.test.ts │ │ ├── index.ts │ │ └── puppeteer.ts │ ├── tsconfig.json │ └── tslint.json ├── puppeteer-extra-plugin-adblocker/ │ ├── ava.config-ts.js │ ├── ava.config.js │ ├── build_version_check.js │ ├── package.json │ ├── readme.md │ ├── rollup.config.ts │ ├── src/ │ │ ├── ambient.d.ts │ │ ├── index.test.ts │ │ └── index.ts │ ├── tsconfig.json │ └── tslint.json ├── puppeteer-extra-plugin-anonymize-ua/ │ ├── index.d.ts │ ├── index.js │ ├── index.test.js │ ├── package.json │ ├── readme.md │ └── test/ │ ├── headless.js │ ├── headless_off.js │ ├── popup.js │ └── stresstest.js ├── puppeteer-extra-plugin-block-resources/ │ ├── example.js │ ├── index.d.ts │ ├── index.js │ ├── index.test.js │ ├── package.json │ └── readme.md ├── puppeteer-extra-plugin-click-and-wait/ │ ├── example.js │ ├── index.js │ ├── package.json │ └── readme.md ├── puppeteer-extra-plugin-devtools/ │ ├── index.js │ ├── index.test.js │ ├── lib/ │ │ ├── RemoteDevTools.js │ │ └── RemoteDevTools.test.js │ ├── package.json │ ├── readme.md │ └── test/ │ └── headless.js ├── puppeteer-extra-plugin-flash/ │ ├── example.js │ ├── index.js │ ├── package.json │ └── readme.md ├── puppeteer-extra-plugin-font-size/ │ ├── index.js │ ├── package.json │ └── readme.md ├── puppeteer-extra-plugin-recaptcha/ │ ├── ava.config-ts.js │ ├── ava.config.js │ ├── package.json │ ├── readme.md │ ├── rollup.config.ts │ ├── src/ │ │ ├── ambient.d.ts │ │ ├── content-hcaptcha.ts │ │ ├── content.ts │ │ ├── detection.test.ts │ │ ├── index.test.ts │ │ ├── index.ts │ │ ├── playwright-mods.d.ts │ │ ├── provider/ │ │ │ ├── 2captcha-api.ts │ │ │ └── 2captcha.ts │ │ ├── puppeteer-mods.d.ts │ │ ├── solve.test.ts │ │ └── types.ts │ ├── tsconfig.json │ └── tslint.json ├── puppeteer-extra-plugin-repl/ │ ├── index.d.ts │ ├── index.js │ ├── index.test.js │ ├── lib/ │ │ ├── REPLSession.js │ │ ├── REPLSession.test.js │ │ ├── super-readline.js │ │ └── super-readline.test.js │ ├── package.json │ ├── readme.md │ └── test/ │ └── headless.js ├── 
puppeteer-extra-plugin-stealth/ │ ├── .npmignore │ ├── evasions/ │ │ ├── _template/ │ │ │ ├── index.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── _utils/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── readme.md │ │ │ └── withUtils.js │ │ ├── chrome.app/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── chrome.csi/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── chrome.loadTimes/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── chrome.runtime/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ ├── readme.md │ │ │ └── staticData.json │ │ ├── defaultArgs/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── iframe.contentWindow/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── media.codecs/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── navigator.hardwareConcurrency/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── navigator.languages/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── navigator.permissions/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── navigator.plugins/ │ │ │ ├── data.json │ │ │ ├── functionMocks.js │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── magicArray.js │ │ │ ├── mimeTypes.js │ │ │ ├── mimeTypes.test.js │ │ │ ├── package.json │ │ │ ├── plugins.js │ │ │ ├── plugins.test.js │ │ │ └── readme.md │ │ ├── navigator.vendor/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── navigator.webdriver/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── readme.md │ │ ├── sourceurl/ │ │ │ ├── _fixtures/ │ │ │ │ └── test.html │ │ │ ├── 
index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── user-agent-override/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ ├── webgl.vendor/ │ │ │ ├── index.js │ │ │ ├── index.test.js │ │ │ ├── package.json │ │ │ └── readme.md │ │ └── window.outerdimensions/ │ │ ├── index.js │ │ ├── package.json │ │ └── readme.md │ ├── examples/ │ │ ├── detect-headless.js │ │ ├── test1.js │ │ └── test2.js │ ├── index.d.ts │ ├── index.js │ ├── index.test.js │ ├── package.json │ ├── readme.md │ ├── runall_stealthtests.sh │ ├── stealthtests/ │ │ ├── headful-chrome-stealth.js │ │ ├── headful-chrome-vanilla.js │ │ ├── headful-chromium-stealth.js │ │ ├── headful-chromium-vanilla.js │ │ ├── headless-chrome-stealth.js │ │ ├── headless-chrome-vanilla.js │ │ ├── headless-chromium-stealth.js │ │ └── headless-chromium-vanilla.js │ └── test/ │ ├── cat-and-mouse.test.js │ ├── fixtures/ │ │ ├── dummy-with-service-worker.html │ │ ├── dummy.html │ │ └── sw.js │ ├── fpscanner.test.js │ ├── service-worker.test.js │ └── util.js ├── puppeteer-extra-plugin-user-data-dir/ │ ├── index.js │ ├── package.json │ └── readme.md └── puppeteer-extra-plugin-user-preferences/ ├── index.js ├── package.json └── readme.md ================================================ FILE CONTENTS ================================================ ================================================ FILE: .editorconfig ================================================ # EditorConfig helps developers define and maintain # consistent coding styles between different editors and IDEs. 
root = true [*] charset = utf-8 indent_style = space indent_size = 2 end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true [*.md] trim_trailing_whitespace = false ================================================ FILE: .eslintrc ================================================ { "extends": ["prettier-standard"], "rules": { "lines-between-class-members": "off" } } ================================================ FILE: .gitattributes ================================================ * text=auto eol=lf ================================================ FILE: .github/FUNDING.yml ================================================ custom: https://www.buymeacoffee.com/brstnd ================================================ FILE: .github/ISSUE_TEMPLATE/bug.md ================================================ --- name: Bug report about: Create a bug report title: '[Bug] ' labels: 'issue: bug report, needs triage' --- **Describe the bug** **Code Snippet** ```javascript const puppeteer = require('puppeteer-extra') ;(async () => { const browser = await puppeteer.launch() // ... })() ``` **Versions** ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ contact_links: - name: Questions and Help url: https://github.com/berstend/puppeteer-extra/wiki/Scraping-Chat about: This issue tracker is not for support questions. You can join our Discord server and ask the community for help. 
================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea or feature title: '[Feature] ' labels: 'issue: proposal' --- **Feature request** ================================================ FILE: .github/labeler.yml ================================================ # This is used with the label workflow which # will triage pull requests and apply a label based on the # paths that are modified in the pull request. # # For more information, see: # https://github.com/actions/labeler 'package: core': - packages/automation-extra/**/* - packages/playwright-extra/**/* - packages/puppeteer-extra/**/* - packages/automation-extra-plugin/**/* - packages/puppeteer-extra-plugin/**/* 'plugin: automation-extra': - packages/plugin-*/**/* 'plugin: puppeteer-extra': - packages/puppeteer-extra-plugin-*/**/* 'plugin: recaptcha 🏴': - packages/*recaptcha*/**/* 'plugin: stealth ㊙️': - packages/*stealth*/**/* ================================================ FILE: .github/workflows/extract-stealth.yml ================================================ name: Extract stealth.min.js on: push: branches: - master paths: - 'packages/puppeteer-extra-plugin-stealth/**' - 'packages/extract-stealth-evasions/**' - '.github/workflows/extract-stealth.yml' jobs: build: runs-on: ubuntu-latest steps: - name: Sleep for 190 seconds uses: jakejarvis/wait-action@master with: time: '190s' - name: Checkout uses: actions/checkout@v2 - name: 'Fix for: error fsevents@2.1.2: The platform "linux" is incompatible with this module.' 
run: npx json -I -f package.json -e 'this.resolutions={}' - name: Build packages run: | yarn install yarn bootstrap yarn build - name: Extract stealth.min.js run: | cd packages/extract-stealth-evasions node index.js cp stealth.min.js ../../ - name: Commit stealth.min.js uses: EndBug/add-and-commit@v4 with: add: 'stealth.min.js' force: true ref: 'stealth-js' message: 'Auto-updated stealth.min.js with newest evasions' env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} ================================================ FILE: .github/workflows/label.yml ================================================ # This workflow will triage pull requests and apply a label based on the # paths that are modified in the pull request. # # To use this workflow, you will need to set up a .github/labeler.yml # file with configuration. For more information, see: # https://github.com/actions/labeler name: "Pull Request Labeler" on: - pull_request_target jobs: triage: runs-on: ubuntu-latest steps: - uses: actions/labeler@main with: repo-token: "${{ secrets.GITHUB_TOKEN }}" sync-labels: true ================================================ FILE: .github/workflows/test.yml ================================================ name: Test on: pull_request: branches: - '*' types: - opened - synchronize - reopened push: branches: - master - 'test/*' workflow_dispatch: branches: - '*' env: CI: 'true' FORCE_COLOR: 'true' jobs: test: name: node v${{ matrix.node }}, pptr ${{ matrix.puppeteer_version }}, ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: node: # - 16 - 14 puppeteer_version: # - 15.5.0 - 14.2.0 # Chromium 103.0.5059.0 # requires >=14.1.0 # - 10.2.0 # Chromium 93.0.4577.0 # - 7.0.0 # Chromium 90.0.4403.0, Feb 3, 2021 # - 5.5.0 # Chromium 88.0.4298.0 # - 5.0.0 # Chromium 83.0.4103.0, Jul 2, 2020 # - 2.1.1 # Chromium 79.0.3942.0, Oct 24 2019 # - 2.0.0 # Chromium 79.0.3942.0, Oct 24 2019 # - 1.20.0 # Chromium 78.0.3882.0, Sep 13 2019 # - 1.15.0 # Chromium 75.0.3765.0, Apr 26 2019 # - 1.9.0 
# Chromium 71.0.3563.0, Oct 4, 2018 # - 1.6.2 # Chromium 69.0.3494.0, Aug 1, 2018 os: - ubuntu-latest # - macOS-latest # - windows-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Setup Node uses: actions/setup-node@v1 with: node-version: ${{ matrix.node }} - name: 'Fix for: error fsevents@2.1.2: The platform "linux" is incompatible with this module.' run: npx json -I -f package.json -e 'this.resolutions={}' - name: yarn install uses: bahmutov/npm-install@v1 - name: yarn bootstrap run: yarn bootstrap - name: install puppeteer@${{ matrix.puppeteer_version }} run: yarn lerna add --dev puppeteer@${{ matrix.puppeteer_version }} - name: lerna link run: yarn lerna link - name: lerna build run: yarn lerna run build --concurrency 1 - name: debug run: | yarn list --pattern "puppeteer|puppeteer-extra|playwright" file node_modules/puppeteer-extra/dist/index.cjs.js - uses: microsoft/playwright-github-action@v1 - name: playwright install run: cd packages/playwright-extra && yarn playwright install - name: test uses: GabrielBB/xvfb-action@v1 env: DISPLAY: ':99.0' with: run: yarn test-ci ================================================ FILE: .gitignore ================================================ # See https://help.github.com/ignore-files/ for more about ignoring files. 
# dependencies node_modules # builds build dist # misc .DS_Store .env .env.local .env.development.local .env.test.local .env.production.local .cache .rpt2_cache lerna-debug.log* lerna-error.log* npm-debug.log* yarn-debug.log* yarn-error.log* TODO.md packages/testing/ packages/plugin-stealth/ packages/puppeteer-extra2/ packages/puppeteer-extra-old/ packages/test-* packages/internal-testing-* testing/ *.tgz* ================================================ FILE: .prettierrc.js ================================================ module.exports = { ...require('prettier-config-standard'), // override for Windows endOfLine: 'lf', } ================================================ FILE: .travis.yml ================================================ language: node_js dist: trusty addons: apt: packages: # This is required to run new chrome on old trusty - libnss3 language: node_js # allow headful tests before_install: - "export DISPLAY=:99.0" - "sh -e /etc/init.d/xvfb start" # test against multiple node versions node_js: - '13' - '10' # Fix for: error fsevents@2.1.2: The platform "linux" is incompatible with this module. install: skip # Prevent potential issues cache: npm: false yarn: false # test against multiple puppeteer versions env: - PUPPETEER_VERSION=5.0.0 - PUPPETEER_VERSION=2.1.1 # Chromium 79.0.3942.0, Oct 24 2019 # - PUPPETEER_VERSION=2.0.0 # Chromium 79.0.3942.0, Oct 24 2019 # - PUPPETEER_VERSION=1.20.0 # Chromium 78.0.3882.0, Sep 13 2019 # - PUPPETEER_VERSION=1.15.0 # Chromium 75.0.3765.0, Apr 26 2019 # - PUPPETEER_VERSION=1.9.0 # Chromium 71.0.3563.0, Oct 4, 2018 # - PUPPETEER_VERSION=1.6.2 # Chromium 69.0.3494.0, Aug 1, 2018 script: # Make sure to use latest @next package # https://github.com/yarnpkg/yarn/issues/4731 # Fix for: error fsevents@2.1.2: The platform "linux" is incompatible with this module. 
- npx json -I -f package.json -e 'this.resolutions={}' # - npx json -I -f package.json -e 'this.resolutions={"**/puppeteer":"'${PUPPETEER_VERSION}'"}' # Install older version when required - rm -rf ./node_modules/ # - 'yarn; echo 0' # - 'yarn lerna exec "rm -f yarn.lock; rm -rf node_modules; echo 0"' # - 'rm -rf yarn.lock && yarn cache clean && rm -rf ./node_modules/puppeteer && yarn lerna add puppeteer@${PUPPETEER_VERSION}' # - "yarn lerna exec --concurrency 1 'yarn set resolution --save puppeteer@* 5.0.0; echo 0'" - 'yarn' - 'yarn bootstrap' - yarn lerna add puppeteer@${PUPPETEER_VERSION} - 'yarn lerna link' - 'yarn lerna run build --concurrency 1' # For debugging - yarn list puppeteer - yarn list puppeteer-extra - file node_modules/puppeteer-extra/dist/index.cjs.js # Run tests - yarn test-ci ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2019 berstend Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================ # puppeteer-extra [![Downloads](https://img.shields.io/endpoint?style=social&url=https://runkit.io/fezvrasta/combined-npm-downloads/1.0.0?packages=puppeteer-extra,puppeteer-extra-plugin,puppeteer-extra-plugin-stealth,puppeteer-extra-plugin-recaptcha,puppeteer-extra-plugin-adblocker)](https://github.com/berstend/puppeteer-extra/) This is the monorepo for [`puppeteer-extra`](./packages/puppeteer-extra), a modular plugin framework for [`puppeteer`](https://github.com/puppeteer/puppeteer). :-) 🌟 **For the main documentation, please head over to the [`puppeteer-extra`](./packages/puppeteer-extra) package.** We've also recently introduced support for Playwright; if you're interested in that, head over to [`playwright-extra`](./packages/playwright-extra). ## Monorepo
Contributing ### Contributing PRs and new plugins are welcome! The plugin API for `puppeteer-extra` is clean and fun to use. Have a look at the [`PuppeteerExtraPlugin`](./packages/puppeteer-extra-plugin) base class documentation to get going and check out the [existing plugins](./packages/) (minimal example is the [anonymize-ua](./packages/puppeteer-extra-plugin-anonymize-ua/index.js) plugin) for reference. We use a [monorepo](https://github.com/berstend/puppeteer-extra) powered by [Lerna](https://github.com/lerna/lerna#--use-workspaces) (and yarn workspaces), [ava](https://github.com/avajs/ava) for testing, the [standard](https://standardjs.com/) style for linting and [JSDoc](http://usejsdoc.org/about-getting-started.html) heavily to auto-generate markdown [documentation](https://github.com/documentationjs/documentation) based on code. :-)
Lerna ### Lerna This monorepo is powered by [Lerna](https://github.com/lerna/lerna) and yarn workspaces. #### Initial setup ```bash # Install deps yarn # Bootstrap the packages in the current Lerna repo. # Installs all of their dependencies and links any cross-dependencies. yarn bootstrap # Build all TypeScript sources yarn build ``` #### Development flow ```bash # Install debug in all packages yarn lerna add debug # Install fs-extra to puppeteer-extra-plugin-user-data-dir yarn lerna add fs-extra --scope=puppeteer-extra-plugin-user-data-dir # Remove dependency # https://github.com/lerna/lerna/issues/833 yarn lerna exec --concurrency 1 'yarn remove fs-extra; echo 0' # Run test in all packages yarn test # Update JSDoc based documentation in markdown files yarn docs # Upgrade project wide deps like puppeteer # (We keep the devDependency version blurry) rm -rf node_modules rm -rf yarn.lock yarn yarn lerna bootstrap # Update deps within packages (interactive) yarn lernaupdate # If in doubt :-( yarn lerna exec "rm -f yarn.lock; rm -rf node_modules; echo 0" rm -f yarn.lock && rm -rf node_modules && yarn cache clean # Run tests of specific package cd packages/puppeteer-extra-plugin-stealth yarn test # Run tests of specific stealth evasion cd packages/puppeteer-extra-plugin-stealth yarn ava -v ./evasions/user-agent-override/index.test.js # Test a local monorepo package in an outside folder as it would've been installed from the registry # Change PACKAGE_DIR to the path of this monorepo and PACKAGE to the package you wish to install PACKAGE=puppeteer-extra PACKAGE_DIR=/Users/foo/puppeteer-extra/packages && yarn remove $(echo $PACKAGE); true && rm -f $(pwd)/$(echo $PACKAGE)-latest.tgz && yarn --cwd $(echo $PACKAGE_DIR)/$(echo $PACKAGE) pack --filename $(pwd)/$(echo $PACKAGE)-latest.tgz && YARN_CACHE_FOLDER=/tmp/yarn yarn add file:$(pwd)/$(echo $PACKAGE)-latest.tgz && rm -rf /tmp/yarn ``` #### Publishing ```bash # make sure you're signed into npm before publishing # yarn 
publishing is broken so lerna uses npm npm whoami # ensure everything is up2date and peachy yarn yarn bootstrap yarn lerna link yarn build yarn test # Phew, let's publish these packages! # - Will publish all changed packages # - Will ask for new pkg version per package # - Will update inter-package dependency versions automatically yarn lerna publish # Fix new dependency version symlinks yarn bootstrap && yarn lerna link ```

================================================ FILE: lerna.json ================================================ { "packages": ["packages/*"], "version": "independent", "useWorkspaces": true, "npmClient": "yarn" } ================================================ FILE: package.json ================================================ { "private": true, "description": "Modular framework to teach Puppeteer new tricks.", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "main": "packages/puppeteer-extra/index.js", "engines": { "node": ">=8" }, "scripts": { "bootstrap": "lerna bootstrap", "build": "yarn lerna exec --concurrency 1 'yarn build; echo 0'", "docs": "lerna run docs", "test": "lerna run test --concurrency 1 --stream", "test-ci": "lerna run test-ci --concurrency 1 --stream", "prepare": "lerna run prepare", "release": "lerna publish --npm-client npm" }, "workspaces": { "packages": [ "packages/*" ], "nohoist": [ "**/@types", "**/@types/**", "**/typescript", "**/typescript/**" ] }, "devDependencies": { "eslint": "^6.7.1", "eslint-config-prettier": "^6.7.0", "eslint-config-prettier-standard": "^3.0.1", "eslint-config-standard": "^14.1.0", "eslint-plugin-import": "^2.18.2", "eslint-plugin-node": "^10.0.0", "eslint-plugin-prettier": "^3.1.1", "eslint-plugin-promise": "^4.2.1", "eslint-plugin-standard": "^4.0.1", "lerna": "^3.19.0", "lerna-update-wizard": "^0.17.5", "prettier": "^1.19.1", "prettier-config-standard": "^1.0.1" }, "optionalDependencies": { "fsevents": "^2.1.2" }, "resolutions": { "**/fsevents": "^2.1.2" }, "dependencies": {}, "version": "0.0.0" } ================================================ FILE: packages/extract-stealth-evasions/.gitignore ================================================ stealth.min.js stealth.js ================================================ FILE: packages/extract-stealth-evasions/index.js ================================================ #!/usr/bin/env node const puppeteer = 
require('puppeteer-extra') const stealth = require('puppeteer-extra-plugin-stealth')() const { minify } = require('terser') const argv = require('yargs') .usage('Usage: $0 [options]') .alias('e', 'exclude') .describe('e', 'Exclude evasion (repeat for multiple)') .alias('i', 'include') .describe('i', 'Include evasion (repeat for multiple)') .alias('l', 'list') .describe('l', 'List available evasions') .alias('m', 'minify') .describe('minify', 'Minify the output') .boolean('m') .default('m', true) .help('h') .alias('h', 'help').argv const fs = require('fs') const file = 'stealth' + (argv.minify === true ? '.min' : '') + '.js' if (argv.exclude) { if (typeof argv.exclude === 'string') { stealth.enabledEvasions.delete(argv.exclude) } else { argv.exclude.forEach(e => { stealth.enabledEvasions.delete(e) }) } } else if (argv.include) { if (typeof argv.include === 'string') { stealth.enabledEvasions = [argv.include] } else { stealth.enabledEvasions = [] argv.include.forEach(e => { stealth.enabledEvasions.push(e) }) } } else if (argv.list) { console.log('Available evasions:', [...stealth.availableEvasions].join(', ')) process.exit(0) } let scripts = '' puppeteer .use(stealth) .launch({ headless: true }) .then(async browser => { // Patch evaluateOnNewDocument() const page = (await browser.pages()).find(Boolean) page.__proto__.evaluateOnNewDocument = patchEval // eslint-disable-line no-proto page.__proto__.evaluate = patchEval // eslint-disable-line no-proto await (await browser.newPage()).goto('about:blank') await browser.close() fs.writeFile( file, `/*! * Note: Auto-generated, do not update manually. * Generated by: https://github.com/berstend/puppeteer-extra/tree/master/packages/extract-stealth-evasions * Generated on: ${new Date().toUTCString()} * License: MIT */ ` + (argv.minify === true ? 
(await minify(scripts, { toplevel: true })).code : scripts), err => { if (err) throw err console.log(`File ${file} written!`) console.log( 'Included evasions: ', [...stealth.enabledEvasions].join(', ') ) } ) }) function patchEval(f, args) { // Check if there are options supplied if (typeof args !== 'undefined') { scripts += '(' + f.toString() + ')(' + JSON.stringify(args) + ');\n' } else { scripts += '(' + f.toString() + ')();\n' } } ================================================ FILE: packages/extract-stealth-evasions/package.json ================================================ { "name": "extract-stealth-evasions", "version": "2.7.3", "description": "Extract stealth evasions from puppeteer-extra-plugin-stealth", "main": "index.js", "bin": { "extract-stealth-evasions": "./index.js" }, "homepage": "https://github.com/berstend/puppeteer-extra/tree/master/packages/extract-stealth-evasions#readme", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "chrome", "headless", "pupeteer" ], "dependencies": { "puppeteer": "*", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-stealth": "^2.11.2", "terser": "^5.1.0", "yargs": "^15.4.1" } } ================================================ FILE: packages/extract-stealth-evasions/readme.md ================================================ # extract-stealth-evasions This script offers a quick way to extract the latest stealth evasions from [puppeteer-extra-stealth](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth) to (minified) JavaScript. The resulting JS file can be used in pure [CDP](https://chromedevtools.github.io/devtools-protocol/tot/) implementations or to test the evasions in your devtools. #### Usage with `npx` You don't need to install anything, `npx` runs wherever NodeJS is installed. 
:-) ```bash npx extract-stealth-evasions ``` Will create a `stealth.min.js` file in the current folder. #### Using the CDN version You can also fetch the latest version from [gitCDN](https://gitcdn.xyz/repo/berstend/puppeteer-extra/stealth-js/stealth.min.js). For example, paste this one-liner in your browser devtools console: ```js document.body.appendChild(Object.assign(document.createElement('script'), {src: 'https://gitcdn.xyz/repo/berstend/puppeteer-extra/stealth-js/stealth.min.js'})) ``` #### How to use locally ```bash yarn install node index.js ``` Use the resulting `stealth.min.js` file however you like. #### Options ```bash $ npx extract-stealth-evasions -h Usage: extract-stealth-evasions [options] Options: --version Show version number [boolean] -e, --exclude Exclude evasion (repeat for multiple) -i, --include Include evasion (repeat for multiple) -l, --list List available evasions -h, --help Show help [boolean] -m, --minify Minify the output [boolean] [default: true] ``` ================================================ FILE: packages/playwright-extra/.prettierrc.js ================================================ module.exports = 'prettier-config-standard' ================================================ FILE: packages/playwright-extra/package.json ================================================ { "name": "playwright-extra", "version": "4.3.6", "description": "Teach playwright new tricks through plugins.", "repository": "berstend/puppeteer-extra", "homepage": "https://github.com/berstend/puppeteer-extra/tree/master/packages/playwright-extra#readme", "author": "berstend", "license": "MIT", "typings": "dist/index.d.ts", "main": "dist/index.cjs.js", "module": "dist/index.esm.js", "files": [ "dist" ], "scripts": { "clean": "rimraf dist/*", "prebuild": "run-s clean", "build": "run-s build:tsc build:rollup ambient-dts", "build:tsc": "tsc --module commonjs", "build:rollup": "rollup -c rollup.config.ts", "docs": "echo \"No docs\"", "test": "yarn playwright test 
--config test/playwright.config.ts", "test-ci": "run-s test", "ambient-dts": "run-s ambient-dts-copy ambient-dts-fix-path", "ambient-dts-copy": "copyfiles -u 1 \"src/**/*.d.ts\" dist", "ambient-dts-fix-path": "replace-in-files --string='/// =12" }, "devDependencies": { "@playwright/test": "^1.23.1", "@types/debug": "^4.1.7", "@types/node": "^18.0.0", "esbuild": "^0.14.47", "esbuild-register": "^3.3.3", "npm-run-all": "^4.1.5", "playwright": "1.24.2", "prettier": "^2.7.1", "puppeteer-extra-plugin": "^3.2.3", "puppeteer-extra-plugin-anonymize-ua": "^2.4.5", "rimraf": "^3.0.0", "rollup": "^1.27.5", "rollup-plugin-commonjs": "^10.1.0", "rollup-plugin-node-resolve": "^5.2.0", "rollup-plugin-sourcemaps": "^0.4.2", "rollup-plugin-typescript2": "^0.25.2", "typescript": "4.4.3" }, "dependencies": { "debug": "^4.3.4" }, "peerDependencies": { "playwright": "*", "playwright-core": "*" }, "peerDependenciesMeta": { "playwright": { "optional": true }, "playwright-core": { "optional": true } } } ================================================ FILE: packages/playwright-extra/readme.md ================================================ # playwright-extra [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/berstend/puppeteer-extra/test.yml?branch=master&event=push)](https://github.com/berstend/puppeteer-extra/actions) [![Discord](https://img.shields.io/discord/737009125862408274)](https://extra.community) [![npm](https://img.shields.io/npm/v/playwright-extra.svg)](https://www.npmjs.com/package/playwright-extra) > A modular plugin framework for [playwright](https://github.com/microsoft/playwright) to enable cool [plugins](#plugins) through a clean interface. ## Installation ```bash yarn add playwright playwright-extra # - or - npm install playwright playwright-extra ```
Changelog > Please check the `announcements` channel in our [discord server](https://extra.community) until we've automated readme updates. :) - **v4.3** - Rerelease due to versioning issues with previous beta packages - **v3.3** - Initial public release
## Quickstart ```js // playwright-extra is a drop-in replacement for playwright, // it augments the installed playwright with plugin functionality const { chromium } = require('playwright-extra') // Load the stealth plugin and use defaults (all tricks to hide playwright usage) // Note: playwright-extra is compatible with most puppeteer-extra plugins const stealth = require('puppeteer-extra-plugin-stealth')() // Add the plugin to playwright (any number of plugins can be added) chromium.use(stealth) // That's it, the rest is playwright usage as normal 😊 chromium.launch({ headless: true }).then(async browser => { const page = await browser.newPage() console.log('Testing the stealth plugin..') await page.goto('https://bot.sannysoft.com', { waitUntil: 'networkidle' }) await page.screenshot({ path: 'stealth.png', fullPage: true }) console.log('All done, check the screenshot. ✨') await browser.close() }) ``` The above example uses the compatible [`stealth`](/packages/puppeteer-extra-plugin-stealth) plugin from puppeteer-extra, that plugin needs to be installed as well: ```bash yarn add puppeteer-extra-plugin-stealth # - or - npm install puppeteer-extra-plugin-stealth ``` If you'd like to see debug output just run your script like so: ```bash # macOS/Linux (Bash) DEBUG=playwright-extra*,puppeteer-extra* node myscript.js # Windows (Powershell) $env:DEBUG='playwright-extra*,puppeteer-extra*';node myscript.js ``` ### More examples
TypeScript & ESM usage
`playwright-extra` and most plugins are written in TS, so you get perfect type support out of the box. :) ```ts // playwright-extra is a drop-in replacement for playwright, // it augments the installed playwright with plugin functionality import { chromium } from 'playwright-extra' // Load the stealth plugin and use defaults (all tricks to hide playwright usage) // Note: playwright-extra is compatible with most puppeteer-extra plugins import StealthPlugin from 'puppeteer-extra-plugin-stealth' // Add the plugin to playwright (any number of plugins can be added) chromium.use(StealthPlugin()) // ...(the rest of the quickstart code example is the same) chromium.launch({ headless: true }).then(async browser => { const page = await browser.newPage() console.log('Testing the stealth plugin..') await page.goto('https://bot.sannysoft.com', { waitUntil: 'networkidle' }) await page.screenshot({ path: 'stealth.png', fullPage: true }) console.log('All done, check the screenshot. ✨') await browser.close() }) ``` New to TypeScript? Here it is in 30 seconds or less 😄: ```bash # Optional: If you don't have yarn yet npm i --global yarn # Optional: Create new package.json if it's a new project yarn init -y # Add basic typescript dependencies yarn add --dev typescript @types/node esbuild esbuild-register # Bootstrap a tsconfig.json yarn tsc --init --target ES2020 --lib ES2020 --module commonjs --rootDir src --outDir dist # Add dependencies used in the quick start example yarn add playwright playwright-extra puppeteer-extra-plugin-stealth # Create source folder for the .ts files mkdir src # Now place the example code above in `src/index.ts` # Run the typescript code without the need of compiling it first node -r esbuild-register src/index.ts # You can now add TypeScript to your CV 🎉 ```
Using different browsers
```ts // Any browser supported by playwright can be used with plugins import { chromium, firefox, webkit } from 'playwright-extra' chromium.use(plugin) firefox.use(plugin) webkit.use(plugin) ```
Multiple instances with different plugins
Node.js imports are cached, therefore the default `chromium`, `firefox`, `webkit` export from `playwright-extra` will always return the same playwright instance. ```ts // Use `addExtra` to create a fresh and independent instance import playwright from 'playwright' import { addExtra } from 'playwright-extra' const chromium1 = addExtra(playwright.chromium) const chromium2 = addExtra(playwright.chromium) chromium1.use(onePlugin) chromium2.use(anotherPlugin) // chromium1 and chromium2 are independent ```
--- ## Plugins We're currently in the process of making the existing [puppeteer-extra](/packages/puppeteer-extra) plugins compatible with playwright-extra, the following plugins have been successfully tested already: ### 🔥 [`puppeteer-extra-plugin-stealth`](/packages/puppeteer-extra-plugin-stealth) - Applies various evasion techniques to make detection of an automated browser harder - Compatible with Puppeteer & Playwright and chromium based browsers
  Example: Using stealth in Playwright with custom options ```js // The stealth plugin is optimized for chromium based browsers currently import { chromium } from 'playwright-extra' import StealthPlugin from 'puppeteer-extra-plugin-stealth' chromium.use(StealthPlugin()) // New way to overwrite the default options of stealth evasion plugins // https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth/evasions chromium.plugins.setDependencyDefaults('stealth/evasions/webgl.vendor', { vendor: 'Bob', renderer: 'Alice' }) // That's it, the rest is playwright usage as normal 😊 chromium.launch({ headless: true }).then(async browser => { const page = await browser.newPage() console.log('Testing the webgl spoofing feature of the stealth plugin..') await page.goto('https://webglreport.com', { waitUntil: 'networkidle' }) await page.screenshot({ path: 'webgl.png', fullPage: true }) console.log('All done, check the screenshot. ✨') await browser.close() }) ```
### 🏴 [`puppeteer-extra-plugin-recaptcha`](/packages/puppeteer-extra-plugin-recaptcha) - Solves reCAPTCHAs and hCaptchas automatically, using a single line of code: `page.solveRecaptchas()` - Compatible with Puppeteer & Playwright and all browsers (chromium, firefox, webkit)
  Example: Solving captchas in Playwright & Firefox ```js // Any browser (chromium, webkit, firefox) can be used import { firefox } from 'playwright-extra' import RecaptchaPlugin from 'puppeteer-extra-plugin-recaptcha' firefox.use( RecaptchaPlugin({ provider: { id: '2captcha', token: process.env.TWOCAPTCHA_TOKEN || 'YOUR_API_KEY' } }) ) // Works in headless as well, just so you can see it in action firefox.launch({ headless: false }).then(async browser => { const context = await browser.newContext() const page = await context.newPage() const url = 'https://www.google.com/recaptcha/api2/demo' await page.goto(url, { waitUntil: 'networkidle' }) console.log('Solving captchas..') await page.solveRecaptchas() await Promise.all([ page.waitForNavigation({ waitUntil: 'networkidle' }), page.click(`#recaptcha-demo-submit`) ]) const content = await page.content() const isSuccess = content.includes('Verification Success') console.log('Done', { isSuccess }) await browser.close() }) ```
### 🆕 [`plugin-proxy-router`](/packages/plugin-proxy-router) - Use multiple proxies dynamically with flexible per-host routing and more - Compatible with Puppeteer & Playwright and all browsers (chromium, firefox, webkit) **Notes** - If you're in need of adblocking use [this package](https://www.npmjs.com/package/@cliqz/adblocker-playwright) or [block resources natively](https://github.com/berstend/puppeteer-extra/wiki/Block-resources-without-request-interception) - We're focussing on compatibility with existing plugins at the moment, more documentation on how to write your own playwright-extra plugins will follow --- ## Contributors --- ## License Copyright © 2018 - 2023, [berstend̡̲̫̹̠̖͚͓̔̄̓̐̄͛̀͘](https://github.com/berstend). Released under the MIT License. [playwright-extra]: https://github.com/berstend/puppeteer-extra/tree/master/packages/playwright-extra [puppeteer-extra]: https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra [`puppeteer-extra`]: https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra ================================================ FILE: packages/playwright-extra/rollup.config.ts ================================================ import commonjs from 'rollup-plugin-commonjs' import resolve from 'rollup-plugin-node-resolve' import sourceMaps from 'rollup-plugin-sourcemaps' import typescript from 'rollup-plugin-typescript2' const pkg = require('./package.json') const entryFile = 'index' const banner = ` /*! 
* ${pkg.name} v${pkg.version} by ${pkg.author} * ${pkg.homepage || `https://github.com/${pkg.repository}`} * @license ${pkg.license} */ `.trim()

// Snippet handed to rollup as `outro` for the CommonJS output only (see below):
// it points `module.exports` at the default export (or an empty object) and then
// copies every named export onto it.
const defaultExportOutro = ` module.exports = exports.default || {} Object.entries(exports).forEach(([key, value]) => { module.exports[key] = value }) `

// Rollup build configuration: a single entry file emitted twice,
// once as CommonJS (`pkg.main`) and once as an ES module (`pkg.module`).
export default {
  input: `src/${entryFile}.ts`,
  output: [
    {
      file: pkg.main,
      format: 'cjs',
      sourcemap: true,
      exports: 'named',
      outro: defaultExportOutro,
      banner
    },
    {
      file: pkg.module,
      format: 'es',
      sourcemap: true,
      exports: 'named',
      banner
    }
  ],
  // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
  // Here: everything listed under `dependencies` and `peerDependencies` in package.json
  external: [
    ...Object.keys(pkg.dependencies || {}),
    ...Object.keys(pkg.peerDependencies || {})
  ],
  watch: {
    include: 'src/**'
  },
  plugins: [
    // Compile TypeScript files
    typescript({ useTsconfigDeclarationDir: true }),
    // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
    commonjs(),
    // Allow node_modules resolution, so you can use 'external' to control
    // which external modules to include in the bundle
    // https://github.com/rollup/rollup-plugin-node-resolve#usage
    resolve(),
    // Resolve source maps to the original source
    sourceMaps()
  ]
}

================================================
FILE: packages/playwright-extra/src/extra.ts
================================================
import Debug from 'debug'
// Namespaced logger for this module
const debug = Debug('playwright-extra')

import type * as pw from 'playwright-core'
import type { CompatiblePlugin, Plugin } from './types'
import { PluginList } from './plugins'
import { playwrightLoader } from './helper/loader'

// Local shorthand for the playwright launcher type that gets augmented below
type PlaywrightBrowserLauncher = pw.BrowserType

/**
 * The Playwright browser launcher APIs we're augmenting
 * @private
 */
interface AugmentedLauncherAPIs
  extends Pick<
    PlaywrightBrowserLauncher,
    'launch' | 'launchPersistentContext' | 'connect' | 'connectOverCDP'
  > {}

/**
 * Modular plugin framework to teach `playwright` new tricks.
*/
export class PlaywrightExtraClass implements AugmentedLauncherAPIs {
  /** Plugin manager */
  public readonly plugins: PluginList

  /**
   * @param _launcher - Optional playwright (or compatible) browser launcher to wrap.
   * When it is missing, nothing throws until the `launcher` getter is read.
   */
  constructor(private _launcher?: Partial) {
    this.plugins = new PluginList()
  }

  /**
   * The **main interface** to register plugins.
   *
   * Can be called multiple times to enable multiple plugins.
   *
   * Plugins derived from `PuppeteerExtraPlugin` will be used with a compatibility layer.
   *
   * @example
   * chromium.use(plugin1).use(plugin2)
   * firefox.use(plugin1).use(plugin2)
   *
   * @see [PuppeteerExtraPlugin]
   *
   * @param plugin - The plugin instance to register, it must expose a `name`
   * @throws If no plugin (or an object without `name`) is provided
   * @return The same `PlaywrightExtra` instance (for optional chaining)
   */
  public use(plugin: CompatiblePlugin): this {
    const isValid = plugin && 'name' in plugin
    if (!isValid) {
      throw new Error('A plugin must be provided to .use()')
    }
    // `add` returns false when the plugin list rejected the plugin
    if (this.plugins.add(plugin as Plugin)) {
      debug('Plugin registered', plugin.name)
    }
    return this
  }

  /**
   * In order to support a default export which will require vanilla playwright automatically,
   * as well as `addExtra` to patch a provided launcher, we need to do some gymnastics here.
   *
   * Otherwise this would throw immediately, even when only using the `addExtra` export with an arbitrary compatible launcher.
   *
   * The solution is to make the vanilla launcher optional and only throw once we try to effectively use and can't find it.
   *
   * @throws `playwrightLoader.requireError` when no launcher was provided
   * @internal
   */
  public get launcher(): Partial {
    if (!this._launcher) {
      throw playwrightLoader.requireError
    }
    return this._launcher
  }

  /**
   * Launch a browser through the wrapped launcher with the plugin lifecycle around it:
   * `beforeLaunch` (plugins may replace the options) => `launcher.launch` => `onBrowser`
   * => browser event binding => `afterLaunch`.
   *
   * @throws If the wrapped launcher has no `launch` method
   */
  public async launch(
    ...args: Parameters
  ): ReturnType {
    if (!this.launcher.launch) {
      throw new Error('Launcher does not support "launch"')
    }

    let [options] = args
    options = { args: [], ...(options || {}) } // Initialize args array
    debug('launch', options)
    this.plugins.prepare()

    // Give plugins the chance to modify the options before continuing
    options =
      (await this.plugins.dispatchBlocking('beforeLaunch', options)) || options

    debug('launch with options', options)
    if ('userDataDir' in options) {
      debug(
        "A plugin defined userDataDir during .launch, which isn't supported by playwright - ignoring"
      )
      delete (options as any).userDataDir
    }
    const browser = await this.launcher['launch'](options)
    await this.plugins.dispatchBlocking('onBrowser', browser)
    await this._bindBrowserEvents(browser)
    await this.plugins.dispatchBlocking('afterLaunch', browser)
    return browser
  }

  /**
   * Launch a persistent context through the wrapped launcher.
   * Plugins receive `beforeLaunch` (may replace the options) and `afterLaunch` (with the context),
   * afterwards the context events are bound.
   *
   * @throws If the wrapped launcher has no `launchPersistentContext` method
   */
  public async launchPersistentContext(
    ...args: Parameters
  ): ReturnType {
    if (!this.launcher.launchPersistentContext) {
      throw new Error('Launcher does not support "launchPersistentContext"')
    }

    let [userDataDir, options] = args
    options = { args: [], ...(options || {}) } // Initialize args array
    debug('launchPersistentContext', options)
    this.plugins.prepare()

    // Give plugins the chance to modify the options before continuing
    options =
      (await this.plugins.dispatchBlocking('beforeLaunch', options)) || options

    const context = await this.launcher['launchPersistentContext'](
      userDataDir,
      options
    )
    await this.plugins.dispatchBlocking('afterLaunch', context)
    this._bindBrowserContextEvents(context)
    return context
  }

  /**
   * Connect to a browser through the wrapped launcher.
   * Accepts both call signatures (`connect(wsEndpoint, options)` and `connect(options)`)
   * and forwards the call to the launcher in the same shape the caller used.
   *
   * @param wsEndpointOrOptions - The endpoint as string, or an options object (optionally carrying `wsEndpoint`)
   * @param wsOptions - Options, merged over the first argument
   * @throws If the wrapped launcher has no `connect` method
   */
  async connect(
    wsEndpointOrOptions: string | (pw.ConnectOptions & { wsEndpoint?: string }),
    wsOptions: pw.ConnectOptions = {}
  ): ReturnType {
    if (!this.launcher.connect) {
      throw new Error('Launcher does not support "connect"')
    }
    this.plugins.prepare()

    // Playwright currently supports two function signatures for .connect
    let options: pw.ConnectOptions & { wsEndpoint?: string } = {}
    let wsEndpointAsString = false
    if (typeof wsEndpointOrOptions === 'object') {
      options = { ...wsEndpointOrOptions, ...wsOptions }
    } else {
      wsEndpointAsString = true
      options = { wsEndpoint: wsEndpointOrOptions, ...wsOptions }
    }
    debug('connect', options)

    // Give plugins the chance to modify the options before launch/connect
    options =
      (await this.plugins.dispatchBlocking('beforeConnect', options)) || options

    // Follow call signature of end user
    const args: any[] = []
    const wsEndpoint = options.wsEndpoint
    if (wsEndpointAsString) {
      delete options.wsEndpoint
      args.push(wsEndpoint, options)
    } else {
      args.push(options)
    }

    const browser = (await (this.launcher['connect'] as any)(
      ...args
    )) as pw.Browser
    await this.plugins.dispatchBlocking('onBrowser', browser)
    await this._bindBrowserEvents(browser)
    await this.plugins.dispatchBlocking('afterConnect', browser)
    return browser
  }

  /**
   * Connect to a browser over CDP through the wrapped launcher.
   * Accepts both call signatures (`connectOverCDP(endpointURL, options)` and `connectOverCDP(options)`)
   * and forwards the call to the launcher in the same shape the caller used.
   *
   * @param wsEndpointOrOptions - The endpoint URL as string, or an options object (optionally carrying `endpointURL`)
   * @param wsOptions - Options, merged over the first argument
   * @throws If the wrapped launcher has no `connectOverCDP` method
   */
  async connectOverCDP(
    wsEndpointOrOptions:
      | string
      | (pw.ConnectOverCDPOptions & { endpointURL?: string }),
    wsOptions: pw.ConnectOverCDPOptions = {}
  ): ReturnType {
    if (!this.launcher.connectOverCDP) {
      throw new Error(`Launcher does not implement 'connectOverCDP'`)
    }
    this.plugins.prepare()

    // Playwright currently supports two function signatures for .connectOverCDP
    let options: pw.ConnectOverCDPOptions & { endpointURL?: string } = {}
    let wsEndpointAsString = false
    if (typeof wsEndpointOrOptions === 'object') {
      options = { ...wsEndpointOrOptions, ...wsOptions }
    } else {
      wsEndpointAsString = true
      options = { endpointURL: wsEndpointOrOptions, ...wsOptions }
    }
    // Log the effective options (same as `connect` does)
    debug('connectOverCDP', options)

    // Give plugins the chance to modify the options before launch/connect
    options =
      (await this.plugins.dispatchBlocking('beforeConnect', options)) || options

    // Follow call signature of end user
    const args: any[] = []
    const endpointURL = options.endpointURL
    if (wsEndpointAsString) {
      delete options.endpointURL
      args.push(endpointURL, options)
    } else {
      args.push(options)
    }

    const browser = (await (this.launcher['connectOverCDP'] as any)(
      ...args
    )) as pw.Browser
    await this.plugins.dispatchBlocking('onBrowser', browser)
    await this._bindBrowserEvents(browser)
    await this.plugins.dispatchBlocking('afterConnect', browser)
    return browser
  }

  /**
   * Announce a browser context to the plugins (`onContextCreated`) and hook into it:
   * `newPage` is wrapped, and `close`/`page` events are translated into plugin events.
   *
   * @param context - The browser context to bind to
   * @param contextOptions - The options the context was created with (if known)
   */
  protected async _bindBrowserContextEvents(
    context: pw.BrowserContext,
    contextOptions?: pw.BrowserContextOptions
  ) {
    debug('_bindBrowserContextEvents')
    this.plugins.dispatch('onContextCreated', context, contextOptions)

    // Make sure things like `addInitScript` show an effect on the very first page as well
    context.newPage = ((originalMethod, ctx) => {
      return async () => {
        const page = await originalMethod.call(ctx)
        await page.goto('about:blank')
        return page
      }
    })(context.newPage, context)

    context.on('close', () => {
      // When using `launchPersistentContext` context closing is the same as browser closing
      if (!context.browser()) {
        this.plugins.dispatch('onDisconnected')
      }
    })
    context.on('page', page => {
      this.plugins.dispatch('onPageCreated', page)
      page.on('close', () => {
        this.plugins.dispatch('onPageClose', page)
      })
    })
  }

  /**
   * Hook into a browser: forward `disconnected` to the plugins and wrap `newContext`,
   * so plugins can adjust the context options (`beforeContext`) and every new context gets bound.
   *
   * @param browser - The browser to bind to
   */
  protected async _bindBrowserEvents(browser: pw.Browser) {
    debug('_bindPlaywrightBrowserEvents')

    browser.on('disconnected', () => {
      this.plugins.dispatch('onDisconnected', browser)
    })

    // Note: `browser.newPage` will implicitly call `browser.newContext` as well
    browser.newContext = ((originalMethod, ctx) => {
      return async (options: pw.BrowserContextOptions = {}) => {
        const contextOptions: pw.BrowserContextOptions =
          (await this.plugins.dispatchBlocking(
            'beforeContext',
            options,
            browser
          )) || options
        const context = await originalMethod.call(ctx, contextOptions)
        this._bindBrowserContextEvents(context, contextOptions)
        return context
      }
    })(browser.newContext, browser)
  }
}

/**
 * PlaywrightExtra class with additional launcher methods.
*
 * Augments the class with an instance proxy to pass on methods that are not augmented to the original target.
 *
 */
export const PlaywrightExtra = new Proxy(PlaywrightExtraClass, {
  construct(classTarget, args) {
    debug(`create instance of ${classTarget.name}`)
    const instance = Reflect.construct(classTarget, args) as PlaywrightExtraClass
    return new Proxy(instance, {
      get(target, prop) {
        // Whatever the augmented class doesn't define itself is read from the wrapped launcher
        if (!(prop in target)) {
          debug('proxying property to original launcher: ', prop)
          return Reflect.get(target.launcher, prop)
        }
        return Reflect.get(target, prop)
      }
    })
  }
})

================================================
FILE: packages/playwright-extra/src/helper/loader.ts
================================================
import type * as pw from 'playwright-core'

/** Resolves a Node.js module from a list of candidate package names */
export class Loader {
  constructor(public moduleName: string, public packageNames: string[]) {}

  /**
   * Hand out a JS proxy standing in for a top level export of the target module.
   *
   * Every trap loads the target module on demand and forwards the operation to the named export,
   * so e.g. `devices` from `playwright` can be re-exported while always using the module version
   * the user has installed, rather than shipping with a hardcoded version.
   *
   * Without this indirection a missing target module would make us throw as soon as our code is imported.
   *
   * All `Reflect` traps are defined, so calls like `Object.keys(playwright.devices).length` return the correct value.
   */
  public lazyloadExportOrDie(exportName: T) {
    const allTraps: any = {}
    for (const trapName of Object.getOwnPropertyNames(Reflect) as any[]) {
      allTraps[trapName] = (target: any, ...args: any[]) => {
        const liveExport = this.loadModuleOrDie()[exportName] as any
        // Fall back to the (empty) proxy target in case the export is falsy
        return ((Reflect as any)[trapName] as any)(liveExport || target, ...args)
      }
    }
    return new Proxy({}, allTraps) as TargetModule[T]
  }

  /** Try to load the module, the result is `undefined` when no candidate could be required */
  public loadModule() {
    return requirePackages(this.packageNames)
  }

  /** Load the module, throwing `requireError` when no candidate could be required */
  public loadModuleOrDie(): TargetModule {
    const loaded = requirePackages(this.packageNames)
    if (!loaded) {
      throw this.requireError
    }
    return loaded
  }

  /** The error describing which packages were tried and how to install the module */
  public get requireError() {
    const initial = this.moduleName.charAt(0)
    const remainder = this.moduleName.slice(1)
    const prettyName = initial.toUpperCase() + remainder
    return new Error(` ${prettyName} is missing. :-) I've tried loading ${this.packageNames
      .map(pkg => `"${pkg}"`)
      .join(', ')} - no luck. Make sure you install one of those packages or use the named 'addExtra' export, to patch a specific (and maybe non-standard) implementation of ${prettyName}. 
To get the latest stable version of ${prettyName} run: 'yarn add ${this.moduleName}' or 'npm i ${this.moduleName}' `)
  }
}

/** Require the first package of the list that can be resolved, or return `undefined` */
export function requirePackages(packageNames: string[]) {
  for (const packageName of packageNames) {
    try {
      return require(packageName) as TargetModule
    } catch (_) {
      // This candidate is not available, move on to the next one
    }
  }
  return
}

/** Playwright specific module loader */
export const playwrightLoader = new Loader('playwright', [
  'playwright-core',
  'playwright'
])

================================================
FILE: packages/playwright-extra/src/index.ts
================================================
import type * as pw from 'playwright-core'

import { PlaywrightExtra, PlaywrightExtraClass } from './extra'
import { PluginList } from './plugins'
import { playwrightLoader as loader } from './helper/loader'

export { PlaywrightExtra, PlaywrightExtraClass } from './extra'
export { PluginList } from './plugins'

/** A playwright browser launcher */
export type PlaywrightBrowserLauncher = pw.BrowserType<{}>
/** A playwright browser launcher with plugin functionality */
export type AugmentedBrowserLauncher = PlaywrightExtraClass &
  PlaywrightBrowserLauncher

/**
 * The minimum shape we expect from a playwright compatible launcher object.
 * We intentionally keep this not strict so other custom or compatible launchers can be used.
 */
export interface PlaywrightCompatibleLauncher {
  connect(...args: any[]): Promise
  launch(...args: any[]): Promise
}

/** Our custom module exports */
interface ExtraModuleExports {
  PlaywrightExtra: typeof PlaywrightExtra
  PlaywrightExtraClass: typeof PlaywrightExtraClass
  PluginList: typeof PluginList
  addExtra: typeof addExtra
  chromium: AugmentedBrowserLauncher
  firefox: AugmentedBrowserLauncher
  webkit: AugmentedBrowserLauncher
}

/** Vanilla playwright module exports */
type PlaywrightModuleExports = typeof pw

/**
 * Augment the provided Playwright browser launcher with plugin functionality.
 *
 * Using `addExtra` will always create a fresh PlaywrightExtra instance.
*
 * @example
 * import playwright from 'playwright'
 * import { addExtra } from 'playwright-extra'
 *
 * const chromium = addExtra(playwright.chromium)
 * chromium.use(plugin)
 *
 * @param launcher - Playwright (or compatible) browser launcher
 */
export const addExtra = (
  launcher?: Launcher
) => {
  // Every call wraps the launcher in its own PlaywrightExtra instance
  const augmented = new PlaywrightExtra(launcher)
  return augmented as PlaywrightExtraClass & Launcher
}

/**
 * This object can be used to launch or connect to Chromium with plugin functionality.
 *
 * This default export will behave exactly the same as the regular playwright
 * (just with extra plugin functionality) and can be used as a drop-in replacement.
 *
 * Behind the scenes it will try to require either the `playwright-core`
 * or `playwright` module from the installed dependencies.
 *
 * @note
 * Due to Node.js import caching this will result in a single
 * PlaywrightExtra instance, even when used in different files. If you need multiple
 * instances with different plugins please use `addExtra`.
 *
 * @example
 * // javascript import
 * const { chromium } = require('playwright-extra')
 *
 * // typescript/es6 module import
 * import { chromium } from 'playwright-extra'
 *
 * // Add plugins
 * chromium.use(...)
*/
export const chromium = addExtra((loader.loadModule() || {}).chromium)
/**
 * This object can be used to launch or connect to Firefox with plugin functionality
 * @note This export will always return the same instance, if you wish to use multiple instances with different plugins use `addExtra`
 */
export const firefox = addExtra((loader.loadModule() || {}).firefox)
/**
 * This object can be used to launch or connect to Webkit with plugin functionality
 * @note This export will always return the same instance, if you wish to use multiple instances with different plugins use `addExtra`
 */
export const webkit = addExtra((loader.loadModule() || {}).webkit)

// Other playwright module exports we simply re-export with lazy loading
export const _android = loader.lazyloadExportOrDie('_android')
export const _electron = loader.lazyloadExportOrDie('_electron')
export const request = loader.lazyloadExportOrDie('request')
export const selectors = loader.lazyloadExportOrDie('selectors')
export const devices = loader.lazyloadExportOrDie('devices')
export const errors = loader.lazyloadExportOrDie('errors')

/** Playwright with plugin functionality */
const moduleExports: ExtraModuleExports & PlaywrightModuleExports = {
  // custom exports
  PlaywrightExtra,
  PlaywrightExtraClass,
  PluginList,
  addExtra,
  chromium,
  firefox,
  webkit,

  // vanilla exports
  _android,
  _electron,
  request,
  selectors,
  devices,
  errors
}

export default moduleExports

================================================
FILE: packages/playwright-extra/src/plugins.ts
================================================
import Debug from 'debug'
const debug = Debug('playwright-extra:plugins')

import {
  Plugin,
  PluginMethodName,
  PluginMethodFn,
  PluginModule,
  CompatiblePluginModule
} from './types'

import { requirePackages } from './helper/loader'
import { addPuppeteerCompat } from './puppeteer-compatiblity-shim'

export class PluginList {
  private readonly _plugins: Plugin[] = []
  private readonly _dependencyDefaults: Map = new Map()
  private readonly _dependencyResolution: Map = new Map()

  constructor() {}

  /**
   * All plugins registered so far (the internal array itself, not a copy).
   */
  public get list() {
    return this._plugins
  }

  /**
   * The `name` of every registered plugin, in registration order.
   */
  public get names() {
    return this._plugins.map(({ name }) => name)
  }

  /**
   * Register a plugin, provided it passes the shape check.
   *
   * @param plugin
   * @return `true` when the plugin was added, `false` when it was rejected
   * @internal
   */
  public add(plugin: Plugin) {
    if (!this.isValidPluginInstance(plugin)) {
      return false
    }
    // Let the plugin know which framework it is running in
    if (plugin.onPluginRegistered) {
      plugin.onPluginRegistered({ framework: 'playwright' })
    }
    // PuppeteerExtraPlugin: Populate `_childClassMembers` list containing methods defined by the plugin
    if (plugin._registerChildClassMembers) {
      plugin._registerChildClassMembers(Object.getPrototypeOf(plugin))
    }
    const wantsPluginData = plugin.requirements?.has('dataFromPlugins')
    if (wantsPluginData) {
      plugin.getDataFromPlugins = this.getData.bind(this)
    }
    this._plugins.push(plugin)
    return true
  }

  /** Shape check for a plugin: logs a warning and returns `false` when it is not acceptable */
  protected isValidPluginInstance(plugin: Plugin) {
    const isExtraPlugin =
      !!plugin && typeof plugin === 'object' && !!plugin._isPuppeteerExtraPlugin
    if (!isExtraPlugin) {
      console.error(
        `Warning: Plugin is not derived from PuppeteerExtraPlugin, ignoring.`,
        plugin
      )
      return false
    }
    if (plugin.name) {
      return true
    }
    console.error(
      `Warning: Plugin with no name registering, ignoring.`,
      plugin
    )
    return false
  }

  /** Called whenever a plugin method throws (or rejects). Can be overwritten. */
  public onPluginError(plugin: Plugin, method: PluginMethodName, err: Error) {
    console.warn(
      `An error occured while executing "${method}" in plugin "${plugin.name}":`,
      err
    )
  }

  /**
   * Define default values for plugins implicitly required through the `dependencies` plugin stanza.
*
   * @param dependencyPath - The string by which the dependency is listed (not the plugin name)
   *
   * @example
   * chromium.use(stealth)
   * chromium.plugins.setDependencyDefaults('stealth/evasions/webgl.vendor', { vendor: 'Bob', renderer: 'Alice' })
   */
  public setDependencyDefaults(dependencyPath: string, opts: any) {
    this._dependencyDefaults.set(dependencyPath, opts)
    return this
  }

  /**
   * Provide the plugin module to use for a plugin implicitly required through the `dependencies` plugin stanza.
   *
   * A module registered here is used instead of a dynamic import, which JS bundlers often have issues with.
   *
   * @example
   * chromium.use(stealth)
   * chromium.plugins.setDependencyResolution('stealth/evasions/webgl.vendor', VendorPlugin)
   */
  public setDependencyResolution(
    dependencyPath: string,
    pluginModule: CompatiblePluginModule
  ) {
    this._dependencyResolution.set(dependencyPath, pluginModule)
    return this
  }

  /**
   * Get the plugins ready for use: dependencies are resolved first, then the plugins are ordered
   * @internal
   */
  public prepare() {
    this.resolveDependencies()
    this.order()
  }

  /** The subset of registered plugins that implement the given method */
  protected filterByMethod(methodName: PluginMethodName) {
    return this._plugins.filter(plugin => {
      // PuppeteerExtraPlugin: The base class will already define all methods, hence we need to do a different check
      const ownMembers = plugin._childClassMembers
      if (Array.isArray(ownMembers)) {
        return ownMembers.includes(methodName)
      }
      return methodName in plugin
    })
  }

  /** Wrap the arguments handed to a plugin in the puppeteer compatibility shim, where applicable */
  protected _addPuppeteerCompatIfNeeded(
    plugin: Plugin,
    method: TMethod,
    args: Parameters>
  ) {
    // Only these lifecycle methods get shimmed arguments
    const shimmedMethods: PluginMethodName[] = [
      'onBrowser',
      'onPageCreated',
      'onPageClose',
      'afterConnect',
      'afterLaunch'
    ]
    const pluginAcceptsShim =
      plugin._isPuppeteerExtraPlugin && !plugin.noPuppeteerShim
    const methodNeedsShim = shimmedMethods.includes(method)
    if (pluginAcceptsShim && methodNeedsShim) {
      debug('add puppeteer compatibility', plugin.name, method)
      return args.map(arg => addPuppeteerCompat(arg as any)) as Parameters<
        PluginMethodFn
      >
    }
    return args
  }

  /**
   * Fire a plugin lifecycle event (typesafe) without waiting for the plugins.
   * Only plugins implementing the given method are called.
   *
   * Results are not awaited, so the event reaches all plugins as fast as possible.
   * Errors (thrown or rejected) end up in `onPluginError`.
   *
   * @param method - The lifecycle method name
   * @param args - Optional: Any arguments to be supplied to the plugin methods
   * @internal
   */
  public dispatch(
    method: TMethod,
    ...args: Parameters>
  ): void {
    const recipients = this.filterByMethod(method)
    debug('dispatch', method, {
      all: this._plugins.length,
      filteredByMethod: recipients.length
    })
    for (const plugin of recipients) {
      try {
        args = this._addPuppeteerCompatIfNeeded(plugin, method, args)
        const fnType = plugin[method]?.constructor?.name
        debug('dispatch to plugin', {
          plugin: plugin.name,
          method,
          fnType
        })
        if (fnType !== 'AsyncFunction') {
          ;(plugin[method] as any)(...args)
          continue
        }
        // Async plugin methods report their failure through the returned promise
        ;(plugin[method] as any)(...args).catch((err: any) =>
          this.onPluginError(plugin, method, err)
        )
      } catch (err) {
        this.onPluginError(plugin, method, err as any)
      }
    }
  }

  /**
   * Dispatch plugin lifecycle events in a typesafe way.
   * Only Plugins that expose the supplied property will be called.
   *
   * Can also be used to get a definite return value after passing it to plugins:
   * Calls plugins sequentially and passes on a value (waterfall style).
   *
   * The plugins can either modify the value or return an updated one.
   * Will return the latest, updated value which ran through all plugins.
   *
   * By convention only the first argument will be used as the updated value.
*
   * @param method - The lifecycle method name
   * @param args - Optional: Any arguments to be supplied to the plugin methods
   * @return The value returned by the last plugin called (`null` when no plugin was called)
   * @internal
   */
  public async dispatchBlocking(
    method: TMethod,
    ...args: Parameters>
  ): Promise>> {
    const plugins = this.filterByMethod(method)
    debug('dispatchBlocking', method, {
      all: this._plugins.length,
      filteredByMethod: plugins.length
    })

    let retValue: any = null
    for (const plugin of plugins) {
      try {
        args = this._addPuppeteerCompatIfNeeded.bind(this)(plugin, method, args)
        retValue = await (plugin[method] as any)(...args)
        // In case we got a return value use that as new first argument for followup function calls
        if (retValue !== undefined) {
          args[0] = retValue
        }
      } catch (err) {
        // A failing plugin ends the chain: the error is reported and the value gathered so far is returned
        this.onPluginError(plugin, method, err as any)
        return retValue
      }
    }
    return retValue
  }

  /**
   * Order plugins that have expressed a special placement requirement.
   *
   * This is useful/necessary for e.g. plugins that depend on the data from other plugins.
   *
   * Plugins with the `runLast` requirement are moved to the end of the list.
   *
   * @private
   */
  protected order() {
    debug('order:before', this.names)
    const runLast = this._plugins
      .filter(p => p.requirements?.has('runLast'))
      .map(p => p.name)
    for (const name of runLast) {
      const index = this._plugins.findIndex(p => p.name === name)
      // Remove the plugin from its current position and append it again
      this._plugins.push(this._plugins.splice(index, 1)[0])
    }
    debug('order:after', this.names)
  }

  /**
   * Collects the exposed `data` property of all registered plugins.
   * Will be reduced/flattened to a single array.
   *
   * Can be accessed by plugins that listed the `dataFromPlugins` requirement.
   *
   * Implemented mainly for plugins that need data from other plugins (e.g. `user-preferences`).
   *
   * @see [PuppeteerExtraPlugin]/data
   * @param name - Filter data by optional name
   *
   * @private
   */
  protected getData(name?: string) {
    const data = this._plugins
      .filter((p: any) => !!p.data)
      .map((p: any) => (Array.isArray(p.data) ? p.data : [p.data]))
      .reduce((acc, arr) => [...acc, ...arr], [])
    return name ? data.filter((d: any) => d.name === name) : data
  }

  /**
   * Handle `plugins` stanza (already instantiated plugins that don't require dynamic imports)
   *
   * Every plugin listed in the `plugins` stanza of a registered plugin is added,
   * unless a plugin with the same name is registered already.
   */
  protected resolvePluginsStanza() {
    debug('resolvePluginsStanza')
    const pluginNames = new Set(this.names)
    // `filter` hands us a copy, so `add()` can safely append to `_plugins` while we iterate
    const parents = this._plugins.filter(p => !!p.plugins && p.plugins.length)
    for (const parent of parents) {
      for (const p of parent.plugins || []) {
        // The "already registered" check applies to the listed plugin, not to its parent
        // (the parent is by definition registered, checking it would skip every stanza)
        if (pluginNames.has(p.name)) {
          continue
        }
        debug(parent.name, 'adding missing plugin', p.name)
        if (this.add(p as Plugin)) {
          pluginNames.add(p.name)
        }
      }
    }
  }

  /**
   * Handle `dependencies` stanza (which requires dynamic imports)
   *
   * Plugins can define `dependencies` as a Set or Array of dependency paths, or a Map with additional opts
   *
   * @note
   * - The default opts for implicit dependencies can be defined using `setDependencyDefaults()`
   * - Dynamic imports can be avoided by providing plugin modules with `setDependencyResolution()`
   *
   * @throws If a dependency can neither be found in the custom resolutions nor be required
   */
  protected resolveDependenciesStanza() {
    debug('resolveDependenciesStanza')

    /** Attempt to dynamically require a plugin module */
    const requireDependencyOrDie = (
      parentName: string,
      dependencyPath: string
    ) => {
      // If the user provided the plugin module already we use that
      if (this._dependencyResolution.has(dependencyPath)) {
        return this._dependencyResolution.get(dependencyPath) as PluginModule
      }

      const possiblePrefixes = ['puppeteer-extra-plugin-'] // could be extended later
      const isAlreadyPrefixed = possiblePrefixes.some(prefix =>
        dependencyPath.startsWith(prefix)
      )
      const packagePaths: string[] = []
      // If the dependency is not already prefixed we attempt to require all possible combinations to find one that works
      if (!isAlreadyPrefixed) {
        packagePaths.push(
          ...possiblePrefixes.map(prefix => prefix + dependencyPath)
        )
      }
      // We always attempt to require the path verbatim (as a last resort)
      packagePaths.push(dependencyPath)
      const pluginModule = requirePackages(packagePaths)
      if (pluginModule) {
        return pluginModule
      }

      const explanation = ` The plugin '${parentName}' listed 
'${dependencyPath}' as dependency, which could not be found. Please install it: ${packagePaths
        .map(packagePath => `yarn add ${packagePath.split('/')[0]}`)
        .join(`\n or:\n`)} Note: You don't need to require the plugin yourself, unless you want to modify it's default settings. If your bundler has issues with dynamic imports take a look at '.plugins.setDependencyResolution()'. `
      console.warn(explanation)
      throw new Error('Plugin dependency not found')
    }

    // Tracks the names of all plugins known so far, so no dependency is added twice
    const existingPluginNames = new Set(this.names)
    const recursivelyLoadMissingDependencies = ({
      name: parentName,
      dependencies
    }: Plugin): any => {
      if (!dependencies) {
        return
      }
      const processDependency = (dependencyPath: string, opts?: any) => {
        const pluginModule = requireDependencyOrDie(parentName, dependencyPath)
        // Explicit opts win over the defaults registered through `setDependencyDefaults()`
        opts = opts || this._dependencyDefaults.get(dependencyPath) || {}
        const plugin = pluginModule(opts)
        if (existingPluginNames.has(plugin.name)) {
          debug(parentName, '=> dependency already exists:', plugin.name)
          return
        }
        existingPluginNames.add(plugin.name)
        debug(parentName, '=> adding new dependency:', plugin.name, opts)
        this.add(plugin)
        // The new plugin may list dependencies of its own
        return recursivelyLoadMissingDependencies(plugin)
      }

      if (dependencies instanceof Set || Array.isArray(dependencies)) {
        return [...dependencies].forEach(dependencyPath =>
          processDependency(dependencyPath)
        )
      }
      if (dependencies instanceof Map) {
        // Note: `k,v => v,k` (Map + forEach will reverse the order)
        return dependencies.forEach((v, k) => processDependency(k, v))
      }
    }
    this.list.forEach(recursivelyLoadMissingDependencies)
  }

  /**
   * Lightweight plugin dependency management to require plugins and code mods on demand.
* @private */
  protected resolveDependencies() {
    debug('resolveDependencies')
    this.resolvePluginsStanza()
    this.resolveDependenciesStanza()
  }
}

================================================
FILE: packages/playwright-extra/src/puppeteer-compatiblity-shim/index.ts
================================================
import Debug from 'debug'
const debug = Debug('playwright-extra:puppeteer-compat')

import type * as pw from 'playwright-core'

export type PlaywrightObject = pw.Page | pw.Frame | pw.Browser

/** Puppeteer-style members made available on a shimmed Playwright browser */
export interface PuppeteerBrowserShim {
  isCompatShim?: boolean
  isPlaywright?: boolean
  pages?: pw.BrowserContext['pages']
  // Was `Promise<'string'>` (the string literal type), which no real user agent satisfies
  userAgent: () => Promise<string>
}

/** Puppeteer-style members made available on a shimmed Playwright page or frame */
export interface PuppeteerPageShim {
  isCompatShim?: boolean
  isPlaywright?: boolean
  browser?: () => pw.Browser
  evaluateOnNewDocument?: pw.Page['addInitScript']
  _client: () => pw.CDPSession
}

/**
 * Whether the `in` operator can safely be applied to `obj`.
 * `'x' in obj` throws a TypeError for primitives, `null` and `undefined`.
 */
const supportsInOperator = (obj: unknown): obj is object =>
  (typeof obj === 'object' && obj !== null) || typeof obj === 'function'

/** Duck-typing check for a Playwright page (returns false for non-objects instead of throwing) */
export const isPlaywrightPage = (obj: unknown): obj is pw.Page => {
  return supportsInOperator(obj) && 'unroute' in obj
}
/** Duck-typing check for a Playwright frame (returns false for non-objects instead of throwing) */
export const isPlaywrightFrame = (obj: unknown): obj is pw.Frame => {
  return (
    supportsInOperator(obj) &&
    ['parentFrame', 'frameLocator'].every(x => x in obj)
  )
}
/** Duck-typing check for a Playwright browser (returns false for non-objects instead of throwing) */
export const isPlaywrightBrowser = (obj: unknown): obj is pw.Browser => {
  return supportsInOperator(obj) && 'newContext' in obj
}
/** Whether the object is already wrapped by one of the shims created in this module */
export const isPuppeteerCompat = (obj?: unknown): obj is PlaywrightObject => {
  return !!obj && typeof obj === 'object' && !!(obj as any).isCompatShim
}

// Module-private caches:
// - objectToShim: original Playwright object => its shim
// - cdpSession.*: page/frame or browser => promise of its CDP session
const cache = {
  objectToShim: new Map(),
  cdpSession: {
    page: new Map(),
    browser: new Map()
  }
}

/**
 * Augment a Playwright object with compatibility with certain Puppeteer methods
 *
 * @param object - A Playwright page, frame or browser (falsy and non-object values are returned untouched)
 * @returns The cached or newly created shim, or `object` itself when it is already a shim or of unknown type
 */
export function addPuppeteerCompat<
  Input extends pw.Page | pw.Frame | pw.Browser | null
>(object: Input): Input {
  if (!object || typeof object !== 'object') {
    return object
  }
  if (cache.objectToShim.has(object)) {
    return cache.objectToShim.get(object) as Input
  }
  if (isPuppeteerCompat(object)) {
    return object
  }
  debug('addPuppeteerCompat', cache.objectToShim.size)
  if (isPlaywrightPage(object) || isPlaywrightFrame(object)) {
    const shim = createPageShim(object)
    cache.objectToShim.set(object, shim)
    return shim as Input
  }
  if (isPlaywrightBrowser(object)) {
    const shim = createBrowserShim(object)
    cache.objectToShim.set(object, shim)
    return shim as Input
  }
  debug('Received unknown object:', Reflect.ownKeys(object))
  return object
}

// Only chromium browsers support CDP
const dummyCDPClient = {
  send: async (...args: any[]) => {
    debug('dummy CDP client called', 'send', args)
  },
  on: (...args: any[]) => {
    debug('dummy CDP client called', 'on', args)
  }
} as pw.CDPSession

/**
 * Get the (cached) CDP session for a page or frame.
 *
 * The in-flight promise is cached, not the resolved session. Otherwise
 * back-to-back calls such as `client.on(...)` followed by `client.send(...)`
 * would each miss the cache and create their own session, so listeners and
 * commands would end up on different sessions.
 *
 * @returns The CDP session, or a logging-only dummy when no session could be created
 */
export async function getPageCDPSession(page: pw.Page | pw.Frame) {
  const cached = cache.cdpSession.page.get(page)
  if (cached) {
    debug('getPageCDPSession: use existing')
    return cached
  }
  debug('getPageCDPSession: use new')
  const context = isPlaywrightFrame(page)
    ? page.page().context()
    : page.context()
  const pending = Promise.resolve()
    .then(() => context.newCDPSession(page))
    .catch((err: any) => {
      // Failures are not cached, so a later call can try again (as before)
      cache.cdpSession.page.delete(page)
      debug('getPageCDPSession: error while creating session:', err.message)
      debug(
        'getPageCDPSession: Unable create CDP session (most likely a different browser than chromium) - returning a dummy'
      )
      return dummyCDPClient
    })
  cache.cdpSession.page.set(page, pending)
  return pending
}

/**
 * Get the (cached) browser-level CDP session.
 *
 * Caches the in-flight promise for the same reason as `getPageCDPSession`.
 *
 * @returns The CDP session, or a logging-only dummy when no session could be created
 */
export async function getBrowserCDPSession(browser: pw.Browser) {
  const cached = cache.cdpSession.browser.get(browser)
  if (cached) {
    debug('getBrowserCDPSession: use existing')
    return cached
  }
  debug('getBrowserCDPSession: use new')
  const pending = Promise.resolve()
    .then(() => browser.newBrowserCDPSession())
    .catch((err: any) => {
      // Failures are not cached, so a later call can try again (as before)
      cache.cdpSession.browser.delete(browser)
      debug('getBrowserCDPSession: error while creating session:', err.message)
      debug(
        'getBrowserCDPSession: Unable create CDP session (most likely a different browser than chromium) - returning a dummy'
      )
      return dummyCDPClient
    })
  cache.cdpSession.browser.set(browser, pending)
  return pending
}

/**
 * Wrap a Playwright page or frame in a Proxy that answers a few Puppeteer-only
 * members (`_client`, `setBypassCSP`, `setUserAgent`, `browser`,
 * `evaluateOnNewDocument`, `userAgent`) and forwards everything else to the
 * original object.
 */
export function createPageShim(page: pw.Page | pw.Frame) {
  // Short random id, only used to tell shims apart in the debug output
  const objId = Math.random().toString(36).substring(2, 7)
  const shim = new Proxy(page, {
    get(target, prop) {
      if (prop === 'isCompatShim' || prop === 'isPlaywright') {
        return true
      }
      debug('page - get', objId, prop)
      if (prop === '_client') {
        return () => ({
          send: async (method: string, params: any) => {
            const session = await getPageCDPSession(page)
            return await session.send(method as any, params)
          },
          // Registration is asynchronous because the session has to be created first
          on: (event: string, listener: any) => {
            getPageCDPSession(page).then(session => {
              session.on(event as any, listener)
            })
          }
        })
      }
      if (prop === 'setBypassCSP') {
        return async (enabled: boolean) => {
          const session = await getPageCDPSession(page)
          return await session.send('Page.setBypassCSP', {
            enabled
          })
        }
      }
      if (prop === 'setUserAgent') {
        return async (userAgent: string, userAgentMetadata?: any) => {
          const session = await getPageCDPSession(page)
          return await session.send('Emulation.setUserAgentOverride', {
            userAgent,
            userAgentMetadata
          })
        }
      }
      if (prop === 'browser') {
        // Frames fall through to the regular property lookup below
        if (isPlaywrightPage(page)) {
          return () => {
            let browser = page.context().browser()
            if (!browser) {
              debug(
                'page.browser() - not available, most likely due to launchPersistentContext'
              )
              // Use a page shim as quick drop-in (so browser.userAgent() still works)
              browser = page as any
            }
            return addPuppeteerCompat(browser)
          }
        }
      }
      if (prop === 'evaluateOnNewDocument') {
        if (isPlaywrightPage(page)) {
          // Only the first extra argument is forwarded to `addInitScript`
          return async function (pageFunction: any | string, ...args: any[]) {
            return await page.addInitScript(pageFunction, args[0])
          }
        }
      }
      // Only relevant when page is being used a pseudo stand-in for the browser object (launchPersistentContext)
      if (prop === 'userAgent') {
        // Takes no arguments (an unused `enabled` parameter was dropped)
        return async () => {
          const session = await getPageCDPSession(page)
          const data = await session.send('Browser.getVersion')
          return data.userAgent
        }
      }
      return Reflect.get(target, prop)
    }
  })
  return shim
}

export function createBrowserShim(browser: pw.Browser) {
  const objId = Math.random().toString(36).substring(2, 7)
  const shim = new Proxy(browser, {
    get(target, prop) {
      if (prop === 'isCompatShim' || prop === 'isPlaywright') {
        return true
      }
      debug('browser - get', objId, prop)
      if (prop === 'pages') {
return () => browser .contexts() .flatMap(c => c.pages().map(page => addPuppeteerCompat(page))) } if (prop === 'userAgent') { return async () => { const session = await getBrowserCDPSession(browser) const data = await session.send('Browser.getVersion') return data.userAgent } } return Reflect.get(target, prop) } }) return shim } ================================================ FILE: packages/playwright-extra/src/puppeteer-compatiblity-shim/playwright-shim.d.ts ================================================ // Playwright objects extended with puppeteer compatiblity shims import type {} from 'playwright-core' import type { PuppeteerPageShim, PuppeteerBrowserShim } from '.' declare module 'playwright-core' { interface Page extends PuppeteerPageShim {} interface Frame extends PuppeteerPageShim {} interface Browser extends PuppeteerBrowserShim {} } ================================================ FILE: packages/playwright-extra/src/types/index.ts ================================================ import type * as pw from 'playwright-core' type PropType = TObj[TProp] type PluginEnv = { framework: 'playwright' } /** Strongly typed plugin lifecycle events for internal use */ export abstract class PluginLifecycleMethods { async onPluginRegistered(env?: PluginEnv): Promise {} async beforeLaunch( options: pw.LaunchOptions ): Promise {} async afterLaunch(browserOrContext?: pw.Browser | pw.BrowserContext) {} async beforeConnect( options: pw.ConnectOptions ): Promise {} async afterConnect(browser: pw.Browser) {} async onBrowser(browser: pw.Browser) {} async onPageCreated(page: pw.Page) {} async onPageClose(page: pw.Page) {} async onDisconnected(browser?: pw.Browser) {} // Playwright only at the moment async beforeContext( options?: pw.BrowserContextOptions, browser?: pw.Browser ): Promise {} async onContextCreated( context?: pw.BrowserContext, options?: pw.BrowserContextOptions ) {} } /** A valid plugin method name */ export type PluginMethodName = keyof PluginLifecycleMethods 
/** A valid plugin method function */ export type PluginMethodFn = PropType< PluginLifecycleMethods, TName > type PluginRequirements = Set< 'launch' | 'headful' | 'dataFromPlugins' | 'runLast' > // PuppeteerExtraPlugin only supports Set, the others are future proofing type PluginDependencies = Set | Map | string[] interface PluginData { name: | string // below is compat with a previously incorrect typing | { [key: string]: any } value: { [key: string]: any } } export interface CompatiblePluginLifecycleMethods { onPluginRegistered(...any: any[]): Promise | any beforeLaunch(...any: any[]): Promise | any afterLaunch(...any: any[]): Promise | any beforeConnect(...any: any[]): Promise | any afterConnect(...any: any[]): Promise | any onBrowser(...any: any[]): Promise | any onPageCreated(...any: any[]): Promise | any onPageClose(...any: any[]): Promise | any onDisconnected(...any: any[]): Promise | any // Playwright only at the moment beforeContext(...any: any[]): Promise | any onContextCreated(...any: any[]): Promise | any } /** * PuppeteerExtraPlugin interface, strongly typed for internal use * @private */ export interface PuppeteerExtraPlugin extends Partial { _isPuppeteerExtraPlugin: boolean name: string /** Disable the puppeteer compatibility shim for this plugin */ noPuppeteerShim?: boolean requirements?: PluginRequirements dependencies?: PluginDependencies data?: PluginData[] getDataFromPlugins?(name?: string): void _registerChildClassMembers?(prototype: any): void _childClassMembers?: string[] plugins?: CompatiblePlugin[] // [propName: string]: any } /** * Minimal compatible PuppeteerExtraPlugin interface * @private */ export interface CompatiblePuppeteerPlugin extends Partial { _isPuppeteerExtraPlugin: boolean name?: string } // Future proofing export interface CompatiblePlaywrightPlugin extends Partial { _isPlaywrightExtraPlugin: boolean name?: string } // Future proofing export interface CompatibleExtraPlugin extends Partial { _isExtraPlugin: boolean name?: 
string } /** * A compatible plugin */ export type CompatiblePlugin = | CompatiblePuppeteerPlugin | CompatiblePlaywrightPlugin | CompatibleExtraPlugin export type CompatiblePluginModule = (...args: any[]) => CompatiblePlugin export type Plugin = PuppeteerExtraPlugin export type PluginModule = (...args: any[]) => Plugin ================================================ FILE: packages/playwright-extra/test/exports.spec.ts ================================================ import { test, expect } from './fixtures/extra' test('should export the basic functionality', async ({ playwrightExtra }) => { expect(playwrightExtra.addExtra).toBeDefined() expect(playwrightExtra.chromium).toBeDefined() expect(playwrightExtra.chromium.use).toBeDefined() expect(playwrightExtra.chromium.plugins).toBeDefined() expect(playwrightExtra.chromium.plugins.list).toBeDefined() expect(playwrightExtra.chromium.plugins.names).toBeDefined() expect(playwrightExtra.chromium.plugins.onPluginError).toBeDefined() expect(playwrightExtra.chromium.launch).toBeDefined() expect(playwrightExtra.chromium.launchPersistentContext).toBeDefined() expect(playwrightExtra.chromium.connect).toBeDefined() expect(playwrightExtra.chromium.connectOverCDP).toBeDefined() expect(playwrightExtra.firefox).toBeDefined() expect(playwrightExtra.firefox.use).toBeDefined() expect(playwrightExtra.firefox.launch).toBeDefined() expect(playwrightExtra.firefox.connect).toBeDefined() expect(playwrightExtra.webkit).toBeDefined() expect(playwrightExtra.webkit.use).toBeDefined() expect(playwrightExtra.webkit.launch).toBeDefined() expect(playwrightExtra.webkit.connect).toBeDefined() expect((playwrightExtra as any).nonexistent).toBeUndefined() }) test('chromium export should be well formed', async ({ playwrightExtra }) => { const { chromium } = playwrightExtra expect(typeof chromium).toBe('object') expect(typeof chromium.use).toBe('function') expect(typeof chromium.launch).toBe('function') expect(typeof chromium.connect).toBe('function') 
expect(typeof chromium.name).toBe('function') expect(typeof chromium.name()).toBe('string') expect(chromium.constructor.name).toBe('PlaywrightExtraClass') }) test('addExtra export should be well formed', async ({ playwrightExtra }) => { const { addExtra } = playwrightExtra expect(typeof addExtra).toBe('function') const launcher = addExtra() expect(typeof launcher).toBe('object') expect(launcher.constructor.name).toBe('PlaywrightExtraClass') }) test('should re-export the same additional exports verbatim', async ({ playwrightExtra, playwrightVanilla }) => { expect(playwrightExtra.errors).toStrictEqual(playwrightVanilla.errors) expect(playwrightExtra.devices).toStrictEqual(playwrightVanilla.devices) expect(playwrightExtra.selectors).toStrictEqual(playwrightVanilla.selectors) expect(playwrightExtra.request).toStrictEqual(playwrightVanilla.request) }) ================================================ FILE: packages/playwright-extra/test/fixtures/dummyplugin.ts ================================================ import { PuppeteerExtraPlugin } from 'puppeteer-extra-plugin' export class DummyPlugin extends PuppeteerExtraPlugin { public pluginEventList: string[] = [] public pluginEventMap: Map = new Map() constructor(opts = {}) { super(opts) } get name() { return 'dummy' } async onPluginRegistered(...args: any[]) { this.pluginEventList.push('onPluginRegistered') } async beforeLaunch(...args: any[]) { this.pluginEventList.push('beforeLaunch') } async afterLaunch(...args: any[]) { this.pluginEventList.push('afterLaunch') } async beforeConnect(...args: any[]) { this.pluginEventList.push('beforeConnect') } async afterConnect(...args: any[]) { this.pluginEventList.push('afterConnect') } async onBrowser(...args: any[]) { this.pluginEventList.push('onBrowser') } async onTargetCreated(...args: any[]) { this.pluginEventList.push('onTargetCreated') } async onPageCreated(...args: any[]) { this.pluginEventList.push('onPageCreated') } async onTargetChanged(...args: any[]) { 
this.pluginEventList.push('onTargetChanged') } async onTargetDestroyed(...args: any[]) { this.pluginEventList.push('onTargetDestroyed') } async onDisconnected(...args: any[]) { this.pluginEventList.push('onDisconnected') } async onClose(...args: any[]) { this.pluginEventList.push('onClose') } // playwright only at the moment async beforeContext(...args: any[]) { this.pluginEventList.push('beforeContext') } async onContextCreated(...args: any[]) { this.pluginEventList.push('onContextCreated') } } ================================================ FILE: packages/playwright-extra/test/fixtures/extra.ts ================================================ // Playwrights test runner is great, originally based on folio (which unfortunately isn't maintained anymore): https://github.com/microsoft/folio import { test as base } from '@playwright/test' import * as pwTest from '@playwright/test' import * as pwExtraModule from '../../src' import * as pwVanillaModule from 'playwright-core' type PluginModuleWithOptions = { module: any; opts?: Record } export type ExtraOptions = {} export type ExtraFixtures = { /** playwright-extra module */ playwrightExtra: typeof pwExtraModule /** playwright-core module */ playwrightVanilla: typeof pwVanillaModule /** Augmented launcher */ extraLauncher: pwExtraModule.AugmentedBrowserLauncher } type WorkerFixtures = { _connectedBrowser: pwTest.Browser | undefined _browserOptions: pwTest.LaunchOptions _artifactsDir: () => string _snapshotSuffix: string plugins: PluginModuleWithOptions[] } export const worker = base.extend<{}, WorkerFixtures>({ plugins: [[], { option: true, scope: 'worker' as any }], browser: async ( { playwright, browserName, _connectedBrowser, plugins }, use ) => { if (_connectedBrowser) { await use(_connectedBrowser) return } if (!['chromium', 'firefox', 'webkit'].includes(browserName)) throw new Error( `Unexpected browserName "${browserName}", must be one of "chromium", "firefox" or "webkit"` ) const launcher = 
pwExtraModule.addExtra(playwright[browserName]) plugins.forEach(({ module: pluginModule, opts }) => { launcher.use(pluginModule(opts)) }) const browser = await launcher.launch() ;(browser as any)._launcher = launcher await use(browser as any) await browser.close() } }) // Extend base test by providing "todoPage" and "settingsPage". // This new "test" can be used in multiple test files, and each of them will get the fixtures. export const test = worker.extend({ extraLauncher: async ( { plugins, playwrightExtra, playwrightVanilla, browserName }, use ) => { const launcher = playwrightExtra.addExtra(playwrightVanilla[browserName]) plugins.forEach(({ module: pluginModule, opts }) => { launcher.use(pluginModule(opts)) }) await use(launcher) }, playwrightExtra: async ({}, use) => { await use(pwExtraModule) }, playwrightVanilla: async ({}, use) => { await use(pwVanillaModule) } }) export { expect } from '@playwright/test' ================================================ FILE: packages/playwright-extra/test/playwright.config.ts ================================================ import { type PlaywrightTestConfig } from '@playwright/test' const config: PlaywrightTestConfig = { retries: 3, workers: 3, use: { browserName: 'chromium' }, projects: [ { name: 'chromium', use: { browserName: 'chromium', launchOptions: { chromiumSandbox: process.env.CI ? false : true, args: process.env.CI ? 
['--no-sandbox', '--disable-setuid-sandbox'] : [] } } }, { name: 'firefox', use: { browserName: 'firefox' } }, { name: 'webkit', use: { browserName: 'webkit' // Note: webkit doesn't support --no-sandbox } } ] } export default config ================================================ FILE: packages/playwright-extra/test/plugin-events.spec.ts ================================================ import { test, expect } from './fixtures/extra' import { DummyPlugin } from './fixtures/dummyplugin' test.use({ plugins: [{ module: (opts: any) => new DummyPlugin(opts) }] }) test('emits correct events for launch', async ({ extraLauncher }) => { const browser = await extraLauncher.launch() const context = await browser.newContext() const page = await context.newPage() await page.close() await browser.close() const plugin = extraLauncher.plugins.list[0] as unknown as DummyPlugin expect(plugin.pluginEventList).toStrictEqual([ 'onPluginRegistered', 'beforeLaunch', 'onBrowser', 'afterLaunch', 'beforeContext', 'onContextCreated', 'onPageCreated', 'onDisconnected' ]) }) test('emits correct events for launch without .newContext()', async ({ extraLauncher }) => { const browser = await extraLauncher.launch() const page = await browser.newPage() await page.close() await browser.close() const plugin = extraLauncher.plugins.list[0] as unknown as DummyPlugin expect(plugin.pluginEventList).toStrictEqual([ 'onPluginRegistered', 'beforeLaunch', 'onBrowser', 'afterLaunch', 'beforeContext', 'onContextCreated', 'onPageCreated', 'onDisconnected' ]) }) test('emits correct events for launchPersistentContext', async ({ extraLauncher }) => { const context = await extraLauncher.launchPersistentContext('') const page = await context.newPage() await page.close() await context.close() const plugin = extraLauncher.plugins.list[0] as unknown as DummyPlugin expect(plugin.pluginEventList).toStrictEqual([ 'onPluginRegistered', 'beforeLaunch', 'afterLaunch', 'onContextCreated', 'onPageCreated', 'onDisconnected' ]) 
}) test('emits correct events for connect', async ({ extraLauncher }) => { const server = await extraLauncher.launchServer() const browser = await extraLauncher.connect(server.wsEndpoint()) const context = await browser.newContext() const page = await context.newPage() await page.close() await browser.close() await server.close() const plugin = extraLauncher.plugins.list[0] as unknown as DummyPlugin expect(plugin.pluginEventList).toStrictEqual([ 'onPluginRegistered', 'beforeConnect', 'onBrowser', 'afterConnect', 'beforeContext', 'onContextCreated', 'onPageCreated', 'onDisconnected' ]) }) test('emits correct events for connectOverCDP', async ({ extraLauncher, browserName }) => { test.skip(browserName !== 'chromium', 'Chromium only') const server = await extraLauncher.launchServer({ args: ['--remote-debugging-port=9333'] }) const browser = await extraLauncher.connectOverCDP('http://localhost:9333') const context = await browser.newContext() const page = await context.newPage() await page.close() await browser.close() await server.close() const plugin = extraLauncher.plugins.list[0] as unknown as DummyPlugin expect(plugin.pluginEventList).toStrictEqual([ 'onPluginRegistered', 'beforeConnect', 'onBrowser', 'afterConnect', 'beforeContext', 'onContextCreated', 'onPageCreated', 'onDisconnected' ]) }) ================================================ FILE: packages/playwright-extra/test/puppeteer-plugins/anonymize-ua.spec.ts ================================================ import { test, expect } from '../fixtures/extra' import AnonymizeUAPlugin from 'puppeteer-extra-plugin-anonymize-ua' test('puppeteer-extra-plugin-anonymize-ua will remove headless', async ({ browserName, extraLauncher, _browserOptions }) => { test.skip(browserName !== 'chromium', 'Chromium only') const pluginErrors = [] extraLauncher.plugins.onPluginError = (plugin, method, err) => { pluginErrors.push(err) } extraLauncher.use(AnonymizeUAPlugin()) expect(extraLauncher.plugins.list.length).toEqual(1) 
expect(extraLauncher.plugins.list[0].name).toEqual('anonymize-ua') const browser = await extraLauncher.launch(_browserOptions) const context = await browser.newContext() const page = await context.newPage() await page.goto('https://example.com') const ua = await page.evaluate(() => navigator.userAgent) expect(ua.includes('Headless')).toBeFalsy() expect(pluginErrors).toStrictEqual([]) await browser.close() }) test('puppeteer-extra-plugin-anonymize-ua will allow a custom UA', async ({ browserName, extraLauncher, _browserOptions }) => { test.skip(browserName !== 'chromium', 'Chromium only') const pluginErrors = [] extraLauncher.plugins.onPluginError = (plugin, method, err) => { pluginErrors.push(err) } extraLauncher.use( AnonymizeUAPlugin({ customFn: ua => 'MyCoolUserAgent' }) ) expect(extraLauncher.plugins.list.length).toEqual(1) expect(extraLauncher.plugins.list[0].name).toEqual('anonymize-ua') const browser = await extraLauncher.launch(_browserOptions) const context = await browser.newContext() const page = await context.newPage() await page.goto('https://example.com') const ua = await page.evaluate(() => navigator.userAgent) expect(ua).toBe('MyCoolUserAgent') expect(pluginErrors).toStrictEqual([]) await browser.close() }) ================================================ FILE: packages/playwright-extra/test/puppeteer-plugins/recaptcha.spec.ts ================================================ import { test, expect } from '../fixtures/extra' import RecaptchaPlugin from 'puppeteer-extra-plugin-recaptcha' // Supports all browsers test('puppeteer-extra-plugin-recaptcha will detect captchas', async ({ extraLauncher, _browserOptions }) => { const pluginErrors = [] extraLauncher.plugins.onPluginError = (plugin, method, err) => { pluginErrors.push(err) } const instance = RecaptchaPlugin() extraLauncher.use(instance) expect(extraLauncher.plugins.list.length).toEqual(1) expect(extraLauncher.plugins.list[0].name).toEqual(instance.name) const url = 
'https://berstend.github.io/static/recaptcha/v2-checkbox-auto.html' const browser = await extraLauncher.launch(_browserOptions) const context = await browser.newContext() const page = await context.newPage() await page.goto(url) const { captchas, error } = await (page as any).findRecaptchas() expect(error).toBeFalsy() expect(captchas).toBeTruthy() expect(captchas.length).toBe(1) const captcha = captchas[0] expect(captcha._vendor).toBe('recaptcha') expect(captcha._type).toBe('checkbox') expect(captcha.url).toBe(url) expect(captcha.id).toBeTruthy() expect(captcha.sitekey).toBeTruthy() expect(pluginErrors).toStrictEqual([]) await browser.close() }) test('puppeteer-extra-plugin-recaptcha will solve captchas', async ({ extraLauncher, _browserOptions }) => { test.skip(!process.env.TWOCAPTCHA_TOKEN, 'TWOCAPTCHA_TOKEN not set') test.slow() const pluginErrors = [] extraLauncher.plugins.onPluginError = (plugin, method, err) => { pluginErrors.push(err) } const instance = RecaptchaPlugin({ provider: { id: '2captcha', token: process.env.TWOCAPTCHA_TOKEN } }) extraLauncher.use(instance) expect(extraLauncher.plugins.list.length).toEqual(1) expect(extraLauncher.plugins.list[0].name).toEqual(instance.name) const url = 'https://www.google.com/recaptcha/api2/demo' const browser = await extraLauncher.launch(_browserOptions) const context = await browser.newContext() const page = await context.newPage() await page.goto(url, { waitUntil: 'networkidle' }) const { solved, error } = await (page as any).solveRecaptchas() expect(error).toBeFalsy() expect(solved).toBeTruthy() expect(solved.length).toBe(1) await Promise.all([ page.waitForNavigation({ waitUntil: 'networkidle' }), page.click(`#recaptcha-demo-submit`) ]) const content = await page.content() expect(content).toMatch('Verification Success... 
Hooray!') expect(pluginErrors).toStrictEqual([]) await browser.close() }) ================================================ FILE: packages/playwright-extra/test/puppeteer-plugins/stealth.spec.ts ================================================ import { test, expect } from '../fixtures/extra' import StealthPlugin from 'puppeteer-extra-plugin-stealth' test('puppeteer-extra-plugin-stealth will work', async ({ browserName, extraLauncher, _browserOptions }) => { test.skip(browserName !== 'chromium', 'Chromium only') const pluginErrors = [] extraLauncher.plugins.onPluginError = (plugin, method, err) => { pluginErrors.push(err) } extraLauncher.use(StealthPlugin()) expect(extraLauncher.plugins.list.length).toEqual(1) expect(extraLauncher.plugins.list[0].name).toEqual('stealth') extraLauncher.plugins.setDependencyDefaults('stealth/evasions/webgl.vendor', { vendor: 'Bob', renderer: 'Alice' }) const browser = await extraLauncher.launch(_browserOptions) const context = await browser.newContext() const page = await context.newPage() await page.goto('https://example.com') const webgl = await page.evaluate(getWebglUnmasked) expect(webgl).toStrictEqual({ renderer: 'Alice', vendor: 'Bob' }) expect(pluginErrors).toStrictEqual([]) await browser.close() }) function getWebglUnmasked() { const gl = document.createElement('canvas').getContext('webgl') as any if (!gl) { return { error: 'no webgl' } } const debugInfo = gl.getExtension('WEBGL_debug_renderer_info') if (debugInfo) { return { vendor: gl.getParameter(debugInfo.UNMASKED_VENDOR_WEBGL), renderer: gl.getParameter(debugInfo.UNMASKED_RENDERER_WEBGL) } } return { error: 'no WEBGL_debug_renderer_info' } } ================================================ FILE: packages/playwright-extra/tsconfig.json ================================================ { "compilerOptions": { "outDir": "./dist", "target": "es2017", "module": "es2015", "moduleResolution": "node", "lib": ["es2015", "es2016", "es2017", "dom"], "sourceMap": true, "declaration": 
true, "allowSyntheticDefaultImports": true, "esModuleInterop": true, "emitDecoratorMetadata": true, "experimentalDecorators": true, "strict": true, "noFallthroughCasesInSwitch": true, "noImplicitReturns": false, "noUnusedLocals": true, "noUnusedParameters": false, "pretty": true, "stripInternal": true, "types": ["node"] }, "include": ["./src/**/*.tsx", "./src/**/*.ts"], "exclude": ["node_modules", "dist", "./test/**/*.spec.ts"] } ================================================ FILE: packages/plugin-proxy-router/package.json ================================================ { "name": "@extra/proxy-router", "version": "3.1.6", "description": "A plugin for playwright & puppeteer to route proxies dynamically.", "repository": "berstend/puppeteer-extra", "homepage": "https://github.com/berstend/puppeteer-extra/tree/master/packages/plugin-proxy-router", "author": "berstend", "license": "MIT", "main": "dist/index.cjs.js", "module": "dist/index.esm.js", "typings": "dist/index.d.ts", "files": [ "dist" ], "publishConfig": { "access": "public" }, "scripts": { "clean": "rimraf dist/*", "tscheck": "tsc --pretty --noEmit", "prebuild": "run-s clean", "build": "run-s build:tsc build:rollup", "build:tsc": "tsc --project tsconfig.json --module commonjs", "build:rollup": "rollup -c rollup.config.ts", "docs": "node -e 0", "test": "run-s build", "pretest-ci": "run-s build", "test-ci": "run-s build" }, "engines": { "node": ">=14" }, "prettier": { "printWidth": 80, "semi": false, "singleQuote": true }, "keywords": [ "puppeteer", "playwright", "puppeteer-extra", "playwright-extra", "proxy", "proxy-router", "headless", "luminati" ], "devDependencies": { "@types/debug": "^4.1.5", "@types/node": "14.17.6", "@types/puppeteer": "*", "ava": "2.4.0", "copyfiles": "^2.1.1", "npm-run-all": "^4.1.5", "playwright-core": "1.24.2", "prettier": "^2.7.1", "puppeteer": "^15.5.0", "puppeteer-extra": "^3.3.6", "replace-in-files-cli": "^0.3.1", "rimraf": "^3.0.0", "rollup-plugin-commonjs": "^10.1.0", 
"rollup-plugin-node-resolve": "^5.2.0", "rollup-plugin-sourcemaps": "^0.4.2", "rollup-plugin-typescript2": "^0.25.2", "ts-node": "^8.5.4", "typescript": "^4.7.4" }, "dependencies": { "debug": "^4.1.1", "merge-deep": "^3.0.2", "proxy-chain": "^2.0.6", "puppeteer-extra-plugin": "^3.2.3" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } } } ================================================ FILE: packages/plugin-proxy-router/readme.md ================================================ # @extra/proxy-router [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/berstend/puppeteer-extra/test.yml?branch=master&event=push)](https://github.com/berstend/puppeteer-extra/actions) [![Discord](https://img.shields.io/discord/737009125862408274)](https://extra.community) [![npm](https://img.shields.io/npm/v/@extra/proxy-router.svg)](https://www.npmjs.com/package/@extra/proxy-router) > A plugin for [playwright-extra] and [puppeteer-extra] to route proxies dynamically. ## Install ```bash yarn add @extra/proxy-router # - or - npm install @extra/proxy-router ```
Playwright If this is your first [playwright-extra] plugin here's everything you need: ```bash yarn add playwright playwright-extra @extra/proxy-router # - or - npm install playwright playwright-extra @extra/proxy-router ```
**Puppeteer:** If this is your first [puppeteer-extra] plugin, here's everything you need: ```bash yarn add puppeteer puppeteer-extra @extra/proxy-router # - or - npm install puppeteer puppeteer-extra @extra/proxy-router ```
### Compatibility | 💫 | [Chrome](#)
Chromium | [Chrome](#)
Chrome | [Firefox](#)
Firefox | [Webkit](#)
Webkit | | :--------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------: | | **[playwright-extra]** | ✅ | ✅ | ✅ | ✅ | | **[puppeteer-extra]** | ✅ | ✅ | [🕒](https://github.com/berstend/puppeteer-extra/wiki/Is-Puppeteer-Firefox-ready-yet%3F) | - | | Headless | Headful | Launch | Connect | | :------: | :-----: | :----: | :------------------------------: | | ✅ | ✅ | ✅ | ✅ (local) | ### Features The plugin makes using proxies in the browser a lot more convenient: - Handles proxy authentication - Multiple proxies can be used - Flexible proxy routing using the host/domain - Change proxies dynamically after browser launch - Collect traffic stats per proxy or host - Uses native browser features, no performance loss ## Usage > Using puppeteer? To use the following playwright examples simply change your [imports](#imports) ### Simple A single proxy for all browser connections ```js // playwright-extra is a drop-in replacement for playwright, // it augments the installed playwright with plugin functionality // Note: Instead of chromium you can use firefox and webkit as well. 
const { chromium } = require('playwright-extra') // Configure and add the proxy router plugin with a default proxy const ProxyRouter = require('@extra/proxy-router') chromium.use( ProxyRouter({ proxies: { DEFAULT: 'http://user:pass@proxyhost:port' }, }) ) // That's it, the default proxy will be used and proxy authentication handled automatically chromium.launch({ headless: false }).then(async (browser) => { const page = await browser.newPage() await page.goto('https://canhazip.com', { waitUntil: 'domcontentloaded' }) const ip = await page.evaluate('document.body.innerText') console.log('Outbound IP:', ip) await browser.close() }) ``` ### Dynamic routing Use multiple proxies and route connections flexibly ```js // playwright-extra is a drop-in replacement for playwright, // it augments the installed playwright with plugin functionality // Note: Instead of chromium you can use firefox and webkit as well. const { chromium } = require('playwright-extra') // Configure the proxy router plugin const ProxyRouter = require('@extra/proxy-router') const proxyRouter = ProxyRouter({ // define the available proxies (replace this with your proxies) proxies: { // the default browser proxy, can be `null` as well for direct connections DEFAULT: 'http://user:pass@proxyhost:port', // optionally define more proxies you can use in `routeByHost` // you can use whatever names you'd like for them DATACENTER: 'http://user:pass@proxyhost2:port', RESIDENTIAL_US: 'http://user:pass@proxyhost3:port', }, // optional function for flexible proxy routing // if this is not specified the `DEFAULT` proxy will be used for all connections routeByHost: async ({ host }) => { if (['pagead2.googlesyndication.com', 'fonts.gstatic.com'].includes(host)) { return 'ABORT' // block connection to certain hosts } if (host.includes('google')) { return 'DIRECT' // use a direct connection for all google domains } if (host.endsWith('.tile.openstreetmap.org')) { return 'DATACENTER' // route heavy images through 
datacenter proxy } if (host === 'canhazip.com') { return 'RESIDENTIAL_US' // special proxy for this domain } // everything else will use `DEFAULT` proxy }, }) // Add the plugin chromium.use(proxyRouter) // Launch a browser and run some IP checks chromium.launch({ headless: true }).then(async (browser) => { const page = await browser.newPage() await page.goto('https://showmyip.com/', { waitUntil: 'domcontentloaded' }) const ip1 = await page.evaluate("document.querySelector('#ipv4').innerText") console.log('Outbound IP #1:', ip1) // => 77.191.128.0 (the DEFAULT proxy) await page.goto('https://canhazip.com', { waitUntil: 'domcontentloaded' }) const ip2 = await page.evaluate('document.body.innerText') console.log('Outbound IP #2:', ip2) // => 104.179.129.27 (the RESIDENTIAL_US proxy) console.log(proxyRouter.stats.connectionLog) // list of connections (host => proxy name) // { id: 0, proxy: 'DIRECT', host: 'accounts.google.com' }, // { id: 1, proxy: 'DEFAULT', host: 'www.showmyip.com' }, // { id: 2, proxy: 'ABORT', host: 'pagead2.googlesyndication.com' }, // { id: 3, proxy: 'DEFAULT', host: 'unpkg.com' }, // ... console.log(proxyRouter.stats.byProxy) // bytes used by proxy // { // DATACENTER: 441734, // DEFAULT: 125823, // DIRECT: 100457, // RESIDENTIAL_US: 4764, // ABORT: 0 // } console.log(proxyRouter.stats.byHost) // bytes used by host // { // 'a.tile.openstreetmap.org': 150685, // 'c.tile.openstreetmap.org': 147054, // 'b.tile.openstreetmap.org': 143995, // 'unpkg.com': 57621, // 'www.googletagmanager.com': 49572, // 'www.showmyip.com': 40408, // ... await browser.close() }) ``` ### Imports
Usage with Puppeteer
> The code is essentially the same as the playwright example above. :-) Just change the import and package name: ```diff - const { chromium } = require('playwright-extra') + const puppeteer = require('puppeteer-extra') // ... - chromium.use(proxyRouter) + puppeteer.use(proxyRouter) // ... - chromium.launch() + puppeteer.launch() // ... ```
TypeScript & ESM
> The plugin is written in Typescript and ships with types. **Playwright:** ```js // You can use any browser: chromium, firefox, webkit import { firefox } from 'playwright-extra' import ProxyRouter from '@extra/proxy-router' // ... firefox.use(proxyRouter) ``` **Puppeteer:** ```js import puppeteer from 'puppeteer-extra' import ProxyRouter from '@extra/proxy-router' // ... puppeteer.use(proxyRouter) ```
### Debug logs If you'd like to see debug output just run your script like so: ```bash # macOS/Linux (Bash) DEBUG=*proxy-router* node myscript.js # Windows (Powershell) $env:DEBUG='*proxy-router*';node myscript.js ``` ## How it works The proxy router will launch a local proxy server and instruct the browser to use it. That local proxy server will in turn connect to the configured upstream proxy servers and relay connections depending on the optional user-defined routing function, while handling upstream proxy authentication and a few other things. ## API ### Options ```ts export interface ProxyRouterOpts { /** * A dictionary of proxies to be made available to the browser and router. * * An optional entry named `DEFAULT` will be used for all requests, unless overriden by `routeByHost`. * If the `DEFAULT` entry is omitted no proxy will be used by default. * * The value of an entry can be a string (format: `http://user:pass@proxyhost:port`) or `null` (direct connection). * Proxy authentication is handled automatically by the router. * * @example * proxies: { * DEFAULT: "http://user:pass@proxyhost:port", // use this proxy by default * RESIDENTIAL_US: "http://user:pass@proxyhost2:port" // use this for specific hosts with `routeByHost` * } */ proxies?: { /** * The default proxy for the browser (format: `http://user:pass@proxyhost:port`), * if omitted or `null` no proxy will be used by default */ DEFAULT?: string | null /** * Any other custom proxy names which can be used for routing later * (e.g. `'DATACENTER_US': 'http://user:pass@proxyhost:port'`) */ [key: string]: string | null } /** * An optional function to allow proxy routing based on the target host of the request. * * A return value of nothing, `null` or `DEFAULT` will result in the DEFAULT proxy being used as configured. * A return value of `DIRECT` will result in no proxy being used. * A return value of `ABORT` will cancel/block this request. 
* * Any other string as return value is assumed to be a reference to the configured `proxies` dict. * * @note The browser will most often establish only a single proxy connection per host. * * @example * routeByHost: async ({ host }) => { * if (host.includes('google')) { return "DIRECT" } * return 'RESIDENTIAL_US' * } * */ routeByHost?: RouteByHostFn /** Collect traffic and connection stats, default: true */ collectStats?: boolean /** Don't print any proxy connection errors to stderr, default: false */ muteProxyErrors?: boolean /** Suppress proxy errors for specific hosts */ muteProxyErrorsForHost?: string[] /** Options for the local proxy-chain server */ proxyServerOpts?: ProxyServerOpts /** * Optionally exempt hosts from going through a proxy, even our internal routing proxy. * * Examples: * `.com` or `chromium.org` or `.domain.com` * * @see * https://chromium.googlesource.com/chromium/src/+/HEAD/net/docs/proxy.md#proxy-bypass-rules * https://www-archive.mozilla.org/quality/networking/docs/aboutno_proxy_for.html */ proxyBypassList?: string[] } ``` ## Alternatives ### Proxy.pac files [Reference](https://developer.mozilla.org/en-US/docs/Web/HTTP/Proxy_servers_and_tunneling/Proxy_Auto-Configuration_PAC_file) - Only supported in chromium in headful mode - Despite the name (`FindProxyForURL`) can only route by host - Firefox supports PAC files and including the path through a pref - Only loaded once at browser launch, no dynamic proxies possible - Does not handle authentication ### Various "per-page proxy" plugins for puppeteer - Advantage: Route proxies by page not host - They rely on a massive hack: Using Node.js to send the requests instead of the browser - Will change the TLS fingerprint, error prone - Uses CDP request interception which is chromium only - Increased latency and resource overhead ## License Copyright © 2018 - 2023, [berstend̡̲̫̹̠̖͚͓̔̄̓̐̄͛̀͘](https://github.com/berstend). Released under the MIT License. 
// Package manifest; supplies the output file names, the banner fields and
// the list of modules that must stay external to the bundle.
const pkg = require('./package.json')

/** Basename (without extension) of the bundle entry point inside `src/`. */
const entryFile = 'index'

/**
 * Banner comment prepended to both bundles.
 * Built from explicit lines so it does not depend on the literal whitespace
 * inside a template string.
 */
const banner = [
  '/*!',
  ` * ${pkg.name} v${pkg.version} by ${pkg.author}`,
  ` * ${pkg.homepage || `https://github.com/${pkg.repository}`}`,
  ` * @license ${pkg.license}`,
  ' */',
].join('\n')

/**
 * Code appended to the CommonJS bundle: makes the default export the value of
 * `module.exports` and copies every named export onto it.
 * Each statement is terminated explicitly so the emitted code stays valid
 * regardless of how the lines end up being joined.
 */
const defaultExportOutro = [
  '',
  'module.exports = exports.default || {};',
  'Object.entries(exports).forEach(([key, value]) => { module.exports[key] = value });',
  '',
].join('\n')

export default {
  input: `src/${entryFile}.ts`,
  output: [
    {
      file: pkg.main,
      format: 'cjs',
      sourcemap: true,
      exports: 'named',
      outro: defaultExportOutro,
      banner,
    },
    {
      file: pkg.module,
      format: 'es',
      sourcemap: true,
      exports: 'named',
      banner,
    },
  ],
  // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
  external: [
    ...Object.keys(pkg.dependencies || {}),
    ...Object.keys(pkg.peerDependencies || {}),
  ],
  watch: {
    include: 'src/**',
  },
  plugins: [
    // Compile TypeScript files
    typescript({ useTsconfigDeclarationDir: true }),
    // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
    // commonjs(),
    // Allow node_modules resolution, so you can use 'external' to control
    // which external modules to include in the bundle
    // https://github.com/rollup/rollup-plugin-node-resolve#usage
    resolve(),
    // Resolve source maps to the original source
    sourceMaps(),
  ],
}
/**
 * Plugin wrapper around `ProxyRouter` for puppeteer-extra and playwright-extra.
 *
 * Before a browser is launched the plugin starts the router's local proxy
 * server and injects its URL into the launch options; when the browser
 * disconnects the router is closed again.
 */
export class ExtraPluginProxyRouter extends PuppeteerExtraPlugin {
  /** The underlying proxy router instance */
  public router: ProxyRouter
  /** The name of the automation framework used */
  public framework: 'playwright' | 'puppeteer' | null = null

  // Disable the puppeteer compat shim when used with playwright-extra
  public noPuppeteerShim = true

  /**
   * @param opts - Plugin options; `this.opts` (as provided by the base class)
   *   is what gets handed to the underlying `ProxyRouter`.
   */
  constructor(opts: Partial<ExtraPluginProxyRouterOptions>) {
    super(opts)
    this.debug('Initialized', this.opts)

    this.router = new ProxyRouter(this.opts)
  }

  get name() {
    return 'proxy-router'
  }

  /** Default options: stats collection enabled, local proxy server on port 2800 */
  get defaults(): ExtraPluginProxyRouterOptions {
    return {
      collectStats: true,
      proxyServerOpts: {
        port: 2800,
      },
    }
  }

  // Make accessing router methods shorter

  /** Get or set proxies at runtime */
  public get proxies() {
    return this.router.proxies
  }

  public set proxies(proxies) {
    this.router.proxies = proxies
  }

  /** Retrieve traffic statistics */
  public get stats() {
    return this.router.stats
  }

  /** Get or set the `routeByHost` function at runtime */
  public get routeByHost() {
    return this.router.routeByHost
  }

  public set routeByHost(fn) {
    this.router.routeByHost = fn
  }

  /** The configured bypass list joined by commas, or `undefined` when it is empty */
  private get proxyBypassListString() {
    return (this.opts.proxyBypassList || []).join(',') || undefined
  }

  /**
   * Remember which framework registered the plugin.
   * Anything other than an explicit `framework: 'playwright'` is treated as puppeteer.
   */
  async onPluginRegistered(args?: { framework: 'playwright' }): Promise<void> {
    this.framework =
      args?.framework === 'playwright' ? 'playwright' : 'puppeteer'
    this.debug('plugin registered', this.framework)
  }

  /**
   * Start the local proxy server and point the browser at it by mutating the
   * given launch options in place.
   *
   * @param options - Launch options object of the active framework.
   * @throws Error when the router reports no local proxy server URL.
   */
  async beforeLaunch(options: unknown = {}): Promise<void> {
    this.debug('beforeLaunch - before', options)

    await this.router.listen()
    const proxyUrl = this.router.proxyServerUrl
    if (!proxyUrl) {
      throw new Error('No local proxy server available')
    }

    const bypassList = this.proxyBypassListString
    if (this.framework === 'playwright') {
      // Playwright takes the proxy as a structured launch option
      const pwOptions = options as PlaywrightLaunchOptions
      pwOptions.proxy = {
        server: proxyUrl,
        bypass: bypassList,
      }
    } else if (this.framework === 'puppeteer') {
      // Puppeteer is configured through browser command line switches
      const pptrOptions = options as PuppeteerLaunchOptions
      pptrOptions.args = pptrOptions.args || []
      pptrOptions.args.push(`--proxy-server=${proxyUrl}`)
      if (bypassList) {
        pptrOptions.args.push(`--proxy-bypass-list=${bypassList}`)
      }
    } else {
      this.debug('Unsupported framework, not setting proxy')
    }

    this.debug('beforeLaunch - after', options)
  }

  /** Close the router once the browser is gone; close failures are only logged */
  async onDisconnected(): Promise<void> {
    await this.router.close().catch(this.debug)
  }
}
/** Options accepted by the proxy-chain `Server` constructor (its first argument) */
type ProxyServerOpts = ConstructorParameters<typeof ProxyServer>[0]

/** Dictionary of named upstream proxies */
export interface Proxies {
  /** The default proxy for the browser (format: `http://user:pass@proxyhost:port`), if omitted or `null` no proxy will be used by default */
  DEFAULT?: string | null
  /** Any other custom proxy names which can be used for routing later (e.g. `'DATACENTER_US': 'http://user:pass@proxyhost:port'`) */
  [key: string]: string | null
}

/** Name of a configured proxy or one of the reserved routing targets */
export type ProxyName = 'DIRECT' | 'DEFAULT' | 'ABORT' | string

/** Data available to the `routeByHost` function */
export interface RouteByHostArgs {
  /** Request URL host */
  host: string
  /** Whether the request is http or not */
  isHttp: boolean
  /** Request port (typically 443 or 80) */
  port: number
}

/** What a `routeByHost` function may return: a proxy name, or nothing to use the default */
export type RouteByHostResponse = ProxyName | void

/** User supplied routing function, called with details of the target host */
export type RouteByHostFn = (
  args: RouteByHostArgs
) => Promise<RouteByHostResponse>
/**
 * Routes connections to upstream proxies through a local proxy-chain server.
 *
 * For every request the local server asks `handleProxyServerRequest` which
 * upstream proxy to use; the answer comes from the optional `routeByHost`
 * function and the `proxies` dictionary, both of which can be changed at runtime.
 */
export class ProxyRouter {
  /** The underlying local proxy server used for routing to upstream proxies */
  public proxyServer: ProxyChain.Server
  /** An optional function to route hosts */
  public routeByHost: RouteByHostFn | null
  /**
   * The dictionary of proxies made available (format: `FOOBAR: 'http://user:pass@proxyhost:port'`).
   * Can be modified at runtime.
   */
  public proxies: Proxies
  /** Traffic stats collector */
  public readonly stats: ProxyRouterStats
  /** Whether the local proxy server is currently accepting connections */
  public isListening: boolean = false

  /** Result of the current server start, shared by concurrent `listen()` calls */
  protected serverStartPromise: Promise<number> | null = null
  protected collectStats: boolean
  protected muteProxyErrors: boolean
  protected muteProxyErrorsForHost: string[]
  /** Internal list of failed connections to only print the same connection issue once */
  protected failedConnections: { host: string; proxy: string }[] = []

  /**
   * @param opts - Router options; the local server port falls back to 2800
   *   and stats collection is enabled unless `collectStats` is set.
   */
  constructor(opts: ProxyRouterOpts = {}) {
    const proxyServerOpts: ProxyServerOpts = {
      ...opts.proxyServerOpts,
      prepareRequestFunction: this.handleProxyServerRequest.bind(this),
    }
    proxyServerOpts.port = proxyServerOpts.port || 2800

    this.proxies = opts.proxies || {}
    this.routeByHost = opts.routeByHost || null
    this.proxyServer = new ProxyServer(proxyServerOpts)
    this.collectStats = opts.collectStats ?? true
    this.stats = new ProxyRouterStats(this.proxyServer)
    this.muteProxyErrors = opts.muteProxyErrors ?? false
    this.muteProxyErrorsForHost = opts.muteProxyErrorsForHost || []
    debug('initialized', opts)

    // Emitted when HTTP connection is closed
    this.proxyServer.on('connectionClosed', ({ connectionId, stats }) => {
      if (stats && this.collectStats) {
        this.stats.addStats(connectionId as number, stats)
      }
      debugVerbose(`Connection ${connectionId} closed`)
    })

    // Emitted when a HTTP request fails
    this.proxyServer.on('requestFailed', ({ request, error }) => {
      if (!this.muteProxyErrors) {
        warn('Request failed:', request.url, error)
      }
    })

    // Emitted in case of a upstream proxy error (which can mean various things)
    this.proxyServer.on(
      'proxyAuthenticationFailed',
      (event: { connectionId: unknown; str: string }) =>
        this.reportUpstreamProxyError(event)
    )

    this.resurfaceSwallowedErrors()
  }

  /** Proxy server URL of the local proxy server used for routing */
  public get proxyServerUrl() {
    const port = this.proxyServer?.port
    if (!port || !this.isListening) {
      return
    }
    return `http://localhost:${port}`
  }

  /** The configured proxies plus the built-in `DIRECT` entry (no upstream proxy) */
  public get effectiveProxies() {
    return {
      DIRECT: null,
      ...(this.proxies || {}),
    }
  }

  /**
   * Start the local proxy server and accept connections.
   *
   * Concurrent and repeated calls share one start attempt. A failed attempt
   * rejects (instead of leaving the caller waiting forever) and is forgotten,
   * so a later call can try again.
   *
   * @returns The port the local server listens on.
   */
  public async listen(): Promise<number> {
    debug('starting server..')
    if (this.serverStartPromise) {
      debug('server start promise exists already')
      return this.serverStartPromise
    }
    this.serverStartPromise = this.startServer().catch((err) => {
      this.serverStartPromise = null
      throw err
    })
    return this.serverStartPromise
  }

  /**
   * Stop the local proxy server.
   *
   * Always resolves: with `null` on success, with the error otherwise.
   * The listening state is cleared either way so `proxyServerUrl` no longer
   * advertises the closed server.
   * NOTE(review): whether the underlying proxy-chain server can be started
   * again after `close()` is not visible here - confirm before relying on it.
   */
  public async close(): Promise<Error | null> {
    debug('closing..')
    return new Promise((resolve) => {
      this.proxyServer.close(true, (err) => {
        this.isListening = false
        this.serverStartPromise = null
        if (!err) {
          debug('closed without error')
          return resolve(null)
        }
        debug('closed with error', err)
        return resolve(err)
      })
    })
  }

  /**
   * Look up the proxy URL for a proxy name.
   * Yields `null` for `DIRECT` and, at runtime, `undefined` for names
   * missing from `effectiveProxies`.
   */
  public getProxyForName(name: ProxyName): string | null {
    return this.effectiveProxies[name]
  }

  /** Find a free port (preferring the configured one) and start listening on it */
  protected async startServer(): Promise<number> {
    if (this.isListening) {
      debug('server listening already')
      return this.proxyServer.port
    }
    const desiredPort = this.proxyServer.port
    debug('finding available port', { desiredPort })
    const availablePort = await getPort({ port: desiredPort })
    debug('availablePort:', availablePort)
    this.proxyServer.port = availablePort

    await new Promise<void>((resolve, reject) => {
      this.proxyServer.listen((err) => {
        if (err) {
          warn('Unable to start local server:', err)
          return reject(err)
        }
        debug(`server listening on port ${this.proxyServer.port}`)
        this.isListening = true
        resolve()
      })
    })
    return this.proxyServer.port
  }

  /** Explain an upstream proxy error to the user, once per host/proxy combination */
  protected reportUpstreamProxyError({
    connectionId,
    str: errorStr,
  }: {
    connectionId: unknown
    str: string
  }): void {
    // resolve the affected host and proxy
    const { host, proxy } =
      this.stats.connectionLog.find(({ id }) => id === connectionId) || {}
    const proxyUrl = !!proxy ? this.getProxyForName(proxy) : null

    const info: string[] = [errorStr]
    info.push(
      "This error can be thrown if a resource on a site simply can't be accessed (often temporarily), in this case this can be ignored.",
      ` - To not have errors like this printed to the console you can set 'muteProxyErrors: true' ${
        !!host ? `or 'muteProxyErrorsForHost: ["${host}"]'` : ''
      }`,
      'It can also indicate incorrect proxy credentials or that the target host is blocked by the proxy.',
      ' - Make sure the provided proxy string and credentials are correct and the site is not blocked by the proxy (or vice versa).',
      " - In case the site is blocked by the proxy: Use 'routeByHost' to route the host through a different proxy or as 'DIRECT' or 'ABORT'."
    )
    if (host && proxy) {
      info.push(
        '',
        `Affected target host: "${host}"`,
        `Affected proxy name: "${proxy}"`
      )
    }
    if (proxyUrl) {
      info.push(`Affected proxy URL: "${proxyUrl}"`)
      info.push(
        '',
        `To test the proxy with curl: curl -v --proxy '${proxyUrl}' 'https://${host}'`,
        ''
      )
      if (!`${proxyUrl}`.includes('http://')) {
        info.push('PS: Did you forget to prefix the proxy with "http://"?')
      }
    }

    const probablyNoise =
      errorStr.includes('authenticate') && errorStr.includes('522')
    const isMuted =
      this.muteProxyErrors || this.muteProxyErrorsForHost.includes(host)
    const alreadySeen = this.failedConnections.some(
      (entry) => entry.host === host && entry.proxy === proxy
    )

    const logger = probablyNoise || isMuted || alreadySeen ? debug : warn
    logger(info.join('\n'))

    // Remember each host/proxy pair only once so the list cannot grow without bound
    if (host && proxy && !alreadySeen) {
      this.failedConnections.push({ host, proxy })
    }
  }

  /** Resurface some errors that proxy-chain seems to swallow */
  protected resurfaceSwallowedErrors(): void {
    const resurfacedMessages = [
      'Failed to authenticate upstream proxy',
      'Error: Invalid "upstreamProxyUrl" provided',
      'Failed to connect to upstream proxy',
    ]
    const server = this.proxyServer
    const originalLog = server.log
    server.log = function (connectionId: unknown, str: string) {
      const message = `${str}`
      if (resurfacedMessages.some((needle) => message.includes(needle))) {
        server.emit('proxyAuthenticationFailed', { connectionId, str })
      }
      originalLog.apply(server, [connectionId, str])
    }
  }

  /**
   * Handle requests to the proxy server: decide which upstream proxy
   * (if any) the connection is relayed through.
   *
   * @throws RequestError (status 400) when the connection is routed to `ABORT`.
   */
  protected async handleProxyServerRequest({
    hostname: host,
    port,
    connectionId,
    isHttp,
  }: ProxyChain.PrepareRequestFunctionOpts): Promise<{
    upstreamProxyUrl: string | null
  }> {
    let proxyName = 'DEFAULT'
    if (!!this.routeByHost) {
      const fnResult = await this.routeByHost({ host, isHttp, port })
      if (typeof fnResult === 'string' && !!fnResult) {
        proxyName = fnResult
      }
    }
    // Logged before the ABORT check so aborted connections show up in the stats
    if (this.collectStats) {
      this.stats.addConnection(connectionId, proxyName, host)
    }
    let proxyUrl = this.getProxyForName(proxyName)
    debugVerbose(
      'handleProxyServerRequest',
      host,
      proxyName,
      redactProxyUrl(proxyUrl)
    )
    if (proxyName === 'ABORT') {
      throw new RequestError('Request aborted', 400)
    }
    if (!proxyUrl && proxyUrl !== null) {
      // Omitting the `DEFAULT` entry is a supported configuration
      // ("no proxy by default") and must not be reported as an error
      const isOmittedDefault = proxyName === 'DEFAULT' && proxyUrl === undefined
      if (!isOmittedDefault) {
        warn(
          `No proxy configured for proxy name "${proxyName}" - configuration error?`
        )
      }
      proxyUrl = null
    }
    return {
      upstreamProxyUrl: proxyUrl,
    }
  }
}
/**
 * Get bytes transferred by proxy.
 *
 * Refreshes the stats of still-active connections first, then sums the
 * target-side bytes of every logged connection per proxy name.
 *
 * @returns An object mapping proxy name to total bytes, largest total first.
 */
public get byProxy() {
  this.getStatsFromActiveConnections()

  // Group the connection ids by proxy name in a single pass over the log.
  // `addConnection` appends unconditionally, so an id can be logged more than
  // once; the Set makes sure such a connection is counted once per proxy.
  const idsByProxy = new Map<string, Set<number>>()
  for (const { id, proxy } of this.connectionLog) {
    const ids = idsByProxy.get(proxy)
    if (ids) {
      ids.add(id)
    } else {
      idsByProxy.set(proxy, new Set([id]))
    }
  }

  const totals = Array.from(
    idsByProxy,
    ([proxyName, ids]): [string, number] => {
      let totalBytes = 0
      ids.forEach((id) => {
        totalBytes += this.calculateProxyBytes(this.connectionStats.get(id))
      })
      return [proxyName, totalBytes]
    }
  )
  // Sort by most bytes on top
  totals.sort((a, b) => b[1] - a[1])

  return Object.fromEntries(totals)
}
/**
 * Try to bind a throwaway server with the given options.
 *
 * @returns The port that was bound (the OS-assigned one when `port` is 0).
 *   Rejects with the server's error when binding fails.
 */
const isAvailable = (options: Options): Promise<number> =>
  new Promise((resolve, reject) => {
    const server = net.createServer()
    // Do not let the probe server keep the process alive
    server.unref()
    server.on('error', reject)
    server.listen(options, () => {
      const { port } = server.address() as any
      server.close(() => {
        resolve(port as number)
      })
    })
  })

/**
 * Probe the preferred port(s) in order and return the first one that can be bound.
 *
 * Rejects with the error of the last failed probe, or with `undefined` when
 * no preferred port was given. The passed options are not modified.
 */
const getPort = async (options: Options): Promise<number> => {
  const { port } = options
  const candidates: ReadonlyArray<number> =
    typeof port === 'number' ? [port] : port || []

  let lastError: unknown
  for (const candidate of candidates) {
    try {
      return await isAvailable({ ...options, port: candidate })
    } catch (err) {
      lastError = err
    }
  }
  throw lastError
}

/**
 * Find a usable port: one of the preferred ports if available, otherwise
 * any free port chosen by the OS (port 0).
 *
 * The fallback works on a copy, the caller's `options` object is left untouched.
 */
const findFreePort = (options?: Options): Promise<number> =>
  options
    ? getPort(options).catch(() => getPort({ ...options, port: 0 }))
    : getPort({ port: 0 })

export default findFreePort
"puppeteer-extra", "version": "3.3.6", "description": "Teach puppeteer new tricks through plugins.", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "typings": "dist/index.d.ts", "main": "dist/index.cjs.js", "module": "dist/index.esm.js", "files": [ "dist" ], "scripts": { "clean": "rimraf dist/*", "prebuild": "run-s clean", "build": "run-s build:tsc build:rollup ambient-dts", "build:tsc": "tsc --module commonjs", "build:rollup": "rollup -c rollup.config.ts", "docs": "documentation readme --quiet --shallow --github --markdown-theme transitivebs --readme-file readme.md --section API ./src/index.ts", "postdocs": "npx prettier --write readme.md", "test:ts": "ava -v --config ava.config-ts.js", "test:js": "ava -v --serial --concurrency 1 --fail-fast", "test": "run-p test:js test:ts", "test-ci": "run-s test", "ambient-dts": "run-s ambient-dts-copy ambient-dts-fix-path", "ambient-dts-copy": "copyfiles -u 1 \"src/**/*.d.ts\" dist", "ambient-dts-fix-path": "replace-in-files --string='/// =8" }, "devDependencies": { "@types/node": "^18.0.0", "@types/puppeteer": "*", "ava": "^2.4.0", "documentation-markdown-themes": "^12.1.5", "npm-run-all": "^4.1.5", "puppeteer": "^10.2.0", "puppeteer-extra-plugin": "^3.2.3", "puppeteer-extra-plugin-anonymize-ua": "^2.4.6", "rimraf": "^3.0.0", "rollup": "^1.27.5", "rollup-plugin-commonjs": "^10.1.0", "rollup-plugin-node-resolve": "^5.2.0", "rollup-plugin-sourcemaps": "^0.4.2", "rollup-plugin-typescript2": "^0.25.2", "ts-node": "^8.5.4", "tslint": "^5.20.1", "tslint-config-prettier": "^1.18.0", "tslint-config-standard": "^9.0.0", "typescript": "4.4.3" }, "dependencies": { "@types/debug": "^4.1.0", "debug": "^4.1.1", "deepmerge": "^4.2.2" }, "peerDependencies": { "@types/puppeteer": "*", "puppeteer": "*", "puppeteer-core": "*" }, "peerDependenciesMeta": { "puppeteer": { "optional": true }, "puppeteer-core": { "optional": true }, "@types/puppeteer": { "optional": true } }, "gitHead": 
"babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra/readme.md ================================================ # puppeteer-extra [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/berstend/puppeteer-extra/test.yml?branch=master&event=push)](https://github.com/berstend/puppeteer-extra/actions) [![Discord](https://img.shields.io/discord/737009125862408274)](https://extra.community) [![npm](https://img.shields.io/npm/v/puppeteer-extra.svg)](https://www.npmjs.com/package/puppeteer-extra) [![npm](https://img.shields.io/npm/dt/puppeteer-extra.svg)](https://www.npmjs.com/package/puppeteer-extra) [![npm](https://img.shields.io/npm/l/puppeteer-extra.svg)](https://www.npmjs.com/package/puppeteer-extra) > A light-weight wrapper around [`puppeteer`](https://github.com/GoogleChrome/puppeteer) and [friends](#more-examples) to enable cool [plugins](#plugins) through a clean interface. ## Installation ```bash yarn add puppeteer puppeteer-extra # - or - npm install puppeteer puppeteer-extra # puppeteer-extra works with any puppeteer version: yarn add puppeteer@2.0.0 puppeteer-extra ``` ## Quickstart ```js // puppeteer-extra is a drop-in replacement for puppeteer, // it augments the installed puppeteer with plugin functionality. 
// Any number of plugins can be added through `puppeteer.use()` const puppeteer = require('puppeteer-extra') // Add stealth plugin and use defaults (all tricks to hide puppeteer usage) const StealthPlugin = require('puppeteer-extra-plugin-stealth') puppeteer.use(StealthPlugin()) // Add adblocker plugin to block all ads and trackers (saves bandwidth) const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker') puppeteer.use(AdblockerPlugin({ blockTrackers: true })) // That's it, the rest is puppeteer usage as normal 😊 puppeteer.launch({ headless: true }).then(async browser => { const page = await browser.newPage() await page.setViewport({ width: 800, height: 600 }) console.log(`Testing adblocker plugin..`) await page.goto('https://www.vanityfair.com') await page.waitForTimeout(1000) await page.screenshot({ path: 'adblocker.png', fullPage: true }) console.log(`Testing the stealth plugin..`) await page.goto('https://bot.sannysoft.com') await page.waitForTimeout(5000) await page.screenshot({ path: 'stealth.png', fullPage: true }) console.log(`All done, check the screenshots. ✨`) await browser.close() }) ``` The above example uses the [`stealth`](/packages/puppeteer-extra-plugin-stealth) and [`adblocker`](/packages/puppeteer-extra-plugin-adblocker) plugin, which need to be installed as well: ```bash yarn add puppeteer-extra-plugin-stealth puppeteer-extra-plugin-adblocker # - or - npm install puppeteer-extra-plugin-stealth puppeteer-extra-plugin-adblocker ``` If you'd like to see debug output just run your script like so: ```bash DEBUG=puppeteer-extra,puppeteer-extra-plugin:* node myscript.js ``` ### More examples
TypeScript usage
> `puppeteer-extra` and most plugins are written in TS, > so you get perfect type support out of the box. :) ```ts import puppeteer from 'puppeteer-extra' import AdblockerPlugin from 'puppeteer-extra-plugin-adblocker' import StealthPlugin from 'puppeteer-extra-plugin-stealth' puppeteer.use(AdblockerPlugin()).use(StealthPlugin()) puppeteer .launch({ headless: false, defaultViewport: null }) .then(async browser => { const page = await browser.newPage() await page.goto('https://bot.sannysoft.com') await page.waitForTimeout(5000) await page.screenshot({ path: 'stealth.png', fullPage: true }) await browser.close() }) ``` > Please check this [wiki](https://github.com/berstend/puppeteer-extra/wiki/TypeScript-usage) entry in case you have TypeScript related import issues. ![typings](https://i.imgur.com/bNtuTOt.png 'Typings')
Playwright usage
[`playwright-extra`](/packages/playwright-extra) with plugin support is available as well.
Multiple puppeteers with different plugins
```js const vanillaPuppeteer = require('puppeteer') const { addExtra } = require('puppeteer-extra') const AnonymizeUA = require('puppeteer-extra-plugin-anonymize-ua') async function main() { const pptr1 = addExtra(vanillaPuppeteer) pptr1.use( AnonymizeUA({ customFn: ua => 'Hello1/' + ua.replace('Chrome', 'Beer') }) ) const pptr2 = addExtra(vanillaPuppeteer) pptr2.use( AnonymizeUA({ customFn: ua => 'Hello2/' + ua.replace('Chrome', 'Beer') }) ) await checkUserAgent(pptr1) await checkUserAgent(pptr2) } main() async function checkUserAgent(pptr) { const browser = await pptr.launch({ headless: true }) const page = await browser.newPage() await page.goto('https://httpbin.org/headers', { waitUntil: 'domcontentloaded' }) const content = await page.content() console.log(content) await browser.close() } ```
Using with puppeteer-cluster
> [puppeteer-cluster](https://github.com/thomasdondorf/puppeteer-cluster) allows you to create a cluster of puppeteer workers and plays well together with `puppeteer-extra`. ```js const { Cluster } = require('puppeteer-cluster') const vanillaPuppeteer = require('puppeteer') const { addExtra } = require('puppeteer-extra') const Stealth = require('puppeteer-extra-plugin-stealth') const Recaptcha = require('puppeteer-extra-plugin-recaptcha') async function main() { // Create a custom puppeteer-extra instance using `addExtra`, // so we could create additional ones with different plugin config. const puppeteer = addExtra(vanillaPuppeteer) puppeteer.use(Stealth()) puppeteer.use(Recaptcha()) // Launch cluster with puppeteer-extra const cluster = await Cluster.launch({ puppeteer, maxConcurrency: 2, concurrency: Cluster.CONCURRENCY_CONTEXT }) // Define task handler await cluster.task(async ({ page, data: url }) => { await page.goto(url) const { hostname } = new URL(url) const { captchas } = await page.findRecaptchas() console.log(`Found ${captchas.length} captcha on ${hostname}`) await page.screenshot({ path: `${hostname}.png`, fullPage: true }) }) // Queue any number of tasks cluster.queue('https://bot.sannysoft.com') cluster.queue('https://www.google.com/recaptcha/api2/demo') cluster.queue('http://www.wikipedia.org/') await cluster.idle() await cluster.close() console.log(`All done, check the screenshots. ✨`) } // Let's go main().catch(console.warn) ``` For using with TypeScript, just change your imports to: ```ts import { Cluster } from 'puppeteer-cluster' import vanillaPuppeteer from 'puppeteer' import { addExtra } from 'puppeteer-extra' import Stealth from 'puppeteer-extra-plugin-stealth' import Recaptcha from 'puppeteer-extra-plugin-recaptcha' ```
Using with chrome-aws-lambda
> If you plan to use [chrome-aws-lambda](https://github.com/alixaxel/chrome-aws-lambda) with the [`stealth`](/packages/puppeteer-extra-plugin-stealth) plugin, you'll need to modify the default args to remove the > `--disable-notifications` flag to pass all the tests. ```js const chromium = require('chrome-aws-lambda') const { addExtra } = require('puppeteer-extra') const puppeteerExtra = addExtra(chromium.puppeteer) const launch = async () => { puppeteerExtra .launch({ args: chromium.args, defaultViewport: chromium.defaultViewport, executablePath: await chromium.executablePath, headless: chromium.headless }) .then(async browser => { const page = await browser.newPage() await page.goto('https://www.spacejam.com/archive/spacejam/movie/jam.htm') await page.waitForTimeout(10 * 1000) await browser.close() }) } launch() // Launch Browser ```
Using with Kikobeats/browserless
> [Kikobeats/browserless](https://github.com/Kikobeats/browserless) is a puppeteer-like Node.js library for interacting with Headless production scenarios. ```js const puppeteer = require('puppeteer-extra') const StealthPlugin = require('puppeteer-extra-plugin-stealth') puppeteer.use(StealthPlugin()) const browserless = require('browserless')({ puppeteer }) const saveBufferToFile = (buffer, fileName) => { const wstream = require('fs').createWriteStream(fileName) wstream.write(buffer) wstream.end() } browserless .screenshot('https://bot.sannysoft.com', { device: 'iPhone 6' }) .then(buffer => { const fileName = 'screenshot.png' saveBufferToFile(buffer, fileName) console.log(`your screenshot is here: `, fileName) }) ```
--- ## Plugins #### 🔥 [`puppeteer-extra-plugin-stealth`](/packages/puppeteer-extra-plugin-stealth) - Applies various evasion techniques to make detection of puppeteer harder. #### 🏴 [`puppeteer-extra-plugin-recaptcha`](/packages/puppeteer-extra-plugin-recaptcha) - Solves reCAPTCHAs and hCaptchas automatically, using a single line of code: `page.solveRecaptchas()`. #### [`puppeteer-extra-plugin-adblocker`](/packages/puppeteer-extra-plugin-adblocker) - Very fast & efficient blocker for ads and trackers. Reduces bandwidth & load times. #### [`puppeteer-extra-plugin-devtools`](/packages/puppeteer-extra-plugin-devtools) - Makes puppeteer browser debugging possible from anywhere. - Creates a secure tunnel to make the devtools frontend (**incl. screencasting**) accessible from the public internet #### [`puppeteer-extra-plugin-repl`](/packages/puppeteer-extra-plugin-repl) - Makes quick puppeteer debugging and exploration fun with an interactive REPL. #### [`puppeteer-extra-plugin-block-resources`](/packages/puppeteer-extra-plugin-block-resources) - Blocks resources (images, media, css, etc.) in puppeteer. - Supports all resource types, blocking can be toggled dynamically. #### [`puppeteer-extra-plugin-flash`](/packages/puppeteer-extra-plugin-flash) - Allows flash content to run on all sites without user interaction. #### [`puppeteer-extra-plugin-anonymize-ua`](/packages/puppeteer-extra-plugin-anonymize-ua) - Anonymizes the user-agent on all pages. - Supports dynamic replacing, so the browser version stays intact and recent. #### [`puppeteer-extra-plugin-user-preferences`](/packages/puppeteer-extra-plugin-user-preferences) - Allows setting custom Chrome/Chromium user preferences. - Has itself a plugin interface which is used by e.g. [`puppeteer-extra-plugin-font-size`](/packages/puppeteer-extra-plugin-font-size). > Check out the [packages folder](/packages/) for more plugins. 
### Community Plugins _These plugins have been generously contributed by members of the community._ _Please note that they're hosted outside the main project and not under our control or supervision._ #### [`puppeteer-extra-plugin-minmax`](https://github.com/Stillerman/puppeteer-extra-minmax) - Minimize and maximize puppeteer in real time. - Great for manually solving captchas. #### [`puppeteer-extra-plugin-portal`](https://github.com/claabs/puppeteer-extra-plugin-portal) - Use the Chromium screencast API to remotely view and interact with puppeteer sessions. - Great for remotely intervening when an automated task gets stuck, like captchas. > Please check the `Contributing` section below if you're interested in creating a plugin as well. --- ## Contributors ## Further info
Contributing
PRs and new plugins are welcome! 🎉 The plugin API for `puppeteer-extra` is clean and fun to use. Have a look at the [PuppeteerExtraPlugin](/packages/puppeteer-extra-plugin) base class documentation to get going and check out the [existing plugins](./packages/) (minimal example is the [anonymize-ua](/packages/puppeteer-extra-plugin-anonymize-ua/index.js) plugin) for reference. We use a [monorepo](/) powered by [Lerna](https://github.com/lerna/lerna#--use-workspaces) (and yarn workspaces), [ava](https://github.com/avajs/ava) for testing, TypeScript for the core, the [standard](https://standardjs.com/) style for linting and [JSDoc](http://usejsdoc.org/about-getting-started.html) heavily to auto-generate markdown [documentation](https://github.com/documentationjs/documentation) based on code. :-)
Kudos
- Thanks to [skyiea](https://github.com/skyiea) for [this PR](https://github.com/GoogleChrome/puppeteer/pull/1806) that started the project idea. - Thanks to [transitive-bullshit](https://github.com/transitive-bullshit) for [suggesting](https://github.com/berstend/puppeteer-extra/issues/2) a modular plugin design, which was fun to implement.
Compatibility
`puppeteer-extra` and all plugins are [tested continuously](https://github.com/berstend/puppeteer-extra/actions) in a matrix of current (stable & LTS) NodeJS and puppeteer versions. We never broke compatibility and still support puppeteer down to very early versions from 2018. A few plugins won't work in headless mode (it's noted if that's the case) due to Chrome limitations (e.g. the [`user-preferences`](/packages/puppeteer-extra-plugin-user-preferences) plugin). Look into `xvfb-run` if you still require a headless experience in these circumstances.
## Changelog
2.1.6 ➠ 3.1.1 ### `2.1.6` ➠ `3.1.1` Big refactor, the core is now **written in TypeScript** 🎉 That means out of the box type safety for fellow TS users and nice auto-completion in VSCode for JS users. Also: - A new [`addExtra`](#addextrapuppeteer) export, to **patch any puppeteer compatible library with plugin functionality** (`chrome-aws-lambda`, etc). This also allows for multiple puppeteer instances with different plugins. The API is backwards compatible, I bumped the major version just in case I missed something. Please report any issues you might find with the new release. :)
--- ## API #### Table of Contents - [class: PuppeteerExtra](#class-puppeteerextra) - [.use(plugin)](#useplugin) - [.launch(options?)](#launchoptions) - [.connect(options?)](#connectoptions) - [.defaultArgs(options?)](#defaultargsoptions) - [.executablePath()](#executablepath) - [.createBrowserFetcher(options?)](#createbrowserfetcheroptions) - [.plugins](#plugins) - [.getPluginData(name?)](#getplugindataname) - [defaultExport()](#defaultexport) - [addExtra(puppeteer)](#addextrapuppeteer) ### class: [PuppeteerExtra](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L67-L474) Modular plugin framework to teach `puppeteer` new tricks. This module acts as a drop-in replacement for `puppeteer`. Allows PuppeteerExtraPlugin's to register themselves and to extend puppeteer with additional functionality. Example: ```javascript const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) puppeteer.use( require('puppeteer-extra-plugin-font-size')({ defaultFontSize: 18 }) ) ;(async () => { const browser = await puppeteer.launch({ headless: false }) const page = await browser.newPage() await page.goto('http://example.com', { waitUntil: 'domcontentloaded' }) await browser.close() })() ``` --- #### .[use(plugin)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L85-L107) - `plugin` **PuppeteerExtraPlugin** Returns: **this** The same `PuppeteerExtra` instance (for optional chaining) The **main interface** to register `puppeteer-extra` plugins. 
Example: ```javascript puppeteer.use(plugin1).use(plugin2) ``` - **See: [PuppeteerExtraPlugin]** --- #### .[launch(options?)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L153-L177) - `options` **Puppeteer.LaunchOptions?** See [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteerlaunchoptions). Returns: **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<Puppeteer.Browser>** The method launches a browser instance with given arguments. The browser will be closed when the parent node.js process is closed. Augments the original `puppeteer.launch` method with plugin lifecycle methods. All registered plugins that have a `beforeLaunch` method will be called in sequence to potentially update the `options` Object before launching the browser. Example: ```javascript const browser = await puppeteer.launch({ headless: false, defaultViewport: null }) ``` --- #### .[connect(options?)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L189-L208) - `options` **Puppeteer.ConnectOptions?** See [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteerconnectoptions). Returns: **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<Puppeteer.Browser>** Attach Puppeteer to an existing Chromium instance. Augments the original `puppeteer.connect` method with plugin lifecycle methods. All registered plugins that have a `beforeConnect` method will be called in sequence to potentially update the `options` Object before launching the browser. 
--- #### .[defaultArgs(options?)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L215-L217) - `options` **Puppeteer.ChromeArgOptions?** See [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteerdefaultargsoptions). Returns: **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>** The default flags that Chromium will be launched with. --- #### .[executablePath()](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L220-L222) Returns: **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Path where Puppeteer expects to find bundled Chromium. --- #### .[createBrowserFetcher(options?)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L229-L233) - `options` **Puppeteer.FetcherOptions?** See [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteercreatebrowserfetcheroptions). Returns: **Puppeteer.BrowserFetcher** Creates a `BrowserFetcher` instance by forwarding `options` to the wrapped puppeteer's `createBrowserFetcher` (see the linked puppeteer docs for details). --- #### .[plugins](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L283-L285) Type: **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<PuppeteerExtraPlugin>** Get a list of all registered plugins. 
--- #### .[getPluginData(name?)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L310-L315) - `name` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** Filter data by optional plugin name Collects the exposed `data` property of all registered plugins. Will be reduced/flattened to a single array. Can be accessed by plugins that listed the `dataFromPlugins` requirement. Implemented mainly for plugins that need data from other plugins (e.g. `user-preferences`). - **See: [PuppeteerExtraPlugin]/data** --- ### [defaultExport()](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L494-L496) Type: **[PuppeteerExtra](#puppeteerextra)** The **default export** will behave exactly the same as the regular puppeteer (just with extra plugin functionality) and can be used as a drop-in replacement. Behind the scenes it will try to require either `puppeteer` or [`puppeteer-core`](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteer-vs-puppeteer-core) from the installed dependencies. Example: ```javascript // javascript import const puppeteer = require('puppeteer-extra') // typescript/es6 module import import puppeteer from 'puppeteer-extra' // Add plugins puppeteer.use(...) ``` --- ### [addExtra(puppeteer)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra/src/index.ts#L519-L520) - `puppeteer` **VanillaPuppeteer** Any puppeteer API-compatible puppeteer implementation or version. Returns: **[PuppeteerExtra](#puppeteerextra)** A fresh PuppeteerExtra instance using the provided puppeteer An **alternative way** to use `puppeteer-extra`: Augments the provided puppeteer with extra plugin functionality. 
This is useful in case you need multiple puppeteer instances with different plugins or to add plugins to a non-standard puppeteer package. Example: ```javascript // js import const puppeteerVanilla = require('puppeteer') const { addExtra } = require('puppeteer-extra') // ts/es6 import import puppeteerVanilla from 'puppeteer' import { addExtra } from 'puppeteer-extra' // Patch provided puppeteer and add plugins const puppeteer = addExtra(puppeteerVanilla) puppeteer.use(...) ``` --- ## License Copyright © 2018 - 2023, [berstend̡̲̫̹̠̖͚͓̔̄̓̐̄͛̀͘](mailto:github@berstend.com?subject=[GitHub]%20PuppeteerExtra). Released under the MIT License. [puppeteerextraplugin]: https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin 'PuppeteerExtraPlugin Documentation' ================================================ FILE: packages/puppeteer-extra/rollup.config.ts ================================================ import commonjs from 'rollup-plugin-commonjs' import resolve from 'rollup-plugin-node-resolve' import sourceMaps from 'rollup-plugin-sourcemaps' import typescript from 'rollup-plugin-typescript2' const pkg = require('./package.json') const entryFile = 'index' const banner = ` /*! 
* ${pkg.name} v${pkg.version} by ${pkg.author} * ${pkg.homepage || `https://github.com/${pkg.repository}`} * @license ${pkg.license} */ `.trim() const defaultExportOutro = ` module.exports = exports.default || {} Object.entries(exports).forEach(([key, value]) => { module.exports[key] = value }) ` export default { input: `src/${entryFile}.ts`, output: [ { file: pkg.main, format: 'cjs', sourcemap: true, exports: 'named', outro: defaultExportOutro, banner }, { file: pkg.module, format: 'es', sourcemap: true, exports: 'named', banner } ], // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') external: [ ...Object.keys(pkg.dependencies || {}), ...Object.keys(pkg.peerDependencies || {}) ], watch: { include: 'src/**' }, plugins: [ // Compile TypeScript files typescript({ useTsconfigDeclarationDir: true }), // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) commonjs(), // Allow node_modules resolution, so you can use 'external' to control // which external modules to include in the bundle // https://github.com/rollup/rollup-plugin-node-resolve#usage resolve(), // Resolve source maps to the original source sourceMaps() ] } ================================================ FILE: packages/puppeteer-extra/src/ambient.d.ts ================================================ export {} // https://github.com/sindresorhus/type-fest/issues/19 declare global { interface SymbolConstructor { readonly observable: symbol } } ================================================ FILE: packages/puppeteer-extra/src/index.ts ================================================ /// import { PuppeteerNode, Browser, Page } from 'puppeteer' import Debug from 'debug' const debug = Debug('puppeteer-extra') import merge from 'deepmerge' /** * Original Puppeteer API * @private */ export interface VanillaPuppeteer extends Pick< PuppeteerNode, | 'connect' | 'defaultArgs' | 'executablePath' | 'launch' | 'createBrowserFetcher' > {} /** * Minimal plugin 
interface * @private */ export interface PuppeteerExtraPlugin { _isPuppeteerExtraPlugin: boolean [propName: string]: any } /** * We need to hook into non-public APIs in rare occasions to fix puppeteer bugs. :( * @private */ interface BrowserInternals extends Browser { _createPageInContext(contextId?: string): Promise } /** * Modular plugin framework to teach `puppeteer` new tricks. * * This module acts as a drop-in replacement for `puppeteer`. * * Allows PuppeteerExtraPlugin's to register themselves and * to extend puppeteer with additional functionality. * * @class PuppeteerExtra * @implements {VanillaPuppeteer} * * @example * const puppeteer = require('puppeteer-extra') * puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) * puppeteer.use(require('puppeteer-extra-plugin-font-size')({defaultFontSize: 18})) * * ;(async () => { * const browser = await puppeteer.launch({headless: false}) * const page = await browser.newPage() * await page.goto('http://example.com', {waitUntil: 'domcontentloaded'}) * await browser.close() * })() */ export class PuppeteerExtra implements VanillaPuppeteer { private _plugins: PuppeteerExtraPlugin[] = [] constructor( private _pptr?: VanillaPuppeteer, private _requireError?: Error ) {} /** * The **main interface** to register `puppeteer-extra` plugins. 
* * @example * puppeteer.use(plugin1).use(plugin2) * * @see [PuppeteerExtraPlugin] * * @return The same `PuppeteerExtra` instance (for optional chaining) */ use(plugin: PuppeteerExtraPlugin): this { if (typeof plugin !== 'object' || !plugin._isPuppeteerExtraPlugin) { console.error( `Warning: Plugin is not derived from PuppeteerExtraPlugin, ignoring.`, plugin ) return this } if (!plugin.name) { console.error( `Warning: Plugin with no name registering, ignoring.`, plugin ) return this } if (plugin.requirements.has('dataFromPlugins')) { plugin.getDataFromPlugins = this.getPluginData.bind(this) } plugin._register(Object.getPrototypeOf(plugin)) this._plugins.push(plugin) debug('plugin registered', plugin.name) return this } /** * To stay backwards compatible with puppeteer's (and our) default export after adding `addExtra` * we need to defer the check if we have a puppeteer instance to work with. * Otherwise we would throw even if the user intends to use their non-standard puppeteer implementation. * * @private */ get pptr(): VanillaPuppeteer { if (this._pptr) { return this._pptr } // Whoopsie console.warn(` Puppeteer is missing. :-) Note: puppeteer is a peer dependency of puppeteer-extra, which means you can install your own preferred version. - To get the latest stable version run: 'yarn add puppeteer' or 'npm i puppeteer' Alternatively: - To get puppeteer without the bundled Chromium browser install 'puppeteer-core' `) throw this._requireError || new Error('No puppeteer instance provided.') } /** * The method launches a browser instance with given arguments. The browser will be closed when the parent node.js process is closed. * * Augments the original `puppeteer.launch` method with plugin lifecycle methods. * * All registered plugins that have a `beforeLaunch` method will be called * in sequence to potentially update the `options` Object before launching the browser. 
* * @example * const browser = await puppeteer.launch({ * headless: false, * defaultViewport: null * }) * * @param options - See [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteerlaunchoptions). */ async launch( options?: Parameters[0] ): ReturnType { // Ensure there are certain properties (e.g. the `options.args` array) const defaultLaunchOptions = { args: [] } options = merge(defaultLaunchOptions, options || {}) this.resolvePluginDependencies() this.orderPlugins() // Give plugins the chance to modify the options before launch options = await this.callPluginsWithValue('beforeLaunch', options) const opts = { context: 'launch', options, defaultArgs: this.defaultArgs } // Let's check requirements after plugin had the chance to modify the options this.checkPluginRequirements(opts) const browser = await this.pptr.launch(options) this._patchPageCreationMethods(browser as BrowserInternals) await this.callPlugins('_bindBrowserEvents', browser, opts) return browser } /** * Attach Puppeteer to an existing Chromium instance. * * Augments the original `puppeteer.connect` method with plugin lifecycle methods. * * All registered plugins that have a `beforeConnect` method will be called * in sequence to potentially update the `options` Object before launching the browser. * * @param options - See [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteerconnectoptions). 
*/ async connect( options: Parameters[0] ): ReturnType { this.resolvePluginDependencies() this.orderPlugins() // Give plugins the chance to modify the options before connect options = await this.callPluginsWithValue('beforeConnect', options) const opts = { context: 'connect', options } // Let's check requirements after plugin had the chance to modify the options this.checkPluginRequirements(opts) const browser = await this.pptr.connect(options) this._patchPageCreationMethods(browser as BrowserInternals) await this.callPlugins('_bindBrowserEvents', browser, opts) return browser } /** * The default flags that Chromium will be launched with. * * @param options - See [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteerdefaultargsoptions). */ defaultArgs( options?: Parameters[0] ): ReturnType { return this.pptr.defaultArgs(options) } /** Path where Puppeteer expects to find bundled Chromium. */ executablePath(): string { return this.pptr.executablePath() } /** * This methods attaches Puppeteer to an existing Chromium instance. * * @param options - See [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteercreatebrowserfetcheroptions). */ createBrowserFetcher( options: Parameters[0] ): ReturnType { return this.pptr.createBrowserFetcher(options) } /** * Patch page creation methods (both regular and incognito contexts). * * Unfortunately it's possible that the `targetcreated` events are not triggered * early enough for listeners (e.g. plugins using `onPageCreated`) to be able to * modify the page instance (e.g. user-agent) before the browser request occurs. * * This only affects the first request of a newly created page target. * * As a workaround I've noticed that navigating to `about:blank` (again), * right after a page has been created reliably fixes this issue and adds * no noticable delay or side-effects. * * This problem is not specific to `puppeteer-extra` but default Puppeteer behaviour. 
* * Note: This patch only fixes explicitly created pages, implicitly created ones * (e.g. through `window.open`) are still subject to this issue. I didn't find a * reliable mitigation for implicitly created pages yet. * * Puppeteer issues: * https://github.com/GoogleChrome/puppeteer/issues/2669 * https://github.com/puppeteer/puppeteer/issues/3667 * https://github.com/GoogleChrome/puppeteer/issues/386#issuecomment-343059315 * https://github.com/GoogleChrome/puppeteer/issues/1378#issue-273733905 * * @private */ private _patchPageCreationMethods(browser: BrowserInternals) { if (!browser._createPageInContext) { debug( 'warning: _patchPageCreationMethods failed (no browser._createPageInContext)' ) return } browser._createPageInContext = (function(originalMethod, context) { return async function() { const page = await originalMethod.apply(context, arguments as any) await page.goto('about:blank') return page } })(browser._createPageInContext, browser) } /** * Get a list of all registered plugins. * * @member {Array} */ get plugins() { return this._plugins } /** * Get the names of all registered plugins. * * @member {Array} * @private */ get pluginNames() { return this._plugins.map(p => p.name) } /** * Collects the exposed `data` property of all registered plugins. * Will be reduced/flattened to a single array. * * Can be accessed by plugins that listed the `dataFromPlugins` requirement. * * Implemented mainly for plugins that need data from other plugins (e.g. `user-preferences`). * * @see [PuppeteerExtraPlugin]/data * @param name - Filter data by optional plugin name * * @private */ public getPluginData(name?: string) { const data = this._plugins .map(p => (Array.isArray(p.data) ? p.data : [p.data])) .reduce((acc, arr) => [...acc, ...arr], []) return name ? data.filter((d: any) => d.name === name) : data } /** * Get all plugins that feature a given property/class method. 
* * @param prop - Property name to look for (checked with the `in` operator) * @return The registered plugins that expose `prop` * * @private */ private getPluginsByProp(prop: string): PuppeteerExtraPlugin[] { return this._plugins.filter(plugin => prop in plugin) } /** * Lightweight plugin dependency management to require plugins and code mods on demand. * * This uses the `dependencies` stanza (a `Set`) exposed by `puppeteer-extra` plugins. * * If a dependency cannot be required or instantiated, an install hint is printed via * `console.warn` and the original error is rethrown. * * @todo Allow objects as dependencies that contain opts for the requested plugin. * * @private */ private resolvePluginDependencies() { // Request missing dependencies from all plugins and flatten to a single Set const missingPlugins = this._plugins .map(p => p._getMissingDependencies(this._plugins)) .reduce((combined, list) => { return new Set([...combined, ...list]) }, new Set()) if (!missingPlugins.size) { debug('no dependencies are missing') return } debug('dependencies missing', missingPlugins) // Loop through all dependencies declared missing by plugins for (let name of [...missingPlugins]) { // Check if the dependency hasn't been registered as plugin already. // This might happen when multiple plugins have nested dependencies. if (this.pluginNames.includes(name)) { debug(`ignoring dependency '${name}', which has been required already.`) continue } // We follow a plugin naming convention, but let's rather enforce it <3 name = name.startsWith('puppeteer-extra-plugin') ? name : `puppeteer-extra-plugin-${name}` // In case a module sub resource is requested print out the main package name // e.g. puppeteer-extra-plugin-stealth/evasions/console.debug => puppeteer-extra-plugin-stealth const packageName = name.split('/')[0] let dep = null try { // Try to require and instantiate the stated dependency dep = require(name)() // Register it with `puppeteer-extra` as plugin this.use(dep) } catch (err) { console.warn(` A plugin listed '${name}' as dependency, which is currently missing. Please install it: yarn add ${packageName} Note: You don't need to require the plugin yourself, unless you want to modify it's default settings.
`) throw err } // Handle nested dependencies :D if (dep.dependencies.size) { this.resolvePluginDependencies() } } } /** * Order plugins that have expressed a special placement requirement. * * This is useful/necessary for e.g. plugins that depend on the data from other plugins. * * Plugins listing the `runLast` requirement are moved, keeping their relative order, * to the end of the internal `_plugins` array (which is mutated in place). * * @todo Support more than 'runLast'. * @todo If there are multiple plugins defining 'runLast', sort them depending on who depends on whom. :D * * @private */ private orderPlugins() { debug('orderPlugins:before', this.pluginNames) const runLast = this._plugins .filter(p => p.requirements.has('runLast')) .map(p => p.name) for (const name of runLast) { const index = this._plugins.findIndex(p => p.name === name) this._plugins.push(this._plugins.splice(index, 1)[0]) } debug('orderPlugins:after', this.pluginNames) } /** * Lightweight plugin requirement checking. * * The main intent is to notify the user when a plugin won't work as expected. * Unmet requirements only produce `console.warn` messages. * * @todo This could be improved, e.g. be evaluated by the plugin base class. * * @param opts - Reads `opts.context` (`'launch'` or `'connect'`) and, in the launch case, `opts.options.headless` * @private */ private checkPluginRequirements(opts = {} as any) { for (const plugin of this._plugins) { for (const requirement of plugin.requirements) { if ( opts.context === 'launch' && requirement === 'headful' && opts.options.headless ) { console.warn( `Warning: Plugin '${plugin.name}' is not supported in headless mode.` ) } if (opts.context === 'connect' && requirement === 'launch') { console.warn( `Warning: Plugin '${plugin.name}' doesn't support puppeteer.connect().` ) } } } } /** * Call plugins sequentially with the same values. * Plugins that expose the supplied property will be called. * Each call is awaited before the next plugin is invoked. * * @param prop - The plugin property to call * @param values - Any number of values * @private */ private async callPlugins(prop: string, ...values: any[]) { for (const plugin of this.getPluginsByProp(prop)) { await plugin[prop].apply(plugin, values) } } /** * Call plugins sequentially and pass on a value (waterfall style). * Plugins that expose the supplied property will be called.
* * The plugins can either modify the value or return an updated one. * Will return the latest, updated value which ran through all plugins. * A falsy return value (e.g. `undefined` from a plugin that only mutated the value in place) * leaves the current value unchanged. * * @param prop - The plugin property to call * @param value - Any value * @return The new updated value * @private */ private async callPluginsWithValue(prop: string, value: any) { for (const plugin of this.getPluginsByProp(prop)) { const newValue = await plugin[prop](value) if (newValue) { value = newValue } } return value } } /** * The **default export** will behave exactly the same as the regular puppeteer * (just with extra plugin functionality) and can be used as a drop-in replacement. * * Behind the scenes it will try to require either `puppeteer` * or [`puppeteer-core`](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md#puppeteer-vs-puppeteer-core) * from the installed dependencies. * The tuple returned by `requireVanillaPuppeteer()` is spread into the `PuppeteerExtra` constructor. * * @example * // javascript import * const puppeteer = require('puppeteer-extra') * * // typescript/es6 module import * import puppeteer from 'puppeteer-extra' * * // Add plugins * puppeteer.use(...) */ const defaultExport: PuppeteerExtra = (() => { return new PuppeteerExtra(...requireVanillaPuppeteer()) })() export default defaultExport /** * An **alternative way** to use `puppeteer-extra`: Augments the provided puppeteer with extra plugin functionality. * * This is useful in case you need multiple puppeteer instances with different plugins or to add plugins to a non-standard puppeteer package. * * @example * // js import * const { addExtra } = require('puppeteer-extra') * * // ts/es6 import * import { addExtra } from 'puppeteer-extra' * * // Patch e.g. puppeteer-firefox and add plugins * const puppeteer = addExtra(require('puppeteer-firefox')) * puppeteer.use(...) * * @param puppeteer Any puppeteer API-compatible puppeteer implementation or version.
* @return A fresh PuppeteerExtra instance using the provided puppeteer */ export const addExtra = (puppeteer: VanillaPuppeteer): PuppeteerExtra => new PuppeteerExtra(puppeteer) /** * Attempt to require puppeteer or puppeteer-core from dependencies. * To stay backwards compatible with the existing default export we have to do some gymnastics here. * * `puppeteer` is tried first; only if that require fails is `puppeteer-core` tried. * The result is a `[module, error]` tuple: on success the error slot is `undefined`; * when both requires fail the module slot is `undefined` and the error is the one * raised while requiring `puppeteer-core`. Nothing is thrown from this function. * * @return Either a Puppeteer instance or an Error, which we'll throw later if need be. * @private */ function requireVanillaPuppeteer(): [VanillaPuppeteer?, Error?] { try { return [require('puppeteer'), undefined] } catch (_) { // noop } try { return [require('puppeteer-core'), undefined] } catch (err) { return [undefined, err as Error] } } ================================================ FILE: packages/puppeteer-extra/src/puppeteer-legacy.d.ts ================================================ // @ts-nocheck // NOTE: The above comment is crucial for all this to work // The puppeteer project caused a type breaking shift in v6 while switching from @types/puppeteer to built-in types // This type definition file is only relevant when puppeteer < v6 is being used, // if we don't instruct TS to skip checking this file it would cause errors when pptr >= v6 is used (e.g. ChromeArgOptions is missing) import {} from 'puppeteer' import { Browser, ConnectOptions, ChromeArgOptions, LaunchOptions, FetcherOptions, BrowserFetcher} from "puppeteer" // Make puppeteer-extra typings backwards compatible with puppeteer < v6 // In pptr >= v6 they switched to built-in types and the `@types/puppeteer` package is not needed anymore. // This is essentially a shim for `PuppeteerNode`, which is found in pptr >= v6 and missing in `@types/puppeteer`. // Requires the `@types/puppeteer` package to be installed when using pptr < v6, `@types/puppeteer` will be ignored by TS when built-in types are available.
interface VanillaPuppeteer { /** Attaches Puppeteer to an existing Chromium instance. Resolves with the connected browser. */ connect(options?: ConnectOptions): Promise<Browser> /** The default flags that Chromium will be launched with */ defaultArgs(options?: ChromeArgOptions): string[] /** Path where Puppeteer expects to find bundled Chromium */ executablePath(): string /** The method launches a browser instance with given arguments. The browser will be closed when the parent node.js process is closed. Resolves with the launched browser. */ launch(options?: LaunchOptions): Promise<Browser> /** Creates a `BrowserFetcher`, which can be used to download and manage browser revisions. */ createBrowserFetcher( options?: FetcherOptions ): BrowserFetcher } declare module 'puppeteer' { interface PuppeteerNode extends VanillaPuppeteer {} } declare module 'puppeteer-core' { interface PuppeteerNode extends VanillaPuppeteer {} } ================================================ FILE: packages/puppeteer-extra/test/addExtra.ts ================================================ import test from 'ava' import { addExtra } from '../src/index' test('is a function', async t => { t.is(typeof addExtra, 'function') }) test('is an instance of Function', async t => { t.is(addExtra.constructor.name, 'Function') }) test('returns an object', async t => { t.is(typeof addExtra(null as any), 'object') }) test('returns an instance of PuppeteerExtra', async t => { t.is(addExtra(null as any).constructor.name, 'PuppeteerExtra') }) test('will throw without puppeteer', async t => { const pptr = addExtra(null as any) t.throws(() => pptr.pptr, null, 'No puppeteer instance provided.') }) ================================================ FILE: packages/puppeteer-extra/test/basic.ts ================================================ import test from 'ava' import puppeteer from '../src/index' test('is an object', async t => { t.is(typeof puppeteer, 'object') }) test('is an instance of PuppeteerExtra', async t => { t.is(puppeteer.constructor.name, 'PuppeteerExtra') }) test('should have the public class members', async t => {
t.true(puppeteer.use instanceof Function) t.true(puppeteer.plugins instanceof Array) t.true(puppeteer.pluginNames instanceof Array) t.true(puppeteer.getPluginData instanceof Function) }) /* Internal members are only checked for presence with the `in` operator. */ test('should have the internal class members', async t => { t.true('getPluginsByProp' in puppeteer) t.true('resolvePluginDependencies' in puppeteer) t.true('orderPlugins' in puppeteer) t.true('checkPluginRequirements' in puppeteer) t.true('callPlugins' in puppeteer) t.true('callPluginsWithValue' in puppeteer) }) test('should have the orginal puppeteer public class members', async t => { t.true(puppeteer.launch instanceof Function) t.true(puppeteer.connect instanceof Function) t.true(puppeteer.executablePath instanceof Function) t.true(puppeteer.defaultArgs instanceof Function) t.true(puppeteer.createBrowserFetcher instanceof Function) }) ================================================ FILE: packages/puppeteer-extra/test/connect.js ================================================ 'use strict' const test = require('ava') const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] test.beforeEach(t => { // Make sure we work with pristine modules try { delete require.cache[require.resolve('puppeteer-extra')] // delete require.cache[require.resolve('puppeteer-extra-plugin')] } catch (error) { console.log(error) } }) test('will remove headless from remote browser', async t => { // Mitigate CI quirks try { // Launch vanilla puppeteer browser with no plugins const puppeteerVanilla = require('puppeteer') const browserVanilla = await puppeteerVanilla.launch({ args: PUPPETEER_ARGS }) const browserWSEndpoint = browserVanilla.wsEndpoint() // Use puppeteer-extra with plugin to connect to existing browser const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) const browser = await puppeteer.connect({ browserWSEndpoint }) // Let's ensure we've anonymized the user-agent, despite not using .launch const page = await browser.newPage()
const ua = await page.evaluate(() => window.navigator.userAgent) t.true(!ua.includes('HeadlessChrome')) await browser.close() t.true(true) } catch (err) { console.log(`Caught error:`, err) if ( err.message && err.message.includes( 'Session closed. Most likely the page has been closed' ) ) { t.true(true) // ignore this error (the "Session closed" case is tolerated, any other error is rethrown) } else { throw err } } }) ================================================ FILE: packages/puppeteer-extra/test/events.js ================================================ 'use strict' const test = require('ava') const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] const puppeteerVanilla = require('puppeteer') const { addExtra } = require('puppeteer-extra') test.beforeEach(t => { // Make sure we work with pristine modules try { delete require.cache[require.resolve('puppeteer-extra')] delete require.cache[require.resolve('puppeteer-extra-plugin')] } catch (error) { console.log(error) } }) test('will bind launched browser events to plugins', async t => { const PLUGIN_EVENTS = [] const puppeteer = addExtra(puppeteerVanilla) const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const pluginName = 'hello-world' /* Test plugin: every lifecycle hook only records its own name in PLUGIN_EVENTS. */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return pluginName } onPluginRegistered() { PLUGIN_EVENTS.push('onPluginRegistered') } beforeLaunch() { PLUGIN_EVENTS.push('beforeLaunch') } afterLaunch() { PLUGIN_EVENTS.push('afterLaunch') } beforeConnect() { PLUGIN_EVENTS.push('beforeConnect') } afterConnect() { PLUGIN_EVENTS.push('afterConnect') } onBrowser() { PLUGIN_EVENTS.push('onBrowser') } onTargetCreated() { PLUGIN_EVENTS.push('onTargetCreated') } onPageCreated() { PLUGIN_EVENTS.push('onPageCreated') } onTargetChanged() { PLUGIN_EVENTS.push('onTargetChanged') } onTargetDestroyed() { PLUGIN_EVENTS.push('onTargetDestroyed') } onDisconnected() { PLUGIN_EVENTS.push('onDisconnected') } onClose() { PLUGIN_EVENTS.push('onClose') } } const instance = new Plugin()
puppeteer.use(instance) t.true(PLUGIN_EVENTS.includes('onPluginRegistered')) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) t.true(PLUGIN_EVENTS.includes('beforeLaunch')) t.true(PLUGIN_EVENTS.includes('afterLaunch')) // t.true(!PLUGIN_EVENTS.includes('beforeConnect')) // t.true(!PLUGIN_EVENTS.includes('afterConnect')) t.true(PLUGIN_EVENTS.includes('onBrowser')) const page = await browser.newPage().catch(console.log) t.true(PLUGIN_EVENTS.includes('onTargetCreated')) t.true(PLUGIN_EVENTS.includes('onPageCreated')) await page.goto('about:blank#foo').catch(console.log) t.true(PLUGIN_EVENTS.includes('onTargetChanged')) await page.close().catch(console.log) t.true(PLUGIN_EVENTS.includes('onTargetDestroyed')) await browser.close().catch(console.log) t.true(PLUGIN_EVENTS.includes('onDisconnected')) t.true(PLUGIN_EVENTS.includes('onClose')) }) test('will bind connected browser events to plugins', async t => { const PLUGIN_EVENTS = [] // Launch a browser through a puppeteer-extra instance that has no plugins registered const pptr1 = addExtra(puppeteerVanilla) const browserVanilla = await pptr1.launch({ args: PUPPETEER_ARGS }) const browserWSEndpoint = browserVanilla.wsEndpoint() const puppeteer = addExtra(puppeteerVanilla) const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const pluginName = 'hello-world' class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return pluginName } onPluginRegistered() { PLUGIN_EVENTS.push('onPluginRegistered') } beforeLaunch() { PLUGIN_EVENTS.push('beforeLaunch') } afterLaunch() { PLUGIN_EVENTS.push('afterLaunch') } beforeConnect() { PLUGIN_EVENTS.push('beforeConnect') } afterConnect() { PLUGIN_EVENTS.push('afterConnect') } onBrowser() { PLUGIN_EVENTS.push('onBrowser') } onTargetCreated() { PLUGIN_EVENTS.push('onTargetCreated') } onPageCreated() { PLUGIN_EVENTS.push('onPageCreated') } onTargetChanged() { PLUGIN_EVENTS.push('onTargetChanged') } onTargetDestroyed() {
PLUGIN_EVENTS.push('onTargetDestroyed') } onDisconnected() { PLUGIN_EVENTS.push('onDisconnected') } onClose() { PLUGIN_EVENTS.push('onClose') } } const instance = new Plugin() puppeteer.use(instance) t.true(PLUGIN_EVENTS.includes('onPluginRegistered')) const browser = await puppeteer .connect({ browserWSEndpoint }) .catch(console.log) /* The launch hooks must not have been recorded for a browser attached via connect(). */ t.true(!PLUGIN_EVENTS.includes('beforeLaunch')) t.true(!PLUGIN_EVENTS.includes('afterLaunch')) t.true(PLUGIN_EVENTS.includes('beforeConnect')) t.true(PLUGIN_EVENTS.includes('afterConnect')) t.true(PLUGIN_EVENTS.includes('onBrowser')) const page = await browser.newPage() t.true(PLUGIN_EVENTS.includes('onTargetCreated')) t.true(PLUGIN_EVENTS.includes('onPageCreated')) await page.goto('about:blank#foo').catch(console.log) t.true(PLUGIN_EVENTS.includes('onTargetChanged')) await page.close().catch(console.log) t.true(PLUGIN_EVENTS.includes('onTargetDestroyed')) await browser.close().catch(console.log) t.true(PLUGIN_EVENTS.includes('onDisconnected')) /* Unlike the launch test, this test expects `onClose` NOT to be recorded. */ t.true(!PLUGIN_EVENTS.includes('onClose')) }) ================================================ FILE: packages/puppeteer-extra/test/options.js ================================================ 'use strict' import test, { beforeEach } from 'ava' const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] beforeEach(t => { // Make sure we work with pristine modules try { delete require.cache[require.resolve('puppeteer-extra')] delete require.cache[require.resolve('puppeteer-extra-plugin')] } catch (error) { console.log(error) } }) test('will modify puppeteer launch options through plugins', async t => { let FINAL_OPTIONS = null const puppeteer = require('puppeteer-extra') const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const pluginName = 'hello-world' const pluginData = [{ name: 'foo', value: 'bar' }] class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return pluginName } get data() { return pluginData } beforeLaunch(options) {
options.args.push('--foobar=true') options.timeout = 60 * 1000 options.headless = true } afterLaunch(browser, opts) { FINAL_OPTIONS = opts.options } } const instance = new Plugin() puppeteer.use(instance) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS, headless: false }) /* The options seen by afterLaunch must carry the plugin's in-place edits: headless flipped to true, timeout set, extra arg appended. */ t.deepEqual(FINAL_OPTIONS, { headless: true, timeout: 60000, args: [].concat(PUPPETEER_ARGS, ['--foobar=true']) }) await browser.close() t.true(true) }) test('will modify puppeteer connect options through plugins', async t => { let FINAL_OPTIONS = null // Launch vanilla puppeteer browser with no plugins const puppeteerVanilla = require('puppeteer') const browserVanilla = await puppeteerVanilla.launch({ args: PUPPETEER_ARGS }) const browserWSEndpoint = browserVanilla.wsEndpoint() const puppeteer = require('puppeteer-extra') const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const pluginName = 'hello-world' const pluginData = [{ name: 'foo', value: 'bar' }] class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return pluginName } get data() { return pluginData } beforeConnect(options) { options.foo1 = 60 * 1000 options.foo2 = true } afterConnect(browser, opts) { FINAL_OPTIONS = opts.options } } const instance = new Plugin() puppeteer.use(instance) const browser = await puppeteer.connect({ browserWSEndpoint }) t.deepEqual(FINAL_OPTIONS, { foo1: 60 * 1000, foo2: true, browserWSEndpoint }) await browser.close() t.true(true) }) ================================================ FILE: packages/puppeteer-extra/test/plugin-support.js ================================================ 'use strict' const test = require('ava') const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] const PAGE_TIMEOUT = 60 * 1000 // 60s test.beforeEach(t => { // Make sure we work with pristine modules try { delete require.cache[require.resolve('puppeteer-extra')] delete require.cache[require.resolve('puppeteer-extra-plugin')] } catch (error) {
console.log(error) } }) /* Smoke test: launching and navigating without any plugin registered in this test. */ test('will launch the browser normally', async t => { const puppeteer = require('puppeteer-extra') const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) const page = await browser.newPage() await page.goto('http://example.com', { waitUntil: 'domcontentloaded', timeout: PAGE_TIMEOUT }) await browser.close() t.true(true) }) test('will launch puppeteer with plugin support', async t => { const puppeteer = require('puppeteer-extra') const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const pluginName = 'hello-world' const pluginData = [{ name: 'foo', value: 'bar' }] class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return pluginName } get data() { return pluginData } } const instance = new Plugin() puppeteer.use(instance) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) const page = await browser.newPage() t.is(puppeteer.plugins.length, 1) t.is(puppeteer.plugins[0].name, pluginName) t.is(puppeteer.pluginNames.length, 1) t.is(puppeteer.pluginNames[0], pluginName) t.is(puppeteer.getPluginData().length, 1) t.deepEqual(puppeteer.getPluginData()[0], pluginData[0]) /* getPluginData(name) matches the `name` field of a data entry ('foo'), not the plugin's own name ('hello-world'). */ t.deepEqual(puppeteer.getPluginData('foo')[0], pluginData[0]) t.is(puppeteer.getPluginData('not-existing').length, 0) await page.goto('http://example.com', { waitUntil: 'domcontentloaded', timeout: PAGE_TIMEOUT }) await browser.close() t.true(true) }) ================================================ FILE: packages/puppeteer-extra/tsconfig.json ================================================ { "compilerOptions": { "outDir": "./dist", "target": "es2017", "module": "es2015", "moduleResolution": "node", "lib": ["es2015", "es2016", "es2017", "dom"], "sourceMap": true, "declaration": true, "allowSyntheticDefaultImports": true, "esModuleInterop": true, "emitDecoratorMetadata": true, "experimentalDecorators": true, "strict": true, "noFallthroughCasesInSwitch": true, "noImplicitReturns": false, "noUnusedLocals":
true, "noUnusedParameters": false, "pretty": true, "stripInternal": true, "types": ["node"] }, "include": ["./src/**/*.tsx", "./src/**/*.ts"], "exclude": ["node_modules", "dist", "./test/**/*.spec.ts"] } ================================================ FILE: packages/puppeteer-extra/tslint.json ================================================ { "extends": ["tslint-config-standard", "tslint-config-prettier"], "rules": { "ordered-imports": true } } ================================================ FILE: packages/puppeteer-extra-plugin/ava.config-ts.js ================================================ export default { compileEnhancements: false, environmentVariables: { TS_NODE_COMPILER_OPTIONS: '{"module":"commonjs"}' }, files: ['src/**/*.test.ts'], extensions: ['ts'], require: ['ts-node/register'] } ================================================ FILE: packages/puppeteer-extra-plugin/ava.config.js ================================================ export default { files: ['dist/*.test.js'] } ================================================ FILE: packages/puppeteer-extra-plugin/package.json ================================================ { "name": "puppeteer-extra-plugin", "version": "3.2.3", "description": "Base class for puppeteer-extra plugins.", "main": "dist/index.cjs.js", "module": "dist/index.esm.js", "typings": "dist/index.d.ts", "files": [ "dist" ], "repository": "berstend/puppeteer-extra", "homepage": "https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin", "author": "berstend", "license": "MIT", "scripts": { "clean": "rimraf dist/*", "prebuild": "run-s clean", "build": "run-s build:tsc build:rollup", "build:tsc": "tsc --module commonjs", "build:rollup": "rollup -c rollup.config.ts", "docs": "documentation readme --quiet --shallow --github --markdown-theme transitivebs --readme-file readme.md --section API ./src/index.ts", "postdocs": "npx prettier --write readme.md", "test": "ava -v --config ava.config-ts.js", "pretest-ci": 
"run-s build", "test-ci": "ava --fail-fast -v" }, "engines": { "node": ">=9.11.2" }, "prettier": { "printWidth": 80, "semi": false, "singleQuote": true }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "ua", "user-agent", "chrome", "headless", "pupeteer" ], "devDependencies": { "@types/node": "14.14.34", "@types/puppeteer": "*", "ava": "2.4.0", "documentation-markdown-themes": "^12.1.5", "npm-run-all": "^4.1.5", "puppeteer": "9", "rimraf": "^3.0.0", "rollup": "^1.27.5", "rollup-plugin-commonjs": "^10.1.0", "rollup-plugin-node-resolve": "^5.2.0", "rollup-plugin-sourcemaps": "^0.4.2", "rollup-plugin-typescript2": "^0.25.2", "ts-node": "^8.5.4", "tslint": "^5.12.1", "tslint-config-prettier": "^1.18.0", "tslint-config-standard": "^9.0.0", "typescript": "4.4.3" }, "dependencies": { "@types/debug": "^4.1.0", "debug": "^4.1.1", "merge-deep": "^3.0.1" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin/readme.md ================================================ # puppeteer-extra-plugin [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/berstend/puppeteer-extra/test.yml?branch=master&event=push)](https://github.com/berstend/puppeteer-extra/actions) [![Discord](https://img.shields.io/discord/737009125862408274)](https://extra.community) [![npm](https://img.shields.io/npm/v/puppeteer-extra-plugin.svg)](https://www.npmjs.com/package/puppeteer-extra-plugin) ## Installation ```bash yarn add puppeteer-extra-plugin ``` ## Changelog
v3.0.1
- Now written in TypeScript 🎉 - **Breaking change:** Now using a named export: ```js // Before const PuppeteerExtraPlugin = require('puppeteer-extra-plugin') // After (>= v3.0.1) const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') ```
## API #### Table of Contents - [puppeteer-extra-plugin ![GitHub Workflow Status](https://github.com/berstend/puppeteer-extra/actions) [![Discord](https://img.shields.io/discord/737009125862408274)](https://extra.community) [![npm](https://img.shields.io/npm/v/puppeteer-extra-plugin.svg)](https://www.npmjs.com/package/puppeteer-extra-plugin)](#puppeteer-extra-plugin---) - [Installation](#installation) - [Changelog](#changelog) - [API](#api) - [Table of Contents](#table-of-contents) - [class: PuppeteerExtraPlugin](#class-puppeteerextraplugin) - [.name](#name) - [.defaults](#defaults) - [.requirements](#requirements) - [.dependencies](#dependencies) - [.data](#data) - [.opts](#opts) - [.debug](#debug) - [.beforeLaunch(options)](#beforelaunchoptions) - [.afterLaunch(browser, opts)](#afterlaunchbrowser-opts) - [.beforeConnect(options)](#beforeconnectoptions) - [.afterConnect(browser, opts)](#afterconnectbrowser-opts) - [.onBrowser(browser, opts)](#onbrowserbrowser-opts) - [.onTargetCreated(target)](#ontargetcreatedtarget) - [.onPageCreated(page, target)](#onpagecreatedpage-target) - [.onTargetChanged(target)](#ontargetchangedtarget) - [.onTargetDestroyed(target)](#ontargetdestroyedtarget) - [.onDisconnected()](#ondisconnected) - [.onClose()](#onclose) - [.onPluginRegistered()](#onpluginregistered) - [.getDataFromPlugins(name?)](#getdatafrompluginsname) ### class: [PuppeteerExtraPlugin](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L65-L572) - `opts` **PluginOptions?** Base class for `puppeteer-extra` plugins. Provides convenience methods to avoid boilerplate. All common `puppeteer` browser events will be bound to the plugin instance, if a respectively named class member is found. Please refer to the [puppeteer API documentation](https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md) as well. 
Example: ```javascript // hello-world-plugin.js const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'hello-world' } async onPageCreated(page) { this.debug('page created', page.url()) const ua = await page.browser().userAgent() this.debug('user agent', ua) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } // foo.js const puppeteer = require('puppeteer-extra') puppeteer.use(require('./hello-world-plugin')()) ;(async () => { const browser = await puppeteer.launch({ headless: false }) const page = await browser.newPage() await page.goto('http://example.com', { waitUntil: 'domcontentloaded' }) await browser.close() })() ``` --- #### .[name](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L92-L94) Type: **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Plugin name (required). Convention: - Package: `puppeteer-extra-plugin-anonymize-ua` - Name: `anonymize-ua` Example: ```javascript get name () { return 'anonymize-ua' } ``` --- #### .[defaults](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L117-L119) Type: **PluginOptions** Plugin defaults (optional). If defined will be ([deep-](https://github.com/jonschlinkert/merge-deep))merged with the (optional) user supplied options (supplied during plugin instantiation). The result of merging defaults with user supplied options can be accessed through `this.opts`. 
Example: ```javascript get defaults () { return { stripHeadless: true, makeWindows: true, customFn: null } } // Users can overwrite plugin defaults during instantiation: puppeteer.use(require('puppeteer-extra-plugin-foobar')({ makeWindows: false })) ``` - **See: \[[opts]]** --- #### .[requirements](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L145-L147) Type: **PluginRequirements** Plugin requirements (optional). Signal certain plugin requirements to the base class and the user. Currently supported: - `launch` - If the plugin only supports locally created browser instances (no `puppeteer.connect()`), will output a warning to the user. - `headful` - If the plugin doesn't work in `headless: true` mode, will output a warning to the user. - `dataFromPlugins` - In case the plugin requires data from other plugins; will enable usage of `this.getDataFromPlugins()`. - `runLast` - In case the plugin prefers to run after the others. Useful when the plugin needs data from others. Example: ```javascript get requirements () { return new Set(['runLast', 'dataFromPlugins']) } ``` --- #### .[dependencies](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L160-L162) Type: **PluginDependencies** Plugin dependencies (optional). Missing plugins will be required() by puppeteer-extra. Example: ```javascript get dependencies () { return new Set(['user-preferences']) } // Will ensure the 'puppeteer-extra-plugin-user-preferences' plugin is loaded. ``` --- #### .[data](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L196-L198) Type: **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<PluginData>** Plugin data (optional).
Plugins can expose data (an array of objects), which in turn can be consumed by other plugins that list the `dataFromPlugins` requirement (by using `this.getDataFromPlugins()`). Convention: `[ {name: 'Any name', value: 'Any value'} ]` Example: ```javascript // plugin1.js get data () { return [ { name: 'userPreferences', value: { foo: 'bar' } }, { name: 'userPreferences', value: { hello: 'world' } } ] } // plugin2.js get requirements () { return new Set(['dataFromPlugins']) } async beforeLaunch () { const prefs = this.getDataFromPlugins('userPreferences').map(d => d.value) this.debug(prefs) // => [ { foo: 'bar' }, { hello: 'world' } ] } ``` - **See: \[[getDataFromPlugins]]** --- #### .[opts](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L215-L217) Type: **PluginOptions** Access the plugin options (usually the `defaults` merged with user defined options). To skip the auto-merging of defaults with user supplied opts don't define a `defaults` property and set the `this._opts` Object in your plugin constructor directly. Example: ```javascript get defaults () { return { foo: "bar" } } async onPageCreated (page) { this.debug(this.opts.foo) // => bar } ``` - **See: \[[defaults]]** --- #### .[debug](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L235-L237) Type: **Debugger** Convenience debug logger based on the [debug] module. Will automatically namespace the logging output to the plugin package name. [debug]: https://www.npmjs.com/package/debug ```bash # toggle output using environment variables DEBUG=puppeteer-extra-plugin:<plugin_name> node foo.js # to debug all the things: DEBUG=puppeteer-extra,puppeteer-extra-plugin:* node foo.js ``` Example: ```javascript this.debug('hello world') // will output e.g.
'puppeteer-extra-plugin:anonymize-ua hello world' ``` --- #### .[beforeLaunch(options)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L256-L258) - `options` **any** Puppeteer launch options Before a new browser instance is created/launched. Can be used to modify the puppeteer launch options by modifying or returning them. Plugins using this method will be called in sequence to each be able to update the launch options. Example: ```javascript async beforeLaunch (options) { if (this.opts.flashPluginPath) { options.args.push(`--ppapi-flash-path=${this.opts.flashPluginPath}`) } } ``` --- #### .[afterLaunch(browser, opts)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L287-L292) - `browser` **Puppeteer.Browser** The `puppeteer` browser instance. - `opts` (optional, default `{options:({}as Puppeteer.LaunchOptions)}`) After the browser has launched. Note: Don't assume that there will only be a single browser instance during the lifecycle of a plugin. It's possible that `puppeteer.launch` will be called multiple times and more than one browser created. In order to make the plugins as stateless as possible don't store a reference to the browser instance in the plugin but rather consider alternatives. E.g. when using `onPageCreated` you can get a browser reference by using `page.browser()`.
Alternatively you could expose a class method that takes a browser instance as a parameter to work with: ```es6 const fancyPlugin = require('puppeteer-extra-plugin-fancy')() puppeteer.use(fancyPlugin) const browser = await puppeteer.launch() await fancyPlugin.killBrowser(browser) ``` Example: ```javascript async afterLaunch (browser, opts) { this.debug('browser has been launched', opts.options) } ``` --- #### .[beforeConnect(options)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L305-L307) - `options` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** Puppeteer connect options Before connecting to an existing browser instance. Can be used to modify the puppeteer connect options by modifying or returning them. Plugins using this method will be called in sequence to each be able to update the connect options. --- #### .[afterConnect(browser, opts)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L319-L321) - `browser` **Puppeteer.Browser** The `puppeteer` browser instance. - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** (optional, default `{}`) - `opts.options` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** Puppeteer connect options used. After connecting to an existing browser instance. > Note: Don't assume that there will only be a single browser instance during the lifecycle of a plugin. --- #### .[onBrowser(browser, opts)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L335-L337) - `browser` **Puppeteer.Browser** The `puppeteer` browser instance. 
- `opts` **any** Returns: **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<void>** Called when a browser instance is available. This applies to both `puppeteer.launch()` and `puppeteer.connect()`. Convenience method created for plugins that need access to a browser instance and don't mind if it has been created through `launch` or `connect`. > Note: Don't assume that there will only be a single browser instance during the lifecycle of a plugin. --- #### .[onTargetCreated(target)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L348-L350) - `target` **Puppeteer.Target** Called when a target is created, for example when a new page is opened by window.open or browser.newPage. > Note: This includes target creations in incognito browser contexts. > > Note: This includes browser instances created through `.launch()` as well as `.connect()`. --- #### .[onPageCreated(page, target)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L371-L373) - `page` **Puppeteer.Page** - `target` **Puppeteer.Target** Same as `onTargetCreated` but prefiltered to only contain Pages, for convenience. > Note: This includes page creations in incognito browser contexts. > > Note: This includes browser instances created through `.launch()` as well as `.connect()`. Example: ```javascript async onPageCreated (page) { let ua = await page.browser().userAgent() if (this.opts.stripHeadless) { ua = ua.replace('HeadlessChrome/', 'Chrome/') } this.debug('new ua', ua) await page.setUserAgent(ua) } ``` --- #### .[onTargetChanged(target)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L384-L386) - `target` **Puppeteer.Target** Called when the url of a target changes. 
> Note: This includes target changes in incognito browser contexts. > > Note: This includes browser instances created through `.launch()` as well as `.connect()`. --- #### .[onTargetDestroyed(target)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L397-L399) - `target` **Puppeteer.Target** Called when a target is destroyed, for example when a page is closed. > Note: This includes target destructions in incognito browser contexts. > > Note: This includes browser instances created through `.launch()` as well as `.connect()`. --- #### .[onDisconnected()](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L408-L410) Called when Puppeteer gets disconnected from the Chromium instance. This might happen because of one of the following: - Chromium is closed or crashed - The `browser.disconnect` method was called --- #### .[onClose()](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L424-L426) **Deprecated:** Since puppeteer v1.6.0 `onDisconnected` has been improved and should be used instead of `onClose`. In puppeteer < v1.6.0 `onDisconnected` was not catching all exit scenarios. In order for plugins to clean up properly (e.g. deleting temporary files) the `onClose` method had been introduced. > Note: Might be called multiple times on exit. > > Note: This only includes browser instances created through `.launch()`. --- #### .[onPluginRegistered()](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L433-L435) After the plugin has been registered in `puppeteer-extra`. 
Normally right after `puppeteer.use(plugin)` is called --- #### .[getDataFromPlugins(name?)](https://github.com/berstend/puppeteer-extra/blob/dc8b90260a927c0c66c4585c5a56092ea9c35049/packages/puppeteer-extra-plugin/src/index.ts#L448-L450) - `name` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** Filter data by `name` property Returns: **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<PluginData>** Helper method to retrieve `data` objects from other plugins. A plugin needs to state the `dataFromPlugins` requirement in order to use this method. Will be mapped to `puppeteer.getPluginData`. - **See: [data]** - **See: [requirements]** --- ================================================ FILE: packages/puppeteer-extra-plugin/rollup.config.ts ================================================ import commonjs from 'rollup-plugin-commonjs' import resolve from 'rollup-plugin-node-resolve' import sourceMaps from 'rollup-plugin-sourcemaps' import typescript from 'rollup-plugin-typescript2' const pkg = require('./package.json') const entryFile = 'index' const banner = ` /*! 
* ${pkg.name} v${pkg.version} by ${pkg.author} * ${pkg.homepage || `https://github.com/${pkg.repository}`} * @license ${pkg.license} */ `.trim() export default { input: `src/${entryFile}.ts`, output: [ { file: pkg.main, format: 'cjs', sourcemap: true, banner }, { file: pkg.module, format: 'es', sourcemap: true, banner } ], // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') external: [ ...Object.keys(pkg.dependencies || {}), ...Object.keys(pkg.peerDependencies || {}) ], watch: { include: 'src/**' }, plugins: [ // Compile TypeScript files typescript({ useTsconfigDeclarationDir: true }), // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) commonjs(), // Allow node_modules resolution, so you can use 'external' to control // which external modules to include in the bundle // https://github.com/rollup/rollup-plugin-node-resolve#usage resolve(), // Resolve source maps to the original source sourceMaps() ] } ================================================ FILE: packages/puppeteer-extra-plugin/src/ambient.d.ts ================================================ export {} // https://github.com/sindresorhus/type-fest/issues/19 declare global { interface SymbolConstructor { readonly observable: symbol } } ================================================ FILE: packages/puppeteer-extra-plugin/src/index.test.ts ================================================ import test from 'ava' import { PuppeteerExtraPlugin } from '.' 
test('is a function', async t => { t.is(typeof PuppeteerExtraPlugin, 'function') }) test('will throw without a name', async t => { class Derived extends PuppeteerExtraPlugin {} const error = await t.throws(() => new Derived()) t.is(error.message, `Plugin must override "name"`) }) test('should have the basic class members', async t => { const pluginName = 'hello-world' class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return pluginName } } const instance = new Plugin() t.is(instance.name, pluginName) t.true(instance.requirements instanceof Set) t.true(instance.dependencies instanceof Set) t.true(instance.data instanceof Array) t.true(instance.defaults instanceof Object) t.is(instance.data.length, 0) t.true(instance.debug instanceof Function) t.is(instance.debug.namespace, `puppeteer-extra-plugin:${pluginName}`) t.true(instance._isPuppeteerExtraPlugin) }) test('should have the public class members', async t => { const pluginName = 'hello-world' class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return pluginName } } const instance = new Plugin() t.true(instance.beforeLaunch instanceof Function) t.true(instance.afterLaunch instanceof Function) t.true(instance.onTargetCreated instanceof Function) t.true(instance.onBrowser instanceof Function) t.true(instance.onPageCreated instanceof Function) t.true(instance.onTargetChanged instanceof Function) t.true(instance.onTargetDestroyed instanceof Function) t.true(instance.onDisconnected instanceof Function) t.true(instance.onClose instanceof Function) t.true(instance.onPluginRegistered instanceof Function) t.true(instance.getDataFromPlugins instanceof Function) }) test('should have the internal class members', async t => { const pluginName = 'hello-world' class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return pluginName } } const instance = new Plugin() 
/**
 * Base class for `puppeteer-extra` plugins.
 *
 * Provides convenience methods to avoid boilerplate.
 *
 * All common `puppeteer` browser events will be bound to
 * the plugin instance, if a respectively named class member is found.
 *
 * Please refer to the [puppeteer API documentation](https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md) as well.
 *
 * @example
 * // hello-world-plugin.js
 * const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin')
 *
 * class Plugin extends PuppeteerExtraPlugin {
 *   constructor (opts = { }) { super(opts) }
 *
 *   get name () { return 'hello-world' }
 *
 *   async onPageCreated (page) {
 *     this.debug('page created', page.url())
 *     const ua = await page.browser().userAgent()
 *     this.debug('user agent', ua)
 *   }
 * }
 *
 * module.exports = function (pluginConfig) { return new Plugin(pluginConfig) }
 *
 *
 * // foo.js
 * const puppeteer = require('puppeteer-extra')
 * puppeteer.use(require('./hello-world-plugin')())
 *
 * ;(async () => {
 *   const browser = await puppeteer.launch({headless: false})
 *   const page = await browser.newPage()
 *   await page.goto('http://example.com', {waitUntil: 'domcontentloaded'})
 *   await browser.close()
 * })()
 *
 */
export abstract class PuppeteerExtraPlugin {
  /** @private */
  private _debugBase: Debugger
  /** @private */
  private _opts: PluginOptions
  /** @private */
  private _childClassMembers: string[]

  /**
   * Reads `this.name` (which throws unless the subclass overrides it) and
   * merges the subclass `defaults` with the user supplied options.
   *
   * @param opts - User supplied plugin options (optional)
   */
  constructor(opts?: PluginOptions) {
    this._debugBase = debug(`puppeteer-extra-plugin:base:${this.name}`)
    this._childClassMembers = []

    this._opts = merge(this.defaults, opts || {})

    this._debugBase('Initialized.')
  }

  /**
   * Plugin name (required).
   *
   * Convention:
   * - Package: `puppeteer-extra-plugin-anonymize-ua`
   * - Name: `anonymize-ua`
   *
   * @example
   * get name () { return 'anonymize-ua' }
   */
  get name(): string {
    throw new Error('Plugin must override "name"')
  }

  /**
   * Plugin defaults (optional).
   *
   * If defined will be ([deep-](https://github.com/jonschlinkert/merge-deep))merged with the (optional) user supplied options (supplied during plugin instantiation).
   *
   * The result of merging defaults with user supplied options can be accessed through `this.opts`.
   *
   * @see [[opts]]
   *
   * @example
   * get defaults () {
   *   return {
   *     stripHeadless: true,
   *     makeWindows: true,
   *     customFn: null
   *   }
   * }
   *
   * // Users can overwrite plugin defaults during instantiation:
   * puppeteer.use(require('puppeteer-extra-plugin-foobar')({ makeWindows: false }))
   */
  get defaults(): PluginOptions {
    return {}
  }

  /**
   * Plugin requirements (optional).
   *
   * Signal certain plugin requirements to the base class and the user.
   *
   * Currently supported:
   * - `launch`
   *   - If the plugin only supports locally created browser instances (no `puppeteer.connect()`),
   *     will output a warning to the user.
   * - `headful`
   *   - If the plugin doesn't work in `headless: true` mode,
   *     will output a warning to the user.
   * - `dataFromPlugins`
   *   - In case the plugin requires data from other plugins.
   *     will enable usage of `this.getDataFromPlugins()`.
   * - `runLast`
   *   - In case the plugin prefers to run after the others.
   *     Useful when the plugin needs data from others.
   *
   * @example
   * get requirements () {
   *   return new Set(['runLast', 'dataFromPlugins'])
   * }
   */
  get requirements(): PluginRequirements {
    return new Set([])
  }

  /**
   * Plugin dependencies (optional).
   *
   * Missing plugins will be required() by puppeteer-extra.
   *
   * @example
   * get dependencies () {
   *   return new Set(['user-preferences'])
   * }
   * // Will ensure the 'puppeteer-extra-plugin-user-preferences' plugin is loaded.
   */
  get dependencies(): PluginDependencies {
    return new Set([])
  }

  /**
   * Plugin data (optional).
   *
   * Plugins can expose data (an array of objects), which in turn can be consumed by other plugins,
   * that list the `dataFromPlugins` requirement (by using `this.getDataFromPlugins()`).
   *
   * Convention: `[ {name: 'Any name', value: 'Any value'} ]`
   *
   * @see [[getDataFromPlugins]]
   *
   * @example
   * // plugin1.js
   * get data () {
   *   return [
   *     {
   *       name: 'userPreferences',
   *       value: { foo: 'bar' }
   *     },
   *     {
   *       name: 'userPreferences',
   *       value: { hello: 'world' }
   *     }
   *   ]
   * }
   *
   * // plugin2.js
   * get requirements () { return new Set(['dataFromPlugins']) }
   *
   * async beforeLaunch () {
   *   const prefs = this.getDataFromPlugins('userPreferences').map(d => d.value)
   *   this.debug(prefs) // => [ { foo: 'bar' }, { hello: 'world' } ]
   * }
   */
  get data(): PluginData[] {
    return []
  }

  /**
   * Access the plugin options (usually the `defaults` merged with user defined options)
   *
   * To skip the auto-merging of defaults with user supplied opts don't define a `defaults`
   * property and set the `this._opts` Object in your plugin constructor directly.
   *
   * @see [[defaults]]
   *
   * @example
   * get defaults () { return { foo: "bar" } }
   *
   * async onPageCreated (page) {
   *   this.debug(this.opts.foo) // => bar
   * }
   */
  get opts(): PluginOptions {
    return this._opts
  }

  /**
   *  Convenience debug logger based on the [debug] module.
   *  Will automatically namespace the logging output to the plugin package name.
   *  [debug]: https://www.npmjs.com/package/debug
   *
   *  ```bash
   *  # toggle output using environment variables
   *  DEBUG=puppeteer-extra-plugin:<plugin_name> node foo.js
   *  # to debug all the things:
   *  DEBUG=puppeteer-extra,puppeteer-extra-plugin:* node foo.js
   *  ```
   *
   * @example
   * this.debug('hello world')
   * // will output e.g. 'puppeteer-extra-plugin:anonymize-ua hello world'
   */
  get debug(): Debugger {
    return debug(`puppeteer-extra-plugin:${this.name}`)
  }

  /**
   * Before a new browser instance is created/launched.
   *
   * Can be used to modify the puppeteer launch options by modifying or returning them.
   *
   * Plugins using this method will be called in sequence to each
   * be able to update the launch options.
   *
   * @example
   * async beforeLaunch (options) {
   *   if (this.opts.flashPluginPath) {
   *     options.args.push(`--ppapi-flash-path=${this.opts.flashPluginPath}`)
   *   }
   * }
   *
   * @param options - Puppeteer launch options
   */
  async beforeLaunch(options: any) {
    // noop
  }

  /**
   * After the browser has launched.
   *
   * Note: Don't assume that there will only be a single browser instance during the lifecycle of a plugin.
   * It's possible that `puppeteer.launch` will be called multiple times and more than one browser created.
   * In order to make the plugins as stateless as possible don't store a reference to the browser instance
   * in the plugin but rather consider alternatives.
   *
   * E.g. when using `onPageCreated` you can get a browser reference by using `page.browser()`.
   *
   * Alternatively you could expose a class method that takes a browser instance as a parameter to work with:
   *
   * ```es6
   * const fancyPlugin = require('puppeteer-extra-plugin-fancy')()
   * puppeteer.use(fancyPlugin)
   * const browser = await puppeteer.launch()
   * await fancyPlugin.killBrowser(browser)
   * ```
   *
   * @param  browser - The `puppeteer` browser instance.
   * @param  opts.options - Puppeteer launch options used.
   *
   * @example
   * async afterLaunch (browser, opts) {
   *   this.debug('browser has been launched', opts.options)
   * }
   */
  async afterLaunch(
    browser: Puppeteer.Browser,
    opts = { options: {} as Puppeteer.LaunchOptions }
  ) {
    // noop
  }

  /**
   * Before connecting to an existing browser instance.
   *
   * Can be used to modify the puppeteer connect options by modifying or returning them.
   *
   * Plugins using this method will be called in sequence to each
   * be able to update the connect options.
   *
   * @param  {Object} options - Puppeteer connect options
   * @return {Object=}
   */
  async beforeConnect(options: Puppeteer.ConnectOptions) {
    // noop
  }

  /**
   * After connecting to an existing browser instance.
   *
   * > Note: Don't assume that there will only be a single browser instance during the lifecycle of a plugin.
   *
   * @param browser - The `puppeteer` browser instance.
   * @param  {Object} opts
   * @param  {Object} opts.options - Puppeteer connect options used.
   *
   */
  async afterConnect(browser: Puppeteer.Browser, opts = {}) {
    // noop
  }

  /**
   * Called when a browser instance is available.
   *
   * This applies to both `puppeteer.launch()` and `puppeteer.connect()`.
   *
   * Convenience method created for plugins that need access to a browser instance
   * and don't mind if it has been created through `launch` or `connect`.
   *
   * > Note: Don't assume that there will only be a single browser instance during the lifecycle of a plugin.
   *
   * @param browser - The `puppeteer` browser instance.
   * @param opts - The same options object that is handed to `afterLaunch` / `afterConnect`.
   */
  public async onBrowser(browser: Puppeteer.Browser, opts: any): Promise<void> {
    // noop
  }

  /**
   * Called when a target is created, for example when a new page is opened by window.open or browser.newPage.
   *
   * > Note: This includes target creations in incognito browser contexts.
   *
   * > Note: This includes browser instances created through `.launch()` as well as `.connect()`.
   *
   * @param  {Puppeteer.Target} target
   */
  async onTargetCreated(target: Puppeteer.Target) {
    // noop
  }

  /**
   * Same as `onTargetCreated` but prefiltered to only contain Pages, for convenience.
   *
   * > Note: This includes page creations in incognito browser contexts.
   *
   * > Note: This includes browser instances created through `.launch()` as well as `.connect()`.
   *
   * @param  {Puppeteer.Page} page
   *
   * @example
   * async onPageCreated (page) {
   *   let ua = await page.browser().userAgent()
   *   if (this.opts.stripHeadless) {
   *     ua = ua.replace('HeadlessChrome/', 'Chrome/')
   *   }
   *   this.debug('new ua', ua)
   *   await page.setUserAgent(ua)
   * }
   */
  async onPageCreated(page: Puppeteer.Page) {
    // noop
  }

  /**
   * Called when the url of a target changes.
   *
   * > Note: This includes target changes in incognito browser contexts.
   *
   * > Note: This includes browser instances created through `.launch()` as well as `.connect()`.
   *
   * @param  {Puppeteer.Target} target
   */
  async onTargetChanged(target: Puppeteer.Target) {
    // noop
  }

  /**
   * Called when a target is destroyed, for example when a page is closed.
   *
   * > Note: This includes target destructions in incognito browser contexts.
   *
   * > Note: This includes browser instances created through `.launch()` as well as `.connect()`.
   *
   * @param  {Puppeteer.Target} target
   */
  async onTargetDestroyed(target: Puppeteer.Target) {
    // noop
  }

  /**
   * Called when Puppeteer gets disconnected from the Chromium instance.
   *
   * This might happen because of one of the following:
   * - Chromium is closed or crashed
   * - The `browser.disconnect` method was called
   */
  async onDisconnected() {
    // noop
  }

  /**
   * **Deprecated:** Since puppeteer v1.6.0 `onDisconnected` has been improved
   * and should be used instead of `onClose`.
   *
   * In puppeteer < v1.6.0 `onDisconnected` was not catching all exit scenarios.
   * In order for plugins to clean up properly (e.g. deleting temporary files)
   * the `onClose` method had been introduced.
   *
   * > Note: Might be called multiple times on exit.
   *
   * > Note: This only includes browser instances created through `.launch()`.
   */
  async onClose() {
    // noop
  }

  /**
   * After the plugin has been registered in `puppeteer-extra`.
   *
   * Normally right after `puppeteer.use(plugin)` is called
   */
  async onPluginRegistered() {
    // noop
  }

  /**
   * Helper method to retrieve `data` objects from other plugins.
   *
   * A plugin needs to state the `dataFromPlugins` requirement
   * in order to use this method. Will be mapped to `puppeteer.getPluginData`.
   *
   * @param name - Filter data by `name` property
   *
   * @see [data]
   * @see [requirements]
   */
  getDataFromPlugins(name?: string): PluginData[] {
    return []
  }

  /**
   * Will match plugin dependencies against all currently registered plugins.
   * Is being called by `puppeteer-extra` and used to require missing dependencies.
   *
   * @param  {Array<Object>} plugins
   * @return {Set} - list of missing plugin names
   *
   * @private
   */
  _getMissingDependencies(plugins: any) {
    const pluginNames = new Set(plugins.map((p: any) => p.name))
    const missing = new Set(
      Array.from(this.dependencies.values()).filter(x => !pluginNames.has(x))
    )
    return missing
  }

  /**
   * Conditionally bind browser/process events to class members.
   * The idea is to reduce event binding boilerplate in plugins.
   *
   * For efficiency we make sure the plugin is using the respective event
   * by checking the child class members before registering the listener.
   *
   * After the listeners are bound, `afterLaunch` or `afterConnect` (depending
   * on `opts.context`) and finally `onBrowser` are awaited.
   *
   * @param  {Puppeteer.Browser} browser
   * @param  {Object} opts - Options
   * @param  {string} opts.context - Puppeteer context (launch/connect)
   * @param  {Object} [opts.options] - Puppeteer launch or connect options
   * @param  {Array<string>} [opts.defaultArgs] - The default flags that Chromium will be launched with
   *
   * @private
   */
  async _bindBrowserEvents(browser: Puppeteer.Browser, opts: any = {}) {
    if (
      this._hasChildClassMember('onTargetCreated') ||
      this._hasChildClassMember('onPageCreated')
    ) {
      browser.on('targetcreated', this._onTargetCreated.bind(this))
    }
    if (this._hasChildClassMember('onTargetChanged') && this.onTargetChanged) {
      browser.on('targetchanged', this.onTargetChanged.bind(this))
    }
    if (
      this._hasChildClassMember('onTargetDestroyed') &&
      this.onTargetDestroyed
    ) {
      browser.on('targetdestroyed', this.onTargetDestroyed.bind(this))
    }
    if (this._hasChildClassMember('onDisconnected') && this.onDisconnected) {
      browser.on('disconnected', this.onDisconnected.bind(this))
    }
    if (opts.context === 'launch' && this._hasChildClassMember('onClose')) {
      // The disconnect event has been improved since puppeteer v1.6.0
      // onClose is being kept mostly for legacy reasons
      if (this.onClose) {
        // `opts.options` is documented as optional: fall back to an empty
        // object so the signal checks below don't throw when it is missing.
        const launchOptions = opts.options || {}
        process.on('exit', this.onClose.bind(this))
        browser.on('disconnected', this.onClose.bind(this))

        // Only an explicit `false` opts out of binding the signal handler.
        if (launchOptions.handleSIGINT !== false) {
          process.on('SIGINT', this.onClose.bind(this))
        }
        if (launchOptions.handleSIGTERM !== false) {
          process.on('SIGTERM', this.onClose.bind(this))
        }
        if (launchOptions.handleSIGHUP !== false) {
          process.on('SIGHUP', this.onClose.bind(this))
        }
      }
    }
    if (opts.context === 'launch' && this.afterLaunch) {
      await this.afterLaunch(browser, opts)
    }
    if (opts.context === 'connect' && this.afterConnect) {
      await this.afterConnect(browser, opts)
    }
    if (this.onBrowser) await this.onBrowser(browser, opts)
  }

  /**
   * Internal `targetcreated` listener: forwards every target to
   * `onTargetCreated` and, for targets of type `page`, additionally resolves
   * the page and forwards it to `onPageCreated` unless it is already closed.
   * Errors raised while handling a page are logged, not rethrown.
   *
   * @private
   */
  async _onTargetCreated(target: Puppeteer.Target) {
    if (this.onTargetCreated) await this.onTargetCreated(target)
    // Pre filter pages for plugin developers convenience
    if (target.type() === 'page') {
      try {
        const page = await target.page()
        if (!page) {
          return
        }
        const validPage = 'isClosed' in page && !page.isClosed()
        if (this.onPageCreated && validPage) {
          await this.onPageCreated(page)
        }
      } catch (err) {
        console.error(err)
      }
    }
  }

  /**
   * Records the member names of the given prototype and then notifies the
   * plugin through `onPluginRegistered`.
   *
   * @private
   */
  _register(prototype: any) {
    this._registerChildClassMembers(prototype)
    // Not awaited: `_register` is synchronous, so the promise returned by
    // `onPluginRegistered` is not observed here.
    if (this.onPluginRegistered) this.onPluginRegistered()
  }

  /**
   * Stores the own property names of `prototype`; `_hasChildClassMember`
   * checks against this list.
   *
   * @private
   */
  _registerChildClassMembers(prototype: any) {
    this._childClassMembers = Object.getOwnPropertyNames(prototype)
  }

  /**
   * Whether `name` is among the member names stored by
   * `_registerChildClassMembers`.
   *
   * @private
   */
  _hasChildClassMember(name: string) {
    return this._childClassMembers.includes(name)
  }

  /**
   * Marker property, always `true`.
   *
   * @private
   */
  get _isPuppeteerExtraPlugin() {
    return true
  }
}
export { Browser } from 'puppeteer' export { Page } from 'puppeteer' export { Target } from 'puppeteer' export { ConnectOptions } from 'puppeteer' export { LaunchOptions } from 'puppeteer' ================================================ FILE: packages/puppeteer-extra-plugin/tsconfig.json ================================================ { "compilerOptions": { "outDir": "./dist", "target": "es2017", "module": "es2015", "moduleResolution": "node", "lib": ["es2015", "es2016", "es2017", "dom"], "sourceMap": true, "declaration": true, "allowSyntheticDefaultImports": true, "esModuleInterop": true, "emitDecoratorMetadata": true, "experimentalDecorators": true, "strict": true, "noFallthroughCasesInSwitch": true, "noImplicitReturns": false, "noUnusedLocals": true, "noUnusedParameters": false, "pretty": true, "stripInternal": true, "types": ["node"] }, "include": [ "./src/**/*.tsx", "./src/**/*.ts", "./src/**/*.test.ts", "./test/**/*.ts" ], "exclude": ["node_modules", "dist", "./test/**/*.spec.ts"] } ================================================ FILE: packages/puppeteer-extra-plugin/tslint.json ================================================ { "extends": ["tslint-config-standard", "tslint-config-prettier"], "rules": {} } ================================================ FILE: packages/puppeteer-extra-plugin-adblocker/ava.config-ts.js ================================================ export default { compileEnhancements: false, environmentVariables: { TS_NODE_COMPILER_OPTIONS: '{"module":"commonjs"}' }, files: ['src/**.test.ts'], extensions: ['ts'], require: ['ts-node/register'] } ================================================ FILE: packages/puppeteer-extra-plugin-adblocker/ava.config.js ================================================ export default { files: ['dist/*.test.js'] } ================================================ FILE: packages/puppeteer-extra-plugin-adblocker/build_version_check.js ================================================ const pkg = 
require('./package.json') const isIncompatiblePuppeteerVersion = () => { const version = pkg.devDependencies.puppeteer const majorVersion = parseInt(version.split('.')[0]) if (majorVersion >= 6) { return true } else { return false } } const incompatible = isIncompatiblePuppeteerVersion() if (incompatible) { console.warn( 'ERR: The adblocker plugin requires pptr >= 6', process.env.PUPPETEER_VERSION ) } ================================================ FILE: packages/puppeteer-extra-plugin-adblocker/package.json ================================================ { "name": "puppeteer-extra-plugin-adblocker", "version": "2.13.6", "description": "A puppeteer-extra plugin to block ads and trackers.", "main": "dist/index.cjs.js", "module": "dist/index.esm.js", "typings": "dist/index.d.ts", "files": [ "dist" ], "repository": "berstend/puppeteer-extra", "homepage": "https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-adblocker", "author": "remusao", "license": "MIT", "scripts": { "clean": "rimraf dist/*", "tscheck": "tsc --pretty --noEmit", "prebuild": "run-s clean", "build": "run-s build:tsc build:rollup; node build_version_check.js", "build:tsc": "tsc --module commonjs", "build:rollup": "rollup -c rollup.config.ts", "docs": "node -e 0", "test": "ava -v --config ava.config-ts.js", "pretest-ci": "run-s build", "test-ci-back": "ava --concurrency 1 --serial --fail-fast -v", "test-ci": "exit 0" }, "engines": { "node": ">=8" }, "prettier": { "printWidth": 80, "semi": false, "singleQuote": true }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "ads", "adblocker", "adblocking" ], "devDependencies": { "@types/debug": "^4.1.5", "@types/node-fetch": "^2.5.4", "@types/puppeteer": "*", "ava": "^2.4.0", "npm-run-all": "^4.1.5", "puppeteer": "^10.2.0", "rimraf": "^3.0.0", "rollup": "^1.27.5", "rollup-plugin-commonjs": "^10.1.0", "rollup-plugin-node-resolve": "^5.2.0", "rollup-plugin-sourcemaps": "^0.4.2", 
"rollup-plugin-typescript2": "^0.25.2", "ts-node": "^8.5.4", "tslint": "^5.20.1", "tslint-config-prettier": "^1.18.0", "tslint-config-standard": "^9.0.0", "typescript": "4.7.4" }, "dependencies": { "@cliqz/adblocker-puppeteer": "1.23.8", "debug": "^4.1.1", "node-fetch": "^2.6.0", "puppeteer-extra-plugin": "^3.2.3" }, "peerDependencies": { "puppeteer": "*", "puppeteer-core": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer": { "optional": true }, "puppeteer-core": { "optional": true }, "puppeteer-extra": { "optional": true } }, "gitHead": "72fe830c158f1e971c8499fdd5232338dd53c220" } ================================================ FILE: packages/puppeteer-extra-plugin-adblocker/readme.md ================================================ # puppeteer-extra-plugin-adblocker [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/berstend/puppeteer-extra/test.yml?branch=master&event=push) [![Discord](https://img.shields.io/discord/737009125862408274)](https://extra.community) [![npm](https://img.shields.io/npm/v/puppeteer-extra-plugin-adblocker.svg)](https://www.npmjs.com/package/puppeteer-extra-plugin-adblocker) > A [puppeteer-extra](https://github.com/berstend/puppeteer-extra) plugin to block ads and trackers. ## Features - Extremely efficient adblocker (both in memory usage and raw speed) - Pure JavaScript implementation - Effectively blocks all types of ads and tracking - Small and minimal (only 64KB minified and gzipped) > Thanks to [@remusao](https://github.com/remusao) for contributing this sweet plugin and [adblocker engine](https://github.com/cliqz-oss/adblocker)! 
👏 ## Installation ```bash yarn add puppeteer-extra-plugin-adblocker # - or - npm install puppeteer-extra-plugin-adblocker ``` If this is your first [puppeteer-extra](https://github.com/berstend/puppeteer-extra) plugin here's everything you need: ```bash yarn add puppeteer puppeteer-extra puppeteer-extra-plugin-adblocker # - or - npm install puppeteer puppeteer-extra puppeteer-extra-plugin-adblocker ``` ## Usage The plugin enables adblocking in puppeteer, optionally blocking trackers. ```javascript // puppeteer-extra is a drop-in replacement for puppeteer, // it augments the installed puppeteer with plugin functionality const puppeteer = require('puppeteer-extra') // Add adblocker plugin, which will transparently block ads in all pages you // create using puppeteer. const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer') const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker') puppeteer.use( AdblockerPlugin({ // Optionally enable Cooperative Mode for several request interceptors interceptResolutionPriority: DEFAULT_INTERCEPT_RESOLUTION_PRIORITY }) ) // puppeteer usage as normal puppeteer.launch({ headless: true }).then(async browser => { const page = await browser.newPage() // Visit a page, ads are blocked automatically! await page.goto('https://www.google.com/search?q=rent%20a%20car') await page.waitForTimeout(5 * 1000) await page.screenshot({ path: 'response.png', fullPage: true }) console.log(`All done, check the screenshots. ✨`) await browser.close() }) ```
TypeScript usage
```ts import puppeteer from 'puppeteer-extra' import Adblocker from 'puppeteer-extra-plugin-adblocker' puppeteer.use(Adblocker({ blockTrackers: true })) puppeteer .launch({ headless: false, defaultViewport: null }) .then(async browser => { const page = await browser.newPage() await page.goto('https://www.vanityfair.com') await page.waitForTimeout(60 * 1000) await browser.close() }) ```
## Options

Usage:

```js
const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker')
const adblocker = AdblockerPlugin({
  blockTrackers: true // default: false
})
puppeteer.use(adblocker)
```

Available options:

```ts
interface PluginOptions {
  /** Whether or not to block trackers (in addition to ads). Default: false */
  blockTrackers: boolean
  /** Whether or not to block trackers and other annoyances, including cookie notices. Default: false */
  blockTrackersAndAnnoyances: boolean
  /** Persist adblocker engine cache to disk for speedup. Default: true */
  useCache: boolean
  /** Optional custom directory for adblocker cache files. Default: undefined */
  cacheDir?: string
  /** Optional custom priority for interception resolution. Default: undefined */
  interceptResolutionPriority?: number
}
```

## Motivation

Ads and trackers are on most pages and often cost a lot of bandwidth and time to load pages. Blocking ads and trackers allows pages to load much faster, because fewer requests are made and less JavaScript needs to run. Also, in cases where you want to take screenshots of pages, it's nice to have an option to remove the ads beforehand.

================================================
FILE: packages/puppeteer-extra-plugin-adblocker/rollup.config.ts
================================================
import resolve from 'rollup-plugin-node-resolve'
import sourceMaps from 'rollup-plugin-sourcemaps'
import typescript from 'rollup-plugin-typescript2'

const pkg = require('./package.json')

const entryFile = 'index'
const banner = `
/*!
* ${pkg.name} v${pkg.version} by ${pkg.author} * ${pkg.homepage || `https://github.com/${pkg.repository}`} * @license ${pkg.license} */ `.trim() const defaultExportOutro = ` module.exports = exports.default || {} Object.entries(exports).forEach(([key, value]) => { module.exports[key] = value }) ` export default { input: `src/${entryFile}.ts`, output: [ { file: pkg.main, format: 'cjs', sourcemap: true, exports: 'named', outro: defaultExportOutro, banner }, { file: pkg.module, format: 'es', sourcemap: true, exports: 'named', banner } ], // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') external: [ ...Object.keys(pkg.dependencies || {}), ...Object.keys(pkg.peerDependencies || {}), 'fs', 'os', 'path' ], watch: { include: 'src/**' }, plugins: [ // Compile TypeScript files typescript({ useTsconfigDeclarationDir: true }), // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) // commonjs(), // Allow node_modules resolution, so you can use 'external' to control // which external modules to include in the bundle // https://github.com/rollup/rollup-plugin-node-resolve#usage resolve({ preferBuiltins: true }), // Resolve source maps to the original source sourceMaps() ] } ================================================ FILE: packages/puppeteer-extra-plugin-adblocker/src/ambient.d.ts ================================================ export {} // https://github.com/sindresorhus/type-fest/issues/19 declare global { interface SymbolConstructor { readonly observable: symbol } } ================================================ FILE: packages/puppeteer-extra-plugin-adblocker/src/index.test.ts ================================================ import test from 'ava' import AdblockerPlugin from './index' const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] test('will block ads', async t => { const puppeteer = require('puppeteer-extra') const adblockerPlugin = AdblockerPlugin({ blockTrackers: true }) 
puppeteer.use(adblockerPlugin) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS, headless: true }) const blocker = await adblockerPlugin.getBlocker() const page = await browser.newPage() let blockedRequests = 0 blocker.on('request-blocked', () => { blockedRequests += 1 }) let hiddenAds = 0 blocker.on('style-injected', () => { hiddenAds += 1 }) const url = 'https://www.google.com/search?q=rent%20a%20car' await page.goto(url, { waitUntil: 'networkidle0' }) t.not(hiddenAds, 0) t.not(blockedRequests, 0) await browser.close() }) ================================================ FILE: packages/puppeteer-extra-plugin-adblocker/src/index.ts ================================================ import { promises as fs } from 'fs' import os from 'os' import path from 'path' import { PuppeteerBlocker } from '@cliqz/adblocker-puppeteer' import fetch from 'node-fetch' import { PuppeteerExtraPlugin } from 'puppeteer-extra-plugin' const pkg = require('../package.json') const engineCacheFilename = `${pkg.name}-${pkg.version}-engine.bin` /** Available plugin options */ export interface PluginOptions { /** Whether or not to block trackers (in addition to ads). Default: false */ blockTrackers: boolean /** Whether or not to block trackers and other annoyances, including cookie notices. Default: false */ blockTrackersAndAnnoyances: boolean /** Persist adblocker engine cache to disk for speedup. Default: true */ useCache: boolean /** Optional custom directory for adblocker cache files. Default: undefined */ cacheDir?: string /** Optional custom priority for interception resolution. Default: undefined */ interceptResolutionPriority?: number } /** * A puppeteer-extra plugin to automatically block ads and trackers. 
*/
export class PuppeteerExtraPluginAdblocker extends PuppeteerExtraPlugin {
  private blocker: PuppeteerBlocker | undefined

  constructor(opts: Partial<PluginOptions>) {
    super(opts)
    this.debug('Initialized', this.opts)
  }

  get name() {
    return 'adblocker'
  }

  get defaults(): PluginOptions {
    return {
      blockTrackers: false,
      blockTrackersAndAnnoyances: false,
      useCache: true,
      cacheDir: undefined,
      interceptResolutionPriority: undefined
    }
  }

  /**
   * Path of the serialized engine file: inside the `cacheDir` option when
   * set, otherwise inside the OS temp directory.
   */
  get engineCacheFile() {
    const cacheDir = this.opts.cacheDir || os.tmpdir()
    return path.join(cacheDir, engineCacheFilename)
  }

  /**
   * Cache an instance of `PuppeteerBlocker` to disk unless the 'useCache'
   * option was disabled for the plugin. It can then be used the next time
   * this plugin is used to load the adblocker faster.
   */
  private async persistToCache(blocker: PuppeteerBlocker): Promise<void> {
    if (!this.opts.useCache) {
      return
    }
    this.debug('persist to cache', this.engineCacheFile)
    await fs.writeFile(this.engineCacheFile, blocker.serialize())
  }

  /**
   * Initialize instance of `PuppeteerBlocker` from cache if possible.
   * Otherwise, it throws and we will try to initialize it from remote instead.
   */
  private async loadFromCache(): Promise<PuppeteerBlocker> {
    if (!this.opts.useCache) {
      throw new Error('caching disabled')
    }
    this.debug('load from cache', this.engineCacheFile)
    return PuppeteerBlocker.deserialize(
      new Uint8Array(await fs.readFile(this.engineCacheFile))
    )
  }

  /**
   * Initialize instance of `PuppeteerBlocker` from remote (either by fetching
   * a serialized version of the engine when available, or by downloading raw
   * lists for filters such as EasyList then parsing them to initialize
   * blocker).
   */
  private async loadFromRemote(): Promise<PuppeteerBlocker> {
    this.debug('load from remote', {
      blockTrackers: this.opts.blockTrackers,
      blockTrackersAndAnnoyances: this.opts.blockTrackersAndAnnoyances
    })
    if (this.opts.blockTrackersAndAnnoyances === true) {
      return PuppeteerBlocker.fromPrebuiltFull(fetch)
    } else if (this.opts.blockTrackers === true) {
      return PuppeteerBlocker.fromPrebuiltAdsAndTracking(fetch)
    } else {
      return PuppeteerBlocker.fromPrebuiltAdsOnly(fetch)
    }
  }

  /**
   * Return instance of `PuppeteerBlocker`. It will take care of initializing
   * it if necessary (first time it is called), or return the existing instance
   * if it already exists.
   */
  async getBlocker(): Promise<PuppeteerBlocker> {
    this.debug('getBlocker', { hasBlocker: !!this.blocker })
    if (this.blocker === undefined) {
      try {
        this.blocker = await this.loadFromCache()
        this.setRequestInterceptionPriority()
      } catch (ex) {
        // Best effort: any cache failure (disabled, missing or unreadable
        // file) falls back to a remote load, but leave a trace for debugging.
        this.debug('cache unavailable, falling back to remote', ex)
        this.blocker = await this.loadFromRemote()
        this.setRequestInterceptionPriority()
        await this.persistToCache(this.blocker)
      }
    }
    return this.blocker
  }

  /**
   * Sets the request interception priority on the `PuppeteerBlocker` instance.
   */
  private setRequestInterceptionPriority(): void {
    this.blocker?.setRequestInterceptionPriority(this.opts.interceptResolutionPriority)
  }

  /**
   * Hook into this blocking event to make sure the cache is initialized before navigation.
   */
  async beforeLaunch() {
    this.debug('beforeLaunch')
    await this.getBlocker()
  }

  /**
   * Hook into this blocking event to make sure the cache is initialized before navigation.
   */
  async beforeConnect() {
    this.debug('beforeConnect')
    await this.getBlocker()
  }

  /**
   * Enable adblocking in `page`.
   */
  async onPageCreated(page: any) {
    this.debug('onPageCreated')
    ;(await this.getBlocker()).enableBlockingInPage(page)
  }
}

export default (options: Partial<PluginOptions> = {}) => {
  return new PuppeteerExtraPluginAdblocker(options)
}

================================================
FILE: packages/puppeteer-extra-plugin-adblocker/tsconfig.json
================================================
{
  "compilerOptions": {
    "outDir": "./dist",
    "target": "es2017",
    "module": "es2015",
    "moduleResolution": "node",
    "lib": ["es2015", "es2016", "es2017", "dom"],
    "sourceMap": true,
    "declaration": true,
    "allowSyntheticDefaultImports": true,
    "esModuleInterop": true,
    "emitDecoratorMetadata": true,
    "experimentalDecorators": true,
    "strict": true,
    "noFallthroughCasesInSwitch": true,
    "noImplicitReturns": false,
    "noUnusedLocals": true,
    "noUnusedParameters": false,
    "pretty": true,
    "stripInternal": true,
    "types": ["node"]
  },
  "include": [
    "./src/**/*.tsx",
    "./src/**/*.ts",
    "./src/**/*.test.ts",
    "./test/**/*.ts"
  ],
  "exclude": ["node_modules", "dist", "./test/**/*.spec.ts"]
}

================================================
FILE: packages/puppeteer-extra-plugin-adblocker/tslint.json
================================================
{
  "extends": ["tslint-config-standard", "tslint-config-prettier"],
  "rules": {
    "ordered-imports": true
  }
}

================================================
FILE: packages/puppeteer-extra-plugin-anonymize-ua/index.d.ts
================================================
declare const PuppeteerExtraPlugin: typeof import("puppeteer-extra-plugin").PuppeteerExtraPlugin;
declare const Page: typeof import("puppeteer").Page;

type CustomFn = ((ua: string) => string | null) | null;

declare class Plugin extends PuppeteerExtraPlugin {
    get name(): string;
    get defaults(): {
        stripHeadless: boolean;
        makeWindows: boolean;
        customFn: CustomFn;
    };
    // `async` is not allowed in ambient declarations; the promise is
    // expressed through the return type instead.
    onPageCreated(page: Page): Promise<void>;
}

// Both flags are plain booleans: they default to `true` and may be disabled.
export default function (options?: {
    stripHeadless?: boolean;
    makeWindows?: boolean;
    customFn?: CustomFn;
}): Plugin;
================================================ FILE: packages/puppeteer-extra-plugin-anonymize-ua/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') /** * Anonymize the User-Agent on all pages. * * Supports dynamic replacing, so the Chrome version stays intact and recent. * * @param {Object} opts - Options * @param {boolean} [opts.stripHeadless=true] - Replace `HeadlessChrome` with `Chrome`. * @param {boolean} [opts.makeWindows=true] - Sets the platform to Windows 10, 64bit (most common). * @param {Function} [opts.customFn=null] - A custom UA replacer function. * * @example * const puppeteer = require('puppeteer-extra') * puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) * // or * puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')({ * customFn: (ua) => 'MyCoolAgent/' + ua.replace('Chrome', 'Beer')}) * ) * const browser = await puppeteer.launch() */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'anonymize-ua' } get defaults() { return { stripHeadless: true, makeWindows: true, customFn: null } } async onPageCreated(page) { let ua = await page.browser().userAgent() if (this.opts.stripHeadless) { ua = ua.replace('HeadlessChrome/', 'Chrome/') } if (this.opts.makeWindows) { ua = ua.replace(/\(([^)]+)\)/, '(Windows NT 10.0; Win64; x64)') } if (this.opts.customFn) { ua = this.opts.customFn(ua) } this.debug('new ua', ua) await page.setUserAgent(ua) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-anonymize-ua/index.test.js ================================================ 'use strict' const PLUGIN_NAME = 'anonymize-ua' const test = require('ava') const Plugin = require('.') test('is a function', async t => { t.is(typeof Plugin, 'function') }) test('should have the basic class members', async 
t => { const instance = new Plugin() t.is(instance.name, PLUGIN_NAME) t.true(instance._isPuppeteerExtraPlugin) }) test('should have the public child class members', async t => { const instance = new Plugin() const prototype = Object.getPrototypeOf(instance) const childClassMembers = Object.getOwnPropertyNames(prototype) t.true(childClassMembers.includes('constructor')) t.true(childClassMembers.includes('name')) t.true(childClassMembers.includes('defaults')) t.true(childClassMembers.includes('onPageCreated')) t.true(childClassMembers.length === 4) }) test('should have opts with default values', async t => { const instance = new Plugin() const opts = instance.opts t.is(opts.stripHeadless, true) t.is(opts.makeWindows, true) t.is(opts.customFn, null) }) ================================================ FILE: packages/puppeteer-extra-plugin-anonymize-ua/package.json ================================================ { "name": "puppeteer-extra-plugin-anonymize-ua", "version": "2.4.6", "description": "Anonymize User-Agent in puppeteer.", "main": "index.js", "types": "index.d.ts", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "scripts": { "docs": "node -e 0", "lint": "eslint --ext .js .", "test:js": "ava -v --serial --concurrency 1 --fail-fast", "test": "run-p test:js lint", "test-ci": "run-s test" }, "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "ua", "user-agent", "chrome", "headless", "pupeteer" ], "devDependencies": { "ava": "2.4.0", "npm-run-all": "^4.1.5", "puppeteer": "^2.0.0", "puppeteer-extra": "^3.3.6", "puppeteer-extra-plugin-devtools": "^2.4.6" }, "dependencies": { "debug": "^4.1.1", "puppeteer-extra-plugin": "^3.2.3" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } 
================================================ FILE: packages/puppeteer-extra-plugin-anonymize-ua/readme.md ================================================ # puppeteer-extra-plugin-anonymize-ua > A plugin for [puppeteer-extra](https://github.com/berstend/puppeteer-extra). ### Install ```bash yarn add puppeteer-extra-plugin-anonymize-ua ``` ## API #### Table of Contents - [Plugin](#plugin) ### [Plugin](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-anonymize-ua/index.js#L24-L51) **Extends: PuppeteerExtraPlugin** Anonymize the User-Agent on all pages. Supports dynamic replacing, so the Chrome version stays intact and recent. Type: `function (opts)` - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** Options (optional, default `{}`) - `opts.stripHeadless` **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** Replace `HeadlessChrome` with `Chrome`. (optional, default `true`) - `opts.makeWindows` **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** Sets the platform to Windows 10, 64bit (most common). (optional, default `true`) - `opts.customFn` **[Function](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Statements/function)** A custom UA replacer function. 
(optional, default `null`) Example: ```javascript const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) // or puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')({ customFn: (ua) => 'MyCoolAgent/' + ua.replace('Chrome', 'Beer')}) ) const browser = await puppeteer.launch() ``` * * * ================================================ FILE: packages/puppeteer-extra-plugin-anonymize-ua/test/headless.js ================================================ 'use strict' const test = require('ava') const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] test.beforeEach(t => { // Make sure we work with pristine modules delete require.cache[require.resolve('puppeteer-extra')] delete require.cache[require.resolve('puppeteer-extra-plugin-anonymize-ua')] }) test('will remove headless from the user-agent', async t => { const puppeteer = require('puppeteer-extra') const AnonymizeUA = require('puppeteer-extra-plugin-anonymize-ua')() puppeteer.use(AnonymizeUA) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) const page = await browser.newPage() await page.goto('https://httpbin.org/headers', { waitUntil: 'domcontentloaded' }) const content = await page.content() t.true(content.includes('Windows NT 10.0')) t.true(!content.includes('HeadlessChrome')) await browser.close() t.true(true) }) test('will remove headless from the user-agent in incognito page', async t => { const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) // Requires puppeteer@next currrently if (browser.createIncognitoBrowserContext) { const context = await browser.createIncognitoBrowserContext() const page = await context.newPage() await page.goto('https://httpbin.org/headers', { waitUntil: 'domcontentloaded' }) const content = await page.content() t.true(content.includes('Windows NT 10.0')) 
t.true(!content.includes('HeadlessChrome')) } await browser.close() t.true(true) }) test('will use a custom fn to modify the user-agent', async t => { const puppeteer = require('puppeteer-extra') puppeteer.use( require('puppeteer-extra-plugin-anonymize-ua')({ customFn: ua => 'MyCoolAgent/' + ua.replace('Chrome', 'Beer') }) ) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) const page = await browser.newPage() await page.goto('https://httpbin.org/headers', { waitUntil: 'domcontentloaded' }) const content = await page.content() t.true(content.includes('Windows NT 10.0')) t.true(!content.includes('HeadlessChrome')) t.true(content.includes('MyCoolAgent/Mozilla')) t.true(content.includes('Beer/')) await browser.close() t.true(true) }) ================================================ FILE: packages/puppeteer-extra-plugin-anonymize-ua/test/headless_off.js ================================================ 'use strict' const test = require('ava') const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] test.beforeEach(t => { // Make sure we work with pristine modules delete require.cache[require.resolve('puppeteer-extra')] delete require.cache[require.resolve('puppeteer-extra-plugin-anonymize-ua')] }) test('will not modify the user-agent when disabled', async t => { const puppeteer = require('puppeteer-extra') const AnonymizeUA = require('puppeteer-extra-plugin-anonymize-ua')({ stripHeadless: false, makeWindows: false, customFn: null }) puppeteer.use(AnonymizeUA) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) const page = await browser.newPage() await page.goto('https://httpbin.org/headers', { waitUntil: 'domcontentloaded' }) const content = await page.content() t.true(content.includes('HeadlessChrome')) t.true(!content.includes('MyCoolAgent/Mozilla')) t.true(!content.includes('Beer/')) await browser.close() t.true(true) }) ================================================ FILE: 
packages/puppeteer-extra-plugin-anonymize-ua/test/popup.js ================================================ 'use strict' const test = require('ava') const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] const waitEvent = function(emitter, eventName) { return new Promise(resolve => emitter.once(eventName, resolve)) } test.beforeEach(t => { // Make sure we work with pristine modules delete require.cache[require.resolve('puppeteer-extra')] delete require.cache[require.resolve('puppeteer-extra-plugin-anonymize-ua')] }) test('known issue: will not remove headless from implicitly created popup pages', async t => { const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) const pages = await Promise.all([...Array(10)].map(slot => browser.newPage())) for (const page of pages) { // Works const ua = await page.evaluate(() => window.navigator.userAgent) t.true(!ua.includes('HeadlessChrome')) // Works await page.goto('about:blank') const ua2 = await page.evaluate(() => window.navigator.userAgent) t.true(!ua2.includes('HeadlessChrome')) // Does NOT work: // https://github.com/GoogleChrome/puppeteer/issues/2669 page.evaluate(url => window.open(url), 'about:blank') const popupTarget = await waitEvent(browser, 'targetcreated') const popupPage = await popupTarget.page() const ua3 = await popupPage.evaluate(() => window.navigator.userAgent) // Test against the problem until it's fixed t.true(ua3.includes('HeadlessChrome')) // should be: !ua3.includes('HeadlessChrome') // Works: The bug only affects newly created popups, subsequent page navigations are fine. 
await popupPage.goto('about:blank') const ua4 = await page.evaluate(() => window.navigator.userAgent) t.true(!ua4.includes('HeadlessChrome')) } await browser.close() t.true(true) }) ================================================ FILE: packages/puppeteer-extra-plugin-anonymize-ua/test/stresstest.js ================================================ 'use strict' const test = require('ava') const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] test.beforeEach(t => { // Make sure we work with pristine modules delete require.cache[require.resolve('puppeteer-extra')] delete require.cache[require.resolve('puppeteer-extra-plugin-anonymize-ua')] }) test('will remove headless from the user-agent on multiple browsers', async t => { const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) const browsers = await Promise.all( [...Array(5)].map(slot => puppeteer.launch({ args: PUPPETEER_ARGS })) ) for (const browser of browsers) { const page = await browser.newPage() const ua = await page.evaluate(() => window.navigator.userAgent) t.true(ua.includes('Windows NT 10.0')) t.true(!ua.includes('HeadlessChrome')) } await browser.close() t.true(true) }) test('will remove headless from the user-agent on many pages', async t => { const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) const pages = await Promise.all([...Array(30)].map(slot => browser.newPage())) for (const page of pages) { const ua = await page.evaluate(() => window.navigator.userAgent) t.true(ua.includes('Windows NT 10.0')) t.true(!ua.includes('HeadlessChrome')) } await browser.close() t.true(true) }) test('will remove headless from the user-agent on many incognito pages', async t => { const puppeteer = require('puppeteer-extra') 
puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) // Requires puppeteer@next currrently if (browser.createIncognitoBrowserContext) { const contexts = await Promise.all( [...Array(30)].map(slot => browser.createIncognitoBrowserContext()) ) for (const context of contexts) { const page = await context.newPage() const ua = await page.evaluate(() => window.navigator.userAgent) t.true(ua.includes('Windows NT 10.0')) t.true(!ua.includes('HeadlessChrome')) } } await browser.close() t.true(true) }) test('will remove headless from the user-agent on many pages in parallel', async t => { const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-anonymize-ua')()) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS }) const testCase = async () => { const page = await browser.newPage() const ua = await page.evaluate(() => window.navigator.userAgent) t.true(ua.includes('Windows NT 10.0')) t.true(!ua.includes('HeadlessChrome')) } await Promise.all([...Array(30)].map(slot => testCase())) await browser.close() t.true(true) }) ================================================ FILE: packages/puppeteer-extra-plugin-block-resources/example.js ================================================ 'use strict' // // With debug logs: // DEBUG=puppeteer-extra,puppeteer-extra-plugin,puppeteer-extra-plugin:* node example.js // // const puppeteer = require('puppeteer-extra') // puppeteer.use(require('puppeteer-extra-plugin-block-resources')({ // blockedTypes: new Set(['image', 'stylesheet']) // })) // ;(async () => { // const browser = await puppeteer.launch({ headless: false }) // const page = await browser.newPage() // await page.goto('http://www.msn.com/', {waitUntil: 'domcontentloaded'}) // console.log('all done') // })() const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer') const puppeteer = require('puppeteer-extra') const blockResourcesPlugin = 
require('puppeteer-extra-plugin-block-resources')({ // Optionally enable Cooperative Mode for several request interceptors interceptResolutionPriority: DEFAULT_INTERCEPT_RESOLUTION_PRIORITY }) puppeteer.use(blockResourcesPlugin) ;(async () => { const browser = await puppeteer.launch({ headless: false }) const page = await browser.newPage() blockResourcesPlugin.blockedTypes.add('image') await page.goto('http://www.msn.com/', { waitUntil: 'domcontentloaded' }) blockResourcesPlugin.blockedTypes.add('stylesheet') blockResourcesPlugin.blockedTypes.add('other') // e.g. favicon await page.goto('http://news.ycombinator.com', { waitUntil: 'domcontentloaded' }) blockResourcesPlugin.blockedTypes.delete('stylesheet') blockResourcesPlugin.blockedTypes.delete('other') blockResourcesPlugin.blockedTypes.add('media') blockResourcesPlugin.blockedTypes.add('script') await page.goto('http://www.youtube.com', { waitUntil: 'domcontentloaded' }) console.log('all done') })()
================================================ FILE: packages/puppeteer-extra-plugin-block-resources/index.d.ts ================================================
import { PuppeteerExtraPlugin } from 'puppeteer-extra-plugin'
import { ResourceType } from 'puppeteer'

/** Options accepted by the block-resources plugin. */
declare interface PluginOptions {
  /** Set of resource types the plugin knows about. */
  availableTypes?: Set<ResourceType>
  /** Set of resource types to block (empty by default). */
  blockedTypes?: Set<ResourceType>
  /** Optional priority used for Cooperative Intercept Mode. */
  interceptResolutionPriority?: number
}

/** Plugin instance returned by the default export. */
declare class Plugin extends PuppeteerExtraPlugin {
  constructor(opts: Partial<PluginOptions>)
  get name(): string
  get defaults(): PluginOptions
  get engineCacheFile(): string
  get availableTypes(): Set<ResourceType>
  get blockedTypes(): Set<ResourceType>
  get interceptResolutionPriority(): number
}

/** Factory creating a configured plugin instance. */
declare const _default: (options?: Partial<PluginOptions>) => Plugin
export default _default
================================================ FILE: packages/puppeteer-extra-plugin-block-resources/index.js ================================================
'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') /** * Block resources (images, media, css, etc.) in puppeteer.
* * Supports all resource types, blocking can be toggled dynamically. * * @param {Object} opts - Options * @param {Set} [opts.blockedTypes] - Specify which resourceTypes to block (by default none) * * @example * const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer') * puppeteer.use(require('puppeteer-extra-plugin-block-resources')({ * blockedTypes: new Set(['image', 'stylesheet']), * // Optionally enable Cooperative Mode for several request interceptors * interceptResolutionPriority: DEFAULT_INTERCEPT_RESOLUTION_PRIORITY * })) * * // * // and/or dynamically: * // * * const blockResourcesPlugin = require('puppeteer-extra-plugin-block-resources')() * puppeteer.use(blockResourcesPlugin) * * const browser = await puppeteer.launch({ headless: false }) * const page = await browser.newPage() * * blockResourcesPlugin.blockedTypes.add('image') * await page.goto('http://www.msn.com/', {waitUntil: 'domcontentloaded'}) * * blockResourcesPlugin.blockedTypes.add('stylesheet') * blockResourcesPlugin.blockedTypes.add('other') // e.g. favicon * await page.goto('http://news.ycombinator.com', {waitUntil: 'domcontentloaded'}) * * blockResourcesPlugin.blockedTypes.delete('stylesheet') * blockResourcesPlugin.blockedTypes.delete('other') * blockResourcesPlugin.blockedTypes.add('media') * blockResourcesPlugin.blockedTypes.add('script') * await page.goto('http://www.youtube.com', {waitUntil: 'domcontentloaded'}) */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'block-resources' } get defaults() { return { availableTypes: new Set([ 'document', 'stylesheet', 'image', 'media', 'font', 'script', 'texttrack', 'xhr', 'fetch', 'eventsource', 'websocket', 'manifest', 'other' ]), // Block nothing by default blockedTypes: new Set([]), interceptResolutionPriority: undefined } } /** * Get all available resource types. 
* * Resource type will be one of the following: `document`, `stylesheet`, `image`, `media`, `font`, `script`, `texttrack`, `xhr`, `fetch`, `eventsource`, `websocket`, `manifest`, `other`. * * @type {Set} - A Set of all available resource types. */ get availableTypes() { return this.defaults.availableTypes } /** * Get all blocked resource types. * * Blocked resource types can be configured either through `opts` or by modifying this property. * * @type {Set} - A Set of all blocked resource types. */ get blockedTypes() { return this.opts.blockedTypes } /** * Get the request interception resolution priority. * * Priority for Cooperative Intercept Mode can be configured either through `opts` or by modifying this property. * * @type {number} - A number for the request interception resolution priority. */ get interceptResolutionPriority() { return this.opts.interceptResolutionPriority } /** * @private */ onRequest(request) { const type = request.resourceType() const shouldBlock = this.blockedTypes.has(type) // Requests are immediately handled if not using Cooperative Intercept Mode const alreadyHandled = request.isInterceptResolutionHandled ? request.isInterceptResolutionHandled() : true this.debug('onRequest', { type, shouldBlock, alreadyHandled }) if (alreadyHandled) return if (shouldBlock) { const abortArgs = request.abortErrorReason ? ['blockedbyclient', this.interceptResolutionPriority] : [] return request.abort(...abortArgs) } const continueArgs = request.continueRequestOverrides ? 
[request.continueRequestOverrides(), this.interceptResolutionPriority] : [] return request.continue(...continueArgs) } /** * @private */ async onPageCreated(page) { this.debug('onPageCreated', { blockedTypes: this.blockedTypes }) await page.setRequestInterception(true) page.on('request', this.onRequest.bind(this)) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-block-resources/index.test.js ================================================ 'use strict' const PLUGIN_NAME = 'block-resources' const test = require('ava') const Plugin = require('.') test('is a function', async t => { t.is(typeof Plugin, 'function') }) test('should have the basic class members', async t => { const instance = new Plugin() t.is(instance.name, PLUGIN_NAME) t.true(instance._isPuppeteerExtraPlugin) }) test('should have the public child class members', async t => { const instance = new Plugin() const prototype = Object.getPrototypeOf(instance) const childClassMembers = Object.getOwnPropertyNames(prototype) t.true(childClassMembers.includes('constructor')) t.true(childClassMembers.includes('name')) t.true(childClassMembers.includes('defaults')) t.true(childClassMembers.includes('availableTypes')) t.true(childClassMembers.includes('blockedTypes')) t.true(childClassMembers.includes('interceptResolutionPriority')) t.true(childClassMembers.includes('onRequest')) t.true(childClassMembers.includes('onPageCreated')) t.true(childClassMembers.length === 8) }) test('should have opts with default values', async t => { const instance = new Plugin() t.deepEqual(instance.opts.blockedTypes, new Set([])) t.is(instance.opts.availableTypes.size, 13) t.is(instance.opts.interceptResolutionPriority, undefined) }) ================================================ FILE: packages/puppeteer-extra-plugin-block-resources/package.json ================================================ { "name": 
"puppeteer-extra-plugin-block-resources", "version": "2.4.3", "description": "Block resources (images, media, etc.) in puppeteer.", "main": "index.js", "types": "index.d.ts", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "scripts": { "docs": "node -e 0", "lint": "eslint --ext .js .", "test:js": "ava -v", "test": "run-p test:js lint", "test-ci": "run-s test" }, "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "block-resources", "datasaver", "chrome", "headless", "pupeteer" ], "devDependencies": { "ava": "2.4.0", "npm-run-all": "^4.1.5", "puppeteer": "^2.0.0" }, "dependencies": { "debug": "^4.1.1", "puppeteer-extra-plugin": "^3.2.3" }, "peerDependencies": { "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin-block-resources/readme.md ================================================ # puppeteer-extra-plugin-block-resources > A plugin for [puppeteer-extra](https://github.com/berstend/puppeteer-extra). ### Install ```bash yarn add puppeteer-extra-plugin-block-resources ``` ## API #### Table of Contents - [Plugin](#plugin) - [availableTypes](#availabletypes) - [blockedTypes](#blockedtypes) ### [Plugin](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-block-resources/index.js#L41-L104) **Extends: PuppeteerExtraPlugin** Block resources (images, media, css, etc.) in puppeteer. Supports all resource types, blocking can be toggled dynamically. 
Type: `function (opts)` - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** Options (optional, default `{}`) - `opts.blockedTypes` **[Set](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Set)<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>?** Specify which resourceTypes to block (by default none) Example: ```javascript const { DEFAULT_INTERCEPT_RESOLUTION_PRIORITY } = require('puppeteer') puppeteer.use(require('puppeteer-extra-plugin-block-resources')({ blockedTypes: new Set(['image', 'stylesheet']), // Optionally enable Cooperative Mode for several request interceptors interceptResolutionPriority: DEFAULT_INTERCEPT_RESOLUTION_PRIORITY })) // // and/or dynamically: // const blockResourcesPlugin = require('puppeteer-extra-plugin-block-resources')() puppeteer.use(blockResourcesPlugin) const browser = await puppeteer.launch({ headless: false }) const page = await browser.newPage() blockResourcesPlugin.blockedTypes.add('image') await page.goto('http://www.msn.com/', {waitUntil: 'domcontentloaded'}) blockResourcesPlugin.blockedTypes.add('stylesheet') blockResourcesPlugin.blockedTypes.add('other') // e.g. favicon await page.goto('http://news.ycombinator.com', {waitUntil: 'domcontentloaded'}) blockResourcesPlugin.blockedTypes.delete('stylesheet') blockResourcesPlugin.blockedTypes.delete('other') blockResourcesPlugin.blockedTypes.add('media') blockResourcesPlugin.blockedTypes.add('script') await page.goto('http://www.youtube.com', {waitUntil: 'domcontentloaded'}) ``` * * * #### [availableTypes](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-block-resources/index.js#L75-L75) Get all available resource types. 
Resource type will be one of the following: `document`, `stylesheet`, `image`, `media`, `font`, `script`, `texttrack`, `xhr`, `fetch`, `eventsource`, `websocket`, `manifest`, `other`. Type: [Set](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Set)<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)> * * * #### [blockedTypes](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-block-resources/index.js#L84-L84) Get all blocked resource types. Blocked resource types can be configured either through `opts` or by modifying this property. Type: [Set](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Set)<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)> * * * ================================================ FILE: packages/puppeteer-extra-plugin-click-and-wait/example.js ================================================ 'use strict' const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-click-and-wait')()) ;(async () => { const browser = await puppeteer.launch({ headless: false }) const page = await browser.newPage() await page.goto('https://example.com/', { waitUntil: 'domcontentloaded' }) console.log('clicking on first link') await page.clickAndWaitForNavigation('a') console.log('all done') })() ================================================ FILE: packages/puppeteer-extra-plugin-click-and-wait/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') /** * Convenience function to wait for navigation to complete after clicking on an element. * * Adds a new `page.clickAndWaitForNavigation(selector, clickOptions, waitOptions)` method. 
* * See this issue for more context: https://github.com/GoogleChrome/puppeteer/issues/1421 * * > Note: Be wary of ajax powered pages where the navigation event is not triggered. * * @example * await page.clickAndWaitForNavigation('input#submitData') * * // as opposed to: * * await Promise.all([ * page.waitForNavigation(waitOptions), * page.click('input#submitData', clickOptions), * ]) */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'click-and-wait' } async clickAndWaitForNavigation(selector, clickOptions, waitOptions) { return Promise.all([ this.waitForNavigation(waitOptions), this.click(selector, clickOptions) ]).then(values => { return values[0] }) } async onPageCreated(page) { page.clickAndWaitForNavigation = this.clickAndWaitForNavigation.bind(page) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-click-and-wait/package.json ================================================ { "name": "puppeteer-extra-plugin-click-and-wait", "version": "2.3.3", "description": "Convenience function to wait for navigation to complete after clicking on an element.", "main": "index.js", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "scripts": { "docs": "node -e 0", "lint": "eslint --ext .js .", "test": "run-p lint", "test-ci": "run-s test" }, "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "clickAndWaitForNavigation", "chrome", "headless", "pupeteer" ], "devDependencies": { "ava": "2.4.0", "npm-run-all": "^4.1.5", "puppeteer": "^2.0.0" }, "dependencies": { "debug": "^4.1.1", "puppeteer-extra-plugin": "^3.2.3" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": 
"babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin-click-and-wait/readme.md ================================================ # puppeteer-extra-plugin-click-and-wait > A plugin for [puppeteer-extra](https://github.com/berstend/puppeteer-extra). ### Install ```bash yarn add puppeteer-extra-plugin-click-and-wait ``` ## API #### Table of Contents - [Plugin](#plugin) ### [Plugin](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-click-and-wait/index.js#L24-L39) **Extends: PuppeteerExtraPlugin** Convenience function to wait for navigation to complete after clicking on an element. Adds a new `page.clickAndWaitForNavigation(selector, clickOptions, waitOptions)` method. See this issue for more context: <https://github.com/GoogleChrome/puppeteer/issues/1421> > Note: Be wary of ajax powered pages where the navigation event is not triggered. Type: `function (opts)` - `opts` (optional, default `{}`) Example: ```javascript await page.clickAndWaitForNavigation('input#submitData') // as opposed to: await Promise.all([ page.waitForNavigation(waitOptions), page.click('input#submitData', clickOptions), ]) ``` * * * ================================================ FILE: packages/puppeteer-extra-plugin-devtools/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const RemoteDevTools = require('./lib/RemoteDevTools') const ow = require('ow') /** * As the tunnel page is public the plugin will require basic auth. * * You can set your own credentials using `opts` or `setAuthCredentials()`. * * If you don't specify basic auth credentials the plugin will * generate a password and print it to STDOUT.
* * **opts** * @param {Object} opts - Options * @param {Object} [opts.auth] - Basic auth credentials for the public page * @param {string} [opts.auth.user] - Username (default: 'user') * @param {string} [opts.auth.pass] - Password (will be generated if not provided) * @param {Object} [opts.prefix] - The prefix to use for the localtunnel.me subdomain (default: 'devtools-tunnel') * * @example * const puppeteer = require('puppeteer-extra') * const devtools = require('puppeteer-extra-plugin-devtools')({ * auth: { user: 'francis', pass: 'president' } * }) * puppeteer.use(devtools) * * puppeteer.launch().then(async browser => { * console.log('tunnel url:', (await devtools.createTunnel(browser)).url) * // => tunnel url: https://devtools-tunnel-n9aogqwx3d.localtunnel.me * }) */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) // To store a wsEndpoint (= browser instance) > tunnel reference this._browserSessions = {} } get name() { return 'devtools' } get defaults() { return { prefix: 'devtools-tunnel', auth: { user: 'user', pass: require('crypto') .randomBytes(20) .toString('hex') } } } /** * Create a new public tunnel. * * Supports multiple browser instances (will create a new tunnel for each). 
* * @param {Puppeteer.Browser} browser - The browser to create the tunnel for (there can be multiple) * @return {Tunnel} The {@link Tunnel} instance * * @example * const puppeteer = require('puppeteer-extra') * const devtools = require('puppeteer-extra-plugin-devtools')() * devtools.setAuthCredentials('bob', 'swordfish') * puppeteer.use(devtools) * * ;(async () => { * const browserFleet = await Promise.all( * [...Array(3)].map(slot => puppeteer.launch()) * ) * for (const [index, browser] of browserFleet.entries()) { * const {url} = await devtools.createTunnel(browser) * console.info(`Browser ${index}'s devtools frontend can be found at: ${url}`) * } * })() * // => * // Browser 0's devtools frontend can be found at: https://devtools-tunnel-fzenb4zuav.localtunnel.me * // Browser 1's devtools frontend can be found at: https://devtools-tunnel-qe2t5rghme.localtunnel.me * // Browser 2's devtools frontend can be found at: https://devtools-tunnel-pp83sdi4jo.localtunnel.me */ async createTunnel(browser) { ow(browser, ow.object.hasKeys('wsEndpoint')) const wsEndpoint = browser.wsEndpoint() if (!this._browserSessions[wsEndpoint]) { this._browserSessions[wsEndpoint] = await new Tunnel( wsEndpoint, this.opts ).create() } this._printGeneratedPasswordWhenNotOverridden( this._browserSessions[wsEndpoint].url ) this.debug('createTunnel', { wsEndpoint, sessions: Object.keys(this._browserSessions) }) return this._browserSessions[wsEndpoint] } /** * Set the basic auth credentials for the public tunnel page. * * Alternatively the credentials can be defined when instantiating the plugin. 
* * @param {string} user - Username * @param {string} pass - Password * * @example * const puppeteer = require('puppeteer-extra') * const devtools = require('puppeteer-extra-plugin-devtools')() * puppeteer.use(devtools) * * puppeteer.launch().then(async browser => { * devtools.setAuthCredentials('bob', 'swordfish') * const tunnel = await devtools.createTunnel(browser) * }) */ setAuthCredentials(user, pass) { ow(user, ow.string.nonEmpty) ow(pass, ow.string.nonEmpty) this.opts.auth = { user, pass } this.debug('updated credentials', this.opts.auth) return this } /** * Convenience function to get the local devtools frontend URL. * * @param {Puppeteer.Browser} browser * @return {string} * * @example * const puppeteer = require('puppeteer-extra') * const devtools = require('puppeteer-extra-plugin-devtools')() * puppeteer.use(devtools) * * puppeteer.launch().then(async browser => { * console.log(devtools.getLocalDevToolsUrl(browser)) * // => http://localhost:55952 * }) */ getLocalDevToolsUrl(browser) { ow(browser, ow.object.hasKeys('wsEndpoint')) const wsEndpoint = browser.wsEndpoint() return new RemoteDevTools.DevToolsLocal(wsEndpoint).url } /** * Prints the generated auth credentials, when not overriden by the user. * * As the tunnel is public we make basic auth a requirement, * without forcing the user to specify their own credentials. * * @ignore */ _printGeneratedPasswordWhenNotOverridden(url) { if (this.opts.auth.pass.length !== 40) { return } console.info(` DevTools Tunnel: You haven't specified basic auth credentials. Here are the generated ones, for your convenience: - user: 'user' - pass: '${this.opts.auth.pass}' Public Url: ${url} You can specify your own auth credentials when instantiating the plugin, or by using the plugin.setAuthCredentials(user, pass) method. `) } } /** * The devtools tunnel for a browser instance. 
* */ class Tunnel extends RemoteDevTools.DevToolsTunnel { constructor(wsEndpoint, opts = {}) { super(wsEndpoint, opts) } /** * Get the public devtools frontend url. * * @return {string} - url * * @example * const tunnel = await devtools.createTunnel(browser) * console.log(tunnel.url) * // => https://devtools-tunnel-sdoqqj95vg.localtunnel.me */ get url() { return super.url } /** * Get the devtools frontend deep link for a specific page. * * @param {Puppeteer.Page} page * @return {string} - url * * @example * const page = await browser.newPage() * const tunnel = await devtools.createTunnel(browser) * console.log(tunnel.getUrlForPage(page)) * // => https://devtools-tunnel-bmkjg26zmr.localtunnel.me/devtools/inspector.html?ws(...) */ getUrlForPage(page) { ow(page, ow.object.hasKeys('_target._targetInfo.targetId')) const pageId = page._target._targetInfo.targetId return super.getUrlForPageId(pageId) } /** * Close the tunnel. * * The tunnel will automatically stop when your script exits. * * @example * const tunnel = await devtools.createTunnel(browser) * tunnel.close() */ close() { return super.close() } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-devtools/index.test.js ================================================ 'use strict' const PLUGIN_NAME = 'devtools' const test = require('ava') const Plugin = require('.') test('is a function', async t => { t.is(typeof Plugin, 'function') }) test('should have the basic class members', async t => { const instance = new Plugin() t.is(instance.name, PLUGIN_NAME) t.true(instance._isPuppeteerExtraPlugin) }) test('should have opts with default values', async t => { const instance = new Plugin() t.is(instance.opts.prefix, 'devtools-tunnel') t.is(instance.opts.auth.user, 'user') t.is(instance.opts.auth.pass.length, 40) }) test('will throw without browser when creating a tunnel', async t => { const instance = new 
Plugin() let error = null try { await instance.createTunnel() } catch (err) { error = err } t.is(error.name, `ArgumentError`) }) // test('will accept a browser when creating a tunnel', async t => { // const instance = new Plugin({ auth: { user: 'bob', pass: 'yup' } }) // const fakeBrowser = { wsEndpoint: () => 'ws://foobar:1337' } // await instance.createTunnel(fakeBrowser) // t.is(true, true) // }) ================================================ FILE: packages/puppeteer-extra-plugin-devtools/lib/RemoteDevTools.js ================================================ 'use strict' const debug = require('debug')('remote-devtools') const ow = require('ow') const got = require('got') const http = require('http') const httpProxy = require('http-proxy') const localtunnel = require('localtunnel') const httpAuth = require('http-auth') const modifyResponse = require('http-proxy-response-rewrite') const getPort = require('get-port') const randomstring = require('randomstring') const urlParse = require('url-parse') /** * Base class handling common stuff * * @ignore */ class DevToolsCommon { constructor(webSocketDebuggerUrl, opts = {}) { ow(webSocketDebuggerUrl, ow.string) ow(webSocketDebuggerUrl, ow.string.includes('ws://')) ow(opts, ow.object.plain) this.opts = opts this.wsUrl = webSocketDebuggerUrl const wsUrlParts = urlParse(this.wsUrl) this.wsHost = wsUrlParts.hostname === '127.0.0.1' ? 'localhost' : wsUrlParts.hostname this.wsPort = wsUrlParts.port } async fetchVersion() { const { body } = await got( `http://${this.wsHost}:${this.wsPort}/json/version`, { json: true } ) return body } async fetchList() { const { body } = await got( `http://${this.wsHost}:${this.wsPort}/json/list`, { json: true } ) return body } } /** * Convenience functions for local remote debugging sessions. 
* * @ignore */ class DevToolsLocal extends DevToolsCommon { constructor(webSocketDebuggerUrl, opts = {}) { super(webSocketDebuggerUrl, opts) } get url() { return `http://${this.wsHost}:${this.wsPort}` } getUrlForPageId(pageId) { return `${this.url}/devtools/inspector.html?ws=${this.wsHost}:${this.wsPort}/devtools/page/${pageId}` } } /** * Create a proxy + tunnel to make a local devTools session accessible from the internet. * * - These devtools pages support screencasting the browser screen * - Proxy supports both http and websockets * - Proxy patches Host header to bypass devtools bug preventing non-localhost/ip access * - Proxy rewrites URLs, so links on the devtools index page will work * - Has a convenience function to return a deep link to a debug a specific page * - Supports basic auth ;-) * * @todo No idea how long-living a tunnel connection is yet, we might want to add keep-alive/reconnect capabilities * * @ignore */ class DevToolsTunnel extends DevToolsCommon { constructor(webSocketDebuggerUrl, opts = {}) { super(webSocketDebuggerUrl, opts) this.server = null this.tunnel = {} this.tunnelHost = null this.opts = Object.assign(this.defaults, opts) } get defaults() { return { prefix: 'devtools-tunnel', subdomain: null, auth: { user: null, pass: null }, localtunnel: {} } } get url() { return this.tunnel.url } getUrlForPageId(pageId) { return `https://${this.tunnelHost}/devtools/inspector.html?wss=${this.tunnelHost}/devtools/page/${pageId}` } async create() { const subdomain = this.opts.subdomain || this._generateSubdomain(this.opts.prefix) const basicAuth = this.opts.auth.user ? 
this._createBasicAuth(this.opts.auth.user, this.opts.auth.pass) : null const serverPort = await getPort() // only preference, will return an available one this.proxyServer = this._createProxyServer(this.wsHost, this.wsPort) this.server = await this._createServer(serverPort, basicAuth) this.tunnel = await this._createTunnel({ local_host: this.wsHost, port: serverPort, subdomain, ...this.opts.localtunnel }) this.tunnelHost = urlParse(this.tunnel.url).hostname debug( 'tunnel created.', ` local: http://${this.wsHost}:${this.wsPort} proxy: http://localhost:${serverPort} tunnel: ${this.tunnel.url} ` ) return this } close() { this.tunnel.close() this.server.close() this.proxyServer.close() debug('all closed') return this } _generateSubdomain(prefix) { const rand = randomstring.generate({ length: 10, readable: true, capitalization: 'lowercase' }) return `${prefix}-${rand}` } _createBasicAuth(user, pass) { const basicAuth = httpAuth.basic({}, (username, password, callback) => { const isValid = username === user && password === pass return callback(isValid) }) basicAuth.on('fail', (result, req) => { debug(`User authentication failed: ${result.user}`) }) basicAuth.on('error', (error, req) => { debug(`Authentication error: ${error.code + ' - ' + error.message}`) }) return basicAuth } /** * `fetch` used by the index page doesn't include credentials by default. 
* * LOVELY * THANKS * <3 * * @ignore */ _modifyFetchToIncludeCredentials(body) { if (!body) { return } body = body.replace(`fetch(url).`, `fetch(url, {credentials: 'include'}).`) // Fix for headless index pages that use weird client-side JS to modify the devtoolsFrontendUrl to something not working for us // https://github.com/berstend/puppeteer-extra/issues/566 body = body.replace( 'link.href = `https://chrome-devtools-frontend.appspot.com', 'link.href = item.devtoolsFrontendUrl; // ' ) debug('fetch:after', body) return body } _modifyJSONResponse(body) { if (!body) { return } debug('list body:before', body) body = body.replace(new RegExp(this.wsHost, 'g'), `${this.tunnelHost}`) body = body.replace(new RegExp('ws=', 'g'), 'wss=') body = body.replace(new RegExp('ws://', 'g'), 'wss://') debug('list body:after', body) return body } _createProxyServer(targetHost = 'localhost', targetPort) { // eslint-disable-next-line const proxyServer = new httpProxy.createProxyServer({ // eslint-disable-line target: { host: targetHost, port: parseInt(targetPort) } }) proxyServer.on('proxyReq', (proxyReq, req, res, options) => { debug('proxyReq', req.url) // https://github.com/GoogleChrome/puppeteer/issues/2242 proxyReq.setHeader('Host', 'localhost') }) proxyServer.on('proxyRes', (proxyRes, req, res, options) => { debug('proxyRes', req.url) if (req.url === '/') { delete proxyRes.headers['content-length'] modifyResponse( res, proxyRes.headers['content-encoding'], this._modifyFetchToIncludeCredentials.bind(this) ) } if (['/json/list', '/json/version'].includes(req.url)) { delete proxyRes.headers['content-length'] modifyResponse( res, proxyRes.headers['content-encoding'], this._modifyJSONResponse.bind(this) ) } }) return proxyServer } async _createServer(port, auth = null) { const server = http.createServer(auth, (req, res) => { this.proxyServer.web(req, res) }) server.on('upgrade', (req, socket, head) => { debug('upgrade request', req.url) this.proxyServer.ws(req, socket, head) }) 
server.listen(port) return server } async _createTunnel(options) { const tunnel = await localtunnel(options) tunnel.on('close', () => { // todo: add keep-alive? debug('tunnel:close') }) tunnel.on('error', err => { console.log('tunnel error', err) }) debug('tunnel:created', tunnel.url) return tunnel } } module.exports = { DevToolsCommon, DevToolsLocal, DevToolsTunnel } ================================================ FILE: packages/puppeteer-extra-plugin-devtools/lib/RemoteDevTools.test.js ================================================ 'use strict' const test = require('ava') const { DevToolsCommon, DevToolsLocal, DevToolsTunnel } = require('./RemoteDevTools') const webSocketDebuggerUrl = 'ws://127.0.0.1:9222/devtools/browser/ec78d039-2f19-4c6f-a08e-bcaf88e34b69' test('is a function', async t => { t.is(typeof DevToolsCommon, 'function') t.is(typeof DevToolsLocal, 'function') t.is(typeof DevToolsTunnel, 'function') }) test('will throw when missing webSocketDebuggerUrl', async t => { const error = await t.throws(() => new DevToolsCommon()) t.is( error.message, 'Expected argument to be of type `string` but received type `undefined`' ) // eslint-disable-line }) test('DevToolsLocal: has basic functionality', async t => { const instance = new DevToolsLocal(webSocketDebuggerUrl) t.is(instance.url, 'http://localhost:9222') t.is( instance.getUrlForPageId('foobar'), 'http://localhost:9222/devtools/inspector.html?ws=localhost:9222/devtools/page/foobar' ) }) test('DevToolsTunnel: has basic functionality', async t => { const instance = new DevToolsTunnel(webSocketDebuggerUrl) instance.tunnel = { url: 'https://faketunnel.com' } instance.tunnelHost = 'faketunnel.com' t.is(instance.url, instance.tunnel.url) t.is( instance.getUrlForPageId('foobar'), 'https://faketunnel.com/devtools/inspector.html?wss=faketunnel.com/devtools/page/foobar' ) }) test('DevToolsTunnel: has defaults', async t => { const instance = new DevToolsTunnel(webSocketDebuggerUrl) t.is(instance.opts.prefix, 
'devtools-tunnel') t.is(instance.opts.subdomain, null) t.deepEqual(instance.opts.auth, { user: null, pass: null }) }) test('DevToolsTunnel: has public members', async t => { const instance = new DevToolsTunnel(webSocketDebuggerUrl) t.true(instance.create instanceof Function) t.true(instance.close instanceof Function) }) ================================================ FILE: packages/puppeteer-extra-plugin-devtools/package.json ================================================ { "name": "puppeteer-extra-plugin-devtools", "version": "2.4.6", "description": "Make puppeteer browser debugging possible from anywhere (devtools with screencasting on the internet).", "main": "index.js", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "scripts": { "docs": "node -e 0", "lint": "eslint --ext .js .", "test-ava": "ava --fail-fast -v", "test": "run-p lint test-ava", "test-ci": "run-s test" }, "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "devtools", "devtools-tunnel", "localtunnel", "remote-debugging", "chrome", "headless", "pupeteer" ], "devDependencies": { "ava": "2.4.0", "npm-run-all": "^4.1.5", "puppeteer": "^2.0.0", "puppeteer-extra": "^3.3.6" }, "dependencies": { "debug": "^4.1.1", "get-port": "^3.2.0", "got": "^8.3.1", "http-auth": "^3.2.3", "http-proxy": "^1.17.0", "http-proxy-response-rewrite": "^0.0.1", "localtunnel": "^2.0.0", "ow": "^0.4.0", "puppeteer-extra-plugin": "^3.2.3", "randomstring": "^1.1.5", "url-parse": "^1.5.3" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin-devtools/readme.md ================================================ # puppeteer-extra-plugin-devtools > A plugin for 
[puppeteer-extra](https://github.com/berstend/puppeteer-extra). ## Installation ```bash yarn add puppeteer-extra-plugin-devtools ``` ## Purpose **Make puppeteer browser debugging possible from anywhere.** - Creates a secure tunnel to make the devtools frontend (**incl. screencasting**) accessible from the public internet - Works for both headless and headful puppeteer instances, as well as within docker containers - Uses the already existing DevTools Protocol websocket connection from puppeteer - Features some convenience functions for using the devtools frontend locally ## Magic ![screenshot](https://i.imgur.com/dYvsKfJ.png) ## Quickstart ```es6 const puppeteer = require('puppeteer-extra') const devtools = require('puppeteer-extra-plugin-devtools')() puppeteer.use(devtools) puppeteer .launch({ headless: true, defaultViewport: null }) .then(async browser => { console.log('Start') const tunnel = await devtools.createTunnel(browser) console.log(tunnel.url) const page = await browser.newPage() await page.goto('https://example.com') console.log('All setup.') }) ``` ## API #### Table of Contents - [Plugin](#plugin) - [createTunnel](#createtunnel) - [setAuthCredentials](#setauthcredentials) - [getLocalDevToolsUrl](#getlocaldevtoolsurl) - [Tunnel](#tunnel) - [url](#url) - [getUrlForPage](#geturlforpage) - [close](#close) ### [Plugin](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-devtools/index.js#L34-L168) **Extends: PuppeteerExtraPlugin** As the tunnel page is public the plugin will require basic auth. You can set your own credentials using `opts` or `setAuthCredentials()`. If you don't specify basic auth credentials the plugin will generate a password and print it to STDOUT. 
**opts** Type: `function (opts)` - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** Options (optional, default `{}`) - `opts.auth` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?** Basic auth credentials for the public page - `opts.auth.user` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** Username (default: 'user') - `opts.auth.pass` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** Password (will be generated if not provided) - `opts.prefix` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?** The prefix to use for the localtunnel.me subdomain (default: 'devtools-tunnel') - `opts.localtunnel` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?** Advanced options to pass to [localtunnel](https://github.com/localtunnel/localtunnel#options) Example: ```javascript const puppeteer = require('puppeteer-extra') const devtools = require('puppeteer-extra-plugin-devtools')({ auth: { user: 'francis', pass: 'president' } }) puppeteer.use(devtools) puppeteer.launch().then(async browser => { console.log('tunnel url:', (await devtools.createTunnel(browser)).url) // => tunnel url: https://devtools-tunnel-n9aogqwx3d.localtunnel.me }) ``` --- #### [createTunnel](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-devtools/index.js#L82-L93) Create a new public tunnel. Supports multiple browser instances (will create a new tunnel for each). 
Type: `function (browser): Tunnel` - `browser` **Puppeteer.Browser** The browser to create the tunnel for (there can be multiple) Example: ```javascript const puppeteer = require('puppeteer-extra') const devtools = require('puppeteer-extra-plugin-devtools')() devtools.setAuthCredentials('bob', 'swordfish') puppeteer.use(devtools) ;(async () => { const browserFleet = await Promise.all( [...Array(3)].map(slot => puppeteer.launch()) ) for (const [index, browser] of browserFleet.entries()) { const { url } = await devtools.createTunnel(browser) console.info(`Browser ${index}'s devtools frontend can be found at: ${url}`) } })() // => // Browser 0's devtools frontend can be found at: https://devtools-tunnel-fzenb4zuav.localtunnel.me // Browser 1's devtools frontend can be found at: https://devtools-tunnel-qe2t5rghme.localtunnel.me // Browser 2's devtools frontend can be found at: https://devtools-tunnel-pp83sdi4jo.localtunnel.me ``` --- #### [setAuthCredentials](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-devtools/index.js#L113-L119) Set the basic auth credentials for the public tunnel page. Alternatively the credentials can be defined when instantiating the plugin. 
Type: `function (user, pass)` - `user` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Username - `pass` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Password Example: ```javascript const puppeteer = require('puppeteer-extra') const devtools = require('puppeteer-extra-plugin-devtools')() puppeteer.use(devtools) puppeteer.launch().then(async browser => { devtools.setAuthCredentials('bob', 'swordfish') const tunnel = await devtools.createTunnel(browser) }) ``` --- #### [getLocalDevToolsUrl](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-devtools/index.js#L137-L142) Convenience function to get the local devtools frontend URL. Type: `function (browser): string` - `browser` **Puppeteer.Browser** Example: ```javascript const puppeteer = require('puppeteer-extra') const devtools = require('puppeteer-extra-plugin-devtools')() puppeteer.use(devtools) puppeteer.launch().then(async browser => { console.log(devtools.getLocalDevToolsUrl(browser)) // => http://localhost:55952 }) ``` --- ### [Tunnel](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-devtools/index.js#L174-L217) **Extends: RemoteDevTools.DevToolsTunnel** The devtools tunnel for a browser instance. Type: `function (wsEndpoint, opts)` - `wsEndpoint` - `opts` (optional, default `{}`) --- #### [url](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-devtools/index.js#L187-L187) Get the public devtools frontend url. 
Type: `function (): string` Example: ```javascript const tunnel = await devtools.createTunnel(browser) console.log(tunnel.url) // => https://devtools-tunnel-sdoqqj95vg.localtunnel.me ``` --- #### [getUrlForPage](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-devtools/index.js#L201-L205) Get the devtools frontend deep link for a specific page. Type: `function (page): string` - `page` **Puppeteer.Page** Example: ```javascript const page = await browser.newPage() const tunnel = await devtools.createTunnel(browser) console.log(tunnel.getUrlForPage(page)) // => https://devtools-tunnel-bmkjg26zmr.localtunnel.me/devtools/inspector.html?ws(...) ``` --- #### [close](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-devtools/index.js#L216-L216) Close the tunnel. The tunnel will automatically stop when your script exits. Type: `function ()` Example: ```javascript const tunnel = await devtools.createTunnel(browser) tunnel.close() ``` --- ================================================ FILE: packages/puppeteer-extra-plugin-devtools/test/headless.js ================================================ 'use strict' const test = require('ava') // const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] // test.beforeEach(t => { // // Make sure we work with pristine modules // delete require.cache[require.resolve('puppeteer-extra')] // delete require.cache[require.resolve('puppeteer-extra-plugin-devtools')] // }) test('will create a tunnel', async t => { // const puppeteer = require('puppeteer-extra') // const devtools = require('puppeteer-extra-plugin-devtools')() // puppeteer.use(devtools) // devtools.setAuthCredentials('bob', 'swordfish') // await puppeteer.launch({ args: PUPPETEER_ARGS }).then(async browser => { // const tunnel = await devtools.createTunnel(browser) // t.true(tunnel.url.includes('https://devtools-tunnel-')) 
// await browser.close() // }) t.true(true) }) // Note: https://tunnel.datahub.at is gone and I don't have an alternative currently // test('will create a tunnel with custom localtunnel options', async t => { // const puppeteer = require('puppeteer-extra') // const devtools = require('puppeteer-extra-plugin-devtools')({ // auth: { user: 'francis', pass: 'president' }, // localtunnel: { // host: 'https://tunnel.datahub.at' // } // }) // puppeteer.use(devtools) // await puppeteer.launch({ args: PUPPETEER_ARGS }).then(async browser => { // const tunnel = await devtools.createTunnel(browser) // t.true(tunnel.url.includes('.tunnel.datahub.at')) // browser.close() // }) // t.true(true) // }) ================================================ FILE: packages/puppeteer-extra-plugin-flash/example.js ================================================ 'use strict' const puppeteer = require('puppeteer-extra') // This might not be the flashPath you're looking for. ;-) const userName = require('os').userInfo().username const pluginPath = ` /Users/${userName}/Library/Application Support/Google/Chrome/PepperFlash/29.0.0.171/PepperFlashPlayer.plugin `.trim() const pluginVersion = '29.0.0.171' // Will implicitely require 'user-preferences' which will require 'user-data-dir' // When using default Chromium the pluginPath/pluginVersion need to be specified puppeteer.use( require('puppeteer-extra-plugin-flash')({ pluginPath, pluginVersion }) ) ;(async () => { const browser = await puppeteer.launch({ headless: false }) const page = await browser.newPage() await page.goto('http://ultrasounds.com', { waitUntil: 'domcontentloaded' }) })() ================================================ FILE: packages/puppeteer-extra-plugin-flash/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') /** * Allow flash on all sites without user interaction. * * Note: The flash plugin is not working in headless mode. 
* * Note: When using the default Chromium browser * `pluginPath` and `pluginVersion` must be specified. * * Note: Unfortunately this doesn't seem to enable flash on incognito pages, * see [this gist] for a workaround using management policies. * [this gist]: https://gist.github.com/berstend/bcd64a4a2db28afbd6486daf69f4e787 * * @param {Object} opts - Options * @param {boolean} [opts.allowFlash=true] - Whether to allow flash content or not * @param {boolean} [opts.pluginPath=null] - Flash plugin path * @param {boolean} [opts.pluginVersion=9000] - Flash plugin version (9000 is high enough for Chrome not to complain) * * @example * const puppeteer = require('puppeteer-extra') * puppeteer.use(require('puppeteer-extra-plugin-flash')()) * ;(async () => { * const browser = await puppeteer.launch({headless: false}) * const page = await browser.newPage() * await page.goto('http://ultrasounds.com', {waitUntil: 'domcontentloaded'}) * })() */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'flash' } get defaults() { return { allowFlash: true, pluginPath: null, pluginVersion: 9000 } } get requirements() { return new Set(['launch', 'headful']) } get dependencies() { return new Set(['user-preferences']) } async beforeLaunch(options) { if (this.opts.allowFlash === false) { return } if (this.opts.pluginPath) { options.args.push(`--ppapi-flash-path=${this.opts.pluginPath}`) } if (this.opts.pluginVersion) { options.args.push(`--ppapi-flash-version=${this.opts.pluginVersion}`) } } get data() { if (this.opts.allowFlash === false) { return } const allowFlashPreferences = { profile: { managed_default_content_settings: { plugins: 1 }, managed_plugins_allowed_for_urls: ['https://*', 'http://*'] } } return [ { name: 'userPreferences', value: allowFlashPreferences } ] } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: 
packages/puppeteer-extra-plugin-flash/package.json ================================================ { "name": "puppeteer-extra-plugin-flash", "version": "2.3.3", "description": "Allow flash on all sites without user interaction.", "main": "index.js", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "scripts": { "docs": "node -e 0", "lint": "eslint --ext .js .", "test": "run-p lint", "test-ci": "run-s test" }, "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "chrome", "flash", "allow-flash", "headless", "pupeteer" ], "devDependencies": { "ava": "2.4.0", "npm-run-all": "^4.1.5", "puppeteer": "^2.0.0" }, "dependencies": { "debug": "^4.1.1", "puppeteer-extra-plugin": "^3.2.3" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin-flash/readme.md ================================================ # puppeteer-extra-plugin-flash > A plugin for [puppeteer-extra](https://github.com/berstend/puppeteer-extra). ## Install ```bash yarn add puppeteer-extra-plugin-flash ``` ## Changelog #### `v2.2.5` - Improved: Fixes flash content in newer Chrome versions (76+) ([#133](https://github.com/berstend/puppeteer-extra/pull/133), thanks [@Niek](https://github.com/Niek)) ## API #### Table of Contents - [Plugin](#plugin) ### [Plugin](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-flash/index.js#L31-L100) **Extends: PuppeteerExtraPlugin** Allow flash on all sites without user interaction. Note: The flash plugin is not working in headless mode. 
Note: When using the default Chromium browser `pluginPath` and `pluginVersion` must be specified (stated in `chrome://version/`). Note: Unfortunately this doesn't seem to enable flash on incognito pages, see [this gist] for a workaround using management policies. [this gist]: https://gist.github.com/berstend/bcd64a4a2db28afbd6486daf69f4e787 Type: `function (opts)` - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** Options (optional, default `{}`) - `opts.allowFlash` **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** Whether to allow flash content or not (optional, default `true`) - `opts.pluginPath` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Flash plugin path (optional, default `null`) - `opts.pluginVersion` **([string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String) | [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number))** Flash plugin version (9000 is high enough for Chrome not to complain) (optional, default `9000`) Example: ```javascript const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-flash')()) ;(async () => { const browser = await puppeteer.launch({ headless: false }) const page = await browser.newPage() await page.goto('http://ultrasounds.com', { waitUntil: 'domcontentloaded' }) })() ``` --- ================================================ FILE: packages/puppeteer-extra-plugin-font-size/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') /** * Modify/increase the default font size in puppeteer.
* * @param {Object} opts - Options * @param {Number} [opts.defaultFontSize=20] - Default browser font size * * @example * const puppeteer = require('puppeteer-extra') * puppeteer.use(require('puppeteer-extra-plugin-font-size')()) * // or * puppeteer.use(require('puppeteer-extra-plugin-font-size')({defaultFontSize: 18})) * const browser = await puppeteer.launch() */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'font-size' } get defaults() { return { defaultFontSize: 20 } } get requirements() { return new Set(['launch', 'headful']) } get dependencies() { return new Set(['user-preferences']) } get data() { const userPreferences = { webkit: { webprefs: { default_font_size: this.opts.defaultFontSize } } } return [ { name: 'userPreferences', value: userPreferences } ] } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-font-size/package.json ================================================ { "name": "puppeteer-extra-plugin-font-size", "version": "2.3.3", "description": "Adjust font sizes in puppeteer.", "main": "index.js", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "scripts": { "docs": "node -e 0", "lint": "eslint --ext .js .", "test": "run-p lint", "test-ci": "run-s test" }, "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "chrome", "headless", "pupeteer" ], "devDependencies": { "ava": "2.4.0", "npm-run-all": "^4.1.5", "puppeteer": "^2.0.0" }, "dependencies": { "debug": "^4.1.1", "puppeteer-extra-plugin": "^3.2.3" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: 
packages/puppeteer-extra-plugin-font-size/readme.md ================================================ # puppeteer-extra-plugin-font-size > A plugin for [puppeteer-extra](https://github.com/berstend/puppeteer-extra). ### Install ```bash yarn add puppeteer-extra-plugin-font-size ``` ## API #### Table of Contents - [Plugin](#plugin) ### [Plugin](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-font-size/index.js#L18-L44) **Extends: PuppeteerExtraPlugin** Modify/increase the default font size in puppeteer. Type: `function (opts)` - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** Options (optional, default `{}`) - `opts.defaultFontSize` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Default browser font size (optional, default `20`) Example: ```javascript const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-font-size')()) // or puppeteer.use(require('puppeteer-extra-plugin-font-size')({defaultFontSize: 18})) const browser = await puppeteer.launch() ``` * * * ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/ava.config-ts.js ================================================ export default { compileEnhancements: false, environmentVariables: { TS_NODE_COMPILER_OPTIONS: '{"module":"commonjs"}' }, files: ['src/**.test.ts'], extensions: ['ts'], require: ['ts-node/register'] } ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/ava.config.js ================================================ export default { files: ['dist/*.test.js'] } ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/package.json ================================================ { "name": "puppeteer-extra-plugin-recaptcha", "version": "3.6.8", "description": "A 
puppeteer-extra plugin to solve reCAPTCHAs and hCaptchas automatically.", "main": "dist/index.cjs.js", "module": "dist/index.esm.js", "typings": "dist/index.d.ts", "files": [ "dist" ], "repository": "berstend/puppeteer-extra", "homepage": "https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-recaptcha", "author": "berstend", "license": "MIT", "scripts": { "clean": "rimraf dist/*", "tscheck": "tsc --pretty --noEmit", "prebuild": "run-s clean", "build": "run-s build:tsc build:rollup ambient-dts", "build:tsc": "tsc --project tsconfig.json --module commonjs", "build:rollup": "rollup -c rollup.config.ts", "docs": "node -e 0", "predocs2": "rimraf docs/*", "docs2": "typedoc --module commonjs --readme none --target ES6 --theme markdown --excludeNotExported --excludeExternals --excludePrivate --out docs --mode file src/index.ts", "test": "ava -v --config ava.config-ts.js", "pretest-ci": "run-s build", "test-ci": "ava --fail-fast --concurrency 2 -v", "ambient-dts": "run-s ambient-dts-copy ambient-dts-fix-path", "ambient-dts-copy": "copyfiles -u 1 \"src/**/*.d.ts\" dist", "ambient-dts-fix-path": "replace-in-files --string='/// =9.11.2" }, "prettier": { "printWidth": 80, "semi": false, "singleQuote": true }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "recaptcha", "hcaptcha", "captcha", "2captcha" ], "devDependencies": { "@types/debug": "^4.1.5", "@types/node": "14.17.6", "@types/puppeteer": "*", "ava": "2.4.0", "copyfiles": "^2.1.1", "npm-run-all": "^4.1.5", "puppeteer": "9", "puppeteer-extra": "^3.3.6", "replace-in-files-cli": "^0.3.1", "rimraf": "^3.0.0", "rollup-plugin-commonjs": "^10.1.0", "rollup-plugin-node-resolve": "^5.2.0", "rollup-plugin-sourcemaps": "^0.4.2", "rollup-plugin-typescript2": "^0.25.2", "ts-node": "^8.5.4", "tslint": "^5.20.1", "tslint-config-prettier": "^1.18.0", "tslint-config-standard": "^9.0.0", "typescript": "4.4.3" }, "dependencies": { "debug": "^4.1.1", "merge-deep": "^3.0.2", 
"puppeteer-extra-plugin": "^3.2.3" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/readme.md ================================================ # puppeteer-extra-plugin-recaptcha [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/berstend/puppeteer-extra/test.yml?branch=master&event=push) [![Discord](https://img.shields.io/discord/737009125862408274)](https://extra.community) [![npm](https://img.shields.io/npm/dt/puppeteer-extra-plugin-recaptcha.svg)](https://www.npmjs.com/package/puppeteer-extra-plugin-recaptcha) [![npm](https://img.shields.io/npm/v/puppeteer-extra-plugin-recaptcha.svg)](https://www.npmjs.com/package/puppeteer-extra-plugin-recaptcha) > A [puppeteer-extra](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra) and [playwright-extra](https://github.com/berstend/puppeteer-extra/tree/master/packages/playwright-extra) plugin to solve reCAPTCHAs and hCaptchas automatically. ![](https://i.imgur.com/SWrIQw0.gif) ## Install ```bash yarn add puppeteer-extra-plugin-recaptcha # - or - npm install puppeteer-extra-plugin-recaptcha ``` If this is your first [puppeteer-extra](https://github.com/berstend/puppeteer-extra) plugin here's everything you need: ```bash yarn add puppeteer puppeteer-extra puppeteer-extra-plugin-recaptcha # - or - npm install puppeteer puppeteer-extra puppeteer-extra-plugin-recaptcha ```
Changelog ##### Latest > 🎁 **Note:** Until we've automated changelog updates in markdown files please follow the `#announcements` channel in our [discord server](https://extra.community/) for the latest updates and changelog info. _Older changelog:_ ##### `3.1.9` - Support reCAPTCHAs not in forms ([#57](https://github.com/berstend/puppeteer-extra/issues/57)) - Make script detection more fuzzy ([#48](https://github.com/berstend/puppeteer-extra/issues/48)) ##### `3.1.6` - We'll now add our custom methods to any existing pages and frames in the browser instance. - Fixed reference import path for our ambient declarations. ##### `3.1.5` - Solving reCAPTCHAs in frames is now supported as well, if need be: ```js for (const frame of page.mainFrame().childFrames()) { await frame.solveRecaptchas() } ``` ##### `3.1.4` - Improved TypeScript experience: I found a way to make your TypeScript compiler automatically aware of the additions to the `Page` and `Frame` object (e.g. `page.solveRecaptchas()`). - We now print a warning if the provider throws an error (e.g. invalid api key)
## Usage The plugin essentially provides a mighty `page.solveRecaptchas()` method that does everything needed automagically. ```js // puppeteer-extra is a drop-in replacement for puppeteer, // it augments the installed puppeteer with plugin functionality const puppeteer = require('puppeteer-extra') // add recaptcha plugin and provide it your 2captcha token (= their apiKey) // 2captcha is the builtin solution provider but others would work as well. // Please note: You need to add funds to your 2captcha account for this to work const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha') puppeteer.use( RecaptchaPlugin({ provider: { id: '2captcha', token: 'XXXXXXX' // REPLACE THIS WITH YOUR OWN 2CAPTCHA API KEY ⚡ }, visualFeedback: true // colorize reCAPTCHAs (violet = detected, green = solved) }) ) // puppeteer usage as normal puppeteer.launch({ headless: true }).then(async browser => { const page = await browser.newPage() await page.goto('https://www.google.com/recaptcha/api2/demo') // That's it, a single line of code to solve reCAPTCHAs 🎉 await page.solveRecaptchas() await Promise.all([ page.waitForNavigation(), page.click(`#recaptcha-demo-submit`) ]) await page.screenshot({ path: 'response.png', fullPage: true }) await browser.close() }) ```
TypeScript usage ```ts // `puppeteer-extra` and the recaptcha plugin are written in TS, // hence you get perfect type support out of the box :) import puppeteer from 'puppeteer-extra' import RecaptchaPlugin from 'puppeteer-extra-plugin-recaptcha' puppeteer.use( RecaptchaPlugin({ provider: { id: '2captcha', token: 'ENTER_YOUR_2CAPTCHA_API_KEY_HERE' } }) ) // Puppeteer usage as normal (headless is "false" just for this demo) puppeteer.launch({ headless: false }).then(async browser => { const page = await browser.newPage() await page.goto('https://www.google.com/recaptcha/api2/demo') // Even this `Puppeteer.Page` extension is recognized and fully type safe 🎉 await page.solveRecaptchas() await Promise.all([ page.waitForNavigation(), page.click(`#recaptcha-demo-submit`) ]) await page.screenshot({ path: 'response.png', fullPage: true }) await browser.close() }) ```

If you'd like to see debug output just run your script like so: ```bash DEBUG=puppeteer-extra,puppeteer-extra-plugin:* node myscript.js ``` _**Tip:** The recaptcha plugin works really well together with the [stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth)._ ## Motivation 🏴 These days [captchas](https://en.wikipedia.org/wiki/CAPTCHA) are unfortunately everywhere, with [reCAPTCHA](https://developers.google.com/recaptcha/) having the biggest "market share" in that space (> 80%). The situation got really bad, with privacy-minded users (tracking blockers, VPNs) being penalized heavily and having to solve a lot of reCAPTCHA challenges constantly while browsing the web. The stated reasons for this omnipresent captcha plague vary from site owners having to protect themselves against increasingly malicious actors to some believing that we're essentially forced into free labour to train Google's various machine learning endeavours. In any case I strongly feel that captchas in their current form have failed. They're a much bigger obstacle and annoyance to humans than to robots, which renders them useless. My anarchist contribution to this discussion is to demonstrate this absurdity, with a plugin for robots with which **a single line of code is all it takes to bypass reCAPTCHAs on any site**. > Note: Since `v3.3.0` the plugin will solve [hCaptchas](https://www.hcaptcha.com/) as well, as they've gained significant market share through their Cloudflare partnership. ## Provider I thought about having the plugin solve captchas directly (e.g. using the [audio challenge](https://github.com/dessant/buster) and speech-to-text APIs), but external solution providers are so cheap and reliable that there is really no benefit in doing that. ¯\\\_(ツ)\_/¯ _Please note:_ You need a provider configured for this plugin to do its magic.
If you decide to use the built-in 2captcha provider you need to add funds to your 2captcha account. ### 2captcha Currently the only builtin solution provider as it's the cheapest and most reliable, from my experience. If you'd like to throw some free captcha credit my way feel free to [sign up here](https://2captcha.com?from=6690177) (referral link, allows me to write automated tests against their API). - Cost: 1000 reCAPTCHAs (and hCaptchas) for 3 USD - Delay: Solving a reCAPTCHA takes between 10 and 60 seconds - Error rate (incorrect solutions): Very rare #### Other providers You can easily use your own provider as well, by providing the plugin a function instead of 2captcha credentials (explained in the API docs). PRs for new providers are welcome as well. ## Q&A ### How does this work? - When summoned with `page.solveRecaptchas()` the plugin will attempt to find any active reCAPTCHAs & hCaptchas, extract their configuration, pass that on to the specified solutions provider, take the solutions and put them back into the page (triggering any callback that might be required). ### Is this production ready? - Yes, the plugin is actively maintained, has been battle-hardened over several years and is used in high workload production setups. ### How do reCAPTCHAs work? - reCAPTCHAs use a per-site `sitekey`. Interestingly enough the response token after solving a challenge is (currently) not tied to a specific session or IP and can be passed on to others (until it expires). This is how the external solution providers work: They're being given a `sitekey` and URL, solve the challenge and respond with a response token. - This plugin automates all these steps in a generic and robust way (detecting captchas, extracting their config and `sitekey`) as well as triggering the (optional) response callback the site owner might have specified. ### Are ordinary image captchas supported as well? - No.
This plugin focusses on reCAPTCHAs and hCaptchas exclusively, with the benefit of being fully automatic. 🔮 ### What about invisible reCAPTCHAs? - [Invisible reCAPTCHAs](https://developers.google.com/recaptcha/docs/invisible) are supported. They're basically used to compute a score of how likely the user is a bot. Based on that score the site owner can block access to resources or (most often) present the user with a reCAPTCHA challenge (which this plugin can solve). The [stealth plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth) might be of interest here, as it masks the usage of puppeteer. - Technically speaking the plugin supports: reCAPTCHA v2, reCAPTCHA v3, invisible reCAPTCHA, hCaptcha, invisible hCaptcha. All of those (multiple as well) are solved when `page.solveRecaptchas()` is called. ### When should I call `page.solveRecaptchas()`? - reCAPTCHAs will be solved automatically whenever they **are visible** (_aka their "I'm not a robot" iframe in the DOM_). It's your responsibility to do any required actions to trigger the captcha being shown, if needed. - Note about "invisible" versions of reCAPTCHA/hCaptchas: They don't feature a visible checkbox iframe, the plugin will then solve any open challenge popups instead. :-) - If you summon the plugin immediately after navigating to a page it's got your back and will wait automatically until the reCAPTCHA script (if any) has been loaded and initialized. - If you call `page.solveRecaptchas()` on a page that has no reCAPTCHAs nothing bad will happen (😄) but the promise will resolve and the rest of your code executes as normal. - After solving the reCAPTCHAs the plugin will automatically detect and trigger their [optional callback](https://developers.google.com/recaptcha/docs/display#render_param). This might result in forms being submitted and page navigations to occur, depending on how the site owner implemented the reCAPTCHA. 
## Debug ```bash DEBUG=puppeteer-extra,puppeteer-extra-plugin:* node myscript.js ``` ## Fine grained control ### Defaults By default the plugin will never throw, but return any errors silently in the `{ error }` property of the result object. You can change that behaviour by passing `throwOnError: true` to the initializer and use `try/catch` blocks to catch errors. For convenience and because it looks cool the plugin will "colorize" reCAPTCHAs depending on their state (violet = detected and being solved, green = solved). You can turn that feature off by passing `visualFeedback: false` to the plugin initializer. ### Options ```ts interface PluginOptions { /** Visualize reCAPTCHAs based on their state */ visualFeedback: boolean // default: true /** Throw on errors instead of returning them in the error property */ throwOnError: boolean // default: false /** Only solve captchas and challenges visible in the browser viewport */ solveInViewportOnly: boolean // default: false /** Solve score based captchas with no challenge (e.g.
reCAPTCHA v3) */ solveScoreBased: boolean // default: false /** Solve invisible captchas that have no active challenge */ solveInactiveChallenges: boolean // default: false } ``` ### Result object ```js const { captchas, filtered, solutions, solved, error } = await page.solveRecaptchas() ``` - `captchas` is an array of captchas found in the page - `filtered` is an array of captchas that have been detected but are ignored due to plugin options - `solutions` is an array of solutions returned from the provider - `solved` is an array of "solved" (= solution entered) captchas on the page ### Manual control flow `page.solveRecaptchas()` is a convenience method that wraps the following steps: ```js let { captchas, filtered, error } = await page.findRecaptchas() let { solutions, error } = await page.getRecaptchaSolutions(captchas) let { solved, error } = await page.enterRecaptchaSolutions(solutions) ``` ### Proxies If you wish for 2captcha to use a specific proxy (= IP address) while solving the captcha you can set the environment variables `2CAPTCHA_PROXY_TYPE` and `2CAPTCHA_PROXY_ADDRESS`. ## Troubleshooting ### Solving captchas in iframes By default the plugin will only solve reCAPTCHAs showing up on the immediate page.
In case you encounter captchas in frames the plugin extends the `Puppeteer.Frame` object with custom methods as well: ```js // Loop over all potential frames on that page for (const frame of page.mainFrame().childFrames()) { // Attempt to solve any potential captchas in those frames await frame.solveRecaptchas() } ``` In addition you might want to disable site isolation, so puppeteer is able to access [cross-origin iframes](https://github.com/puppeteer/puppeteer/issues/2548): ```js puppeteer.launch({ args: [ '--disable-features=IsolateOrigins,site-per-process,SitePerProcess', '--flag-switches-begin --disable-site-isolation-trials --flag-switches-end' ] }) ``` ### Solving captchas in pre-existing browser pages In case you're not using `browser.newPage()` but re-use the existing `about:blank` tab (which is not recommended for various reasons) you will experience a `page.solveRecaptchas is not a function` error, as the plugin hasn't hooked into this page yet. As a workaround you can manually add existing pages to the lifecycle methods of the plugin: ```js const recaptcha = RecaptchaPlugin() const pages = await browser.pages() for (const page of pages) { // Add plugin methods to existing pages await recaptcha.onPageCreated(page) } ``` ### Tips - Make sure to use debug logging if something is not working right or when reporting issues. - Check for ignored captchas in the filtered array in case a captcha you intend to solve is being ignored, filtered captchas will state the reason why they have been ignored (or better: which plugin option is responsible) - Keep in mind that by default the plugin will only solve "active" captchas (that means a visible checkbox or an active challenge popup).
In extreme cases (like a very weird or super slow loading site) you can help the plugin by making sure the captcha you intend to solve is there before calling `page.solveRecaptchas`: ```js await page.waitForSelector('iframe[src*="recaptcha/"]') await page.solveRecaptchas() ``` --- ## License Copyright © 2018 - 2023, [berstend̡̲̫̹̠̖͚͓̔̄̓̐̄͛̀͘](https://github.com/berstend). Released under the MIT License. ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/rollup.config.ts ================================================ import resolve from 'rollup-plugin-node-resolve' import sourceMaps from 'rollup-plugin-sourcemaps' import typescript from 'rollup-plugin-typescript2' const pkg = require('./package.json') const entryFile = 'index' const banner = ` /*! * ${pkg.name} v${pkg.version} by ${pkg.author} * ${pkg.homepage || `https://github.com/${pkg.repository}`} * @license ${pkg.license} */ `.trim() const defaultExportOutro = ` module.exports = exports.default || {} Object.entries(exports).forEach(([key, value]) => { module.exports[key] = value }) ` export default { input: `src/${entryFile}.ts`, output: [ { file: pkg.main, format: 'cjs', sourcemap: true, exports: 'named', outro: defaultExportOutro, banner }, { file: pkg.module, format: 'es', sourcemap: true, exports: 'named', banner } ], // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash') external: [ ...Object.keys(pkg.dependencies || {}), ...Object.keys(pkg.peerDependencies || {}) ], watch: { include: 'src/**' }, plugins: [ // Compile TypeScript files typescript({ useTsconfigDeclarationDir: true }), // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs) // commonjs(), // Allow node_modules resolution, so you can use 'external' to control // which external modules to include in the bundle // https://github.com/rollup/rollup-plugin-node-resolve#usage resolve(), // Resolve source maps to the original source sourceMaps() 
] } ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/src/ambient.d.ts ================================================ export {} // https://github.com/sindresorhus/type-fest/issues/19 declare global { interface SymbolConstructor { readonly observable: symbol } } ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/src/content-hcaptcha.ts ================================================ import * as types from './types' export const ContentScriptDefaultOpts: types.ContentScriptOpts = { visualFeedback: true } export const ContentScriptDefaultData: types.ContentScriptData = { solutions: [] } /** * Content script for Hcaptcha handling (runs in browser context) * @note External modules are not supported here (due to content script isolation) */ export class HcaptchaContentScript { private opts: types.ContentScriptOpts private data: types.ContentScriptData private baseUrls = [ 'assets.hcaptcha.com/captcha/v1/', 'newassets.hcaptcha.com/captcha/v1/', ] constructor( opts = ContentScriptDefaultOpts, data = ContentScriptDefaultData ) { // Workaround for https://github.com/esbuild-kit/tsx/issues/113 if (typeof globalThis.__name === 'undefined') { globalThis.__defProp = Object.defineProperty globalThis.__name = (target, value) => globalThis.__defProp(target, 'name', { value, configurable: true }) } this.opts = opts this.data = data } private async _waitUntilDocumentReady() { return new Promise(function(resolve) { if (!document || !window) return resolve(null) const loadedAlready = /^loaded|^i|^c/.test(document.readyState) if (loadedAlready) return resolve(null) function onReady() { resolve(null) document.removeEventListener('DOMContentLoaded', onReady) window.removeEventListener('load', onReady) } document.addEventListener('DOMContentLoaded', onReady) window.addEventListener('load', onReady) }) } private _paintCaptchaBusy($iframe: HTMLIFrameElement) { try { if (this.opts.visualFeedback) { 
$iframe.style.filter = `opacity(60%) hue-rotate(400deg)` // violet } } catch (error) { // noop } return $iframe } /** Regular checkboxes */ private _findRegularCheckboxes() { const nodeList = document.querySelectorAll( this.baseUrls.map(url => `iframe[src*='${url}'][data-hcaptcha-widget-id]:not([src*='invisible'])`).join(',') ) return Array.from(nodeList) } /** Find active challenges from invisible hcaptchas */ private _findActiveChallenges() { const nodeList = document.querySelectorAll( this.baseUrls.map(url => `div[style*='visible'] iframe[src*='${url}'][src*='hcaptcha.html']`).join(',') ) return Array.from(nodeList) } private _extractInfoFromIframes(iframes: HTMLIFrameElement[]) { return iframes .map(el => el.src.replace('.html#', '.html?')) .map(url => { const { searchParams } = new URL(url) const result: types.CaptchaInfo = { _vendor: 'hcaptcha', url: document.location.href, id: searchParams.get('id'), sitekey: searchParams.get('sitekey'), display: { size: searchParams.get('size') || 'normal' } } return result }) } public async findRecaptchas() { const result = { captchas: [] as types.CaptchaInfo[], error: null as null | Error } try { await this._waitUntilDocumentReady() const iframes = [ ...this._findRegularCheckboxes(), ...this._findActiveChallenges() ] if (!iframes.length) { return result } result.captchas = this._extractInfoFromIframes(iframes) iframes.forEach(el => { this._paintCaptchaBusy(el) }) } catch (error) { result.error = error return result } return result } public async enterRecaptchaSolutions() { const result = { solved: [] as types.CaptchaSolved[], error: null as any } try { await this._waitUntilDocumentReady() const solutions = this.data.solutions if (!solutions || !solutions.length) { result.error = 'No solutions provided' return result } result.solved = solutions .filter(solution => solution._vendor === 'hcaptcha') .filter(solution => solution.hasSolution === true) .map(solution => { window.postMessage( JSON.stringify({ id: solution.id, 
/**
 * Content script for Recaptcha handling (runs in browser context)
 * @note External modules are not supported here (due to content script isolation)
 */
export class RecaptchaContentScript {
  private opts: types.ContentScriptOpts
  private data: types.ContentScriptData
  private frameSources: FrameSources

  /**
   * @param opts - Content script options (visual feedback, optional debug binding name)
   * @param data - Content script data; `data.solutions` is consumed by `enterRecaptchaSolutions`
   */
  constructor(
    opts = ContentScriptDefaultOpts,
    data = ContentScriptDefaultData
  ) {
    // Workaround for https://github.com/esbuild-kit/tsx/issues/113
    if (typeof globalThis.__name === 'undefined') {
      globalThis.__defProp = Object.defineProperty
      globalThis.__name = (target, value) =>
        globalThis.__defProp(target, 'name', { value, configurable: true })
    }

    this.opts = opts
    this.data = data
    this.frameSources = this._generateFrameSources()
    this.log('Intialized', { url: document.location.href, opts: this.opts })
  }

  /** Log using debug binding if available */
  private log = (message: string, data?: any) => {
    if (this.opts.debugBinding && window.top[this.opts.debugBinding]) {
      window.top[this.opts.debugBinding](message, JSON.stringify(data))
    }
  }

  // Poor mans _.pick: returns a function copying only `props` from an object
  private _pick = (props: any[]) => (o: any) =>
    props.reduce((a, e) => ({ ...a, [e]: o[e] }), {})

  // make sure the element is visible - this is equivalent to jquery's is(':visible')
  private _isVisible = (elem: any) =>
    !!(
      elem.offsetWidth ||
      elem.offsetHeight ||
      (typeof elem.getClientRects === 'function' &&
        elem.getClientRects().length)
    )

  /**
   * Check if an element is fully inside the current viewport.
   *
   * All four edges of the bounding rect are checked. The viewport size is read
   * from `window` first and falls back to `document.documentElement`.
   */
  private _isInViewport(elem: any) {
    const rect = elem.getBoundingClientRect()
    const viewportHeight =
      window.innerHeight || document.documentElement.clientHeight
    const viewportWidth =
      window.innerWidth || document.documentElement.clientWidth
    return (
      rect.top >= 0 &&
      rect.left >= 0 &&
      rect.bottom <= viewportHeight &&
      rect.right <= viewportWidth
    )
  }

  // Recaptcha client is a nested, circular object with object keys that seem generated
  // We flatten that object a couple of levels deep for easy access to certain keys we're interested in.
  private _flattenObject(item: any, levels = 2, ignoreHTML = true) {
    const isObject = (x: any) => x && typeof x === 'object'
    const isHTML = (x: any) => x && x instanceof HTMLElement
    let newObj = {} as any
    for (let i = 0; i < levels; i++) {
      // From the second pass on we flatten the result of the previous pass
      item = Object.keys(newObj).length ? newObj : item
      Object.keys(item).forEach(key => {
        if (ignoreHTML && isHTML(item[key])) return
        if (isObject(item[key])) {
          Object.keys(item[key]).forEach(innerKey => {
            if (ignoreHTML && isHTML(item[key][innerKey])) return
            const keyName = isObject(item[key][innerKey])
              ? `obj_${key}_${innerKey}`
              : `${innerKey}`
            newObj[keyName] = item[key][innerKey]
          })
        } else {
          newObj[key] = item[key]
        }
      })
    }
    return newObj
  }

  // Helper function to return an object based on a well known value
  private _getKeyByValue(object: any, value: any) {
    return Object.keys(object).find(key => object[key] === value)
  }

  /** Resolve once the document has been parsed (or immediately if it already has) */
  private async _waitUntilDocumentReady() {
    return new Promise(function(resolve) {
      if (!document || !window) {
        return resolve(null)
      }
      // Matches readyState "loaded", "interactive" and "complete"
      const loadedAlready = /^loaded|^i|^c/.test(document.readyState)
      if (loadedAlready) {
        return resolve(null)
      }

      function onReady() {
        resolve(null)
        document.removeEventListener('DOMContentLoaded', onReady)
        window.removeEventListener('load', onReady)
      }

      document.addEventListener('DOMContentLoaded', onReady)
      window.addEventListener('load', onReady)
    })
  }

  private _paintCaptchaBusy($iframe: HTMLIFrameElement) {
    try {
      if (this.opts.visualFeedback) {
        $iframe.style.filter = `opacity(60%) hue-rotate(400deg)` // violet
      }
    } catch (error) {
      // noop - visual feedback is purely cosmetic
    }
    return $iframe
  }

  private _paintCaptchaSolved($iframe: HTMLIFrameElement) {
    try {
      if (this.opts.visualFeedback) {
        $iframe.style.filter = `opacity(60%) hue-rotate(230deg)` // green
      }
    } catch (error) {
      // noop - visual feedback is purely cosmetic
    }
    return $iframe
  }

  private _findVisibleIframeNodes() {
    return Array.from(
      document.querySelectorAll(
        this.getFrameSelectorForId('anchor', '') // intentionally blank
      )
    )
  }

  private _findVisibleIframeNodeById(id?: string) {
    return document.querySelector(
      this.getFrameSelectorForId('anchor', id)
    )
  }

  private _hideChallengeWindowIfPresent(id: string = '') {
    let frame: HTMLElement | null = document.querySelector(
      this.getFrameSelectorForId('bframe', id)
    )
    this.log(' - _hideChallengeWindowIfPresent', { id, hasFrame: !!frame })
    if (!frame) {
      return
    }
    // Walk up to the outermost container that is a direct child of <body>
    while (
      frame &&
      frame.parentElement &&
      frame.parentElement !== document.body
    ) {
      frame = frame.parentElement
    }
    if (frame) {
      frame.style.visibility = 'hidden'
    }
  }

  // There's so many different possible deployments URLs that we better generate them
  private _generateFrameSources(): FrameSources {
    const protos = ['http', 'https']
    const hosts = [
      'google.com',
      'www.google.com',
      'recaptcha.net',
      'www.recaptcha.net'
    ]
    const origins = protos.flatMap(proto =>
      hosts.map(host => `${proto}://${host}`)
    )
    const paths = {
      anchor: ['/recaptcha/api2/anchor', '/recaptcha/enterprise/anchor'],
      bframe: ['/recaptcha/api2/bframe', '/recaptcha/enterprise/bframe']
    }
    return {
      anchor: origins.flatMap(origin =>
        paths.anchor.map(path => `${origin}${path}`)
      ),
      bframe: origins.flatMap(origin =>
        paths.bframe.map(path => `${origin}${path}`)
      )
    }
  }

  /** Build a CSS selector matching anchor/bframe iframes whose name starts with the given id */
  private getFrameSelectorForId(type: 'anchor' | 'bframe' = 'anchor', id = '') {
    const namePrefix = type === 'anchor' ? 'a' : 'c'
    return this.frameSources[type]
      .map(src => `iframe[src^='${src}'][name^="${namePrefix}-${id}"]`)
      .join(',')
  }

  private getClients() {
    // Bail out early if there's no indication of recaptchas
    if (!window || !window.__google_recaptcha_client) return
    if (!window.___grecaptcha_cfg || !window.___grecaptcha_cfg.clients) {
      return
    }
    if (!Object.keys(window.___grecaptcha_cfg.clients).length) return
    return window.___grecaptcha_cfg.clients
  }

  private getVisibleIframesIds() {
    // Find all regular visible recaptcha boxes through their iframes
    const result = this._findVisibleIframeNodes()
      .filter($f => this._isVisible($f))
      .map($f => this._paintCaptchaBusy($f))
      .filter($f => $f && $f.getAttribute('name'))
      .map($f => $f.getAttribute('name') || '') // a-841543e13666
      .map(
        rawId => rawId.split('-').slice(-1)[0] // a-841543e13666 => 841543e13666
      )
      .filter(id => id)
    this.log('getVisibleIframesIds', result)
    return result
  }

  // TODO: Obsolete with recent changes
  private getInvisibleIframesIds() {
    // Find all invisible recaptcha boxes through their iframes (only the ones with an active challenge window)
    const result = this._findVisibleIframeNodes()
      .filter($f => $f && $f.getAttribute('name'))
      .map($f => $f.getAttribute('name') || '') // a-841543e13666
      .map(
        rawId => rawId.split('-').slice(-1)[0] // a-841543e13666 => 841543e13666
      )
      .filter(id => id)
      .filter(
        id =>
          document.querySelectorAll(this.getFrameSelectorForId('bframe', id))
            .length
      )
    this.log('getInvisibleIframesIds', result)
    return result
  }

  private getIframesIds() {
    // Find all recaptcha boxes through their iframes, check for invisible ones as fallback
    const results = [
      ...this.getVisibleIframesIds(),
      ...this.getInvisibleIframesIds()
    ]
    this.log('getIframesIds', results)
    // Deduplicate results by using the unique id as key
    const dedup = Array.from(new Set(results))
    this.log('getIframesIds - dedup', dedup)
    return dedup
  }

  private isEnterpriseCaptcha(id?: string) {
    if (!id) return false
    // The only way to determine if a captcha is an enterprise one is by looking at their iframes
    const prefix = 'iframe[src*="/recaptcha/"][src*="/enterprise/"]'
    const nameSelectors = [`[name^="a-${id}"]`, `[name^="c-${id}"]`]
    const fullSelector = nameSelectors.map(name => prefix + name).join(',')
    return document.querySelectorAll(fullSelector).length > 0
  }

  private isInvisible(id?: string) {
    if (!id) return false
    const selector = `iframe[src*="/recaptcha/"][src*="/anchor"][name="a-${id}"][src*="&size=invisible"]`
    return document.querySelectorAll(selector).length > 0
  }

  /** Whether an active challenge popup is open */
  private hasActiveChallengePopup(id?: string) {
    if (!id) return false
    const selector = `iframe[src*="/recaptcha/"][src*="/bframe"][name="c-${id}"]`
    const elem = document.querySelector(selector)
    if (!elem) {
      return false
    }
    return this._isInViewport(elem) // note: _isVisible doesn't work here as the outer div is hidden, not the iframe itself
  }

  /** Whether an (invisible) captcha has a challenge bframe - otherwise it's a score based captcha */
  private hasChallengeFrame(id?: string) {
    if (!id) return false
    return (
      document.querySelectorAll(this.getFrameSelectorForId('bframe', id))
        .length > 0
    )
  }

  private isInViewport(id?: string) {
    if (!id) return
    const prefix = 'iframe[src*="recaptcha"]'
    const nameSelectors = [`[name^="a-${id}"]`, `[name^="c-${id}"]`]
    const fullSelector = nameSelectors.map(name => prefix + name).join(',')
    const elem = document.querySelector(fullSelector)
    if (!elem) {
      return false
    }
    return this._isInViewport(elem)
  }

  private getResponseInputById(id?: string) {
    if (!id) return
    const $iframe = this._findVisibleIframeNodeById(id)
    if (!$iframe) return
    const $parentForm = $iframe.closest(`form`)
    if ($parentForm) {
      return $parentForm.querySelector(`[name='g-recaptcha-response']`)
    }
    // Not all reCAPTCHAs are in forms
    // https://github.com/berstend/puppeteer-extra/issues/57
    if (document && document.body) {
      return document.body.querySelector(`[name='g-recaptcha-response']`)
    }
  }

  /**
   * Look up the recaptcha "client" that owns the given iframe id and return a
   * flattened copy of it, or `undefined` when no client references that id.
   */
  private getClientById(id?: string) {
    if (!id) return
    const clients = this.getClients()
    // Lookup captcha "client" info using extracted id
    let client: any = Object.values(clients || {})
      .filter(obj => this._getKeyByValue(obj, id))
      .shift() // returns first entry in array or undefined
    this.log(' - getClientById:client', { id, hasClient: !!client })
    if (!client) return
    try {
      client = this._flattenObject(client) as any
      client.widgetId = client.id
      client.id = id
      this.log(' - getClientById:client:flatten', { id, hasClient: !!client })
    } catch (err) {
      this.log(' - getClientById:client ERROR', err.toString())
    }
    return client
  }

  private extractInfoFromClient(client?: any) {
    if (!client) return
    const info: types.CaptchaInfo = this._pick(['sitekey', 'callback'])(client)
    if (!info.sitekey) return
    info._vendor = 'recaptcha'
    info.id = client.id
    info.s = client.s // google site specific
    info.widgetId = client.widgetId
    info.display = this._pick([
      'size',
      'top',
      'left',
      'width',
      'height',
      'theme'
    ])(client)
    if (client && client.action) {
      info.action = client.action
    }
    // callbacks can be strings or function refs
    if (info.callback && typeof info.callback === 'function') {
      info.callback = info.callback.name || 'anonymous'
    }
    if (document && document.location) info.url = document.location.href
    return info
  }

  /**
   * Detect recaptchas on the current page.
   *
   * @returns `{ captchas, error }` - never throws, errors are returned in `error`
   */
  public async findRecaptchas() {
    const result = {
      captchas: [] as (types.CaptchaInfo | undefined)[],
      error: null as any
    }
    try {
      await this._waitUntilDocumentReady()
      const clients = this.getClients()
      this.log('findRecaptchas', {
        url: document.location.href,
        hasClients: !!clients
      })
      if (!clients) return result
      result.captchas = this.getIframesIds()
        .map(id => this.getClientById(id))
        .map(client => this.extractInfoFromClient(client))
        .map(info => {
          this.log(' - captchas:info', info)
          if (!info) return
          const $input = this.getResponseInputById(info.id)
          info.hasResponseElement = !!$input
          return info
        })
        .filter(info => !!info && !!info.sitekey)
        .map(info => {
          info.sitekey = info.sitekey.trim()
          info.isEnterprise = this.isEnterpriseCaptcha(info.id)
          info.isInViewport = this.isInViewport(info.id)
          info.isInvisible = this.isInvisible(info.id)
          info._type = 'checkbox'
          if (info.isInvisible) {
            info._type = 'invisible'
            info.hasActiveChallengePopup = this.hasActiveChallengePopup(info.id)
            info.hasChallengeFrame = this.hasChallengeFrame(info.id)
            if (!info.hasChallengeFrame) {
              info._type = 'score'
            }
          }
          return info
        })
    } catch (error) {
      result.error = error
      return result
    }
    this.log('findRecaptchas - result', {
      captchaNum: result.captchas.length,
      result
    })
    return result
  }

  /**
   * Enter the provided solutions (`data.solutions`) into the page.
   *
   * A solution whose client or iframe cannot be found is reported through the
   * `error` property of its own `solved` entry and does not abort the others.
   *
   * @returns `{ solved, error }` - never throws, errors are returned in `error`
   */
  public async enterRecaptchaSolutions() {
    const result = {
      solved: [] as (types.CaptchaSolved | undefined)[],
      error: null as any
    }
    try {
      await this._waitUntilDocumentReady()
      const clients = this.getClients()
      const solutions = this.data.solutions
      this.log('enterRecaptchaSolutions', {
        url: document.location.href,
        hasClients: !!clients,
        solutionNum: solutions ? solutions.length : 0
      })

      if (!clients) {
        result.error = 'No recaptchas found'
        return result
      }
      if (!solutions || !solutions.length) {
        result.error = 'No solutions provided'
        return result
      }

      result.solved = solutions.map(solution => {
        const client = this.getClientById(solution.id)
        this.log(' - client', !!client)
        if (!client) {
          // Without this guard a single unknown id would throw and discard all other solutions
          const missing: types.CaptchaSolved = {
            _vendor: 'recaptcha',
            id: solution.id,
            responseElement: false,
            responseCallback: false
          }
          missing.error = `Client not found for id '${solution.id}'`
          return missing
        }
        const solved: types.CaptchaSolved = {
          _vendor: 'recaptcha',
          id: client.id,
          responseElement: false,
          responseCallback: false
        }
        const $iframe = this._findVisibleIframeNodeById(solved.id)
        this.log(' - $iframe', !!$iframe)
        if (!$iframe) {
          solved.error = `Iframe not found for id '${solved.id}'`
          return solved
        }

        if (this.hasActiveChallengePopup(solved.id)) {
          // Hide if present challenge window
          this._hideChallengeWindowIfPresent(solved.id)
        }

        // Enter solution in response textarea
        const $input = this.getResponseInputById(solved.id)
        this.log(' - $input', !!$input)
        if ($input) {
          $input.innerHTML = solution.text
          solved.responseElement = true
        }

        // Enter solution in optional callback
        this.log(' - callback', !!client.callback)
        if (client.callback) {
          try {
            this.log(' - callback - type', {
              typeof: typeof client.callback,
              value: '' + client.callback
            })
            if (typeof client.callback === 'function') {
              client.callback.call(window, solution.text)
            } else {
              // NOTE(review): evaluates a string taken from the page's recaptcha config;
              // kept as-is because string callbacks are resolved this way, but worth hardening.
              eval(client.callback).call(window, solution.text) // tslint:disable-line
              this.log(' - callback - aftereval')
            }
            solved.responseCallback = true
          } catch (error) {
            solved.error = error
          }
        }

        // Finishing up
        solved.isSolved = solved.responseCallback || solved.responseElement
        solved.solvedAt = new Date()
        this._paintCaptchaSolved($iframe)
        this.log(' - solved', solved)
        return solved
      })
    } catch (error) {
      result.error = error
      return result
    }
    this.log('enterRecaptchaSolutions - finished', result)
    return result
  }
}
"responseCallback": false, "isSolved": true, "solvedAt": {} }] "error": null } */ ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/src/detection.test.ts ================================================ import test from 'ava' import RecaptchaPlugin from './index' import { addExtra } from 'puppeteer-extra' const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] const getBrowser = async (url = '', opts = {}) => { const puppeteer = addExtra(require('puppeteer')) const recaptchaPlugin = RecaptchaPlugin(opts) puppeteer.use(recaptchaPlugin) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS, headless: true, defaultViewport: null }) const page = await browser.newPage() await page.goto(url, { waitUntil: 'networkidle0' }) return { browser, page } } test('will correctly detect v2-checkbox-auto.html', async t => { const url = 'https://berstend.github.io/static/recaptcha/v2-checkbox-auto.html' const { browser, page } = await getBrowser(url) const { captchas, error } = await (page as any).findRecaptchas() t.is(error, null) t.is(captchas.length, 1) const c = captchas[0] t.is(c._vendor, 'recaptcha') t.is(c._type, 'checkbox') t.is(c.url, url) t.true(c.sitekey && c.sitekey.length > 5) t.is(c.widgetId, 0) t.not(c.display, undefined) t.is(c.callback, undefined) t.is(c.hasResponseElement, true) t.is(c.isEnterprise, false) t.is(c.isInViewport, true) t.is(c.isInvisible, false) await browser.close() }) test('will correctly detect v2-checkbox-auto-nowww.html', async t => { const url = 'https://berstend.github.io/static/recaptcha/v2-checkbox-auto-nowww.html' const { browser, page } = await getBrowser(url) const { captchas, error } = await (page as any).findRecaptchas() t.is(error, null) t.is(captchas.length, 1) const c = captchas[0] t.is(c._vendor, 'recaptcha') t.is(c.callback, undefined) t.is(c.hasResponseElement, true) t.is(c.url, url) t.true(c.sitekey && c.sitekey.length > 5) t.is(c.widgetId, 0) t.not(c.display, 
undefined) await browser.close() }) test('will correctly detect v2-checkbox-auto-recaptchadotnet.html', async t => { const url = 'https://berstend.github.io/static/recaptcha/v2-checkbox-auto-recaptchadotnet.html' const { browser, page } = await getBrowser(url) const { captchas, error } = await (page as any).findRecaptchas() t.is(error, null) t.is(captchas.length, 1) const c = captchas[0] t.is(c._vendor, 'recaptcha') t.is(c.callback, undefined) t.is(c.hasResponseElement, true) t.is(c.url, url) t.true(c.sitekey && c.sitekey.length > 5) t.is(c.widgetId, 0) t.not(c.display, undefined) await browser.close() }) test('will correctly detect enterprise-checkbox-auto.html', async t => { const url = 'https://berstend.github.io/static/recaptcha/enterprise-checkbox-auto.html' const { browser, page } = await getBrowser(url) const { captchas, error } = await (page as any).findRecaptchas() t.is(error, null) t.is(captchas.length, 1) const c = captchas[0] t.is(c._vendor, 'recaptcha') t.is(c.callback, undefined) t.is(c.isEnterprise, true) t.is(c.hasResponseElement, true) t.is(c.url, url) t.true(c.sitekey && c.sitekey.length > 5) t.is(c.widgetId, 0) t.not(c.display, undefined) await browser.close() }) test('will correctly detect enterprise-checkbox-auto-recaptchadotnet.html', async t => { const url = 'https://berstend.github.io/static/recaptcha/enterprise-checkbox-auto-recaptchadotnet.html' const { browser, page } = await getBrowser(url) const { captchas, error } = await (page as any).findRecaptchas() t.is(error, null) t.is(captchas.length, 1) const c = captchas[0] t.is(c._vendor, 'recaptcha') t.is(c.callback, undefined) t.is(c.isEnterprise, true) t.is(c.hasResponseElement, true) t.is(c.url, url) t.true(c.sitekey && c.sitekey.length > 5) t.is(c.widgetId, 0) t.not(c.display, undefined) await browser.close() }) test('will correctly detect enterprise-checkbox-explicit.html', async t => { const url = 'https://berstend.github.io/static/recaptcha/enterprise-checkbox-explicit.html' const { 
// Verifies detection of an invisible reCAPTCHA whose challenge popup is open.
// The browser is closed in a `finally` block so that the early return (taken
// when no challenge was presented) does not leak the Chromium process.
test('will correctly detect v2-invisible-auto.html - active challenge', async t => {
  const url =
    'https://berstend.github.io/static/recaptcha/v2-invisible-explicit.html'
  const { browser, page } = await getBrowser('about:blank')

  // Older puppeteer versions only ship `page.waitFor`
  const pause = async (ms: number) => {
    if (page.waitForTimeout) {
      await page.waitForTimeout(ms)
    } else {
      await page.waitFor(ms)
    }
  }

  try {
    await page.setUserAgent('BOT') // we want to trigger the invisible recaptcha challenge window
    await page.goto(url, { waitUntil: 'networkidle2' })
    await pause(1000)

    await page.click('#submit')
    await pause(1000)

    if (page.url() !== url) {
      // we didn't get a challenge
      t.truthy('foo')
      return
    }

    const { captchas, error } = await (page as any).findRecaptchas()
    t.is(error, null)
    t.is(captchas.length, 1)

    const c = captchas[0]
    t.true(c.sitekey && c.sitekey.length > 5)
    t.not(c.display, undefined)
    t.not(c.id, undefined)

    // Strip the environment specific fields before the structural comparison
    delete c.url
    delete c.sitekey
    delete c.display
    delete c.id

    t.deepEqual(c, {
      callback: 'onSubmit',
      _vendor: 'recaptcha',
      s: null,
      widgetId: 0,
      hasResponseElement: true,
      isEnterprise: false,
      isInViewport: true,
      isInvisible: true,
      _type: 'invisible',
      hasActiveChallengePopup: true, // the important bit
      hasChallengeFrame: true
    })
  } finally {
    await browser.close()
  }
})
delete c.url delete c.sitekey delete c.display delete c.id t.deepEqual(c, { _vendor: 'recaptcha', s: null, widgetId: 100000, hasResponseElement: true, isEnterprise: false, isInViewport: true, isInvisible: true, _type: 'score', hasActiveChallengePopup: false, hasChallengeFrame: false // important }) await browser.close() }) ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/src/index.test.ts ================================================ import test from 'ava' import RecaptchaPlugin from './index' // import * as types from './types' // import { Puppeteer } from './puppeteer-mods' import { addExtra } from 'puppeteer-extra' const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] test('will detect reCAPTCHAs', async t => { const puppeteer = addExtra(require('puppeteer')) const recaptchaPlugin = RecaptchaPlugin() puppeteer.use(recaptchaPlugin) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS, headless: true }) const page = await browser.newPage() const url = 'https://www.google.com/recaptcha/api2/demo' await page.goto(url, { waitUntil: 'networkidle0' }) const { captchas, error } = await (page as any).findRecaptchas() t.is(error, null) t.is(captchas.length, 1) const c = captchas[0] t.is(c._vendor, 'recaptcha') t.is(c.callback, 'onSuccess') t.is(c.hasResponseElement, true) t.is(c.url, url) t.true(c.sitekey && c.sitekey.length > 5) await browser.close() }) test('will detect hCAPTCHAs', async t => { const puppeteer = addExtra(require('puppeteer')) const recaptchaPlugin = RecaptchaPlugin() puppeteer.use(recaptchaPlugin) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS, headless: true }) const page = await browser.newPage() const urls = [ 'https://accounts.hcaptcha.com/demo', 'https://democaptcha.com/demo-form-eng/hcaptcha.html' ] for (const url of urls) { await page.goto(url, { waitUntil: 'networkidle0' }) const { captchas, error } = await (page as any).findRecaptchas() t.is(error, null) 
t.is(captchas.length, 1) const c = captchas[0] t.is(c._vendor, 'hcaptcha') t.is(c.url, url) t.true(c.sitekey && c.sitekey.length > 5) } await browser.close() }) test('will detect active hCAPTCHA challenges', async t => { const puppeteer = addExtra(require('puppeteer')) const recaptchaPlugin = RecaptchaPlugin() puppeteer.use(recaptchaPlugin) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS, headless: true }) const page = await browser.newPage() const urls = [ 'https://accounts.hcaptcha.com/demo', 'https://democaptcha.com/demo-form-eng/hcaptcha.html' ] for (const url of urls) { await page.goto(url, { waitUntil: 'networkidle0' }) await page.evaluate(() => (window as any).hcaptcha.execute()) // trigger challenge popup await page.waitForTimeout(2 * 1000) await page.evaluate(() => document .querySelector(`[data-hcaptcha-widget-id]:not([src*='invisible'])`) .remove() ) // remove regular checkbox so we definitely test against the popup const { captchas, error } = await (page as any).findRecaptchas() t.is(error, null) t.is(captchas.length, 1) const c = captchas[0] t.is(c._vendor, 'hcaptcha') t.is(c.url, url) t.true(c.sitekey && c.sitekey.length > 5) } await browser.close() }) test('will not throw when no captchas are found', async t => { const puppeteer = addExtra(require('puppeteer')) const recaptchaPlugin = RecaptchaPlugin() puppeteer.use(recaptchaPlugin) const browser = await puppeteer.launch({ args: PUPPETEER_ARGS, headless: true }) const page = await browser.newPage() const url = 'https://www.example.com' await page.goto(url, { waitUntil: 'networkidle0' }) const { captchas, error } = await (page as any).findRecaptchas() t.is(error, null) t.is(captchas.length, 0) await browser.close() }) // TODO: test/mock the rest ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/src/index.ts ================================================ import { PuppeteerExtraPlugin } from 'puppeteer-extra-plugin' import { Browser, Frame, 
Page } from 'puppeteer'

import * as types from './types'

import { RecaptchaContentScript } from './content'
import { HcaptchaContentScript } from './content-hcaptcha'
import * as TwoCaptcha from './provider/2captcha'

/** Solution providers shipped with the plugin, looked up by `provider.id` when no custom `fn` is given. */
export const BuiltinSolutionProviders: types.SolutionProvider[] = [
  {
    id: TwoCaptcha.PROVIDER_ID,
    fn: TwoCaptcha.getSolutions
  }
]

/**
 * A puppeteer-extra plugin to automatically detect and solve reCAPTCHAs.
 * @noInheritDoc
 */
export class PuppeteerExtraPluginRecaptcha extends PuppeteerExtraPlugin {
  private contentScriptDebug: debug.Debugger

  constructor(opts: Partial<types.PluginOptions>) {
    super(opts)
    this.debug('Initialized', this.opts)

    this.contentScriptDebug = this.debug.extend('cs')
  }

  get name() {
    return 'recaptcha'
  }

  /** Default plugin options, used for every option the user did not provide. */
  get defaults(): types.PluginOptions {
    return {
      visualFeedback: true,
      throwOnError: false,
      solveInViewportOnly: false,
      solveScoreBased: false,
      solveInactiveChallenges: false
    }
  }

  get opts(): types.PluginOptions {
    return super.opts as any
  }

  /** The subset of options that is serialized into the content script. */
  get contentScriptOpts(): types.ContentScriptOpts {
    const { visualFeedback } = this.opts
    return {
      visualFeedback,
      debugBinding: this.contentScriptDebug.enabled
        ? this.debugBindingName
        : undefined
    }
  }

  /** An optional global window object we use for contentscript debug logging */
  private debugBindingName = '___pepr_cs'

  /**
   * Turn an `error` value as found on our result objects into a readable message.
   *
   * The aggregated `error` may be a failed solution/solved entry (an object carrying
   * its own `error` property) rather than a string, in which case `new Error(value)`
   * would read "[object Object]".
   */
  private _errorMessage(error: any): string {
    if (
      error &&
      typeof error === 'object' &&
      !(error instanceof Error) &&
      error.error
    ) {
      return String(error.error)
    }
    return String(error)
  }

  /**
   * Build the source of a self-invoking async function that instantiates the
   * content script class of the given vendor and calls `fn` on it.
   *
   * @param vendor - Which content script to use ('recaptcha' or 'hcaptcha')
   * @param fn - The content script method to invoke
   * @param data - Optional data made available to the content script as `DATA`
   * @returns The script source, meant to be passed to `page.evaluate`
   */
  private _generateContentScript(
    vendor: types.CaptchaVendor,
    fn: 'findRecaptchas' | 'enterRecaptchaSolutions',
    data?: any
  ) {
    this.debug('_generateContentScript', vendor, fn, data)
    let scriptSource = RecaptchaContentScript.toString()
    let scriptName = 'RecaptchaContentScript'
    if (vendor === 'hcaptcha') {
      scriptSource = HcaptchaContentScript.toString()
      scriptName = 'HcaptchaContentScript'
    }
    // Some bundlers transform classes to anonymous classes that are assigned to
    // vars (e.g. esbuild). In such cases, `unexpected token '{'` errors are thrown
    // once the script is executed. Let's bring class name back to script in such
    // cases!
    scriptSource = scriptSource.replace(/class \{/, `class ${scriptName} {`)
    return `(async() => {
      const DATA = ${JSON.stringify(data || null)}
      const OPTS = ${JSON.stringify(this.contentScriptOpts)}
      ${scriptSource}
      const script = new ${scriptName}(OPTS, DATA)
      return script.${fn}()
    })()`
  }

  /** Based on the user defined options we may want to filter out certain captchas (inactive, etc) */
  private _filterRecaptchas(recaptchas: types.CaptchaInfo[] = []) {
    const results = recaptchas.map((c: types.FilteredCaptcha) => {
      if (
        c._type === 'invisible' &&
        !c.hasActiveChallengePopup &&
        !this.opts.solveInactiveChallenges
      ) {
        c.filtered = true
        c.filteredReason = 'solveInactiveChallenges'
      }
      if (c._type === 'score' && !this.opts.solveScoreBased) {
        c.filtered = true
        c.filteredReason = 'solveScoreBased'
      }
      if (
        c._type === 'checkbox' &&
        !c.isInViewport &&
        this.opts.solveInViewportOnly
      ) {
        c.filtered = true
        c.filteredReason = 'solveInViewportOnly'
      }
      if (c.filtered) {
        this.debug('Filtered out captcha based on provided options', {
          id: c.id,
          reason: c.filteredReason,
          captcha: c
        })
      }
      return c
    })
    return {
      captchas: results.filter(c => !c.filtered) as types.CaptchaInfo[],
      filtered: results.filter(c => c.filtered)
    }
  }

  /**
   * Detect reCAPTCHAs and hCAPTCHAs on the given page or frame.
   *
   * @param page - The page or frame to inspect
   * @returns The captchas to solve, the reCAPTCHAs filtered out by the plugin
   * options and an optional error reported by the content scripts
   * @throws If `throwOnError` is set and a content script reported an error
   */
  async findRecaptchas(page: Page | Frame) {
    this.debug('findRecaptchas')
    // As this might be called very early while recaptcha is still loading
    // we add some extra waiting logic for developer convenience.
    const hasRecaptchaScriptTag = await page.$(
      `script[src*="/recaptcha/api.js"], script[src*="/recaptcha/enterprise.js"]`
    )
    this.debug('hasRecaptchaScriptTag', !!hasRecaptchaScriptTag)
    if (hasRecaptchaScriptTag) {
      this.debug('waitForRecaptchaClient - start', new Date())
      await page
        .waitForFunction(
          `
        (function() {
          return Object.keys((window.___grecaptcha_cfg || {}).clients || {}).length
        })()
      `,
          { polling: 200, timeout: 10 * 1000 }
        )
        .catch(this.debug)
      this.debug('waitForRecaptchaClient - end', new Date()) // used as timer
    }
    const hasHcaptchaScriptTag = await page.$(
      `script[src*="hcaptcha.com/1/api.js"]`
    )
    this.debug('hasHcaptchaScriptTag', !!hasHcaptchaScriptTag)
    if (hasHcaptchaScriptTag) {
      this.debug('wait:hasHcaptchaScriptTag - start', new Date())
      // Best effort like the reCAPTCHA wait above: a timeout is only logged,
      // detection is attempted regardless.
      await page
        .waitForFunction(
          `
        (function() {
          return window.hcaptcha
        })()
      `,
          { polling: 200, timeout: 10 * 1000 }
        )
        .catch(this.debug)
      this.debug('wait:hasHcaptchaScriptTag - end', new Date()) // used as timer
    }

    const onDebugBindingCalled = (message: string, data: any) => {
      this.contentScriptDebug(message, data)
    }

    if (this.contentScriptDebug.enabled) {
      if ('exposeFunction' in page) {
        // Registering a binding name twice is rejected, which would break every
        // repeated call on the same page - debug logging must never be fatal.
        await page
          .exposeFunction(this.debugBindingName, onDebugBindingCalled)
          .catch(this.debug)
      }
    }
    // Even without a recaptcha script tag we're trying, just in case.
    const resultRecaptcha: types.FindRecaptchasResult = (await page.evaluate(
      this._generateContentScript('recaptcha', 'findRecaptchas')
    )) as any
    const resultHcaptcha: types.FindRecaptchasResult = (await page.evaluate(
      this._generateContentScript('hcaptcha', 'findRecaptchas')
    )) as any

    const filterResults = this._filterRecaptchas(resultRecaptcha.captchas)
    const totalRecaptchas =
      filterResults.filtered.length + filterResults.captchas.length
    this.debug(
      `Filter results: ${filterResults.filtered.length} of ${totalRecaptchas} captchas filtered from results.`
    )

    const response: types.FindRecaptchasResult = {
      captchas: [...filterResults.captchas, ...resultHcaptcha.captchas],
      filtered: filterResults.filtered,
      error: resultRecaptcha.error || resultHcaptcha.error
    }
    this.debug('findRecaptchas', response)
    if (this.opts.throwOnError && response.error) {
      throw new Error(this._errorMessage(response.error))
    }
    return response
  }

  /**
   * Request solutions for the given captchas from a solution provider.
   *
   * @param captchas - The captchas to solve
   * @param provider - Optional provider, defaults to the one from the plugin options
   * @returns The provider response (`solutions` and an optional `error`)
   * @throws If no usable provider is configured, or if `throwOnError` is set
   * and the provider reported an error
   */
  async getRecaptchaSolutions(
    captchas: types.CaptchaInfo[],
    provider?: types.SolutionProvider
  ) {
    this.debug('getRecaptchaSolutions', { captchaNum: captchas.length })
    provider = provider || this.opts.provider
    if (
      !provider ||
      (!provider.token && !provider.fn) ||
      (provider.token && provider.token === 'XXXXXXX' && !provider.fn)
    ) {
      throw new Error('Please provide a solution provider to the plugin.')
    }
    let fn = provider.fn
    if (!fn) {
      const builtinProvider = BuiltinSolutionProviders.find(
        p => p.id === (provider || {}).id
      )
      if (!builtinProvider || !builtinProvider.fn) {
        throw new Error(
          `Cannot find builtin provider with id '${provider.id}'.`
        )
      }
      fn = builtinProvider.fn
    }
    const response = await fn.call(
      this,
      captchas,
      provider.token,
      provider.opts || {}
    )
    response.error =
      response.error ||
      response.solutions.find((s: types.CaptchaSolution) => !!s.error)
    this.debug('getRecaptchaSolutions', response)
    if (response && response.error) {
      console.warn(
        'PuppeteerExtraPluginRecaptcha: An error occured during "getRecaptchaSolutions":',
        response.error
      )
    }
    if (this.opts.throwOnError && response.error) {
      throw new Error(this._errorMessage(response.error))
    }
    return response
  }

  /**
   * Enter the given solutions into the captchas on the page or frame.
   *
   * The content script of a vendor is only run when there is a solution for it.
   *
   * @param page - The page or frame containing the captchas
   * @param solutions - The solutions to enter
   * @returns The combined `solved` entries of both vendors and an optional error
   * @throws If `throwOnError` is set and an error was reported
   */
  async enterRecaptchaSolutions(
    page: Page | Frame,
    solutions: types.CaptchaSolution[]
  ) {
    this.debug('enterRecaptchaSolutions', { solutions })

    const hasRecaptcha = !!solutions.find(s => s._vendor === 'recaptcha')
    const solvedRecaptcha: types.EnterRecaptchaSolutionsResult = hasRecaptcha
      ? ((await page.evaluate(
          this._generateContentScript('recaptcha', 'enterRecaptchaSolutions', {
            solutions
          })
        )) as any)
      : { solved: [] }
    const hasHcaptcha = !!solutions.find(s => s._vendor === 'hcaptcha')
    const solvedHcaptcha: types.EnterRecaptchaSolutionsResult = hasHcaptcha
      ? ((await page.evaluate(
          this._generateContentScript('hcaptcha', 'enterRecaptchaSolutions', {
            solutions
          })
        )) as any)
      : { solved: [] }

    const response: types.EnterRecaptchaSolutionsResult = {
      solved: [...solvedRecaptcha.solved, ...solvedHcaptcha.solved],
      error: solvedRecaptcha.error || solvedHcaptcha.error
    }
    response.error = response.error || response.solved.find(s => !!s.error)
    this.debug('enterRecaptchaSolutions', response)
    if (this.opts.throwOnError && response.error) {
      throw new Error(this._errorMessage(response.error))
    }
    return response
  }

  /**
   * Convenience method: find captchas, fetch solutions and enter them.
   *
   * @param page - The page or frame to work on
   * @returns The accumulated results of all steps
   * @throws If `throwOnError` is set and any step reported an error
   */
  async solveRecaptchas(
    page: Page | Frame
  ): Promise<types.SolveRecaptchasResult> {
    this.debug('solveRecaptchas')
    const response: types.SolveRecaptchasResult = {
      captchas: [],
      filtered: [],
      solutions: [],
      solved: [],
      error: null
    }
    try {
      // If `this.opts.throwOnError` is set any of the
      // following will throw and abort execution.
      const {
        captchas,
        filtered,
        error: captchasError
      } = await this.findRecaptchas(page)
      response.captchas = captchas
      response.filtered = filtered
      // Report detection errors even when there is nothing left to solve
      response.error = captchasError || null

      if (captchas.length) {
        const {
          solutions,
          error: solutionsError
        } = await this.getRecaptchaSolutions(response.captchas)
        response.solutions = solutions

        const {
          solved,
          error: solvedError
        } = await this.enterRecaptchaSolutions(page, response.solutions)
        response.solved = solved

        response.error = captchasError || solutionsError || solvedError
      }
    } catch (error) {
      response.error = error.toString()
    }
    this.debug('solveRecaptchas', response)
    if (this.opts.throwOnError && response.error) {
      throw new Error(this._errorMessage(response.error))
    }
    return response
  }

  /** Attach the plugin methods to a page or frame, bound to that page or frame. */
  private _addCustomMethods(prop: Page | Frame) {
    prop.findRecaptchas = async () => this.findRecaptchas(prop)
    prop.getRecaptchaSolutions = async (
      captchas: types.CaptchaInfo[],
      provider?: types.SolutionProvider
    ) => this.getRecaptchaSolutions(captchas, provider)
    prop.enterRecaptchaSolutions = async (solutions: types.CaptchaSolution[]) =>
      this.enterRecaptchaSolutions(prop, solutions)
    // Add convenience methods that wraps all others
    prop.solveRecaptchas = async () => this.solveRecaptchas(prop)
  }

  async onPageCreated(page: Page) {
    this.debug('onPageCreated', page.url())
    // Make sure we can run our content script
    await page.setBypassCSP(true)

    // Add custom page methods
    this._addCustomMethods(page)

    // Add custom methods to potential frames as well
    page.on('frameattached', frame => {
      if (!frame) return
      this._addCustomMethods(frame)
    })
  }

  /** Add additions to already existing pages and frames */
  async onBrowser(browser: Browser) {
    const pages = await browser.pages()
    for (const page of pages) {
      this._addCustomMethods(page)
      for (const frame of page.mainFrame().childFrames()) {
        this._addCustomMethods(frame)
      }
    }
  }
}

/** Default export, PuppeteerExtraPluginRecaptcha */
const defaultExport = (options?: Partial<types.PluginOptions>) => {
  return new PuppeteerExtraPluginRecaptcha(options || {})
}

export default defaultExport
================================================
FILE: packages/puppeteer-extra-plugin-recaptcha/src/playwright-mods.d.ts
================================================
// Extend Playwright interfaces transparently to the end user.

import {} from 'playwright-core'

import { RecaptchaPluginPageAdditions } from './types'

declare module 'playwright-core' {
  interface Page extends RecaptchaPluginPageAdditions {}
  interface Frame extends RecaptchaPluginPageAdditions {}
}

================================================
FILE: packages/puppeteer-extra-plugin-recaptcha/src/provider/2captcha-api.ts
================================================
// https://github.com/bochkarev-artem/2captcha/blob/master/index.js
// TODO: Create our own API wrapper

var https = require('https')
var url = require('url')
var querystring = require('querystring')

var apiKey
var apiInUrl = 'https://2captcha.com/in.php'
var apiResUrl = 'https://2captcha.com/res.php'
var apiMethod = 'base64'
var SOFT_ID = '2589'

var defaultOptions = {
  pollingInterval: 2000,
  retries: 3
}

/**
 * Poll the result endpoint until the captcha is solved or an error occurs.
 *
 * `callback` is invoked exactly once: with an error, or with
 * `(null, { id, text }, invalid)`. `invalid` is handed back bound to
 * `{ options, captchaId }`.
 */
function pollCaptcha(captchaId, options, invalid, callback) {
  invalid = invalid.bind({ options: options, captchaId: captchaId })
  var finished = false

  // Stop polling and report once, even if several requests are in flight.
  function finish() {
    if (finished) {
      return
    }
    finished = true
    clearInterval(intervalId)
    callback.apply(null, arguments)
  }

  var intervalId = setInterval(function() {
    var httpsRequestOptions = url.parse(
      apiResUrl +
        '?action=get&soft_id=' +
        SOFT_ID +
        '&key=' +
        apiKey +
        '&id=' +
        captchaId
    )
    var request = https.request(httpsRequestOptions, function(response) {
      var body = ''

      response.on('data', function(chunk) {
        body += chunk
      })

      response.on('end', function() {
        if (body === 'CAPCHA_NOT_READY') {
          return
        }

        var result = body.split('|')
        if (result[0] !== 'OK') {
          finish(result[0]) //error
        } else {
          finish(
            null,
            {
              id: captchaId,
              text: result[1]
            },
            invalid
          )
        }
      })
    })
    request.on('error', function(e) {
      request.destroy()
      // The caller is told about the failure, so polling has to stop here as
      // well - otherwise the interval keeps firing after the error was reported.
      finish(e)
    })
    request.end()
  }, options.pollingInterval || defaultOptions.pollingInterval)
}

/** Set the API key used for all subsequent requests. */
export const setApiKey = function(key) {
  apiKey = key
}

/**
 * Submit a base64 encoded image captcha and poll for its solution.
 * `options` may be omitted, in which case the second argument is the callback.
 */
export const decode = function(base64, options, callback) {
  if (!callback) {
    callback = options
    options = defaultOptions
  }
  var httpsRequestOptions = url.parse(apiInUrl)
  httpsRequestOptions.method = 'POST'

  var postData = {
    method: apiMethod,
    key: apiKey,
    soft_id: SOFT_ID,
    body: base64
  }

  postData = querystring.stringify(postData)

  var request = https.request(httpsRequestOptions, function(response) {
    var body = ''

    response.on('data', function(chunk) {
      body += chunk
    })

    response.on('end', function() {
      var result = body.split('|')
      if (result[0] !== 'OK') {
        return callback(result[0])
      }

      pollCaptcha(
        result[1],
        options,
        function(error) {
          var callbackToInitialCallback = callback

          report(this.captchaId)

          if (error) {
            return callbackToInitialCallback('CAPTCHA_FAILED')
          }

          if (!this.options.retries) {
            this.options.retries = defaultOptions.retries
          }
          if (this.options.retries > 1) {
            this.options.retries = this.options.retries - 1
            decode(base64, this.options, callback)
          } else {
            callbackToInitialCallback('CAPTCHA_FAILED_TOO_MANY_TIMES')
          }
        },
        callback
      )
    })
  })
  request.on('error', function(e) {
    request.destroy()
    callback(e)
  })
  request.write(postData)
  request.end()
}

/**
 * Submit a sitekey based captcha and poll for its solution.
 *
 * `captcha` is sent as `googlekey` for the 'userrecaptcha' method and as
 * `sitekey` for the 'hcaptcha' method. `extraData` is merged into the request.
 * `options` may be omitted, in which case the fifth argument is the callback.
 */
export const decodeReCaptcha = function(
  captchaMethod,
  captcha,
  pageUrl,
  extraData,
  options,
  callback
) {
  if (!callback) {
    callback = options
    options = defaultOptions
  }
  var httpsRequestOptions = url.parse(apiInUrl)
  httpsRequestOptions.method = 'POST'

  var postData = {
    method: captchaMethod,
    key: apiKey,
    soft_id: SOFT_ID,
    // googlekey: captcha,
    pageurl: pageUrl,
    ...extraData
  }
  if (captchaMethod === 'userrecaptcha') {
    postData.googlekey = captcha
  }
  if (captchaMethod === 'hcaptcha') {
    postData.sitekey = captcha
  }

  postData = querystring.stringify(postData)

  var request = https.request(httpsRequestOptions, function(response) {
    var body = ''

    response.on('data', function(chunk) {
      body += chunk
    })

    response.on('end', function() {
      var result = body.split('|')
      if (result[0] !== 'OK') {
        return callback(result[0])
      }

      pollCaptcha(
        result[1],
        options,
        function(error) {
          var callbackToInitialCallback = callback

          report(this.captchaId)

          if (error) {
            return callbackToInitialCallback('CAPTCHA_FAILED')
          }

          if (!this.options.retries) {
            this.options.retries = defaultOptions.retries
          }
          if (this.options.retries > 1) {
            this.options.retries = this.options.retries - 1
            decodeReCaptcha(
              captchaMethod,
              captcha,
              pageUrl,
              extraData,
              this.options,
              callback
            )
          } else {
            callbackToInitialCallback('CAPTCHA_FAILED_TOO_MANY_TIMES')
          }
        },
        callback
      )
    })
  })
  request.on('error', function(e) {
    request.destroy()
    callback(e)
  })
  request.write(postData)
  request.end()
}

/**
 * Download the image at `uri` as base64 and submit it through `decode`.
 * `options` may be omitted, in which case the second argument is the callback.
 */
export const decodeUrl = function(uri, options, callback) {
  if (!callback) {
    callback = options
    options = defaultOptions
  }

  // Keep the request options separate from the decode `options` - reusing the
  // same variable would hand the parsed URL to `decode` instead of the options.
  var httpsRequestOptions = url.parse(uri)

  var request = https.request(httpsRequestOptions, function(response) {
    var body = ''
    response.setEncoding('base64')

    response.on('data', function(chunk) {
      body += chunk
    })

    response.on('end', function() {
      decode(body, options, callback)
    })
  })
  request.on('error', function(e) {
    request.destroy()
    callback(e)
  })
  request.end()
}

/**
 * Extract the challenge key from `html`, fetch the challenge and solve its image.
 * `options` may be omitted, in which case the second argument is the callback.
 */
export const solveRecaptchaFromHtml = function(html, options, callback) {
  if (!callback) {
    callback = options
    options = defaultOptions
  }
  var googleUrl = html.split('/challenge?k=')
  if (googleUrl.length < 2) return callback('No captcha found in html')
  googleUrl = googleUrl[1]
  googleUrl = googleUrl.split('"')[0]
  googleUrl = googleUrl.split("'")[0]
  googleUrl = 'https://www.google.com/recaptcha/api/challenge?k=' + googleUrl

  var httpsRequestOptions = url.parse(googleUrl)

  var request = https.request(httpsRequestOptions, function(response) {
    var body = ''

    response.on('data', function(chunk) {
      body += chunk
    })

    response.on('end', function() {
      var challengeArr = body.split("'")
      if (!challengeArr[1]) return callback('Parsing captcha failed')
      var challenge = challengeArr[1]
      if (challenge.length === 0) return callback('Parsing captcha failed')

      decodeUrl(
        'https://www.google.com/recaptcha/api/image?c=' + challenge,
        options,
        function(error, result, invalid) {
          if (result) {
            result.challenge = challenge
          }
          callback(error, result, invalid)
        }
      )
    })
  })
  // Same handling as the other requests in this file: without a listener a
  // network failure would surface as an unhandled 'error' event.
  request.on('error', function(e) {
    request.destroy()
    callback(e)
  })
  request.end()
}

/** Report a captcha as incorrectly solved (fire and forget, the response is ignored). */
export const report = function(captchaId) {
  var reportUrl =
    apiResUrl +
    '?action=reportbad&soft_id=' +
    SOFT_ID +
    '&key=' +
    apiKey +
    '&id=' +
    captchaId
  var options = url.parse(reportUrl)

  var request = https.request(options, function(response) {
    // var body = ''
    // response.on('data', function(chunk) {
    //   body += chunk
    // })
    // response.on('end', function() {})
  })
  // Reporting is best effort, a failed report must not raise an unhandled 'error' event
  request.on('error', function() {
    request.destroy()
  })
  request.end()
}

================================================
FILE: packages/puppeteer-extra-plugin-recaptcha/src/provider/2captcha.ts
================================================
export const PROVIDER_ID = '2captcha'
import * as types from '../types'

import Debug from 'debug'
const debug = Debug(`puppeteer-extra-plugin:recaptcha:${PROVIDER_ID}`)

// const solver = require('./2captcha-api')
import * as solver from './2captcha-api'

/** Difference between two dates in (fractional) seconds. */
const secondsBetweenDates = (before: Date, after: Date) =>
  (after.getTime() - before.getTime()) / 1000

export interface DecodeRecaptchaAsyncResult {
  err?: any
  result?: any
  invalid?: any
}

export interface TwoCaptchaProviderOpts {
  useEnterpriseFlag?: boolean
  useActionValue?: boolean
}

const providerOptsDefaults: TwoCaptchaProviderOpts = {
  useEnterpriseFlag: false, // Seems to make solving chance worse?
  useActionValue: true
}

/**
 * Promise wrapper around the callback based `solver.decodeReCaptcha`.
 *
 * Never rejects: errors are reported through the `err` property of the result.
 */
async function decodeRecaptchaAsync(
  token: string,
  vendor: types.CaptchaVendor,
  sitekey: string,
  url: string,
  extraData: any,
  opts = { pollingInterval: 2000 }
): Promise<DecodeRecaptchaAsyncResult> {
  return new Promise(resolve => {
    const cb = (err: any, result: any, invalid: any) =>
      resolve({ err, result, invalid })
    try {
      solver.setApiKey(token)

      let method = 'userrecaptcha'
      if (vendor === 'hcaptcha') {
        method = 'hcaptcha'
      }
      solver.decodeReCaptcha(method, sitekey, url, extraData, opts, cb)
    } catch (error) {
      return resolve({ err: error })
    }
  })
}

/**
 * Request solutions for all given captchas in parallel.
 *
 * @param captchas - The captchas to solve
 * @param token - The 2captcha API key
 * @param opts - Provider options, merged over `providerOptsDefaults`
 * @returns The solutions, plus the first solution carrying an error (if any) as `error`
 */
export async function getSolutions(
  captchas: types.CaptchaInfo[] = [],
  token: string = '',
  opts: TwoCaptchaProviderOpts = {}
): Promise<types.GetSolutionsResult> {
  opts = { ...providerOptsDefaults, ...opts }
  const solutions = await Promise.all(
    captchas.map(c => getSolution(c, token, opts))
  )
  return { solutions, error: solutions.find(s => !!s.error) }
}

/**
 * Request the solution for a single captcha.
 *
 * Failures are recorded in the `error` property of the returned solution
 * instead of being thrown.
 */
async function getSolution(
  captcha: types.CaptchaInfo,
  token: string,
  opts: TwoCaptchaProviderOpts
): Promise<types.CaptchaSolution> {
  const solution: types.CaptchaSolution = {
    _vendor: captcha._vendor,
    provider: PROVIDER_ID
  }
  try {
    if (!captcha || !captcha.sitekey || !captcha.url || !captcha.id) {
      throw new Error('Missing data in captcha')
    }
    solution.id = captcha.id
    solution.requestAt = new Date()
    debug('Requesting solution..', solution)
    const extraData = {}
    if (captcha.s) {
      extraData['data-s'] = captcha.s // google site specific property
    }
    if (opts.useActionValue && captcha.action) {
      extraData['action'] = captcha.action // Optional v3/enterprise action
    }
    if (opts.useEnterpriseFlag && captcha.isEnterprise) {
      extraData['enterprise'] = 1
    }

    // An optional proxy is configured through environment variables
    if (
      process.env['2CAPTCHA_PROXY_TYPE'] &&
      process.env['2CAPTCHA_PROXY_ADDRESS']
    ) {
      extraData['proxytype'] = process.env['2CAPTCHA_PROXY_TYPE'].toUpperCase()
      extraData['proxy'] = process.env['2CAPTCHA_PROXY_ADDRESS']
    }

    const { err, result, invalid } = await decodeRecaptchaAsync(
      token,
      captcha._vendor,
      captcha.sitekey,
      captcha.url,
      extraData
    )
    debug('Got response', { err, result, invalid })
    if (err) throw new Error(`${PROVIDER_ID} error: ${err}`)
    if (!result || !result.text || !result.id) {
      throw new Error(`${PROVIDER_ID} error: Missing response data: ${result}`)
    }
    solution.providerCaptchaId = result.id
    solution.text = result.text
    solution.responseAt = new Date()
    solution.hasSolution = !!solution.text
    solution.duration = secondsBetweenDates(
      solution.requestAt,
      solution.responseAt
    )
  } catch (error) {
    debug('Error', error)
    solution.error = error.toString()
  }
  return solution
}

================================================
FILE: packages/puppeteer-extra-plugin-recaptcha/src/puppeteer-mods.d.ts
================================================
// Extend Puppeteer interfaces transparently to the end user.
// Note, we need to manually copy this file into the build dir (yarn ambient-dts): https://stackoverflow.com/questions/56018167
// Note2: It's not sufficient to just copy over this d.ts file, it needs to be referenced by another .ts file!
// Note3: To make it even more urgh the TS compiler will change the reference import path, hence we need to fix that in the end as well

// This import statement is important for all this to work, otherwise we don't extend but replace the puppeteer module definition.
// https://github.com/microsoft/TypeScript/issues/10859
import {} from 'puppeteer'

import { RecaptchaPluginPageAdditions } from './types'

declare module 'puppeteer' {
  interface Page extends RecaptchaPluginPageAdditions {}
  interface Frame extends RecaptchaPluginPageAdditions {}
}

declare module 'puppeteer-core' {
  interface Page extends RecaptchaPluginPageAdditions {}
  interface Frame extends RecaptchaPluginPageAdditions {}
}

================================================
FILE: packages/puppeteer-extra-plugin-recaptcha/src/solve.test.ts
================================================
import test from 'ava'

import RecaptchaPlugin from './index'

import { addExtra } from 'puppeteer-extra'

const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox']

// All tests in this file talk to the real 2captcha service and are
// skipped unless TWOCAPTCHA_TOKEN is set.

test('will solve reCAPTCHAs', async t => {
  if (!process.env.TWOCAPTCHA_TOKEN) {
    t.truthy('foo')
    console.log('TWOCAPTCHA_TOKEN not set, skipping test.')
    return
  }

  const puppeteer = addExtra(require('puppeteer'))
  const recaptchaPlugin = RecaptchaPlugin({
    provider: {
      id: '2captcha',
      token: process.env.TWOCAPTCHA_TOKEN
    }
  })
  puppeteer.use(recaptchaPlugin)

  const browser = await puppeteer.launch({
    args: PUPPETEER_ARGS,
    headless: true
  })
  const page = await browser.newPage()

  const url = 'https://www.google.com/recaptcha/api2/demo'
  await page.goto(url, { waitUntil: 'networkidle0' })

  const result = await (page as any).solveRecaptchas()
  const { captchas, solutions, solved, error } = result
  t.falsy(error)

  t.is(captchas.length, 1)
  t.is(solutions.length, 1)
  t.is(solved.length, 1)
  t.is(solved[0]._vendor, 'recaptcha')
  t.is(solved[0].isSolved, true)

  await browser.close()
})

test('will solve hCAPTCHAs', async t => {
  if (!process.env.TWOCAPTCHA_TOKEN) {
    t.truthy('foo')
    console.log('TWOCAPTCHA_TOKEN not set, skipping test.')
    return
  }

  const puppeteer = addExtra(require('puppeteer'))
  const recaptchaPlugin = RecaptchaPlugin({
    provider: {
      id: '2captcha',
      token: process.env.TWOCAPTCHA_TOKEN
    }
  })
  puppeteer.use(recaptchaPlugin)

  const browser = await puppeteer.launch({
    args: PUPPETEER_ARGS,
    headless: true
  })
  const page = await browser.newPage()

  const urls = [
    'https://accounts.hcaptcha.com/demo',
    'http://democaptcha.com/demo-form-eng/hcaptcha.html',
  ]

  for (const url of urls) {
    await page.goto(url, { waitUntil: 'networkidle0' })

    const result = await (page as any).solveRecaptchas()
    const { captchas, solutions, solved, error } = result
    t.falsy(error)

    t.is(captchas.length, 1)
    t.is(solutions.length, 1)
    t.is(solved.length, 1)
    t.is(solved[0]._vendor, 'hcaptcha')
    t.is(solved[0].isSolved, true)
  }

  await browser.close()
})

test('will solve reCAPTCHA enterprise', async t => {
  if (!process.env.TWOCAPTCHA_TOKEN) {
    t.truthy('foo')
    console.log('TWOCAPTCHA_TOKEN not set, skipping test.')
    return
  }

  const puppeteer = addExtra(require('puppeteer'))
  const recaptchaPlugin = RecaptchaPlugin({
    provider: {
      id: '2captcha',
      token: process.env.TWOCAPTCHA_TOKEN,
      opts: {
        useEnterpriseFlag: false // Not sure but using the enterprise flag makes it worse
      }
    }
  })
  puppeteer.use(recaptchaPlugin)

  const browser = await puppeteer.launch({
    args: PUPPETEER_ARGS,
    headless: true
  })
  const page = await browser.newPage()

  const url =
    'https://berstend.github.io/static/recaptcha/enterprise-checkbox-explicit.html'
  await page.goto(url, { waitUntil: 'networkidle0' })

  const result = await (page as any).solveRecaptchas()
  const { captchas, solutions, solved, error } = result
  t.falsy(error)

  t.is(captchas.length, 1)
  t.is(solutions.length, 1)
  t.is(solved.length, 1)
  t.is(solved[0]._vendor, 'recaptcha')
  t.is(solved[0].isSolved, true)

  await browser.close()
})

test('will solve multiple reCAPTCHAs', async t => {
  if (!process.env.TWOCAPTCHA_TOKEN) {
    t.truthy('foo')
    console.log('TWOCAPTCHA_TOKEN not set, skipping test.')
    return
  }

  const puppeteer = addExtra(require('puppeteer'))
  const recaptchaPlugin = RecaptchaPlugin({
    provider: {
      id: '2captcha',
      token: process.env.TWOCAPTCHA_TOKEN,
      opts: {
        useEnterpriseFlag: false // Not sure but using the enterprise flag makes it worse
      }
    }
  })
  puppeteer.use(recaptchaPlugin)

  const browser = await puppeteer.launch({
    args: PUPPETEER_ARGS,
    headless: true
  })
  const page = await browser.newPage()

  // Register the handler before navigating so no dialog can be missed, and
  // await the dismissal so a failure is not left as a floating promise.
  page.on('dialog', async dialog => {
    await dialog.dismiss() // the test page has blocking `alert`s
  })

  const url =
    'https://berstend.github.io/static/recaptcha/v2-checkbox-explicit-multi.html'
  await page.goto(url, { waitUntil: 'networkidle0' })

  const result = await (page as any).solveRecaptchas()
  const { captchas, solutions, solved, error } = result
  t.falsy(error)

  t.is(captchas.length, 3)
  t.is(solutions.length, 3)
  t.is(solved.length, 3)
  t.is(solved[0]._vendor, 'recaptcha')
  t.is(solved[0].isSolved, true)

  await browser.close()
})

test('will not solve inactive invisible reCAPTCHAs by default', async t => {
  if (!process.env.TWOCAPTCHA_TOKEN) {
    t.truthy('foo')
    console.log('TWOCAPTCHA_TOKEN not set, skipping test.')
    return
  }

  const puppeteer = addExtra(require('puppeteer'))
  const recaptchaPlugin = RecaptchaPlugin({
    provider: {
      id: '2captcha',
      token: process.env.TWOCAPTCHA_TOKEN
    }
  })
  puppeteer.use(recaptchaPlugin)

  const browser = await puppeteer.launch({
    args: PUPPETEER_ARGS,
    headless: true
  })
  const page = await browser.newPage()

  const url =
    'https://berstend.github.io/static/recaptcha/v2-invisible-auto.html'
  await page.goto(url, { waitUntil: 'networkidle0' })

  const result = await (page as any).solveRecaptchas()
  const { captchas, solutions, solved, error } = result
  t.falsy(error)

  t.is(captchas.length, 0)
  t.is(solutions.length, 0)
  t.is(solved.length, 0)

  await browser.close()
})

test('will not solve score based reCAPTCHAs by default', async t => {
  if (!process.env.TWOCAPTCHA_TOKEN) {
    t.truthy('foo')
    console.log('TWOCAPTCHA_TOKEN not set, skipping test.')
    return
  }

  const puppeteer = addExtra(require('puppeteer'))
  const recaptchaPlugin = RecaptchaPlugin({
    provider: {
      id: '2captcha',
      token: process.env.TWOCAPTCHA_TOKEN
    }
  })
  puppeteer.use(recaptchaPlugin)

  const browser = await puppeteer.launch({
    args: PUPPETEER_ARGS,
    headless:
true }) const page = await browser.newPage() const url = 'https://berstend.github.io/static/recaptcha/v3-programmatic.html' await page.goto(url, { waitUntil: 'networkidle0' }) const result = await (page as any).solveRecaptchas() const { captchas, solutions, solved, error } = result t.falsy(error) t.is(captchas.length, 0) t.is(solutions.length, 0) t.is(solved.length, 0) await browser.close() }) ================================================ FILE: packages/puppeteer-extra-plugin-recaptcha/src/types.ts ================================================ /// /// // Warn: The above is EXTREMELY important for our custom page mods to be recognized by the end users typescript! /** * Extend window object with recaptcha things */ declare global { interface Window { __google_recaptcha_client?: boolean ___grecaptcha_cfg?: { clients?: any } } } export type RecaptchaPluginPageAdditions = { /** Attempt to find all reCAPTCHAs on this page. */ findRecaptchas: () => Promise getRecaptchaSolutions: ( captchas: CaptchaInfo[], provider?: SolutionProvider ) => Promise enterRecaptchaSolutions: ( solutions: CaptchaSolution[] ) => Promise /** Attempt to detect and solve reCAPTCHAs on this page automatically. 
🔮 */ solveRecaptchas: () => Promise } export interface SolutionProvider { id?: string token?: string fn?: (captchas: CaptchaInfo[], token?: string) => Promise opts?: TOpts // Optional options ;-) } export interface FindRecaptchasResult { captchas: CaptchaInfo[] filtered: FilteredCaptcha[] error?: any } export interface EnterRecaptchaSolutionsResult { solved: CaptchaSolved[] error?: any } export interface GetSolutionsResult { solutions: CaptchaSolution[] error?: any } export type SolveRecaptchasResult = FindRecaptchasResult & EnterRecaptchaSolutionsResult & GetSolutionsResult export type CaptchaVendor = 'recaptcha' | 'hcaptcha' export type CaptchaType = 'checkbox' | 'invisible' | 'score' export interface CaptchaInfo { _vendor: CaptchaVendor id?: string // captcha id widgetId?: number sitekey?: string s?: string // new google site specific property isEnterprise?: boolean isInViewport?: boolean /** Is captcha invisible */ isInvisible?: boolean /** Invisible recaptchas: Does the captcha have an active challenge popup */ hasActiveChallengePopup?: boolean /** Invisible recaptchas: Can the captcha trigger a challenge or is it purely score based (v3) */ hasChallengeFrame?: boolean _type?: CaptchaType action?: string // Optional action (v3/enterprise): https://developers.google.com/recaptcha/docs/v3#actions callback?: string | Function hasResponseElement?: boolean url?: string display?: { size?: string theme?: string top?: string left?: string width?: string height?: string } } export type FilteredCaptcha = CaptchaInfo & { filtered: boolean filteredReason: | 'solveInViewportOnly' | 'solveScoreBased' | 'solveInactiveChallenges' } export interface CaptchaSolution { _vendor: CaptchaVendor id?: string // captcha id provider?: string providerCaptchaId?: string text?: string // the solution requestAt?: Date responseAt?: Date duration?: number error?: string | Error hasSolution?: boolean } export interface CaptchaSolved { _vendor: CaptchaVendor id?: string // captcha id 
/**
 * Options of the recaptcha plugin.
 *
 * Note: `solveInViewportOnly`, `solveScoreBased` and `solveInactiveChallenges`
 * are also the possible `filteredReason` values of a `FilteredCaptcha`.
 */
export interface PluginOptions {
  /** Visualize reCAPTCHAs based on their state */
  visualFeedback: boolean
  /** Throw on errors instead of returning them in the error property */
  throwOnError: boolean
  /** Only solve captchas and challenges visible in the viewport */
  solveInViewportOnly: boolean
  /** Solve invisible captchas used to acquire a score and not present a challenge (e.g. reCAPTCHA v3) */
  solveScoreBased: boolean
  /** Solve invisible captchas that have no active challenge */
  solveInactiveChallenges: boolean
  /**
   * Solution provider configuration, see `SolutionProvider`
   * (`id`, `token`, an optional custom `fn` and provider specific `opts`).
   * NOTE(review): presumably the default provider for obtaining solutions - confirm against the plugin code.
   */
  provider?: SolutionProvider
}
declare module 'puppeteer' {
  export interface Page extends EventEmitter, FrameBase {
    /**
     * Start an interactive REPL for this page.
     * Resolves (without a value) when the readline interface is closed.
     * Only attached when the plugin option `addToPuppeteerClass` is true (default).
     */
    repl(): Promise<void>;
  }
  export interface Browser extends EventEmitter, TargetAwaiter {
    /**
     * Start an interactive REPL for this browser.
     * Resolves (without a value) when the readline interface is closed.
     * Only attached when the plugin option `addToPuppeteerClass` is true (default).
     */
    repl(): Promise<void>;
  }
}
class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)
  }

  get name() {
    return 'repl'
  }

  get defaults() {
    const defaults = { addToPuppeteerClass: true }
    return defaults
  }

  /**
   * Ask to be run last, so that additions other plugins make (e.g. to Page)
   * are already in place when the REPL is attached.
   *
   * @ignore
   */
  get requirements() {
    const requirements = new Set()
    requirements.add('runLast')
    return requirements
  }

  /**
   * Create an interactive REPL for the provided object.
   *
   * Uses an extended (colorized) readline interface under the hood.
   * The returned Promise resolves once the readline interface is closed.
   *
   * If `opts.addToPuppeteerClass` is true (default) then `page.repl()`/`browser.repl()`
   * point to this method, for convenience.
   *
   * Can be used standalone as well, to inspect an arbitrary class instance or object.
   *
   * @param  {Object} obj - An object or class instance to use in the repl (e.g. `page`, `browser`)
   * @return {Promise}
   *
   * @example
   * const repl = require('puppeteer-extra-plugin-repl')()
   * await repl.repl(page)
   */
  async repl(obj) {
    const session = new REPLSession({ obj })
    return session.start()
  }

  /**
   * Conditionally add a .repl() method to `page` and `browser` instances.
   *
   * @ignore
   */
  async onPageCreated(page) {
    if (this.opts.addToPuppeteerClass) {
      // Give the target a `.repl()` shortcut which starts a session on itself
      const attachRepl = target => {
        target.repl = () => this.repl(target)
      }
      attachRepl(page)
      attachRepl(page.browser())
    }
  }
}
[].concat(this.extraMethods, this._meta.members) } get extraMethods() { return ['inspect', 'exit'] } async start() { this._createInterface() this._showIntro() this._rl.prompt() return this._closePromise } _createInterface() { this._rl = readline.createInterface({ input: process.stdin, output: process.stdout, prompt: this._meta.name ? `> ${this._meta.name.toLowerCase()}.` : `> `, completer: readline.defaultCompleter(this._completions), colors: { prompt: readline.chalk.cyan, completer: readline.chalk.yellow } }) this._rl.on('line', this._onLineInput.bind(this)) this._closePromise = new Promise(resolve => this._rl.once('close', () => resolve()) ) } _showIntro() { console.log(` Started puppeteer-extra repl for ${this._meta.type} '${this._meta.name}' with ${this._meta.members.length} properties. - Type 'inspect' to return the current ${this._meta.type}. - Type 'exit' to leave the repl. Tab auto-completion available: `) this._rl.showTabCompletions() } async _onLineInput(line) { if (!line) { return this._rl.prompt() } if (line === 'exit') { return this._rl.close() } const cmd = line === 'inspect' ? 
this._obj : `this._obj.${line}` await this._evalAsync(cmd) this._rl.prompt() } async _evalAsync(cmd) { try { // eslint-disable-next-line no-eval const out = await eval(cmd) console.log(out) } catch (err) { console.warn(err) } } } module.exports = REPLSession ================================================ FILE: packages/puppeteer-extra-plugin-repl/lib/REPLSession.test.js ================================================ 'use strict' const test = require('ava') const REPLSession = require('./REPLSession') test('is a function', async t => { t.is(typeof REPLSession, 'function') }) test('is a class', async t => { t.is(REPLSession.constructor.name, 'Function') }) test('will throw without opts', async t => { const error = await t.throws(() => new REPLSession()) t.is( error.message, 'Expected argument to be of type `object` but received type `undefined`' ) }) test('will throw when opts.obj is not a class derivative', async t => { const error = await t.throws(() => new REPLSession({ obj: 'foobar' })) t.is( error.message, 'Expected argument to be of type `object` but received type `string`' ) }) test('should have the expected class members', async t => { const FakeClass = class Foo {} const opts = { obj: new FakeClass() } const instance = new REPLSession(opts) const prototype = Object.getPrototypeOf(instance) const childClassMembers = Object.getOwnPropertyNames(prototype) t.true(childClassMembers.includes('constructor')) t.true(childClassMembers.includes('extraMethods')) t.true(childClassMembers.includes('start')) t.true(childClassMembers.includes('_createInterface')) t.true(childClassMembers.includes('_showIntro')) t.true(childClassMembers.includes('_onLineInput')) t.true(childClassMembers.includes('_evalAsync')) t.true(childClassMembers.length === 7) }) ================================================ FILE: packages/puppeteer-extra-plugin-repl/lib/super-readline.js ================================================ const chalk = require('chalk') const { Interface, 
class SuperInterface extends Interface {
  /**
   * @param {Object} options - Regular readline options, plus an optional `colors` object
   * @param {Function} [options.colors.prompt] - Colorizer applied to the prompt
   * @param {Function} [options.colors.completer] - Colorizer applied to tab completion output
   */
  constructor(options) {
    super(options)
    this._colors = options.colors || {}
    // True while the base class is writing tab completion output
    this._writingTabComplete = false
  }

  /**
   * Run the native tab completion while flagging its output for colorization.
   *
   * The base implementation may finish synchronously or return a Promise,
   * so the flag is only reset once the completion has actually settled
   * (and also when it throws), otherwise late output would not be colorized
   * or the flag would stay set forever.
   *
   * @param {boolean} lastKeypressWasTab
   * @return {Promise|undefined} Whatever the base implementation returned
   */
  _tabComplete(lastKeypressWasTab) {
    const stopColorizing = () => {
      this._writingTabComplete = false
    }
    this._writingTabComplete = true
    let pending
    try {
      pending = super._tabComplete(lastKeypressWasTab)
    } catch (err) {
      stopColorizing()
      throw err
    }
    if (pending && typeof pending.then === 'function') {
      return pending.then(
        result => {
          stopColorizing()
          return result
        },
        err => {
          stopColorizing()
          throw err
        }
      )
    }
    stopColorizing()
    return pending
  }

  /**
   * Print the available tab completions right away (as if tab was hit twice).
   *
   * @return {Promise|undefined} Settles once the completions have been written (if the base is async)
   */
  showTabCompletions() {
    return this._tabComplete(true)
  }

  /**
   * Write to the output stream, colorizing the prompt and tab completion output.
   *
   * @param {string} stringToWrite
   */
  _writeToOutput(stringToWrite) {
    // colorize prompt itself
    const startsWithPrompt = stringToWrite.startsWith(this._prompt)
    if (this._colors.prompt && startsWithPrompt) {
      stringToWrite = `${this._colors.prompt(
        this._prompt
      )}${stringToWrite.replace(this._prompt, '')}`
      return super._writeToOutput(stringToWrite)
    }
    // colorize completer output
    if (this._colors.completer && this._writingTabComplete) {
      return super._writeToOutput(this._colors.completer(stringToWrite))
    }
    // anything else
    super._writeToOutput(stringToWrite)
  }
}
hits : completions return [arr, line] } module.exports = { // customized exports: chalk, Interface: SuperInterface, createInterface: createSuperInterface, defaultCompleter, // default readline exports: clearLine, clearScreenDown, cursorTo, emitKeypressEvents, moveCursor } ================================================ FILE: packages/puppeteer-extra-plugin-repl/lib/super-readline.test.js ================================================ 'use strict' const test = require('ava') const readline = require('./super-readline') test('is an object', async t => { t.is(typeof readline, 'object') }) test('should have the expected number of exports', async t => { const exportedKeys = Object.keys(readline) t.true(exportedKeys.includes('chalk')) t.true(exportedKeys.includes('Interface')) t.true(exportedKeys.includes('createInterface')) t.true(exportedKeys.includes('defaultCompleter')) t.true(exportedKeys.includes('clearLine')) t.true(exportedKeys.includes('clearScreenDown')) t.true(exportedKeys.includes('cursorTo')) t.true(exportedKeys.includes('emitKeypressEvents')) t.true(exportedKeys.includes('moveCursor')) t.is(exportedKeys.length, 9) }) test('can create an interface', async t => { const instance = readline.createInterface({ input: process.stdin, output: process.stdout, prompt: '> ', completer: readline.defaultCompleter(['bob', 'yolk']), colors: { prompt: readline.chalk.cyan, completer: readline.chalk.yellow } }) t.is(instance.constructor.name, 'SuperInterface') t.is(typeof instance, 'object') }) test('should have the extended class members', async t => { const instance = readline.createInterface({ input: process.stdin, output: process.stdout, prompt: '> ', completer: readline.defaultCompleter(['bob', 'yolk']), colors: { prompt: readline.chalk.cyan, completer: readline.chalk.yellow } }) const prototype = Object.getPrototypeOf(instance) const childClassMembers = Object.getOwnPropertyNames(prototype) t.true(childClassMembers.includes('constructor')) 
t.true(childClassMembers.includes('_tabComplete')) t.true(childClassMembers.includes('_writeToOutput')) t.true(childClassMembers.includes('showTabCompletions')) }) ================================================ FILE: packages/puppeteer-extra-plugin-repl/package.json ================================================ { "name": "puppeteer-extra-plugin-repl", "version": "2.3.3", "description": "Start an interactive REPL in your puppeteer code.", "main": "index.js", "types": "index.d.ts", "repository": "berstend/puppeteer-extra", "author": "nswbmw & berstend", "license": "MIT", "scripts": { "docs": "node -e 0", "lint": "eslint --ext .js .", "test": "run-p lint", "test-ci": "run-s test" }, "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "repl", "debug", "interactive", "puppeteer-debug", "puppeteer-repl", "chrome", "headless", "pupeteer" ], "devDependencies": { "ava": "2.4.0", "mock-stdin": "^0.3.1", "npm-run-all": "^4.1.5", "puppeteer": "^2.0.0" }, "dependencies": { "chalk": "^3.0.0", "debug": "^4.1.1", "ow": "^0.4.0", "puppeteer-extra-plugin": "^3.2.3" }, "peerDependencies": { "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin-repl/readme.md ================================================ # puppeteer-extra-plugin-repl > A plugin for [puppeteer-extra](https://github.com/berstend/puppeteer-extra). ## Installation ```bash yarn add puppeteer-extra-plugin-repl ``` ## Purpose **Make quick puppeteer debugging and exploration fun with an interactive REPL.** - Can interrupt your code at anytime to start an interactive [REPL](https://en.wikipedia.org/wiki/Read%E2%80%93eval%E2%80%93print_loop) in your console. - Adds convenience `.repl()` methods to `Page` and `Browser` instances. - Supports inspecting arbitrary objects and instances. 
- Features tab auto-completion for the available object properties and a colorized prompt. #### Kudos - Inspired by [puppeteer-debug](https://github.com/nswbmw/puppeteer-debug) from [nswbmw](https://github.com/nswbmw), thanks! ## REPL ![repl](https://i.imgur.com/xeP7hEc.gif) ## Quickstart ```es6 const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-repl')()) puppeteer.launch({ headless: true }).then(async browser => { const page = await browser.newPage() await page.goto('https://example.com') // Start an interactive REPL here with the `page` instance. await page.repl() // Afterwards start REPL with the `browser` instance. await browser.repl() await browser.close() }) ``` In the REPL session (hit `tab` two times to see all available properties): ```es6 > page.url() // => https://example.com > page.click('a') > page.url() // => https://www.iana.org/domains/reserved > page.content() // => ... > page.goto('https://google.com') > page.type('input', 'what is the answer to life the universe and everything') > page.click('input[type=submit]') > page.url() // => https://www.google.com/search?source=hp&ei=u9oXW5HpO8a ... > page.evaluate(() => document.querySelector('h3 a').textContent) // => Question 42 (The Impossible Quiz) - The Impossible Quiz Wiki - Fandom ``` - Type `inspect` to return the current object. - Type `exit` (or hit ctrl+c) to leave the repl. ## API #### Table of Contents - [Plugin](#plugin) - [repl](#repl) ### [Plugin](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-repl/index.js#L38-L83) **Extends: PuppeteerExtraPlugin** Interrupt your puppeteer code with an interactive REPL. Features tab auto-completion for the given object properties and a colorized prompt. Works with arbitrary objects and class instances, though `Page` & `Browser` make the most sense.
:-) **opts** Type: `function (opts)` - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** Options (optional, default `{}`) - `opts.addToPuppeteerClass` **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)?** If a `.repl()` method should be attached to Puppeteer `Page` and `Browser` instances (default: true). Example: ```javascript // In this example we don't extend the native puppeteer classes const puppeteer = require('puppeteer-extra') const repl = require('puppeteer-extra-plugin-repl')({ addToPuppeteerClass: false }) puppeteer.use(repl) puppeteer.launch({ headless: true }).then(async browser => { const page = await browser.newPage() await page.goto('https://example.com') // Start an interactive REPL here with the `page` instance. await repl.repl(page) // Afterwards start REPL with the `repl` instance itself. 🐴 await repl.repl(repl) await browser.close() }) ``` * * * #### [repl](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-repl/index.js#L70-L70) Create an interactive REPL for the provided object. Uses an extended (colorized) readline interface under the hood. Will resolve the returned Promise when the readline interface is closed. If `opts.addToPuppeteerClass` is true (default) then `page.repl()`/`browser.repl()` will point to this method, for convenience. Can be used standalone as well, to inspect an arbitrary class instance or object. Type: `function (obj): Promise` - `obj` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** An object or class instance to use in the repl (e.g. 
`page`, `browser`) Example: ```javascript const repl = require('puppeteer-extra-plugin-repl')() await repl.repl() ``` * * * ================================================ FILE: packages/puppeteer-extra-plugin-repl/test/headless.js ================================================ 'use strict' const test = require('ava') // const PUPPETEER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox'] test.beforeEach(t => { // Make sure we work with pristine modules // delete require.cache[require.resolve('puppeteer-extra')] // delete require.cache[require.resolve('puppeteer-extra-plugin-repl')] }) test('will create a repl', async t => { t.pass() // @TODO: This test is a little brittle and fails in CI sometimes. // const stdin = require('mock-stdin').stdin() // const puppeteer = require('puppeteer-extra') // const repl = require('puppeteer-extra-plugin-repl')() // puppeteer.use(repl) // await puppeteer.launch({ args: PUPPETEER_ARGS }).then(async browser => { // const page = await browser.newPage() // // Mock stdout, there might be cleaner ways to do this :-) // let stdoutOutput = '' // const origStdout = process.stdout.write // process.stdout.write = (string, encoding, fd) => { stdoutOutput += string } // await Promise.all([ // page.repl(), // stdin.send('url()'), // stdin.end() // ]) // process.stdout.write = origStdout // t.true(stdoutOutput.includes(`Started puppeteer-extra repl for object 'Page' with`)) // t.true(stdoutOutput.includes(`> page.`)) // t.true(stdoutOutput.includes(`url()`)) // t.true(stdoutOutput.includes(`about:blank`)) // browser.close() // }) // t.true(true) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/.npmignore ================================================ stealthtests/ runall_stealthtests.sh ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/_template/index.js ================================================ 'use strict' const { 
class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)
  }

  get name() {
    const pluginName = 'stealth/evasions/_template'
    return pluginName
  }

  /**
   * Register a script that runs in every new document of the page.
   * The template merely logs a debug message there.
   */
  async onPageCreated(page) {
    const sayHello = () => {
      console.debug('hello world')
    }
    await page.evaluateOnNewDocument(sayHello)
  }
}
utils.stripProxyFromErrors = (handler = {}) => {
  // Returns a new handler object: the default `setPrototypeOf` trap below plus one
  // wrapper per own property of `handler` (a handler-supplied `setPrototypeOf` wins).
  const newHandler = {
    setPrototypeOf: function (target, proto) {
      if (proto === null)
        throw new TypeError('Cannot convert object to primitive value')
      if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {
        throw new TypeError('Cyclic __proto__ value')
      }
      return Reflect.setPrototypeOf(target, proto)
    }
  }
  // We wrap each trap in the handler in a try/catch and modify the error stack if they throw
  const traps = Object.getOwnPropertyNames(handler)
  traps.forEach(trap => {
    // Note: The stack line produced by this wrapper is `at Object.newHandler.<computed> [as <trap>]`,
    // which is what the anchor/blacklist strings below look for. Renaming `newHandler` or changing
    // how this function is assigned changes that line.
    newHandler[trap] = function () {
      try {
        // Forward the call to the defined proxy handler
        return handler[trap].apply(this, arguments)
      } catch (err) {
        // Stack traces differ per browser, we only support chromium based ones currently
        if (!err || !err.stack || !err.stack.includes(`at `)) {
          throw err
        }

        // When something throws within one of our traps the Proxy will show up in error stacks
        // An earlier implementation of this code would simply strip lines with a blacklist,
        // but it makes sense to be more surgical here and only remove lines related to our Proxy.
        // We try to use a known "anchor" line for that and strip it with everything above it.
        // If the anchor line cannot be found for some reason we fall back to our blacklist approach.

        const stripWithBlacklist = (stack, stripFirstLine = true) => {
          const blacklist = [
            `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply
            `at Object.${trap} `, // e.g. Object.get or Object.apply
            `at Object.newHandler.<computed> [as ${trap}] ` // caused by this very wrapper :-)
          ]
          return (
            stack
              .split('\n')
              // Always remove the first (file) line in the stack (guaranteed to be our proxy)
              // (index 0 is the error message itself, index 1 is the first stack line)
              .filter((line, index) => !(index === 1 && stripFirstLine))
              // Check if the line starts with one of our blacklisted strings
              .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))
              .join('\n')
          )
        }

        const stripWithAnchor = (stack, anchor) => {
          const stackArr = stack.split('\n')
          anchor = anchor || `at Object.newHandler.<computed> [as ${trap}] ` // Known first Proxy line in chromium
          const anchorIndex = stackArr.findIndex(line =>
            line.trim().startsWith(anchor)
          )
          if (anchorIndex === -1) {
            return false // 404, anchor not found
          }
          // Strip everything from the top until we reach the anchor line
          // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)
          stackArr.splice(1, anchorIndex)
          return stackArr.join('\n')
        }

        // Special cases due to our nested toString proxies
        err.stack = err.stack.replace(
          'at Object.toString (',
          'at Function.toString ('
        )
        if ((err.stack || '').includes('at Function.toString (')) {
          err.stack = stripWithBlacklist(err.stack, false)
          throw err
        }

        // Try using the anchor method, fallback to blacklist if necessary
        err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)

        throw err // Re-throw our now sanitized error
      }
    }
  })
  return newHandler
}
/**
 * Remove the top stack frames of `err` up to (and including) the first frame that starts with `anchor`.
 *
 * The error is modified in place. It is returned untouched when it has no string `stack`
 * or when the anchor line cannot be found.
 *
 * @param {object} err - The error to sanitize
 * @param {string} anchor - The string the anchor line starts with
 * @returns {object} The same `err` object
 */
utils.stripErrorWithAnchor = (err, anchor) => {
  // Nothing to sanitize (e.g. a thrown primitive or an object without a stack)
  if (!err || typeof err.stack !== 'string') {
    return err
  }
  const stackArr = err.stack.split('\n')
  const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))
  if (anchorIndex === -1) {
    return err // 404, anchor not found
  }
  // Strip everything from the top until we reach the anchor line (remove anchor line as well)
  // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)
  stackArr.splice(1, anchorIndex)
  err.stack = stackArr.join('\n')
  return err
}

/**
 * Replace the property of an object in a stealthy way.
 *
 * Note: You also want to work on the prototype of an object most often,
 * as you'd otherwise leave traces (e.g. showing up in Object.getOwnPropertyNames(obj)).
 *
 * The existing descriptor is used as the basis. When the overrides switch the kind of the
 * property (data <-> accessor) the conflicting fields of the existing descriptor are dropped,
 * as `Object.defineProperty` rejects descriptors mixing `get`/`set` with `value`/`writable`.
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty
 *
 * @example
 * replaceProperty(WebGLRenderingContext.prototype, 'getParameter', { value: "alice" })
 * // or
 * replaceProperty(Object.getPrototypeOf(navigator), 'languages', { get: () => ['en-US', 'en'] })
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The property name to replace
 * @param {object} descriptorOverrides - e.g. { value: "alice" }
 * @returns {object} `obj`, as returned by `Object.defineProperty`
 */
utils.replaceProperty = (obj, propName, descriptorOverrides = {}) => {
  const overrides = descriptorOverrides || {}
  // Copy over the existing descriptors (writable, enumerable, configurable, etc)
  const descriptor = {
    ...(Object.getOwnPropertyDescriptor(obj, propName) || {})
  }
  if ('get' in overrides || 'set' in overrides) {
    delete descriptor.value
    delete descriptor.writable
  }
  if ('value' in overrides || 'writable' in overrides) {
    delete descriptor.get
    delete descriptor.set
  }
  return Object.defineProperty(obj, propName, {
    ...descriptor,
    // Add our overrides (e.g. value, get())
    ...overrides
  })
}

/**
 * Preload a cache of function copies and data.
 *
 * For a determined enough observer it would be possible to overwrite and sniff usage of functions
 * we use in our internal Proxies, to combat that we use a cached copy of those functions.
 */
/**
 * Populate `utils.cache` with copies of native functions and data (no-op when already populated).
 *
 * Note: Whenever we add a `Function.prototype.toString` proxy we should preload the cache before,
 * by executing `utils.preloadCache()` before the proxy is applied (so we don't cause recursive lookups).
 *
 * This is evaluated once per execution context (e.g. window)
 */
utils.preloadCache = () => {
  if (utils.cache) {
    return
  }
  utils.cache = {
    // Used in our proxies
    Reflect: {
      get: Reflect.get.bind(Reflect),
      apply: Reflect.apply.bind(Reflect)
    },
    // Used in `makeNativeString`
    nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`
  }
}

/**
 * Utility function to generate a cross-browser `toString` result representing native code.
 *
 * There's small differences: Chromium uses a single line, whereas FF & Webkit uses multiline strings.
 * To future-proof this we use an existing native toString result as the basis.
 *
 * The only advantage we have over the other team is that our JS runs first, hence we cache the result
 * of the native toString result once, so they cannot spoof it afterwards and reveal that we're using it.
 *
 * Requires `utils.preloadCache()` to have run before (it reads `utils.cache.nativeToStringStr`).
 *
 * @example
 * makeNativeString('foobar') // => `function foobar() { [native code] }`
 *
 * @param {string} [name] - Optional function name
 * @returns {string} The native code string with `toString` replaced by `name`
 */
utils.makeNativeString = (name = '') => {
  const fnName = name || ''
  // Use a replacer function so `$` sequences in the name (e.g. `$$`) are inserted verbatim
  // instead of being interpreted as replacement patterns.
  return utils.cache.nativeToStringStr.replace('toString', () => fnName)
}

/**
 * Helper function to modify the `toString()` result of the provided object.
 *
 * Note: Use `utils.redirectToString` instead when possible.
 *
 * There's a quirk in JS Proxies that will cause the `toString()` result to differ from the vanilla Object.
 * If no string is provided we will generate a `[native code]` thing based on the name of the property object.
 */
/**
 * Install a `Function.prototype.toString` proxy that returns `str` (or a generated
 * native code string based on `obj.name`) when `toString` is invoked on `obj`.
 *
 * @example
 * patchToString(WebGLRenderingContext.prototype.getParameter, 'function getParameter() { [native code] }')
 *
 * @param {object} obj - The object for which to modify the `toString()` representation
 * @param {string} str - Optional string used as a return value
 */
utils.patchToString = (obj, str = '') => {
  // Note: the trap body is intentionally kept inline (no shared helper), extra function
  // calls would add frames of our own to the stack of errors thrown by the native function.
  const handler = {
    apply: function (target, ctx) {
      // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + ""`
      if (ctx === Function.prototype.toString) {
        return utils.makeNativeString('toString')
      }
      // `toString` targeted at our proxied Object detected
      if (ctx === obj) {
        // We either return the optional string verbatim or derive the most desired result automatically
        return str || utils.makeNativeString(obj.name)
      }
      // Let the native function throw its own TypeError for `null`/`undefined`,
      // accessing `ctx.toString` below would throw a different (revealing) error.
      if (typeof ctx === 'undefined' || ctx === null) {
        return target.call(ctx)
      }
      // Check if the toString prototype of the context is the same as the global prototype,
      // if not indicates that we are doing a check across different windows., e.g. the `iframeWithdirect` test case
      const hasSameProto = Object.getPrototypeOf(
        Function.prototype.toString
      ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins
      if (!hasSameProto) {
        // Pass the call on to the local Function.prototype.toString instead
        return ctx.toString()
      }
      return target.call(ctx)
    }
  }

  const toStringProxy = new Proxy(
    Function.prototype.toString,
    utils.stripProxyFromErrors(handler)
  )
  utils.replaceProperty(Function.prototype, 'toString', {
    value: toStringProxy
  })
}

/**
 * Make all nested functions of an object native.
 *
 * Applies `utils.patchToString` to every function found by `utils.execRecursively`.
 *
 * @param {object} obj
 * @returns {object} The result of `utils.execRecursively`
 */
utils.patchToStringNested = (obj = {}) => {
  return utils.execRecursively(obj, ['function'], utils.patchToString)
}

/**
 * Redirect toString requests from one object to another.
 *
 * When `originalObj` is missing (or stringifies to an empty string) a generated
 * native code string based on the available function name is returned instead.
 *
 * @param {object} proxyObj - The object that toString will be called on
 * @param {object} originalObj - The object which toString result we want to return
 */
utils.redirectToString = (proxyObj, originalObj) => {
  const handler = {
    apply: function (target, ctx) {
      // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + ""`
      if (ctx === Function.prototype.toString) {
        return utils.makeNativeString('toString')
      }

      // `toString` targeted at our proxied Object detected
      if (ctx === proxyObj) {
        const fallback = () =>
          originalObj && originalObj.name
            ? utils.makeNativeString(originalObj.name)
            : utils.makeNativeString(proxyObj.name)

        // Return the toString representation of our original object if possible
        // (a missing original must not be stringified to "undefined" / "null")
        const originalStr =
          typeof originalObj === 'undefined' || originalObj === null
            ? ''
            : originalObj + ''
        return originalStr || fallback()
      }

      if (typeof ctx === 'undefined' || ctx === null) {
        return target.call(ctx)
      }

      // Check if the toString prototype of the context is the same as the global prototype,
      // if not indicates that we are doing a check across different windows., e.g. the `iframeWithdirect` test case
      const hasSameProto = Object.getPrototypeOf(
        Function.prototype.toString
      ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins
      if (!hasSameProto) {
        // Pass the call on to the local Function.prototype.toString instead
        return ctx.toString()
      }

      return target.call(ctx)
    }
  }

  const toStringProxy = new Proxy(
    Function.prototype.toString,
    utils.stripProxyFromErrors(handler)
  )
  utils.replaceProperty(Function.prototype, 'toString', {
    value: toStringProxy
  })
}

/**
 * All-in-one method to replace a property with a JS Proxy using the provided Proxy handler with traps.
 *
 * Will stealthify these aspects (strip error stack traces, redirect toString, etc).
 * Note: This is meant to modify native Browser APIs and works best with prototype objects.
 */
/**
 * Swap `obj[propName]` for a stealth JS Proxy of itself and redirect `toString` calls
 * on the proxy to the original value.
 *
 * @example
 * replaceWithProxy(WebGLRenderingContext.prototype, 'getParameter', proxyHandler)
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The name of the property to replace
 * @param {object} handler - The JS Proxy handler to use
 * @returns {boolean} Always `true`
 */
utils.replaceWithProxy = (obj, propName, handler) => {
  const originalObj = obj[propName]
  const proxyTarget = obj[propName]
  const stealthHandler = utils.stripProxyFromErrors(handler)
  const proxyObj = new Proxy(proxyTarget, stealthHandler)

  utils.replaceProperty(obj, propName, { value: proxyObj })
  utils.redirectToString(proxyObj, originalObj)

  return true
}

/**
 * Swap the getter of `obj[propName]` for a stealth JS Proxy of that getter.
 *
 * The string representation of the native getter is captured up front and
 * installed as the `toString()` result of the proxy.
 *
 * @example
 * replaceGetterWithProxy(Object.getPrototypeOf(navigator), 'vendor', proxyHandler)
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The name of the property to replace
 * @param {object} handler - The JS Proxy handler to use
 * @returns {boolean} Always `true`
 */
utils.replaceGetterWithProxy = (obj, propName, handler) => {
  const descriptor = Object.getOwnPropertyDescriptor(obj, propName)
  const nativeGetter = descriptor.get
  // special getter function string
  const nativeGetterStr = nativeGetter.toString()
  const stealthHandler = utils.stripProxyFromErrors(handler)
  const proxyObj = new Proxy(nativeGetter, stealthHandler)

  utils.replaceProperty(obj, propName, { get: proxyObj })
  utils.patchToString(proxyObj, nativeGetterStr)

  return true
}

/**
 * All-in-one method to replace a getter and/or setter. Functions get and set
 * of handler have one more argument that contains the native function.
 */
/**
 * Redefine the accessor property `propName` on `obj` with wrappers around the
 * handler's `get` and/or `set` functions.
 *
 * The handler's `get` is called with the native getter (bound to the receiver) as its
 * only argument; the handler's `set` is called with the new value and the native setter
 * (bound to the receiver). `toString` calls on the wrappers are redirected to the
 * native functions. All other descriptor fields are copied from the existing descriptor.
 *
 * NOTE(review): assumes `propName` is an own accessor property of `obj` that has the
 * native getter/setter being overridden, `nativeFn.bind` throws otherwise - confirm callers.
 *
 * @example
 * replaceGetterSetter(HTMLIFrameElement.prototype, 'contentWindow', handler)
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The name of the property to replace
 * @param {object} handlerGetterSetter - The handler with get and/or set
 *     functions
 * @see https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object/defineProperty#description
 */
utils.replaceGetterSetter = (obj, propName, handlerGetterSetter) => {
  const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)
  // Start from a copy of the existing descriptor so untouched fields are preserved
  const handler = { ...ownPropertyDescriptor }

  if (handlerGetterSetter.get !== undefined) {
    const nativeFn = ownPropertyDescriptor.get
    // The wrapper forwards its own `this` so the native getter runs on the right receiver
    handler.get = function() {
      return handlerGetterSetter.get.call(this, nativeFn.bind(this))
    }
    utils.redirectToString(handler.get, nativeFn)
  }

  if (handlerGetterSetter.set !== undefined) {
    const nativeFn = ownPropertyDescriptor.set
    // The return value of the handler's `set` is discarded
    handler.set = function(newValue) {
      handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))
    }
    utils.redirectToString(handler.set, nativeFn)
  }

  Object.defineProperty(obj, propName, handler)
}

/**
 * All-in-one method to mock a non-existing property with a JS Proxy using the provided Proxy handler with traps.
 *
 * Will stealthify these aspects (strip error stack traces, redirect toString, etc).
 */
/**
 * Define `obj[propName]` as a stealth JS Proxy built on top of `pseudoTarget`
 * and patch the `toString()` result of that proxy.
 *
 * @example
 * mockWithProxy(chrome.runtime, 'sendMessage', function sendMessage() {}, proxyHandler)
 *
 * @param {object} obj - The object which has the property to replace
 * @param {string} propName - The name of the property to replace or create
 * @param {object} pseudoTarget - The JS Proxy target to use as a basis
 * @param {object} handler - The JS Proxy handler to use
 * @returns {boolean} Always `true`
 */
utils.mockWithProxy = (obj, propName, pseudoTarget, handler) => {
  const stealthHandler = utils.stripProxyFromErrors(handler)
  const mock = new Proxy(pseudoTarget, stealthHandler)

  utils.replaceProperty(obj, propName, { value: mock })
  utils.patchToString(mock)

  return true
}

/**
 * Build a standalone stealth JS Proxy.
 *
 * Use this whenever a JS Proxy is needed without replacing or mocking an existing known property.
 * The handler is wrapped with `utils.stripProxyFromErrors` and the `toString()` result
 * of the proxy is patched.
 *
 * @example
 * createProxy(navigator.mimeTypes.__proto__.namedItem, proxyHandler) // => Proxy
 *
 * @param {object} pseudoTarget - The JS Proxy target to use as a basis
 * @param {object} handler - The JS Proxy handler to use
 * @returns {object} The created proxy
 */
utils.createProxy = (pseudoTarget, handler) => {
  const stealthHandler = utils.stripProxyFromErrors(handler)
  const standalone = new Proxy(pseudoTarget, stealthHandler)
  utils.patchToString(standalone)

  return standalone
}

/**
 * Split a dot notation path into everything before the last dot and the last entry.
 *
 * @example
 * splitObjPath(`HTMLMediaElement.prototype.canPlayType`)
 * // => {objName: "HTMLMediaElement.prototype", propName: "canPlayType"}
 *
 * @param {string} objPath - The full path to an object as dot notation string
 * @returns {{objName: string, propName: string}}
 */
utils.splitObjPath = objPath => {
  const segments = objPath.split('.')
  // Last dot entry ==> `canPlayType`
  const propName = segments[segments.length - 1]
  // Everything in front of it ==> `HTMLMediaElement.prototype`
  const objName = segments.slice(0, -1).join('.')
  return { objName, propName }
}

/**
 * Convenience method to replace a property with a JS Proxy using the provided objPath.
 */
/**
 * Resolve a dot notation path and replace the addressed property with a stealth JS Proxy.
 *
 * Supports a full path (dot notation) to the object as string here, in case that makes it easier.
 *
 * @example
 * replaceObjPathWithProxy('WebGLRenderingContext.prototype.getParameter', proxyHandler)
 *
 * @param {string} objPath - The full path to an object (dot notation string) to replace
 * @param {object} handler - The JS Proxy handler to use
 * @returns {boolean} The result of `utils.replaceWithProxy`
 */
utils.replaceObjPathWithProxy = (objPath, handler) => {
  const { objName, propName } = utils.splitObjPath(objPath)
  // NOTE: `objName` is evaluated as code, only pass trusted (hard-coded) paths.
  const obj = eval(objName) // eslint-disable-line no-eval
  return utils.replaceWithProxy(obj, propName, handler)
}

/**
 * Traverse nested properties of an object recursively and apply the given function on a whitelist of value types.
 *
 * Falsy values are skipped, values of type `object` are descended into and every
 * other value whose `typeof` is listed in `typeFilter` is passed to `fn`.
 * Each object is visited only once, so cyclic references do not cause endless recursion.
 *
 * @param {object} obj
 * @param {array} typeFilter - e.g. `['function']`
 * @param {Function} fn - e.g. `utils.patchToString`
 * @returns {object} The provided `obj`
 */
utils.execRecursively = (obj = {}, typeFilter = [], fn) => {
  const visited = new WeakSet()
  function recurse(node) {
    if (
      node !== null &&
      (typeof node === 'object' || typeof node === 'function')
    ) {
      if (visited.has(node)) {
        return
      }
      visited.add(node)
    }
    for (const key in node) {
      const value = node[key]
      if (!value) {
        continue
      }
      if (typeof value === 'object') {
        recurse(value)
      } else if (typeFilter.includes(typeof value)) {
        fn.call(this, value)
      }
    }
  }
  recurse(obj)
  return obj
}

/**
 * Everything we run through e.g. `page.evaluate` runs in the browser context, not the NodeJS one.
 * That means we cannot just use reference variables and functions from outside code, we need to pass everything as a parameter.
 *
 * Unfortunately the data we can pass is only allowed to be of primitive types, regular functions don't survive the built-in serialization process.
 * This utility function will take an object with functions and stringify them, so we can pass them down unharmed as strings.
 *
 * We use this to pass down our utility functions as well as any other functions (to be able to split up code better).
 */
/**
 * Convert the function properties of an object to their source strings.
 *
 * Properties which are not functions are dropped from the result.
 *
 * @see utils.materializeFns
 *
 * @param {object} fnObj - An object containing functions as properties
 * @returns {object} An object with the same keys and the function sources as values
 */
utils.stringifyFns = (fnObj = { hello: () => 'world' }) => {
  // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine
  // https://github.com/feross/fromentries
  function fromEntries(iterable) {
    return [...iterable].reduce((obj, [key, val]) => {
      obj[key] = val
      return obj
    }, {})
  }
  return (Object.fromEntries || fromEntries)(
    Object.entries(fnObj)
      .filter(([, value]) => typeof value === 'function')
      .map(([key, value]) => [key, value.toString()])
  )
}

/**
 * Utility function to reverse the process of `utils.stringifyFns`.
 * Will materialize an object with stringified functions (supports classic, async and fat arrow functions).
 *
 * Note: The strings are evaluated as code, only pass trusted input.
 *
 * @param {object} fnStrObj - An object containing stringified functions as properties
 * @returns {object} An object with the same keys and the materialized functions as values
 */
utils.materializeFns = (fnStrObj = { hello: "() => 'world'" }) => {
  return Object.fromEntries(
    Object.entries(fnStrObj).map(([key, value]) => {
      // `function …` as well as `async function …` would be evaluated as a declaration
      // (yielding no value) or fail to parse when anonymous, hence they need the wrapper.
      if (/^(?:async\s+)?function/.test(value)) {
        // some trickery is needed to make oldschool functions work :-)
        return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval
      } else {
        // arrow functions just work
        return [key, eval(value)] // eslint-disable-line no-eval
      }
    })
  )
}

// Proxy handler templates for re-usability
utils.makeHandler = () => ({
  // Used by simple `navigator` getter evasions
  getterValue: value => ({
    apply(target, ctx, args) {
      // Let's fetch the value first, to trigger and escalate potential errors
      // Illegal invocations like `navigator.__proto__.vendor` will throw here
      utils.cache.Reflect.apply(...arguments)
      return value
    }
  })
})

/**
 * Compare two arrays.
 */
/**
 * Check whether two arrays have the same length and strictly equal (`===`) elements
 * at every index.
 *
 * @param {array} array1 - First array
 * @param {array} array2 - Second array
 * @returns {boolean} `true` when both arrays are element-wise equal
 */
utils.arrayEquals = (array1, array2) => {
  if (array1.length !== array2.length) {
    return false
  }
  for (let i = 0; i < array1.length; ++i) {
    if (array1[i] !== array2[i]) {
      return false
    }
  }
  return true
}

/**
 * Cache the method return according to its arguments.
 *
 * Results are stored in a list private to the returned function and looked up by comparing
 * the argument lists with `utils.arrayEquals`; entries are never removed.
 * The `this` value of the call is forwarded to `fn`.
 *
 * @param {Function} fn - A function that will be cached
 * @returns {Function} The memoized wrapper around `fn`
 */
utils.memoize = fn => {
  const cache = []
  return function(...args) {
    // Only invoke `fn` when no entry for these arguments exists yet
    if (!cache.some(c => utils.arrayEquals(c.key, args))) {
      cache.push({ key: args, value: fn.apply(this, args) })
    }
    return cache.find(c => utils.arrayEquals(c.key, args)).value
  }
}

// --
// Stuff starting below this line is NodeJS specific.
// --
module.exports = utils

================================================
FILE: packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.test.js
================================================
const test = require('ava')

const { vanillaPuppeteer } = require('../../test/util')

const utils = require('.')
const withUtils = require('./withUtils')

/* global HTMLMediaElement WebGLRenderingContext */

// Pure NodeJS check, no browser involved
test('splitObjPath: will do what it says', async t => {
  const { objName, propName } = utils.splitObjPath(
    'HTMLMediaElement.prototype.canPlayType'
  )
  t.is(objName, 'HTMLMediaElement.prototype')
  t.is(propName, 'canPlayType')
})

test('makeNativeString: will do what it says', async t => {
  // `makeNativeString` reads from the cache which `init()` preloads
  utils.init()
  t.is(utils.makeNativeString('bob'), 'function bob() { [native code] }')
  t.is(
    utils.makeNativeString('toString'),
    'function toString() { [native code] }'
  )
  t.is(utils.makeNativeString(), 'function () { [native code] }')
})

test('replaceWithProxy: will work correctly', async t => {
  const browser = await vanillaPuppeteer.launch({ headless: true })
  const page = await browser.newPage()

  const test1 = await withUtils(page).evaluate(utils => {
    const dummyProxyHandler = {
      get(target, param) {
        if (param && param === 'ping') {
          return 'pong'
        }
        return utils.cache.Reflect.get(...(arguments ||
[])) }, apply() { return utils.cache.Reflect.apply(...arguments) } } utils.replaceWithProxy( HTMLMediaElement.prototype, 'canPlayType', dummyProxyHandler ) return { toString: HTMLMediaElement.prototype.canPlayType.toString(), ping: HTMLMediaElement.prototype.canPlayType.ping } }) t.deepEqual(test1, { toString: 'function canPlayType() { [native code] }', ping: 'pong' }) }) test('replaceObjPathWithProxy: will work correctly', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const test1 = await withUtils(page).evaluate(utils => { const dummyProxyHandler = { get(target, param) { if (param && param === 'ping') { return 'pong' } return utils.cache.Reflect.get(...(arguments || [])) }, apply() { return utils.cache.Reflect.apply(...arguments) } } utils.replaceObjPathWithProxy( 'HTMLMediaElement.prototype.canPlayType', dummyProxyHandler ) return { toString: HTMLMediaElement.prototype.canPlayType.toString(), ping: HTMLMediaElement.prototype.canPlayType.ping } }) t.deepEqual(test1, { toString: 'function canPlayType() { [native code] }', ping: 'pong' }) }) test('redirectToString: is battle hardened', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() // Patch all documents including iframes await withUtils(page).evaluateOnNewDocument(utils => { // We redirect toString calls targeted at `canPlayType` to `getParameter`, // so if everything works correctly we expect `getParameter` as response. 
const proxyObj = HTMLMediaElement.prototype.canPlayType const originalObj = WebGLRenderingContext.prototype.getParameter utils.redirectToString(proxyObj, originalObj) }) await page.goto('about:blank') const result = await withUtils(page).evaluate(utils => { const iframe = document.createElement('iframe') document.body.appendChild(iframe) return { target: { raw: HTMLMediaElement.prototype.canPlayType + '', rawiframe: iframe.contentWindow.HTMLMediaElement.prototype.canPlayType + '', raw2: HTMLMediaElement.prototype.canPlayType.toString(), rawiframe2: iframe.contentWindow.HTMLMediaElement.prototype.canPlayType.toString(), direct: Function.prototype.toString.call( HTMLMediaElement.prototype.canPlayType ), directWithiframe: iframe.contentWindow.Function.prototype.toString.call( HTMLMediaElement.prototype.canPlayType ), iframeWithdirect: Function.prototype.toString.call( iframe.contentWindow.HTMLMediaElement.prototype.canPlayType ), iframeWithiframe: iframe.contentWindow.Function.prototype.toString.call( iframe.contentWindow.HTMLMediaElement.prototype.canPlayType ) }, toString: { obj: HTMLMediaElement.prototype.canPlayType.toString + '', objiframe: iframe.contentWindow.HTMLMediaElement.prototype.canPlayType.toString + '', raw: Function.prototype.toString + '', rawiframe: iframe.contentWindow.Function.prototype.toString + '', direct: Function.prototype.toString.call(Function.prototype.toString), directWithiframe: iframe.contentWindow.Function.prototype.toString.call( Function.prototype.toString ), iframeWithdirect: Function.prototype.toString.call( iframe.contentWindow.Function.prototype.toString ), iframeWithiframe: iframe.contentWindow.Function.prototype.toString.call( iframe.contentWindow.Function.prototype.toString ) } } }) t.deepEqual(result, { target: { raw: 'function getParameter() { [native code] }', raw2: 'function getParameter() { [native code] }', rawiframe: 'function getParameter() { [native code] }', rawiframe2: 'function getParameter() { [native code] }', 
direct: 'function getParameter() { [native code] }', directWithiframe: 'function getParameter() { [native code] }', iframeWithdirect: 'function getParameter() { [native code] }', iframeWithiframe: 'function getParameter() { [native code] }' }, toString: { obj: 'function toString() { [native code] }', objiframe: 'function toString() { [native code] }', raw: 'function toString() { [native code] }', rawiframe: 'function toString() { [native code] }', direct: 'function toString() { [native code] }', directWithiframe: 'function toString() { [native code] }', iframeWithdirect: 'function toString() { [native code] }', iframeWithiframe: 'function toString() { [native code] }' } }) }) test('redirectToString: has proper errors', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() // Patch all documents including iframes await withUtils(page).evaluateOnNewDocument(utils => { // We redirect toString calls targeted at `canPlayType` to `getParameter`, // so if everything works correctly we expect `getParameter` as response. 
const proxyObj = HTMLMediaElement.prototype.canPlayType const originalObj = WebGLRenderingContext.prototype.getParameter utils.redirectToString(proxyObj, originalObj) }) await page.goto('about:blank') const result = await withUtils(page).evaluate(utils => { const evalErr = (str = '') => { try { // eslint-disable-next-line no-eval return eval(str) } catch (err) { return err.toString() } } return { blank: evalErr(`Function.prototype.toString.apply()`), null: evalErr(`Function.prototype.toString.apply(null)`), undef: evalErr(`Function.prototype.toString.apply(undefined)`), emptyObject: evalErr(`Function.prototype.toString.apply({})`) } }) t.deepEqual(result, { blank: "TypeError: Function.prototype.toString requires that 'this' be a Function", null: "TypeError: Function.prototype.toString requires that 'this' be a Function", undef: "TypeError: Function.prototype.toString requires that 'this' be a Function", emptyObject: "TypeError: Function.prototype.toString requires that 'this' be a Function" }) }) test('patchToString: will work correctly', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() // Test verbatim string replacement const test1 = await withUtils(page).evaluate(utils => { utils.patchToString(HTMLMediaElement.prototype.canPlayType, 'bob') return HTMLMediaElement.prototype.canPlayType.toString() }) t.is(test1, 'bob') // Test automatic mode derived from `.name` const test2 = await withUtils(page).evaluate(utils => { utils.patchToString(HTMLMediaElement.prototype.canPlayType) return HTMLMediaElement.prototype.canPlayType.toString() }) t.is(test2, 'function canPlayType() { [native code] }') // Make sure automatic mode derived from `.name` works with proxies const test3 = await withUtils(page).evaluate(utils => { HTMLMediaElement.prototype.canPlayType = new Proxy( HTMLMediaElement.prototype.canPlayType, {} ) utils.patchToString(HTMLMediaElement.prototype.canPlayType) return 
HTMLMediaElement.prototype.canPlayType.toString() }) t.is(test3, 'function canPlayType() { [native code] }') // Actually verify there's an issue when using vanilla Proxies const test4 = await withUtils(page).evaluate(utils => { HTMLMediaElement.prototype.canPlayType = new Proxy( HTMLMediaElement.prototype.canPlayType, {} ) return HTMLMediaElement.prototype.canPlayType.toString() }) t.is(test4, 'function () { [native code] }') }) function toStringTest(obj) { obj = eval(obj) // eslint-disable-line no-eval return ` - obj.toString(): ${obj.toString()} - obj.name: ${obj.name} - obj.toString + "": ${obj.toString + ''} - obj.toString.name: ${obj.toString.name} - obj.valueOf + "": ${obj.valueOf + ''} - obj.valueOf().name: ${obj.valueOf().name} - Object.prototype.toString.apply(obj): ${Object.prototype.toString.apply(obj)} - Function.prototype.toString.call(obj): ${Function.prototype.toString.call( obj )} - Function.prototype.valueOf.call(obj) + "": ${ Function.prototype.valueOf.call(obj) + '' } - obj.toString === Function.prototype.toString: ${ obj.toString === Function.prototype.toString } `.trim() } test('patchToString: passes all toString tests', async t => { const toStringVanilla = await (async function () { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() return page.evaluate(toStringTest, 'HTMLMediaElement.prototype.canPlayType') })() const toStringStealth = await (async function () { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() await withUtils(page).evaluate(utils => { HTMLMediaElement.prototype.canPlayType = function canPlayType() {} utils.patchToString(HTMLMediaElement.prototype.canPlayType) }) return page.evaluate(toStringTest, 'HTMLMediaElement.prototype.canPlayType') })() // Check that the unmodified results are as expected t.is( toStringVanilla, ` - obj.toString(): function canPlayType() { [native code] } - obj.name: canPlayType - obj.toString + "": 
function toString() { [native code] } - obj.toString.name: toString - obj.valueOf + "": function valueOf() { [native code] } - obj.valueOf().name: canPlayType - Object.prototype.toString.apply(obj): [object Function] - Function.prototype.toString.call(obj): function canPlayType() { [native code] } - Function.prototype.valueOf.call(obj) + "": function canPlayType() { [native code] } - obj.toString === Function.prototype.toString: true `.trim() ) // Make sure our customizations leave no trace t.is(toStringVanilla, toStringStealth) }) test('patchToString: passes stack trace tests', async t => { const toStringStackTrace = () => { try { Object.create( Object.getOwnPropertyDescriptor(Function.prototype, 'toString').get ).toString() } catch (err) { return err.stack.split('\n').slice(0, 2).join('|') } return 'error not thrown' } const toStringVanilla = await (async function () { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() return page.evaluate(toStringStackTrace) })() const toStringStealth = await (async function () { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() await withUtils(page).evaluate(utils => { HTMLMediaElement.prototype.canPlayType = function canPlayType() {} utils.patchToString(HTMLMediaElement.prototype.canPlayType) }) return page.evaluate(toStringStackTrace) })() // Check that the unmodified results are as expected t.is( toStringVanilla, `TypeError: Object prototype may only be an Object or null: undefined| at Function.create ()`.trim() ) // Make sure our customizations leave no trace t.is(toStringVanilla, toStringStealth) }) test('patchToString: vanilla has iframe issues', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() // Only patch the main window const result = await withUtils(page).evaluate(utils => { utils.patchToString(HTMLMediaElement.prototype.canPlayType, 'bob') const 
iframe = document.createElement('iframe') document.body.appendChild(iframe) return { direct: Function.prototype.toString.call( HTMLMediaElement.prototype.canPlayType ), directWithiframe: iframe.contentWindow.Function.prototype.toString.call( HTMLMediaElement.prototype.canPlayType ), iframeWithdirect: Function.prototype.toString.call( iframe.contentWindow.HTMLMediaElement.prototype.canPlayType ), iframeWithiframe: iframe.contentWindow.Function.prototype.toString.call( iframe.contentWindow.HTMLMediaElement.prototype.canPlayType ) } }) t.deepEqual(result, { direct: 'bob', directWithiframe: 'function canPlayType() { [native code] }', iframeWithdirect: 'function canPlayType() { [native code] }', iframeWithiframe: 'function canPlayType() { [native code] }' }) }) test('patchToString: stealth has no iframe issues', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() // Patch all documents including iframes await withUtils(page).evaluateOnNewDocument(utils => { utils.patchToString(HTMLMediaElement.prototype.canPlayType, 'alice') }) await page.goto('about:blank') const result = await withUtils(page).evaluate(utils => { const iframe = document.createElement('iframe') document.body.appendChild(iframe) return { direct: Function.prototype.toString.call( HTMLMediaElement.prototype.canPlayType ), directWithiframe: iframe.contentWindow.Function.prototype.toString.call( HTMLMediaElement.prototype.canPlayType ), iframeWithdirect: Function.prototype.toString.call( iframe.contentWindow.HTMLMediaElement.prototype.canPlayType ), iframeWithiframe: iframe.contentWindow.Function.prototype.toString.call( iframe.contentWindow.HTMLMediaElement.prototype.canPlayType ) } }) t.deepEqual(result, { direct: 'alice', directWithiframe: 'alice', iframeWithdirect: 'alice', iframeWithiframe: 'alice' }) }) test('stripProxyFromErrors: will work correctly', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const 
page = await browser.newPage() const results = await withUtils(page).evaluate(utils => { const getStack = prop => { try { prop.caller() // Will throw (HTMLMediaElement.prototype.canPlayType.caller) return false } catch (err) { return err.stack } } /** We need traps to show up in the error stack */ const dummyProxyHandler = { get() { return utils.cache.Reflect.get(...(arguments || [])) }, apply() { return utils.cache.Reflect.apply(...arguments) } } const vanillaProxy = new Proxy( HTMLMediaElement.prototype.canPlayType, dummyProxyHandler ) const stealthProxy = new Proxy( HTMLMediaElement.prototype.canPlayType, utils.stripProxyFromErrors(dummyProxyHandler) ) const stacks = { vanilla: getStack(HTMLMediaElement.prototype.canPlayType), vanillaProxy: getStack(vanillaProxy), stealthProxy: getStack(stealthProxy) } return stacks }) // Check that the untouched stuff behaves as expected t.true(results.vanilla.includes(`TypeError: 'caller'`)) t.false(results.vanilla.includes(`at Object.get`)) // Regression test: Make sure vanilla JS Proxies leak the stack trace t.true(results.vanillaProxy.includes(`TypeError: 'caller'`)) t.true(results.vanillaProxy.includes(`at Object.get`)) // Stealth tests t.true(results.stealthProxy.includes(`TypeError: 'caller'`)) t.false(results.stealthProxy.includes(`at Object.get`)) }) test('replaceProperty: will work without traces', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await withUtils(page).evaluate(utils => { utils.replaceProperty(Object.getPrototypeOf(navigator), 'languages', { get: () => ['de-DE'] }) return { propNames: Object.getOwnPropertyNames(navigator) } }) t.false(results.propNames.includes('languages')) }) test('cache: will prevent leaks through overriding methods', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await withUtils(page).evaluate(utils => { const 
sniffResults = { vanilla: false, stealth: false } const vanillaProxy = new Proxy( {}, { get() { return Reflect.get(...arguments) } } ) Reflect.get = () => (sniffResults.vanilla = true) // trigger get trap vanillaProxy.foo // eslint-disable-line const stealthProxy = new Proxy( {}, { get() { return utils.cache.Reflect.get(...arguments) // using cached copy } } ) Reflect.get = () => (sniffResults.stealth = true) // trigger get trap stealthProxy.foo // eslint-disable-line return sniffResults }) t.deepEqual(results, { vanilla: true, stealth: false }) }) test('replaceWithProxy: will throw prototype errors', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() await page.goto('about:blank') const result = await withUtils(page).evaluate(utils => { utils.replaceWithProxy(HTMLMediaElement.prototype, 'canPlayType', {}) const evalErr = (str = '') => { try { // eslint-disable-next-line no-eval return eval(str) } catch (err) { return err.toString() } } return { same: evalErr( `Object.setPrototypeOf(HTMLMediaElement.prototype.canPlayType, HTMLMediaElement.prototype.canPlayType) + ""` ), sameString: evalErr( `Object.setPrototypeOf(Function.prototype.toString, Function.prototype.toString) + ""` ), null: evalErr( `Object.setPrototypeOf(Function.prototype.toString, null) + ""` ), undef: evalErr( `Object.setPrototypeOf(Function.prototype.toString, undefined) + ""` ), none: evalErr(`Object.setPrototypeOf(Function.prototype.toString) + ""`) } }) t.deepEqual(result, { same: 'TypeError: Cyclic __proto__ value', sameString: 'TypeError: Cyclic __proto__ value', null: 'TypeError: Cannot convert object to primitive value', undef: 'TypeError: Object prototype may only be an Object or null: undefined', none: 'TypeError: Object prototype may only be an Object or null: undefined' }) }) test('replaceGetterSetter', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() await 
page.goto('about:blank') const results = await withUtils(page).evaluate(utils => { const getDetails = a => ({ href: a.href, typeof: typeof a.href, in: 'href' in a, keys: Object.keys(a), // eslint-disable-next-line no-undef prototypeKeys: Object.keys(HTMLAnchorElement.prototype), getOwnPropertyNames: Object.getOwnPropertyNames(a), prototypeGetOwnPropertyNames: Object.getOwnPropertyNames( // eslint-disable-next-line no-undef HTMLAnchorElement.prototype ), ownPropertyDescriptor: undefined === Object.getOwnPropertyDescriptor(a, 'href'), prototypeOwnPropertyDescriptor: Object.getOwnPropertyDescriptor( // eslint-disable-next-line no-undef HTMLAnchorElement.prototype, 'href' ), ownPropertyDescriptors: Object.getOwnPropertyDescriptors(a, 'href'), prototypeOwnPropertyDescriptors: Object.getOwnPropertyDescriptors( // eslint-disable-next-line no-undef HTMLAnchorElement.prototype, 'href' ), getToString: Object.getOwnPropertyDescriptor( // eslint-disable-next-line no-undef HTMLAnchorElement.prototype, 'href' ).get.toString(), setToString: Object.getOwnPropertyDescriptor( // eslint-disable-next-line no-undef HTMLAnchorElement.prototype, 'href' ).set.toString() }) // Use native a.href. const a1 = document.createElement('a') a1.href = 'http://foo.com/' const details1 = getDetails(a1) // Override a.href. let href = '' // eslint-disable-next-line no-undef utils.replaceGetterSetter(HTMLAnchorElement.prototype, 'href', { get: function() { return href }, set: function(newValue) { href = newValue } }) // Use overrided a.href. 
const a2 = document.createElement('a') a2.href = 'http://foo.com/' const details2 = getDetails(a2) return [details1, details2] }) t.deepEqual(results[1], results[0]) }) test('arrayEquals', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() await page.goto('about:blank') const results = await withUtils(page).evaluate(utils => { const obj = { foo: 'bar' } return { a: utils.arrayEquals(['a', 'Alpha'], ['a', 'Alpha']), b: !utils.arrayEquals(['b', 'Beta'], ['b', 'Blue']), c: !utils.arrayEquals(['c', { foo: 'bar' }], ['c', { foo: 'bar' }]), d: utils.arrayEquals(['d', obj], ['d', obj]), e: utils.arrayEquals([null], [null]), f: utils.arrayEquals([undefined], [undefined]), g: utils.arrayEquals([false], [false]) } }) t.deepEqual(results, { a: true, b: true, c: true, d: true, e: true, f: true, g: true }) }) test('memoize', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() await page.goto('about:blank') const results = await withUtils(page).evaluate(utils => { const objectify = utils.memoize((valueAdded, valueIgnored) => { return { valueAdded } }) const obj = { foo: 'bar' } /* eslint-disable no-self-compare */ return { a: objectify('a', 'Alpha') === objectify('a', 'Alpha'), b: objectify('b', 'Beta') !== objectify('b', 'Blue'), c: objectify('c', { foo: 'bar' }) !== objectify('c', { foo: 'bar' }), d: objectify('d', obj) === objectify('d', obj), e: objectify(null) === objectify(null), f: objectify(undefined) === objectify(undefined), g: objectify(false) === objectify(false) } /* eslint-enable no-self-compare */ }) t.deepEqual(results, { a: true, b: true, c: true, d: true, e: true, f: true, g: true }) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/_utils/readme.md ================================================ ## API #### Table of Contents - [utils()](#utils) - 
[.stripProxyFromErrors(handler)](#stripproxyfromerrorshandler) - [.stripErrorWithAnchor(err, anchor)](#striperrorwithanchorerr-anchor) - [.replaceProperty(obj, propName, descriptorOverrides)](#replacepropertyobj-propname-descriptoroverrides) - [.preloadCache()](#preloadcache) - [.makeNativeString(name?)](#makenativestringname) - [.patchToString(obj, str)](#patchtostringobj-str) - [.patchToStringNested(obj)](#patchtostringnestedobj) - [.redirectToString(proxyObj, originalObj)](#redirecttostringproxyobj-originalobj) - [.replaceWithProxy(obj, propName, handler)](#replacewithproxyobj-propname-handler) - [.mockWithProxy(obj, propName, pseudoTarget, handler)](#mockwithproxyobj-propname-pseudotarget-handler) - [.createProxy(pseudoTarget, handler)](#createproxypseudotarget-handler) - [.splitObjPath(objPath)](#splitobjpathobjpath) - [.replaceObjPathWithProxy(objPath, handler)](#replaceobjpathwithproxyobjpath-handler) - [.execRecursively(obj, typeFilter, fn)](#execrecursivelyobj-typefilter-fn) - [.stringifyFns(fnObj)](#stringifyfnsfnobj) - [.materializeFns(fnStrObj)](#materializefnsfnstrobj) ### [utils()](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L12-L12) A set of shared utility functions specifically for the purpose of modifying native browser APIs without leaving traces. Meant to be passed down in puppeteer and used in the context of the page (everything in here runs in NodeJS as well as a browser). Note: If for whatever reason you need to use this outside of `puppeteer-extra`: Just remove the `module.exports` statement at the very bottom, the rest can be copy pasted into any browser context. Alternatively take a look at the `extract-stealth-evasions` package to create a finished bundle which includes these utilities. 
--- #### .[stripProxyFromErrors(handler)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L21-L82) - `handler` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The JS Proxy handler to wrap (optional, default `{}`) Wraps a JS Proxy Handler and strips its presence from error stacks, in case the traps throw. The presence of a JS Proxy can be revealed as it shows up in error stack traces. --- #### .[stripErrorWithAnchor(err, anchor)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L90-L101) - `err` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The error to sanitize - `anchor` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The string the anchor line starts with Strip error lines from stack traces until (and including) a known line in the stack. --- #### .[replaceProperty(obj, propName, descriptorOverrides)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L120-L127) - `obj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The object which has the property to replace - `propName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The property name to replace - `descriptorOverrides` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** e.g. { value: "alice" } (optional, default `{}`) Replace the property of an object in a stealthy way. Note: You also want to work on the prototype of an object most often, as you'd otherwise leave traces (e.g. 
showing up in Object.getOwnPropertyNames(obj)). Example: ```javascript replaceProperty(WebGLRenderingContext.prototype, 'getParameter', { value: 'alice' }) // or replaceProperty(Object.getPrototypeOf(navigator), 'languages', { get: () => ['en-US', 'en'] }) ``` - **See: ** --- #### .[preloadCache()](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L137-L150) Preload a cache of function copies and data. For a determined enough observer it would be possible to overwrite and sniff usage of functions we use in our internal Proxies; to combat that we use a cached copy of those functions. This is evaluated once per execution context (e.g. window) --- #### .[makeNativeString(name?)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L169-L173) - `name` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** Optional function name (optional, default `''`) Utility function to generate a cross-browser `toString` result representing native code. There are small differences: Chromium uses a single line, whereas FF & Webkit use multiline strings. To future-proof this we use an existing native toString result as the basis. The only advantage we have over the other team is that our JS runs first, hence we cache the result of the native toString result once, so they cannot spoof it afterwards and reveal that we're using it. Note: Whenever we add a `Function.prototype.toString` proxy we should preload the cache first, by executing `utils.preloadCache()` before the proxy is applied (so we don't cause recursive lookups). 
Example: ```javascript makeNativeString('foobar') // => `function foobar() { [native code] }` ``` --- #### .[patchToString(obj, str)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L189-L218) - `obj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The object for which to modify the `toString()` representation - `str` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Optional string used as a return value (optional, default `''`) Helper function to modify the `toString()` result of the provided object. Note: Use `utils.redirectToString` instead when possible. There's a quirk in JS Proxies that will cause the `toString()` result to differ from the vanilla Object. If no string is provided we will generate a `[native code]` thing based on the name of the property object. Example: ```javascript patchToString( WebGLRenderingContext.prototype.getParameter, 'function getParameter() { [native code] }' ) ``` --- #### .[patchToStringNested(obj)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L225-L227) - `obj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** (optional, default `{}`) Make all nested functions of an object native. 
--- #### .[redirectToString(proxyObj, originalObj)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L235-L272) - `proxyObj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The object that toString will be called on - `originalObj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The object whose toString result we want to return Redirect toString requests from one object to another. --- #### .[replaceWithProxy(obj, propName, handler)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L287-L296) - `obj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The object which has the property to replace - `propName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The name of the property to replace - `handler` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The JS Proxy handler to use All-in-one method to replace a property with a JS Proxy using the provided Proxy handler with traps. Will stealthify these aspects (strip error stack traces, redirect toString, etc). Note: This is meant to modify native Browser APIs and works best with prototype objects. 
Example: ```javascript replaceWithProxy(WebGLRenderingContext.prototype, 'getParameter', proxyHandler) ``` --- #### .[mockWithProxy(obj, propName, pseudoTarget, handler)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L311-L319) - `obj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The object which has the property to replace - `propName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The name of the property to replace or create - `pseudoTarget` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The JS Proxy target to use as a basis - `handler` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The JS Proxy handler to use All-in-one method to mock a non-existing property with a JS Proxy using the provided Proxy handler with traps. Will stealthify these aspects (strip error stack traces, redirect toString, etc). Example: ```javascript mockWithProxy( chrome.runtime, 'sendMessage', function sendMessage() {}, proxyHandler ) ``` --- #### .[createProxy(pseudoTarget, handler)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L334-L340) - `pseudoTarget` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The JS Proxy target to use as a basis - `handler` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The JS Proxy handler to use All-in-one method to create a new JS Proxy with stealth tweaks. This is meant to be used whenever we need a JS Proxy but don't want to replace or mock an existing known property. 
Will stealthify certain aspects of the Proxy (strip error stack traces, redirect toString, etc). Example: ```javascript createProxy(navigator.mimeTypes.__proto__.namedItem, proxyHandler) // => Proxy ``` --- #### .[splitObjPath(objPath)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L351-L359) - `objPath` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The full path to an object as dot notation string Helper function to split a full path to an Object into the first part and property. Example: ```javascript splitObjPath(`HTMLMediaElement.prototype.canPlayType`) // => {objName: "HTMLMediaElement.prototype", propName: "canPlayType"} ``` --- #### .[replaceObjPathWithProxy(objPath, handler)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L372-L376) - `objPath` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The full path to an object (dot notation string) to replace - `handler` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** The JS Proxy handler to use Convenience method to replace a property with a JS Proxy using the provided objPath. Supports a full path (dot notation) to the object as string here, in case that makes it easier. 
Example: ```javascript replaceObjPathWithProxy( 'WebGLRenderingContext.prototype.getParameter', proxyHandler ) ``` --- #### .[execRecursively(obj, typeFilter, fn)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L385-L402) - `obj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** (optional, default `{}`) - `typeFilter` **[array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)** e.g. `['function']` (optional, default `[]`) - `fn` **[Function](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Statements/function)** e.g. `utils.patchToString` Traverse nested properties of an object recursively and apply the given function on a whitelist of value types. --- #### .[stringifyFns(fnObj)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L417-L431) - `fnObj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** An object containing functions as properties (optional, default `{hello:()=>'world'}`) Everything we run through e.g. `page.evaluate` runs in the browser context, not the NodeJS one. That means we cannot just use reference variables and functions from outside code, we need to pass everything as a parameter. Unfortunately the data we can pass is only allowed to be of primitive types, regular functions don't survive the built-in serialization process. This utility function will take an object with functions and stringify them, so we can pass them down unharmed as strings. We use this to pass down our utility functions as well as any other functions (to be able to split up code better). 
- **See: utils.materializeFns** --- #### .[materializeFns(fnStrObj)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/_utils/index.js#L439-L451) - `fnStrObj` **[object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** An object containing stringified functions as properties (optional, default `{hello:"() => 'world'"}`) Utility function to reverse the process of `utils.stringifyFns`. Will materialize an object with stringified functions (supports classic and fat arrow functions). --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/_utils/withUtils.js ================================================ const utils = require('./index') /** * Wrap a page with utilities. * * @param {Puppeteer.Page} page */ module.exports = page => ({ /** * Simple `page.evaluate` replacement to preload utils */ evaluate: async function (mainFunction, ...args) { return page.evaluate( ({ _utilsFns, _mainFunction, _args }) => { // Add this point we cannot use our utililty functions as they're just strings, we need to materialize them first const utils = Object.fromEntries( Object.entries(_utilsFns).map(([key, value]) => [key, eval(value)]) // eslint-disable-line no-eval ) utils.init() return eval(_mainFunction)(utils, ..._args) // eslint-disable-line no-eval }, { _utilsFns: utils.stringifyFns(utils), _mainFunction: mainFunction.toString(), _args: args || [] } ) }, /** * Simple `page.evaluateOnNewDocument` replacement to preload utils */ evaluateOnNewDocument: async function (mainFunction, ...args) { return page.evaluateOnNewDocument( ({ _utilsFns, _mainFunction, _args }) => { // Add this point we cannot use our utililty functions as they're just strings, we need to materialize them first const utils = Object.fromEntries( Object.entries(_utilsFns).map(([key, value]) => [key, eval(value)]) // eslint-disable-line 
no-eval ) utils.init() return eval(_mainFunction)(utils, ..._args) // eslint-disable-line no-eval }, { _utilsFns: utils.stringifyFns(utils), _mainFunction: mainFunction.toString(), _args: args || [] } ) } }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.app/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const withUtils = require('../_utils/withUtils') /** * Mock the `chrome.app` object if not available (e.g. when running headless). */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/chrome.app' } async onPageCreated(page) { await withUtils(page).evaluateOnNewDocument(utils => { if (!window.chrome) { // Use the exact property descriptor found in headful Chrome // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')` Object.defineProperty(window, 'chrome', { writable: true, enumerable: true, configurable: false, // note! 
value: {} // We'll extend that later }) } // That means we're running headful and don't need to mock anything if ('app' in window.chrome) { return // Nothing to do here } const makeError = { ErrorInInvocation: fn => { const err = new TypeError(`Error in invocation of app.${fn}()`) return utils.stripErrorWithAnchor( err, `at ${fn} (eval at ` ) } } // There's a some static data in that property which doesn't seem to change, // we should periodically check for updates: `JSON.stringify(window.app, null, 2)` const STATIC_DATA = JSON.parse( ` { "isInstalled": false, "InstallState": { "DISABLED": "disabled", "INSTALLED": "installed", "NOT_INSTALLED": "not_installed" }, "RunningState": { "CANNOT_RUN": "cannot_run", "READY_TO_RUN": "ready_to_run", "RUNNING": "running" } } `.trim() ) window.chrome.app = { ...STATIC_DATA, get isInstalled() { return false }, getDetails: function getDetails() { if (arguments.length) { throw makeError.ErrorInInvocation(`getDetails`) } return null }, getIsInstalled: function getDetails() { if (arguments.length) { throw makeError.ErrorInInvocation(`getIsInstalled`) } return false }, runningState: function getDetails() { if (arguments.length) { throw makeError.ErrorInInvocation(`runningState`) } return 'cannot_run' } } utils.patchToStringNested(window.chrome.app) }) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.app/index.test.js ================================================ const test = require('ava') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') /* global chrome */ test('stealth: will add convincing chrome.app object', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin({})) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => { const catchErr = (fn, 
...args) => { try { return fn.apply(this, args) } catch ({ name, message, stack }) { return { name, message, stack } } } return { app: { exists: window.chrome && 'app' in window.chrome, toString: chrome.app.toString(), deepToString: chrome.app.runningState.toString() }, data: { getIsInstalled: chrome.app.getIsInstalled(), runningState: chrome.app.runningState(), getDetails: chrome.app.getDetails(), InstallState: chrome.app.InstallState, RunningState: chrome.app.RunningState }, errors: { getIsInstalled: catchErr(chrome.app.getDetails, 'foo').message, stackOK: !catchErr(chrome.app.getDetails, 'foo').stack.includes( 'at getDetails' ) } } }) t.deepEqual(results, { app: { exists: true, toString: '[object Object]', deepToString: 'function getDetails() { [native code] }' }, data: { InstallState: { DISABLED: 'disabled', INSTALLED: 'installed', NOT_INSTALLED: 'not_installed' }, RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' }, getDetails: null, getIsInstalled: false, runningState: 'cannot_run' }, errors: { getIsInstalled: 'Error in invocation of app.getDetails()', stackOK: true } }) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.app/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.app/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/chrome.app/index.js#L11-L97) - `opts` (optional, default `{}`) **Extends: PuppeteerExtraPlugin** Mock the `chrome.app` object if not available (e.g. when running headless). 
--- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.csi/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const withUtils = require('../_utils/withUtils') /** * Mock the `chrome.csi` function if not available (e.g. when running headless). * It's a deprecated (but unfortunately still existing) chrome specific API to fetch browser timings. * * Internally chromium switched the implementation to use the WebPerformance API, * so we can do the same to create a fully functional mock. :-) * * Note: We're using the deprecated PerformanceTiming API instead of the new Navigation Timing Level 2 API on purpopse. * * @see https://bugs.chromium.org/p/chromium/issues/detail?id=113048 * @see https://codereview.chromium.org/2456293003/ * @see https://developers.google.com/web/updates/2017/12/chrome-loadtimes-deprecated * @see https://developer.mozilla.org/en-US/docs/Web/API/PerformanceTiming * @see https://source.chromium.org/chromium/chromium/src/+/master:chrome/renderer/loadtimes_extension_bindings.cc;l=124?q=loadtimes&ss=chromium * @see `chrome.loadTimes` evasion * */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/chrome.csi' } async onPageCreated(page) { await withUtils(page).evaluateOnNewDocument(utils => { if (!window.chrome) { // Use the exact property descriptor found in headful Chrome // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')` Object.defineProperty(window, 'chrome', { writable: true, enumerable: true, configurable: false, // note! 
value: {} // We'll extend that later }) } // That means we're running headful and don't need to mock anything if ('csi' in window.chrome) { return // Nothing to do here } // Check that the Navigation Timing API v1 is available, we need that if (!window.performance || !window.performance.timing) { return } const { timing } = window.performance window.chrome.csi = function() { return { onloadT: timing.domContentLoadedEventEnd, startE: timing.navigationStart, pageT: Date.now() - timing.navigationStart, tran: 15 // Transition type or something } } utils.patchToString(window.chrome.csi) }) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.csi/index.test.js ================================================ const test = require('ava') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') /* global chrome */ test('stealth: will add functional chrome.csi function mock', async t => { const puppeteer = addExtra(vanillaPuppeteer).use( Plugin({ runOnInsecureOrigins: true // for testing }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => { const { timing } = window.performance const csi = window.chrome.csi() return { csi: { exists: window.chrome && 'csi' in window.chrome, toString: chrome.csi.toString() }, dataOK: { onloadT: csi.onloadT === timing.domContentLoadedEventEnd, startE: csi.startE === timing.navigationStart, pageT: Number.isInteger(csi.pageT), tran: Number.isInteger(csi.tran) } } }) t.deepEqual(results, { csi: { exists: true, toString: 'function () { [native code] }' }, dataOK: { onloadT: true, pageT: true, startE: true, tran: true } }) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.csi/package.json 
================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.csi/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/chrome.csi/index.js#L25-L70) - `opts` (optional, default `{}`) **Extends: PuppeteerExtraPlugin** Mock the `chrome.csi` function if not available (e.g. when running headless). It's a deprecated (but unfortunately still existing) chrome specific API to fetch browser timings. Internally chromium switched the implementation to use the WebPerformance API, so we can do the same to create a fully functional mock. :-) Note: We're using the deprecated PerformanceTiming API instead of the new Navigation Timing Level 2 API on purpopse. - **See: ** - **See: ** - **See: ** - **See: ** - **See: ** - **See: `chrome.loadTimes` evasion** --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.loadTimes/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const withUtils = require('../_utils/withUtils') /** * Mock the `chrome.loadTimes` function if not available (e.g. when running headless). * It's a deprecated (but unfortunately still existing) chrome specific API to fetch browser timings and connection info. * * Internally chromium switched the implementation to use the WebPerformance API, * so we can do the same to create a fully functional mock. :-) * * Note: We're using the deprecated PerformanceTiming API instead of the new Navigation Timing Level 2 API on purpopse. 
* * @see https://developers.google.com/web/updates/2017/12/chrome-loadtimes-deprecated * @see https://developer.mozilla.org/en-US/docs/Web/API/PerformanceTiming * @see https://source.chromium.org/chromium/chromium/src/+/master:chrome/renderer/loadtimes_extension_bindings.cc;l=124?q=loadtimes&ss=chromium * @see `chrome.csi` evasion * */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/chrome.loadTimes' } async onPageCreated(page) { await withUtils(page).evaluateOnNewDocument( (utils, { opts }) => { if (!window.chrome) { // Use the exact property descriptor found in headful Chrome // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')` Object.defineProperty(window, 'chrome', { writable: true, enumerable: true, configurable: false, // note! value: {} // We'll extend that later }) } // That means we're running headful and don't need to mock anything if ('loadTimes' in window.chrome) { return // Nothing to do here } // Check that the Navigation Timing API v1 + v2 is available, we need that if ( !window.performance || !window.performance.timing || !window.PerformancePaintTiming ) { return } const { performance } = window // Some stuff is not available on about:blank as it requires a navigation to occur, // let's harden the code to not fail then: const ntEntryFallback = { nextHopProtocol: 'h2', type: 'other' } // The API exposes some funky info regarding the connection const protocolInfo = { get connectionInfo() { const ntEntry = performance.getEntriesByType('navigation')[0] || ntEntryFallback return ntEntry.nextHopProtocol }, get npnNegotiatedProtocol() { // NPN is deprecated in favor of ALPN, but this implementation returns the // HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN. const ntEntry = performance.getEntriesByType('navigation')[0] || ntEntryFallback return ['h2', 'hq'].includes(ntEntry.nextHopProtocol) ? 
ntEntry.nextHopProtocol : 'unknown' }, get navigationType() { const ntEntry = performance.getEntriesByType('navigation')[0] || ntEntryFallback return ntEntry.type }, get wasAlternateProtocolAvailable() { // The Alternate-Protocol header is deprecated in favor of Alt-Svc // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this // should always return false. return false }, get wasFetchedViaSpdy() { // SPDY is deprecated in favor of HTTP/2, but this implementation returns // true for HTTP/2 or HTTP2+QUIC/39 as well. const ntEntry = performance.getEntriesByType('navigation')[0] || ntEntryFallback return ['h2', 'hq'].includes(ntEntry.nextHopProtocol) }, get wasNpnNegotiated() { // NPN is deprecated in favor of ALPN, but this implementation returns true // for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN. const ntEntry = performance.getEntriesByType('navigation')[0] || ntEntryFallback return ['h2', 'hq'].includes(ntEntry.nextHopProtocol) } } const { timing } = window.performance // Truncate number to specific number of decimals, most of the `loadTimes` stuff has 3 function toFixed(num, fixed) { var re = new RegExp('^-?\\d+(?:.\\d{0,' + (fixed || -1) + '})?') return num.toString().match(re)[0] } const timingInfo = { get firstPaintAfterLoadTime() { // This was never actually implemented and always returns 0. 
return 0 }, get requestTime() { return timing.navigationStart / 1000 }, get startLoadTime() { return timing.navigationStart / 1000 }, get commitLoadTime() { return timing.responseStart / 1000 }, get finishDocumentLoadTime() { return timing.domContentLoadedEventEnd / 1000 }, get finishLoadTime() { return timing.loadEventEnd / 1000 }, get firstPaintTime() { const fpEntry = performance.getEntriesByType('paint')[0] || { startTime: timing.loadEventEnd / 1000 // Fallback if no navigation occured (`about:blank`) } return toFixed( (fpEntry.startTime + performance.timeOrigin) / 1000, 3 ) } } window.chrome.loadTimes = function() { return { ...protocolInfo, ...timingInfo } } utils.patchToString(window.chrome.loadTimes) }, { opts: this.opts } ) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.loadTimes/index.test.js ================================================ const test = require('ava') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') /* global chrome */ test('stealth: will add functional chrome.loadTimes function mock', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin({})) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => { const loadTimes = window.chrome.loadTimes() return { loadTimes: { exists: window.chrome && 'loadTimes' in window.chrome, toString: chrome.loadTimes.toString() }, dataOK: { connectionInfo: 'connectionInfo' in loadTimes, npnNegotiatedProtocol: 'npnNegotiatedProtocol' in loadTimes, navigationType: 'navigationType' in loadTimes, wasAlternateProtocolAvailable: 'wasAlternateProtocolAvailable' in loadTimes, wasFetchedViaSpdy: 'wasFetchedViaSpdy' in loadTimes, wasNpnNegotiated: 'wasNpnNegotiated' in loadTimes, firstPaintAfterLoadTime: 'firstPaintAfterLoadTime' in 
loadTimes, requestTime: 'requestTime' in loadTimes, startLoadTime: 'startLoadTime' in loadTimes, commitLoadTime: 'commitLoadTime' in loadTimes, finishDocumentLoadTime: 'finishDocumentLoadTime' in loadTimes, finishLoadTime: 'finishLoadTime' in loadTimes, firstPaintTime: 'firstPaintTime' in loadTimes } } }) t.deepEqual(results, { loadTimes: { exists: true, toString: 'function () { [native code] }' }, dataOK: { commitLoadTime: true, connectionInfo: true, finishDocumentLoadTime: true, finishLoadTime: true, firstPaintAfterLoadTime: true, firstPaintTime: true, navigationType: true, npnNegotiatedProtocol: true, requestTime: true, startLoadTime: true, wasAlternateProtocolAvailable: true, wasFetchedViaSpdy: true, wasNpnNegotiated: true } }) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.loadTimes/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.loadTimes/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/chrome.loadTimes/index.js#L23-L164) - `opts` (optional, default `{}`) **Extends: PuppeteerExtraPlugin** Mock the `chrome.loadTimes` function if not available (e.g. when running headless). It's a deprecated (but unfortunately still existing) chrome specific API to fetch browser timings and connection info. Internally chromium switched the implementation to use the WebPerformance API, so we can do the same to create a fully functional mock. :-) Note: We're using the deprecated PerformanceTiming API instead of the new Navigation Timing Level 2 API on purpopse. 
- **See: ** - **See: ** - **See: ** - **See: `chrome.csi` evasion** --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.runtime/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const withUtils = require('../_utils/withUtils') const STATIC_DATA = require('./staticData.json') /** * Mock the `chrome.runtime` object if not available (e.g. when running headless) and on a secure site. */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/chrome.runtime' } get defaults() { return { runOnInsecureOrigins: false } // Override for testing } async onPageCreated(page) { await withUtils(page).evaluateOnNewDocument( (utils, { opts, STATIC_DATA }) => { if (!window.chrome) { // Use the exact property descriptor found in headful Chrome // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')` Object.defineProperty(window, 'chrome', { writable: true, enumerable: true, configurable: false, // note! 
value: {} // We'll extend that later }) } // That means we're running headful and don't need to mock anything const existsAlready = 'runtime' in window.chrome // `chrome.runtime` is only exposed on secure origins const isNotSecure = !window.location.protocol.startsWith('https') if (existsAlready || (isNotSecure && !opts.runOnInsecureOrigins)) { return // Nothing to do here } window.chrome.runtime = { // There's a bunch of static data in that property which doesn't seem to change, // we should periodically check for updates: `JSON.stringify(window.chrome.runtime, null, 2)` ...STATIC_DATA, // `chrome.runtime.id` is extension related and returns undefined in Chrome get id() { return undefined }, // These two require more sophisticated mocks connect: null, sendMessage: null } const makeCustomRuntimeErrors = (preamble, method, extensionId) => ({ NoMatchingSignature: new TypeError( preamble + `No matching signature.` ), MustSpecifyExtensionID: new TypeError( preamble + `${method} called from a webpage must specify an Extension ID (string) for its first argument.` ), InvalidExtensionID: new TypeError( preamble + `Invalid extension id: '${extensionId}'` ) }) // Valid Extension IDs are 32 characters in length and use the letter `a` to `p`: // https://source.chromium.org/chromium/chromium/src/+/master:components/crx_file/id_util.cc;drc=14a055ccb17e8c8d5d437fe080faba4c6f07beac;l=90 const isValidExtensionID = str => str.length === 32 && str.toLowerCase().match(/^[a-p]+$/) /** Mock `chrome.runtime.sendMessage` */ const sendMessageHandler = { apply: function(target, ctx, args) { const [extensionId, options, responseCallback] = args || [] // Define custom errors const errorPreamble = `Error in invocation of runtime.sendMessage(optional string extensionId, any message, optional object options, optional function responseCallback): ` const Errors = makeCustomRuntimeErrors( errorPreamble, `chrome.runtime.sendMessage()`, extensionId ) // Check if the call signature looks ok const 
noArguments = args.length === 0 const tooManyArguments = args.length > 4 const incorrectOptions = options && typeof options !== 'object' const incorrectResponseCallback = responseCallback && typeof responseCallback !== 'function' if ( noArguments || tooManyArguments || incorrectOptions || incorrectResponseCallback ) { throw Errors.NoMatchingSignature } // At least 2 arguments are required before we even validate the extension ID if (args.length < 2) { throw Errors.MustSpecifyExtensionID } // Now let's make sure we got a string as extension ID if (typeof extensionId !== 'string') { throw Errors.NoMatchingSignature } if (!isValidExtensionID(extensionId)) { throw Errors.InvalidExtensionID } return undefined // Normal behavior } } utils.mockWithProxy( window.chrome.runtime, 'sendMessage', function sendMessage() {}, sendMessageHandler ) /** * Mock `chrome.runtime.connect` * * @see https://developer.chrome.com/apps/runtime#method-connect */ const connectHandler = { apply: function(target, ctx, args) { const [extensionId, connectInfo] = args || [] // Define custom errors const errorPreamble = `Error in invocation of runtime.connect(optional string extensionId, optional object connectInfo): ` const Errors = makeCustomRuntimeErrors( errorPreamble, `chrome.runtime.connect()`, extensionId ) // Behavior differs a bit from sendMessage: const noArguments = args.length === 0 const emptyStringArgument = args.length === 1 && extensionId === '' if (noArguments || emptyStringArgument) { throw Errors.MustSpecifyExtensionID } const tooManyArguments = args.length > 2 const incorrectConnectInfoType = connectInfo && typeof connectInfo !== 'object' if (tooManyArguments || incorrectConnectInfoType) { throw Errors.NoMatchingSignature } const extensionIdIsString = typeof extensionId === 'string' if (extensionIdIsString && extensionId === '') { throw Errors.MustSpecifyExtensionID } if (extensionIdIsString && !isValidExtensionID(extensionId)) { throw Errors.InvalidExtensionID } // There's 
another edge-case here: extensionId is optional so we might find a connectInfo object as first param, which we need to validate const validateConnectInfo = ci => { // More than a first param connectInfo as been provided if (args.length > 1) { throw Errors.NoMatchingSignature } // An empty connectInfo has been provided if (Object.keys(ci).length === 0) { throw Errors.MustSpecifyExtensionID } // Loop over all connectInfo props an check them Object.entries(ci).forEach(([k, v]) => { const isExpected = ['name', 'includeTlsChannelId'].includes(k) if (!isExpected) { throw new TypeError( errorPreamble + `Unexpected property: '${k}'.` ) } const MismatchError = (propName, expected, found) => TypeError( errorPreamble + `Error at property '${propName}': Invalid type: expected ${expected}, found ${found}.` ) if (k === 'name' && typeof v !== 'string') { throw MismatchError(k, 'string', typeof v) } if (k === 'includeTlsChannelId' && typeof v !== 'boolean') { throw MismatchError(k, 'boolean', typeof v) } }) } if (typeof extensionId === 'object') { validateConnectInfo(extensionId) throw Errors.MustSpecifyExtensionID } // Unfortunately even when the connect fails Chrome will return an object with methods we need to mock as well return utils.patchToStringNested(makeConnectResponse()) } } utils.mockWithProxy( window.chrome.runtime, 'connect', function connect() {}, connectHandler ) function makeConnectResponse() { const onSomething = () => ({ addListener: function addListener() {}, dispatch: function dispatch() {}, hasListener: function hasListener() {}, hasListeners: function hasListeners() { return false }, removeListener: function removeListener() {} }) const response = { name: '', sender: undefined, disconnect: function disconnect() {}, onDisconnect: onSomething(), onMessage: onSomething(), postMessage: function postMessage() { if (!arguments.length) { throw new TypeError(`Insufficient number of arguments.`) } throw new Error(`Attempting to use a disconnected port object`) } } 
return response } }, { opts: this.opts, STATIC_DATA } ) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.runtime/index.test.js ================================================ const test = require('ava') const { getVanillaFingerPrint, getStealthFingerPrint } = require('../../test/util') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') const STATIC_DATA = require('./staticData.json') /* global chrome */ test('vanilla: is chrome false', async t => { const pageFn = async page => await page.evaluate(() => window.chrome) // eslint-disable-line const { pageFnResult: chrome, hasChrome } = await getVanillaFingerPrint( pageFn ) t.is(hasChrome, false) t.false(chrome instanceof Object) t.is(chrome, undefined) }) test('stealth: is chrome true', async t => { const pageFn = async page => await page.evaluate(() => window.chrome) // eslint-disable-line const { pageFnResult: chrome, hasChrome } = await getStealthFingerPrint( Plugin, pageFn ) t.is(hasChrome, true) t.true(chrome instanceof Object) }) test('stealth: will add convincing chrome.runtime object', async t => { const puppeteer = addExtra(vanillaPuppeteer).use( Plugin({ runOnInsecureOrigins: true // for testing }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() // const results = await page.evaluate(() => { const catchErr = (fn, ...args) => { try { return fn.apply(this, args) } catch (err) { return err.toString() } } return { runtime: { exists: window.chrome && 'runtime' in window.chrome, toString: chrome.runtime.toString() }, staticData: { OnInstalledReason: chrome.runtime.OnInstalledReason, OnRestartRequiredReason: chrome.runtime.OnRestartRequiredReason, PlatformArch: chrome.runtime.PlatformArch, PlatformNaclArch: chrome.runtime.PlatformNaclArch, PlatformOs: chrome.runtime.PlatformOs, 
RequestUpdateCheckStatus: chrome.runtime.RequestUpdateCheckStatus }, id: { exists: 'id' in chrome.runtime, undefined: chrome.runtime.id === undefined }, sendMessage: { exists: 'sendMessage' in chrome.runtime, name: chrome.runtime.sendMessage.name, toString1: chrome.runtime.sendMessage + '', toString2: chrome.runtime.sendMessage.toString(), validIdWorks: chrome.runtime.sendMessage('nckgahadagoaajjgafhacjanaoiihapd', '') === undefined }, sendMessageErrors: { noArg: catchErr(chrome.runtime.sendMessage), singleArg: catchErr(chrome.runtime.sendMessage, ''), tooManyArg: catchErr( chrome.runtime.sendMessage, '', '', '', '', '', '' ), incorrectArg: catchErr(chrome.runtime.sendMessage, '', '', {}, ''), noValidID: catchErr(chrome.runtime.sendMessage, 'foo', '') } } }) const bla = `TypeError: Error in invocation of runtime.sendMessage(optional string extensionId, any message, optional object options, optional function responseCallback)` t.deepEqual(results, { runtime: { exists: true, toString: '[object Object]' }, staticData: STATIC_DATA, id: { exists: true, undefined: true }, sendMessage: { exists: true, name: 'sendMessage', toString1: 'function sendMessage() { [native code] }', toString2: 'function sendMessage() { [native code] }', validIdWorks: true }, sendMessageErrors: { noArg: `${bla}: No matching signature.`, singleArg: `${bla}: chrome.runtime.sendMessage() called from a webpage must specify an Extension ID (string) for its first argument.`, tooManyArg: `${bla}: No matching signature.`, incorrectArg: `${bla}: No matching signature.`, noValidID: `${bla}: Invalid extension id: 'foo'` } }) }) test('stealth: will add convincing chrome.runtime.connect', async t => { const puppeteer = addExtra(vanillaPuppeteer).use( Plugin({ runOnInsecureOrigins: true // for testing }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => { const catchErr = (fn, ...args) => { try { return fn.apply(this, 
args) } catch (err) { return err.toString() } } return { connect: { exists: 'connect' in chrome.runtime, name: chrome.runtime.connect.name, toString1: chrome.runtime.connect + '', toString2: chrome.runtime.connect.toString(), validIdWorks: chrome.runtime.connect('nckgahadagoaajjgafhacjanaoiihapd') !== undefined }, connectErrors: { noArg: catchErr(chrome.runtime.connect), singleArg: catchErr(chrome.runtime.connect, ''), tooManyArg: catchErr(chrome.runtime.connect, '', '', '', '', '', ''), incorrectArg: catchErr(chrome.runtime.connect, '', '', {}, ''), noValidID: catchErr(chrome.runtime.connect, 'foo', ''), connectInfoFirst: { emptyObject: catchErr(chrome.runtime.connect, {}), tooManyArg: catchErr(chrome.runtime.connect, {}, {}), unexpectedProp: catchErr(chrome.runtime.connect, { wtf: true }), invalidName: catchErr(chrome.runtime.connect, { name: 666 }), invalidTLS: catchErr(chrome.runtime.connect, { includeTlsChannelId: 777 }), invalidBoth: catchErr(chrome.runtime.connect, { name: 666, includeTlsChannelId: 777 }), validName: catchErr(chrome.runtime.connect, { name: 'foo' }), missingExtensionId: catchErr(chrome.runtime.connect, { name: 'bob', includeTlsChannelId: false }) } } } }) const bla = `TypeError: Error in invocation of runtime.connect(optional string extensionId, optional object connectInfo)` t.deepEqual(results, { connect: { exists: true, name: 'connect', toString1: 'function connect() { [native code] }', toString2: 'function connect() { [native code] }', validIdWorks: true }, connectErrors: { noArg: `${bla}: chrome.runtime.connect() called from a webpage must specify an Extension ID (string) for its first argument.`, singleArg: `${bla}: chrome.runtime.connect() called from a webpage must specify an Extension ID (string) for its first argument.`, tooManyArg: `${bla}: No matching signature.`, incorrectArg: `${bla}: No matching signature.`, noValidID: `${bla}: Invalid extension id: 'foo'`, connectInfoFirst: { emptyObject: `${bla}: chrome.runtime.connect() 
called from a webpage must specify an Extension ID (string) for its first argument.`, tooManyArg: `${bla}: No matching signature.`, unexpectedProp: `${bla}: Unexpected property: 'wtf'.`, invalidName: `${bla}: Error at property 'name': Invalid type: expected string, found number.`, invalidTLS: `${bla}: Error at property 'includeTlsChannelId': Invalid type: expected boolean, found number.`, invalidBoth: `${bla}: Error at property 'name': Invalid type: expected string, found number.`, validName: `${bla}: chrome.runtime.connect() called from a webpage must specify an Extension ID (string) for its first argument.`, missingExtensionId: `${bla}: chrome.runtime.connect() called from a webpage must specify an Extension ID (string) for its first argument.` } } }) }) test('stealth: will add convincing chrome.runtime.connect response', async t => { const puppeteer = addExtra(vanillaPuppeteer).use( Plugin({ runOnInsecureOrigins: true // for testing }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => { const connectResponse = chrome.runtime.connect( 'nckgahadagoaajjgafhacjanaoiihapd' ) return { connectResponse: { exists: !!connectResponse, toString1: connectResponse + '', toString2: connectResponse.toString(), nestedToString: connectResponse.onDisconnect.addListener + '' }, disconnect: { toString: connectResponse.disconnect + '', noReturn: connectResponse.disconnect() === undefined } } }) t.deepEqual(results, { connectResponse: { exists: true, toString1: '[object Object]', toString2: '[object Object]', nestedToString: `function addListener() { [native code] }` }, disconnect: { toString: `function disconnect() { [native code] }`, noReturn: true } }) }) // FIXME: This changed in more recent chrome versions // test('stealth: error stack is fine', async t => { // const puppeteer = addExtra(vanillaPuppeteer).use( // Plugin({ // runOnInsecureOrigins: true // for testing // }) // ) // const 
browser = await puppeteer.launch({ headless: true }) // const page = await browser.newPage() // const result = await page.evaluate(() => { // const catchErr = (fn, ...args) => { // try { // return fn.apply(this, args) // } catch ({ name, message, stack }) { // return { // name, // message, // stack // } // } // } // return catchErr(chrome.runtime.connect, '').stack // }) // /** // * OK: // TypeError: Error in invocation of runtime.connect(optional string extensionId, optional object connectInfo): chrome.runtime.connect() called from a webpage must specify an Extension ID (string) for its first argument.␊ // - at catchErr (__puppeteer_evaluation_script__:4:19)␊ // - at __puppeteer_evaluation_script__:18:12 // */ // t.is(result.split('\n').length, 3) // }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.runtime/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.runtime/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) - [sendMessageHandler()](#sendmessagehandler) - [connectHandler()](#connecthandler) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/chrome.runtime/index.js#L13-L251) - `opts` (optional, default `{}`) **Extends: PuppeteerExtraPlugin** Mock the `chrome.runtime` object if not available (e.g. when running headless) and on a secure site. 
--- ### [sendMessageHandler()](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/chrome.runtime/index.js#L80-L123) Mock `chrome.runtime.sendMessage` --- ### [connectHandler()](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/chrome.runtime/index.js#L136-L210) Mock `chrome.runtime.connect` - **See: ** --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/chrome.runtime/staticData.json ================================================ { "OnInstalledReason": { "CHROME_UPDATE": "chrome_update", "INSTALL": "install", "SHARED_MODULE_UPDATE": "shared_module_update", "UPDATE": "update" }, "OnRestartRequiredReason": { "APP_UPDATE": "app_update", "OS_UPDATE": "os_update", "PERIODIC": "periodic" }, "PlatformArch": { "ARM": "arm", "ARM64": "arm64", "MIPS": "mips", "MIPS64": "mips64", "X86_32": "x86-32", "X86_64": "x86-64" }, "PlatformNaclArch": { "ARM": "arm", "MIPS": "mips", "MIPS64": "mips64", "X86_32": "x86-32", "X86_64": "x86-64" }, "PlatformOs": { "ANDROID": "android", "CROS": "cros", "LINUX": "linux", "MAC": "mac", "OPENBSD": "openbsd", "WIN": "win" }, "RequestUpdateCheckStatus": { "NO_UPDATE": "no_update", "THROTTLED": "throttled", "UPDATE_AVAILABLE": "update_available" } } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/defaultArgs/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const argsToIgnore = [ '--disable-extensions', '--disable-default-apps', '--disable-component-extensions-with-background-pages' ] /** * A CDP driver like puppeteer can make use of various browser launch arguments that are * adversarial to mimicking a regular browser and need to be stripped when launching the browser. 
*/ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/defaultArgs' } get requirements() { return new Set(['runLast']) // So other plugins can modify launch options before } async beforeLaunch(options = {}) { options.ignoreDefaultArgs = options.ignoreDefaultArgs || [] if (options.ignoreDefaultArgs === true) { // that means the user explicitly wants to disable all default arguments return } argsToIgnore.forEach(arg => { if (options.ignoreDefaultArgs.includes(arg)) { return } options.ignoreDefaultArgs.push(arg) }) } } module.exports = function (pluginConfig) { return new Plugin(pluginConfig) } module.exports.argsToIgnore = argsToIgnore ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/defaultArgs/index.test.js ================================================ const test = require('ava') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') const { argsToIgnore } = require('.') test('vanilla: uses args to ignore', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const client = typeof page._client === 'function' ? page._client() : page._client const { arguments: launchArgs } = await client.send( 'Browser.getBrowserCommandLine' ) const ok = argsToIgnore.every(arg => launchArgs.includes(arg)) if (!ok) { console.log({ argsToIgnore, launchArgs }) } t.is(ok, true) }) test('stealth: does not use args to ignore', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const client = typeof page._client === 'function' ? 
/**
 * Fix for the HEADCHR_IFRAME detection (iframe.contentWindow.chrome), hopefully this time without breaking iframes.
 * Note: Only `srcdoc` powered iframes cause issues due to a chromium bug:
 *
 * https://github.com/puppeteer/puppeteer/issues/1106
 *
 * The evasion proxies `document.createElement`, hooks the `srcdoc` property of every
 * iframe created through it and, on the first `srcdoc` assignment, installs a
 * `contentWindow` getter that returns a proxy around the main `window`.
 */
class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)
  }

  get name() {
    return 'stealth/evasions/iframe.contentWindow'
  }

  get requirements() {
    // Make sure `chrome.runtime` has ran, we use data defined by it (e.g. `window.chrome`)
    return new Set(['runLast'])
  }

  /**
   * Registers the evasion script so it runs in every new document of the page.
   *
   * @param {Object} page - The page that has just been created
   */
  async onPageCreated(page) {
    // NOTE(review): `opts` is declared but no second argument is passed to
    // `evaluateOnNewDocument`, and it is not read anywhere below.
    await withUtils(page).evaluateOnNewDocument((utils, opts) => {
      try {
        // Adds a contentWindow proxy to the provided iframe element
        const addContentWindowProxy = iframe => {
          const contentWindowProxy = {
            get(target, key) {
              // Now to the interesting part:
              // We actually make this thing behave like a regular iframe window,
              // by intercepting calls to e.g. `.self` and redirect it to the correct thing. :)
              // That makes it possible for these assertions to be correct:
              // iframe.contentWindow.self === window.top // must be false
              // NOTE(review): inside a proxy trap `this` is the handler object
              // (`contentWindowProxy`), not the proxy itself, so `.self` yields the
              // handler. That satisfies the inequality above, but confirm it is intended.
              if (key === 'self') {
                return this
              }
              // iframe.contentWindow.frameElement === iframe // must be true
              if (key === 'frameElement') {
                return iframe
              }
              // Intercept iframe.contentWindow[0] to hide the property 0 added by the proxy.
              if (key === '0') {
                return undefined
              }
              // Everything else is read straight from the proxied main window
              return Reflect.get(target, key)
            }
          }

          // Only install the proxy while the iframe has no contentWindow yet
          if (!iframe.contentWindow) {
            const proxy = new Proxy(window, contentWindowProxy)
            Object.defineProperty(iframe, 'contentWindow', {
              get() {
                return proxy
              },
              set(newValue) {
                return newValue // contentWindow is immutable
              },
              enumerable: true,
              // Non-configurable: this own property cannot be redefined or deleted afterwards
              configurable: false
            })
          }
        }

        // Handles iframe element creation, augments `srcdoc` property so we can intercept further
        const handleIframeCreation = (target, thisArg, args) => {
          // Create the element through the original `createElement`
          const iframe = target.apply(thisArg, args)

          // We need to keep the originals around
          const _iframe = iframe
          const _srcdoc = _iframe.srcdoc

          // Add hook for the srcdoc property
          // We need to be very surgical here to not break other iframes by accident
          Object.defineProperty(iframe, 'srcdoc', {
            configurable: true, // Important, so we can reset this later
            get: function() {
              return _srcdoc
            },
            set: function(newValue) {
              addContentWindowProxy(this)
              // Reset property, the hook is only needed once
              Object.defineProperty(iframe, 'srcdoc', {
                configurable: false,
                writable: false,
                value: _srcdoc
              })
              // NOTE(review): `_iframe` is the same object as `iframe`, so this assignment
              // targets the own, non-writable `srcdoc` data property defined just above.
              // Confirm that `newValue` is actually applied to the element.
              _iframe.srcdoc = newValue
            }
          })
          return iframe
        }

        // Adds a hook to intercept iframe creation events
        const addIframeCreationSniffer = () => {
          /* global document */
          const createElementHandler = {
            // Make toString() native
            get(target, key) {
              return Reflect.get(target, key)
            },
            apply: function(target, thisArg, args) {
              // The tag name is stringified and lower-cased before comparing,
              // so e.g. 'IFRAME' is matched as well
              const isIframe =
                args &&
                args.length &&
                `${args[0]}`.toLowerCase() === 'iframe'
              if (!isIframe) {
                // Everything as usual
                return target.apply(thisArg, args)
              } else {
                return handleIframeCreation(target, thisArg, args)
              }
            }
          }
          // All this just due to iframes with srcdoc bug
          utils.replaceWithProxy(
            document,
            'createElement',
            createElementHandler
          )
        }

        // Let's go
        addIframeCreationSniffer()
      } catch (err) {
        // Errors are swallowed here; the only handling is the commented-out warning below
        // console.warn(err)
      }
    })
  }
}

// Factory export: returns a new plugin instance for the given config
module.exports = function(pluginConfig) {
  return new Plugin(pluginConfig)
}
page.evaluate(returnValue => { const { document } = window // eslint-disable-line const body = document.querySelector('body') const iframe = document.createElement('iframe') body.srcdoc = 'foobar' body.appendChild(iframe) iframe.contentWindow.mySuperFunction = () => returnValue }, testFuncReturnValue) const realReturn = await page.evaluate( () => document.querySelector('iframe').contentWindow.mySuperFunction() // eslint-disable-line ) await browser.close() t.is(realReturn, 'TESTSTRING') }) test('vanilla: will not have contentWindow[0]', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const zero = await page.evaluate(returnValue => { const { document } = window // eslint-disable-line const body = document.querySelector('body') const iframe = document.createElement('iframe') iframe.srcdoc = 'foobar' body.appendChild(iframe) return typeof iframe.contentWindow[0] }) await browser.close() t.is(zero, 'undefined') }) test('stealth: will not have contentWindow[0]', async t => { const browser = await addExtra(vanillaPuppeteer) .use(Plugin()) .launch({ headless: true }) const page = await browser.newPage() const zero = await page.evaluate(returnValue => { const { document } = window // eslint-disable-line const body = document.querySelector('body') const iframe = document.createElement('iframe') iframe.srcdoc = 'foobar' body.appendChild(iframe) return typeof iframe.contentWindow[0] }) await browser.close() t.is(zero, 'undefined') }) test('vanilla: will not have chrome runtine in any frame', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() await page.goto('file://' + dummyHTMLPath) const basiciframe = await page.evaluate(() => { const el = document.createElement('iframe') document.body.appendChild(el) return el.contentWindow.chrome }) const sandboxSOiframe = await page.evaluate(() => { const el = document.createElement('iframe') 
el.setAttribute('sandbox', 'allow-same-origin') document.body.appendChild(el) return el.contentWindow.chrome }) const sandboxSOASiframe = await page.evaluate(() => { const el = document.createElement('iframe') el.setAttribute('sandbox', 'allow-same-origin allow-scripts') document.body.appendChild(el) return el.contentWindow.chrome }) const srcdociframe = await page.evaluate(() => { const el = document.createElement('iframe') el.srcdoc = 'blank page, boys.' document.body.appendChild(el) return el.contentWindow.chrome }) // console.log('basic iframe', basiciframe) // console.log('sandbox same-origin iframe', sandboxSOiframe) // console.log('sandbox same-origin&scripts iframe', sandboxSOASiframe) // console.log('srcdoc iframe', srcdociframe) await browser.close() t.is(typeof basiciframe, 'undefined') t.is(typeof sandboxSOiframe, 'undefined') t.is(typeof sandboxSOASiframe, 'undefined') t.is(typeof srcdociframe, 'undefined') }) test('stealth: it will cover all frames including srcdoc', async t => { // const browser = await vanillaPuppeteer.launch({ headless: false }) const browser = await addExtra(vanillaPuppeteer) .use(Plugin()) .launch({ headless: true }) const page = await browser.newPage() await page.goto('file://' + dummyHTMLPath) const basiciframe = await page.evaluate(() => { const el = document.createElement('iframe') document.body.appendChild(el) return el.contentWindow.chrome }) const sandboxSOiframe = await page.evaluate(() => { const el = document.createElement('iframe') el.setAttribute('sandbox', 'allow-same-origin') document.body.appendChild(el) return el.contentWindow.chrome }) const sandboxSOASiframe = await page.evaluate(() => { const el = document.createElement('iframe') el.setAttribute('sandbox', 'allow-same-origin allow-scripts') document.body.appendChild(el) return el.contentWindow.chrome }) const srcdociframe = await page.evaluate(() => { const el = document.createElement('iframe') el.srcdoc = 'blank page, boys.' 
document.body.appendChild(el) return el.contentWindow.chrome }) // console.log('basic iframe', basiciframe) // console.log('sandbox same-origin iframe', sandboxSOiframe) // console.log('sandbox same-origin&scripts iframe', sandboxSOASiframe) // console.log('srcdoc iframe', srcdociframe) await browser.close() if (isOldPuppeteerVersion()) { t.is(typeof basiciframe, 'object') } else { t.is(typeof basiciframe, 'object') t.is(typeof sandboxSOiframe, 'object') t.is(typeof sandboxSOASiframe, 'object') t.is(typeof srcdociframe, 'object') } }) test('vanilla: will allow to define property contentWindow', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const iframe = await page.evaluate(() => { const { document } = window // eslint-disable-line const iframe = document.createElement('iframe') iframe.srcdoc = 'foobar' return Object.defineProperty(iframe, 'contentWindow', { value: 'baz' }) }) await browser.close() t.is(typeof iframe, 'object') }) // test('stealth: will allow to define property contentWindow', async t => { // const browser = await addExtra(vanillaPuppeteer) // .use(Plugin()) // .launch({ headless: true }) // const page = await browser.newPage() // const iframe = await page.evaluate(() => { // const { document } = window // eslint-disable-line // const iframe = document.createElement('iframe') // iframe.srcdoc = 'foobar' // return Object.defineProperty(iframe, 'contentWindow', { value: 'baz' }) // }) // await browser.close() // t.is(typeof iframe, 'object') // }) test('vanilla: will return undefined for getOwnPropertyDescriptor of contentWindow', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const iframe = await page.evaluate(() => { const { document } = window // eslint-disable-line const iframe = document.createElement('iframe') iframe.srcdoc = 'foobar' return Object.getOwnPropertyDescriptor(iframe, 'contentWindow') }) await 
/* global HTMLIFrameElement */

// Verifies that the proxied `contentWindow` of a `srcdoc` iframe behaves like a
// regular iframe window and leaves no obvious traces of the proxy.
test('stealth: it will emulate advanved contentWindow features correctly', async t => {
  const browser = await addExtra(vanillaPuppeteer)
    .use(Plugin())
    .launch({ headless: true })

  let results
  try {
    const page = await browser.newPage()
    await page.goto('file://' + dummyHTMLPath)

    results = await page.evaluate(() => {
      const results = {}

      const iframe = document.createElement('iframe')
      iframe.srcdoc = 'page intentionally left blank' // Note: srcdoc
      document.body.appendChild(iframe)

      // A second, regular (non-srcdoc) iframe living next to the patched one.
      // The original appended `iframe` a second time here, so this element never reached the DOM.
      const basicIframe = document.createElement('iframe')
      basicIframe.src = 'data:text/plain;charset=utf-8,foobar'
      document.body.appendChild(basicIframe)

      results.descriptors = (() => {
        // Verify iframe prototype isn't touched
        const descriptors = Object.getOwnPropertyDescriptors(
          HTMLIFrameElement.prototype
        )
        return descriptors.contentWindow.get.toString()
      })()

      results.noProxySignature = (() => {
        return iframe.srcdoc.toString.hasOwnProperty('[[IsRevoked]]') // eslint-disable-line
      })()

      results.doesExist = (() => {
        // Verify iframe has a contentWindow at all
        return !!iframe.contentWindow
      })()

      results.isNotAClone = (() => {
        // Verify iframe isn't remapped to main window
        return iframe.contentWindow !== window
      })()

      results.hasPlugins = (() => {
        return iframe.contentWindow.navigator.plugins.length > 0
      })()

      results.hasSameNumberOfPlugins = (() => {
        return (
          window.navigator.plugins.length ===
          iframe.contentWindow.navigator.plugins.length
        )
      })()

      results.SelfIsNotWindow = (() => {
        return iframe.contentWindow.self !== window
      })()

      results.SelfIsNotWindowTop = (() => {
        return iframe.contentWindow.self !== window.top
      })()

      results.TopIsNotSame = (() => {
        return iframe.contentWindow.top !== iframe.contentWindow
      })()

      results.FrameElementMatches = (() => {
        return iframe.contentWindow.frameElement === iframe
      })()

      results.StackTraces = (() => {
        try {
          // eslint-disable-next-line
          document['createElement'](0)
        } catch (e) {
          return e.stack
        }
        return false
      })()

      return results
    })
  } finally {
    // Always release the browser, even when navigation or evaluation fails
    await browser.close()
  }

  if (isOldPuppeteerVersion()) {
    t.true(true)
    return
  }

  t.is(results.descriptors, 'function get contentWindow() { [native code] }')
  t.true(results.doesExist)
  t.true(results.isNotAClone)
  t.true(results.hasPlugins)
  t.true(results.hasSameNumberOfPlugins)
  t.true(results.SelfIsNotWindow)
  t.true(results.SelfIsNotWindowTop)
  t.true(results.TopIsNotSame)
  // Was computed before but never checked
  t.true(results.FrameElementMatches)
  t.false(results.StackTraces.includes(`at Object.apply`))
})
false }) const browser = await addExtra(vanillaPuppeteer) .use(Plugin()) .launch({ headless: true }) const page = await browser.newPage() page.waitForTimeout = page.waitForTimeout || page.waitFor await page.goto('https://www.fbdemo.com/invisible-captcha/index.html', { waitUntil: 'networkidle2' }) await page.type('#tswname', 'foo') await page.type('#tswemail', 'foo@foo.foo') await page.type( '#tswcomments', 'In the depth of winter, I finally learned that within me there lay an invincible summer.' ) await page.click('#tswsubmit') await page.waitForTimeout(1000) const { hasRecaptchaPopup } = await page.evaluate(() => { const hasRecaptchaPopup = !!document.querySelectorAll( `iframe[title*="recaptcha challenge"]` ).length return { hasRecaptchaPopup } }) await browser.close() t.true(hasRecaptchaPopup) }) test('regression: old method indeed did break recaptcha popup', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() page.waitForTimeout = page.waitForTimeout || page.waitFor // Old method await page.evaluateOnNewDocument(() => { // eslint-disable-next-line Object.defineProperty(HTMLIFrameElement.prototype, 'contentWindow', { get: function() { return window } }) }) await page.goto('https://www.fbdemo.com/invisible-captcha/index.html', { waitUntil: 'networkidle2' }) await page.type('#tswname', 'foo') await page.type('#tswemail', 'foo@foo.foo') await page.type( '#tswcomments', 'In the depth of winter, I finally learned that within me there lay an invincible summer.' 
Fix for the HEADCHR_IFRAME detection (`iframe.contentWindow.chrome`), hopefully this time without breaking iframes.

Note: Only `srcdoc`-powered iframes cause issues, due to a Chromium bug: https://github.com/puppeteer/puppeteer/issues/1106
* (Chromium doesn't support proprietary codecs, only Chrome does) */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/media.codecs' } async onPageCreated(page) { await withUtils(page).evaluateOnNewDocument(utils => { /** * Input might look funky, we need to normalize it so e.g. whitespace isn't an issue for our spoofing. * * @example * video/webm; codecs="vp8, vorbis" * video/mp4; codecs="avc1.42E01E" * audio/x-m4a; * audio/ogg; codecs="vorbis" * @param {String} arg */ const parseInput = arg => { const [mime, codecStr] = arg.trim().split(';') let codecs = [] if (codecStr && codecStr.includes('codecs="')) { codecs = codecStr .trim() .replace(`codecs="`, '') .replace(`"`, '') .trim() .split(',') .filter(x => !!x) .map(x => x.trim()) } return { mime, codecStr, codecs } } const canPlayType = { // Intercept certain requests apply: function(target, ctx, args) { if (!args || !args.length) { return target.apply(ctx, args) } const { mime, codecs } = parseInput(args[0]) // This specific mp4 codec is missing in Chromium if (mime === 'video/mp4') { if (codecs.includes('avc1.42E01E')) { return 'probably' } } // This mimetype is only supported if no codecs are specified if (mime === 'audio/x-m4a' && !codecs.length) { return 'maybe' } // This mimetype is only supported if no codecs are specified if (mime === 'audio/aac' && !codecs.length) { return 'probably' } // Everything else as usual return target.apply(ctx, args) } } /* global HTMLMediaElement */ utils.replaceWithProxy( HTMLMediaElement.prototype, 'canPlayType', canPlayType ) }) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/media.codecs/index.test.js ================================================ const test = require('ava') const { getVanillaFingerPrint, getStealthFingerPrint } = require('../../test/util') const { 
// The proxied `canPlayType` must look native (toString and name) while still spoofing results.
test('stealth: will not leak modifications', async t => {
  const puppeteer = addExtra(vanillaPuppeteer).use(Plugin())
  const browser = await puppeteer.launch({ headless: true })
  try {
    const page = await browser.newPage()

    // https://datadome.co/bot-detection/client-side-detection-is-essential-for-bot-protection/
    const test1 = await page.evaluate(() => {
      const audioElt = document.createElement('audio')
      return audioElt.canPlayType.toString()
    })
    t.is(test1, 'function canPlayType() { [native code] }')

    const test2 = await page.evaluate(() => {
      const audioElt = document.createElement('audio')
      return audioElt.canPlayType.name
    })
    t.is(test2, 'canPlayType')

    // Double check the plugin is active and spoofing e.g. the aac codec results
    const isWorkingTest = await page.evaluate(() => {
      const audioElt = document.createElement('audio')
      return audioElt.canPlayType('audio/aac') === 'probably' // empty in Chromium without stealth plugin
    })
    t.true(isWorkingTest)
  } finally {
    // The original never closed the browser, leaving a Chromium process behind per run
    await browser.close()
  }
})
(Chromium doesn't support proprietary codecs, only Chrome does) --- ### [parseInput(arg)](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/media.codecs/index.js#L33-L51) - `arg` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Input might look funky, we need to normalize it so e.g. whitespace isn't an issue for our spoofing. Example: ```javascript video / webm codecs = 'vp8, vorbis' video / mp4 codecs = 'avc1.42E01E' audio / x - m4a audio / ogg codecs = 'vorbis' ``` --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.hardwareConcurrency/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const withUtils = require('../_utils/withUtils') /** * Set the hardwareConcurrency to 4 (optionally configurable with `hardwareConcurrency`) * * @see https://arh.antoinevastel.com/reports/stats/osName_hardwareConcurrency_report.html * * @param {Object} [opts] - Options * @param {number} [opts.hardwareConcurrency] - The value to use in `navigator.hardwareConcurrency` (default: `4`) */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/navigator.hardwareConcurrency' } get defaults() { return { hardwareConcurrency: 4 } } async onPageCreated(page) { await withUtils(page).evaluateOnNewDocument( (utils, { opts }) => { utils.replaceGetterWithProxy( Object.getPrototypeOf(navigator), 'hardwareConcurrency', utils.makeHandler().getterValue(opts.hardwareConcurrency) ) }, { opts: this.opts } ) } } module.exports = function (pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.hardwareConcurrency/index.test.js 
// The patched getter must throw on illegal invocation and stringify like a native getter.
test('stealth: does patch getters properly', async t => {
  const puppeteer = addExtra(vanillaPuppeteer).use(Plugin())
  const browser = await puppeteer.launch({ headless: true })
  try {
    const page = await browser.newPage()

    const results = await page.evaluate(() => {
      const hasInvocationError = (() => {
        try {
          // eslint-disable-next-line dot-notation
          Object['seal'](Object.getPrototypeOf(navigator)['hardwareConcurrency'])
          return false
        } catch (err) {
          return true
        }
      })()
      return {
        hasInvocationError,
        toString: Object.getOwnPropertyDescriptor(
          Object.getPrototypeOf(navigator),
          'hardwareConcurrency'
        ).get.toString()
      }
    })

    t.deepEqual(results, {
      hasInvocationError: true,
      toString: 'function get hardwareConcurrency() { [native code] }'
    })
  } finally {
    // The original never closed the browser, leaving a Chromium process behind per run
    await browser.close()
  }
})
================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/9534845cc95088e65c2d53bfb029263976fc9add/packages/puppeteer-extra-plugin-stealth/evasions/navigator.hardwareConcurrency/index.js#L16-L37) - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?** Options (optional, default `{}`) - `opts.hardwareConcurrency` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)?** The value to use in `navigator.hardwareConcurrency` (default: `4`) **Extends: PuppeteerExtraPlugin** Set the hardwareConcurrency to 4 (optionally configurable with `hardwareConcurrency`) - **See: ** --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.languages/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const withUtils = require('../_utils/withUtils') /** * Pass the Languages Test. Allows setting custom languages. * * @param {Object} [opts] - Options * @param {Array} [opts.languages] - The languages to use (default: `['en-US', 'en']`) */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/navigator.languages' } get defaults() { return { languages: [] // Empty default, otherwise this would be merged with user defined array override } } async onPageCreated(page) { await withUtils(page).evaluateOnNewDocument( (utils, { opts }) => { const languages = opts.languages.length ? 
opts.languages : ['en-US', 'en'] utils.replaceGetterWithProxy( Object.getPrototypeOf(navigator), 'languages', utils.makeHandler().getterValue(Object.freeze([...languages])) ) }, { opts: this.opts } ) } } module.exports = function (pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.languages/index.test.js ================================================ const test = require('ava') const { getVanillaFingerPrint, getStealthFingerPrint } = require('../../test/util') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') // TODO: Vanilla seems fine, evasion obsolete? // Note: We keep it around for now, as we will need this method in a fingerprinting plugin later anyway test('vanilla: is array with en-US', async t => { const { languages } = await getVanillaFingerPrint() t.is(Array.isArray(languages), true) t.is(languages[0], 'en-US') }) test('vanilla: will not have modifications', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const test1 = await page.evaluate( () => Object.getOwnPropertyDescriptor(navigator, 'languages') // Must be undefined if native ) t.is(test1, undefined) const test2 = await page.evaluate( () => Object.getOwnPropertyNames(navigator) // Must be an empty array if native ) t.false(test2.includes('languages')) }) test('stealth: is array with en-US', async t => { const { languages } = await getStealthFingerPrint(Plugin) t.is(Array.isArray(languages), true) t.is(languages[0], 'en-US') }) test('stealth: customized value', async t => { const { languages } = await getStealthFingerPrint(Plugin, null, { languages: ['foo', 'bar'] }) t.deepEqual(languages, ['foo', 'bar']) }) test('stealth: will not leak modifications', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true 
// The patched getter must throw on illegal invocation, return an immutable array
// and stringify like a native getter.
test('stealth: does patch getters properly', async t => {
  const puppeteer = addExtra(vanillaPuppeteer).use(Plugin())
  const browser = await puppeteer.launch({ headless: true })
  try {
    const page = await browser.newPage()

    const results = await page.evaluate(() => {
      const hasInvocationError = (() => {
        try {
          // eslint-disable-next-line dot-notation
          Object['seal'](Object.getPrototypeOf(navigator)['languages'])
          return false
        } catch (err) {
          return true
        }
      })()
      const hasPushError = (() => {
        try {
          // eslint-disable-next-line dot-notation
          navigator.languages.push(null)
          return false
        } catch (err) {
          return true
        }
      })()
      return {
        hasInvocationError,
        hasPushError,
        toString: Object.getOwnPropertyDescriptor(
          Object.getPrototypeOf(navigator),
          'languages'
        ).get.toString()
      }
    })

    t.deepEqual(results, {
      hasInvocationError: true,
      hasPushError: true,
      toString: 'function get languages() { [native code] }'
    })
  } finally {
    // The original never closed the browser, leaving a Chromium process behind per run
    await browser.close()
  }
})
**[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?** Options (optional, default `{}`) - `opts.languages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>?** The languages to use (default: `['en-US', 'en']`) **Extends: PuppeteerExtraPlugin** Pass the Languages Test. Allows setting custom languages. --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.permissions/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const withUtils = require('../_utils/withUtils') /** * Fix `Notification.permission` behaving weirdly in headless mode * * @see https://bugs.chromium.org/p/chromium/issues/detail?id=1052332 */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/navigator.permissions' } /* global Notification Permissions PermissionStatus */ async onPageCreated(page) { await withUtils(page).evaluateOnNewDocument((utils, opts) => { const isSecure = document.location.protocol.startsWith('https') // In headful on secure origins the permission should be "default", not "denied" if (isSecure) { utils.replaceGetterWithProxy(Notification, 'permission', { apply() { return 'default' } }) } // Another weird behavior: // On insecure origins in headful the state is "denied", // whereas in headless it's "prompt" if (!isSecure) { const handler = { apply(target, ctx, args) { const param = (args || [])[0] const isNotifications = param && param.name && param.name === 'notifications' if (!isNotifications) { return utils.cache.Reflect.apply(...arguments) } return Promise.resolve( Object.setPrototypeOf( { state: 'denied', onchange: null }, PermissionStatus.prototype ) ) } } // Note: Don't use 
`Object.getPrototypeOf` here utils.replaceWithProxy(Permissions.prototype, 'query', handler) } }, this.opts) } } module.exports = function (pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.permissions/index.test.js ================================================ /* global Notification */ const test = require('ava') const { getVanillaFingerPrint, getStealthFingerPrint } = require('../../test/util') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') test('vanilla: is prompt', async t => { const { permissions } = await getVanillaFingerPrint() t.deepEqual(permissions, { permission: 'denied', state: 'prompt' // this is WRONG behavior, it's "denied" in headful! }) }) test('stealth: is denied', async t => { const { permissions } = await getStealthFingerPrint(Plugin) t.deepEqual(permissions, { permission: 'denied', state: 'denied' // this is FIXED behavior, it's "denied" in headful! 
}) }) async function getNotificationPermission() { const { state, onchange } = await navigator.permissions.query({ name: 'notifications' }) return { state, onchange, permission: Notification.permission } } test('vanilla headful: as expected', async t => { const puppeteer = addExtra(vanillaPuppeteer) const browser = await puppeteer.launch({ headless: false }) const page = await browser.newPage() const result = await page.evaluate(getNotificationPermission) t.deepEqual(result, { state: 'denied', onchange: null, permission: 'denied' }) await page.goto('https://example.com', { waitUntil: 'domcontentloaded' }) const result2 = await page.evaluate(getNotificationPermission) t.deepEqual(result2, { state: 'prompt', onchange: null, permission: 'default' }) }) test('vanilla headless: as expected', async t => { const puppeteer = addExtra(vanillaPuppeteer) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const result = await page.evaluate(getNotificationPermission) t.deepEqual(result, { state: 'prompt', // should be denied onchange: null, permission: 'denied' }) await page.goto('https://example.com', { waitUntil: 'domcontentloaded' }) const result2 = await page.evaluate(getNotificationPermission) t.deepEqual(result2, { state: 'prompt', onchange: null, permission: 'denied' // should be default }) }) test('stealth headless: as vanilla headful', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const result = await page.evaluate(getNotificationPermission) t.deepEqual(result, { state: 'denied', onchange: null, permission: 'denied' }) await page.goto('https://example.com', { waitUntil: 'domcontentloaded' }) const result2 = await page.evaluate(getNotificationPermission) t.deepEqual(result2, { state: 'prompt', onchange: null, permission: 'default' }) }) ================================================ FILE: 
packages/puppeteer-extra-plugin-stealth/evasions/navigator.permissions/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.permissions/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/navigator.permissions/index.js#L12-L45) - `opts` (optional, default `{}`) **Extends: PuppeteerExtraPlugin** Pass the Permissions Test. --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/data.json ================================================ { "mimeTypes": [ { "type": "application/pdf", "suffixes": "pdf", "description": "", "__pluginName": "Chrome PDF Viewer" }, { "type": "application/x-google-chrome-pdf", "suffixes": "pdf", "description": "Portable Document Format", "__pluginName": "Chrome PDF Plugin" }, { "type": "application/x-nacl", "suffixes": "", "description": "Native Client Executable", "__pluginName": "Native Client" }, { "type": "application/x-pnacl", "suffixes": "", "description": "Portable Native Client Executable", "__pluginName": "Native Client" } ], "plugins": [ { "name": "Chrome PDF Plugin", "filename": "internal-pdf-viewer", "description": "Portable Document Format", "__mimeTypes": ["application/x-google-chrome-pdf"] }, { "name": "Chrome PDF Viewer", "filename": "mhjfbmdgcfjbbpaeojofohoefgiehjai", "description": "", "__mimeTypes": ["application/pdf"] }, { "name": "Native Client", "filename": "internal-nacl-plugin", "description": "", "__mimeTypes": ["application/x-nacl", "application/x-pnacl"] } ] } ================================================ FILE: 
packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/functionMocks.js ================================================ /** * `navigator.{plugins,mimeTypes}` share similar custom functions to look up properties * * Note: This is meant to be run in the context of the page. */ module.exports.generateFunctionMocks = utils => ( proto, itemMainProp, dataArray ) => ({ /** Returns the MimeType object with the specified index. */ item: utils.createProxy(proto.item, { apply(target, ctx, args) { if (!args.length) { throw new TypeError( `Failed to execute 'item' on '${ proto[Symbol.toStringTag] }': 1 argument required, but only 0 present.` ) } // Special behavior alert: // - Vanilla tries to cast strings to Numbers (only integers!) and use them as property index lookup // - If anything else than an integer (including as string) is provided it will return the first entry const isInteger = args[0] && Number.isInteger(Number(args[0])) // Cast potential string to number first, then check for integer // Note: Vanilla never returns `undefined` return (isInteger ? dataArray[Number(args[0])] : dataArray[0]) || null } }), /** Returns the MimeType object with the specified name. */ namedItem: utils.createProxy(proto.namedItem, { apply(target, ctx, args) { if (!args.length) { throw new TypeError( `Failed to execute 'namedItem' on '${ proto[Symbol.toStringTag] }': 1 argument required, but only 0 present.` ) } return dataArray.find(mt => mt[itemMainProp] === args[0]) || null // Not `undefined`! } }), /** Does nothing and shall return nothing */ refresh: proto.refresh ? 
utils.createProxy(proto.refresh, { apply(target, ctx, args) { return undefined } }) : undefined }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const utils = require('../_utils') const withUtils = require('../_utils/withUtils') const { generateMimeTypeArray } = require('./mimeTypes') const { generatePluginArray } = require('./plugins') const { generateMagicArray } = require('./magicArray') const { generateFunctionMocks } = require('./functionMocks') const data = require('./data.json') /** * In headless mode `navigator.mimeTypes` and `navigator.plugins` are empty. * This plugin emulates both of these with functional mocks to match regular headful Chrome. * * Note: mimeTypes and plugins cross-reference each other, so it makes sense to do them at the same time. * * @see https://developer.mozilla.org/en-US/docs/Web/API/NavigatorPlugins/mimeTypes * @see https://developer.mozilla.org/en-US/docs/Web/API/MimeTypeArray * @see https://developer.mozilla.org/en-US/docs/Web/API/NavigatorPlugins/plugins * @see https://developer.mozilla.org/en-US/docs/Web/API/PluginArray */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth/evasions/navigator.plugins' } async onPageCreated(page) { await withUtils(page).evaluateOnNewDocument( (utils, { fns, data }) => { fns = utils.materializeFns(fns) // That means we're running headful const hasPlugins = 'plugins' in navigator && navigator.plugins.length if (hasPlugins) { return // nothing to do here } const mimeTypes = fns.generateMimeTypeArray(utils, fns)(data.mimeTypes) const plugins = fns.generatePluginArray(utils, fns)(data.plugins) // Plugin and MimeType cross-reference each other, let's do that now // Note: We're looping through `data.plugins` here, not the generated 
`plugins` for (const pluginData of data.plugins) { pluginData.__mimeTypes.forEach((type, index) => { plugins[pluginData.name][index] = mimeTypes[type] Object.defineProperty(plugins[pluginData.name], type, { value: mimeTypes[type], writable: false, enumerable: false, // Not enumerable configurable: true }) Object.defineProperty(mimeTypes[type], 'enabledPlugin', { value: type === 'application/x-pnacl' ? mimeTypes['application/x-nacl'].enabledPlugin // these reference the same plugin, so we need to re-use the Proxy in order to avoid leaks : new Proxy(plugins[pluginData.name], {}), // Prevent circular references writable: false, enumerable: false, // Important: `JSON.stringify(navigator.plugins)` configurable: true }) }) } const patchNavigator = (name, value) => utils.replaceProperty(Object.getPrototypeOf(navigator), name, { get() { return value } }) patchNavigator('mimeTypes', mimeTypes) patchNavigator('plugins', plugins) // All done }, { // We pass some functions to evaluate to structure the code more nicely fns: utils.stringifyFns({ generateMimeTypeArray, generatePluginArray, generateMagicArray, generateFunctionMocks }), data } ) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/index.test.js ================================================ const test = require('ava') const { getVanillaFingerPrint, getStealthFingerPrint } = require('../../test/util') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') test('vanilla: empty plugins, empty mimetypes', async t => { const { plugins, mimeTypes } = await getVanillaFingerPrint() t.is(plugins.length, 0) t.is(mimeTypes.length, 0) }) test('vanilla: will not have modifications', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const test1 = await page.evaluate(() => ({ 
mimeTypes: Object.getOwnPropertyDescriptor(navigator, 'mimeTypes'), // Must be undefined if native plugins: Object.getOwnPropertyDescriptor(navigator, 'plugins') // Must be undefined if native })) t.is(test1.mimeTypes, undefined) t.is(test1.plugins, undefined) const test2 = await page.evaluate( () => Object.getOwnPropertyNames(navigator) // Must be an empty array if native ) t.false(test2.includes('plugins')) }) test('stealth: has plugin, has mimetypes', async t => { const { plugins, mimeTypes } = await getStealthFingerPrint(Plugin) t.is(plugins.length, 3) t.is(mimeTypes.length, 4) }) test('stealth: will not leak modifications', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const test1 = await page.evaluate(() => ({ mimeTypes: Object.getOwnPropertyDescriptor(navigator, 'mimeTypes'), // Must be undefined if native plugins: Object.getOwnPropertyDescriptor(navigator, 'plugins') // Must be undefined if native })) t.is(test1.mimeTypes, undefined) t.is(test1.plugins, undefined) const test2 = await page.evaluate( () => Object.getOwnPropertyNames(navigator) // Must be an empty array if native ) t.false(test2.includes('plugins')) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/magicArray.js ================================================ /* global MimeType MimeTypeArray Plugin PluginArray */ /** * Generate a convincing and functional MimeType or Plugin array from scratch. * They're so similar that it makes sense to use a single generator here. * * Note: This is meant to be run in the context of the page. 
*/ module.exports.generateMagicArray = (utils, fns) => function( dataArray = [], proto = MimeTypeArray.prototype, itemProto = MimeType.prototype, itemMainProp = 'type' ) { // Quick helper to set props with the same descriptors vanilla is using const defineProp = (obj, prop, value) => Object.defineProperty(obj, prop, { value, writable: false, enumerable: false, // Important for mimeTypes & plugins: `JSON.stringify(navigator.mimeTypes)` configurable: true }) // Loop over our fake data and construct items const makeItem = data => { const item = {} for (const prop of Object.keys(data)) { if (prop.startsWith('__')) { continue } defineProp(item, prop, data[prop]) } return patchItem(item, data) } const patchItem = (item, data) => { let descriptor = Object.getOwnPropertyDescriptors(item) // Special case: Plugins have a magic length property which is not enumerable // e.g. `navigator.plugins[i].length` should always be the length of the assigned mimeTypes if (itemProto === Plugin.prototype) { descriptor = { ...descriptor, length: { value: data.__mimeTypes.length, writable: false, enumerable: false, configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length` } } } // We need to spoof a specific `MimeType` or `Plugin` object const obj = Object.create(itemProto, descriptor) // Virtually all property keys are not enumerable in vanilla const blacklist = [...Object.keys(data), 'length', 'enabledPlugin'] return new Proxy(obj, { ownKeys(target) { return Reflect.ownKeys(target).filter(k => !blacklist.includes(k)) }, getOwnPropertyDescriptor(target, prop) { if (blacklist.includes(prop)) { return undefined } return Reflect.getOwnPropertyDescriptor(target, prop) } }) } const magicArray = [] // Loop through our fake data and use that to create convincing entities dataArray.forEach(data => { magicArray.push(makeItem(data)) }) // Add direct property access based on types (e.g. 
`obj['application/pdf']`) afterwards magicArray.forEach(entry => { defineProp(magicArray, entry[itemMainProp], entry) }) // This is the best way to fake the type to make sure this is false: `Array.isArray(navigator.mimeTypes)` const magicArrayObj = Object.create(proto, { ...Object.getOwnPropertyDescriptors(magicArray), // There's one ugly quirk we unfortunately need to take care of: // The `MimeTypeArray` prototype has an enumerable `length` property, // but headful Chrome will still skip it when running `Object.getOwnPropertyNames(navigator.mimeTypes)`. // To strip it we need to make it first `configurable` and can then overlay a Proxy with an `ownKeys` trap. length: { value: magicArray.length, writable: false, enumerable: false, configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length` } }) // Generate our functional function mocks :-) const functionMocks = fns.generateFunctionMocks(utils)( proto, itemMainProp, magicArray ) // We need to overlay our custom object with a JS Proxy const magicArrayObjProxy = new Proxy(magicArrayObj, { get(target, key = '') { // Redirect function calls to our custom proxied versions mocking the vanilla behavior if (key === 'item') { return functionMocks.item } if (key === 'namedItem') { return functionMocks.namedItem } if (proto === PluginArray.prototype && key === 'refresh') { return functionMocks.refresh } // Everything else can pass through as normal return utils.cache.Reflect.get(...arguments) }, ownKeys(target) { // There are a couple of quirks where the original property demonstrates "magical" behavior that makes no sense // This can be witnessed when calling `Object.getOwnPropertyNames(navigator.mimeTypes)` and the absence of `length` // My guess is that it has to do with the recent change of not allowing data enumeration and this being implemented weirdly // For that reason we just completely fake the available property names based on our data to match what regular Chrome is doing //
Specific issues when not patching this: `length` property is available, direct `types` props (e.g. `obj['application/pdf']`) are missing const keys = [] const typeProps = magicArray.map(mt => mt[itemMainProp]) typeProps.forEach((_, i) => keys.push(`${i}`)) typeProps.forEach(propName => keys.push(propName)) return keys }, getOwnPropertyDescriptor(target, prop) { if (prop === 'length') { return undefined } return Reflect.getOwnPropertyDescriptor(target, prop) } }) return magicArrayObjProxy } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/mimeTypes.js ================================================ /* global MimeType MimeTypeArray */ /** * Generate a convincing and functional MimeTypeArray (with mime types) from scratch. * * Note: This is meant to be run in the context of the page. * * @see https://developer.mozilla.org/en-US/docs/Web/API/NavigatorPlugins/mimeTypes * @see https://developer.mozilla.org/en-US/docs/Web/API/MimeTypeArray */ module.exports.generateMimeTypeArray = (utils, fns) => mimeTypesData => { return fns.generateMagicArray(utils, fns)( mimeTypesData, MimeTypeArray.prototype, MimeType.prototype, 'type' ) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/mimeTypes.test.js ================================================ const test = require('ava') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') test('stealth: will have convincing mimeTypes', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => { // We need to help serializing the error or it won't survive being sent back from `page.evaluate` const catchErr = function(fn, ...args) { try { return fn.apply(this, args) } catch ({ name, message, stack }) { return 
{ name, message, stack, str: stack.split('\n')[0] } } } return { mimeTypes: { exists: 'mimeTypes' in navigator, isArray: Array.isArray(navigator.mimeTypes), length: navigator.mimeTypes.length, // value: navigator.mimeTypes, toString: navigator.mimeTypes.toString(), toStringProto: navigator.mimeTypes.__proto__.toString(), // eslint-disable-line no-proto protoSymbol: navigator.mimeTypes.__proto__[Symbol.toStringTag], // eslint-disable-line no-proto // valueOf: navigator.mimeTypes.valueOf(), valueOfSame: navigator.mimeTypes.valueOf() === navigator.mimeTypes, json: JSON.stringify(navigator.mimeTypes), hasPropPush: 'push' in navigator.mimeTypes, hasPropLength: 'length' in navigator.mimeTypes, hasLengthDescriptor: !!Object.getOwnPropertyDescriptor( navigator.mimeTypes, 'length' ), propertyNames: JSON.stringify( Object.getOwnPropertyNames(navigator.mimeTypes) ), lengthInProps: Object.getOwnPropertyNames(navigator.mimeTypes).includes( 'length' ), keys: JSON.stringify(Object.keys(navigator.mimeTypes)), namedPropsAuthentic: (function() { navigator.mimeTypes.alice = 'bob' return navigator.mimeTypes.namedItem('alice') === null // true on chrome })(), loopResult: (function() { let res = '' for (var bK = 0; bK < window.navigator.mimeTypes.length; bK++) bK === window.navigator.mimeTypes.length - 1 ? 
(res += window.navigator.mimeTypes[bK].type) : (res += window.navigator.mimeTypes[bK].type + ',') return res })() }, namedItem: { exists: 'namedItem' in navigator.mimeTypes, toString: navigator.mimeTypes.namedItem.toString(), resultNotFound: navigator.mimeTypes.namedItem('foo'), resultFound: navigator.mimeTypes // eslint-disable-line no-proto .namedItem('application/pdf') .__proto__.toString(), errors: { // For whatever weird reason the normal context doesn't suffice, we need to bind this to `navigator.mimeTypes` noArgs: catchErr.bind(navigator.mimeTypes)( navigator.mimeTypes.namedItem ).str, noStackLeaks: !catchErr .bind(navigator.mimeTypes)(navigator.mimeTypes.namedItem) .stack.includes(`.apply`), protoCall: catchErr.bind(navigator.mimeTypes)( navigator.mimeTypes.__proto__.namedItem // eslint-disable-line no-proto ).str } }, item: { exists: 'item' in navigator.mimeTypes, toString: navigator.mimeTypes.item.toString(), resultNotFound: navigator.mimeTypes.item('madness').type, resultNotFoundNumberString: navigator.mimeTypes.item('777'), resultEmptyString: navigator.mimeTypes.item('').type, resultByNumberString: navigator.mimeTypes.item('2').type, resultByNumberStringZero: navigator.mimeTypes.item('0').type, resultByNumber: navigator.mimeTypes.item(2).type, resultNull: navigator.mimeTypes.item(null).type, resultFound: navigator.mimeTypes.item('application/x-nacl').type, resultBrackets: navigator.mimeTypes['application/x-pnacl'].type, errors: { // For whatever weird reason the normal context doesn't suffice, we need to bind this to `navigator.mimeTypes` noArgs: catchErr.bind(navigator.mimeTypes)(navigator.mimeTypes.item) .str, noStackLeaks: !catchErr .bind(navigator.mimeTypes)(navigator.mimeTypes.item) .stack.includes(`.apply`), protoCall: catchErr.bind(navigator.mimeTypes)( navigator.mimeTypes.__proto__.item // eslint-disable-line no-proto ).str } } } }) t.deepEqual(results.mimeTypes, { exists: true, hasPropPush: false, hasPropLength: true, hasLengthDescriptor: 
false, isArray: false, json: `{"0":{},"1":{},"2":{},"3":{}}`, keys: `["0","1","2","3"]`, length: 4, lengthInProps: false, loopResult: 'application/pdf,application/x-google-chrome-pdf,application/x-nacl,application/x-pnacl', namedPropsAuthentic: true, propertyNames: `["0","1","2","3","application/pdf","application/x-google-chrome-pdf","application/x-nacl","application/x-pnacl"]`, protoSymbol: 'MimeTypeArray', toString: '[object MimeTypeArray]', toStringProto: '[object MimeTypeArray]', valueOfSame: true }) t.deepEqual(results.namedItem, { exists: true, toString: 'function namedItem() { [native code] }', resultFound: '[object MimeType]', resultNotFound: null, errors: { noArgs: "TypeError: Failed to execute 'namedItem' on 'MimeTypeArray': 1 argument required, but only 0 present.", noStackLeaks: true, protoCall: 'TypeError: Illegal invocation' } }) t.deepEqual(results.item, { exists: true, resultBrackets: 'application/x-pnacl', resultByNumber: 'application/x-nacl', resultByNumberString: 'application/x-nacl', resultByNumberStringZero: 'application/pdf', resultEmptyString: 'application/pdf', resultFound: 'application/pdf', resultNotFound: 'application/pdf', resultNotFoundNumberString: null, resultNull: 'application/pdf', toString: 'function item() { [native code] }', errors: { noArgs: "TypeError: Failed to execute 'item' on 'MimeTypeArray': 1 argument required, but only 0 present.", noStackLeaks: true, protoCall: 'TypeError: Illegal invocation' } }) }) test('stealth: will have convincing mimeType entry', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => ({ mimeType: { exists: !!navigator.mimeTypes[0], toString: navigator.mimeTypes[0].toString(), toStringProto: navigator.mimeTypes[0].__proto__.toString(), // eslint-disable-line no-proto protoSymbol: navigator.mimeTypes[0].__proto__[Symbol.toStringTag], // 
eslint-disable-line no-proto enabledPlugin: !!navigator.mimeTypes[0].enabledPlugin, // should not throw enabledPlugin2: !!navigator.mimeTypes['application/pdf'].enabledPlugin, // should not throw enabledPlugins: !!navigator.mimeTypes[0].enabledPlugins, // regression: should not exist (anymore) pdfPlugin: JSON.stringify( navigator.mimeTypes['application/pdf'].enabledPlugin ), length: !!navigator.mimeTypes[0].length, // should not throw; expected to be falsy for a MimeType entry (asserted as `false` below) lengthDescriptor: !!Object.getOwnPropertyDescriptor( navigator.mimeTypes[0], 'length' ), json: JSON.stringify(navigator.mimeTypes[0]), propertyNames: JSON.stringify( Object.getOwnPropertyNames(navigator.mimeTypes[0]) ), nested: navigator.mimeTypes['application/pdf'].enabledPlugin[0].enabledPlugin[0] .enabledPlugin[0].enabledPlugin[0].enabledPlugin[0].suffixes } })) t.deepEqual(results.mimeType, { exists: true, protoSymbol: 'MimeType', toString: '[object MimeType]', toStringProto: '[object MimeType]', enabledPlugin: true, enabledPlugin2: true, enabledPlugins: false, pdfPlugin: '{"0":{}}', length: false, lengthDescriptor: false, json: '{}', propertyNames: '[]', nested: 'pdf' }) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/plugins.js ================================================ /* global Plugin PluginArray */ /** * Generate a convincing and functional PluginArray (with plugins) from scratch. * * Note: This is meant to be run in the context of the page.
* * @see https://developer.mozilla.org/en-US/docs/Web/API/NavigatorPlugins/plugins * @see https://developer.mozilla.org/en-US/docs/Web/API/PluginArray */ module.exports.generatePluginArray = (utils, fns) => pluginsData => { return fns.generateMagicArray(utils, fns)( pluginsData, PluginArray.prototype, Plugin.prototype, 'name' ) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/plugins.test.js ================================================ const test = require('ava') const { vanillaPuppeteer, addExtra } = require('../../test/util') const Plugin = require('.') test('stealth: will have convincing plugins', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => { // We need to help serializing the error or it won't survive being sent back from `page.evaluate` const catchErr = function(fn, ...args) { try { return fn.apply(this, args) } catch ({ name, message, stack }) { return { name, message, stack, str: stack.split('\n')[0] } } } return { plugins: { exists: 'plugins' in navigator, isArray: Array.isArray(navigator.plugins), length: navigator.plugins.length, // value: navigator.plugins, toString: navigator.plugins.toString(), toStringProto: navigator.plugins.__proto__.toString(), // eslint-disable-line no-proto protoSymbol: navigator.plugins.__proto__[Symbol.toStringTag], // eslint-disable-line no-proto // valueOf: navigator.plugins.valueOf(), valueOfSame: navigator.plugins.valueOf() === navigator.plugins, json: JSON.stringify(navigator.plugins), hasPropPush: 'push' in navigator.plugins, hasPropLength: 'length' in navigator.plugins, hasLengthDescriptor: !!Object.getOwnPropertyDescriptor( navigator.plugins, 'length' ), propertyNames: JSON.stringify( Object.getOwnPropertyNames(navigator.plugins) ), lengthInProps: 
Object.getOwnPropertyNames(navigator.plugins).includes( 'length' ), keys: JSON.stringify(Object.keys(navigator.plugins)), loopResult: [...navigator.plugins].map(p => p.name).join(',') }, namedItem: { exists: 'namedItem' in navigator.plugins, toString: navigator.plugins.namedItem.toString(), resultNotFound: navigator.plugins.namedItem('foo'), resultFound: navigator.plugins // eslint-disable-line no-proto .namedItem('Chrome PDF Viewer') .__proto__.toString(), errors: { // For whatever weird reason the normal context doesn't suffice, we need to bind this to `navigator.plugins` noArgs: catchErr.bind(navigator.plugins)(navigator.plugins.namedItem) .str, noStackLeaks: !catchErr .bind(navigator.plugins)(navigator.plugins.namedItem) .stack.includes(`.apply`), protoCall: catchErr.bind(navigator.plugins)( navigator.plugins.__proto__.namedItem // eslint-disable-line no-proto ).str } }, item: { exists: 'item' in navigator.plugins, toString: navigator.plugins.item.toString(), resultNotFound: navigator.plugins.item('madness').name, resultNotFoundNumberString: navigator.plugins.item('777'), resultEmptyString: navigator.plugins.item('').name, resultByNumberString: navigator.plugins.item('2').name, resultByNumberStringZero: navigator.plugins.item('0').name, resultByNumber: navigator.plugins.item(2).name, resultNull: navigator.plugins.item(null).name, resultFound: navigator.plugins.item('application/x-nacl').name, errors: { // For whatever weird reason the normal context doesn't suffice, we need to bind this to `navigator.plugins` noArgs: catchErr.bind(navigator.plugins)(navigator.plugins.item).str, noStackLeaks: !catchErr .bind(navigator.plugins)(navigator.plugins.item) .stack.includes(`.apply`), protoCall: catchErr.bind(navigator.plugins)( navigator.plugins.__proto__.item // eslint-disable-line no-proto ).str } } } }) t.deepEqual(results.plugins, { exists: true, hasPropLength: true, hasLengthDescriptor: false, hasPropPush: false, isArray: false, json: 
`{"0":{"0":{}},"1":{"0":{}},"2":{"0":{},"1":{}}}`, keys: `["0","1","2"]`, length: 3, lengthInProps: false, loopResult: 'Chrome PDF Plugin,Chrome PDF Viewer,Native Client', propertyNames: `["0","1","2","Chrome PDF Plugin","Chrome PDF Viewer","Native Client"]`, protoSymbol: 'PluginArray', toString: '[object PluginArray]', toStringProto: '[object PluginArray]', valueOfSame: true }) t.deepEqual(results.namedItem, { exists: true, toString: 'function namedItem() { [native code] }', resultFound: '[object Plugin]', resultNotFound: null, errors: { noArgs: "TypeError: Failed to execute 'namedItem' on 'PluginArray': 1 argument required, but only 0 present.", noStackLeaks: true, protoCall: 'TypeError: Illegal invocation' } }) t.deepEqual(results.item, { exists: true, resultByNumber: 'Native Client', resultByNumberString: 'Native Client', resultByNumberStringZero: 'Chrome PDF Plugin', resultEmptyString: 'Chrome PDF Plugin', resultFound: 'Chrome PDF Plugin', resultNotFound: 'Chrome PDF Plugin', resultNotFoundNumberString: null, resultNull: 'Chrome PDF Plugin', toString: 'function item() { [native code] }', errors: { noArgs: "TypeError: Failed to execute 'item' on 'PluginArray': 1 argument required, but only 0 present.", noStackLeaks: true, protoCall: 'TypeError: Illegal invocation' } }) }) test('stealth: will have convincing plugin entry', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => ({ plugins: { exists: !!navigator.plugins[0], toString: navigator.plugins[0].toString(), toStringProto: navigator.plugins[0].__proto__.toString(), // eslint-disable-line no-proto protoSymbol: navigator.plugins[0].__proto__[Symbol.toStringTag], // eslint-disable-line no-proto length: navigator.plugins[0].length, // should not throw and return mimeTypes length lengthDescriptor: Object.getOwnPropertyDescriptor( navigator.plugins[0], 
'length' ) }, plugin: { mtIndex: !!navigator.plugins[0][0], // mimeType should be accessible through index mtNamed: !!navigator.plugins[0]['application/x-google-chrome-pdf'], // mimeType should be accessible through name json: JSON.stringify(navigator.plugins[0]), propertyNames: JSON.stringify( Object.getOwnPropertyNames(navigator.plugins[0]) ) } })) t.deepEqual(results.plugins, { exists: true, protoSymbol: 'Plugin', toString: '[object Plugin]', toStringProto: '[object Plugin]', length: 1 }) t.deepEqual(results.plugin, { mtIndex: true, mtNamed: true, json: '{"0":{}}', propertyNames: '["0","application/x-google-chrome-pdf"]' }) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/navigator.plugins/index.js#L26-L88) - `opts` (optional, default `{}`) **Extends: PuppeteerExtraPlugin** In headless mode `navigator.mimeTypes` and `navigator.plugins` are empty. This plugin emulates both of these with functional mocks to match regular headful Chrome. Note: mimeTypes and plugins cross-reference each other, so it makes sense to do them at the same time. - **See: https://developer.mozilla.org/en-US/docs/Web/API/NavigatorPlugins/mimeTypes** - **See: https://developer.mozilla.org/en-US/docs/Web/API/MimeTypeArray** - **See: https://developer.mozilla.org/en-US/docs/Web/API/NavigatorPlugins/plugins** - **See: https://developer.mozilla.org/en-US/docs/Web/API/PluginArray** --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.vendor/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') const withUtils = require('../_utils/withUtils') /** * By default puppeteer will have a fixed `navigator.vendor` property. * * This plugin makes it possible to change this property.
class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)
  }

  /** Identifier under which this evasion is registered. */
  get name() {
    return 'stealth/evasions/navigator.vendor'
  }

  /** Option defaults: the vendor string used when none is configured. */
  get defaults() {
    return { vendor: 'Google Inc.' }
  }

  /**
   * Registers a script for every new document of `page` that makes the
   * `navigator.vendor` getter return the configured vendor.
   *
   * @param {Object} page - The page that was just created
   */
  async onPageCreated(page) {
    const { opts } = this
    this.debug('onPageCreated', { opts })

    // Runs inside the page: swap the getter on the navigator prototype
    // (not on the instance) for a proxy that yields the custom value.
    const patchVendorGetter = (utils, args) => {
      const navigatorProto = Object.getPrototypeOf(navigator)
      const handler = utils.makeHandler().getterValue(args.opts.vendor)
      utils.replaceGetterWithProxy(navigatorProto, 'vendor', handler)
    }

    await withUtils(page).evaluateOnNewDocument(patchVendorGetter, { opts })
  }
}
addExtra(vanillaPuppeteer).use( Plugin({ vendor: 'Apple Computer, Inc.' }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const vendor = await page.evaluate(() => navigator.vendor) t.is(vendor, 'Apple Computer, Inc.') }) test('stealth: will not leak modifications', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const test1 = await page.evaluate( () => Object.getOwnPropertyDescriptor(navigator, 'vendor') // Must be undefined if native ) t.is(test1, undefined) const test2 = await page.evaluate( () => Object.getOwnPropertyNames(navigator) // Must be an empty array if native ) t.false(test2.includes('vendor')) }) test('stealth: does patch getters properly', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const results = await page.evaluate(() => { const hasInvocationError = (() => { try { // eslint-disable-next-line dot-notation Object['seal'](Object.getPrototypeOf(navigator)['vendor']) return false } catch (err) { return true } })() return { hasInvocationError, toString: Object.getOwnPropertyDescriptor( Object.getPrototypeOf(navigator), 'vendor' ).get.toString() } }) t.deepEqual(results, { hasInvocationError: true, toString: 'function get vendor() { [native code] }' }) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.vendor/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.vendor/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: 
[Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/navigator.vendor/index.js#L28-L55) - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?** Options (optional, default `{}`) - `opts.vendor` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** The vendor to use in `navigator.vendor` (default: `Google Inc.`) **Extends: PuppeteerExtraPlugin** By default puppeteer will have a fixed `navigator.vendor` property. This plugin makes it possible to change this property. Example: ```javascript const puppeteer = require('puppeteer-extra') const StealthPlugin = require('puppeteer-extra-plugin-stealth') const stealth = StealthPlugin() // Remove this specific stealth plugin from the default set stealth.enabledEvasions.delete('navigator.vendor') puppeteer.use(stealth) // Stealth plugins are just regular `puppeteer-extra` plugins and can be added as such const NavigatorVendorPlugin = require('puppeteer-extra-plugin-stealth/evasions/navigator.vendor') const nvp = NavigatorVendorPlugin({ vendor: 'Apple Computer, Inc.' }) // Custom vendor puppeteer.use(nvp) ``` --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.webdriver/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') /** * Pass the Webdriver Test. * Will delete `navigator.webdriver` property. 
class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)
  }

  /** Identifier under which this evasion is registered. */
  get name() {
    return 'stealth/evasions/navigator.webdriver'
  }

  /**
   * Registers a script for every new document that removes the `webdriver`
   * getter from the navigator prototype when it still reports automation.
   *
   * @param {Object} page - The page that was just created
   */
  async onPageCreated(page) {
    await page.evaluateOnNewDocument(() => {
      if (navigator.webdriver === false) {
        // Post Chrome 89.0.4339.0 and already good
      } else if (navigator.webdriver === undefined) {
        // Pre Chrome 89.0.4339.0 and already good
      } else {
        // Pre Chrome 88.0.4291.0 and needs patching
        delete Object.getPrototypeOf(navigator).webdriver
      }
    })
  }

  /**
   * Makes sure the browser is launched with the `AutomationControlled` blink
   * feature disabled (Post Chrome 88.0.4291.0).
   *
   * Note: this will add an infobar to Chrome with a warning that an unsupported flag is set
   * To remove this bar on Linux, run: mkdir -p /etc/opt/chrome/policies/managed && echo '{ "CommandLineFlagSecurityWarningsEnabled": false }' > /etc/opt/chrome/policies/managed/managed_policies.json
   *
   * The launch options are modified in place. An existing
   * `--disable-blink-features=` switch is extended instead of duplicated, and
   * the feature is only added when it is not listed yet.
   *
   * @param {Object} options - Launch options; `options.args` is created when missing
   */
  async beforeLaunch(options) {
    const SWITCH_PREFIX = '--disable-blink-features='
    const FEATURE = 'AutomationControlled'

    // Callers are not required to pass an args array
    if (!Array.isArray(options.args)) {
      options.args = []
    }

    // Ignore non-string entries instead of crashing on `startsWith`
    const idx = options.args.findIndex(
      arg => typeof arg === 'string' && arg.startsWith(SWITCH_PREFIX)
    )
    if (idx === -1) {
      options.args.push(`${SWITCH_PREFIX}${FEATURE}`)
      return
    }

    // If disable-blink-features is already passed, append the AutomationControlled switch
    // (dropping empty entries so an empty value does not yield a leading comma)
    const features = options.args[idx]
      .slice(SWITCH_PREFIX.length)
      .split(',')
      .filter(Boolean)
    if (!features.includes(FEATURE)) {
      features.push(FEATURE)
    }
    options.args[idx] = `${SWITCH_PREFIX}${features.join(',')}`
  }
}
navigator.webdriver) t.is(data, true) }) test('stealth: navigator.webdriver is undefined', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const data = await page.evaluate(() => navigator.webdriver) // XXX: launch this test multiple times with browsers of different versions? t.is(data, getExpectedValue(await browser.version())) }) // https://github.com/berstend/puppeteer-extra/pull/130 test('stealth: regression: wont kill other navigator methods', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() try { const data = await page.evaluate(() => navigator.javaEnabled()) t.is(data, false) } catch (err) { t.is(err, undefined) } }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.webdriver/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/navigator.webdriver/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/navigator.webdriver/index.js#L9-L23) - `opts` (optional, default `{}`) **Extends: PuppeteerExtraPlugin** Pass the Webdriver Test. Will delete `navigator.webdriver` property. --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/readme.md ================================================ # puppeteer-extra-plugin-stealth/evasions Various detection evasion plugins for `puppeteer-extra-plugin-stealth`. 
You can bypass the main module and require specific evasion plugins yourself, if you wish to do so: ```es6 puppeteer.use( require('puppeteer-extra-plugin-stealth/evasions/console.debug')() ) ``` If you want to add a new evasion technique I suggest you look at the [template](./_template/) to kickstart things. ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/sourceurl/_fixtures/test.html ================================================ Page Title

Please use `document.querySelector`..

class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)
  }

  /** Identifier under which this evasion is registered. */
  get name() {
    return 'stealth/evasions/sourceurl'
  }

  /**
   * Wraps the `send` method of the page's CDP client so that the puppeteer
   * specific `sourceURL` suffix is removed from evaluated scripts before they
   * reach the browser.
   *
   * Does nothing (besides a debug message) when no CDP client can be found.
   *
   * @param {Object} page - The page that was just created
   */
  async onPageCreated(page) {
    // `_client` is a property or a method depending on the puppeteer version.
    // The parentheses matter: without them a missing `page` would still be dereferenced.
    const client =
      page &&
      (typeof page._client === 'function' ? page._client() : page._client)
    if (!client) {
      this.debug('Warning, missing properties to intercept CDP.', { page })
      return
    }

    // Intercept CDP commands and strip identifying and unnecessary sourceURL
    // https://github.com/puppeteer/puppeteer/blob/9b3005c105995cd267fdc7fb95b78aceab82cf0e/new-docs/puppeteer.cdpsession.md
    const debug = this.debug
    const originalSend = client.send

    // To find the methods/props in question check `_evaluateInternal` at:
    // https://github.com/puppeteer/puppeteer/blob/main/src/common/ExecutionContext.ts#L186
    const methodsToPatch = {
      'Runtime.evaluate': 'expression',
      'Runtime.callFunctionOn': 'functionDeclaration'
    }
    const SOURCE_URL_SUFFIX =
      '//# sourceURL=__puppeteer_evaluation_script__'

    client.send = async function(...sendArgs) {
      const [method, paramArgs] = sendArgs

      // Forward the call with ALL original arguments (including any trailing
      // ones after the params) and the original `this`.
      const next = async () => {
        try {
          return await originalSend.apply(client, sendArgs)
        } catch (error) {
          // This seems to happen sometimes when redirects cause other outstanding requests to be cut short
          if (
            error instanceof Error &&
            error.message.includes(
              `Protocol error (Network.getResponseBody): No resource with given identifier found`
            )
          ) {
            debug(
              `Caught and ignored an error about a missing network resource.`,
              { error }
            )
          } else {
            throw error
          }
        }
      }

      if (!method || !paramArgs) {
        return next()
      }

      const propName = methodsToPatch[method]
      if (!propName || !paramArgs[propName]) {
        return next()
      }

      debug('Stripping sourceURL', { method })
      // `paramArgs` is the same object as `sendArgs[1]`, so the stripped
      // value is what gets forwarded.
      paramArgs[propName] = paramArgs[propName].replace(SOURCE_URL_SUFFIX, '')

      return next()
    }
  }
}
"private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/sourceurl/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/sourceurl/index.js#L9-L58) - `opts` (optional, default `{}`) **Extends: PuppeteerExtraPlugin** Strip sourceURL from scripts injected by puppeteer. It can be used to identify the presence of pptr via stacktraces. --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/user-agent-override/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') /** * Fixes the UserAgent info (composed of UA string, Accept-Language, Platform, and UA hints). * * If you don't provide any values this plugin will default to using the regular UserAgent string (while stripping the headless part). * Default language is set to "en-US,en", the other settings match the UserAgent string. * If you are running on Linux, it will mask the settins to look like Windows. This behavior can be disabled with the `maskLinux` option. * * By default puppeteer will not set a `Accept-Language` header in headless: * It's (theoretically) possible to fix that using either `page.setExtraHTTPHeaders` or a `--lang` launch arg. * Unfortunately `page.setExtraHTTPHeaders` will lowercase everything and launch args are not always available. :) * * In addition, the `navigator.platform` property is always set to the host value, e.g. `Linux` which makes detection very easy. * * Note: You cannot use the regular `page.setUserAgent()` puppeteer call in your code, * as it will reset the language and platform values you set with this plugin. 
class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)
    // Treated as headful until `beforeLaunch`/`beforeConnect` say otherwise
    this._headless = false
  }

  /** Identifier under which this evasion is registered. */
  get name() {
    return 'stealth/evasions/user-agent-override'
  }

  /** Other plugins that must be loaded for this one to work. */
  get dependencies() {
    return new Set(['user-preferences'])
  }

  /** Option defaults. */
  get defaults() {
    return {
      userAgent: null,
      locale: 'en-US,en',
      maskLinux: true
    }
  }

  /**
   * Builds the user agent override (UA string, platform, UA client hint
   * metadata and, in headless mode, the Accept-Language value) and sends it
   * to the page via the CDP command `Network.setUserAgentOverride`.
   *
   * The returned promise settles only after the command has been answered,
   * so a failing command rejects this hook instead of producing an unhandled
   * rejection.
   *
   * @param {Object} page - The page that was just created
   */
  async onPageCreated(page) {
    // Determine the full user agent string, strip the "Headless" part
    let ua =
      this.opts.userAgent ||
      (await page.browser().userAgent()).replace('HeadlessChrome/', 'Chrome/')

    if (
      this.opts.maskLinux &&
      ua.includes('Linux') &&
      !ua.includes('Android') // Skip Android user agents since they also contain Linux
    ) {
      ua = ua.replace(/\(([^)]+)\)/, '(Windows NT 10.0; Win64; x64)') // Replace the first part in parentheses with Windows data
    }

    // First capture group of `regex` in the user agent, or '' without a match
    const _firstGroup = regex => (ua.match(regex) || [])[1] || ''

    // Full version number from Chrome
    const uaVersion = ua.includes('Chrome/')
      ? ua.match(/Chrome\/([\d|.]+)/)[1]
      : (await page.browser().version()).match(/\/([\d|.]+)/)[1]

    // Get platform identifier (short or long version)
    const _getPlatform = (extended = false) => {
      if (ua.includes('Mac OS X')) {
        return extended ? 'Mac OS X' : 'MacIntel'
      } else if (ua.includes('Android')) {
        return 'Android'
      } else if (ua.includes('Linux')) {
        return 'Linux'
      } else {
        return extended ? 'Windows' : 'Win32'
      }
    }

    // Source in C++: https://source.chromium.org/chromium/chromium/src/+/master:components/embedder_support/user_agent_utils.cc;l=55-100
    const _getBrands = () => {
      const seed = uaVersion.split('.')[0] // the major version number of Chrome

      const order = [
        [0, 1, 2],
        [0, 2, 1],
        [1, 0, 2],
        [1, 2, 0],
        [2, 0, 1],
        [2, 1, 0]
      ][seed % 6]
      const escapedChars = [' ', ' ', ';']

      const greaseyBrand = `${escapedChars[order[0]]}Not${
        escapedChars[order[1]]
      }A${escapedChars[order[2]]}Brand`

      const greasedBrandVersionList = []
      greasedBrandVersionList[order[0]] = {
        brand: greaseyBrand,
        version: '99'
      }
      greasedBrandVersionList[order[1]] = {
        brand: 'Chromium',
        version: seed
      }
      greasedBrandVersionList[order[2]] = {
        brand: 'Google Chrome',
        version: seed
      }

      return greasedBrandVersionList
    }

    // Return OS version ('' when it cannot be extracted from the user agent)
    const _getPlatformVersion = () => {
      if (ua.includes('Mac OS X ')) {
        return _firstGroup(/Mac OS X ([^)]+)/)
      } else if (ua.includes('Android ')) {
        // Stop at `;` or `)` so a UA without a model does not swallow the rest
        return _firstGroup(/Android ([^;)]+)/)
      } else if (ua.includes('Windows ')) {
        return _firstGroup(/Windows .*?([\d|.]+);?/)
      } else {
        return ''
      }
    }

    const _getMobile = () => ua.includes('Android')

    // Get architecture, this seems to be empty on mobile and x86 on desktop
    const _getPlatformArch = () => (_getMobile() ? '' : 'x86')

    // Return the Android model, empty on desktop (and when the UA carries no model)
    const _getPlatformModel = () =>
      _getMobile() ? _firstGroup(/Android.*?;\s([^)]+)/) : ''

    const override = {
      userAgent: ua,
      platform: _getPlatform(),
      userAgentMetadata: {
        brands: _getBrands(),
        fullVersion: uaVersion,
        platform: _getPlatform(true),
        platformVersion: _getPlatformVersion(),
        architecture: _getPlatformArch(),
        model: _getPlatformModel(),
        mobile: _getMobile()
      }
    }

    // In case of headless, override the acceptLanguage in CDP.
    // This is not preferred, as it messed up the header order.
    // On headful, we set the user preference language setting instead.
    if (this._headless) {
      override.acceptLanguage = this.opts.locale || 'en-US,en'
    }

    this.debug('onPageCreated - Will set these user agent options', {
      override,
      opts: this.opts
    })

    // `_client` is a property or a method depending on the puppeteer version
    const client =
      typeof page._client === 'function' ? page._client() : page._client
    if (!client) {
      this.debug('Warning, missing properties to send the override via CDP.', {
        page
      })
      return
    }
    await client.send('Network.setUserAgentOverride', override)
  }

  /**
   * Remembers whether the browser is launched headless.
   *
   * @param {Object} options - Launch options
   */
  async beforeLaunch(options) {
    // Check if launched headless
    this._headless = options.headless
  }

  async beforeConnect() {
    // Treat browsers using connect() as headless browsers
    this._headless = true
  }

  /** Data consumed by the `user-preferences` plugin: the accepted languages. */
  get data() {
    return [
      {
        name: 'userPreferences',
        value: {
          intl: { accept_languages: this.opts.locale || 'en-US,en' }
        }
      }
    ]
  }
}
// test('vanilla: Accept-Language header is missing', async t => { // const browser = await vanillaPuppeteer.launch({ headless: true }) // const page = await browser.newPage() // await page.goto('http://httpbin.org/headers') // const content = await page.content() // t.true(content.includes(`"User-Agent"`)) // t.false(content.includes(`"Accept-Language"`)) // }) test('vanilla: User-Agent header contains HeadlessChrome', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() await page.goto('http://httpbin.org/headers') const content = await page.content() t.true(content.includes(`"User-Agent"`)) t.true(content.includes(`HeadlessChrome`)) }) test('vanilla: navigator.languages is always en-US', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const lang = await page.evaluate(() => navigator.languages) t.true(lang.length === 1 && lang[0] === 'en-US') }) test('vanilla: navigator.platform set to host platform', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const platform = await page.evaluate(() => navigator.platform) switch (process.platform) { case 'linux': t.true(platform.includes('Linux')) // TravisCI break case 'darwin': t.true(platform === 'MacIntel') break case 'win32': t.true(platform === 'Win32') break default: t.true(platform === process.platform) } }) test('stealth: Accept-Language header with default locale', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() await page.goto('http://httpbin.org/headers') const content = await page.content() t.true(content.includes(`"User-Agent"`)) t.true(content.includes(`"Accept-Language": "en-US,en;q=0.9"`)) }) test('stealth: Accept-Language header with optional locale', async t => { const puppeteer = 
addExtra(vanillaPuppeteer).use( Plugin({ locale: 'de-DE,de' }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() await page.goto('http://httpbin.org/headers') const content = await page.content() t.true(content.includes(`"User-Agent"`)) t.true(content.includes(`"Accept-Language": "de-DE,de;q=0.9"`)) }) test('stealth: User-Agent header does not contain HeadlessChrome', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() await page.goto('http://httpbin.org/headers') const content = await page.content() t.true(content.includes(`"User-Agent"`)) t.false(content.includes(`HeadlessChrome`)) }) test('stealth: User-Agent header with custom userAgent', async t => { const puppeteer = addExtra(vanillaPuppeteer).use( Plugin({ userAgent: 'MyFunkyUA/1.0' }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() await page.goto('http://httpbin.org/headers') const content = await page.content() t.true(content.includes(`"User-Agent": "MyFunkyUA/1.0"`)) }) test('stealth: navigator.languages with default locale', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const lang = await page.evaluate(() => navigator.languages) t.true(lang.length === 2 && lang[0] === 'en-US' && lang[1] === 'en') }) test('stealth: navigator.languages with custom locale', async t => { const puppeteer = addExtra(vanillaPuppeteer).use( Plugin({ locale: 'de-DE,de' }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const langs = await page.evaluate(() => navigator.languages) t.deepEqual(langs, ['de-DE', 'de']) const lang = await page.evaluate(() => navigator.language) t.deepEqual(lang, 'de-DE') }) test('stealth: navigator.platform with maskLinux 
/**
 * Launches a headful browser with the plugin configured for the given user
 * agent and locale, and opens a page that echoes the request headers.
 *
 * @param {string} userAgent - User agent string handed to the plugin
 * @param {string} locale - Locale handed to the plugin
 * @returns {Promise<Object|null>} The loaded page, or `null` when the browser
 *   is too old to support UA hints (the browser is closed in that case)
 */
const _testUAHint = async (userAgent, locale) => {
  const puppeteer = addExtra(vanillaPuppeteer).use(
    Plugin({
      userAgent,
      locale
    })
  )

  const browser = await puppeteer.launch({
    headless: false, // only works on headful
    args: ['--enable-features=UserAgentClientHint']
  })

  const majorVersion = parseInt(
    (await browser.version()).match(/\/([^.]+)/)[1],
    10
  )
  if (majorVersion < 88) {
    // Skip test on browsers that don't support UA hints.
    // Nobody else holds a reference to this browser, so close it here.
    await browser.close()
    return null
  }

  const page = await browser.newPage()
  await page.goto('https://headers.cf/headers/?format=raw')

  return page
}
(secondLoad.includes('sec-ch-ua-full-version')) { t.true(secondLoad.includes('sec-ch-ua-mobile: ?0')) t.true(secondLoad.includes('sec-ch-ua-full-version: "99.0.9999.99"')) t.true(secondLoad.includes('sec-ch-ua-arch: "x86"')) t.true(secondLoad.includes('sec-ch-ua-platform: "Windows"')) t.true(secondLoad.includes('sec-ch-ua-platform-version: "10.0"')) t.true(secondLoad.includes('sec-ch-ua-model: ""')) } }) test('stealth: test if UA hints are correctly set - macOS 11', async t => { const page = await _testUAHint( 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.9999.99 Safari/537.36', 'de-DE' ) if (!page) { t.true(true) // skip return } const firstLoad = await page.content() t.true( firstLoad.includes( `sec-ch-ua: "Google Chrome";v="99", " Not;A Brand";v="99", "Chromium";v="99"` ) ) t.true(firstLoad.includes(`Accept-Language: de-DE`)) await page.reload() const secondLoad = await page.content() if (secondLoad.includes('sec-ch-ua-full-version')) { t.true(secondLoad.includes('sec-ch-ua-mobile: ?0')) t.true(secondLoad.includes('sec-ch-ua-full-version: "99.0.9999.99"')) t.true(secondLoad.includes('sec-ch-ua-arch: "x86"')) t.true(secondLoad.includes('sec-ch-ua-platform: "Mac OS X"')) t.true(secondLoad.includes('sec-ch-ua-platform-version: "11_1_0"')) t.true(secondLoad.includes('sec-ch-ua-model: ""')) } }) test('stealth: test if UA hints are correctly set - Android 10', async t => { const page = await _testUAHint( 'Mozilla/5.0 (Linux; Android 10; SM-P205) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.9999.99 Safari/537.36', 'nl-NL' ) if (!page) { t.true(true) // skip return } const firstLoad = await page.content() t.true( firstLoad.includes( `sec-ch-ua: "Google Chrome";v="99", " Not;A Brand";v="99", "Chromium";v="99"` ) ) t.true(firstLoad.includes(`Accept-Language: nl-NL`)) await page.reload() const secondLoad = await page.content() if (secondLoad.includes('sec-ch-ua-full-version')) { 
/**
 * Collects the UA client hint data exposed by `navigator.userAgentData`.
 * Meant to be evaluated inside the page, so it only relies on page globals.
 *
 * @returns {Promise<Object|undefined>} The high entropy values merged with
 *   `brands` and `mobile`, or `undefined` when the API is not available
 */
async function userAgentData() {
  const isSupported = 'userAgentData' in navigator
  if (!isSupported) {
    return undefined
  }

  // https://wicg.github.io/ua-client-hints/#getHighEntropyValues
  const hints = [
    'architecture', // "arm"
    'bitness', // "64"
    'model', // "X644GTM"
    'platform', // "PhoneOS"
    'platformVersion', // "10A"
    'uaFullVersion' // "73.32.AGX.5"
  ]
  const merged = Object.assign(
    {},
    await navigator.userAgentData.getHighEntropyValues(hints)
  )

  // Low entropy values are plain properties and take precedence
  for (const key of ['brands', 'mobile']) {
    merged[key] = navigator.userAgentData[key]
  }
  return merged
}
packages/puppeteer-extra-plugin-stealth/evasions/user-agent-override/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/user-agent-override/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/ab0047d1af7dc38412744abdb61bcfc35c42dc34/packages/puppeteer-extra-plugin-stealth/evasions/user-agent-override/index.js#L42-L203) - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?** Options (optional, default `{}`) - `opts.userAgent` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** The user agent to use (default: browser.userAgent()) - `opts.locale` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** The locale to use in `Accept-Language` header and in `navigator.languages` (default: `en-US,en`) - `opts.maskLinux` **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)?** Whether to hide Linux as platform in the user agent or not - true by default **Extends: PuppeteerExtraPlugin** Fixes the UserAgent info (composed of UA string, Accept-Language, Platform, and UA hints). If you don't provide any values this plugin will default to using the regular UserAgent string (while stripping the headless part). Default language is set to "en-US,en", the other settings match the UserAgent string. If you are running on Linux, it will mask the settings to look like Windows. This behavior can be disabled with the `maskLinux` option. By default puppeteer will not set an `Accept-Language` header in headless: It's (theoretically) possible to fix that using either `page.setExtraHTTPHeaders` or a `--lang` launch arg.
Unfortunately `page.setExtraHTTPHeaders` will lowercase everything and launch args are not always available. :) In addition, the `navigator.platform` property is always set to the host value, e.g. `Linux` which makes detection very easy. Note: You cannot use the regular `page.setUserAgent()` puppeteer call in your code, as it will reset the language and platform values you set with this plugin. Example: ```javascript const puppeteer = require('puppeteer-extra') const StealthPlugin = require('puppeteer-extra-plugin-stealth') const stealth = StealthPlugin() // Remove this specific stealth plugin from the default set stealth.enabledEvasions.delete('user-agent-override') puppeteer.use(stealth) // Stealth plugins are just regular `puppeteer-extra` plugins and can be added as such const UserAgentOverride = require('puppeteer-extra-plugin-stealth/evasions/user-agent-override') // Define custom UA and locale const ua = UserAgentOverride({ userAgent: 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)', locale: 'de-DE,de' }) puppeteer.use(ua) ``` ---

================================================
FILE: packages/puppeteer-extra-plugin-stealth/evasions/webgl.vendor/index.js
================================================
'use strict'

const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin')

const withUtils = require('../_utils/withUtils')

/**
 * Fix WebGL Vendor/Renderer being set to Google in headless mode
 *
 * Example data (Apple Retina MBP 13): {vendor: "Intel Inc.", renderer: "Intel(R) Iris(TM) Graphics 6100"}
 *
 * @param {Object} [opts] - Options
 * @param {string} [opts.vendor] - The vendor string to use (default: `Intel Inc.`)
 * @param {string} [opts.renderer] - The renderer string (default: `Intel Iris OpenGL Engine`)
 */
class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)
  }

  /** Plugin identifier. */
  get name() {
    return 'stealth/evasions/webgl.vendor'
  }

  /* global WebGLRenderingContext WebGL2RenderingContext */
  /**
   * Registers a script for every new document that proxies `getParameter`
   * on both WebGL rendering context prototypes.
   *
   * @param {Object} page - The page that was created; `this.opts` is passed
   *   along to the in-page script as its `opts` argument.
   */
  async onPageCreated(page) {
    await withUtils(page).evaluateOnNewDocument((utils, opts) => {
      const getParameterProxyHandler = {
        apply: function(target, ctx, args) {
          const param = (args || [])[0]
          // The original is invoked first, so anything it throws propagates
          // before a spoofed value could be returned.
          const result = utils.cache.Reflect.apply(target, ctx, args)
          // UNMASKED_VENDOR_WEBGL
          if (param === 37445) {
            return opts.vendor || 'Intel Inc.' // default in headless: Google Inc.
          }
          // UNMASKED_RENDERER_WEBGL
          if (param === 37446) {
            return opts.renderer || 'Intel Iris OpenGL Engine' // default in headless: Google SwiftShader
          }
          // Every other parameter keeps the value of the original call.
          return result
        }
      }

      // There's more than one WebGL rendering context
      // https://developer.mozilla.org/en-US/docs/Web/API/WebGL2RenderingContext#Browser_compatibility
      // To find out the original values here: Object.getOwnPropertyDescriptors(WebGLRenderingContext.prototype.getParameter)
      const addProxy = (obj, propName) => {
        utils.replaceWithProxy(obj, propName, getParameterProxyHandler)
      }
      // For whatever weird reason loops don't play nice with Object.defineProperty, here's the next best thing:
      addProxy(WebGLRenderingContext.prototype, 'getParameter')
      addProxy(WebGL2RenderingContext.prototype, 'getParameter')
    }, this.opts)
  }
}

/**
 * Factory export: creates a new plugin instance from the given config.
 */
module.exports = function(pluginConfig) {
  return new Plugin(pluginConfig)
}

================================================
FILE: packages/puppeteer-extra-plugin-stealth/evasions/webgl.vendor/index.test.js
================================================
const test = require('ava')

const {
  getVanillaFingerPrint,
  getStealthFingerPrint
} = require('../../test/util')
const { vanillaPuppeteer, addExtra } = require('../../test/util')
const Plugin = require('.')
const { errors } = require('puppeteer')

// FIXME: This changed in more recent chrome versions
// test('vanilla: videoCard is Google Inc', async t => {
//   const pageFn = async page => await page.evaluate(() => window.chrome) // eslint-disable-line
//   const { videoCard } = await getVanillaFingerPrint(pageFn)
//   t.deepEqual(videoCard, ['Google Inc.', 'Google SwiftShader'])
// })

test('stealth: videoCard is Intel Inc',
async t => { const pageFn = async page => await page.evaluate(() => window.chrome) // eslint-disable-line const { videoCard } = await getStealthFingerPrint(Plugin, pageFn) t.deepEqual(videoCard, ['Intel Inc.', 'Intel Iris OpenGL Engine']) }) test('stealth: customized values', async t => { const pageFn = async page => await page.evaluate(() => window.chrome) // eslint-disable-line const { videoCard } = await getStealthFingerPrint(Plugin, pageFn, { vendor: 'foo', renderer: 'bar' }) t.deepEqual(videoCard, ['foo', 'bar']) }) /* global WebGLRenderingContext */ async function extendedTests() { const results = {} async function test(name, fn) { const detectionPassed = await fn() if (detectionPassed) console.log(`Chrome headless detected via ${name}`) results[name] = detectionPassed } const canvas = document.createElement('canvas') const context = canvas.getContext('webgl') await test('descriptorsOK', _ => { const descriptors = Object.getOwnPropertyDescriptors( WebGLRenderingContext.prototype ) const str = descriptors.getParameter.toString() return str === `[object Object]` }) await test('toStringOK', _ => { const str = context.getParameter.toString() return str === `function getParameter() { [native code] }` }) await test('toStringOK2', _ => { const str = WebGLRenderingContext.prototype.getParameter.toString() return str === `function getParameter() { [native code] }` }) // Make sure we not reveal our proxy through errors await test('errorOK', _ => { try { return context.getParameter() } catch (err) { return !err.stack.includes(`at Object.apply`) } }) // Should not throw (that was old stealth behavior) await test('elementOK', _ => { try { return context.getParameter(123) === null } catch (_) { return false } }) return results } test('vanilla: webgl is native', async t => { const pageFn = async page => { // page.on('console', msg => { // console.log('Page console: ', msg.text()) // }) return await page.evaluate(extendedTests) // eslint-disable-line } const { pageFnResult: 
result } = await getVanillaFingerPrint(pageFn) const wasHeadlessDetected = Object.values(result).some(e => e === false) if (wasHeadlessDetected) { console.log(result) } t.false(wasHeadlessDetected) }) test('stealth: webgl is native', async t => { const pageFn = async page => await page.evaluate(extendedTests) // eslint-disable-line const { pageFnResult: result } = await getStealthFingerPrint(Plugin, pageFn) const wasHeadlessDetected = Object.values(result).some(e => e === false) if (wasHeadlessDetected) { console.log(result) } t.false(wasHeadlessDetected) }) /** * A very simple method to retrieve the name of the default videocard of the system * using webgl. * * Example (Apple Retina MBP 13): {vendor: "Intel Inc.", renderer: "Intel(R) Iris(TM) Graphics 6100"} * * @see https://stackoverflow.com/questions/49267764/how-to-get-the-video-card-driver-name-using-javascript-browser-side * @returns {Object} */ function getVideoCardInfo(context = 'webgl') { const gl = document.createElement('canvas').getContext(context) if (!gl) { return { error: 'no webgl' } } const debugInfo = gl.getExtension('WEBGL_debug_renderer_info') if (debugInfo) { return { vendor: gl.getParameter(debugInfo.UNMASKED_VENDOR_WEBGL), renderer: gl.getParameter(debugInfo.UNMASKED_RENDERER_WEBGL) } } return { error: 'no WEBGL_debug_renderer_info' } } test('stealth: handles WebGLRenderingContext', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const videoCardInfo = await page.evaluate(getVideoCardInfo, 'webgl') t.is(videoCardInfo.error, undefined) t.is(videoCardInfo.vendor, 'Intel Inc.') t.is(videoCardInfo.renderer, 'Intel Iris OpenGL Engine') }) test('stealth: handles WebGL2RenderingContext', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const videoCardInfo = await 
page.evaluate(getVideoCardInfo, 'webgl2') t.is(videoCardInfo.error, undefined) t.is(videoCardInfo.vendor, 'Intel Inc.') t.is(videoCardInfo.renderer, 'Intel Iris OpenGL Engine') }) test('vanilla: normal toString stuff', async t => { const browser = await vanillaPuppeteer.launch({ headless: true }) const page = await browser.newPage() const test1 = await page.evaluate(() => { return WebGLRenderingContext.prototype.getParameter.toString + '' }) t.is(test1, 'function toString() { [native code] }') const test2 = await page.evaluate(() => { return WebGLRenderingContext.prototype.getParameter.toString() }) t.is(test2, 'function getParameter() { [native code] }') }) test('stealth: will not leak toString stuff', async t => { const puppeteer = addExtra(vanillaPuppeteer).use(Plugin()) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const test1 = await page.evaluate(() => { return WebGLRenderingContext.prototype.getParameter.toString + '' }) t.is(test1, 'function toString() { [native code] }') // returns function () { [native code] } const test2 = await page.evaluate(() => { return WebGLRenderingContext.prototype.getParameter.toString() }) t.is(test2, 'function getParameter() { [native code] }') }) test('stealth: sets user opts correctly', async t => { const puppeteer = addExtra(vanillaPuppeteer).use( Plugin({ vendor: 'alice', renderer: 'bob' }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const videoCardInfo = await page.evaluate(getVideoCardInfo, 'webgl') t.is(videoCardInfo.error, undefined) t.is(videoCardInfo.vendor, 'alice') t.is(videoCardInfo.renderer, 'bob') }) test('stealth: does not affect protoype', async t => { const puppeteer = addExtra(vanillaPuppeteer).use( Plugin({ vendor: 'alice', renderer: 'bob' }) ) const browser = await puppeteer.launch({ headless: true }) const page = await browser.newPage() const result = await page.evaluate(() => { try { return 
WebGLRenderingContext.prototype.getParameter(37445) } catch (err) { return err.message } }) t.is(result, 'Illegal invocation') }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/webgl.vendor/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/webgl.vendor/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/webgl.vendor/index.js#L17-L55) - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?** Options (optional, default `{}`) - `opts.vendor` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** The vendor string to use (default: `Intel Inc.`) - `opts.renderer` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** The renderer string (default: `Intel Iris OpenGL Engine`) **Extends: PuppeteerExtraPlugin** Fix WebGL Vendor/Renderer being set to Google in headless mode Example data (Apple Retina MBP 13): {vendor: "Intel Inc.", renderer: "Intel(R) Iris(TM) Graphics 6100"} --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/window.outerdimensions/index.js ================================================ 'use strict' const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') /** * Fix missing window.outerWidth/window.outerHeight in headless mode * Will also set the viewport to match window size, unless specified by user */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 
'stealth/evasions/window.outerdimensions' } async onPageCreated(page) { // Chrome returns undefined, Firefox false await page.evaluateOnNewDocument(() => { try { if (window.outerWidth && window.outerHeight) { return // nothing to do here } const windowFrame = 85 // probably OS and WM dependent window.outerWidth = window.innerWidth window.outerHeight = window.innerHeight + windowFrame } catch (err) {} }) } async beforeLaunch(options) { // Have viewport match window size, unless specified by user // https://github.com/GoogleChrome/puppeteer/issues/3688 if (!('defaultViewport' in options)) { options.defaultViewport = null } return options } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/window.outerdimensions/package.json ================================================ { "private": true, "main": "index.js" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/evasions/window.outerdimensions/readme.md ================================================ ## API #### Table of Contents - [class: Plugin](#class-plugin) ### class: [Plugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/evasions/window.outerdimensions/index.js#L9-L40) - `opts` (optional, default `{}`) **Extends: PuppeteerExtraPlugin** Fix missing window.outerWidth/window.outerHeight in headless mode Will also set the viewport to match window size, unless specified by user --- ================================================ FILE: packages/puppeteer-extra-plugin-stealth/examples/detect-headless.js ================================================ 'use strict' // taken from: https://github.com/paulirish/headless-cat-n-mouse/blob/master/detect-headless.js // initial detects from @antoinevastel // 
http://antoinevastel.github.io/bot%20detection/2018/01/17/detect-chrome-headless-v2.html module.exports = async function() { const results = {} async function test(name, fn) { const detectionPassed = await fn() if (detectionPassed) { console.log(`WARNING: Chrome headless detected via ${name}`) } else { console.log(`PASS: Chrome headless NOT detected via ${name}`) } results[name] = detectionPassed } await test('userAgent', _ => { return /HeadlessChrome/.test(window.navigator.userAgent) }) // Detects the --enable-automation || --headless flags // Will return true in headful if --enable-automation is provided await test('navigator.webdriver present', _ => { return 'webdriver' in navigator }) await test('window.chrome missing', _ => { return /Chrome/.test(window.navigator.userAgent) && !window.chrome }) await test('permissions API', async _ => { const permissionStatus = await navigator.permissions.query({ name: 'notifications' }) // eslint-disable-next-line return ( Notification.permission === 'denied' && // eslint-disable-line no-undef permissionStatus.state === 'prompt' ) }) await test('permissions API overriden', _ => { const permissions = window.navigator.permissions if (permissions.query.toString() !== 'function query() { [native code] }') return true if ( permissions.query.toString.toString() !== 'function toString() { [native code] }' ) return true if ( permissions.query.toString.hasOwnProperty('[[Handler]]') && // eslint-disable-line no-prototype-builtins permissions.query.toString.hasOwnProperty('[[Target]]') && // eslint-disable-line no-prototype-builtins permissions.query.toString.hasOwnProperty('[[IsRevoked]]') // eslint-disable-line no-prototype-builtins ) return true if (permissions.hasOwnProperty('query')) return true // eslint-disable-line no-prototype-builtins }) await test('navigator.plugins empty', _ => { return navigator.plugins.length === 0 }) await test('navigator.languages blank', _ => { return navigator.languages === '' }) await test('iFrame for 
fresh window object', _ => { // evaluateOnNewDocument scripts don't apply within [srcdoc] (or [sandbox]) iframes // https://github.com/GoogleChrome/puppeteer/issues/1106#issuecomment-359313898 const iframe = document.createElement('iframe') iframe.srcdoc = 'page intentionally left blank' document.body.appendChild(iframe) // Here we would need to rerun all tests with `iframe.contentWindow` as `window` // Example: return iframe.contentWindow.navigator.plugins.length === 0 }) // This detects that a devtools protocol agent is attached. // So it will also pass true in headful Chrome if the devtools window is attached await test('toString', _ => { let gotYou = 0 const spooky = /./ spooky.toString = function() { gotYou++ return 'spooky' } console.debug(spooky) return gotYou > 1 }) return results } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/examples/test1.js ================================================ 'use strict' const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-stealth')()) const detectHeadless = require('./detect-headless') ;(async () => { const browser = await puppeteer.launch({ args: ['--no-sandbox'] }) const page = await browser.newPage() page.on('console', msg => { console.log('Page console: ', msg.text()) }) await page.goto('about:blank') const detectionResults = await page.evaluate(detectHeadless) console.assert( Object.keys(detectionResults).length, 'No detection results returned.' ) await browser.close() })() ================================================ FILE: packages/puppeteer-extra-plugin-stealth/examples/test2.js ================================================ 'use strict' const puppeteer = require('puppeteer-extra') // Enable stealth plugin puppeteer.use(require('puppeteer-extra-plugin-stealth')()) ;(async () => { // Launch the browser in headless mode and set up a page. 
const browser = await puppeteer.launch({
    args: ['--no-sandbox'],
    headless: true
  })
  const page = await browser.newPage()

  // Navigate to the page that will perform the tests.
  const testUrl =
    'https://intoli.com/blog/' +
    'not-possible-to-block-chrome-headless/chrome-headless-test.html'
  await page.goto(testUrl)

  // Save a screenshot of the results.
  const screenshotPath = '/tmp/headless-test-result.png'
  await page.screenshot({ path: screenshotPath })
  console.log('have a look at the screenshot:', screenshotPath)

  await browser.close()
})()

================================================
FILE: packages/puppeteer-extra-plugin-stealth/index.d.ts
================================================
export = defaultExport;
declare function defaultExport(opts?: {
    enabledEvasions?: Set<string>;
}): StealthPlugin;
declare const StealthPlugin_base: typeof import("puppeteer-extra-plugin").PuppeteerExtraPlugin;
/**
 * Stealth mode: Applies various techniques to make detection of headless puppeteer harder. 💯
 *
 * ### Purpose
 * There are a couple of ways the use of puppeteer can easily be detected by a target website.
 * The addition of `HeadlessChrome` to the user-agent being only the most obvious one.
 *
 * The goal of this plugin is to be the definite companion to puppeteer to avoid
 * detection, applying new techniques as they surface.
 *
 * As this cat & mouse game is in its infancy and fast-paced the plugin
 * is kept as flexible as possible, to support quick testing and iterations.
 *
 * ### Modularity
 * This plugin uses `puppeteer-extra`'s dependency system to only require
 * code mods for evasions that have been enabled, to keep things modular and efficient.
 *
 * The `stealth` plugin is a convenience wrapper that requires multiple [evasion techniques](./evasions/)
 * automatically and comes with defaults. You could also bypass the main module and require
 * specific evasion plugins yourself, if you wish to do so (as they're standalone `puppeteer-extra` plugins):
 *
 * ```es6
 * // bypass main module and require a specific stealth plugin directly:
 * puppeteer.use(require('puppeteer-extra-plugin-stealth/evasions/console.debug')())
 * ```
 *
 * ### Contributing
 * PRs are welcome, if you want to add a new evasion technique I suggest you
 * look at the [template](./evasions/_template) to kickstart things.
 *
 * ### Kudos
 * Thanks to [Evan Sangaline](https://intoli.com/blog/not-possible-to-block-chrome-headless/) and [Paul Irish](https://github.com/paulirish/headless-cat-n-mouse) for kickstarting the discussion!
 *
 * ---
 *
 * @todo
 * - white-/blacklist with url globs (make this a generic plugin method?)
 * - dynamic whitelist based on function evaluation
 *
 * @example
 * const puppeteer = require('puppeteer-extra')
 * // Enable stealth plugin with all evasions
 * puppeteer.use(require('puppeteer-extra-plugin-stealth')())
 *
 *
 * ;(async () => {
 *   // Launch the browser in headless mode and set up a page.
 *   const browser = await puppeteer.launch({ args: ['--no-sandbox'], headless: true })
 *   const page = await browser.newPage()
 *
 *   // Navigate to the page that will perform the tests.
 *   const testUrl = 'https://intoli.com/blog/' +
 *     'not-possible-to-block-chrome-headless/chrome-headless-test.html'
 *   await page.goto(testUrl)
 *
 *   // Save a screenshot of the results.
 *   const screenshotPath = '/tmp/headless-test-result.png'
 *   await page.screenshot({path: screenshotPath})
 *   console.log('have a look at the screenshot:', screenshotPath)
 *
 *   await browser.close()
 * })()
 *
 * @param {Object} [opts] - Options
 * @param {Set<string>} [opts.enabledEvasions] - Specify which evasions to use (by default all)
 *
 */
declare class StealthPlugin extends StealthPlugin_base {
    constructor(opts?: {});
    get defaults(): {
        availableEvasions: Set<string>;
        enabledEvasions: Set<string>;
    };
    /**
     * Get all available evasions.
     *
     * Please look into the [evasions directory](./evasions/) for an up to date list.
     *
     * @type {Set<string>} - A Set of all available evasions.
     *
     * @example
     * const pluginStealth = require('puppeteer-extra-plugin-stealth')()
     * console.log(pluginStealth.availableEvasions) // => Set { 'user-agent', 'console.debug' }
     * puppeteer.use(pluginStealth)
     */
    get availableEvasions(): Set<string>;
    /**
     * @private
     */
    set enabledEvasions(arg: Set<string>);
    /**
     * Get all enabled evasions.
     *
     * Enabled evasions can be configured either through `opts` or by modifying this property.
     *
     * @type {Set<string>} - A Set of all enabled evasions.
     *
     * @example
     * // Remove specific evasion from enabled ones dynamically
     * const pluginStealth = require('puppeteer-extra-plugin-stealth')()
     * pluginStealth.enabledEvasions.delete('console.debug')
     * puppeteer.use(pluginStealth)
     */
    get enabledEvasions(): Set<string>;
    onBrowser(browser: any): Promise<void>;
}

================================================
FILE: packages/puppeteer-extra-plugin-stealth/index.js
================================================
'use strict'

const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin')

/**
 * Stealth mode: Applies various techniques to make detection of headless puppeteer harder. 💯
 *
 * ### Purpose
 * There are a couple of ways the use of puppeteer can easily be detected by a target website.
 * The addition of `HeadlessChrome` to the user-agent being only the most obvious one.
 *
 * The goal of this plugin is to be the definite companion to puppeteer to avoid
 * detection, applying new techniques as they surface.
 *
 * As this cat & mouse game is in its infancy and fast-paced the plugin
 * is kept as flexible as possible, to support quick testing and iterations.
 *
 * ### Modularity
 * This plugin uses `puppeteer-extra`'s dependency system to only require
 * code mods for evasions that have been enabled, to keep things modular and efficient.
* * The `stealth` plugin is a convenience wrapper that requires multiple [evasion techniques](./evasions/) * automatically and comes with defaults. You could also bypass the main module and require * specific evasion plugins yourself, if you whish to do so (as they're standalone `puppeteer-extra` plugins): * * ```es6 * // bypass main module and require a specific stealth plugin directly: * puppeteer.use(require('puppeteer-extra-plugin-stealth/evasions/console.debug')()) * ``` * * ### Contributing * PRs are welcome, if you want to add a new evasion technique I suggest you * look at the [template](./evasions/_template) to kickstart things. * * ### Kudos * Thanks to [Evan Sangaline](https://intoli.com/blog/not-possible-to-block-chrome-headless/) and [Paul Irish](https://github.com/paulirish/headless-cat-n-mouse) for kickstarting the discussion! * * --- * * @todo * - white-/blacklist with url globs (make this a generic plugin method?) * - dynamic whitelist based on function evaluation * * @example * const puppeteer = require('puppeteer-extra') * // Enable stealth plugin with all evasions * puppeteer.use(require('puppeteer-extra-plugin-stealth')()) * * * ;(async () => { * // Launch the browser in headless mode and set up a page. * const browser = await puppeteer.launch({ args: ['--no-sandbox'], headless: true }) * const page = await browser.newPage() * * // Navigate to the page that will perform the tests. * const testUrl = 'https://intoli.com/blog/' + * 'not-possible-to-block-chrome-headless/chrome-headless-test.html' * await page.goto(testUrl) * * // Save a screenshot of the results. 
* const screenshotPath = '/tmp/headless-test-result.png' * await page.screenshot({path: screenshotPath}) * console.log('have a look at the screenshot:', screenshotPath) * * await browser.close() * })() * * @param {Object} [opts] - Options * @param {Set} [opts.enabledEvasions] - Specify which evasions to use (by default all) * */ class StealthPlugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) } get name() { return 'stealth' } get defaults() { const availableEvasions = new Set([ 'chrome.app', 'chrome.csi', 'chrome.loadTimes', 'chrome.runtime', 'defaultArgs', 'iframe.contentWindow', 'media.codecs', 'navigator.hardwareConcurrency', 'navigator.languages', 'navigator.permissions', 'navigator.plugins', 'navigator.webdriver', 'sourceurl', 'user-agent-override', 'webgl.vendor', 'window.outerdimensions' ]) return { availableEvasions, // Enable all available evasions by default enabledEvasions: new Set([...availableEvasions]) } } /** * Requires evasion techniques dynamically based on configuration. * * @private */ get dependencies() { return new Set( [...this.opts.enabledEvasions].map(e => `${this.name}/evasions/${e}`) ) } /** * Get all available evasions. * * Please look into the [evasions directory](./evasions/) for an up to date list. * * @type {Set} - A Set of all available evasions. * * @example * const pluginStealth = require('puppeteer-extra-plugin-stealth')() * console.log(pluginStealth.availableEvasions) // => Set { 'user-agent', 'console.debug' } * puppeteer.use(pluginStealth) */ get availableEvasions() { return this.defaults.availableEvasions } /** * Get all enabled evasions. * * Enabled evasions can be configured either through `opts` or by modifying this property. * * @type {Set} - A Set of all enabled evasions. 
* * @example * // Remove specific evasion from enabled ones dynamically * const pluginStealth = require('puppeteer-extra-plugin-stealth')() * pluginStealth.enabledEvasions.delete('console.debug') * puppeteer.use(pluginStealth) */ get enabledEvasions() { return this.opts.enabledEvasions } /** * @private */ set enabledEvasions(evasions) { this.opts.enabledEvasions = evasions } async onBrowser(browser) { if (browser && browser.setMaxListeners) { // Increase event emitter listeners to prevent MaxListenersExceededWarning browser.setMaxListeners(30) } } } /** * Default export, PuppeteerExtraStealthPlugin * * @param {Object} [opts] - Options * @param {Set} [opts.enabledEvasions] - Specify which evasions to use (by default all) */ const defaultExport = opts => new StealthPlugin(opts) module.exports = defaultExport // const moduleExport = defaultExport // moduleExport.StealthPlugin = StealthPlugin // module.exports = moduleExport ================================================ FILE: packages/puppeteer-extra-plugin-stealth/index.test.js ================================================ 'use strict' const PLUGIN_NAME = 'stealth' const test = require('ava') const Plugin = require('.') test('is a function', async t => { t.is(typeof Plugin, 'function') }) test('should have the basic class members', async t => { const instance = Plugin() t.is(instance.name, PLUGIN_NAME) t.true(instance._isPuppeteerExtraPlugin) }) test('should have the public child class members', async t => { const instance = Plugin() const prototype = Object.getPrototypeOf(instance) const childClassMembers = Object.getOwnPropertyNames(prototype) t.true(childClassMembers.includes('constructor')) t.true(childClassMembers.includes('name')) t.true(childClassMembers.includes('name')) t.true(childClassMembers.includes('defaults')) t.true(childClassMembers.includes('availableEvasions')) t.true(childClassMembers.includes('enabledEvasions')) t.is(childClassMembers.length, 7) }) test('should have opts with default 
values', async t => { const instance = Plugin() t.deepEqual(instance.opts.enabledEvasions, instance.availableEvasions) }) test('should add all dependencies dynamically', async t => { const instance = Plugin() const deps = new Set( [...instance.opts.enabledEvasions].map(e => `${PLUGIN_NAME}/evasions/${e}`) ) t.deepEqual(instance.dependencies, deps) }) test('should add all dependencies dynamically including changes', async t => { const instance = Plugin() const fakeDep = 'foobar' instance.enabledEvasions = new Set([fakeDep]) t.deepEqual( instance.dependencies, new Set([`${PLUGIN_NAME}/evasions/${fakeDep}`]) ) }) ================================================ FILE: packages/puppeteer-extra-plugin-stealth/package.json ================================================ { "name": "puppeteer-extra-plugin-stealth", "version": "2.11.2", "description": "Stealth mode: Applies various techniques to make detection of headless puppeteer harder.", "main": "index.js", "typings": "index.d.ts", "repository": "berstend/puppeteer-extra", "homepage": "https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth#readme", "author": "berstend", "license": "MIT", "scripts": { "docs": "run-s docs-for-plugin postdocs-for-plugin docs-for-evasions postdocs-for-evasions types", "docs-for-plugin": "documentation readme --quiet --shallow --github --markdown-theme transitivebs --readme-file readme.md --section API index.js", "postdocs-for-plugin": "npx prettier --write readme.md", "docs-for-evasions": "cd ./evasions && loop \"documentation readme --quiet --shallow --github --markdown-theme transitivebs --readme-file readme.md --section API index.js\"", "postdocs-for-evasions": "cd ./evasions && loop \"npx prettier --write readme.md\"", "lint": "eslint --ext .js .", "test:js": "ava --concurrency 2 -v", "test": "run-p test:js", "test-ci": "run-s test:js", "types": "npx --package typescript@3.7 tsc --emitDeclarationOnly --declaration --allowJs index.js" }, 
"engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "stealth", "stealth-mode", "detection-evasion", "crawler", "chrome", "headless", "pupeteer" ], "ava": { "files": [ "!test/util.js", "!test/fixtures/sw.js" ] }, "devDependencies": { "ava": "2.4.0", "documentation-markdown-themes": "^12.1.5", "fpcollect": "^1.0.4", "fpscanner": "^0.1.5", "loop": "^3.0.6", "npm-run-all": "^4.1.5", "puppeteer": "9" }, "dependencies": { "debug": "^4.1.1", "puppeteer-extra-plugin": "^3.2.3", "puppeteer-extra-plugin-user-preferences": "^2.4.1" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin-stealth/readme.md ================================================ # puppeteer-extra-plugin-stealth [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/berstend/puppeteer-extra/test.yml?branch=master&event=push) [![Discord](https://img.shields.io/discord/737009125862408274)](https://extra.community) [![npm](https://img.shields.io/npm/v/puppeteer-extra-plugin-stealth.svg)](https://www.npmjs.com/package/puppeteer-extra-plugin-stealth) > A plugin for [puppeteer-extra](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra) and [playwright-extra](https://github.com/berstend/puppeteer-extra/tree/master/packages/playwright-extra) to prevent detection.

## Install ```bash yarn add puppeteer-extra-plugin-stealth # - or - npm install puppeteer-extra-plugin-stealth ``` If this is your first [puppeteer-extra](https://github.com/berstend/puppeteer-extra) plugin here's everything you need: ```bash yarn add puppeteer puppeteer-extra puppeteer-extra-plugin-stealth # - or - npm install puppeteer puppeteer-extra puppeteer-extra-plugin-stealth ``` ## Usage ```js // puppeteer-extra is a drop-in replacement for puppeteer, // it augments the installed puppeteer with plugin functionality const puppeteer = require('puppeteer-extra') // add stealth plugin and use defaults (all evasion techniques) const StealthPlugin = require('puppeteer-extra-plugin-stealth') puppeteer.use(StealthPlugin()) // puppeteer usage as normal puppeteer.launch({ headless: true }).then(async browser => { console.log('Running tests..') const page = await browser.newPage() await page.goto('https://bot.sannysoft.com') await page.waitForTimeout(5000) await page.screenshot({ path: 'testresult.png', fullPage: true }) await browser.close() console.log(`All done, check the screenshot. ✨`) }) ```
TypeScript usage
> `puppeteer-extra` and most plugins are written in TS, > so you get perfect type support out of the box. :) ```ts import puppeteer from 'puppeteer-extra' import StealthPlugin from 'puppeteer-extra-plugin-stealth' puppeteer .use(StealthPlugin()) .launch({ headless: true }) .then(async browser => { const page = await browser.newPage() await page.goto('https://bot.sannysoft.com') await page.waitForTimeout(5000) await page.screenshot({ path: 'stealth.png', fullPage: true }) await browser.close() }) ``` > Please check this [wiki](https://github.com/berstend/puppeteer-extra/wiki/TypeScript-usage) entry in case you have TypeScript related import issues.

> Please check out the [main documentation](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra) to learn more about `puppeteer-extra` (Firefox usage, other Plugins, etc). ## Status - ✅ **`puppeteer-extra` with stealth passes all public bot tests.** Please note: I consider this a friendly competition in a rather interesting cat and mouse game. If the other team (👋) wants to detect headless chromium there are still ways to do that (at least I noticed a few, which I'll tackle in future updates). It's probably impossible to prevent all ways to detect headless chromium, but it should be possible to make it so difficult that it becomes cost-prohibitive or triggers too many false-positives to be feasible. If something new comes up or you experience a problem, please do your homework and create a PR in a respectful way (this is Github, not reddit) or I might not be motivated to help. :) ## Changelog > 🎁 **Note:** Until we've automated changelog updates in markdown files please follow the `#announcements` channel in our [discord server](https://discord.gg/vz7PeKk) for the latest updates and changelog info. _Older changelog:_ #### `v2.4.7` - New: `user-agent-override` - Used to set a stealthy UA string, language & platform. 
This also fixes issues with the prior method of setting the `Accept-Language` header through request interception ([#104](https://github.com/berstend/puppeteer-extra/pull/104), kudos to [@Niek](https://github.com/Niek)) - New: `navigator.vendor` - Makes it possible to optionally override navigator.vendor ([#110](https://github.com/berstend/puppeteer-extra/pull/110), thanks [@Niek](https://github.com/Niek)) - Improved: `navigator.webdriver`: Now uses ES6 Proxies to pass `instanceof` tests ([#117](https://github.com/berstend/puppeteer-extra/pull/117), thanks [@aabbccsmith](https://github.com/aabbccsmith)) - Removed: `user-agent`, `accept-language` (now obsolete) #### `v2.4.2` / `v2.4.1` - Improved: `iframe.contentWindow` - We now proxy the original window object and smartly redirect calls that might reveal its true identity, as opposed to mocking it like peasants :) - Improved: `accept-language` - More robust and it's now possible to [set a custom locale](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth/evasions/accept-language#readme) if needed. 
- ⭐️ Passes the [headless-cat-n-mouse](https://github.com/paulirish/headless-cat-n-mouse) test #### `v2.4.0` Let's ring the bell for round 2 in this cat and mouse fight 😄 - New: All evasions now have a specific before and after test to make this whole topic less voodoo - New: `media.codecs` - we spoof the presence of proprietary codecs in Chromium now - New & improved: `iframe.contentWindow` - Found a way to fix `srcdoc` frame based detection without breaking recaptcha inline popup & other iframes (please report any issues) - New: `accept-language` - Adds a missing `Accept-Language` header in headless (capitalized correctly, `page.setExtraHTTPHeaders` is all lowercase which can be detected) - Improved: `chrome.runtime` - More extensive mocking of the chrome object - ⭐️ All [fpscanner](https://antoinevastel.com/bots/) tests are now green, as well as all [intoli](https://bot.sannysoft.com) tests and the [`areyouheadless`](https://arh.antoinevastel.com/bots/areyouheadless) test
v2.1.2
- Improved: `navigator.plugins` - we fully emulate plugins/mimetypes in headless now 🎉 - New: `webgl.vendor` - is otherwise set to "Google" in headless - New: `window.outerdimensions` - fix missing window.outerWidth/outerHeight and viewport - Fixed: `navigator.webdriver` now returns undefined instead of false
## Test results (red is bad) #### Vanilla puppeteer without stealth 😢
Chromium + headless
Chromium + headful
Chrome + headless
Chrome + headful
#### Puppeteer with stealth plugin 💯
Chromium + headless
Chromium + headful
Chrome + headless
Chrome + headful
> Note: The `MQ_SCREEN` test is broken on their page (will fail in regular Chrome as well). Tests have been done using [this test site](https://bot.sannysoft.com/) and [these scripts](./stealthtests/). #### Improved reCAPTCHA v3 scores Using stealth also seems to help with maintaining a normal [reCAPTCHA v3 score](https://developers.google.com/recaptcha/docs/v3#score).
Regular Puppeteer

Stealth Puppeteer

Note: The [official test](https://recaptcha-demo.appspot.com/recaptcha-v3-request-scores.php) is to be taken with a grain of salt, as the score is calculated individually per site and multiple other factors (past behaviour, IP address, etc). Based on anecdotal observations it still seems to work as a rough indicator. _**Tip:** Have a look at the [recaptcha plugin](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-recaptcha) if you have issues with reCAPTCHAs._ ## API #### Table of Contents - [puppeteer-extra-plugin-stealth \[ ](#puppeteer-extra-plugin-stealth---) - [Install](#install) - [Usage](#usage) - [Status](#status) - [Changelog](#changelog) - [`v2.4.7`](#v247) - [`v2.4.2` / `v2.4.1`](#v242--v241) - [`v2.4.0`](#v240) - [Test results (red is bad)](#test-results-red-is-bad) - [Vanilla puppeteer without stealth 😢](#vanilla-puppeteer-without-stealth-) - [Puppeteer with stealth plugin 💯](#puppeteer-with-stealth-plugin-) - [Improved reCAPTCHA v3 scores](#improved-recaptcha-v3-scores) - [API](#api) - [Table of Contents](#table-of-contents) - [class: StealthPlugin](#class-stealthplugin) - [Purpose](#purpose) - [Modularity](#modularity) - [Contributing](#contributing) - [Kudos](#kudos) - [.availableEvasions](#availableevasions) - [.enabledEvasions](#enabledevasions) - [defaultExport(opts?)](#defaultexportopts) - [License](#license) ### class: [StealthPlugin](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/index.js#L72-L162) - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)?** Options (optional, default `{}`) - `opts.enabledEvasions` **[Set](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Set)<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>?** Specify which evasions to use (by default all) **Extends: PuppeteerExtraPlugin** 
Stealth mode: Applies various techniques to make detection of headless puppeteer harder. 💯 #### Purpose There are a couple of ways the use of puppeteer can easily be detected by a target website. The addition of `HeadlessChrome` to the user-agent being only the most obvious one. The goal of this plugin is to be the definite companion to puppeteer to avoid detection, applying new techniques as they surface. As this cat & mouse game is in its infancy and fast-paced the plugin is kept as flexible as possible, to support quick testing and iterations. #### Modularity This plugin uses `puppeteer-extra`'s dependency system to only require code mods for evasions that have been enabled, to keep things modular and efficient. The `stealth` plugin is a convenience wrapper that requires multiple [evasion techniques](./evasions/) automatically and comes with defaults. You could also bypass the main module and require specific evasion plugins yourself, if you wish to do so (as they're standalone `puppeteer-extra` plugins): ```es6 // bypass main module and require a specific stealth plugin directly: puppeteer.use( require('puppeteer-extra-plugin-stealth/evasions/console.debug')() ) ``` #### Contributing PRs are welcome, if you want to add a new evasion technique I suggest you look at the [template](./evasions/_template) to kickstart things. #### Kudos Thanks to [Evan Sangaline](https://intoli.com/blog/not-possible-to-block-chrome-headless/) and [Paul Irish](https://github.com/paulirish/headless-cat-n-mouse) for kickstarting the discussion! --- Example: ```javascript const puppeteer = require('puppeteer-extra') // Enable stealth plugin with all evasions puppeteer.use(require('puppeteer-extra-plugin-stealth')()) ;(async () => { // Launch the browser in headless mode and set up a page. const browser = await puppeteer.launch({ args: ['--no-sandbox'], headless: true }) const page = await browser.newPage() // Navigate to the page that will perform the tests. 
const testUrl = 'https://intoli.com/blog/' + 'not-possible-to-block-chrome-headless/chrome-headless-test.html' await page.goto(testUrl) // Save a screenshot of the results. const screenshotPath = '/tmp/headless-test-result.png' await page.screenshot({ path: screenshotPath }) console.log('have a look at the screenshot:', screenshotPath) await browser.close() })() ``` --- #### .[availableEvasions](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/index.js#L128-L130) Type: **[Set](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Set)<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>** Get all available evasions. Please look into the [evasions directory](./evasions/) for an up to date list. Example: ```javascript const pluginStealth = require('puppeteer-extra-plugin-stealth')() console.log(pluginStealth.availableEvasions) // => Set { 'user-agent', 'console.debug' } puppeteer.use(pluginStealth) ``` --- #### .[enabledEvasions](https://github.com/berstend/puppeteer-extra/blob/e6133619b051febed630ada35241664eba59b9fa/packages/puppeteer-extra-plugin-stealth/index.js#L145-L147) Type: **[Set](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Set)<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>** Get all enabled evasions. Enabled evasions can be configured either through `opts` or by modifying this property. 
#!/bin/bash
# Regenerates the stealth test screenshots and their thumbnails.
# All paths are relative, so run this from the directory that contains
# the ./stealthtests folder.

echo "Cleanup.."
rm -f ./stealthtests/_results/*.png
rm -f ./stealthtests/_results/_thumbs/*.png

echo "Running scripts.."
# Stream NUL-delimited paths instead of word-splitting a `find` result:
# the previous `for file in $FILES` broke on paths containing whitespace
# or glob characters. stdin of node is detached so a script can never
# swallow the remaining file list.
find ./stealthtests -type f -name '*.js' -print0 |
  while IFS= read -r -d '' file; do
    node "$file" < /dev/null
  done

echo "Making thumbnails.."
cp ./stealthtests/_results/*.png ./stealthtests/_results/_thumbs
# Note: MacOS specific image resizing command
sips -Z 640 ./stealthtests/_results/_thumbs/*.png

echo "All done."
/**
 * Opens https://bot.sannysoft.com/ in headful Google Chrome (MacOS install
 * path) through the module-level `puppeteer` instance and stores a full-page
 * screenshot at `screenshotPath`.
 *
 * @returns {Promise<void>} Resolves once the screenshot is written and the
 *   browser is closed; rejects with the first error raised by any step.
 */
async function main() {
  console.log('start', scriptName)
  const browser = await puppeteer.launch({
    headless: false,
    executablePath: `/Applications/Google Chrome.app/Contents/MacOS/Google Chrome` // MacOS
  })
  try {
    const page = await browser.newPage()
    await page.setViewport({ width: 800, height: 600 })
    await page.goto('https://bot.sannysoft.com/')
    // Give the detection page time to finish running its checks
    await page.waitForTimeout(5000)
    await page.screenshot({ path: screenshotPath, fullPage: true })
  } finally {
    // Close the browser even when a step above fails, otherwise the Chrome
    // window stays open and keeps the script from finishing.
    await browser.close()
  }
  console.log('end', screenshotPath)
}
/**
 * Registers the stealth plugin on the module-level `puppeteer` instance,
 * opens https://bot.sannysoft.com/ in the default headful browser and stores
 * a full-page screenshot at `screenshotPath`.
 *
 * @returns {Promise<void>} Resolves once the screenshot is written and the
 *   browser is closed; rejects with the first error raised by any step.
 */
async function main() {
  puppeteer.use(pluginStealth())
  console.log('start', scriptName)
  const browser = await puppeteer.launch({ headless: false })
  try {
    const page = await browser.newPage()
    await page.setViewport({ width: 800, height: 600 })
    await page.goto('https://bot.sannysoft.com/')
    // Give the detection page time to finish running its checks
    await page.waitForTimeout(5000)
    await page.screenshot({ path: screenshotPath, fullPage: true })
  } finally {
    // Close the browser even when a step above fails, otherwise the browser
    // window stays open and keeps the script from finishing.
    await browser.close()
  }
  console.log('end', screenshotPath)
}
/**
 * Opens https://bot.sannysoft.com/ in headless Google Chrome (MacOS install
 * path) through the module-level `puppeteer` instance and stores a full-page
 * screenshot at `screenshotPath`.
 *
 * @returns {Promise<void>} Resolves once the screenshot is written and the
 *   browser is closed; rejects with the first error raised by any step.
 */
async function main() {
  console.log('start', scriptName)
  const browser = await puppeteer.launch({
    headless: true,
    executablePath: `/Applications/Google Chrome.app/Contents/MacOS/Google Chrome` // MacOS
  })
  try {
    const page = await browser.newPage()
    await page.setViewport({ width: 800, height: 600 })
    await page.goto('https://bot.sannysoft.com/')
    // Give the detection page time to finish running its checks
    await page.waitForTimeout(5000)
    await page.screenshot({ path: screenshotPath, fullPage: true })
  } finally {
    // Close the browser even when a step above fails so no headless Chrome
    // process is left running behind a failed run.
    await browser.close()
  }
  console.log('end', screenshotPath)
}
/**
 * Opens https://bot.sannysoft.com/ in the default headless browser through
 * the module-level `puppeteer` instance and stores a full-page screenshot at
 * `screenshotPath`.
 *
 * @returns {Promise<void>} Resolves once the screenshot is written and the
 *   browser is closed; rejects with the first error raised by any step.
 */
async function main() {
  console.log('start', scriptName)
  const browser = await puppeteer.launch({ headless: true })
  try {
    const page = await browser.newPage()
    await page.setViewport({ width: 800, height: 600 })
    await page.goto('https://bot.sannysoft.com/')
    // Give the detection page time to finish running its checks
    await page.waitForTimeout(5000)
    await page.screenshot({ path: screenshotPath, fullPage: true })
  } finally {
    // Close the browser even when a step above fails so no headless browser
    // process is left running behind a failed run.
    await browser.close()
  }
  console.log('end', screenshotPath)
}
/**
 * Headless detection suite that is evaluated inside the page.
 *
 * Runs a series of named checks and records each check's return value in a
 * result object; a truthy value means the check detected headless/automation.
 * It relies on browser globals (`window`, `navigator`, `document`, ...) and on
 * `compareLooseVersionStrings`, which the calling test exposes to the page
 * (hence the `await` when calling it).
 *
 * @returns {Promise<Object>} Map of check name to that check's return value.
 */
async function detectHeadless() {
  const results = {}

  // Runs a single check, logs when it fires and stores its result by name
  async function test(name, fn) {
    const detectionPassed = await fn()
    if (detectionPassed) console.log(`Chrome headless detected via ${name}`)
    results[name] = detectionPassed
  }

  await test('userAgent', _ => {
    return /HeadlessChrome/.test(window.navigator.userAgent)
  })

  // navigator.webdriver behavior change since release 89.0.4339.0. See also #448
  if (await compareLooseVersionStrings(navigator.userAgent, '89.0.4339.0') >= 0) {
    await test('navigator.webdriver is not false', _ => {
      return navigator.webdriver !== false
    })
  } else {
    // Detects the --enable-automation || --headless flags
    // Will return true in headful if --enable-automation is provided
    await test('navigator.webdriver present', _ => {
      return 'webdriver' in navigator
    })

    await test('navigator.webdriver not undefined', _ => {
      return navigator.webdriver !== undefined
    })

    /* eslint-disable no-proto */
    await test('navigator.webdriver property overridden', _ => {
      return (
        Object.getOwnPropertyDescriptor(navigator.__proto__, 'webdriver') !==
        undefined
      )
    })

    // Enumerating the navigator also reveals a `webdriver` property
    await test('navigator.webdriver prop detected', _ => {
      for (const prop in navigator) {
        if (prop === 'webdriver') {
          return true
        }
      }
      return false
    })
  }

  await test('window.chrome missing', _ => {
    return /Chrome/.test(window.navigator.userAgent) && !window.chrome
  })

  // Flags the combination of a denied Notification permission with a
  // permissions API state of "prompt"
  await test('permissions API', async _ => {
    const permissionStatus = await navigator.permissions.query({
      name: 'notifications'
    })
    return (
      Notification.permission === 'denied' &&
      permissionStatus.state === 'prompt'
    )
  })

  // Checks whether `permissions.query` still looks like an untouched native
  // function; returns undefined (not detected) when no check fires
  await test('permissions API overriden', _ => {
    const permissions = window.navigator.permissions
    if (permissions.query.toString() !== 'function query() { [native code] }')
      return true
    if (
      permissions.query.toString.toString() !==
      'function toString() { [native code] }'
    )
      return true
    if (
      permissions.query.toString.hasOwnProperty('[[Handler]]') && // eslint-disable-line
      permissions.query.toString.hasOwnProperty('[[Target]]') && // eslint-disable-line
      permissions.query.toString.hasOwnProperty('[[IsRevoked]]') // eslint-disable-line
    )
      return true
    if (permissions.hasOwnProperty('query')) return true // eslint-disable-line
  })

  await test('navigator.plugins empty', _ => {
    return navigator.plugins.length === 0
  })

  await test('navigator.languages blank', _ => {
    return navigator.languages === ''
  })

  await test('iFrame for fresh window object', _ => {
    // evaluateOnNewDocument scripts don't apply within [srcdoc] (or [sandbox]) iframes
    // https://github.com/GoogleChrome/puppeteer/issues/1106#issuecomment-359313898
    const iframe = document.createElement('iframe')
    iframe.srcdoc = 'page intentionally left blank'
    document.body.appendChild(iframe)

    // Verify iframe prototype isn't touched
    const descriptors = Object.getOwnPropertyDescriptors(
      HTMLIFrameElement.prototype
    )
    if (
      descriptors.contentWindow.get.toString() !==
      'function get contentWindow() { [native code] }'
    )
      return true
    // Verify iframe isn't remapped to main window
    if (iframe.contentWindow === window) return true

    // Here we would need to rerun all tests with `iframe.contentWindow` as `window`
    // Example:
    return iframe.contentWindow.navigator.plugins.length === 0
  })

  // This detects that a devtools protocol agent is attached.
  // So it will also pass true in headful Chrome if the devtools window is attached
  await test('toString', _ => {
    let gotYou = 0
    const spooky = /./
    // Counts how often the logged object gets stringified by console.debug
    spooky.toString = function() {
      gotYou++
      return 'spooky'
    }
    console.debug(spooky)
    return gotYou > 1
  })

  return results
}

Test page with service worker

================================================ FILE: packages/puppeteer-extra-plugin-stealth/test/fixtures/dummy.html ================================================ title foo

Test page

/**
 * Tells whether the suite runs against one of the legacy puppeteer releases
 * (selected through the PUPPETEER_VERSION environment variable) for which
 * the expected fpscanner results differ.
 *
 * @returns {boolean} true only for PUPPETEER_VERSION 1.9.0 or 1.6.2
 */
const isOldPuppeteerVersion = () => {
  const legacyVersions = ['1.9.0', '1.6.2']
  // An unset variable is `undefined`, which is never part of the list
  return legacyVersions.includes(process.env.PUPPETEER_VERSION)
}
/**
 * Create a simple HTTP server. Service Workers cannot be served from file:// URIs
 *
 * Serves `./fixtures/sw.js` for `/sw.js` and the service worker dummy page
 * for every other URL, listening on a random free port.
 *
 * @returns {Promise<string>} Base URL of the started server
 */
const httpServer = async () => {
  const server = http.createServer((req, res) => {
    const isWorkerScript = req.url === '/sw.js'
    const fixture = isWorkerScript
      ? './fixtures/sw.js'
      : './fixtures/dummy-with-service-worker.html'
    const contents = fs.readFileSync(path.join(__dirname, fixture))
    res.setHeader(
      'Content-Type',
      isWorkerScript ? 'application/javascript' : 'text/html'
    )
    res.writeHead(200)
    res.end(contents)
  })

  // `listen()` returns the server itself, not a promise, so awaiting it does
  // not wait for the socket to be bound. Wait for the listening callback (or
  // a bind error) before reading the assigned port.
  await new Promise((resolve, reject) => {
    server.once('error', reject)
    // Port 0 = random free port
    server.listen(0, () => {
      server.removeListener('error', reject)
      resolve()
    })
  })

  return `http://127.0.0.1:${server.address().port}/`
}
/**
 * Launches a headless browser with the given puppeteer instance, loads the
 * dummy fixture page, injects fpCollect and returns the collected
 * fingerprint.
 *
 * @param {Object} puppeteer - Object providing `launch()` (vanilla puppeteer or an extra-wrapped one)
 * @param {Function} [pageFn] - Optional async callback invoked with the page after the
 *   fingerprint was collected; its result is returned as `pageFnResult`
 * @returns {Promise<Object>} The fingerprint properties plus `pageFnResult` (null without `pageFn`)
 */
const getFingerPrint = async (puppeteer, pageFn) => {
  const browser = await puppeteer.launch({ headless: true })
  try {
    const page = await browser.newPage()
    await page.goto('file://' + dummyHTMLPath)
    await page.addScriptTag({ path: fpCollectPath })
    const fingerPrint = await getFingerPrintFromPage(page)
    const pageFnResult = pageFn ? await pageFn(page) : null
    return { ...fingerPrint, pageFnResult }
  } finally {
    // Close the browser even when a step above throws (e.g. a failing
    // `pageFn`), so a failing test does not leave a browser process behind.
    await browser.close()
  }
}
// Expecting the input string to be in one of these formats:
// - The UA string
// - The shorter version string from Puppeteers browser.version()
// - The shortest four-integer string
/**
 * Extracts the first four-part version number found in the given string.
 *
 * @param {string} looseVersionString - Text containing a version like `89.0.4339.0`
 * @returns {number[]} The four version components as integers
 * @throws {TypeError} When the input contains no four-part version number
 */
const parseLooseVersionString = looseVersionString => {
  const match = /(\d+\.){3}\d+/.exec(String(looseVersionString))
  if (!match) {
    // Fail with the offending input instead of an opaque null dereference
    throw new TypeError(
      `No four-part version number found in: ${String(looseVersionString)}`
    )
  }
  return match[0].split('.').map(part => parseInt(part, 10))
}

/**
 * Compares the four-part version numbers contained in two loose strings.
 *
 * @param {string} version0 - First string (UA string, browser.version() output or plain version)
 * @param {string} version1 - Second string, same accepted formats
 * @returns {number} -1 when version0 is older, 1 when it is newer, 0 when both are equal
 * @throws {TypeError} When either input contains no four-part version number
 */
const compareLooseVersionStrings = (version0, version1) => {
  const parsed0 = parseLooseVersionString(version0)
  const parsed1 = parseLooseVersionString(version1)
  assert(parsed0.length === 4)
  assert(parsed1.length === 4)
  // Components are compared numerically, most significant first
  for (let i = 0; i < parsed0.length; i++) {
    if (parsed0[i] < parsed1[i]) {
      return -1
    } else if (parsed0[i] > parsed1[i]) {
      return 1
    }
  }
  return 0
}
/**
 * Manage the user data directory the browser is launched with.
 *
 * When the launch options carry no `userDataDir`, a temporary directory is
 * created and written back into the options. Files from the `files` option
 * and from other plugins (data entries named `userDataDirFile`) are written
 * into the `Default` profile folder before launch. On disconnect the
 * directory is removed when the options below call for it.
 *
 * Further reading:
 * https://chromium.googlesource.com/chromium/src/+/master/docs/user_data_dir.md
 *
 * @param {Object} opts - Options
 * @param {boolean} [opts.deleteTemporary=true] - Delete a directory created by this plugin on disconnect
 * @param {boolean} [opts.deleteExisting=false] - Delete the directory on disconnect even when it was supplied through the launch options
 * @param {Array<Object>} [opts.files=[]] - Files to write, each shaped `{ target: 'Profile', file, contents }`
 * @param {string} [opts.folderPath=os.tmpdir()] - Parent folder for temporary directories
 * @param {string} [opts.folderPrefix='puppeteer_dev_profile-'] - Name prefix for temporary directories
 */
class Plugin extends PuppeteerExtraPlugin {
  constructor(opts = {}) {
    super(opts)

    this._userDataDir = null
    this._isTemporary = false

    const defaults = {
      deleteTemporary: true,
      deleteExisting: false,
      files: []
    }
    // Follow Puppeteers temporary user data dir naming convention by default
    defaults.folderPath = os.tmpdir()
    defaults.folderPrefix = 'puppeteer_dev_profile-'

    this._opts = Object.assign(defaults, opts)
    debug('initialized', this._opts)
  }

  get name() {
    return 'user-data-dir'
  }

  get requirements() {
    return new Set(['runLast', 'dataFromPlugins'])
  }

  /**
   * Whether the directory of the current launch should be removed on disconnect.
   *
   * @returns {boolean}
   */
  get shouldDeleteDirectory() {
    if (this._isTemporary && this._opts.deleteTemporary) {
      return true
    }
    return this._opts.deleteExisting
  }

  /**
   * Path prefix handed to `fs.mkdtemp` (folder path joined with the name prefix).
   *
   * @returns {string}
   */
  get temporaryDirectoryPath() {
    return path.join(this._opts.folderPath, this._opts.folderPrefix)
  }

  /**
   * Path of the `Default` profile folder inside the user data directory.
   *
   * @returns {string}
   */
  get defaultProfilePath() {
    return path.join(this._userDataDir, 'Default')
  }

  /**
   * Create a fresh temporary directory and mark it as owned by this plugin.
   */
  async makeTemporaryDirectory() {
    this._userDataDir = await mkdtempAsync(this.temporaryDirectoryPath)
    this._isTemporary = true
  }

  /**
   * Remove the current user data directory (fire and forget).
   * Failures are only reported through the debug log.
   */
  deleteUserDataDir() {
    debug('removeUserDataDir', this._userDataDir)
    if (!this._userDataDir) {
      debug('No userDataDir, not running rimraf')
      return
    }
    // We're using rimraf here because it reports errors and doesn't seem to freeze the process.
    // If resources are busy or locked by chrome, try again 4 times, then give up
    // (an overall timeout of roughly 400ms).
    rimraf(
      this._userDataDir,
      {
        maxBusyTries: 4
      },
      err => {
        debug(err)
      }
    )
  }

  /**
   * Write the files collected from other plugins and from the `files` option
   * into the profile folder. Entries whose `target` is not `'Profile'` are
   * skipped with a warning; write failures are warned about, not thrown.
   */
  async writeFilesToProfile() {
    const filesFromPlugins = this.getDataFromPlugins('userDataDirFile').map(
      d => d.value
    )
    const files = [].concat(filesFromPlugins, this._opts.files)
    if (!files.length) {
      return
    }
    for (const file of files) {
      if (file.target !== 'Profile') {
        console.warn(`Warning: Ignoring file with invalid target`, file)
        continue
      }
      const filePath = path.join(this.defaultProfilePath, file.file)
      try {
        await fse.outputFile(filePath, file.contents)
        debug(`Wrote file`, filePath)
      } catch (err) {
        console.warn('Warning: Failure writing file', filePath, file, err)
      }
    }
  }

  /**
   * Make sure the launch options carry a user data directory, then populate it.
   *
   * @param {Object} options - Launch options; `userDataDir` is filled in when missing
   */
  async beforeLaunch(options) {
    this._userDataDir = options.userDataDir
    // The flag describes the current launch only. Without this reset, an
    // instance that created a temporary directory for an earlier launch would
    // still treat a caller-supplied directory as temporary and delete it on
    // disconnect, despite `deleteExisting: false`.
    this._isTemporary = false
    if (!this._userDataDir) {
      await this.makeTemporaryDirectory()
      options.userDataDir = this._userDataDir
      debug('created custom dir', options.userDataDir)
    }
    await this.writeFilesToProfile()
  }

  /**
   * Remove the user data directory if the options call for it.
   */
  async onDisconnected() {
    debug('onDisconnected')
    if (this.shouldDeleteDirectory) {
      this.deleteUserDataDir()
    }
  }
}
function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-user-data-dir/package.json ================================================ { "name": "puppeteer-extra-plugin-user-data-dir", "version": "2.4.1", "description": "Custom user data directory for puppeteer.", "main": "index.js", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "scripts": { "docs": "node -e 0", "lint": "eslint --ext .js .", "test": "run-p lint", "test-ci": "run-s test" }, "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "user-data", "userDataDir", "profile", "chrome", "headless", "pupeteer" ], "devDependencies": { "ava": "2.4.0", "npm-run-all": "^4.1.5", "puppeteer": "^2.0.0" }, "dependencies": { "debug": "^4.1.1", "fs-extra": "^10.0.0", "puppeteer-extra-plugin": "^3.2.3", "rimraf": "^3.0.2" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin-user-data-dir/readme.md ================================================ # puppeteer-extra-plugin-user-data-dir > A plugin for [puppeteer-extra](https://github.com/berstend/puppeteer-extra). 
### Install ```bash yarn add puppeteer-extra-plugin-user-data-dir ``` ## API #### Table of Contents - [Plugin](#plugin) ### [Plugin](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-user-data-dir/index.js#L19-L113) **Extends: PuppeteerExtraPlugin** Further reading: <https://chromium.googlesource.com/chromium/src/+/master/docs/user_data_dir.md> Type: `function (opts)` - `opts` (optional, default `{}`) * * * ================================================ FILE: packages/puppeteer-extra-plugin-user-preferences/index.js ================================================ 'use strict' const merge = require('deepmerge') const { PuppeteerExtraPlugin } = require('puppeteer-extra-plugin') /** * Launch puppeteer with arbitrary user preferences. * * The user defined preferences will be merged with preferences set by other plugins. * Plugins can add user preferences by exposing a data entry with the name `userPreferences`. * * Overview: * https://chromium.googlesource.com/chromium/src/+/master/chrome/common/pref_names.cc * * @param {Object} opts - Options * @param {Object} [opts.userPrefs={}] - An object containing the preferences.
* * @example * const puppeteer = require('puppeteer-extra') * puppeteer.use(require('puppeteer-extra-plugin-user-preferences')({userPrefs: { * webkit: { * webprefs: { * default_font_size: 22 * } * } * }})) * const browser = await puppeteer.launch() */ class Plugin extends PuppeteerExtraPlugin { constructor(opts = {}) { super(opts) this._userPrefsFromPlugins = {} const defaults = { userPrefs: {} } this._opts = Object.assign(defaults, opts) } get name() { return 'user-preferences' } get requirements() { return new Set(['runLast', 'dataFromPlugins']) } get dependencies() { return new Set(['user-data-dir']) } get data() { return [ { name: 'userDataDirFile', value: { target: 'Profile', file: 'Preferences', contents: JSON.stringify(this.combinedPrefs, null, 2) } } ] } get combinedPrefs() { return merge(this._opts.userPrefs, this._userPrefsFromPlugins) } async beforeLaunch(options) { this._userPrefsFromPlugins = merge.all( this.getDataFromPlugins('userPreferences').map(d => d.value) ) this.debug('_userPrefsFromPlugins', this._userPrefsFromPlugins) } } module.exports = function(pluginConfig) { return new Plugin(pluginConfig) } ================================================ FILE: packages/puppeteer-extra-plugin-user-preferences/package.json ================================================ { "name": "puppeteer-extra-plugin-user-preferences", "version": "2.4.1", "description": "Launch puppeteer with arbitrary user preferences.", "main": "index.js", "repository": "berstend/puppeteer-extra", "author": "berstend", "license": "MIT", "scripts": { "docs": "node -e 0", "lint": "eslint --ext .js .", "test": "run-p lint", "test-ci": "run-s test" }, "engines": { "node": ">=8" }, "keywords": [ "puppeteer", "puppeteer-extra", "puppeteer-extra-plugin", "user-prefs", "user-preferences", "chrome", "headless", "pupeteer" ], "devDependencies": { "ava": "2.4.0", "npm-run-all": "^4.1.5", "puppeteer": "^2.0.0" }, "dependencies": { "debug": "^4.1.1", "deepmerge": "^4.2.2", 
"puppeteer-extra-plugin": "^3.2.3", "puppeteer-extra-plugin-user-data-dir": "^2.4.1" }, "peerDependencies": { "playwright-extra": "*", "puppeteer-extra": "*" }, "peerDependenciesMeta": { "puppeteer-extra": { "optional": true }, "playwright-extra": { "optional": true } }, "gitHead": "babb041828cab50c525e0b9aab02d58f73416ef3" } ================================================ FILE: packages/puppeteer-extra-plugin-user-preferences/readme.md ================================================ # puppeteer-extra-plugin-user-preferences > A plugin for [puppeteer-extra](https://github.com/berstend/puppeteer-extra). ### Install ```bash yarn add puppeteer-extra-plugin-user-preferences ``` ## API #### Table of Contents - [Plugin](#plugin) ### [Plugin](https://github.com/berstend/puppeteer-extra/blob/db57ea66cf10d407cf63af387892492e495a84f2/packages/puppeteer-extra-plugin-user-preferences/index.js#L30-L73) **Extends: PuppeteerExtraPlugin** Launch puppeteer with arbitrary user preferences. The user defined preferences will be merged with preferences set by other plugins. Plugins can add user preferences by exposing a data entry with the name `userPreferences`. Overview: <https://chromium.googlesource.com/chromium/src/+/master/chrome/common/pref_names.cc> Type: `function (opts)` - `opts` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** Options (optional, default `{}`) - `opts.userPrefs` **[Object](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Object)** An object containing the preferences. (optional, default `{}`) Example: ```javascript const puppeteer = require('puppeteer-extra') puppeteer.use(require('puppeteer-extra-plugin-user-preferences')({userPrefs: { webkit: { webprefs: { default_font_size: 22 } } }})) const browser = await puppeteer.launch() ``` * * *