Repository: website-scraper/website-scraper-puppeteer
Branch: master
Commit: 5d55fe463900
Files: 18
Total size: 20.1 KB
Directory structure:
gitextract_5f61e0q7/
├── .eslintrc.yml
├── .github/
│ ├── dependabot.yml
│ └── workflows/
│ ├── codeql.yml
│ ├── node.js.yml
│ ├── publish.yml
│ ├── sponsors.yml
│ └── stale.yml
├── .gitignore
├── LICENSE
├── README.md
├── lib/
│ ├── browserUtils/
│ │ ├── .eslintrc.yml
│ │ └── scrollToBottom.js
│ ├── index.js
│ └── logger.js
├── package.json
└── test/
├── mock/
│ ├── index.html
│ └── navigation.html
└── puppeteer-plugin.test.js
================================================
FILE CONTENTS
================================================
================================================
FILE: .eslintrc.yml
================================================
extends: "eslint:recommended"
parserOptions:
ecmaVersion: 8
sourceType: "module"
env:
node: true
es6: true
rules:
consistent-return: "error"
curly: "error"
default-case: "error"
dot-notation: "error"
eqeqeq: "error"
no-extend-native: "error"
no-implicit-coercion: "error"
no-loop-func: "error"
no-multi-spaces: "error"
no-throw-literal: "error"
global-require: "error"
no-path-concat: "error"
brace-style: ["error", "1tbs", {allowSingleLine: true}]
camelcase: "error"
consistent-this: ["error", "self"]
indent: ["error", "tab", {SwitchCase: 1}]
linebreak-style: ["error", "unix"]
eol-last: "error"
quotes: ["error", "single"]
semi: "error"
space-infix-ops: "error"
space-unary-ops: "error"
func-names: "warn"
space-before-function-paren: "warn"
no-spaced-func: "warn"
keyword-spacing: "error"
space-before-blocks: "error"
no-console: "error"
================================================
FILE: .github/dependabot.yml
================================================
version: 2
updates:
- package-ecosystem: "npm"
directory: "/"
assignees:
- "s0ph1e"
open-pull-requests-limit: 10
schedule:
interval: "weekly"
- package-ecosystem: "github-actions"
directory: "/"
assignees:
- "aivus"
schedule:
interval: "weekly"
================================================
FILE: .github/workflows/codeql.yml
================================================
name: "CodeQL"
on:
push:
branches: ["master"]
pull_request:
branches: ["master"]
schedule:
- cron: '0 1 * * 2'
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
matrix:
language: [ 'javascript' ]
steps:
- name: Checkout repository
uses: actions/checkout@v6
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
with:
languages: ${{ matrix.language }}
- name: Autobuild
uses: github/codeql-action/autobuild@v4
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4
with:
category: "/language:${{matrix.language}}"
================================================
FILE: .github/workflows/node.js.yml
================================================
name: Node.js CI
on:
push:
branches: [ master ]
pull_request:
branches: [ master ]
schedule:
- cron: '17 2 * * *'
workflow_dispatch: ~
jobs:
test:
timeout-minutes: 10
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
node-version:
- 20
- 22
- 24
- current
os:
- ubuntu-latest
- windows-latest
include:
- node-version: 24
os: macos-latest
steps:
- uses: actions/checkout@v6
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v6
with:
node-version: ${{ matrix.node-version }}
- run: npm i
- name: Disable AppArmor
if: ${{ matrix.os == 'ubuntu-latest' }}
run: echo 0 | sudo tee /proc/sys/kernel/apparmor_restrict_unprivileged_userns
- run: npm test
- run: npm run eslint
if: ${{ matrix.node-version == '24' && matrix.os == 'ubuntu-latest' }}
- name: Publish Qlty code coverage
if: ${{ matrix.node-version == '24' && matrix.os == 'ubuntu-latest' }}
uses: qltysh/qlty-action/coverage@v2
with:
token: ${{ secrets.QLTY_COVERAGE_TOKEN }}
files: coverage/lcov.info
================================================
FILE: .github/workflows/publish.yml
================================================
name: Create a tag and publish to npm
on:
workflow_dispatch:
inputs:
bump:
description: 'Version bump type'
type: choice
required: true
default: 'minor'
options:
- patch
- minor
- major
- premajor
- preminor
- prepatch
- prerelease
preid:
description: 'Prerelease identifier (see "npm version") for pre* bumps'
type: string
required: false
npmPreTag:
description: 'NPM tag used for all pre* bumps'
type: string
default: 'next'
required: false
dryRun:
description: 'Run in "dry run" mode'
type: boolean
default: false
required: true
permissions:
id-token: write
contents: write
jobs:
publish:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v6
with:
# Required to allow push to master without the checks/PR
token: ${{ secrets.GH_PUSH_TOKEN }}
- name: Set up Node.js
uses: actions/setup-node@v6
with:
node-version: '24'
registry-url: 'https://registry.npmjs.org'
- name: Update npm
run: npm install -g npm@latest
- name: Install dependencies
run: npm install
- name: List of installed dependencies
run: npm ls -a
- name: Verifying provenance attestations
run: npm audit signatures
- name: Disable AppArmor
run: echo 0 | sudo tee /proc/sys/kernel/apparmor_restrict_unprivileged_userns
- name: Run tests
run: npm test
- name: Bump version and create tag
id: bump-version
env:
PREID_FLAG: ${{ startsWith(inputs.bump, 'pre') && inputs.preid && format('--preid {0}', inputs.preid) || '' }}
run: |
git config --global user.name "github-actions[bot]"
git config --global user.email "github-actions[bot]@users.noreply.github.com"
TAG=$(npm version ${{ github.event.inputs.bump }} $PREID_FLAG -m "Release %s")
echo "Created tag: $TAG"
echo "tag=$TAG" >> "$GITHUB_OUTPUT"
- name: Publish to npm
env:
DRY_RUN_FLAG: ${{ inputs.dryRun && '--dry-run' || '' }}
TAG_FLAG: ${{ startsWith(inputs.bump, 'pre') && format('--tag {0}', inputs.npmPreTag) || ''}}
run: npm publish --provenance --access=public $DRY_RUN_FLAG $TAG_FLAG
- name: Push changes to master
if: ${{ !inputs.dryRun }}
run: |
git push origin master --follow-tags
- name: Create GitHub Release
if: ${{ !inputs.dryRun }}
uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda # v3.0.0
with:
tag_name: ${{ steps.bump-version.outputs.tag }}
generate_release_notes: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
================================================
FILE: .github/workflows/sponsors.yml
================================================
name: Generate Sponsors list
on:
workflow_dispatch:
schedule:
- cron: '31 6 * * *'
permissions:
contents: write
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- name: Checkout 🛎️
uses: actions/checkout@v6
with:
# Required to allow push to master without the checks/PR
token: ${{ secrets.GH_PUSH_TOKEN }}
- name: Generate Sponsors 💖
uses: JamesIves/github-sponsors-readme-action@2fd9142e765f755780202122261dc85e78459405 # v1.6.0
with:
token: ${{ secrets.SOFIIA_SPONSORS_READ_TOKEN }}
file: 'README.md'
active-only: false
include-private: true
- name: Commit changes
run: |
git config --global user.name "github-actions[bot]"
git config --global user.email "github-actions[bot]@users.noreply.github.com"
git add README.md
# Check if there are any staged changes
if git diff --cached --quiet; then
echo "No changes to commit."
exit 0
fi
git commit -m "Update list of sponsors"
git push origin master
================================================
FILE: .github/workflows/stale.yml
================================================
# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time.
#
# You can adjust the behavior by modifying this file.
# For more information, see:
# https://github.com/actions/stale
name: Mark stale issues and pull requests
on:
workflow_dispatch: ~
schedule:
- cron: '39 3 * * *'
jobs:
stale:
runs-on: ubuntu-latest
permissions:
issues: write
steps:
- uses: actions/stale@v10
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
days-before-stale: 60
days-before-close: 7
# Do not stale PRs
days-before-pr-stale: -1
days-before-pr-close: -1
exempt-issue-labels: 'bug,maybe-later,help wanted'
stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.'
stale-issue-label: 'wontfix'
# debug-only: true
================================================
FILE: .gitignore
================================================
node_modules
package-lock.json
.idea
coverage
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2018-2023 Sofiia Antypenko
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
[](https://www.npmjs.org/package/website-scraper-puppeteer)
[](https://www.npmjs.org/package/website-scraper-puppeteer)
[](https://github.com/website-scraper/website-scraper-puppeteer)
[](https://qlty.sh/gh/website-scraper/projects/website-scraper-puppeteer)
# website-scraper-puppeteer
Plugin for [website-scraper](https://github.com/website-scraper/node-website-scraper) which returns html for dynamic websites using [puppeteer](https://github.com/puppeteer/puppeteer).
## Sponsors
Maintenance of this project is made possible by all the [contributors](https://github.com/website-scraper/website-scraper-puppeteer/graphs/contributors) and [sponsors](https://github.com/sponsors/s0ph1e).
If you'd like to sponsor this project and have your avatar or company logo appear below [click here](https://github.com/sponsors/s0ph1e). 💖
<!-- sponsors --><a href="https://github.com/aivus"><img src="https://github.com/aivus.png" width="60px" alt="User avatar: Illia Antypenko" /></a><a href="https://github.com/swissspidy"><img src="https://github.com/swissspidy.png" width="60px" alt="User avatar: Pascal Birchler" /></a><a href="https://github.com/itscarlosrufo"><img src="https://github.com/itscarlosrufo.png" width="60px" alt="User avatar: Carlos Rufo" /></a><a href="https://github.com/francescamarano"><img src="https://github.com/francescamarano.png" width="60px" alt="User avatar: Francesca Marano" /></a><a href="https://github.com/github"><img src="https://github.com/github.png" width="60px" alt="User avatar: GitHub" /></a><a href="https://github.com/Belrestro"><img src="https://github.com/Belrestro.png" width="60px" alt="User avatar: Andrew Vorobiov" /></a><a href="https://github.com/Effiezhu"><img src="https://github.com/Effiezhu.png" width="60px" alt="User avatar: " /></a><a href="https://github.com/slicemedia"><img src="https://github.com/slicemedia.png" width="60px" alt="User avatar: " /></a><!-- sponsors -->
## Requirements
* nodejs version >= 20
* website-scraper version >= 5
## Installation
```sh
npm install website-scraper website-scraper-puppeteer
```
## Usage
```javascript
import scrape from 'website-scraper';
import PuppeteerPlugin from 'website-scraper-puppeteer';
await scrape({
urls: ['https://www.instagram.com/gopro/'],
directory: '/path/to/save',
plugins: [
new PuppeteerPlugin({
launchOptions: { headless: "new" }, /* optional */
gotoOptions: { waitUntil: "networkidle0" }, /* optional */
scrollToBottom: { timeout: 10000, viewportN: 10 }, /* optional */
})
]
});
```
Puppeteer plugin constructor accepts next params:
* `launchOptions` - *(optional)* - puppeteer launch options, can be found in [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/main/docs/api/puppeteer.puppeteerlaunchoptions.md)
* `gotoOptions` - *(optional)* - puppeteer page.goto options, can be found in [puppeteer docs](https://github.com/puppeteer/puppeteer/blob/main/docs/api/puppeteer.frame.goto.md#parameters)
* `scrollToBottom` - *(optional)* - in some cases, the page needs to be scrolled down to render its assets (lazyloading). Because some pages can be really endless, the scrolldown process can be interrupted before reaching the bottom when one or both of the below limitations are reached:
* `timeout` - in milliseconds
* `viewportN` - viewport height multiplier
## How it works
It starts Chromium in headless mode, which simply opens the page and waits until it is loaded.
It is far from ideal because probably you need to wait until some resource is loaded or click some button or log in. Currently this module doesn't support such functionality.
================================================
FILE: lib/browserUtils/.eslintrc.yml
================================================
extends: '../../.eslintrc.yml'
env:
browser: true
================================================
FILE: lib/browserUtils/scrollToBottom.js
================================================
/**
 * Runs inside the browser page context (via puppeteer's page.evaluate):
 * repeatedly scrolls the window down in fixed steps until the bottom of the
 * document is reached, the time budget is spent, or the scrolled distance
 * exceeds `viewportN` viewport heights.
 *
 * @param {number} timeout - maximum scrolling time, in milliseconds
 * @param {number} viewportN - viewport-height multiplier capping the total scrolled distance
 */
export default async (timeout, viewportN) => {
	const STEP_PX = 200; // distance scrolled per tick
	const STEP_MS = 200; // tick interval

	await new Promise((resolve) => {
		const maxScroll = window.innerHeight * viewportN;
		let scrolled = 0;
		let elapsed = 0;

		const timer = setInterval(() => {
			elapsed += STEP_MS;
			window.scrollBy(0, STEP_PX);
			scrolled += STEP_PX;

			const reachedBottom = scrolled >= document.body.scrollHeight;
			const outOfTime = elapsed >= timeout;
			const reachedCap = scrolled >= maxScroll;
			if (reachedBottom || outOfTime || reachedCap) {
				clearInterval(timer);
				resolve();
			}
		}, STEP_MS);
	});
};
================================================
FILE: lib/index.js
================================================
import puppeteer from '@website-scraper/puppeteer-version-wrapper';
import logger from './logger.js';
import scrollToBottomBrowser from './browserUtils/scrollToBottom.js';
class PuppeteerPlugin {
	/**
	 * website-scraper plugin that re-renders HTML responses through puppeteer
	 * so JS-generated content is captured.
	 *
	 * @param {Object} [options]
	 * @param {Object} [options.launchOptions] - forwarded to puppeteer.launch()
	 * @param {Object} [options.gotoOptions] - forwarded to page.goto()
	 * @param {?Object} [options.scrollToBottom] - {timeout, viewportN}; when set,
	 *   each page is scrolled down before its HTML is captured (for lazy-loaded assets)
	 */
	constructor ({
		launchOptions = {},
		gotoOptions = {},
		scrollToBottom = null,
	} = {}) {
		this.launchOptions = launchOptions;
		this.gotoOptions = gotoOptions;
		this.scrollToBottom = scrollToBottom;
		this.browser = null; // launched lazily in the 'beforeStart' action
		this.headers = {}; // latest non-empty request headers, replayed on each puppeteer page
		logger.info('init plugin', { launchOptions, scrollToBottom });
	}

	/**
	 * Registers this plugin's actions with website-scraper.
	 * @param {Function} registerAction
	 */
	apply (registerAction) {
		// Launch a single shared browser for the whole scraping run.
		registerAction('beforeStart', async () => {
			this.browser = await puppeteer.launch(this.launchOptions);
		});

		// Capture the headers website-scraper sends so puppeteer pages use the same ones.
		registerAction('beforeRequest', async ({requestOptions}) => {
			if (hasValues(requestOptions.headers)) {
				this.headers = Object.assign({}, requestOptions.headers);
			}
			return {requestOptions};
		});

		// For HTML responses, re-open the URL in puppeteer and return the rendered markup.
		registerAction('afterResponse', async ({response}) => {
			const contentType = response.headers['content-type'];
			const isHtml = contentType && contentType.split(';')[0] === 'text/html';
			if (isHtml) {
				const url = response.url;
				const page = await this.browser.newPage();

				if (hasValues(this.headers)) {
					logger.info('set headers to puppeteer page', this.headers);
					await page.setExtraHTTPHeaders(this.headers);
				}

				await page.goto(url, this.gotoOptions);

				if (this.scrollToBottom) {
					await scrollToBottom(page, this.scrollToBottom.timeout, this.scrollToBottom.viewportN);
				}

				const content = await page.content();
				await page.close();

				// convert utf-8 -> binary string because website-scraper needs binary
				return Buffer.from(content).toString('binary');
			} else {
				// Non-HTML resources pass through untouched.
				return response.body;
			}
		});

		// Tear the shared browser down once scraping finishes.
		registerAction('afterFinish', () => this.browser && this.browser.close());
	}
}
/**
 * Truthiness check for "non-empty plain object".
 * Falsy inputs (null/undefined/…) are returned unchanged — matching the
 * original short-circuit — so callers must only rely on truthiness.
 */
function hasValues (obj) {
	if (!obj) {
		return obj;
	}
	return Object.keys(obj).length > 0;
}
/**
 * Scrolls a puppeteer page toward the bottom by running the browser-side
 * scroll routine inside the page context.
 *
 * @param {Object} page - puppeteer Page instance
 * @param {number} timeout - maximum scrolling time in milliseconds
 * @param {number} viewportN - viewport-height multiplier capping the scrolled distance
 */
async function scrollToBottom (page, timeout, viewportN) {
	logger.info(`scroll puppeteer page to bottom ${viewportN} times with timeout = ${timeout}`);
	// scrollToBottomBrowser is serialized and executed in the browser, not in Node.
	await page.evaluate(scrollToBottomBrowser, timeout, viewportN);
}
export default PuppeteerPlugin;
================================================
FILE: lib/logger.js
================================================
import debug from 'debug';

const appName = 'website-scraper-puppeteer';

// Build one namespaced debug logger per level,
// e.g. `website-scraper-puppeteer:info`, enabled via the DEBUG env variable.
const logger = ['error', 'warn', 'info', 'debug', 'log'].reduce((acc, level) => {
	acc[level] = debug(`${appName}:${level}`);
	return acc;
}, {});

export default logger;
================================================
FILE: package.json
================================================
{
"name": "website-scraper-puppeteer",
"version": "2.0.0",
"description": "Plugin for website-scraper which returns html for dynamic websites using puppeteer",
"readmeFilename": "README.md",
"type": "module",
"exports": {
".": "./lib/index.js"
},
"keywords": [
"website-scraper",
"puppeteer",
"chromium",
"chrome",
"headless",
"html"
],
"dependencies": {
"debug": "^4.1.1",
"@website-scraper/puppeteer-version-wrapper": "^1.0.0"
},
"peerDependencies": {
"website-scraper": "^6.0.0"
},
"devDependencies": {
"c8": "^11.0.0",
"chai": "^6.0.1",
"eslint": "^8.5.0",
"finalhandler": "^2.1.0",
"fs-extra": "^11.1.0",
"mocha": "^11.0.1",
"serve-static": "^2.2.0",
"website-scraper": "^6.0.0"
},
"scripts": {
"test": "c8 --all --reporter=text --reporter=lcov mocha --recursive --timeout 15000",
"eslint": "eslint lib/**"
},
"repository": {
"type": "git",
"url": "git+https://github.com/website-scraper/website-scraper-puppeteer.git"
},
"author": "Sofiia Antypenko <sofiia@antypenko.dev>",
"license": "MIT",
"bugs": {
"url": "https://github.com/website-scraper/website-scraper-puppeteer/issues"
},
"homepage": "https://github.com/website-scraper/website-scraper-puppeteer#readme",
"files": [
"lib"
],
"engines": {
"node": ">=20"
}
}
================================================
FILE: test/mock/index.html
================================================
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Test</title>
</head>
<body>
<div id="root"></div>
<div id="special-characters-test"></div>
<script>
window.onload = function() {
document.getElementById('root').innerText = 'Hello world from JS!';
/**
* TODO: Original innerText "저는 7년 동안 한국에서 살았어요. Слава Україні!" was changed due to issues
* with cheerio and website-scraper itself.
* See https://github.com/cheeriojs/cheerio/pull/2280
*/
document.getElementById('special-characters-test').innerText = '7년 동안 한국에서 살았어요. Слава Україні!';
};
</script>
</body>
</html>
================================================
FILE: test/mock/navigation.html
================================================
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Test</title>
</head>
<body>
<div id="root"></div>
<script>
window.onload = function() {
window.location.replace('http://example.com');
document.getElementById('root').innerText = 'Navigation blocked!';
};
</script>
</body>
</html>
================================================
FILE: test/puppeteer-plugin.test.js
================================================
import { expect } from 'chai';
import http from 'http';
import finalhandler from 'finalhandler';
import serveStatic from 'serve-static';
import fs from 'fs-extra';
import scrape from 'website-scraper';
import PuppeteerPlugin from '../lib/index.js';
const directory = './test/tmp';
const SERVE_WEBSITE_PORT = 4567;

// Integration test: scrape a locally-served page whose content is produced by
// client-side JS, and assert the plugin captured the rendered markup.
describe('Puppeteer plugin test', () => {
	let result, content, server;

	before('start webserver', () => server = startWebserver(SERVE_WEBSITE_PORT));
	after('stop webserver', () => server.close());

	describe('Dynamic content', () => {
		before('scrape website', async () => {
			result = await scrape({
				urls: [`http://localhost:${SERVE_WEBSITE_PORT}`],
				directory: directory,
				plugins: [ new PuppeteerPlugin({
					scrollToBottom: { timeout: 50, viewportN: 10 }
				}) ]
			});
		});

		before('get content from file', () => {
			content = fs.readFileSync(`${directory}/${result[0].filename}`).toString();
		});

		after('delete dir', () => fs.removeSync(directory));

		it('should have 1 item in result array', () => {
			expect(result.length).eql(1);
		});

		it('should render dynamic website', () => {
			expect(content).to.contain('<div id="root">Hello world from JS!</div>');
		});

		it('should render special characters correctly', () => {
			expect(content).to.contain('<div id="special-characters-test">7년 동안 한국에서 살았어요. Слава Україні!</div>');
		});
	});
});
/**
 * Serves ./test/mock as static files on the given port.
 * @param {number} [port=3000]
 * @returns {http.Server} the listening server instance
 */
function startWebserver(port = 3000) {
	const handler = serveStatic('./test/mock', {'index': ['index.html']});
	const server = http.createServer((req, res) => {
		handler(req, res, finalhandler(req, res));
	});
	return server.listen(port);
}
gitextract_5f61e0q7/
├── .eslintrc.yml
├── .github/
│ ├── dependabot.yml
│ └── workflows/
│ ├── codeql.yml
│ ├── node.js.yml
│ ├── publish.yml
│ ├── sponsors.yml
│ └── stale.yml
├── .gitignore
├── LICENSE
├── README.md
├── lib/
│ ├── browserUtils/
│ │ ├── .eslintrc.yml
│ │ └── scrollToBottom.js
│ ├── index.js
│ └── logger.js
├── package.json
└── test/
├── mock/
│ ├── index.html
│ └── navigation.html
└── puppeteer-plugin.test.js
SYMBOL INDEX (7 symbols across 2 files)
FILE: lib/index.js
class PuppeteerPlugin (line 5) | class PuppeteerPlugin {
method constructor (line 6) | constructor ({
method apply (line 20) | apply (registerAction) {
function hasValues (line 64) | function hasValues (obj) {
function scrollToBottom (line 69) | async function scrollToBottom (page, timeout, viewportN) {
FILE: test/puppeteer-plugin.test.js
constant SERVE_WEBSITE_PORT (line 10) | const SERVE_WEBSITE_PORT = 4567;
function startWebserver (line 47) | function startWebserver(port = 3000) {
Condensed preview — 18 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (23K chars).
[
{
"path": ".eslintrc.yml",
"chars": 906,
"preview": "extends: \"eslint:recommended\"\nparserOptions:\n ecmaVersion: 8\n sourceType: \"module\"\nenv:\n node: true\n es6: true\nrules"
},
{
"path": ".github/dependabot.yml",
"chars": 301,
"preview": "version: 2\nupdates:\n - package-ecosystem: \"npm\"\n directory: \"/\"\n assignees:\n - \"s0ph1e\"\n open-pull-reques"
},
{
"path": ".github/workflows/codeql.yml",
"chars": 790,
"preview": "name: \"CodeQL\"\n\non:\n push:\n branches: [\"master\"]\n pull_request:\n branches: [\"master\"]\n schedule:\n - cron: '0"
},
{
"path": ".github/workflows/node.js.yml",
"chars": 1283,
"preview": "name: Node.js CI\n\non:\n push:\n branches: [ master ]\n pull_request:\n branches: [ master ]\n schedule:\n - cron: "
},
{
"path": ".github/workflows/publish.yml",
"chars": 2963,
"preview": "name: Create a tag and publish to npm\n\non:\n workflow_dispatch:\n inputs:\n bump:\n description: 'Version bu"
},
{
"path": ".github/workflows/sponsors.yml",
"chars": 1160,
"preview": "name: Generate Sponsors list\non:\n workflow_dispatch:\n schedule:\n - cron: '31 6 * * *'\npermissions:\n contents: writ"
},
{
"path": ".github/workflows/stale.yml",
"chars": 1045,
"preview": "# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time.\n#\n# You c"
},
{
"path": ".gitignore",
"chars": 46,
"preview": "node_modules\npackage-lock.json\n.idea\ncoverage\n"
},
{
"path": "LICENSE",
"chars": 1078,
"preview": "MIT License\n\nCopyright (c) 2018-2023 Sofiia Antypenko\n\nPermission is hereby granted, free of charge, to any person obtai"
},
{
"path": "README.md",
"chars": 4143,
"preview": "[](https://www.npmjs.org/package/websit"
},
{
"path": "lib/browserUtils/.eslintrc.yml",
"chars": 52,
"preview": "extends: '../../.eslintrc.yml'\nenv:\n browser: true\n"
},
{
"path": "lib/browserUtils/scrollToBottom.js",
"chars": 463,
"preview": "export default async (timeout, viewportN) => {\n\tawait new Promise((resolve) => {\n\t\tlet totalHeight = 0, distance = 200, "
},
{
"path": "lib/index.js",
"chars": 2144,
"preview": "import puppeteer from '@website-scraper/puppeteer-version-wrapper';\nimport logger from './logger.js';\nimport scrollToBot"
},
{
"path": "lib/logger.js",
"chars": 267,
"preview": "import debug from 'debug';\n\nconst appName = 'website-scraper-puppeteer';\nconst logLevels = ['error', 'warn', 'info', 'de"
},
{
"path": "package.json",
"chars": 1379,
"preview": "{\n \"name\": \"website-scraper-puppeteer\",\n \"version\": \"2.0.0\",\n \"description\": \"Plugin for website-scraper which return"
},
{
"path": "test/mock/index.html",
"chars": 612,
"preview": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n\t<meta charset=\"UTF-8\">\n\t<title>Test</title>\n</head>\n<body>\n\n<div id=\"root\"></di"
},
{
"path": "test/mock/navigation.html",
"chars": 311,
"preview": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n\t<meta charset=\"UTF-8\">\n\t<title>Test</title>\n</head>\n<body>\n\n<div id=\"root\"></di"
},
{
"path": "test/puppeteer-plugin.test.js",
"chars": 1661,
"preview": "import { expect } from 'chai';\nimport http from 'http';\nimport finalhandler from 'finalhandler';\nimport serveStatic from"
}
]
About this extraction
This page contains the full source code of the website-scraper/website-scraper-puppeteer GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 18 files (20.1 KB), approximately 6.0k tokens, and a symbol index with 7 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.